From 8efe6e33f6df6c7ea613acc36fa1d75c42782dae Mon Sep 17 00:00:00 2001 From: C-K-Loan Date: Fri, 19 Mar 2021 09:50:38 +0100 Subject: [PATCH 1/2] Integrated return_spark_df --- nlu/pipeline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nlu/pipeline.py b/nlu/pipeline.py index 27e68ddd..7a34fa90 100644 --- a/nlu/pipeline.py +++ b/nlu/pipeline.py @@ -1194,7 +1194,7 @@ def save(self, path, component='entire_pipeline', overwrite=False): print(f'Stored model in {path}') # else : print('Please fit untrained pipeline first or predict on a String to save it') def predict(self, data, output_level='', positions=False, keep_stranger_features=True, metadata=False, - multithread=True, drop_irrelevant_cols=True, verbose=False): + multithread=True, drop_irrelevant_cols=True, verbose=False,return_spark_df=False): ''' Annotates a Pandas Dataframe/Pandas Series/Numpy Array/Spark DataFrame/Python List strings /Python String @@ -1372,8 +1372,8 @@ def predict(self, data, output_level='', positions=False, keep_stranger_features except: print( "If you use Modin, make sure you have installed 'pip install modin[ray]' or 'pip install modin[dask]' backend for Modin ") - - return self.pythonify_spark_dataframe(sdf, self.output_different_levels, + if return_spark_df : return sdf + else : return self.pythonify_spark_dataframe(sdf, self.output_different_levels, keep_stranger_features=keep_stranger_features, stranger_features=stranger_features, output_metadata=metadata, index_provided=index_provided, From 04a5f00480cf4485040bd9df5482f556420114da Mon Sep 17 00:00:00 2001 From: C-K-Loan Date: Fri, 19 Mar 2021 11:11:09 +0100 Subject: [PATCH 2/2] Examples Added and old notebooks updated --- ...ion_classifier_demo_biological_texts.ipynb | 1 + ...casam_classifier_demo_news_headlines.ipynb | 1 + ...ining_sentiment_classifier_demo_IMDB.ipynb | 2 +- ...timent_classifier_demo_apple_twitter.ipynb | 2 +- ...g_sentiment_classifier_demo_covid_19.ipynb | 1 + 
...iment_classifier_demo_finanical_news.ipynb | 2 +- ...nt_classifier_demo_natural_disasters.ipynb | 1 + ...ntiment_classifier_demo_stock_market.ipynb | 1 + ...ing_multi_class_text_classifier_demo.ipynb | 2 +- ...ti_class_text_classifier_demo_amazon.ipynb | 2 +- ...timent_classifier_demo_apple_twitter.ipynb | 1 + ...g_sentiment_classifier_demo_covid_19.ipynb | 1 + ...ing_sentiment_classifier_demo_reddit.ipynb | 1 + ...ntiment_classifier_demo_stock_market.ipynb | 1 + ...ng_sentiment_classifier_demo_twitter.ipynb | 1 + ...ual_multi_class_text_classifier_demo.ipynb | 1 + ...ti_class_text_classifier_demo_amazon.ipynb | 1 + ...s_text_classifier_demo_hotel_reviews.ipynb | 1 + .../NER_aspect_airline_ATIS.ipynb | 577 +----------------- .../NLU_ner_CONLL_2003_5class_example.ipynb | 2 +- .../NLU_ner_ONTO_18class_example.ipynb | 2 +- ...pect_based_ner_sentiment_restaurants.ipynb | 358 +---------- 22 files changed, 22 insertions(+), 940 deletions(-) create mode 100644 examples/colab/Training/binary_text_classification/NLU_training_negation_classifier_demo_biological_texts.ipynb create mode 100644 examples/colab/Training/binary_text_classification/NLU_training_sarcasam_classifier_demo_news_headlines.ipynb create mode 100644 examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_covid_19.ipynb create mode 100644 examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_natural_disasters.ipynb create mode 100644 examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_stock_market.ipynb create mode 100644 examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_apple_twitter.ipynb create mode 100644 examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_covid_19.ipynb create mode 100644 
examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_reddit.ipynb create mode 100644 examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_stock_market.ipynb create mode 100644 examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_twitter.ipynb create mode 100644 examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo.ipynb create mode 100644 examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo_amazon.ipynb create mode 100644 examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo_hotel_reviews.ipynb diff --git a/examples/colab/Training/binary_text_classification/NLU_training_negation_classifier_demo_biological_texts.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_negation_classifier_demo_biological_texts.ipynb new file mode 100644 index 00000000..a0556aee --- /dev/null +++ b/examples/colab/Training/binary_text_classification/NLU_training_negation_classifier_demo_biological_texts.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_negation_classifier_demo_biological_texts.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_negation_classifier_demo_biological_texts.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 Class Biological Negation Classifier Training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data : \n","\n","
\n","\n","![image.png]()\n","\n","\n","You can achieve these results or even better on this dataset with test data : \n","\n","
\n","\n","\n","![Screenshot 2021-02-25 140123.png]()\n","\n","\n","\n","\n","\n","\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614241644372,"user_tz":-300,"elapsed":92837,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b7007a05-dfcb-4e96-f2ba-35c699f6c253"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 68kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 20.1MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=c6493b191ba1bbe111dcc8ea7139d8d6e0fdc11c923bc238c3391f08bfa6fb92\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Negation Biological Texts dataset \n","https://www.kaggle.com/ma7555/bioscope-corpus-negation-annotated\n","#Context\n","The BioScope corpus consists of medical and biological texts annotated for negation and their linguistic scope. This was done to allow a comparison between the development of systems for negation/hedge detection and scope resolution.\n","The corpus is publicly available for research purposes.\n","\n","You can use this corpus to fine-tune a BERT-like model for negation detection.\n","\n","This dataset was created in this format during the COVID-19 crisis as a training set for detecting negations regarding treatment of specific drugs in the released research papers.\n","\n","Creators of the original dataset: MTA-SZTE Research Group on Artificial Intelligence - RGAI\n","https://rgai.inf.u-szeged.hu/node/105\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614241645363,"user_tz":-300,"elapsed":93810,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8173c54a-d215-44e8-a272-f81b7dd204af"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/bioscope_abstract.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-25 08:26:14-- http://ckl-it.de/wp-content/uploads/2021/02/bioscope_abstract.csv\n","Resolving ckl-it.de (ckl-it.de)... 
217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 802898 (784K) [text/csv]\n","Saving to: ‘bioscope_abstract.csv’\n","\n","bioscope_abstract.c 100%[===================>] 784.08K 1.20MB/s in 0.6s \n","\n","2021-02-25 08:26:15 (1.20 MB/s) - ‘bioscope_abstract.csv’ saved [802898/802898]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614241645794,"user_tz":-300,"elapsed":94227,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e2d29243-d84e-46a9-f292-738e64ad7de9"},"source":["import pandas as pd\n","train_path = '/content/bioscope_abstract.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df.dropna()\n","train_df = train_df.sample(frac=1).reset_index(drop=True)\n","from sklearn.model_selection import train_test_split\n","\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
1529Recent biochemical and genetic studies indicat...negative
1230Insulin dose-dependently increased the PI-3,4,...positive
813It is crucial that these results mirror recent...negative
473In the absence of TCR-mediated activation, Vpr...positive
1545We have shown previously that trifluoromethylk...negative
.........
1027However, the C26 element alone, even when repe...positive
329Whereas the CRE site was essential in mediatin...positive
935In this study, we demonstrate that constitutiv...positive
1266In contrast, the binding activities of NF1 and...positive
121We show that in the human T lymphoblastic tumo...negative
\n","

1600 rows × 2 columns

\n","
"],"text/plain":[" text y\n","1529 Recent biochemical and genetic studies indicat... negative\n","1230 Insulin dose-dependently increased the PI-3,4,... positive\n","813 It is crucial that these results mirror recent... negative\n","473 In the absence of TCR-mediated activation, Vpr... positive\n","1545 We have shown previously that trifluoromethylk... negative\n","... ... ...\n","1027 However, the C26 element alone, even when repe... positive\n","329 Whereas the CRE site was essential in mediatin... positive\n","935 In this study, we demonstrate that constitutiv... positive\n","1266 In contrast, the binding activities of NF1 and... positive\n","121 We show that in the human T lymphoblastic tumo... negative\n","\n","[1600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1613511904292,"user_tz":-300,"elapsed":13160,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dbced97f-919d-4777-e3e7-cc10d31abff3"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 19\n"," positive 0.62 1.00 0.77 31\n","\n"," accuracy 0.62 50\n"," macro avg 0.31 0.50 0.38 50\n","weighted avg 0.38 0.62 0.47 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextsentimentsentiment_confidencedocumentdefault_name_embeddings
origin_index
0positiveInterestingly, phase 1 adhesion was inversely ...positive0.975958Interestingly, phase 1 adhesion was inversely ...[0.020294584333896637, -0.003580901538953185, ...
1positivePBL constitutively express c-jun transcripts, ...positive0.981556PBL constitutively express c-jun transcripts, ...[0.03842758387327194, 0.004267632961273193, 0....
2negativeErbstatin and genistein inhibited the sIg-cros...positive0.973579Erbstatin and genistein inhibited the sIg-cros...[0.04114547371864319, -0.011271283961832523, 0...
3negativeRecombinant I kappa B-alpha(MAD-3) inhibited t...positive0.963648Recombinant I kappa B-alpha(MAD-3) inhibited t...[-0.008668724447488785, 0.004286200739443302, ...
4positiveUsing electrophoretic mobility shift analysis ...positive0.981299Using electrophoretic mobility shift analysis ...[0.03153171390295029, 0.0300369281321764, 0.01...
5positiveEach binding site contributes to the overall a...positive0.975567Each binding site contributes to the overall a...[0.026832187548279762, 0.06317257136106491, -0...
6positiveIn this investigation an enhanced and prolonge...positive0.978890In this investigation an enhanced and prolonge...[0.03458377718925476, 0.0585494302213192, 0.03...
7positiveThe average values from healthy volunteers did...positive0.923696The average values from healthy volunteers did...[-0.01714322902262211, 0.021471993997693062, -...
8positiveThese studies have been extended to show that ...positive0.970053These studies have been extended to show that ...[0.022129273042082787, -0.05606718361377716, 0...
9positiveIn a variety of other cell types, which displa...positive0.979132In a variety of other cell types, which displa...[0.047291748225688934, -0.007037220057100058, ...
10negativeDose-response experiments suggested that stero...positive0.942210Dose-response experiments suggested that stero...[0.07178540527820587, 0.015175681561231613, -0...
11positiveCompounds that interfere selectively with the ...positive0.965995Compounds that interfere selectively with the ...[0.053061071783304214, 0.02100301906466484, -0...
12positiveTo assess whether this inhibition was mediated...positive0.972738To assess whether this inhibition was mediated...[0.02914738841354847, 0.04804287105798721, 0.0...
13negativeFreshly isolated T cells from lupus patients, ...positive0.938295Freshly isolated T cells from lupus patients, ...[0.05932210013270378, 0.06283659487962723, 0.0...
14positiveIn EMSAs, NF(P) binding was inhibited by the u...positive0.982893In EMSAs, NF(P) binding was inhibited by the u...[0.019155262038111687, 0.024585263803601265, 0...
15positivec-myc did not correlate with c-fos and c-jun p...positive0.972702c-myc did not correlate with c-fos and c-jun p...[0.06592310220003128, -0.04704660177230835, -0...
16positiveInterleukin-2 (IL-2) promoter activity stimula...positive0.973839Interleukin-2 (IL-2) promoter activity stimula...[0.03582179546356201, 0.02672964334487915, 0.0...
17negativeIn this study, we examined seven EBV-positive ...positive0.940680In this study, we examined seven EBV-positive ...[0.05369773879647255, 0.0632983073592186, 0.00...
18positivePhosphorylated Bcl-2 cannot bind calcineurin, ...positive0.978725Phosphorylated Bcl-2 cannot bind calcineurin, ...[0.0348842516541481, 0.03341799974441528, -0.0...
19negativeThere is a single methionine codon-initiated o...positive0.961217There is a single methionine codon-initiated o...[0.05550860986113548, -0.05578289553523064, -0...
20negativeNasal NK- and T-cell lymphomas share the same ...positive0.904758Nasal NK- and T-cell lymphomas share the same ...[0.030753592029213905, 0.030849099159240723, -...
21positiveThus, the requirement for CD28 in IL-2 promote...positive0.971671Thus, the requirement for CD28 in IL-2 promote...[0.0430801659822464, -0.06272805482149124, -0....
22positiveNeither of the two toxins affected kappaB bind...positive0.978535Neither of the two toxins affected kappaB bind...[0.028597429394721985, 0.06279577314853668, -0...
23positiveWe conclude that downregulation of WT1 during ...positive0.976209We conclude that downregulation of WT1 during ...[0.00441081915050745, 0.021023083478212357, -0...
24negativeThe former can become latently infected and ar...positive0.944969The former can become latently infected and ar...[0.037766434252262115, -0.01205922570079565, -...
25positiveDeletion of the 5'-most Ets site had no effect...positive0.980429Deletion of the 5'-most Ets site had no effect...[0.01163046807050705, 0.04765436425805092, 0.0...
26negativeA number of recent studies have described inte...positive0.948274A number of recent studies have described inte...[0.0460846908390522, 0.054417867213487625, 0.0...
27negativeWe suggest that extinction of Ig genes is part...positive0.933735We suggest that extinction of Ig genes is part...[0.013370374217629433, -0.028526239097118378, ...
28positiveIn some patients, no activation of RelA/NFkapp...positive0.973488In some patients, no activation of RelA/NFkapp...[0.001402425579726696, 0.058718983083963394, -...
29negativeThese may partly account for the differential ...positive0.936957These may partly account for the differential ...[-0.007175407372415066, 0.011748190969228745, ...
30positiveSimilarly, in transfection experiments in whic...positive0.980887Similarly, in transfection experiments in whic...[0.011392833665013313, -0.023685814812779427, ...
31positiveHowever, the molecular basis for the commitmen...positive0.965524However, the molecular basis for the commitmen...[0.04663031920790672, -0.011451571248471737, -...
32positiveIn vitro translated MAD-3 protein was found to...positive0.974413In vitro translated MAD-3 protein was found to...[0.020160771906375885, 0.011534353718161583, 0...
33positiveWe also tested for the ability of defensins NP...positive0.971970We also tested for the ability of defensins NP...[0.0475110188126564, -0.01823158748447895, 0.0...
34positiveIn the absence of TCR-mediated activation, Vpr...positive0.978770In the absence of TCR-mediated activation, Vpr...[0.039186377078294754, 0.030655119568109512, -...
35negativeConversely, IL-4 and IL-13 markedly and signif...positive0.943897Conversely, IL-4 and IL-13 markedly and signif...[0.039589762687683105, 0.03933874890208244, -0...
36negativeThese results suggest that secretion of PGE2 b...positive0.958460These results suggest that secretion of PGE2 b...[0.021247662603855133, 0.015753811225295067, 0...
37negativeLike other enhancers, the Ig heavy chain enhan...positive0.961463Like other enhancers, the Ig heavy chain enhan...[0.05650434270501137, 0.005788780748844147, -0...
38negativeA principal objective of the present study was...positive0.964000A principal objective of the present study was...[0.017783455550670624, -0.026092059910297394, ...
39negativeTo investigate whether the molar ratio of HSF2...positive0.976527To investigate whether the molar ratio of HSF2...[0.033542290329933167, 0.006205122452229261, 0...
40negativeThe expression of the gene encoding the granul...positive0.959701The expression of the gene encoding the granul...[0.04365483671426773, 0.023773200809955597, -0...
41positiveTransactivation of PTHrP by Tax was seen in T ...positive0.979695Transactivation of PTHrP by Tax was seen in T ...[0.019169364124536514, -0.03477424755692482, 0...
42positiveHere we report that human leukaemia cells carr...positive0.962115Here we report that human leukaemia cells carr...[0.00010157756332773715, 0.006516697350889444,...
43negativeFurthermore, the hemin-induced accumulation of...positive0.972569Furthermore, the hemin-induced accumulation of...[0.063593789935112, 0.043789442628622055, 0.00...
44positiveNAC had no effect on the half-life of E-select...positive0.977131NAC had no effect on the half-life of E-select...[0.0225575752556324, 0.04326668754220009, -0.0...
45positiveGranulocyte/macrophage colony-stimulating fact...positive0.970762Granulocyte/macrophage colony-stimulating fact...[0.04828358814120293, -0.006254654377698898, -...
46positiveWe have shown that 1-(5-chloronaphthalene-1-su...positive0.974028We have shown that 1-(5-chloronaphthalene-1-su...[0.02836417406797409, 0.0573764331638813, 0.01...
47negativeDuring the course of serious bacterial infecti...positive0.908578During the course of serious bacterial infecti...[0.04267750680446625, 0.03990871459245682, -0....
48positiveWe here report that (i) like PML-RAR alpha exp...positive0.979658We here report that (i) like PML-RAR alpha exp...[0.018129097297787666, -0.04197037220001221, 0...
49positiveAnalysis of Tax mutants showed that two mutant...positive0.961423Analysis of Tax mutants showed that two mutant...[0.06011602282524109, -0.02545783296227455, -0...
\n","
"],"text/plain":[" y ... default_name_embeddings\n","origin_index ... \n","0 positive ... [0.020294584333896637, -0.003580901538953185, ...\n","1 positive ... [0.03842758387327194, 0.004267632961273193, 0....\n","2 negative ... [0.04114547371864319, -0.011271283961832523, 0...\n","3 negative ... [-0.008668724447488785, 0.004286200739443302, ...\n","4 positive ... [0.03153171390295029, 0.0300369281321764, 0.01...\n","5 positive ... [0.026832187548279762, 0.06317257136106491, -0...\n","6 positive ... [0.03458377718925476, 0.0585494302213192, 0.03...\n","7 positive ... [-0.01714322902262211, 0.021471993997693062, -...\n","8 positive ... [0.022129273042082787, -0.05606718361377716, 0...\n","9 positive ... [0.047291748225688934, -0.007037220057100058, ...\n","10 negative ... [0.07178540527820587, 0.015175681561231613, -0...\n","11 positive ... [0.053061071783304214, 0.02100301906466484, -0...\n","12 positive ... [0.02914738841354847, 0.04804287105798721, 0.0...\n","13 negative ... [0.05932210013270378, 0.06283659487962723, 0.0...\n","14 positive ... [0.019155262038111687, 0.024585263803601265, 0...\n","15 positive ... [0.06592310220003128, -0.04704660177230835, -0...\n","16 positive ... [0.03582179546356201, 0.02672964334487915, 0.0...\n","17 negative ... [0.05369773879647255, 0.0632983073592186, 0.00...\n","18 positive ... [0.0348842516541481, 0.03341799974441528, -0.0...\n","19 negative ... [0.05550860986113548, -0.05578289553523064, -0...\n","20 negative ... [0.030753592029213905, 0.030849099159240723, -...\n","21 positive ... [0.0430801659822464, -0.06272805482149124, -0....\n","22 positive ... [0.028597429394721985, 0.06279577314853668, -0...\n","23 positive ... [0.00441081915050745, 0.021023083478212357, -0...\n","24 negative ... [0.037766434252262115, -0.01205922570079565, -...\n","25 positive ... [0.01163046807050705, 0.04765436425805092, 0.0...\n","26 negative ... [0.0460846908390522, 0.054417867213487625, 0.0...\n","27 negative ... 
[0.013370374217629433, -0.028526239097118378, ...\n","28 positive ... [0.001402425579726696, 0.058718983083963394, -...\n","29 negative ... [-0.007175407372415066, 0.011748190969228745, ...\n","30 positive ... [0.011392833665013313, -0.023685814812779427, ...\n","31 positive ... [0.04663031920790672, -0.011451571248471737, -...\n","32 positive ... [0.020160771906375885, 0.011534353718161583, 0...\n","33 positive ... [0.0475110188126564, -0.01823158748447895, 0.0...\n","34 positive ... [0.039186377078294754, 0.030655119568109512, -...\n","35 negative ... [0.039589762687683105, 0.03933874890208244, -0...\n","36 negative ... [0.021247662603855133, 0.015753811225295067, 0...\n","37 negative ... [0.05650434270501137, 0.005788780748844147, -0...\n","38 negative ... [0.017783455550670624, -0.026092059910297394, ...\n","39 negative ... [0.033542290329933167, 0.006205122452229261, 0...\n","40 negative ... [0.04365483671426773, 0.023773200809955597, -0...\n","41 positive ... [0.019169364124536514, -0.03477424755692482, 0...\n","42 positive ... [0.00010157756332773715, 0.006516697350889444,...\n","43 negative ... [0.063593789935112, 0.043789442628622055, 0.00...\n","44 positive ... [0.0225575752556324, 0.04326668754220009, -0.0...\n","45 positive ... [0.04828358814120293, -0.006254654377698898, -...\n","46 positive ... [0.02836417406797409, 0.0573764331638813, 0.01...\n","47 negative ... [0.04267750680446625, 0.03990871459245682, -0....\n","48 positive ... [0.018129097297787666, -0.04197037220001221, 0...\n","49 positive ... [0.06011602282524109, -0.02545783296227455, -0...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# 4. 
Test the fitted pipe on a new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1613511988627,"user_tz":-300,"elapsed":3430,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ea26e4fd-de90-4d43-b39b-16b313b91ff9"},"source":["fitted_pipe.predict(\"The virus had a direct impact on the nervous system\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimentsentiment_confidencedocumentdefault_name_embeddings
origin_index
0positive0.936225The virus had a direct impact on the nervous s...[0.005800435785204172, 0.025611303746700287, -...
\n","
"],"text/plain":[" sentiment ... default_name_embeddings\n","origin_index ... \n","0 positive ... [0.005800435785204172, 0.025611303746700287, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## 5. Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1613511989123,"user_tz":-300,"elapsed":609,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3b24ad6b-5e8a-4abb-946b-9c18dce55dcf"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setLoadSP(False) | Info: Whether to load SentencePiece ops file which is required only by multi-lingual models. This is not changeable after it's set with a pretrained model nor it is compatible with Windows. 
| Currently set to : False\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## 6. 
Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1613512001863,"user_tz":-300,"elapsed":6700,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"33ca4330-d026-4b6e-ac70-64a0edadc2e6"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 19\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.72 1.00 0.84 31\n","\n"," accuracy 0.62 50\n"," macro avg 0.24 0.33 0.28 50\n","weighted avg 0.45 0.62 0.52 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextsentimentsentiment_confidencedocumentdefault_name_embeddings
origin_index
0positiveInterestingly, phase 1 adhesion was inversely ...positive0.858951Interestingly, phase 1 adhesion was inversely ...[0.020294584333896637, -0.003580901538953185, ...
1positivePBL constitutively express c-jun transcripts, ...positive0.900715PBL constitutively express c-jun transcripts, ...[0.03842758387327194, 0.004267632961273193, 0....
2negativeErbstatin and genistein inhibited the sIg-cros...positive0.787789Erbstatin and genistein inhibited the sIg-cros...[0.04114547371864319, -0.011271283961832523, 0...
3negativeRecombinant I kappa B-alpha(MAD-3) inhibited t...positive0.740510Recombinant I kappa B-alpha(MAD-3) inhibited t...[-0.008668724447488785, 0.004286200739443302, ...
4positiveUsing electrophoretic mobility shift analysis ...positive0.865358Using electrophoretic mobility shift analysis ...[0.03153171390295029, 0.0300369281321764, 0.01...
5positiveEach binding site contributes to the overall a...positive0.885173Each binding site contributes to the overall a...[0.026832187548279762, 0.06317257136106491, -0...
6positiveIn this investigation an enhanced and prolonge...positive0.839118In this investigation an enhanced and prolonge...[0.03458377718925476, 0.0585494302213192, 0.03...
7positiveThe average values from healthy volunteers did...positive0.775531The average values from healthy volunteers did...[-0.01714322902262211, 0.021471993997693062, -...
8positiveThese studies have been extended to show that ...positive0.766982These studies have been extended to show that ...[0.022129273042082787, -0.05606718361377716, 0...
9positiveIn a variety of other cell types, which displa...positive0.895120In a variety of other cell types, which displa...[0.047291748225688934, -0.007037220057100058, ...
10negativeDose-response experiments suggested that stero...positive0.613039Dose-response experiments suggested that stero...[0.07178540527820587, 0.015175681561231613, -0...
11positiveCompounds that interfere selectively with the ...positive0.823946Compounds that interfere selectively with the ...[0.053061071783304214, 0.02100301906466484, -0...
12positiveTo assess whether this inhibition was mediated...positive0.834207To assess whether this inhibition was mediated...[0.02914738841354847, 0.04804287105798721, 0.0...
13negativeFreshly isolated T cells from lupus patients, ...neutral0.585066Freshly isolated T cells from lupus patients, ...[0.05932210013270378, 0.06283659487962723, 0.0...
14positiveIn EMSAs, NF(P) binding was inhibited by the u...positive0.921363In EMSAs, NF(P) binding was inhibited by the u...[0.019155262038111687, 0.024585263803601265, 0...
15positivec-myc did not correlate with c-fos and c-jun p...positive0.831225c-myc did not correlate with c-fos and c-jun p...[0.06592310220003128, -0.04704660177230835, -0...
16positiveInterleukin-2 (IL-2) promoter activity stimula...positive0.833683Interleukin-2 (IL-2) promoter activity stimula...[0.03582179546356201, 0.02672964334487915, 0.0...
17negativeIn this study, we examined seven EBV-positive ...neutral0.544334In this study, we examined seven EBV-positive ...[0.05369773879647255, 0.0632983073592186, 0.00...
18positivePhosphorylated Bcl-2 cannot bind calcineurin, ...positive0.862028Phosphorylated Bcl-2 cannot bind calcineurin, ...[0.0348842516541481, 0.03341799974441528, -0.0...
19negativeThere is a single methionine codon-initiated o...positive0.738093There is a single methionine codon-initiated o...[0.05550860986113548, -0.05578289553523064, -0...
20negativeNasal NK- and T-cell lymphomas share the same ...neutral0.525487Nasal NK- and T-cell lymphomas share the same ...[0.030753592029213905, 0.030849099159240723, -...
21positiveThus, the requirement for CD28 in IL-2 promote...positive0.925509Thus, the requirement for CD28 in IL-2 promote...[0.0430801659822464, -0.06272805482149124, -0....
22positiveNeither of the two toxins affected kappaB bind...positive0.884952Neither of the two toxins affected kappaB bind...[0.028597429394721985, 0.06279577314853668, -0...
23positiveWe conclude that downregulation of WT1 during ...positive0.796671We conclude that downregulation of WT1 during ...[0.00441081915050745, 0.021023083478212357, -0...
24negativeThe former can become latently infected and ar...neutral0.579465The former can become latently infected and ar...[0.037766434252262115, -0.01205922570079565, -...
25positiveDeletion of the 5'-most Ets site had no effect...positive0.941455Deletion of the 5'-most Ets site had no effect...[0.01163046807050705, 0.04765436425805092, 0.0...
26negativeA number of recent studies have described inte...positive0.606441A number of recent studies have described inte...[0.0460846908390522, 0.054417867213487625, 0.0...
27negativeWe suggest that extinction of Ig genes is part...neutral0.585354We suggest that extinction of Ig genes is part...[0.013370374217629433, -0.028526239097118378, ...
28positiveIn some patients, no activation of RelA/NFkapp...positive0.912251In some patients, no activation of RelA/NFkapp...[0.001402425579726696, 0.058718983083963394, -...
29negativeThese may partly account for the differential ...positive0.695873These may partly account for the differential ...[-0.007175407372415066, 0.011748190969228745, ...
30positiveSimilarly, in transfection experiments in whic...positive0.870236Similarly, in transfection experiments in whic...[0.011392833665013313, -0.023685814812779427, ...
31positiveHowever, the molecular basis for the commitmen...positive0.826238However, the molecular basis for the commitmen...[0.04663031920790672, -0.011451571248471737, -...
32positiveIn vitro translated MAD-3 protein was found to...positive0.814683In vitro translated MAD-3 protein was found to...[0.020160771906375885, 0.011534353718161583, 0...
33positiveWe also tested for the ability of defensins NP...positive0.843203We also tested for the ability of defensins NP...[0.0475110188126564, -0.01823158748447895, 0.0...
34positiveIn the absence of TCR-mediated activation, Vpr...positive0.888412In the absence of TCR-mediated activation, Vpr...[0.039186377078294754, 0.030655119568109512, -...
35negativeConversely, IL-4 and IL-13 markedly and signif...positive0.608741Conversely, IL-4 and IL-13 markedly and signif...[0.039589762687683105, 0.03933874890208244, -0...
36negativeThese results suggest that secretion of PGE2 b...neutral0.597105These results suggest that secretion of PGE2 b...[0.021247662603855133, 0.015753811225295067, 0...
37negativeLike other enhancers, the Ig heavy chain enhan...positive0.766628Like other enhancers, the Ig heavy chain enhan...[0.05650434270501137, 0.005788780748844147, -0...
38negativeA principal objective of the present study was...positive0.733849A principal objective of the present study was...[0.017783455550670624, -0.026092059910297394, ...
39negativeTo investigate whether the molar ratio of HSF2...positive0.797511To investigate whether the molar ratio of HSF2...[0.033542290329933167, 0.006205122452229261, 0...
40negativeThe expression of the gene encoding the granul...positive0.601104The expression of the gene encoding the granul...[0.04365483671426773, 0.023773200809955597, -0...
41positiveTransactivation of PTHrP by Tax was seen in T ...positive0.909007Transactivation of PTHrP by Tax was seen in T ...[0.019169364124536514, -0.03477424755692482, 0...
42positiveHere we report that human leukaemia cells carr...positive0.704064Here we report that human leukaemia cells carr...[0.00010157756332773715, 0.006516697350889444,...
43negativeFurthermore, the hemin-induced accumulation of...positive0.768831Furthermore, the hemin-induced accumulation of...[0.063593789935112, 0.043789442628622055, 0.00...
44positiveNAC had no effect on the half-life of E-select...positive0.834020NAC had no effect on the half-life of E-select...[0.0225575752556324, 0.04326668754220009, -0.0...
45positiveGranulocyte/macrophage colony-stimulating fact...positive0.836410Granulocyte/macrophage colony-stimulating fact...[0.04828358814120293, -0.006254654377698898, -...
46positiveWe have shown that 1-(5-chloronaphthalene-1-su...positive0.762240We have shown that 1-(5-chloronaphthalene-1-su...[0.02836417406797409, 0.0573764331638813, 0.01...
47negativeDuring the course of serious bacterial infecti...neutral0.525606During the course of serious bacterial infecti...[0.04267750680446625, 0.03990871459245682, -0....
48positiveWe here report that (i) like PML-RAR alpha exp...positive0.849820We here report that (i) like PML-RAR alpha exp...[0.018129097297787666, -0.04197037220001221, 0...
49positiveAnalysis of Tax mutants showed that two mutant...positive0.872556Analysis of Tax mutants showed that two mutant...[0.06011602282524109, -0.02545783296227455, -0...
\n","
"],"text/plain":[" y ... default_name_embeddings\n","origin_index ... \n","0 positive ... [0.020294584333896637, -0.003580901538953185, ...\n","1 positive ... [0.03842758387327194, 0.004267632961273193, 0....\n","2 negative ... [0.04114547371864319, -0.011271283961832523, 0...\n","3 negative ... [-0.008668724447488785, 0.004286200739443302, ...\n","4 positive ... [0.03153171390295029, 0.0300369281321764, 0.01...\n","5 positive ... [0.026832187548279762, 0.06317257136106491, -0...\n","6 positive ... [0.03458377718925476, 0.0585494302213192, 0.03...\n","7 positive ... [-0.01714322902262211, 0.021471993997693062, -...\n","8 positive ... [0.022129273042082787, -0.05606718361377716, 0...\n","9 positive ... [0.047291748225688934, -0.007037220057100058, ...\n","10 negative ... [0.07178540527820587, 0.015175681561231613, -0...\n","11 positive ... [0.053061071783304214, 0.02100301906466484, -0...\n","12 positive ... [0.02914738841354847, 0.04804287105798721, 0.0...\n","13 negative ... [0.05932210013270378, 0.06283659487962723, 0.0...\n","14 positive ... [0.019155262038111687, 0.024585263803601265, 0...\n","15 positive ... [0.06592310220003128, -0.04704660177230835, -0...\n","16 positive ... [0.03582179546356201, 0.02672964334487915, 0.0...\n","17 negative ... [0.05369773879647255, 0.0632983073592186, 0.00...\n","18 positive ... [0.0348842516541481, 0.03341799974441528, -0.0...\n","19 negative ... [0.05550860986113548, -0.05578289553523064, -0...\n","20 negative ... [0.030753592029213905, 0.030849099159240723, -...\n","21 positive ... [0.0430801659822464, -0.06272805482149124, -0....\n","22 positive ... [0.028597429394721985, 0.06279577314853668, -0...\n","23 positive ... [0.00441081915050745, 0.021023083478212357, -0...\n","24 negative ... [0.037766434252262115, -0.01205922570079565, -...\n","25 positive ... [0.01163046807050705, 0.04765436425805092, 0.0...\n","26 negative ... [0.0460846908390522, 0.054417867213487625, 0.0...\n","27 negative ... 
[0.013370374217629433, -0.028526239097118378, ...\n","28 positive ... [0.001402425579726696, 0.058718983083963394, -...\n","29 negative ... [-0.007175407372415066, 0.011748190969228745, ...\n","30 positive ... [0.011392833665013313, -0.023685814812779427, ...\n","31 positive ... [0.04663031920790672, -0.011451571248471737, -...\n","32 positive ... [0.020160771906375885, 0.011534353718161583, 0...\n","33 positive ... [0.0475110188126564, -0.01823158748447895, 0.0...\n","34 positive ... [0.039186377078294754, 0.030655119568109512, -...\n","35 negative ... [0.039589762687683105, 0.03933874890208244, -0...\n","36 negative ... [0.021247662603855133, 0.015753811225295067, 0...\n","37 negative ... [0.05650434270501137, 0.005788780748844147, -0...\n","38 negative ... [0.017783455550670624, -0.026092059910297394, ...\n","39 negative ... [0.033542290329933167, 0.006205122452229261, 0...\n","40 negative ... [0.04365483671426773, 0.023773200809955597, -0...\n","41 positive ... [0.019169364124536514, -0.03477424755692482, 0...\n","42 positive ... [0.00010157756332773715, 0.006516697350889444,...\n","43 negative ... [0.063593789935112, 0.043789442628622055, 0.00...\n","44 positive ... [0.0225575752556324, 0.04326668754220009, -0.0...\n","45 positive ... [0.04828358814120293, -0.006254654377698898, -...\n","46 positive ... [0.02836417406797409, 0.0573764331638813, 0.01...\n","47 negative ... [0.04267750680446625, 0.03990871459245682, -0....\n","48 positive ... [0.018129097297787666, -0.04197037220001221, 0...\n","49 positive ... [0.06011602282524109, -0.02545783296227455, -0...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# 7. 
Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1613512013319,"user_tz":-300,"elapsed":983,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b6e7dea3-3f5e-4f3d-a66b-b0f05fa799d9"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP 
model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns 
Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model 
labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1614243224688,"user_tz":-300,"elapsed":1555054,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1b70666f-d178-403d-e43a-e7897625a03a"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We usually need to train longer and use a smaller LR for NON-USE based sentence embeddings\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.95 0.86 0.90 793\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.91 0.92 0.92 807\n","\n"," accuracy 0.89 1600\n"," macro avg 0.62 0.59 0.61 1600\n","weighted avg 0.93 0.89 0.91 1600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 7.1 Evaluate on Test Data"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Fxx4yNkNVGFl","executionInfo":{"status":"ok","timestamp":1614243593910,"user_tz":-300,"elapsed":367888,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e73da2f7-91f6-4107-e896-34afc9397a8b"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.93 0.86 0.89 207\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.91 0.87 0.89 193\n","\n"," accuracy 0.87 400\n"," macro avg 0.61 0.58 0.59 400\n","weighted avg 0.92 0.87 0.89 400\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 8. 
Let's save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1613515743123,"user_tz":-300,"elapsed":205815,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"70eca3dc-2a52-42ec-e19d-dfd67c141f97"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 9. Let's load the model from HDD.\n","This makes offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1613515758156,"user_tz":-300,"elapsed":220493,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ce0620e9-6cae-4e55-e873-d0079cfa028e"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('The virus had a direct impact on the nervous system')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddingssentimentdocument
origin_index
00.999432[0.19975340366363525, 0.40417471528053284, 0.3...negativeThe virus had a direct impact on the nervous s...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999432 ... The virus had a direct impact on the nervous s...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1613515758158,"user_tz":-300,"elapsed":220271,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c04fd8ca-9a81-4356-db41-121becaf6cd4"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this SentimentDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sarcasam_classifier_demo_news_headlines.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sarcasam_classifier_demo_news_headlines.ipynb new file mode 100644 index 00000000..8a84b8fe --- /dev/null +++ b/examples/colab/Training/binary_text_classification/NLU_training_sarcasam_classifier_demo_news_headlines.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sarcasam_classifier_demo_news_headlines.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sarcasam_classifier_demo_news_headlines.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 Class News Headlines Sarcasam Training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How 
to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data:\n","\n","\n","
\n","\n","![img.png]()\n","\n","You can achieve these results or even better on this dataset with test data:\n","\n","\n","
\n","\n","![Screenshot 2021-02-25 150812.png]()\n","\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download News Headlines Sarcasm dataset \n","https://www.kaggle.com/rmisra/news-headlines-dataset-for-sarcasm-detection\n","#Context\n","Past studies in Sarcasm Detection mostly make use of Twitter datasets collected using hashtag based supervision but such datasets are noisy in terms of labels and language. Furthermore, many tweets are replies to other tweets and detecting sarcasm in these requires the availability of contextual tweets.\n","\n","To overcome the limitations related to noise in Twitter datasets, this News Headlines dataset for Sarcasm Detection is collected from two news websites. TheOnion aims at producing sarcastic versions of current events and we collected all the headlines from News in Brief and News in Photos categories (which are sarcastic). We collect real (and non-sarcastic) news headlines from HuffPost.\n","\n","This new dataset has the following advantages over the existing Twitter datasets:\n","\n","Since news headlines are written by professionals in a formal manner, there are no spelling mistakes and informal usage. 
This reduces the sparsity and also increases the chance of finding pre-trained embeddings.\n","\n","Furthermore, since the sole purpose of TheOnion is to publish sarcastic news, we get high-quality labels with much less noise as compared to Twitter datasets.\n","\n","Unlike tweets which are replies to other tweets, the news headlines we obtained are self-contained. This would help us in teasing apart the real sarcastic elements.\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614242478532,"user_tz":-300,"elapsed":78739,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"42b94da2-ea42-4895-899a-09d98141d256"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/Sarcasm_Headlines_Dataset_v2.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-25 08:40:07-- http://ckl-it.de/wp-content/uploads/2021/02/Sarcasm_Headlines_Dataset_v2.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 2381880 (2.3M) [text/csv]\n","Saving to: ‘Sarcasm_Headlines_Dataset_v2.csv’\n","\n","Sarcasm_Headlines_D 100%[===================>] 2.27M 2.68MB/s in 0.8s \n","\n","2021-02-25 08:40:09 (2.68 MB/s) - ‘Sarcasm_Headlines_Dataset_v2.csv’ saved [2381880/2381880]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614242478540,"user_tz":-300,"elapsed":78735,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"69286e1f-6b94-4cac-b2d2-ed85a2900dd4"},"source":["import pandas as pd\n","test_path = '/content/Sarcasm_Headlines_Dataset_v2.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","from sklearn.model_selection import train_test_split\n","\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
699negativedonald trump's 'do not congratulate' putin blu...
9614negativethe best clothes for the man-child in your life
3202positivereport: good thing world has unlimited quantit...
8497negativehow a family's lack of access to medical marij...
3328negativesean bean's most memorable death wasn't 'lord ...
.........
5893positiveolympic speed skater thinking about maybe taki...
3905positivedick durbin wakes up chained to radiator with ...
8607negativetaraji p. henson reacts to first family's thou...
4404negativeparenting in the time of viral
1690positivekavanaugh surprised senate not questioning fac...
\n","

8000 rows × 2 columns

\n","
"],"text/plain":[" y text\n","699 negative donald trump's 'do not congratulate' putin blu...\n","9614 negative the best clothes for the man-child in your life\n","3202 positive report: good thing world has unlimited quantit...\n","8497 negative how a family's lack of access to medical marij...\n","3328 negative sean bean's most memorable death wasn't 'lord ...\n","... ... ...\n","5893 positive olympic speed skater thinking about maybe taki...\n","3905 positive dick durbin wakes up chained to radiator with ...\n","8607 negative taraji p. henson reacts to first family's thou...\n","4404 negative parenting in the time of viral\n","1690 positive kavanaugh surprised senate not questioning fac...\n","\n","[8000 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1613513699785,"user_tz":-300,"elapsed":107605,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f7071472-6bfe-493a-9c9a-0c67c4a28b83"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 1.00 0.54 0.70 26\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.96 0.96 0.96 24\n","\n"," accuracy 0.74 50\n"," macro avg 0.65 0.50 0.55 50\n","weighted avg 0.98 0.74 0.82 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingsdocumentsentiment_confidencetextysentiment
origin_index
0[0.03101399727165699, -0.06800110638141632, -0...briefcase full of porn0.681521briefcase full of pornpositivepositive
1[-0.04206841439008713, 0.014935214072465897, 0...here's carrie fisher in 'one of the most enter...0.784738here's carrie fisher in 'one of the most enter...negativenegative
2[0.05720885097980499, -0.004087412729859352, -...transplanted new yorker disappointed with loca...0.749485transplanted new yorker disappointed with loca...positivepositive
3[-0.06800246238708496, -0.01590467058122158, -...this 'catfight' clip pauses anne heche and san...0.719429this 'catfight' clip pauses anne heche and san...negativenegative
4[-0.010691618546843529, -0.011138608679175377,...congress establishes bill suggestion hotline0.794444congress establishes bill suggestion hotlinepositivepositive
5[0.06491611897945404, -0.022880295291543007, -...ben carson suggests obama's iran deal is 'anti...0.655964ben carson suggests obama's iran deal is 'anti...negativenegative
6[-0.06249837204813957, 0.026482177898287773, 0...eden baylee is a stranger at sunset0.708668eden baylee is a stranger at sunsetnegativenegative
7[-0.03163134679198265, 0.034580837935209274, 0...lois gibbs: 'the government wouldn't help me, ...0.684051lois gibbs: 'the government wouldn't help me, ...negativenegative
8[0.05561395362019539, -0.04143591225147247, -0...time-warner ceo announces plans to merge with ...0.807884time-warner ceo announces plans to merge with ...positivepositive
9[-0.0431799553334713, -0.046702779829502106, -...typhoon meranti slams into china causing mayhem0.653778typhoon meranti slams into china causing mayhemnegativepositive
10[0.01190741267055273, -0.038865745067596436, -...raid on nacho-supremacist compound uncovers gu...0.686112raid on nacho-supremacist compound uncovers gu...positivepositive
11[0.0011607632040977478, 0.03130388632416725, -...goose suddenly realizes it doesn't have to hon...0.662792goose suddenly realizes it doesn't have to hon...positivepositive
12[0.038939230144023895, -0.028029074892401695, ...frantic, last-second study finds old-fashioned...0.746304frantic, last-second study finds old-fashioned...positivepositive
13[-0.023152370005846024, -0.041393812745809555,...couple discovers shop that sells cakes0.640520couple discovers shop that sells cakespositivepositive
14[0.010137411765754223, -0.06706183403730392, -...frito-lay targets blacks with new menthol doritos0.731602frito-lay targets blacks with new menthol doritospositivepositive
15[0.037099793553352356, 0.02509041130542755, -0...all u.s. males renamed dudley0.655183all u.s. males renamed dudleypositivepositive
16[-0.05982975661754608, -0.04541410878300667, -...xabraxian astronomers discover new planet0.774594xabraxian astronomers discover new planetpositivepositive
17[0.01637415960431099, 0.03079209476709366, -0....obama blasts afghans for expelling reporter --...0.556610obama blasts afghans for expelling reporter --...negativeneutral
18[0.053151000291109085, -0.04056103900074959, 0...riz ahmed, mindy kaling, aziz ansari and other...0.672518riz ahmed, mindy kaling, aziz ansari and other...negativenegative
19[-0.005488495342433453, 0.009531817398965359, ...human slave from future remembers when cyber m...0.659418human slave from future remembers when cyber m...positivepositive
20[-0.0410454235970974, -0.04932034760713577, -0...toward a fairer admissions process0.599870toward a fairer admissions processnegativeneutral
21[0.015400867909193039, 0.010604296810925007, 0...cupid cop gave out roses, cards on valentine's...0.562837cupid cop gave out roses, cards on valentine's...negativeneutral
22[-0.011158468201756477, 0.047883182764053345, ...new york to host 1998 ill-will games0.753071new york to host 1998 ill-will gamespositivepositive
23[0.01367797702550888, -0.040253281593322754, -...kendall jenner is 'wonder'-fully blonde in vogue0.737244kendall jenner is 'wonder'-fully blonde in voguenegativenegative
24[-0.07428913563489914, -0.052280243486166, -0....family hoping mother knows birthday nature wal...0.606846family hoping mother knows birthday nature wal...positivepositive
25[-0.0625331699848175, 0.02265137806534767, -0....senate votes 64-36, not sure on what0.750950senate votes 64-36, not sure on whatpositivepositive
26[0.018849631771445274, -0.05681510269641876, -...'the interview' is having a very good weekend ...0.703691'the interview' is having a very good weekend ...negativenegative
27[-0.006397973746061325, -0.01870238408446312, ...mom uses face-painting skills to turn kids int...0.657493mom uses face-painting skills to turn kids int...negativenegative
28[-0.03488464280962944, -0.08252372592687607, -...does policing summons warrants really prevent ...0.535460does policing summons warrants really prevent ...negativeneutral
29[0.019256893545389175, -0.04089582711458206, -...urban polling centers recommend voters start l...0.723588urban polling centers recommend voters start l...positivepositive
30[-0.06287706643342972, -0.019301993772387505, ...deadlocked supreme court: 'someone's voting tw...0.675472deadlocked supreme court: 'someone's voting tw...positivepositive
31[-0.01678450219333172, 0.01658053882420063, -0...the gop has a split personality when it comes ...0.513112the gop has a split personality when it comes ...negativeneutral
32[0.026957523077726364, 0.04000873491168022, -0...world leaders pour into washington to pay last...0.762945world leaders pour into washington to pay last...positivepositive
33[0.07982629537582397, -0.0787980854511261, -0....jyothi rao: on threads of authenticity0.658911jyothi rao: on threads of authenticitynegativenegative
34[0.023493440821766853, -0.0007498250342905521,...bruce jenner okay following deadly car crash0.691150bruce jenner okay following deadly car crashnegativenegative
35[0.033786237239837646, -0.04347962886095047, 0...a guide to sex at 50 and beyond0.559358a guide to sex at 50 and beyondnegativeneutral
36[-0.064402736723423, 0.025162827223539352, 0.0...you may have missed the 6th woman on time's pe...0.689836you may have missed the 6th woman on time's pe...negativenegative
37[-0.002320488216355443, 0.008552802726626396, ...texas moves to block medicaid funding for plan...0.578904texas moves to block medicaid funding for plan...negativeneutral
38[-0.0027419731486588717, -0.06013084575533867,...federal government to be run by cheaper mexica...0.778061federal government to be run by cheaper mexica...positivepositive
39[0.01821870170533657, -0.014606344513595104, -...dog doesn't consider itself part of family0.667528dog doesn't consider itself part of familypositivepositive
40[-0.04760941118001938, -0.01184796541929245, 0...report: doing your part to stop climate change...0.804057report: doing your part to stop climate change...positivepositive
41[0.038119763135910034, -0.0385950542986393, -0...breitbart traffic down as readers now getting ...0.710686breitbart traffic down as readers now getting ...positivepositive
42[-0.020920872688293457, -0.041152868419885635,...obama: i am where i am today because of voting...0.529584obama: i am where i am today because of voting...negativeneutral
43[0.08012807369232178, 0.05087059363722801, -0....kentucky's gop bromance deepens, even without ...0.578254kentucky's gop bromance deepens, even without ...negativeneutral
44[-0.021665776148438454, -0.02926868386566639, ...stop calling young adults \"college kids\"0.535330stop calling young adults \"college kids\"negativeneutral
45[0.03609812259674072, 0.007918721996247768, -0...the real reason trump can't break the gop0.664778the real reason trump can't break the gopnegativenegative
46[-0.05974249914288521, -0.009394897148013115, ...testing: enhanced interrogation in the classroom0.691502testing: enhanced interrogation in the classroomnegativenegative
47[0.028496630489826202, 0.004139357712119818, -...disembodied voice in elevator wants to know wa...0.715279disembodied voice in elevator wants to know wa...positivepositive
48[0.017045874148607254, -0.045841291546821594, ...a classic jason somehow gets mixed into area m...0.585903a classic jason somehow gets mixed into area m...positiveneutral
49[-0.039329949766397476, -0.03347548097372055, ...wisconsin students trumped the rest with their...0.539730wisconsin students trumped the rest with their...negativeneutral
\n","
"],"text/plain":[" default_name_embeddings ... sentiment\n","origin_index ... \n","0 [0.03101399727165699, -0.06800110638141632, -0... ... positive\n","1 [-0.04206841439008713, 0.014935214072465897, 0... ... negative\n","2 [0.05720885097980499, -0.004087412729859352, -... ... positive\n","3 [-0.06800246238708496, -0.01590467058122158, -... ... negative\n","4 [-0.010691618546843529, -0.011138608679175377,... ... positive\n","5 [0.06491611897945404, -0.022880295291543007, -... ... negative\n","6 [-0.06249837204813957, 0.026482177898287773, 0... ... negative\n","7 [-0.03163134679198265, 0.034580837935209274, 0... ... negative\n","8 [0.05561395362019539, -0.04143591225147247, -0... ... positive\n","9 [-0.0431799553334713, -0.046702779829502106, -... ... positive\n","10 [0.01190741267055273, -0.038865745067596436, -... ... positive\n","11 [0.0011607632040977478, 0.03130388632416725, -... ... positive\n","12 [0.038939230144023895, -0.028029074892401695, ... ... positive\n","13 [-0.023152370005846024, -0.041393812745809555,... ... positive\n","14 [0.010137411765754223, -0.06706183403730392, -... ... positive\n","15 [0.037099793553352356, 0.02509041130542755, -0... ... positive\n","16 [-0.05982975661754608, -0.04541410878300667, -... ... positive\n","17 [0.01637415960431099, 0.03079209476709366, -0.... ... neutral\n","18 [0.053151000291109085, -0.04056103900074959, 0... ... negative\n","19 [-0.005488495342433453, 0.009531817398965359, ... ... positive\n","20 [-0.0410454235970974, -0.04932034760713577, -0... ... neutral\n","21 [0.015400867909193039, 0.010604296810925007, 0... ... neutral\n","22 [-0.011158468201756477, 0.047883182764053345, ... ... positive\n","23 [0.01367797702550888, -0.040253281593322754, -... ... negative\n","24 [-0.07428913563489914, -0.052280243486166, -0.... ... positive\n","25 [-0.0625331699848175, 0.02265137806534767, -0.... ... positive\n","26 [0.018849631771445274, -0.05681510269641876, -... ... 
negative\n","27 [-0.006397973746061325, -0.01870238408446312, ... ... negative\n","28 [-0.03488464280962944, -0.08252372592687607, -... ... neutral\n","29 [0.019256893545389175, -0.04089582711458206, -... ... positive\n","30 [-0.06287706643342972, -0.019301993772387505, ... ... positive\n","31 [-0.01678450219333172, 0.01658053882420063, -0... ... neutral\n","32 [0.026957523077726364, 0.04000873491168022, -0... ... positive\n","33 [0.07982629537582397, -0.0787980854511261, -0.... ... negative\n","34 [0.023493440821766853, -0.0007498250342905521,... ... negative\n","35 [0.033786237239837646, -0.04347962886095047, 0... ... neutral\n","36 [-0.064402736723423, 0.025162827223539352, 0.0... ... negative\n","37 [-0.002320488216355443, 0.008552802726626396, ... ... neutral\n","38 [-0.0027419731486588717, -0.06013084575533867,... ... positive\n","39 [0.01821870170533657, -0.014606344513595104, -... ... positive\n","40 [-0.04760941118001938, -0.01184796541929245, 0... ... positive\n","41 [0.038119763135910034, -0.0385950542986393, -0... ... positive\n","42 [-0.020920872688293457, -0.041152868419885635,... ... neutral\n","43 [0.08012807369232178, 0.05087059363722801, -0.... ... neutral\n","44 [-0.021665776148438454, -0.02926868386566639, ... ... neutral\n","45 [0.03609812259674072, 0.007918721996247768, -0... ... negative\n","46 [-0.05974249914288521, -0.009394897148013115, ... ... negative\n","47 [0.028496630489826202, 0.004139357712119818, -... ... positive\n","48 [0.017045874148607254, -0.045841291546821594, ... ... neutral\n","49 [-0.039329949766397476, -0.03347548097372055, ... ... neutral\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# 4. 
Test the fitted pipe on a new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1613513707167,"user_tz":-300,"elapsed":3892,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a5a0e00d-fb28-4378-b9cb-5c8cfff5dc78"},"source":["fitted_pipe.predict('Aliens are immortal!')\r\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingsdocumentsentiment_confidencesentiment
origin_index
0[-0.0700131505727768, -0.06706050038337708, -0...Aliens are immortal!0.664938positive
\n","
"],"text/plain":[" default_name_embeddings ... sentiment\n","origin_index ... \n","0 [-0.0700131505727768, -0.06706050038337708, -0... ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## 5. Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1613513707169,"user_tz":-300,"elapsed":3879,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7e345cbe-e153-4da1-9655-119ba6e0acaf"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setLoadSP(False) | Info: Whether to load SentencePiece ops file which is required only by multi-lingual models. This is not changeable after it's set with a pretrained model nor it is compatible with Windows. 
| Currently set to : False\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## 6. 
Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1613513714606,"user_tz":-300,"elapsed":11302,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"baa0d5ed-0008-420c-d50b-3df30a673603"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 1.00 0.96 0.98 26\n"," positive 0.96 1.00 0.98 24\n","\n"," accuracy 0.98 50\n"," macro avg 0.98 0.98 0.98 50\n","weighted avg 0.98 0.98 0.98 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingsdocumentsentiment_confidencetextysentiment
origin_index
0[0.03101399727165699, -0.06800110638141632, -0...briefcase full of porn0.915265briefcase full of pornpositivepositive
1[-0.04206841439008713, 0.014935214072465897, 0...here's carrie fisher in 'one of the most enter...0.987602here's carrie fisher in 'one of the most enter...negativenegative
2[0.05720885097980499, -0.004087412729859352, -...transplanted new yorker disappointed with loca...0.957332transplanted new yorker disappointed with loca...positivepositive
3[-0.06800246238708496, -0.01590467058122158, -...this 'catfight' clip pauses anne heche and san...0.968843this 'catfight' clip pauses anne heche and san...negativenegative
4[-0.010691618546843529, -0.011138608679175377,...congress establishes bill suggestion hotline0.964053congress establishes bill suggestion hotlinepositivepositive
5[0.06491611897945404, -0.022880295291543007, -...ben carson suggests obama's iran deal is 'anti...0.974479ben carson suggests obama's iran deal is 'anti...negativenegative
6[-0.06249837204813957, 0.026482177898287773, 0...eden baylee is a stranger at sunset0.961377eden baylee is a stranger at sunsetnegativenegative
7[-0.03163134679198265, 0.034580837935209274, 0...lois gibbs: 'the government wouldn't help me, ...0.971898lois gibbs: 'the government wouldn't help me, ...negativenegative
8[0.05561395362019539, -0.04143591225147247, -0...time-warner ceo announces plans to merge with ...0.977598time-warner ceo announces plans to merge with ...positivepositive
9[-0.0431799553334713, -0.046702779829502106, -...typhoon meranti slams into china causing mayhem0.638766typhoon meranti slams into china causing mayhemnegativepositive
10[0.01190741267055273, -0.038865745067596436, -...raid on nacho-supremacist compound uncovers gu...0.898022raid on nacho-supremacist compound uncovers gu...positivepositive
11[0.0011607632040977478, 0.03130388632416725, -...goose suddenly realizes it doesn't have to hon...0.870899goose suddenly realizes it doesn't have to hon...positivepositive
12[0.038939230144023895, -0.028029074892401695, ...frantic, last-second study finds old-fashioned...0.946996frantic, last-second study finds old-fashioned...positivepositive
13[-0.023152370005846024, -0.041393812745809555,...couple discovers shop that sells cakes0.854924couple discovers shop that sells cakespositivepositive
14[0.010137411765754223, -0.06706183403730392, -...frito-lay targets blacks with new menthol doritos0.926026frito-lay targets blacks with new menthol doritospositivepositive
15[0.037099793553352356, 0.02509041130542755, -0...all u.s. males renamed dudley0.880769all u.s. males renamed dudleypositivepositive
16[-0.05982975661754608, -0.04541410878300667, -...xabraxian astronomers discover new planet0.971677xabraxian astronomers discover new planetpositivepositive
17[0.01637415960431099, 0.03079209476709366, -0....obama blasts afghans for expelling reporter --...0.946057obama blasts afghans for expelling reporter --...negativenegative
18[0.053151000291109085, -0.04056103900074959, 0...riz ahmed, mindy kaling, aziz ansari and other...0.965899riz ahmed, mindy kaling, aziz ansari and other...negativenegative
19[-0.005488495342433453, 0.009531817398965359, ...human slave from future remembers when cyber m...0.872274human slave from future remembers when cyber m...positivepositive
20[-0.0410454235970974, -0.04932034760713577, -0...toward a fairer admissions process0.947733toward a fairer admissions processnegativenegative
21[0.015400867909193039, 0.010604296810925007, 0...cupid cop gave out roses, cards on valentine's...0.903808cupid cop gave out roses, cards on valentine's...negativenegative
22[-0.011158468201756477, 0.047883182764053345, ...new york to host 1998 ill-will games0.961589new york to host 1998 ill-will gamespositivepositive
23[0.01367797702550888, -0.040253281593322754, -...kendall jenner is 'wonder'-fully blonde in vogue0.979704kendall jenner is 'wonder'-fully blonde in voguenegativenegative
24[-0.07428913563489914, -0.052280243486166, -0....family hoping mother knows birthday nature wal...0.819510family hoping mother knows birthday nature wal...positivepositive
25[-0.0625331699848175, 0.02265137806534767, -0....senate votes 64-36, not sure on what0.940529senate votes 64-36, not sure on whatpositivepositive
26[0.018849631771445274, -0.05681510269641876, -...'the interview' is having a very good weekend ...0.964672'the interview' is having a very good weekend ...negativenegative
27[-0.006397973746061325, -0.01870238408446312, ...mom uses face-painting skills to turn kids int...0.948459mom uses face-painting skills to turn kids int...negativenegative
28[-0.03488464280962944, -0.08252372592687607, -...does policing summons warrants really prevent ...0.805765does policing summons warrants really prevent ...negativenegative
29[0.019256893545389175, -0.04089582711458206, -...urban polling centers recommend voters start l...0.886659urban polling centers recommend voters start l...positivepositive
30[-0.06287706643342972, -0.019301993772387505, ...deadlocked supreme court: 'someone's voting tw...0.841328deadlocked supreme court: 'someone's voting tw...positivepositive
31[-0.01678450219333172, 0.01658053882420063, -0...the gop has a split personality when it comes ...0.896577the gop has a split personality when it comes ...negativenegative
32[0.026957523077726364, 0.04000873491168022, -0...world leaders pour into washington to pay last...0.954260world leaders pour into washington to pay last...positivepositive
33[0.07982629537582397, -0.0787980854511261, -0....jyothi rao: on threads of authenticity0.947035jyothi rao: on threads of authenticitynegativenegative
34[0.023493440821766853, -0.0007498250342905521,...bruce jenner okay following deadly car crash0.969706bruce jenner okay following deadly car crashnegativenegative
35[0.033786237239837646, -0.04347962886095047, 0...a guide to sex at 50 and beyond0.873368a guide to sex at 50 and beyondnegativenegative
36[-0.064402736723423, 0.025162827223539352, 0.0...you may have missed the 6th woman on time's pe...0.969335you may have missed the 6th woman on time's pe...negativenegative
37[-0.002320488216355443, 0.008552802726626396, ...texas moves to block medicaid funding for plan...0.853918texas moves to block medicaid funding for plan...negativenegative
38[-0.0027419731486588717, -0.06013084575533867,...federal government to be run by cheaper mexica...0.950003federal government to be run by cheaper mexica...positivepositive
39[0.01821870170533657, -0.014606344513595104, -...dog doesn't consider itself part of family0.890232dog doesn't consider itself part of familypositivepositive
40[-0.04760941118001938, -0.01184796541929245, 0...report: doing your part to stop climate change...0.978735report: doing your part to stop climate change...positivepositive
41[0.038119763135910034, -0.0385950542986393, -0...breitbart traffic down as readers now getting ...0.885908breitbart traffic down as readers now getting ...positivepositive
42[-0.020920872688293457, -0.041152868419885635,...obama: i am where i am today because of voting...0.913123obama: i am where i am today because of voting...negativenegative
43[0.08012807369232178, 0.05087059363722801, -0....kentucky's gop bromance deepens, even without ...0.947572kentucky's gop bromance deepens, even without ...negativenegative
44[-0.021665776148438454, -0.02926868386566639, ...stop calling young adults \"college kids\"0.829551stop calling young adults \"college kids\"negativenegative
45[0.03609812259674072, 0.007918721996247768, -0...the real reason trump can't break the gop0.977416the real reason trump can't break the gopnegativenegative
46[-0.05974249914288521, -0.009394897148013115, ...testing: enhanced interrogation in the classroom0.969963testing: enhanced interrogation in the classroomnegativenegative
47[0.028496630489826202, 0.004139357712119818, -...disembodied voice in elevator wants to know wa...0.932753disembodied voice in elevator wants to know wa...positivepositive
48[0.017045874148607254, -0.045841291546821594, ...a classic jason somehow gets mixed into area m...0.770142a classic jason somehow gets mixed into area m...positivepositive
49[-0.039329949766397476, -0.03347548097372055, ...wisconsin students trumped the rest with their...0.785354wisconsin students trumped the rest with their...negativenegative
\n","
"],"text/plain":[" default_name_embeddings ... sentiment\n","origin_index ... \n","0 [0.03101399727165699, -0.06800110638141632, -0... ... positive\n","1 [-0.04206841439008713, 0.014935214072465897, 0... ... negative\n","2 [0.05720885097980499, -0.004087412729859352, -... ... positive\n","3 [-0.06800246238708496, -0.01590467058122158, -... ... negative\n","4 [-0.010691618546843529, -0.011138608679175377,... ... positive\n","5 [0.06491611897945404, -0.022880295291543007, -... ... negative\n","6 [-0.06249837204813957, 0.026482177898287773, 0... ... negative\n","7 [-0.03163134679198265, 0.034580837935209274, 0... ... negative\n","8 [0.05561395362019539, -0.04143591225147247, -0... ... positive\n","9 [-0.0431799553334713, -0.046702779829502106, -... ... positive\n","10 [0.01190741267055273, -0.038865745067596436, -... ... positive\n","11 [0.0011607632040977478, 0.03130388632416725, -... ... positive\n","12 [0.038939230144023895, -0.028029074892401695, ... ... positive\n","13 [-0.023152370005846024, -0.041393812745809555,... ... positive\n","14 [0.010137411765754223, -0.06706183403730392, -... ... positive\n","15 [0.037099793553352356, 0.02509041130542755, -0... ... positive\n","16 [-0.05982975661754608, -0.04541410878300667, -... ... positive\n","17 [0.01637415960431099, 0.03079209476709366, -0.... ... negative\n","18 [0.053151000291109085, -0.04056103900074959, 0... ... negative\n","19 [-0.005488495342433453, 0.009531817398965359, ... ... positive\n","20 [-0.0410454235970974, -0.04932034760713577, -0... ... negative\n","21 [0.015400867909193039, 0.010604296810925007, 0... ... negative\n","22 [-0.011158468201756477, 0.047883182764053345, ... ... positive\n","23 [0.01367797702550888, -0.040253281593322754, -... ... negative\n","24 [-0.07428913563489914, -0.052280243486166, -0.... ... positive\n","25 [-0.0625331699848175, 0.02265137806534767, -0.... ... positive\n","26 [0.018849631771445274, -0.05681510269641876, -... ... 
negative\n","27 [-0.006397973746061325, -0.01870238408446312, ... ... negative\n","28 [-0.03488464280962944, -0.08252372592687607, -... ... negative\n","29 [0.019256893545389175, -0.04089582711458206, -... ... positive\n","30 [-0.06287706643342972, -0.019301993772387505, ... ... positive\n","31 [-0.01678450219333172, 0.01658053882420063, -0... ... negative\n","32 [0.026957523077726364, 0.04000873491168022, -0... ... positive\n","33 [0.07982629537582397, -0.0787980854511261, -0.... ... negative\n","34 [0.023493440821766853, -0.0007498250342905521,... ... negative\n","35 [0.033786237239837646, -0.04347962886095047, 0... ... negative\n","36 [-0.064402736723423, 0.025162827223539352, 0.0... ... negative\n","37 [-0.002320488216355443, 0.008552802726626396, ... ... negative\n","38 [-0.0027419731486588717, -0.06013084575533867,... ... positive\n","39 [0.01821870170533657, -0.014606344513595104, -... ... positive\n","40 [-0.04760941118001938, -0.01184796541929245, 0... ... positive\n","41 [0.038119763135910034, -0.0385950542986393, -0... ... positive\n","42 [-0.020920872688293457, -0.041152868419885635,... ... negative\n","43 [0.08012807369232178, 0.05087059363722801, -0.... ... negative\n","44 [-0.021665776148438454, -0.02926868386566639, ... ... negative\n","45 [0.03609812259674072, 0.007918721996247768, -0... ... negative\n","46 [-0.05974249914288521, -0.009394897148013115, ... ... negative\n","47 [0.028496630489826202, 0.004139357712119818, -... ... positive\n","48 [0.017045874148607254, -0.045841291546821594, ... ... positive\n","49 [-0.039329949766397476, -0.03347548097372055, ... ... negative\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# 7. 
Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1613513714610,"user_tz":-300,"elapsed":11294,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0eb70967-5ba7-429e-e85b-2365de893647"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP 
model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns 
Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model 
labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614246277444,"user_tz":-300,"elapsed":3877627,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"851afe21-2adb-41bd-b377-fb61511a08c0"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.91 0.87 0.89 3952\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.90 0.89 0.89 4048\n","\n"," accuracy 0.88 8000\n"," macro avg 0.60 0.59 0.59 8000\n","weighted avg 0.90 0.88 0.89 8000\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 7.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614246981422,"user_tz":-300,"elapsed":703959,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6e79124f-a8c9-4129-da93-ca3a3c3abf3f"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.87 0.82 0.84 1048\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.83 0.84 0.83 952\n","\n"," accuracy 0.83 2000\n"," macro avg 0.57 0.55 0.56 2000\n","weighted avg 0.85 0.83 0.84 2000\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 8. 
Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1613521881494,"user_tz":-300,"elapsed":177870,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8f796a6b-0b73-4840-98de-15766cd8878a"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 9. Let's load the model from HDD.\n","This makes offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":106},"executionInfo":{"status":"ok","timestamp":1613521896813,"user_tz":-300,"elapsed":191661,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"57cbefda-d9ac-4c2e-a33d-8f0cb1d3f80a"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Aliens are immortal!')\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddingssentiment
origin_index
0Aliens are immortal!0.999956[0.3093055188655853, 0.12947319447994232, 0.06...negative
\n","
"],"text/plain":[" document ... sentiment\n","origin_index ... \n","0 Aliens are immortal! ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1613521896815,"user_tz":-300,"elapsed":191024,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b19bd54b-3b00-44a5-afed-cc941b956215"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this SentimentDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb index 490dc371..1c25fc90 100644 --- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_IMDB.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class IMDB Movie sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download IMDB dataset\n","https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews\n","\n","IMDB dataset having 50K movie reviews for natural language processing or Text analytics.\n","This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training and 25,000 for testing. 
So, predict the number of positive and negative reviews using either classification or deep learning algorithms.\n","For more dataset information, please go through the following link,\n","http://ai.stanford.edu/~amaas/data/sentiment/"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788112062,"user_tz":-300,"elapsed":2594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a99c604d-fad2-4ace-c9b3-13dcb5893e03"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:07:54-- http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3288450 (3.1M) [text/csv]\n","Saving to: ‘IMDB-Dataset.csv’\n","\n","IMDB-Dataset.csv 100%[===================>] 3.14M 2.29MB/s in 1.4s \n","\n","2021-01-16 09:07:56 (2.29 MB/s) - ‘IMDB-Dataset.csv’ saved [3288450/3288450]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788130009,"user_tz":-300,"elapsed":1019,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fa40642c-aeea-4506-b40e-3542a49a2ee9"},"source":["import pandas as pd\n","train_path = '/content/IMDB-Dataset.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = 
train_df[columns]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0One of the other reviewers has mentioned that ...positive
1A wonderful little production. <br /><br />The...positive
2I thought this was a wonderful way to spend ti...positive
3Basically there's a family where a little boy ...negative
4Petter Mattei's \"Love in the Time of Money\" is...positive
.........
2495Another great movie by Costa-Gavras. It's a gr...negative
2496Though structured totally different from the b...positive
2497Handsome and dashing British airline pilot Geo...positive
2498This film breeches the fine line between satir...negative
2499Mardi Gras: Made in China provides a wonderful...positive
\n","

2500 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 One of the other reviewers has mentioned that ... positive\n","1 A wonderful little production.

The... positive\n","2 I thought this was a wonderful way to spend ti... positive\n","3 Basically there's a family where a little boy ... negative\n","4 Petter Mattei's \"Love in the Time of Money\" is... positive\n","... ... ...\n","2495 Another great movie by Costa-Gavras. It's a gr... negative\n","2496 Though structured totally different from the b... positive\n","2497 Handsome and dashing British airline pilot Geo... positive\n","2498 This film breeches the fine line between satir... negative\n","2499 Mardi Gras: Made in China provides a wonderful... positive\n","\n","[2500 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609464660630,"user_tz":-300,"elapsed":19440,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3be85e48-38c8-4a7c-bbd7-226e204fa739"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.70 0.70 0.70 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.79 0.65 0.71 23\n","\n"," accuracy 0.68 50\n"," macro avg 0.50 0.45 0.47 50\n","weighted avg 0.74 0.68 0.71 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdefault_name_embeddingssentimentsentiment_confidenceydocument
origin_index
0One of the other reviewers has mentioned that ...[-0.04935329407453537, -0.01034686528146267, -...positive0.968638positiveOne of the other reviewers has mentioned that ...
1A wonderful little production. <br /><br />The...[0.040489643812179565, -0.054199717938899994, ...negative0.990273positiveA wonderful little production. <br /><br />The...
2I thought this was a wonderful way to spend ti...[0.026364900171756744, 0.07112795859575272, 0....negative0.957352positiveI thought this was a wonderful way to spend ti...
3Basically there's a family where a little boy ...[-0.05151151493191719, 0.008207003585994244, -...negative0.958503negativeBasically there's a family where a little boy ...
4Petter Mattei's \"Love in the Time of Money\" is...[0.06880538165569305, 0.019250543788075447, -0...positive0.999108positivePetter Mattei's \"Love in the Time of Money\" is...
5Probably my all-time favorite movie, a story o...[0.004764211364090443, 0.027671916410326958, -...positive0.993937positiveProbably my all-time favorite movie, a story o...
6I sure would like to see a resurrection of a u...[-0.03813941031694412, -0.03322296217083931, 0...positive0.974884positiveI sure would like to see a resurrection of a u...
7This show was an amazing, fresh & innovative i...[0.010670202784240246, -0.04322813078761101, -...negative0.721451negativeThis show was an amazing, fresh & innovative i...
8Encouraged by the positive comments about this...[0.010801736265420914, -0.07724311947822571, -...positive0.884824negativeEncouraged by the positive comments about this...
9If you like original gut wrenching laughter yo...[-0.0245585348457098, 0.0005475765210576355, -...negative0.850509positiveIf you like original gut wrenching laughter yo...
10Phil the Alien is one of those quirky films wh...[0.023403573781251907, 0.017464609816670418, -...negative0.836944negativePhil the Alien is one of those quirky films wh...
11I saw this movie when I was about 12 when it c...[-0.046517230570316315, -0.025949953123927116,...negative0.999218negativeI saw this movie when I was about 12 when it c...
12So im not a big fan of Boll's work but then ag...[0.0032458826899528503, -0.013339877128601074,...negative0.999841negativeSo im not a big fan of Boll's work but then ag...
13The cast played Shakespeare.<br /><br />Shakes...[0.044309284538030624, 0.061706289649009705, -...neutral0.504574negativeThe cast played Shakespeare.<br /><br />Shakes...
14This a fantastic movie of three prisoners who ...[0.005487383343279362, -0.005359508562833071, ...positive0.956110positiveThis a fantastic movie of three prisoners who ...
15Kind of drawn in by the erotic scenes, only to...[0.04357790946960449, -0.034652918577194214, -...negative0.990112negativeKind of drawn in by the erotic scenes, only to...
16Some films just simply should not be remade. T...[0.006823724135756493, -0.0692802369594574, -0...negative0.996081positiveSome films just simply should not be remade. T...
17This movie made it into one of my top 10 most ...[-0.013747279532253742, -0.0038213622756302357...negative0.999338negativeThis movie made it into one of my top 10 most ...
18I remember this film,it was the first film i h...[-0.005101265385746956, 0.022435873746871948, ...positive0.986708positiveI remember this film,it was the first film i h...
19An awful film! It must have been up against so...[0.011224010959267616, -0.007102800067514181, ...negative0.998881negativeAn awful film! It must have been up against so...
20After the success of Die Hard and it's sequels...[0.022048521786928177, -0.020497862249612808, ...negative0.650546positiveAfter the success of Die Hard and it's sequels...
21I had the terrible misfortune of having to vie...[-0.010102338157594204, -0.05102328583598137, ...negative0.999930negativeI had the terrible misfortune of having to vie...
22What an absolutely stunning movie, if you have...[-0.016428396105766296, 0.007074637804180384, ...positive0.981123positiveWhat an absolutely stunning movie, if you have...
23First of all, let's get a few things straight ...[-0.06437410414218903, -0.029181038960814476, ...negative0.957470negativeFirst of all, let's get a few things straight ...
24This was the worst movie I saw at WorldFest an...[0.03901044651865959, 0.06355303525924683, -0....negative0.999471negativeThis was the worst movie I saw at WorldFest an...
25The Karen Carpenter Story shows a little more ...[-0.021897025406360626, 0.04400184750556946, 0...positive0.997247positiveThe Karen Carpenter Story shows a little more ...
26\"The Cell\" is an exotic masterpiece, a dizzyin...[0.0439823754131794, -0.007468021009117365, -0...positive0.996351positive\"The Cell\" is an exotic masterpiece, a dizzyin...
27This film tried to be too many things all at o...[-0.004155139438807964, -0.03771881386637688, ...neutral0.570219negativeThis film tried to be too many things all at o...
28This movie was so frustrating. Everything seem...[0.015594013035297394, -0.007509331218898296, ...negative0.999906negativeThis movie was so frustrating. Everything seem...
29'War movie' is a Hollywood genre that has been...[-0.036022596061229706, -0.006816706154495478,...negative0.733068positive'War movie' is a Hollywood genre that has been...
30Taut and organically gripping, Edward Dmytryk'...[0.0312348585575819, -0.04670163244009018, -0....positive0.995882positiveTaut and organically gripping, Edward Dmytryk'...
31\"Ardh Satya\" is one of the finest film ever ma...[0.060114260762929916, -0.0590929239988327, -0...positive0.999671positive\"Ardh Satya\" is one of the finest film ever ma...
32My first exposure to the Templarios & not a go...[0.013515714555978775, -0.004898980725556612, ...negative0.999994negativeMy first exposure to the Templarios & not a go...
33One of the most significant quotes from the en...[0.022280631586909294, -0.00839739479124546, -...positive0.997032positiveOne of the most significant quotes from the en...
34I watched this film not really expecting much,...[0.009434111416339874, -0.046402934938669205, ...negative0.992625negativeI watched this film not really expecting much,...
35I bought this film at Blockbuster for $3.00, b...[0.011683089658617973, -0.047437384724617004, ...negative0.999485negativeI bought this film at Blockbuster for $3.00, b...
36The plot is about the death of little children...[-0.0348515659570694, 0.01680166646838188, -0....neutral0.537487negativeThe plot is about the death of little children...
37Ever watched a movie that lost the plot? Well,...[-0.02899913117289543, 0.0164097361266613, -0....negative0.998984negativeEver watched a movie that lost the plot? Well,...
38Okay, so this series kind of takes the route o...[0.002110496163368225, 0.02887572906911373, -0...positive0.965860positiveOkay, so this series kind of takes the route o...
39After sitting through this pile of dung, my hu...[0.013781447894871235, -0.010363072156906128, ...positive0.697232negativeAfter sitting through this pile of dung, my hu...
40It had all the clichés of movies of this type ...[0.03799372911453247, -0.038665950298309326, -...negative0.998307negativeIt had all the clichés of movies of this type ...
41This movie is based on the book, \"A Many Splen...[-0.00033091730438172817, -0.05126418545842171...positive0.998454positiveThis movie is based on the book, \"A Many Splen...
42Of all the films I have seen, this one, The Ra...[0.014630819670855999, -0.04907294735312462, -...negative0.999938negativeOf all the films I have seen, this one, The Ra...
43I had heard good things about \"States of Grace...[0.027017194777727127, 0.002088379580527544, 0...positive0.884466negativeI had heard good things about \"States of Grace...
44This movie struck home for me. Being 29, I rem...[-0.0009387845057062805, -0.048219360411167145...negative0.969341positiveThis movie struck home for me. Being 29, I rem...
45As a disclaimer, I've seen the movie 5-6 times...[0.0065035647712647915, 0.00230638706125319, 0...negative0.967124positiveAs a disclaimer, I've seen the movie 5-6 times...
46Protocol is an implausible movie whose only sa...[0.05113476142287254, 0.04671141505241394, -0....neutral0.593109negativeProtocol is an implausible movie whose only sa...
47How this film could be classified as Drama, I ...[0.011419376358389854, -0.0828876867890358, -0...negative0.991421negativeHow this film could be classified as Drama, I ...
48Preston Sturgis' THE POWER AND THE GLORY was u...[0.024031344801187515, 0.03399205952882767, 0....positive0.994996positivePreston Sturgis' THE POWER AND THE GLORY was u...
49Average (and surprisingly tame) Fulci giallo w...[0.015038557350635529, -0.0037642912939190865,...positive0.996770negativeAverage (and surprisingly tame) Fulci giallo w...
\n","
"],"text/plain":[" text ... document\n","origin_index ... \n","0 One of the other reviewers has mentioned that ... ... One of the other reviewers has mentioned that ...\n","1 A wonderful little production.

The... ... A wonderful little production.

The...\n","2 I thought this was a wonderful way to spend ti... ... I thought this was a wonderful way to spend ti...\n","3 Basically there's a family where a little boy ... ... Basically there's a family where a little boy ...\n","4 Petter Mattei's \"Love in the Time of Money\" is... ... Petter Mattei's \"Love in the Time of Money\" is...\n","5 Probably my all-time favorite movie, a story o... ... Probably my all-time favorite movie, a story o...\n","6 I sure would like to see a resurrection of a u... ... I sure would like to see a resurrection of a u...\n","7 This show was an amazing, fresh & innovative i... ... This show was an amazing, fresh & innovative i...\n","8 Encouraged by the positive comments about this... ... Encouraged by the positive comments about this...\n","9 If you like original gut wrenching laughter yo... ... If you like original gut wrenching laughter yo...\n","10 Phil the Alien is one of those quirky films wh... ... Phil the Alien is one of those quirky films wh...\n","11 I saw this movie when I was about 12 when it c... ... I saw this movie when I was about 12 when it c...\n","12 So im not a big fan of Boll's work but then ag... ... So im not a big fan of Boll's work but then ag...\n","13 The cast played Shakespeare.

Shakes... ... The cast played Shakespeare.

Shakes...\n","14 This a fantastic movie of three prisoners who ... ... This a fantastic movie of three prisoners who ...\n","15 Kind of drawn in by the erotic scenes, only to... ... Kind of drawn in by the erotic scenes, only to...\n","16 Some films just simply should not be remade. T... ... Some films just simply should not be remade. T...\n","17 This movie made it into one of my top 10 most ... ... This movie made it into one of my top 10 most ...\n","18 I remember this film,it was the first film i h... ... I remember this film,it was the first film i h...\n","19 An awful film! It must have been up against so... ... An awful film! It must have been up against so...\n","20 After the success of Die Hard and it's sequels... ... After the success of Die Hard and it's sequels...\n","21 I had the terrible misfortune of having to vie... ... I had the terrible misfortune of having to vie...\n","22 What an absolutely stunning movie, if you have... ... What an absolutely stunning movie, if you have...\n","23 First of all, let's get a few things straight ... ... First of all, let's get a few things straight ...\n","24 This was the worst movie I saw at WorldFest an... ... This was the worst movie I saw at WorldFest an...\n","25 The Karen Carpenter Story shows a little more ... ... The Karen Carpenter Story shows a little more ...\n","26 \"The Cell\" is an exotic masterpiece, a dizzyin... ... \"The Cell\" is an exotic masterpiece, a dizzyin...\n","27 This film tried to be too many things all at o... ... This film tried to be too many things all at o...\n","28 This movie was so frustrating. Everything seem... ... This movie was so frustrating. Everything seem...\n","29 'War movie' is a Hollywood genre that has been... ... 'War movie' is a Hollywood genre that has been...\n","30 Taut and organically gripping, Edward Dmytryk'... ... Taut and organically gripping, Edward Dmytryk'...\n","31 \"Ardh Satya\" is one of the finest film ever ma... ... 
\"Ardh Satya\" is one of the finest film ever ma...\n","32 My first exposure to the Templarios & not a go... ... My first exposure to the Templarios & not a go...\n","33 One of the most significant quotes from the en... ... One of the most significant quotes from the en...\n","34 I watched this film not really expecting much,... ... I watched this film not really expecting much,...\n","35 I bought this film at Blockbuster for $3.00, b... ... I bought this film at Blockbuster for $3.00, b...\n","36 The plot is about the death of little children... ... The plot is about the death of little children...\n","37 Ever watched a movie that lost the plot? Well,... ... Ever watched a movie that lost the plot? Well,...\n","38 Okay, so this series kind of takes the route o... ... Okay, so this series kind of takes the route o...\n","39 After sitting through this pile of dung, my hu... ... After sitting through this pile of dung, my hu...\n","40 It had all the clichés of movies of this type ... ... It had all the clichés of movies of this type ...\n","41 This movie is based on the book, \"A Many Splen... ... This movie is based on the book, \"A Many Splen...\n","42 Of all the films I have seen, this one, The Ra... ... Of all the films I have seen, this one, The Ra...\n","43 I had heard good things about \"States of Grace... ... I had heard good things about \"States of Grace...\n","44 This movie struck home for me. Being 29, I rem... ... This movie struck home for me. Being 29, I rem...\n","45 As a disclaimer, I've seen the movie 5-6 times... ... As a disclaimer, I've seen the movie 5-6 times...\n","46 Protocol is an implausible movie whose only sa... ... Protocol is an implausible movie whose only sa...\n","47 How this film could be classified as Drama, I ... ... How this film could be classified as Drama, I ...\n","48 Preston Sturgis' THE POWER AND THE GLORY was u... ... Preston Sturgis' THE POWER AND THE GLORY was u...\n","49 Average (and surprisingly tame) Fulci giallo w... 
... Average (and surprisingly tame) Fulci giallo w...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609464663328,"user_tz":-300,"elapsed":2733,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ae604bdd-49fb-4b5e-978e-5190dd03b227"},"source":["fitted_pipe.predict('It was one of the best films i have ever watched in my entire life !!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingssentimentsentiment_confidencedocument
origin_index
0[0.06468033790588379, -0.040837567299604416, -...positive0.982375Bitcoin is going to the moon!
\n","
"],"text/plain":[" default_name_embeddings ... document\n","origin_index ... \n","0 [0.06468033790588379, -0.040837567299604416, -... ... Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609464663334,"user_tz":-300,"elapsed":31,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a9059197-9e1c-4afe-ca3b-97c6d310f60c"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609464673090,"user_tz":-300,"elapsed":9777,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"164e4f03-f48a-4347-95e8-fd3509bf146e"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.81 0.96 0.88 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.94 0.70 0.80 23\n","\n"," accuracy 0.84 50\n"," macro avg 0.58 0.55 0.56 50\n","weighted avg 0.87 0.84 0.84 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textdefault_name_embeddingssentimentsentiment_confidenceydocument
origin_index
0One of the other reviewers has mentioned that ...[-0.04935329407453537, -0.01034686528146267, -...positive0.966858positiveOne of the other reviewers has mentioned that ...
1A wonderful little production. <br /><br />The...[0.040489643812179565, -0.054199717938899994, ...negative0.985679positiveA wonderful little production. <br /><br />The...
2I thought this was a wonderful way to spend ti...[0.026364900171756744, 0.07112795859575272, 0....negative0.988745positiveI thought this was a wonderful way to spend ti...
3Basically there's a family where a little boy ...[-0.05151151493191719, 0.008207003585994244, -...negative0.999291negativeBasically there's a family where a little boy ...
4Petter Mattei's \"Love in the Time of Money\" is...[0.06880538165569305, 0.019250543788075447, -0...positive0.999684positivePetter Mattei's \"Love in the Time of Money\" is...
5Probably my all-time favorite movie, a story o...[0.004764211364090443, 0.027671916410326958, -...positive0.996598positiveProbably my all-time favorite movie, a story o...
6I sure would like to see a resurrection of a u...[-0.03813941031694412, -0.03322296217083931, 0...positive0.960203positiveI sure would like to see a resurrection of a u...
7This show was an amazing, fresh & innovative i...[0.010670202784240246, -0.04322813078761101, -...negative0.753273negativeThis show was an amazing, fresh & innovative i...
8Encouraged by the positive comments about this...[0.010801736265420914, -0.07724311947822571, -...negative0.958928negativeEncouraged by the positive comments about this...
9If you like original gut wrenching laughter yo...[-0.0245585348457098, 0.0005475765210576355, -...neutral0.536441positiveIf you like original gut wrenching laughter yo...
10Phil the Alien is one of those quirky films wh...[0.023403573781251907, 0.017464609816670418, -...negative0.959978negativePhil the Alien is one of those quirky films wh...
11I saw this movie when I was about 12 when it c...[-0.046517230570316315, -0.025949953123927116,...negative0.999949negativeI saw this movie when I was about 12 when it c...
12So im not a big fan of Boll's work but then ag...[0.0032458826899528503, -0.013339877128601074,...negative0.999997negativeSo im not a big fan of Boll's work but then ag...
13The cast played Shakespeare.<br /><br />Shakes...[0.044309284538030624, 0.061706289649009705, -...negative0.984033negativeThe cast played Shakespeare.<br /><br />Shakes...
14This a fantastic movie of three prisoners who ...[0.005487383343279362, -0.005359508562833071, ...positive0.775998positiveThis a fantastic movie of three prisoners who ...
15Kind of drawn in by the erotic scenes, only to...[0.04357790946960449, -0.034652918577194214, -...negative0.999683negativeKind of drawn in by the erotic scenes, only to...
16Some films just simply should not be remade. T...[0.006823724135756493, -0.0692802369594574, -0...negative0.999245positiveSome films just simply should not be remade. T...
17This movie made it into one of my top 10 most ...[-0.013747279532253742, -0.0038213622756302357...negative0.999970negativeThis movie made it into one of my top 10 most ...
18I remember this film,it was the first film i h...[-0.005101265385746956, 0.022435873746871948, ...positive0.975574positiveI remember this film,it was the first film i h...
19An awful film! It must have been up against so...[0.011224010959267616, -0.007102800067514181, ...negative0.999990negativeAn awful film! It must have been up against so...
20After the success of Die Hard and it's sequels...[0.022048521786928177, -0.020497862249612808, ...positive0.951596positiveAfter the success of Die Hard and it's sequels...
21I had the terrible misfortune of having to vie...[-0.010102338157594204, -0.05102328583598137, ...negative0.999999negativeI had the terrible misfortune of having to vie...
22What an absolutely stunning movie, if you have...[-0.016428396105766296, 0.007074637804180384, ...positive0.931946positiveWhat an absolutely stunning movie, if you have...
23First of all, let's get a few things straight ...[-0.06437410414218903, -0.029181038960814476, ...negative0.990350negativeFirst of all, let's get a few things straight ...
24This was the worst movie I saw at WorldFest an...[0.03901044651865959, 0.06355303525924683, -0....negative0.999986negativeThis was the worst movie I saw at WorldFest an...
25The Karen Carpenter Story shows a little more ...[-0.021897025406360626, 0.04400184750556946, 0...positive0.999463positiveThe Karen Carpenter Story shows a little more ...
26\"The Cell\" is an exotic masterpiece, a dizzyin...[0.0439823754131794, -0.007468021009117365, -0...positive0.998291positive\"The Cell\" is an exotic masterpiece, a dizzyin...
27This film tried to be too many things all at o...[-0.004155139438807964, -0.03771881386637688, ...negative0.865707negativeThis film tried to be too many things all at o...
28This movie was so frustrating. Everything seem...[0.015594013035297394, -0.007509331218898296, ...negative0.999998negativeThis movie was so frustrating. Everything seem...
29'War movie' is a Hollywood genre that has been...[-0.036022596061229706, -0.006816706154495478,...negative0.993793positive'War movie' is a Hollywood genre that has been...
30Taut and organically gripping, Edward Dmytryk'...[0.0312348585575819, -0.04670163244009018, -0....positive0.997460positiveTaut and organically gripping, Edward Dmytryk'...
31\"Ardh Satya\" is one of the finest film ever ma...[0.060114260762929916, -0.0590929239988327, -0...positive0.999880positive\"Ardh Satya\" is one of the finest film ever ma...
32My first exposure to the Templarios & not a go...[0.013515714555978775, -0.004898980725556612, ...negative1.000000negativeMy first exposure to the Templarios & not a go...
33One of the most significant quotes from the en...[0.022280631586909294, -0.00839739479124546, -...positive0.999292positiveOne of the most significant quotes from the en...
34I watched this film not really expecting much,...[0.009434111416339874, -0.046402934938669205, ...negative0.999848negativeI watched this film not really expecting much,...
35I bought this film at Blockbuster for $3.00, b...[0.011683089658617973, -0.047437384724617004, ...negative0.999993negativeI bought this film at Blockbuster for $3.00, b...
36The plot is about the death of little children...[-0.0348515659570694, 0.01680166646838188, -0....negative0.997690negativeThe plot is about the death of little children...
37Ever watched a movie that lost the plot? Well,...[-0.02899913117289543, 0.0164097361266613, -0....negative0.999995negativeEver watched a movie that lost the plot? Well,...
38Okay, so this series kind of takes the route o...[0.002110496163368225, 0.02887572906911373, -0...positive0.993408positiveOkay, so this series kind of takes the route o...
39After sitting through this pile of dung, my hu...[0.013781447894871235, -0.010363072156906128, ...negative0.905860negativeAfter sitting through this pile of dung, my hu...
40It had all the clichés of movies of this type ...[0.03799372911453247, -0.038665950298309326, -...negative0.999892negativeIt had all the clichés of movies of this type ...
41This movie is based on the book, \"A Many Splen...[-0.00033091730438172817, -0.05126418545842171...positive0.999837positiveThis movie is based on the book, \"A Many Splen...
42Of all the films I have seen, this one, The Ra...[0.014630819670855999, -0.04907294735312462, -...negative1.000000negativeOf all the films I have seen, this one, The Ra...
43I had heard good things about \"States of Grace...[0.027017194777727127, 0.002088379580527544, 0...negative0.978662negativeI had heard good things about \"States of Grace...
44This movie struck home for me. Being 29, I rem...[-0.0009387845057062805, -0.048219360411167145...negative0.993965positiveThis movie struck home for me. Being 29, I rem...
45As a disclaimer, I've seen the movie 5-6 times...[0.0065035647712647915, 0.00230638706125319, 0...negative0.999341positiveAs a disclaimer, I've seen the movie 5-6 times...
46Protocol is an implausible movie whose only sa...[0.05113476142287254, 0.04671141505241394, -0....negative0.913287negativeProtocol is an implausible movie whose only sa...
47How this film could be classified as Drama, I ...[0.011419376358389854, -0.0828876867890358, -0...negative0.999841negativeHow this film could be classified as Drama, I ...
48Preston Sturgis' THE POWER AND THE GLORY was u...[0.024031344801187515, 0.03399205952882767, 0....positive0.998516positivePreston Sturgis' THE POWER AND THE GLORY was u...
49Average (and surprisingly tame) Fulci giallo w...[0.015038557350635529, -0.0037642912939190865,...positive0.995483negativeAverage (and surprisingly tame) Fulci giallo w...
\n","
"],"text/plain":[" text ... document\n","origin_index ... \n","0 One of the other reviewers has mentioned that ... ... One of the other reviewers has mentioned that ...\n","1 A wonderful little production.

The... ... A wonderful little production.

The...\n","2 I thought this was a wonderful way to spend ti... ... I thought this was a wonderful way to spend ti...\n","3 Basically there's a family where a little boy ... ... Basically there's a family where a little boy ...\n","4 Petter Mattei's \"Love in the Time of Money\" is... ... Petter Mattei's \"Love in the Time of Money\" is...\n","5 Probably my all-time favorite movie, a story o... ... Probably my all-time favorite movie, a story o...\n","6 I sure would like to see a resurrection of a u... ... I sure would like to see a resurrection of a u...\n","7 This show was an amazing, fresh & innovative i... ... This show was an amazing, fresh & innovative i...\n","8 Encouraged by the positive comments about this... ... Encouraged by the positive comments about this...\n","9 If you like original gut wrenching laughter yo... ... If you like original gut wrenching laughter yo...\n","10 Phil the Alien is one of those quirky films wh... ... Phil the Alien is one of those quirky films wh...\n","11 I saw this movie when I was about 12 when it c... ... I saw this movie when I was about 12 when it c...\n","12 So im not a big fan of Boll's work but then ag... ... So im not a big fan of Boll's work but then ag...\n","13 The cast played Shakespeare.

Shakes... ... The cast played Shakespeare.

Shakes...\n","14 This a fantastic movie of three prisoners who ... ... This a fantastic movie of three prisoners who ...\n","15 Kind of drawn in by the erotic scenes, only to... ... Kind of drawn in by the erotic scenes, only to...\n","16 Some films just simply should not be remade. T... ... Some films just simply should not be remade. T...\n","17 This movie made it into one of my top 10 most ... ... This movie made it into one of my top 10 most ...\n","18 I remember this film,it was the first film i h... ... I remember this film,it was the first film i h...\n","19 An awful film! It must have been up against so... ... An awful film! It must have been up against so...\n","20 After the success of Die Hard and it's sequels... ... After the success of Die Hard and it's sequels...\n","21 I had the terrible misfortune of having to vie... ... I had the terrible misfortune of having to vie...\n","22 What an absolutely stunning movie, if you have... ... What an absolutely stunning movie, if you have...\n","23 First of all, let's get a few things straight ... ... First of all, let's get a few things straight ...\n","24 This was the worst movie I saw at WorldFest an... ... This was the worst movie I saw at WorldFest an...\n","25 The Karen Carpenter Story shows a little more ... ... The Karen Carpenter Story shows a little more ...\n","26 \"The Cell\" is an exotic masterpiece, a dizzyin... ... \"The Cell\" is an exotic masterpiece, a dizzyin...\n","27 This film tried to be too many things all at o... ... This film tried to be too many things all at o...\n","28 This movie was so frustrating. Everything seem... ... This movie was so frustrating. Everything seem...\n","29 'War movie' is a Hollywood genre that has been... ... 'War movie' is a Hollywood genre that has been...\n","30 Taut and organically gripping, Edward Dmytryk'... ... Taut and organically gripping, Edward Dmytryk'...\n","31 \"Ardh Satya\" is one of the finest film ever ma... ... 
\"Ardh Satya\" is one of the finest film ever ma...\n","32 My first exposure to the Templarios & not a go... ... My first exposure to the Templarios & not a go...\n","33 One of the most significant quotes from the en... ... One of the most significant quotes from the en...\n","34 I watched this film not really expecting much,... ... I watched this film not really expecting much,...\n","35 I bought this film at Blockbuster for $3.00, b... ... I bought this film at Blockbuster for $3.00, b...\n","36 The plot is about the death of little children... ... The plot is about the death of little children...\n","37 Ever watched a movie that lost the plot? Well,... ... Ever watched a movie that lost the plot? Well,...\n","38 Okay, so this series kind of takes the route o... ... Okay, so this series kind of takes the route o...\n","39 After sitting through this pile of dung, my hu... ... After sitting through this pile of dung, my hu...\n","40 It had all the clichés of movies of this type ... ... It had all the clichés of movies of this type ...\n","41 This movie is based on the book, \"A Many Splen... ... This movie is based on the book, \"A Many Splen...\n","42 Of all the films I have seen, this one, The Ra... ... Of all the films I have seen, this one, The Ra...\n","43 I had heard good things about \"States of Grace... ... I had heard good things about \"States of Grace...\n","44 This movie struck home for me. Being 29, I rem... ... This movie struck home for me. Being 29, I rem...\n","45 As a disclaimer, I've seen the movie 5-6 times... ... As a disclaimer, I've seen the movie 5-6 times...\n","46 Protocol is an implausible movie whose only sa... ... Protocol is an implausible movie whose only sa...\n","47 How this film could be classified as Drama, I ... ... How this film could be classified as Drama, I ...\n","48 Preston Sturgis' THE POWER AND THE GLORY was u... ... Preston Sturgis' THE POWER AND THE GLORY was u...\n","49 Average (and surprisingly tame) Fulci giallo w... 
... Average (and surprisingly tame) Fulci giallo w...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1609469926255,"user_tz":-300,"elapsed":140492,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"64f54fdd-699a-4559-f6e4-74b7b5f3e92e"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.85 0.81 0.83 1234\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.87 0.79 0.83 1266\n","\n"," accuracy 0.80 2500\n"," macro avg 0.57 0.54 0.55 2500\n","weighted avg 0.86 0.80 0.83 2500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609470097011,"user_tz":-300,"elapsed":170766,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"59899be4-e33a-4b5e-ff37-df6a9a3994b2"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609470112616,"user_tz":-300,"elapsed":15622,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cbe7fc37-7794-4c28-d1de-5ba88d3db58b"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was one of the best films i have ever watched in my entire life !!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimenten_embed_sentence_small_bert_L12_768_embeddingssentiment_confidencedocument
origin_index
0positive[0.09222018718719482, 0.11720675230026245, 0.1...0.999543It was one of the best films i have ever watch...
\n","
"],"text/plain":[" sentiment ... document\n","origin_index ... \n","0 positive ... It was one of the best films i have ever watch...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609470112618,"user_tz":-300,"elapsed":17,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7467f93-d619-470f-fd40-c2be1805b83f"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_IMDB.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_IMDB.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class IMDB Movie sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How 
to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data:\n","\n","\n","
\n","\n","\n","![image.png]()\n","\n","\n","\n","You can achieve these results or even better on this dataset with test data:\n","\n","\n","
\n","\n","\n","![image.png]()\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download IMDB dataset\n","https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews\n","\n","IMDB dataset having 50K movie reviews for natural language processing or Text analytics.\n","This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training and 25,000 for testing. So, predict the number of positive and negative reviews using either classification or deep learning algorithms.\n","For more dataset information, please go through the following link,\n","http://ai.stanford.edu/~amaas/data/sentiment/"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614562823085,"user_tz":-300,"elapsed":60044,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d0eb4cc4-8270-41eb-e81d-b013c9567256"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-03-01 01:39:07-- http://ckl-it.de/wp-content/uploads/2021/01/IMDB-Dataset.csv\n","Resolving ckl-it.de (ckl-it.de)... 
217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 3288450 (3.1M) [text/csv]\n","Saving to: ‘IMDB-Dataset.csv’\n","\n","IMDB-Dataset.csv 100%[===================>] 3.14M 2.26MB/s in 1.4s \n","\n","2021-03-01 01:39:09 (2.26 MB/s) - ‘IMDB-Dataset.csv’ saved [3288450/3288450]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614562823089,"user_tz":-300,"elapsed":60038,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"924e9c0f-514e-45de-9e20-34c099c369f0"},"source":["import pandas as pd\n","train_path = '/content/IMDB-Dataset.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","from sklearn.model_selection import train_test_split\n","\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
538Sixth escapade for Freddy Krueger in which he ...negative
725As a long-time fan of all the Star Trek series...negative
219Ed Wood rides again. The fact that this movie ...negative
2302There aren't many good things to say at all ab...negative
2156A humorous voyage into the normally somber fun...positive
.........
339I find the critique of many IMDb users a littl...negative
1838All the kids aged from 14-16 want to see this ...negative
2113Once a wise man from India once said, \"First t...positive
676I have to say this is my favorite movie of all...positive
1887A young couple -- father Ben (solid Charles Ba...positive
\n","

2000 rows × 2 columns

\n","
"],"text/plain":[" text y\n","538 Sixth escapade for Freddy Krueger in which he ... negative\n","725 As a long-time fan of all the Star Trek series... negative\n","219 Ed Wood rides again. The fact that this movie ... negative\n","2302 There aren't many good things to say at all ab... negative\n","2156 A humorous voyage into the normally somber fun... positive\n","... ... ...\n","339 I find the critique of many IMDb users a littl... negative\n","1838 All the kids aged from 14-16 want to see this ... negative\n","2113 Once a wise man from India once said, \"First t... positive\n","676 I have to say this is my favorite movie of all... positive\n","1887 A young couple -- father Ben (solid Charles Ba... positive\n","\n","[2000 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1614562949762,"user_tz":-300,"elapsed":186700,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"02d62d97-0d74-4901-aa59-044a24325cbc"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 1.00 0.75 0.86 24\n"," positive 0.81 1.00 0.90 26\n","\n"," accuracy 0.88 50\n"," macro avg 0.91 0.88 0.88 50\n","weighted avg 0.90 0.88 0.88 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimentdefault_name_embeddingstextdocumentysentiment_confidence
origin_index
538positive[0.061155349016189575, -0.04764186963438988, -...Sixth escapade for Freddy Krueger in which he ...Sixth escapade for Freddy Krueger in which he ...negative0.661084
725positive[-0.04899926111102104, 0.04536202922463417, -0...As a long-time fan of all the Star Trek series...As a long-time fan of all the Star Trek series...negative0.819501
219negative[0.06203395500779152, -0.015943678095936775, -...Ed Wood rides again. The fact that this movie ...Ed Wood rides again. The fact that this movie ...negative0.990434
2302negative[0.05896253511309624, 0.012287295423448086, -0...There aren't many good things to say at all ab...There aren't many good things to say at all ab...negative0.834288
2156positive[-0.05029047280550003, -0.002712259767577052, ...A humorous voyage into the normally somber fun...A humorous voyage into the normally somber fun...positive0.995070
950positive[0.04065236821770668, 0.006623167544603348, 0....What a good movie! At last a picture revealing...What a good movie! At last a picture revealing...positive0.992898
2454negative[0.053874578326940536, -0.050784021615982056, ...Deep Shock plays out like a TV movie: a whole ...Deep Shock plays out like a TV movie: a whole ...negative0.994428
585positive[0.005484528839588165, -0.003261507721617818, ...A drifter looking for a job is mistaken for a ...A drifter looking for a job is mistaken for a ...positive0.933213
1658positive[0.0026102750562131405, -0.028329210355877876,...Yes, I know that this movie is meant as a come...Yes, I know that this movie is meant as a come...positive0.862425
699positive[0.0034183745738118887, 0.028293922543525696, ...I decided to watch this movie because I'd not ...I decided to watch this movie because I'd not ...negative0.965151
2250positive[-0.026181913912296295, -0.04428006708621979, ...I had heard good things about this film and wa...I had heard good things about this film and wa...positive0.980683
1128negative[0.007198462262749672, 0.002508964156731963, -...This is the worst movie I have ever seen. The ...This is the worst movie I have ever seen. The ...negative0.983800
1092negative[0.0014401263324543834, 0.042675577104091644, ...When I first saw it 9 years ago, when I was 9....When I first saw it 9 years ago, when I was 9....negative0.972695
1786positive[-0.04156527668237686, -0.02018941566348076, -...Excellent endearing film with Peter Falk and P...Excellent endearing film with Peter Falk and P...positive0.997251
354negative[-0.038548123091459274, -0.05298566818237305, ...Like the previous poster, I am from northern V...Like the previous poster, I am from northern V...negative0.977224
97negative[0.05253484845161438, 0.003816538257524371, -0...Well, I like to watch bad horror B-Movies, cau...Well, I like to watch bad horror B-Movies, cau...negative0.995163
1339positive[0.00035910092992708087, -0.03019331395626068,...At a risk of sounding slightly sacrilegious, o...At a risk of sounding slightly sacrilegious, o...positive0.997966
1648positive[0.046263981610536575, 0.05645913630723953, -0...My nose is bent slightly out of shape as I wri...My nose is bent slightly out of shape as I wri...negative0.898811
443negative[0.06970927119255066, -0.03712767735123634, -0...I had been amazed by director Antal's Kontroll...I had been amazed by director Antal's Kontroll...negative0.985158
1968positive[0.02333565428853035, -0.024841919541358948, -...The first time I saw this movie, it didn't see...The first time I saw this movie, it didn't see...positive0.955286
157positive[0.006765617057681084, 0.017314564436674118, -...I've always enjoyed films that depict life as ...I've always enjoyed films that depict life as ...positive0.991408
909positive[0.046574220061302185, -0.06137171760201454, -...I had never seen a film by John Cassavetes up ...I had never seen a film by John Cassavetes up ...positive0.971099
2003positive[0.0015181098133325577, -0.06557010114192963, ...This Showtime cable film features a talented c...This Showtime cable film features a talented c...positive0.898893
2186positive[0.03471103683114052, 0.06530531495809555, 0.0...My cable TV has what's called the Arts channel...My cable TV has what's called the Arts channel...positive0.999113
1109positive[-0.012417787685990334, 0.0360068641602993, -0...I have not read the book that this was based u...I have not read the book that this was based u...positive0.999633
1520positive[-0.010225639678537846, 0.017600659281015396, ...A Murder investigation goes on back stage whil...A Murder investigation goes on back stage whil...positive0.998053
1405positive[-0.038452234119176865, 0.0006458977586589754,...Absolutely stunning, warmth for the head and t...Absolutely stunning, warmth for the head and t...positive0.969840
1936positive[-0.03630257025361061, -0.04945565387606621, -...This story had a good plot to it about four el...This story had a good plot to it about four el...negative0.715630
1009positive[-0.0012479485012590885, -0.018733300268650055...I am a huge fan of Harald Zwart, and I just kn...I am a huge fan of Harald Zwart, and I just kn...positive0.979062
2284positive[0.05055391415953636, 0.01362769864499569, -0....Okay, there are a ton of reviews here, what ca...Okay, there are a ton of reviews here, what ca...positive0.998871
2241positive[-0.06763697415590286, 0.031879279762506485, 0...At the time that this movie was made most hous...At the time that this movie was made most hous...positive0.997658
2259negative[0.057414233684539795, -0.02066304162144661, 0...Okul\"The School\" is a result of a new trend in...Okul\"The School\" is a result of a new trend in...negative0.919953
1488negative[0.048472076654434204, -0.02173914574086666, -...Really, truly, abysmally, garishly, awful. But...Really, truly, abysmally, garishly, awful. But...negative0.997300
258positive[0.02095893770456314, 0.0484045185148716, -0.0...Black comedy isn't always an easy sell. Every ...Black comedy isn't always an easy sell. Every ...positive0.994659
130positive[0.009698283858597279, -0.025106241926550865, ...Two hours ago I was watching this brilliant mo...Two hours ago I was watching this brilliant mo...positive0.968123
1404negative[0.04247045889496803, -0.06722509115934372, -0...I buy or at least watch every Seagall movie. H...I buy or at least watch every Seagall movie. H...negative0.998181
2495negative[0.05309217423200607, -0.0037488690577447414, ...Another great movie by Costa-Gavras. It's a gr...Another great movie by Costa-Gavras. It's a gr...negative0.986873
1651negative[-0.00877030473202467, -0.03184518218040466, -...Okay,I had watched this movie when I was very ...Okay,I had watched this movie when I was very ...negative0.865547
1402positive[0.05373821780085564, -0.015125636011362076, -...ROAD TO PERDITION can be summed up by Thomas N...ROAD TO PERDITION can be summed up by Thomas N...positive0.984621
1039positive[0.020128324627876282, -0.047135304659605026, ...At the height of the 'Celebrity Big Brother' r...At the height of the 'Celebrity Big Brother' r...positive0.999136
1469positive[0.007572077214717865, 0.01606682315468788, 0....Sex,Drugs,Rock & Roll is without a doubt the w...Sex,Drugs,Rock & Roll is without a doubt the w...negative0.971885
398negative[-0.05642805993556976, -0.03362744674086571, -...After a snowstorm, the roads are blocked and t...After a snowstorm, the roads are blocked and t...negative0.908734
2114negative[-0.02119983173906803, 0.04433616250753403, -0...I was very surprised to see that this movie ha...I was very surprised to see that this movie ha...negative0.990297
1317positive[0.03002399578690529, 0.017277313396334648, -0...This is an incredible film. I can't remember t...This is an incredible film. I can't remember t...positive0.978412
943positive[0.038790568709373474, -0.04091215878725052, -...A film for mature, educated audiences...<br />...A film for mature, educated audiences...<br />...positive0.965369
799positive[0.04426809027791023, 0.0171112772077322, -0.0...Thank God this wasn't based on a true story, b...Thank God this wasn't based on a true story, b...positive0.985029
882negative[0.0758691355586052, 0.029356516897678375, -0....How can so many blundering decisions can be ma...How can so many blundering decisions can be ma...negative0.995624
2151negative[0.01764845661818981, -0.03746427595615387, -0...This movie had the potential to be really good...This movie had the potential to be really good...negative0.965565
1498positive[0.05424240231513977, -0.05653670057654381, -0...According to John Ford's lyrically shot, ficti...According to John Ford's lyrically shot, ficti...positive0.993533
674negative[0.032531678676605225, -0.00041807559318840504...worst. movie. ever made. EVER. I have no words...worst. movie. ever made. EVER. I have no words...negative0.997371
\n","
"],"text/plain":[" sentiment ... sentiment_confidence\n","origin_index ... \n","538 positive ... 0.661084\n","725 positive ... 0.819501\n","219 negative ... 0.990434\n","2302 negative ... 0.834288\n","2156 positive ... 0.995070\n","950 positive ... 0.992898\n","2454 negative ... 0.994428\n","585 positive ... 0.933213\n","1658 positive ... 0.862425\n","699 positive ... 0.965151\n","2250 positive ... 0.980683\n","1128 negative ... 0.983800\n","1092 negative ... 0.972695\n","1786 positive ... 0.997251\n","354 negative ... 0.977224\n","97 negative ... 0.995163\n","1339 positive ... 0.997966\n","1648 positive ... 0.898811\n","443 negative ... 0.985158\n","1968 positive ... 0.955286\n","157 positive ... 0.991408\n","909 positive ... 0.971099\n","2003 positive ... 0.898893\n","2186 positive ... 0.999113\n","1109 positive ... 0.999633\n","1520 positive ... 0.998053\n","1405 positive ... 0.969840\n","1936 positive ... 0.715630\n","1009 positive ... 0.979062\n","2284 positive ... 0.998871\n","2241 positive ... 0.997658\n","2259 negative ... 0.919953\n","1488 negative ... 0.997300\n","258 positive ... 0.994659\n","130 positive ... 0.968123\n","1404 negative ... 0.998181\n","2495 negative ... 0.986873\n","1651 negative ... 0.865547\n","1402 positive ... 0.984621\n","1039 positive ... 0.999136\n","1469 positive ... 0.971885\n","398 negative ... 0.908734\n","2114 negative ... 0.990297\n","1317 positive ... 0.978412\n","943 positive ... 0.965369\n","799 positive ... 0.985029\n","882 negative ... 0.995624\n","2151 negative ... 0.965565\n","1498 positive ... 0.993533\n","674 negative ... 0.997371\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# 4. 
Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1614562952398,"user_tz":-300,"elapsed":189319,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8f45e84e-6cf3-4d59-ff3f-d2a2ed1ff515"},"source":["fitted_pipe.predict('It was one of the best films i have ever watched in my entire life !!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimentdefault_name_embeddingsdocumentsentiment_confidence
origin_index
0negative[-0.022810865193605423, 0.015739120543003082, ...It was one of the best films i have ever watch...0.629151
\n","
"],"text/plain":[" sentiment ... sentiment_confidence\n","origin_index ... \n","0 negative ... 0.629151\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## 5. Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1614562952406,"user_tz":-300,"elapsed":189316,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"57c6cd93-f560-492f-dbd2-21ebabecb283"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setLoadSP(False) | Info: Whether to load SentencePiece ops file which is required only by multi-lingual models. This is not changeable after it's set with a pretrained model nor it is compatible with Windows. | Currently set to : False\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to 
explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## 6. Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1614562959643,"user_tz":-300,"elapsed":196544,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cb915df3-b2e1-47ef-c690-262f6d9074ff"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.96 0.96 0.96 24\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.96 0.88 0.92 26\n","\n"," accuracy 0.92 50\n"," macro avg 0.64 0.61 0.63 50\n","weighted avg 0.96 0.92 0.94 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimentdefault_name_embeddingstextdocumentysentiment_confidence
origin_index
538negative[0.061155349016189575, -0.04764186963438988, -...Sixth escapade for Freddy Krueger in which he ...Sixth escapade for Freddy Krueger in which he ...negative0.736202
725negative[-0.04899926111102104, 0.04536202922463417, -0...As a long-time fan of all the Star Trek series...As a long-time fan of all the Star Trek series...negative0.829032
219negative[0.06203395500779152, -0.015943678095936775, -...Ed Wood rides again. The fact that this movie ...Ed Wood rides again. The fact that this movie ...negative0.937502
2302negative[0.05896253511309624, 0.012287295423448086, -0...There aren't many good things to say at all ab...There aren't many good things to say at all ab...negative0.897272
2156positive[-0.05029047280550003, -0.002712259767577052, ...A humorous voyage into the normally somber fun...A humorous voyage into the normally somber fun...positive0.948001
950positive[0.04065236821770668, 0.006623167544603348, 0....What a good movie! At last a picture revealing...What a good movie! At last a picture revealing...positive0.874555
2454negative[0.053874578326940536, -0.050784021615982056, ...Deep Shock plays out like a TV movie: a whole ...Deep Shock plays out like a TV movie: a whole ...negative0.971650
585positive[0.005484528839588165, -0.003261507721617818, ...A drifter looking for a job is mistaken for a ...A drifter looking for a job is mistaken for a ...positive0.753176
1658negative[0.0026102750562131405, -0.028329210355877876,...Yes, I know that this movie is meant as a come...Yes, I know that this movie is meant as a come...positive0.767478
699positive[0.0034183745738118887, 0.028293922543525696, ...I decided to watch this movie because I'd not ...I decided to watch this movie because I'd not ...negative0.840567
2250positive[-0.026181913912296295, -0.04428006708621979, ...I had heard good things about this film and wa...I had heard good things about this film and wa...positive0.658274
1128negative[0.007198462262749672, 0.002508964156731963, -...This is the worst movie I have ever seen. The ...This is the worst movie I have ever seen. The ...negative0.946714
1092negative[0.0014401263324543834, 0.042675577104091644, ...When I first saw it 9 years ago, when I was 9....When I first saw it 9 years ago, when I was 9....negative0.946290
1786positive[-0.04156527668237686, -0.02018941566348076, -...Excellent endearing film with Peter Falk and P...Excellent endearing film with Peter Falk and P...positive0.959468
354negative[-0.038548123091459274, -0.05298566818237305, ...Like the previous poster, I am from northern V...Like the previous poster, I am from northern V...negative0.848761
97negative[0.05253484845161438, 0.003816538257524371, -0...Well, I like to watch bad horror B-Movies, cau...Well, I like to watch bad horror B-Movies, cau...negative0.970629
1339positive[0.00035910092992708087, -0.03019331395626068,...At a risk of sounding slightly sacrilegious, o...At a risk of sounding slightly sacrilegious, o...positive0.906463
1648negative[0.046263981610536575, 0.05645913630723953, -0...My nose is bent slightly out of shape as I wri...My nose is bent slightly out of shape as I wri...negative0.749613
443negative[0.06970927119255066, -0.03712767735123634, -0...I had been amazed by director Antal's Kontroll...I had been amazed by director Antal's Kontroll...negative0.954385
1968neutral[0.02333565428853035, -0.024841919541358948, -...The first time I saw this movie, it didn't see...The first time I saw this movie, it didn't see...positive0.574081
157positive[0.006765617057681084, 0.017314564436674118, -...I've always enjoyed films that depict life as ...I've always enjoyed films that depict life as ...positive0.938987
909positive[0.046574220061302185, -0.06137171760201454, -...I had never seen a film by John Cassavetes up ...I had never seen a film by John Cassavetes up ...positive0.672924
2003positive[0.0015181098133325577, -0.06557010114192963, ...This Showtime cable film features a talented c...This Showtime cable film features a talented c...positive0.662296
2186positive[0.03471103683114052, 0.06530531495809555, 0.0...My cable TV has what's called the Arts channel...My cable TV has what's called the Arts channel...positive0.956926
1109positive[-0.012417787685990334, 0.0360068641602993, -0...I have not read the book that this was based u...I have not read the book that this was based u...positive0.968361
1520positive[-0.010225639678537846, 0.017600659281015396, ...A Murder investigation goes on back stage whil...A Murder investigation goes on back stage whil...positive0.966288
1405positive[-0.038452234119176865, 0.0006458977586589754,...Absolutely stunning, warmth for the head and t...Absolutely stunning, warmth for the head and t...positive0.755360
1936negative[-0.03630257025361061, -0.04945565387606621, -...This story had a good plot to it about four el...This story had a good plot to it about four el...negative0.854486
1009positive[-0.0012479485012590885, -0.018733300268650055...I am a huge fan of Harald Zwart, and I just kn...I am a huge fan of Harald Zwart, and I just kn...positive0.859311
2284positive[0.05055391415953636, 0.01362769864499569, -0....Okay, there are a ton of reviews here, what ca...Okay, there are a ton of reviews here, what ca...positive0.947632
2241positive[-0.06763697415590286, 0.031879279762506485, 0...At the time that this movie was made most hous...At the time that this movie was made most hous...positive0.964139
2259negative[0.057414233684539795, -0.02066304162144661, 0...Okul\"The School\" is a result of a new trend in...Okul\"The School\" is a result of a new trend in...negative0.925747
1488negative[0.048472076654434204, -0.02173914574086666, -...Really, truly, abysmally, garishly, awful. But...Really, truly, abysmally, garishly, awful. But...negative0.951509
258positive[0.02095893770456314, 0.0484045185148716, -0.0...Black comedy isn't always an easy sell. Every ...Black comedy isn't always an easy sell. Every ...positive0.905956
130neutral[0.009698283858597279, -0.025106241926550865, ...Two hours ago I was watching this brilliant mo...Two hours ago I was watching this brilliant mo...positive0.582632
1404negative[0.04247045889496803, -0.06722509115934372, -0...I buy or at least watch every Seagall movie. H...I buy or at least watch every Seagall movie. H...negative0.965406
2495negative[0.05309217423200607, -0.0037488690577447414, ...Another great movie by Costa-Gavras. It's a gr...Another great movie by Costa-Gavras. It's a gr...negative0.861529
1651negative[-0.00877030473202467, -0.03184518218040466, -...Okay,I had watched this movie when I was very ...Okay,I had watched this movie when I was very ...negative0.875443
1402positive[0.05373821780085564, -0.015125636011362076, -...ROAD TO PERDITION can be summed up by Thomas N...ROAD TO PERDITION can be summed up by Thomas N...positive0.862109
1039positive[0.020128324627876282, -0.047135304659605026, ...At the height of the 'Celebrity Big Brother' r...At the height of the 'Celebrity Big Brother' r...positive0.957513
1469negative[0.007572077214717865, 0.01606682315468788, 0....Sex,Drugs,Rock & Roll is without a doubt the w...Sex,Drugs,Rock & Roll is without a doubt the w...negative0.700001
398negative[-0.05642805993556976, -0.03362744674086571, -...After a snowstorm, the roads are blocked and t...After a snowstorm, the roads are blocked and t...negative0.758657
2114negative[-0.02119983173906803, 0.04433616250753403, -0...I was very surprised to see that this movie ha...I was very surprised to see that this movie ha...negative0.942450
1317positive[0.03002399578690529, 0.017277313396334648, -0...This is an incredible film. I can't remember t...This is an incredible film. I can't remember t...positive0.871614
943positive[0.038790568709373474, -0.04091215878725052, -...A film for mature, educated audiences...<br />...A film for mature, educated audiences...<br />...positive0.858415
799positive[0.04426809027791023, 0.0171112772077322, -0.0...Thank God this wasn't based on a true story, b...Thank God this wasn't based on a true story, b...positive0.885541
882negative[0.0758691355586052, 0.029356516897678375, -0....How can so many blundering decisions can be ma...How can so many blundering decisions can be ma...negative0.954071
2151negative[0.01764845661818981, -0.03746427595615387, -0...This movie had the potential to be really good...This movie had the potential to be really good...negative0.919427
1498positive[0.05424240231513977, -0.05653670057654381, -0...According to John Ford's lyrically shot, ficti...According to John Ford's lyrically shot, ficti...positive0.880525
674negative[0.032531678676605225, -0.00041807559318840504...worst. movie. ever made. EVER. I have no words...worst. movie. ever made. EVER. I have no words...negative0.971043
\n","
"],"text/plain":[" sentiment ... sentiment_confidence\n","origin_index ... \n","538 negative ... 0.736202\n","725 negative ... 0.829032\n","219 negative ... 0.937502\n","2302 negative ... 0.897272\n","2156 positive ... 0.948001\n","950 positive ... 0.874555\n","2454 negative ... 0.971650\n","585 positive ... 0.753176\n","1658 negative ... 0.767478\n","699 positive ... 0.840567\n","2250 positive ... 0.658274\n","1128 negative ... 0.946714\n","1092 negative ... 0.946290\n","1786 positive ... 0.959468\n","354 negative ... 0.848761\n","97 negative ... 0.970629\n","1339 positive ... 0.906463\n","1648 negative ... 0.749613\n","443 negative ... 0.954385\n","1968 neutral ... 0.574081\n","157 positive ... 0.938987\n","909 positive ... 0.672924\n","2003 positive ... 0.662296\n","2186 positive ... 0.956926\n","1109 positive ... 0.968361\n","1520 positive ... 0.966288\n","1405 positive ... 0.755360\n","1936 negative ... 0.854486\n","1009 positive ... 0.859311\n","2284 positive ... 0.947632\n","2241 positive ... 0.964139\n","2259 negative ... 0.925747\n","1488 negative ... 0.951509\n","258 positive ... 0.905956\n","130 neutral ... 0.582632\n","1404 negative ... 0.965406\n","2495 negative ... 0.861529\n","1651 negative ... 0.875443\n","1402 positive ... 0.862109\n","1039 positive ... 0.957513\n","1469 negative ... 0.700001\n","398 negative ... 0.758657\n","2114 negative ... 0.942450\n","1317 positive ... 0.871614\n","943 positive ... 0.858415\n","799 positive ... 0.885541\n","882 negative ... 0.954071\n","2151 negative ... 0.919427\n","1498 positive ... 0.880525\n","674 negative ... 0.971043\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# 7. 
Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614562959651,"user_tz":-300,"elapsed":196546,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"54d0d93e-06dd-4b51-8d12-9200e2214431"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark 
NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') 
returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model 
labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1614567415018,"user_tz":-300,"elapsed":4651904,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"34393e34-8b4a-4db0-f7d5-30a1dddb5963"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.86 0.80 0.82 977\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.88 0.81 0.84 1023\n","\n"," accuracy 0.80 2000\n"," macro avg 0.58 0.53 0.55 2000\n","weighted avg 0.87 0.80 0.83 2000\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 7.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614568329258,"user_tz":-300,"elapsed":5566142,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6ae16887-deab-4ec5-ae9a-2ef0285aef07"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.80 0.74 0.77 257\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.81 0.72 0.76 243\n","\n"," accuracy 0.73 500\n"," macro avg 0.54 0.49 0.51 500\n","weighted avg 0.80 0.73 0.77 500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 8. 
Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1609470097011,"user_tz":-300,"elapsed":170766,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"59899be4-e33a-4b5e-ff37-df6a9a3994b2"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 9. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":127},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1609470112616,"user_tz":-300,"elapsed":15622,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cbe7fc37-7794-4c28-d1de-5ba88d3db58b"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was one of the best films i have ever watched in my entire life !!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimenten_embed_sentence_small_bert_L12_768_embeddingssentiment_confidencedocument
origin_index
0positive[0.09222018718719482, 0.11720675230026245, 0.1...0.999543It was one of the best films i have ever watch...
\n","
"],"text/plain":[" sentiment ... document\n","origin_index ... \n","0 positive ... It was one of the best films i have ever watch...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1609470112618,"user_tz":-300,"elapsed":17,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7467f93-d619-470f-fd40-c2be1805b83f"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb index 3808340d..49f597ef 100644 --- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_apple_twitter.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"RIV-9vEqxTBB"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\r\n","\r\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb)\r\n","\r\n","\r\n","\r\n","# Training a Sentiment Analysis Classifier with NLU \r\n","## 2 class Apple Tweets sentiment classifier training\r\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \r\n","\r\n","This notebook showcases the following features : \r\n","\r\n","- How to train the deep learning classifier\r\n","- How to store a pipeline to disk\r\n","- How to load the pipeline from disk (Enables NLU offline mode)\r\n","\r\n"]},{"cell_type":"code","metadata":{"id":"05-mAOF6ol-0"},"source":["import os\r\n","from sklearn.metrics import classification_report\r\n","! apt-get update -qq > /dev/null \r\n","# Install java\r\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\r\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\r\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\r\n","! pip install nlu pyspark==2.4.7 > /dev/null \r\n","\r\n","\r\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download appple twitter Sentiment dataset \n","https://www.kaggle.com/seriousran/appletwittersentimenttexts\n","\n","this dataset contains tweets made towards apple and today we are going to train our model to predict whether the tweet contains sentiment!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1609468082890,"user_tz":-300,"elapsed":77740,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a791d4cf-bfa3-4cc6-a60d-c885afe2e917"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/apple-twitter-sentiment-texts.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-01 02:27:38-- https://raw.githubusercontent.com/ahmedlone127/nlu-master/main/apple-twitter-sentiment-texts.csv\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 31678 (31K) [text/plain]\n","Saving to: ‘apple-twitter-sentiment-texts.csv’\n","\n","apple-twitter-senti 100%[===================>] 30.94K --.-KB/s in 0.002s \n","\n","2021-01-01 02:27:39 (12.9 MB/s) - ‘apple-twitter-sentiment-texts.csv’ saved [31678/31678]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1609468083287,"user_tz":-300,"elapsed":78124,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a23969f-abf0-4bc3-e2ec-0879b2b77cad"},"source":["import pandas as pd\n","train_path = '/content/apple-twitter-sentiment-texts.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neuteral\"])]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
0@Apple you need to sort your phones out.negative
1Wow. Yall needa step it up @Apple RT @heynyla:...negative
2I'm surprised there isn't more talk about what...negative
3Realised the reason @apple make huge phones is...negative
4Apple Inc. CEO Donates $291K To Pennsylvania S...positive
.........
281@apple so thanks for being greedy assholes who...negative
282@apple iCal AGAIN!!! it reset all my recurring...negative
283Just did my first transaction with @Apple Pay ...positive
284RT @JPDesloges: Kantar Worldpanel: iPhone sale...positive
285Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive
\n","

286 rows × 2 columns

\n","
"],"text/plain":[" text y\n","0 @Apple you need to sort your phones out. negative\n","1 Wow. Yall needa step it up @Apple RT @heynyla:... negative\n","2 I'm surprised there isn't more talk about what... negative\n","3 Realised the reason @apple make huge phones is... negative\n","4 Apple Inc. CEO Donates $291K To Pennsylvania S... positive\n",".. ... ...\n","281 @apple so thanks for being greedy assholes who... negative\n","282 @apple iCal AGAIN!!! it reset all my recurring... negative\n","283 Just did my first transaction with @Apple Pay ... positive\n","284 RT @JPDesloges: Kantar Worldpanel: iPhone sale... positive\n","285 Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc... positive\n","\n","[286 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":845},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609468191792,"user_tz":-300,"elapsed":186618,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"41d18f44-64e1-4766-a8cf-4545813930d7"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.91 0.80 0.85 143\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.82 0.91 0.86 143\n","\n"," accuracy 0.86 286\n"," macro avg 0.58 0.57 0.57 286\n","weighted avg 0.86 0.86 0.86 286\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceydefault_name_embeddingstextdocumentsentiment
origin_index
00.998447negative[-0.01731022447347641, 0.010604134760797024, -...@Apple you need to sort your phones out.@Apple you need to sort your phones out.negative
10.990570negative[0.019931159913539886, -0.04991159215569496, -...Wow. Yall needa step it up @Apple RT @heynyla:...Wow. Yall needa step it up @Apple RT @heynyla:...positive
20.969844negative[0.01646081730723381, -0.02681073546409607, -0...I'm surprised there isn't more talk about what...I'm surprised there isn't more talk about what...negative
30.996128negative[0.04638500511646271, -0.037105873227119446, -...Realised the reason @apple make huge phones is...Realised the reason @apple make huge phones is...negative
40.959235positive[-0.028623634949326515, 0.03947276994585991, -...Apple Inc. CEO Donates $291K To Pennsylvania S...Apple Inc. CEO Donates $291K To Pennsylvania S...positive
.....................
2810.978435negative[0.03778046742081642, 0.03407461196184158, 0.0...@apple so thanks for being greedy assholes who...@apple so thanks for being greedy assholes who...negative
2820.623791negative[-0.013547728769481182, -0.001025827950797975,...@apple iCal AGAIN!!! it reset all my recurring...@apple iCal AGAIN!!! it reset all my recurring...positive
2830.999104positive[-0.0015363194979727268, -0.01644994132220745,...Just did my first transaction with @Apple Pay ...Just did my first transaction with @Apple Pay ...positive
2840.999854positive[0.0656985342502594, 0.028557728976011276, -0....RT @JPDesloges: Kantar Worldpanel: iPhone sale...RT @JPDesloges: Kantar Worldpanel: iPhone sale...positive
2850.983244positive[0.02311933971941471, 0.05785432830452919, -0....Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive
\n","

286 rows × 6 columns

\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.998447 ... negative\n","1 0.990570 ... positive\n","2 0.969844 ... negative\n","3 0.996128 ... negative\n","4 0.959235 ... positive\n","... ... ... ...\n","281 0.978435 ... negative\n","282 0.623791 ... positive\n","283 0.999104 ... positive\n","284 0.999854 ... positive\n","285 0.983244 ... positive\n","\n","[286 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"id":"qdCUg2MR0PD2","colab":{"base_uri":"https://localhost:8080/","height":110},"executionInfo":{"status":"ok","timestamp":1609468194339,"user_tz":-300,"elapsed":189158,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"00d8c7b6-22e1-4979-8c51-58471540a3dd"},"source":["fitted_pipe.predict('I hate the newest update')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedefault_name_embeddingsdocumentsentiment
origin_index
00.996097[0.06468033790588379, -0.040837567299604416, -...Bitcoin is going to the moon!positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.996097 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"id":"UtsAUGTmOTms","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468194341,"user_tz":-300,"elapsed":189154,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3ab00ec5-5894-400f-c6c9-e32099fed1f5"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"id":"mptfvHx-MMMX","colab":{"base_uri":"https://localhost:8080/","height":793},"executionInfo":{"status":"ok","timestamp":1609468205048,"user_tz":-300,"elapsed":199854,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9c9a1628-3034-4be0-94bc-7c109d2c3263"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.96 0.85 0.90 143\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.87 0.95 0.91 143\n","\n"," accuracy 0.90 286\n"," macro avg 0.61 0.60 0.60 286\n","weighted avg 0.92 0.90 0.91 286\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceydefault_name_embeddingstextdocumentsentiment
origin_index
00.999738negative[-0.01731022447347641, 0.010604134760797024, -...@Apple you need to sort your phones out.@Apple you need to sort your phones out.negative
10.937319negative[0.019931159913539886, -0.04991159215569496, -...Wow. Yall needa step it up @Apple RT @heynyla:...Wow. Yall needa step it up @Apple RT @heynyla:...positive
20.974594negative[0.01646081730723381, -0.02681073546409607, -0...I'm surprised there isn't more talk about what...I'm surprised there isn't more talk about what...negative
30.997196negative[0.04638500511646271, -0.037105873227119446, -...Realised the reason @apple make huge phones is...Realised the reason @apple make huge phones is...negative
40.709098positive[-0.028623634949326515, 0.03947276994585991, -...Apple Inc. CEO Donates $291K To Pennsylvania S...Apple Inc. CEO Donates $291K To Pennsylvania S...positive
.....................
2810.984257negative[0.03778046742081642, 0.03407461196184158, 0.0...@apple so thanks for being greedy assholes who...@apple so thanks for being greedy assholes who...negative
2820.904880negative[-0.013547728769481182, -0.001025827950797975,...@apple iCal AGAIN!!! it reset all my recurring...@apple iCal AGAIN!!! it reset all my recurring...negative
2830.995687positive[-0.0015363194979727268, -0.01644994132220745,...Just did my first transaction with @Apple Pay ...Just did my first transaction with @Apple Pay ...positive
2840.998746positive[0.0656985342502594, 0.028557728976011276, -0....RT @JPDesloges: Kantar Worldpanel: iPhone sale...RT @JPDesloges: Kantar Worldpanel: iPhone sale...positive
2850.710708positive[0.02311933971941471, 0.05785432830452919, -0....Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive
\n","

286 rows × 6 columns

\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999738 ... negative\n","1 0.937319 ... positive\n","2 0.974594 ... negative\n","3 0.997196 ... negative\n","4 0.709098 ... positive\n","... ... ... ...\n","281 0.984257 ... negative\n","282 0.904880 ... negative\n","283 0.995687 ... positive\n","284 0.998746 ... positive\n","285 0.710708 ... positive\n","\n","[286 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468205058,"user_tz":-300,"elapsed":199858,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"11560398-8fb9-4110-aed3-f7d9c1f71268"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468415116,"user_tz":-300,"elapsed":409908,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b8e4f245-595a-40f3-9e1d-76f71e76b74e"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer 
and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(110) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.96 0.85 0.90 143\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.92 0.92 0.92 143\n","\n"," accuracy 0.88 286\n"," macro avg 0.63 0.59 0.61 286\n","weighted avg 0.94 0.88 0.91 286\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"bZZpObLOtqo8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468632998,"user_tz":-300,"elapsed":627783,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e6a87d34-ce84-4968-c3a0-9aade476874b"},"source":["stored_model_path = './models/classifier_dl_trained' \r\n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":127},"executionInfo":{"status":"ok","timestamp":1609468646911,"user_tz":-300,"elapsed":641690,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"454b2c7d-7c32-4cc2-cf25-a52b5a879abd"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('I hate the newest update')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddingsdocumentsentiment
origin_index
00.974083[-0.058236218988895416, -0.3061041235923767, 0...I hate itnegative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.974083 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468646914,"user_tz":-300,"elapsed":641685,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"80ce5918-3803-45f4-e10f-300144342295"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_apple_twitter.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"RIV-9vEqxTBB"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\r\n","\r\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_apple_twitter.ipynb)\r\n","\r\n","\r\n","\r\n","# Training a Sentiment Analysis Classifier with NLU \r\n","## 2 class Apple Tweets sentiment classifier training\r\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \r\n","\r\n","This notebook showcases the following features : \r\n","\r\n","- How to train the deep learning classifier\r\n","- How to store a pipeline to disk\r\n","- How to load the pipeline from disk (Enables NLU offline mode)\r\n","\r\n","You can achieve these results or even better on this dataset with training data:\r\n","\r\n","
\r\n","\r\n","![image.png]()\r\n","\r\n","You can achieve these results or even better on this dataset with test data:\r\n","\r\n","\r\n","\r\n","
\r\n","\r\n","\r\n","![image.png]()\r\n","\r\n"]},{"cell_type":"code","metadata":{"id":"05-mAOF6ol-0"},"source":["import os\r\n","from sklearn.metrics import classification_report\r\n","! apt-get update -qq > /dev/null \r\n","# Install java\r\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\r\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\r\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\r\n","! pip install nlu pyspark==2.4.7 > /dev/null \r\n","\r\n","\r\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download appple twitter Sentiment dataset \n","https://www.kaggle.com/seriousran/appletwittersentimenttexts\n","\n","this dataset contains tweets made towards apple and today we are going to train our model to predict whether the tweet contains sentiment!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614562965668,"user_tz":-300,"elapsed":58726,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1e8381bf-8581-48d3-f6e0-28f3db6e5144"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/apple-twitter-sentiment-texts.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-03-01 01:41:31-- http://ckl-it.de/wp-content/uploads/2021/01/apple-twitter-sentiment-texts.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 31678 (31K) [text/csv]\n","Saving to: ‘apple-twitter-sentiment-texts.csv’\n","\n","apple-twitter-senti 100%[===================>] 30.94K 149KB/s in 0.2s \n","\n","2021-03-01 01:41:31 (149 KB/s) - ‘apple-twitter-sentiment-texts.csv’ saved [31678/31678]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614562965670,"user_tz":-300,"elapsed":58717,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a6eb98f9-621a-4703-b8b0-2a2ca8396578"},"source":["import pandas as pd\n","train_path = '/content/apple-twitter-sentiment-texts.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neuteral\"])]\n","from sklearn.model_selection import train_test_split\n","\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
194@fullcircleone ThanX! Big @Apple ThanX goes 2 ...positive
279Can't believe @Apple said melted keys on Mac P...negative
259@apple had a dream last night where I downgrad...negative
148RT @paisley_smithh: Real tired of my charger b...negative
158@OneRepublic @Apple THIS IS SO BEAUTIFULpositive
.........
797 awesome paid #iPhone and #iPad apps that are...positive
166@apple is laughing its pants off with the scho...negative
276@jimmymujaj @Apple it works again fucker, not ...negative
131@apple Why is your NYC Grand Central store so ...negative
178RT @saigeist: the most offensive thing is the ...negative
\n","

228 rows × 2 columns

\n","
"],"text/plain":[" text y\n","194 @fullcircleone ThanX! Big @Apple ThanX goes 2 ... positive\n","279 Can't believe @Apple said melted keys on Mac P... negative\n","259 @apple had a dream last night where I downgrad... negative\n","148 RT @paisley_smithh: Real tired of my charger b... negative\n","158 @OneRepublic @Apple THIS IS SO BEAUTIFUL positive\n",".. ... ...\n","79 7 awesome paid #iPhone and #iPad apps that are... positive\n","166 @apple is laughing its pants off with the scho... negative\n","276 @jimmymujaj @Apple it works again fucker, not ... negative\n","131 @apple Why is your NYC Grand Central store so ... negative\n","178 RT @saigeist: the most offensive thing is the ... negative\n","\n","[228 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1614563092881,"user_tz":-300,"elapsed":185911,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"29427741-2ffa-4036-8ddb-9e877afbb44d"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.94 0.59 0.73 27\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.95 0.87 0.91 23\n","\n"," accuracy 0.72 50\n"," macro avg 0.63 0.49 0.55 50\n","weighted avg 0.95 0.72 0.81 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingssentimenttextsentiment_confidenceydocument
origin_index
194[0.0020813194569200277, -0.025028454139828682,...positive@fullcircleone ThanX! Big @Apple ThanX goes 2 ...0.843989positive@fullcircleone ThanX! Big @Apple ThanX goes 2 ...
279[0.05548453330993652, 0.06619943678379059, -0....negativeCan't believe @Apple said melted keys on Mac P...0.682702negativeCan't believe @Apple said melted keys on Mac P...
259[0.06910835206508636, -0.052032221108675, -0.0...negative@apple had a dream last night where I downgrad...0.657026negative@apple had a dream last night where I downgrad...
148[0.059678513556718826, 0.06913778185844421, -0...neutralRT @paisley_smithh: Real tired of my charger b...0.590428negativeRT @paisley_smithh: Real tired of my charger b...
158[0.011856582947075367, 0.016805896535515785, -...positive@OneRepublic @Apple THIS IS SO BEAUTIFUL0.708730positive@OneRepublic @Apple THIS IS SO BEAUTIFUL
224[0.023869305849075317, -0.033373694866895676, ...positive@stevewoz @Outback Personally I am excited abo...0.614970positive@stevewoz @Outback Personally I am excited abo...
142[0.008206114172935486, 0.0042152707464993, -0....neutralThese Damn @Apple Commercials Are Getting Wors...0.563933negativeThese Damn @Apple Commercials Are Getting Wors...
23[0.044899821281433105, 0.033001091331243515, 0...positiveThanks @Apple store @RobinaTC for helping me o...0.815056positiveThanks @Apple store @RobinaTC for helping me o...
24[0.06401650607585907, 0.02939487248659134, -0....positive#iPhone5s is the best mobile that I have had. ...0.772757positive#iPhone5s is the best mobile that I have had. ...
97[0.036246202886104584, 0.014746210537850857, -...negativefuck type of shit is this @Apple @autocorrect ...0.633962negativefuck type of shit is this @Apple @autocorrect ...
267[-0.025635968893766403, 0.026251723989844322, ...negativehey @apple my 5s keeps glitching :/0.719015negativehey @apple my 5s keeps glitching :/
68[-0.017101608216762543, 0.0018259129719808698,...negativeFucking goddamn stupid @apple @iTunes , ur too...0.729995negativeFucking goddamn stupid @apple @iTunes , ur too...
55[0.008575155399739742, 0.015347552485764027, 0...neutral@apple please get yourself together! I need my...0.542997negative@apple please get yourself together! I need my...
239[0.038337450474500656, 0.042477983981370926, -...negativewaiting a week for a DUNS number is preventing...0.643586negativewaiting a week for a DUNS number is preventing...
274[0.031279537826776505, 0.039565060287714005, 0...negativeDear @apple replace my phone my battery sucks0.709939negativeDear @apple replace my phone my battery sucks
99[0.005306210368871689, 0.00793572049587965, 0....positive@OneRepublic @Apple You all are ready to go. #...0.805694positive@OneRepublic @Apple You all are ready to go. #...
181[0.05127181485295296, 0.03388502821326256, -0....negative@CharlesJMeyer @Apple @Appy_Geek Hasn't Apple ...0.605825negative@CharlesJMeyer @Apple @Appy_Geek Hasn't Apple ...
172[0.02794131077826023, 0.008706841617822647, 0....negative@Apple This was NORMAL use-- NO ABUSE! Wrapped...0.738088negative@Apple This was NORMAL use-- NO ABUSE! Wrapped...
58[0.03853776305913925, -0.003850174369290471, -...neutralYO YOU AINT SHIT @apple0.567957negativeYO YOU AINT SHIT @apple
229[0.054637834429740906, 0.023656053468585014, -...positive@Apple's share of UK smartphone sales soared t...0.818830positive@Apple's share of UK smartphone sales soared t...
168[0.0441458635032177, 0.03536572307348251, -0.0...neutral@brwnskin_beauti cause fuck @apple0.561626negative@brwnskin_beauti cause fuck @apple
84[-0.02587798982858658, 0.03256731107831001, -0...negativeiTunes is pissing me tf off @apple0.758408negativeiTunes is pissing me tf off @apple
171[0.007304496597498655, -0.00493069039657712, -...positiveThank you @apple0.736227positiveThank you @apple
250[0.07199912518262863, 0.0021514107938855886, -...positiveThe king of the phablets! Apple's iPhone 6 plu...0.779995positiveThe king of the phablets! Apple's iPhone 6 plu...
7[0.01400269940495491, 0.04662228375673294, 0.0...neutralThank you @Apple for fixing the #Swift sourcek...0.555058positiveThank you @Apple for fixing the #Swift sourcek...
30[0.04305626079440117, 0.0728026032447815, -0.0...positiveThank you @apple #AppleSantaMonica for fixing ...0.687004positiveThank you @apple #AppleSantaMonica for fixing ...
33[0.0709962323307991, -0.018187634646892548, -0...positiveSteve Jobs Predicted Future Of E-Commerce Back...0.829057positiveSteve Jobs Predicted Future Of E-Commerce Back...
61[0.02938288450241089, 0.02077152580022812, -0....positiveIt makes you smarter. Elevate is @apple app o...0.842974positiveIt makes you smarter. Elevate is @apple app of...
249[0.03343122825026512, 0.06543298065662384, -0....negative@apple #apple tech support is so nice. Free st...0.601036positive@apple #apple tech support is so nice. Free st...
155[0.04070044681429863, 0.02998330444097519, 0.0...negativeI hate my MacBook now. Fuck this update and fu...0.698858negativeI hate my MacBook now. Fuck this update and fu...
162[-0.025308595970273018, -0.00607736362144351, ...neutralthanks @apple for making life complicated0.569041negativethanks @apple for making life complicated
4[0.036090489476919174, 0.033749453723430634, -...positiveApple Inc. CEO Donates $291K To Pennsylvania S...0.724116positiveApple Inc. CEO Donates $291K To Pennsylvania S...
34[-0.008514916524291039, -0.02772395685315132, ...negativeFinally updated to iOS 8...this happened ha we...0.663779negativeFinally updated to iOS 8...this happened ha we...
154[-0.021790482103824615, 0.026734383776783943, ...positiveThank you to the nice lady at @Apple in #Regen...0.846050positiveThank you to the nice lady at @Apple in #Regen...
200[-0.006971406284719706, 0.024408211931586266, ...positive@OneRepublic @Apple this is awesome! Can I join?0.804950positive@OneRepublic @Apple this is awesome! Can I join?
180[0.07439534366130829, 0.02210349403321743, -0....positiveCurrently on hold with @Apple in Cupertino to ...0.633425negativeCurrently on hold with @Apple in Cupertino to ...
187[0.008138233795762062, -0.026052597910165787, ...negativemy dad called now my musics arent playing jesu...0.702004negativemy dad called now my musics arent playing jesu...
93[0.06705766916275024, 0.05627201497554779, -0....positive#apple earns more #profit in on quarter than #...0.783651positive#apple earns more #profit in on quarter than #...
189[0.040938232094049454, 0.060883838683366776, -...neutralThe customer service from @apple is the BEST....0.535951positiveThe customer service from @apple is the BEST. ...
37[0.012832739390432835, 0.051798250526189804, 0...positiveAt the Genius Bar@apple store. Very helpful #...0.815347positiveAt the Genius Bar@apple store. Very helpful #A...
177[0.023287296295166016, 0.03480781987309456, 0....positiveEnjoying the introductions at the @apple #conn...0.883996positiveEnjoying the introductions at the @apple #conn...
47[0.02025674283504486, 0.019427286460995674, -0...negative@iTunes @apple my music library wont sync new ...0.741416negative@iTunes @apple my music library wont sync new ...
49[0.007304496597498655, -0.00493069039657712, -...positiveThank you @Apple0.736227positiveThank you @Apple
9[0.019063668325543404, 0.012368913739919662, -...neutral@sometimesboring also wtf @apple i dont even l...0.556204negative@sometimesboring also wtf @apple i dont even l...
188[0.04265458136796951, 0.03990786150097847, -0....neutralProtesters in the @Apple store pretty much sum...0.529252negativeProtesters in the @Apple store pretty much sum...
128[0.02501610852777958, 0.04794774204492569, -0....neutral@apple I have been on hold for 30 minutes than...0.557699negative@apple I have been on hold for 30 minutes than...
117[0.07592077553272247, 0.04938220977783203, -0....negativeApple makes bad chargers. The @apple genius sa...0.700672negativeApple makes bad chargers. The @apple genius sa...
39[0.0005533038638532162, -0.014303376898169518,...negativeOh @apple, why do I have to delete my unwanted...0.762471negativeOh @apple, why do I have to delete my unwanted...
206[0.05893688648939133, 0.0094262370839715, -0.0...positive#Apple dominates mobile online shopping at 78 ...0.844476positive#Apple dominates mobile online shopping at 78 ...
63[0.016833314672112465, 0.025263063609600067, -...neutral@apple fuck you0.519171negative@apple fuck you
\n","
"],"text/plain":[" default_name_embeddings ... document\n","origin_index ... \n","194 [0.0020813194569200277, -0.025028454139828682,... ... @fullcircleone ThanX! Big @Apple ThanX goes 2 ...\n","279 [0.05548453330993652, 0.06619943678379059, -0.... ... Can't believe @Apple said melted keys on Mac P...\n","259 [0.06910835206508636, -0.052032221108675, -0.0... ... @apple had a dream last night where I downgrad...\n","148 [0.059678513556718826, 0.06913778185844421, -0... ... RT @paisley_smithh: Real tired of my charger b...\n","158 [0.011856582947075367, 0.016805896535515785, -... ... @OneRepublic @Apple THIS IS SO BEAUTIFUL\n","224 [0.023869305849075317, -0.033373694866895676, ... ... @stevewoz @Outback Personally I am excited abo...\n","142 [0.008206114172935486, 0.0042152707464993, -0.... ... These Damn @Apple Commercials Are Getting Wors...\n","23 [0.044899821281433105, 0.033001091331243515, 0... ... Thanks @Apple store @RobinaTC for helping me o...\n","24 [0.06401650607585907, 0.02939487248659134, -0.... ... #iPhone5s is the best mobile that I have had. ...\n","97 [0.036246202886104584, 0.014746210537850857, -... ... fuck type of shit is this @Apple @autocorrect ...\n","267 [-0.025635968893766403, 0.026251723989844322, ... ... hey @apple my 5s keeps glitching :/\n","68 [-0.017101608216762543, 0.0018259129719808698,... ... Fucking goddamn stupid @apple @iTunes , ur too...\n","55 [0.008575155399739742, 0.015347552485764027, 0... ... @apple please get yourself together! I need my...\n","239 [0.038337450474500656, 0.042477983981370926, -... ... waiting a week for a DUNS number is preventing...\n","274 [0.031279537826776505, 0.039565060287714005, 0... ... Dear @apple replace my phone my battery sucks\n","99 [0.005306210368871689, 0.00793572049587965, 0.... ... @OneRepublic @Apple You all are ready to go. #...\n","181 [0.05127181485295296, 0.03388502821326256, -0.... ... 
@CharlesJMeyer @Apple @Appy_Geek Hasn't Apple ...\n","172 [0.02794131077826023, 0.008706841617822647, 0.... ... @Apple This was NORMAL use-- NO ABUSE! Wrapped...\n","58 [0.03853776305913925, -0.003850174369290471, -... ... YO YOU AINT SHIT @apple\n","229 [0.054637834429740906, 0.023656053468585014, -... ... @Apple's share of UK smartphone sales soared t...\n","168 [0.0441458635032177, 0.03536572307348251, -0.0... ... @brwnskin_beauti cause fuck @apple\n","84 [-0.02587798982858658, 0.03256731107831001, -0... ... iTunes is pissing me tf off @apple\n","171 [0.007304496597498655, -0.00493069039657712, -... ... Thank you @apple\n","250 [0.07199912518262863, 0.0021514107938855886, -... ... The king of the phablets! Apple's iPhone 6 plu...\n","7 [0.01400269940495491, 0.04662228375673294, 0.0... ... Thank you @Apple for fixing the #Swift sourcek...\n","30 [0.04305626079440117, 0.0728026032447815, -0.0... ... Thank you @apple #AppleSantaMonica for fixing ...\n","33 [0.0709962323307991, -0.018187634646892548, -0... ... Steve Jobs Predicted Future Of E-Commerce Back...\n","61 [0.02938288450241089, 0.02077152580022812, -0.... ... It makes you smarter. Elevate is @apple app of...\n","249 [0.03343122825026512, 0.06543298065662384, -0.... ... @apple #apple tech support is so nice. Free st...\n","155 [0.04070044681429863, 0.02998330444097519, 0.0... ... I hate my MacBook now. Fuck this update and fu...\n","162 [-0.025308595970273018, -0.00607736362144351, ... ... thanks @apple for making life complicated\n","4 [0.036090489476919174, 0.033749453723430634, -... ... Apple Inc. CEO Donates $291K To Pennsylvania S...\n","34 [-0.008514916524291039, -0.02772395685315132, ... ... Finally updated to iOS 8...this happened ha we...\n","154 [-0.021790482103824615, 0.026734383776783943, ... ... Thank you to the nice lady at @Apple in #Regen...\n","200 [-0.006971406284719706, 0.024408211931586266, ... ... @OneRepublic @Apple this is awesome! 
Can I join?\n","180 [0.07439534366130829, 0.02210349403321743, -0.... ... Currently on hold with @Apple in Cupertino to ...\n","187 [0.008138233795762062, -0.026052597910165787, ... ... my dad called now my musics arent playing jesu...\n","93 [0.06705766916275024, 0.05627201497554779, -0.... ... #apple earns more #profit in on quarter than #...\n","189 [0.040938232094049454, 0.060883838683366776, -... ... The customer service from @apple is the BEST. ...\n","37 [0.012832739390432835, 0.051798250526189804, 0... ... At the Genius Bar@apple store. Very helpful #A...\n","177 [0.023287296295166016, 0.03480781987309456, 0.... ... Enjoying the introductions at the @apple #conn...\n","47 [0.02025674283504486, 0.019427286460995674, -0... ... @iTunes @apple my music library wont sync new ...\n","49 [0.007304496597498655, -0.00493069039657712, -... ... Thank you @Apple\n","9 [0.019063668325543404, 0.012368913739919662, -... ... @sometimesboring also wtf @apple i dont even l...\n","188 [0.04265458136796951, 0.03990786150097847, -0.... ... Protesters in the @Apple store pretty much sum...\n","128 [0.02501610852777958, 0.04794774204492569, -0.... ... @apple I have been on hold for 30 minutes than...\n","117 [0.07592077553272247, 0.04938220977783203, -0.... ... Apple makes bad chargers. The @apple genius sa...\n","39 [0.0005533038638532162, -0.014303376898169518,... ... Oh @apple, why do I have to delete my unwanted...\n","206 [0.05893688648939133, 0.0094262370839715, -0.0... ... #Apple dominates mobile online shopping at 78 ...\n","63 [0.016833314672112465, 0.025263063609600067, -... ... @apple fuck you\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["#4. 
Test the fitted pipe on a new example"]},{"cell_type":"code","metadata":{"id":"qdCUg2MR0PD2","colab":{"base_uri":"https://localhost:8080/","height":110},"executionInfo":{"status":"ok","timestamp":1614563092888,"user_tz":-300,"elapsed":185905,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"27017458-057c-4f33-f16d-ce1498ccd444"},"source":["fitted_pipe.predict('I hate the newest update')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingssentimentsentiment_confidencedocument
origin_index
0[-0.023322951048612595, -0.04157407209277153, ...negative0.604448I hate the newest update
\n","
"],"text/plain":[" default_name_embeddings ... document\n","origin_index ... \n","0 [-0.023322951048612595, -0.04157407209277153, ... ... I hate the newest update\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["##5. Configure pipe training parameters"]},{"cell_type":"code","metadata":{"id":"UtsAUGTmOTms","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614563092892,"user_tz":-300,"elapsed":185898,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"24d0313f-f7f2-4baa-8fac-3a59651240e2"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setLoadSP(False) | Info: Whether to load SentencePiece ops file which is required only by multi-lingual models. This is not changeable after it's set with a pretrained model nor it is compatible with Windows. | Currently set to : False\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to 
explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["##6. Retrain with new parameters"]},{"cell_type":"code","metadata":{"id":"mptfvHx-MMMX","colab":{"base_uri":"https://localhost:8080/","height":793},"executionInfo":{"status":"ok","timestamp":1614563101782,"user_tz":-300,"elapsed":194776,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d1bec93e-f1b0-4e04-95dd-1227885944a4"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.92 0.92 0.92 49\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.96 0.88 0.92 51\n","\n"," accuracy 0.90 100\n"," macro avg 0.63 0.60 0.61 100\n","weighted avg 0.94 0.90 0.92 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingssentimenttextsentiment_confidenceydocument
origin_index
194[0.0020813194569200277, -0.025028454139828682,...positive@fullcircleone ThanX! Big @Apple ThanX goes 2 ...0.997639positive@fullcircleone ThanX! Big @Apple ThanX goes 2 ...
279[0.05548453330993652, 0.06619943678379059, -0....negativeCan't believe @Apple said melted keys on Mac P...0.951257negativeCan't believe @Apple said melted keys on Mac P...
259[0.06910835206508636, -0.052032221108675, -0.0...negative@apple had a dream last night where I downgrad...0.957475negative@apple had a dream last night where I downgrad...
148[0.059678513556718826, 0.06913778185844421, -0...negativeRT @paisley_smithh: Real tired of my charger b...0.748118negativeRT @paisley_smithh: Real tired of my charger b...
158[0.011856582947075367, 0.016805896535515785, -...positive@OneRepublic @Apple THIS IS SO BEAUTIFUL0.992686positive@OneRepublic @Apple THIS IS SO BEAUTIFUL
.....................
12[0.06702571362257004, -0.04291766509413719, -0...positive#AAPL providing another great entry point &lt;...0.998947positive#AAPL providing another great entry point &lt;...
277[0.01444460079073906, -0.020860610529780388, -...negative@apple fucking let everyone name the group cha...0.953368negative@apple fucking let everyone name the group cha...
121[0.013054611161351204, 0.04479760676622391, -0...positiveGreat service at @Apple #BethesdaRow thanks Je...0.993416positiveGreat service at @Apple #BethesdaRow thanks Je...
212[0.018735762685537338, 0.07813401520252228, -0...positiveRT @_iamGambino: Thank you @Apple0.977753positiveRT @_iamGambino: Thank you @Apple
167[0.033374980092048645, 0.05603685975074768, -0...negative@HlPSTALUKE @Apple one of you stepped on my ph...0.979619negative@HlPSTALUKE @Apple one of you stepped on my ph...
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" default_name_embeddings ... document\n","origin_index ... \n","194 [0.0020813194569200277, -0.025028454139828682,... ... @fullcircleone ThanX! Big @Apple ThanX goes 2 ...\n","279 [0.05548453330993652, 0.06619943678379059, -0.... ... Can't believe @Apple said melted keys on Mac P...\n","259 [0.06910835206508636, -0.052032221108675, -0.0... ... @apple had a dream last night where I downgrad...\n","148 [0.059678513556718826, 0.06913778185844421, -0... ... RT @paisley_smithh: Real tired of my charger b...\n","158 [0.011856582947075367, 0.016805896535515785, -... ... @OneRepublic @Apple THIS IS SO BEAUTIFUL\n","... ... ... ...\n","12 [0.06702571362257004, -0.04291766509413719, -0... ... #AAPL providing another great entry point <...\n","277 [0.01444460079073906, -0.020860610529780388, -... ... @apple fucking let everyone name the group cha...\n","121 [0.013054611161351204, 0.04479760676622391, -0... ... Great service at @Apple #BethesdaRow thanks Je...\n","212 [0.018735762685537338, 0.07813401520252228, -0... ... RT @_iamGambino: Thank you @Apple\n","167 [0.033374980092048645, 0.05603685975074768, -0... ... @HlPSTALUKE @Apple one of you stepped on my ph...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["#7. Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614563101814,"user_tz":-300,"elapsed":194798,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d0ace687-f3a1-41cd-84f0-8fd4f4e138f2"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614563324463,"user_tz":-300,"elapsed":417439,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"436f5788-b69e-489e-d4e8-815a9830fcb3"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer 
and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(110) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.95 0.87 0.91 117\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.90 0.89 0.90 111\n","\n"," accuracy 0.88 228\n"," macro avg 0.62 0.59 0.60 228\n","weighted avg 0.93 0.88 0.90 228\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 7.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614563367220,"user_tz":-300,"elapsed":460194,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"523a49ac-acc9-4aac-bc6f-f5da43f69889"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates some NaNs. 
let's drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.88 0.85 0.86 26\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.93 0.84 0.89 32\n","\n"," accuracy 0.84 58\n"," macro avg 0.60 0.56 0.58 58\n","weighted avg 0.91 0.84 0.88 58\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 8. Let's save the model"]},{"cell_type":"code","metadata":{"id":"bZZpObLOtqo8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468632998,"user_tz":-300,"elapsed":627783,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e6a87d34-ce84-4968-c3a0-9aade476874b"},"source":["stored_model_path = './models/classifier_dl_trained' \r\n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 9. Let's load the model from HDD.\n","This makes offline NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":127},"executionInfo":{"status":"ok","timestamp":1609468646911,"user_tz":-300,"elapsed":641690,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"454b2c7d-7c32-4cc2-cf25-a52b5a879abd"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('I hate the newest update')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddingsdocumentsentiment
origin_index
00.974083[-0.058236218988895416, -0.3061041235923767, 0...I hate itnegative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.974083 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609468646914,"user_tz":-300,"elapsed":641685,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"80ce5918-3803-45f4-e10f-300144342295"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_covid_19.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_covid_19.ipynb new file mode 100644 index 00000000..4c926f49 --- /dev/null +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_covid_19.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_covid_19.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_covid_19.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 Class COVID-19 Sentiment Classifier Training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk 
(Enables NLU offline mode)\n","\n","\n","You can achieve these results or even better on this dataset with training data:\n","\n","\n","
\n","\n","![image.png]()\n","\n","\n","You can achieve these results or even better on this dataset with training data:\n","\n","\n","
\n","\n","![Screenshot 2021-02-25 190003.png]()"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hFGnBCHavltY","executionInfo":{"elapsed":87702,"status":"ok","timestamp":1614242484855,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"c3071220-e810-4019-f194-1417c3d432ce"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java|\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 73kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 22.0MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=fbb7c2e44539433abea58a5db430b789061641d87940efb6113dd4a2b40403ee\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Covid19 NLP Text Sentiment Classification dataset \n","https://www.kaggle.com/datatattle/covid-19-nlp-text-classification\n","#Context\n","\n","This is a dataset made of tweets about Covid 19 "]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"elapsed":3480,"status":"ok","timestamp":1614242369617,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"9b3f1f23-33bf-4a48-ca8a-3f3af4b93cdd"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/Corona_NLP_train.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-25 08:38:17-- http://ckl-it.de/wp-content/uploads/2021/02/Corona_NLP_train.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 5293639 (5.0M) [text/csv]\n","Saving to: ‘Corona_NLP_train.csv’\n","\n","Corona_NLP_train.cs 100%[===================>] 5.05M 2.68MB/s in 1.9s \n","\n","2021-02-25 08:38:19 (2.68 MB/s) - ‘Corona_NLP_train.csv’ saved [5293639/5293639]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"elapsed":1508,"status":"ok","timestamp":1614242371754,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"58d7462e-6e7a-4c5a-b339-e764a1eb82f6"},"source":["import pandas as pd\n","train_path = '/content/Corona_NLP_train.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df[columns]\n","from sklearn.model_selection import train_test_split\n","\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
1696A sickening attack on a Salisbury councillor h...negative
4290If you answered yes what kind of bottle does y...positive
9998Currently at the food bank. We appreciate our ...positive
9106*Take note of the following against covid 19* ...positive
1459Please all give a shout out to our amazing sup...positive
.........
5428So online grocery shopping is now almost impos...positive
7097When you fill your fridge with ice for a #musi...negative
6465#ChipChirps™ from #VLSIresearch's app. #weVIS...positive
7188Are We Respecting The Hustle Of People Reselli...positive
7900@Tweeter4Trump1 @deus_rock @AngelaOrme3 @Capon...negative
\n","

8000 rows × 2 columns

\n","
"],"text/plain":[" text y\n","1696 A sickening attack on a Salisbury councillor h... negative\n","4290 If you answered yes what kind of bottle does y... positive\n","9998 Currently at the food bank. We appreciate our ... positive\n","9106 *Take note of the following against covid 19* ... positive\n","1459 Please all give a shout out to our amazing sup... positive\n","... ... ...\n","5428 So online grocery shopping is now almost impos... positive\n","7097 When you fill your fridge with ice for a #musi... negative\n","6465 #ChipChirps™ from #VLSIresearch's app. #weVIS... positive\n","7188 Are We Respecting The Hustle Of People Reselli... positive\n","7900 @Tweeter4Trump1 @deus_rock @AngelaOrme3 @Capon... negative\n","\n","[8000 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"elapsed":199434,"status":"ok","timestamp":1613549294614,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"ec18ede3-6c20-4aba-d557-73b12bdd073e"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 1.00 0.21 0.34 24\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.70 1.00 0.83 26\n","\n"," accuracy 0.62 50\n"," macro avg 0.57 0.40 0.39 50\n","weighted avg 0.85 0.62 0.59 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimentsentiment_confidencedefault_name_embeddingstextydocument
origin_index
0positive0.761295[0.004783493932336569, -0.02814795821905136, -...#Cheerios maker @GeneralMills on Wednesday rai...positive#Cheerios maker @GeneralMills on Wednesday rai...
1positive0.924655[0.018235845491290092, 0.05186385661363602, -0...Got to visit this morning where they have swit...positiveGot to visit this morning where they have swit...
2neutral0.552810[-0.010089528746902943, -0.03414658457040787, ...Got a video doctors appointment first thing as...negativeGot a video doctors appointment first thing as...
3negative0.607435[-0.03430407866835594, 0.015884995460510254, -...More likely to die from starvation during a 2 ...negativeMore likely to die from starvation during a 2 ...
4positive0.934333[0.05470110848546028, 0.0018378241220489144, 0...Don't move around unnecessary \\r\\r\\r\\nStay at...positiveDon't move around unnecessary Stay at home. Us...
5positive0.852907[0.0012291448656469584, 0.049797337502241135, ...My dad who is a professor just discover about ...positiveMy dad who is a professor just discover about ...
6neutral0.528781[0.03790939226746559, -0.013407733291387558, -...Riverfront Times: Cody Pfister, the 26-year-ol...negativeRiverfront Times: Cody Pfister, the 26-year-ol...
7positive0.771463[0.04207884147763252, 0.02299105003476143, 0.0...At sale of sanitizers at factory prices is a m...positiveAt sale of sanitizers at factory prices is a m...
8positive0.708429[-0.03874765709042549, -0.006220388226211071, ...Watch this if you are one of those idiots who ...negativeWatch this if you are one of those idiots who ...
9positive0.839835[-0.07232732325792313, 0.014406188391149044, 0...The dedication of our farmers and farm workers...positiveThe dedication of our farmers and farm workers...
10positive0.600599[-0.01565781980752945, 0.010397609323263168, -...My friends husband has died this morning due t...negativeMy friends husband has died this morning due t...
11positive0.633946[-0.05496770888566971, -0.04461418837308884, -...\"While it is always a shame when food is waste...negative\"While it is always a shame when food is waste...
12neutral0.516407[0.0015882003353908658, 0.07789196819067001, 0...@AirCanada is cancelling existing flights and ...negative@AirCanada is cancelling existing flights and ...
13positive0.634921[0.03064206801354885, -0.01829364150762558, -0...Consumer advocacy group CHOICE has slammed pan...negativeConsumer advocacy group CHOICE has slammed pan...
14positive0.910824[0.02500816248357296, 0.04072476178407669, -0....@Lowes is there a reason why you’re not suppl...positive@Lowes is there a reason why you’re not suppl...
15positive0.918036[-0.03191220760345459, 0.04579667001962662, 0....Whilst u queue 4 your beauty treatment get you...positiveWhilst u queue 4 your beauty treatment get you...
16neutral0.519694[0.0034340715501457453, 0.038198184221982956, ...I can’t be the only person (CONSUMER, they on...negativeI can’t be the only person (CONSUMER, they on...
17negative0.622014[0.02401014231145382, 0.056036192923784256, -0...Not sure if this is the common cold, flu, or t...negativeNot sure if this is the common cold, flu, or t...
18positive0.841300[-0.02556675486266613, 0.004143165424466133, -...And it’s a great listen in the car if you hav...positiveAnd it’s a great listen in the car if you hav...
19positive0.914320[-0.02416982874274254, 0.02811759151518345, -0...Please be smart and prepare. Here my take “St...positivePlease be smart and prepare. Here my take “St...
20positive0.929326[-0.01671682484447956, 0.01305483840405941, 0....Lessons from COVID-19\\r\\r\\r\\n\\r\\r\\r\\nStock up ...positiveLessons from COVID-19 Stock up food at home Al...
21positive0.930061[0.07605478912591934, -0.026303431019186974, 0...This is a really great selection of advice fro...positiveThis is a really great selection of advice fro...
22positive0.692401[-0.0005388688296079636, 0.03235577791929245, ...The worst thing about TP hoarders is that when...negativeThe worst thing about TP hoarders is that when...
23positive0.898010[0.008443073369562626, -0.02788439393043518, -...Consumer psychology ? about which would result...positiveConsumer psychology ? about which would result...
24positive0.894567[-0.05909405276179314, -0.0424291230738163, -0...Helpful tips to prevent 19 from entering your ...positiveHelpful tips to prevent 19 from entering your ...
25positive0.766391[-0.01403691153973341, 0.03554365783929825, -0...@jacksenwolf @scp1471wolf #memes #coronamemes ...positive@jacksenwolf @scp1471wolf #memes #coronamemes ...
26positive0.775324[-0.06682447344064713, -0.046077944338321686, ...This grocery store worker says some customers ...negativeThis grocery store worker says some customers ...
27positive0.934482[0.013833973556756973, -0.06955333054065704, 0...Don t panic Stay healthy protect yourself amp ...positiveDon t panic Stay healthy protect yourself amp ...
28neutral0.512394[0.012594238854944706, -0.06423342227935791, -...7 for lettuce is ridiculous Fruit and veg pric...negative7 for lettuce is ridiculous Fruit and veg pric...
29positive0.634858[-0.020443512126803398, 0.008256517350673676, ...Quit harassing grocery store workers. WE KNOW ...negativeQuit harassing grocery store workers. WE KNOW ...
30negative0.605742[0.0064558726735413074, -0.03096119686961174, ...Scary Gun sales gone up in amid Fearing shorta...negativeScary Gun sales gone up in amid Fearing shorta...
31positive0.941815[-0.01913328282535076, 0.02475474402308464, 0....I m temporarily working in a supermarket to he...positiveI m temporarily working in a supermarket to he...
32positive0.904202[-0.026081491261720657, 0.03242545947432518, 0...#horningsea is a caring community. Let’s ALL ...positive#horningsea is a caring community. Let’s ALL ...
33negative0.631536[0.03639446198940277, 0.05759260803461075, -0....@exxonmobil said today it is reducing its 2020...negative@exxonmobil said today it is reducing its 2020...
34positive0.957126[0.0147019037976861, -0.02653518319129944, -0....19 Using and a distance sensor to create a tou...positive19 Using and a distance sensor to create a tou...
35negative0.600725[0.05711821839213371, 0.05161363631486893, -0....#British consumer confidence has fallen by the...negative#British consumer confidence has fallen by the...
36positive0.612837[0.07101022452116013, -0.048838380724191666, -...Isolation Illustration - For fuck sake... we’...negativeIsolation Illustration - For fuck sake... we’...
37positive0.858738[0.04629743844270706, -0.031434960663318634, -...Clever, won't happen here though\\r\\r\\r\\n\\r\\r\\r...positiveClever, won't happen here though Supermarket I...
38positive0.663183[0.010728790424764156, -0.03496933355927467, -...I dunno if this makes me a bad apocalypse comp...negativeI dunno if this makes me a bad apocalypse comp...
39positive0.883665[0.013930771499872208, 0.0005065983277745545, ...Delivery companies and other services are adap...positiveDelivery companies and other services are adap...
40neutral0.582677[0.08638226240873337, 0.0718393325805664, -0.0...Panic-buying is pushing up prices you dumb-ass...negativePanic-buying is pushing up prices you dumb-ass...
41positive0.762912[0.0711701512336731, 0.051328204572200775, -0....With consumer concern over their finances due ...positiveWith consumer concern over their finances due ...
42positive0.855599[0.03079848363995552, -0.045532431453466415, -...How to queue safely for the bill in supermarke...positiveHow to queue safely for the bill in supermarke...
43positive0.681266[0.0025339308194816113, 0.014236150309443474, ...#COVID2019 Makhura: There are two areas where ...negative#COVID2019 Makhura: There are two areas where ...
44positive0.950545[-0.015687722712755203, -0.010347431525588036,...Make sure to take every measure against #coron...positiveMake sure to take every measure against #coron...
45neutral0.554849[0.04179179295897484, -0.02091711200773716, -0...\"As shoppers shut their wallets, a consumer-dr...negative\"As shoppers shut their wallets, a consumer-dr...
46neutral0.548503[-0.03860854730010033, -0.005080435890704393, ...If Corona virus ever comes to Uganda, some of ...negativeIf Corona virus ever comes to Uganda, some of ...
47positive0.930881[-0.0628998801112175, 0.03796340152621269, 0.0...Dear supply chain, supermarket and pharmacies ...positiveDear supply chain, supermarket and pharmacies ...
48positive0.956872[0.04039526358246803, 0.052726808935403824, 0....Travel Portable Mini Hand Sanitizer Anti-Bacte...positiveTravel Portable Mini Hand Sanitizer Anti-Bacte...
49positive0.777220[-0.06671527773141861, 0.023245809599757195, 0...So my sister has just told me at her @asda sto...negativeSo my sister has just told me at her @asda sto...
\n","
"],"text/plain":[" sentiment ... document\n","origin_index ... \n","0 positive ... #Cheerios maker @GeneralMills on Wednesday rai...\n","1 positive ... Got to visit this morning where they have swit...\n","2 neutral ... Got a video doctors appointment first thing as...\n","3 negative ... More likely to die from starvation during a 2 ...\n","4 positive ... Don't move around unnecessary Stay at home. Us...\n","5 positive ... My dad who is a professor just discover about ...\n","6 neutral ... Riverfront Times: Cody Pfister, the 26-year-ol...\n","7 positive ... At sale of sanitizers at factory prices is a m...\n","8 positive ... Watch this if you are one of those idiots who ...\n","9 positive ... The dedication of our farmers and farm workers...\n","10 positive ... My friends husband has died this morning due t...\n","11 positive ... \"While it is always a shame when food is waste...\n","12 neutral ... @AirCanada is cancelling existing flights and ...\n","13 positive ... Consumer advocacy group CHOICE has slammed pan...\n","14 positive ... @Lowes is there a reason why you’re not suppl...\n","15 positive ... Whilst u queue 4 your beauty treatment get you...\n","16 neutral ... I can’t be the only person (CONSUMER, they on...\n","17 negative ... Not sure if this is the common cold, flu, or t...\n","18 positive ... And it’s a great listen in the car if you hav...\n","19 positive ... Please be smart and prepare. Here my take “St...\n","20 positive ... Lessons from COVID-19 Stock up food at home Al...\n","21 positive ... This is a really great selection of advice fro...\n","22 positive ... The worst thing about TP hoarders is that when...\n","23 positive ... Consumer psychology ? about which would result...\n","24 positive ... Helpful tips to prevent 19 from entering your ...\n","25 positive ... @jacksenwolf @scp1471wolf #memes #coronamemes ...\n","26 positive ... This grocery store worker says some customers ...\n","27 positive ... 
Don t panic Stay healthy protect yourself amp ...\n","28 neutral ... 7 for lettuce is ridiculous Fruit and veg pric...\n","29 positive ... Quit harassing grocery store workers. WE KNOW ...\n","30 negative ... Scary Gun sales gone up in amid Fearing shorta...\n","31 positive ... I m temporarily working in a supermarket to he...\n","32 positive ... #horningsea is a caring community. Let’s ALL ...\n","33 negative ... @exxonmobil said today it is reducing its 2020...\n","34 positive ... 19 Using and a distance sensor to create a tou...\n","35 negative ... #British consumer confidence has fallen by the...\n","36 positive ... Isolation Illustration - For fuck sake... we’...\n","37 positive ... Clever, won't happen here though Supermarket I...\n","38 positive ... I dunno if this makes me a bad apocalypse comp...\n","39 positive ... Delivery companies and other services are adap...\n","40 neutral ... Panic-buying is pushing up prices you dumb-ass...\n","41 positive ... With consumer concern over their finances due ...\n","42 positive ... How to queue safely for the bill in supermarke...\n","43 positive ... #COVID2019 Makhura: There are two areas where ...\n","44 positive ... Make sure to take every measure against #coron...\n","45 neutral ... \"As shoppers shut their wallets, a consumer-dr...\n","46 neutral ... If Corona virus ever comes to Uganda, some of ...\n","47 positive ... Dear supply chain, supermarket and pharmacies ...\n","48 positive ... Travel Portable Mini Hand Sanitizer Anti-Bacte...\n","49 positive ... So my sister has just told me at her @asda sto...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# 4. 
Test the fitted pipe on a new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"qdCUg2MR0PD2","executionInfo":{"elapsed":201736,"status":"ok","timestamp":1613549296935,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"f654210d-a842-42a0-b327-5d49d64d1012"},"source":["fitted_pipe.predict(\"Everything is under control !\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimentsentiment_confidencedefault_name_embeddingsdocument
origin_index
0negative0.690746[0.027917474508285522, -0.06684374064207077, -...Everything is under control !
\n","
"],"text/plain":[" sentiment ... document\n","origin_index ... \n","0 negative ... Everything is under control !\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## 5. Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"elapsed":201725,"status":"ok","timestamp":1613549296937,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"df084dab-ae75-4689-f854-b42c5744881e"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setLoadSP(False) | Info: Whether to load SentencePiece ops file which is required only by multi-lingual models. This is not changeable after it's set with a pretrained model nor it is compatible with Windows. 
| Currently set to : False\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["##6. 
Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"elapsed":208512,"status":"ok","timestamp":1613549303736,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"8d7f0af3-58d5-41cb-d663-b3d2dc6620f0"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.96 1.00 0.98 24\n"," neutral 0.00 0.00 0.00 0\n"," positive 1.00 0.92 0.96 26\n","\n"," accuracy 0.96 50\n"," macro avg 0.65 0.64 0.65 50\n","weighted avg 0.98 0.96 0.97 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimentsentiment_confidencedefault_name_embeddingstextydocument
origin_index
0neutral0.501342[0.004783493932336569, -0.02814795821905136, -...#Cheerios maker @GeneralMills on Wednesday rai...positive#Cheerios maker @GeneralMills on Wednesday rai...
1positive0.988317[0.018235845491290092, 0.05186385661363602, -0...Got to visit this morning where they have swit...positiveGot to visit this morning where they have swit...
2negative0.919849[-0.010089528746902943, -0.03414658457040787, ...Got a video doctors appointment first thing as...negativeGot a video doctors appointment first thing as...
3negative0.969353[-0.03430407866835594, 0.015884995460510254, -...More likely to die from starvation during a 2 ...negativeMore likely to die from starvation during a 2 ...
4positive0.988857[0.05470110848546028, 0.0018378241220489144, 0...Don't move around unnecessary \\r\\r\\r\\nStay at...positiveDon't move around unnecessary Stay at home. Us...
5positive0.943565[0.0012291448656469584, 0.049797337502241135, ...My dad who is a professor just discover about ...positiveMy dad who is a professor just discover about ...
6negative0.905861[0.03790939226746559, -0.013407733291387558, -...Riverfront Times: Cody Pfister, the 26-year-ol...negativeRiverfront Times: Cody Pfister, the 26-year-ol...
7negative0.648055[0.04207884147763252, 0.02299105003476143, 0.0...At sale of sanitizers at factory prices is a m...positiveAt sale of sanitizers at factory prices is a m...
8negative0.938932[-0.03874765709042549, -0.006220388226211071, ...Watch this if you are one of those idiots who ...negativeWatch this if you are one of those idiots who ...
9positive0.896486[-0.07232732325792313, 0.014406188391149044, 0...The dedication of our farmers and farm workers...positiveThe dedication of our farmers and farm workers...
10negative0.839513[-0.01565781980752945, 0.010397609323263168, -...My friends husband has died this morning due t...negativeMy friends husband has died this morning due t...
11negative0.932206[-0.05496770888566971, -0.04461418837308884, -...\"While it is always a shame when food is waste...negative\"While it is always a shame when food is waste...
12negative0.906857[0.0015882003353908658, 0.07789196819067001, 0...@AirCanada is cancelling existing flights and ...negative@AirCanada is cancelling existing flights and ...
13negative0.930158[0.03064206801354885, -0.01829364150762558, -0...Consumer advocacy group CHOICE has slammed pan...negativeConsumer advocacy group CHOICE has slammed pan...
14positive0.925111[0.02500816248357296, 0.04072476178407669, -0....@Lowes is there a reason why you’re not suppl...positive@Lowes is there a reason why you’re not suppl...
15positive0.986977[-0.03191220760345459, 0.04579667001962662, 0....Whilst u queue 4 your beauty treatment get you...positiveWhilst u queue 4 your beauty treatment get you...
16negative0.928900[0.0034340715501457453, 0.038198184221982956, ...I can’t be the only person (CONSUMER, they on...negativeI can’t be the only person (CONSUMER, they on...
17negative0.932876[0.02401014231145382, 0.056036192923784256, -0...Not sure if this is the common cold, flu, or t...negativeNot sure if this is the common cold, flu, or t...
18positive0.917828[-0.02556675486266613, 0.004143165424466133, -...And it’s a great listen in the car if you hav...positiveAnd it’s a great listen in the car if you hav...
19positive0.982969[-0.02416982874274254, 0.02811759151518345, -0...Please be smart and prepare. Here my take “St...positivePlease be smart and prepare. Here my take “St...
20positive0.992138[-0.01671682484447956, 0.01305483840405941, 0....Lessons from COVID-19\\r\\r\\r\\n\\r\\r\\r\\nStock up ...positiveLessons from COVID-19 Stock up food at home Al...
21positive0.990756[0.07605478912591934, -0.026303431019186974, 0...This is a really great selection of advice fro...positiveThis is a really great selection of advice fro...
22negative0.857424[-0.0005388688296079636, 0.03235577791929245, ...The worst thing about TP hoarders is that when...negativeThe worst thing about TP hoarders is that when...
23positive0.988393[0.008443073369562626, -0.02788439393043518, -...Consumer psychology ? about which would result...positiveConsumer psychology ? about which would result...
24positive0.939364[-0.05909405276179314, -0.0424291230738163, -0...Helpful tips to prevent 19 from entering your ...positiveHelpful tips to prevent 19 from entering your ...
25positive0.958614[-0.01403691153973341, 0.03554365783929825, -0...@jacksenwolf @scp1471wolf #memes #coronamemes ...positive@jacksenwolf @scp1471wolf #memes #coronamemes ...
26negative0.880688[-0.06682447344064713, -0.046077944338321686, ...This grocery store worker says some customers ...negativeThis grocery store worker says some customers ...
27positive0.985295[0.013833973556756973, -0.06955333054065704, 0...Don t panic Stay healthy protect yourself amp ...positiveDon t panic Stay healthy protect yourself amp ...
28negative0.943985[0.012594238854944706, -0.06423342227935791, -...7 for lettuce is ridiculous Fruit and veg pric...negative7 for lettuce is ridiculous Fruit and veg pric...
29negative0.904067[-0.020443512126803398, 0.008256517350673676, ...Quit harassing grocery store workers. WE KNOW ...negativeQuit harassing grocery store workers. WE KNOW ...
30negative0.952852[0.0064558726735413074, -0.03096119686961174, ...Scary Gun sales gone up in amid Fearing shorta...negativeScary Gun sales gone up in amid Fearing shorta...
31positive0.994854[-0.01913328282535076, 0.02475474402308464, 0....I m temporarily working in a supermarket to he...positiveI m temporarily working in a supermarket to he...
32positive0.973395[-0.026081491261720657, 0.03242545947432518, 0...#horningsea is a caring community. Let’s ALL ...positive#horningsea is a caring community. Let’s ALL ...
33negative0.960644[0.03639446198940277, 0.05759260803461075, -0....@exxonmobil said today it is reducing its 2020...negative@exxonmobil said today it is reducing its 2020...
34positive0.997540[0.0147019037976861, -0.02653518319129944, -0....19 Using and a distance sensor to create a tou...positive19 Using and a distance sensor to create a tou...
35negative0.918195[0.05711821839213371, 0.05161363631486893, -0....#British consumer confidence has fallen by the...negative#British consumer confidence has fallen by the...
36negative0.765415[0.07101022452116013, -0.048838380724191666, -...Isolation Illustration - For fuck sake... we’...negativeIsolation Illustration - For fuck sake... we’...
37positive0.939485[0.04629743844270706, -0.031434960663318634, -...Clever, won't happen here though\\r\\r\\r\\n\\r\\r\\r...positiveClever, won't happen here though Supermarket I...
38negative0.870772[0.010728790424764156, -0.03496933355927467, -...I dunno if this makes me a bad apocalypse comp...negativeI dunno if this makes me a bad apocalypse comp...
39positive0.964808[0.013930771499872208, 0.0005065983277745545, ...Delivery companies and other services are adap...positiveDelivery companies and other services are adap...
40negative0.929598[0.08638226240873337, 0.0718393325805664, -0.0...Panic-buying is pushing up prices you dumb-ass...negativePanic-buying is pushing up prices you dumb-ass...
41positive0.684870[0.0711701512336731, 0.051328204572200775, -0....With consumer concern over their finances due ...positiveWith consumer concern over their finances due ...
42positive0.958181[0.03079848363995552, -0.045532431453466415, -...How to queue safely for the bill in supermarke...positiveHow to queue safely for the bill in supermarke...
43negative0.923118[0.0025339308194816113, 0.014236150309443474, ...#COVID2019 Makhura: There are two areas where ...negative#COVID2019 Makhura: There are two areas where ...
44positive0.996742[-0.015687722712755203, -0.010347431525588036,...Make sure to take every measure against #coron...positiveMake sure to take every measure against #coron...
45negative0.952125[0.04179179295897484, -0.02091711200773716, -0...\"As shoppers shut their wallets, a consumer-dr...negative\"As shoppers shut their wallets, a consumer-dr...
46negative0.913734[-0.03860854730010033, -0.005080435890704393, ...If Corona virus ever comes to Uganda, some of ...negativeIf Corona virus ever comes to Uganda, some of ...
47positive0.983328[-0.0628998801112175, 0.03796340152621269, 0.0...Dear supply chain, supermarket and pharmacies ...positiveDear supply chain, supermarket and pharmacies ...
48positive0.996729[0.04039526358246803, 0.052726808935403824, 0....Travel Portable Mini Hand Sanitizer Anti-Bacte...positiveTravel Portable Mini Hand Sanitizer Anti-Bacte...
49negative0.775618[-0.06671527773141861, 0.023245809599757195, 0...So my sister has just told me at her @asda sto...negativeSo my sister has just told me at her @asda sto...
\n","
"],"text/plain":[" sentiment ... document\n","origin_index ... \n","0 neutral ... #Cheerios maker @GeneralMills on Wednesday rai...\n","1 positive ... Got to visit this morning where they have swit...\n","2 negative ... Got a video doctors appointment first thing as...\n","3 negative ... More likely to die from starvation during a 2 ...\n","4 positive ... Don't move around unnecessary Stay at home. Us...\n","5 positive ... My dad who is a professor just discover about ...\n","6 negative ... Riverfront Times: Cody Pfister, the 26-year-ol...\n","7 negative ... At sale of sanitizers at factory prices is a m...\n","8 negative ... Watch this if you are one of those idiots who ...\n","9 positive ... The dedication of our farmers and farm workers...\n","10 negative ... My friends husband has died this morning due t...\n","11 negative ... \"While it is always a shame when food is waste...\n","12 negative ... @AirCanada is cancelling existing flights and ...\n","13 negative ... Consumer advocacy group CHOICE has slammed pan...\n","14 positive ... @Lowes is there a reason why you’re not suppl...\n","15 positive ... Whilst u queue 4 your beauty treatment get you...\n","16 negative ... I can’t be the only person (CONSUMER, they on...\n","17 negative ... Not sure if this is the common cold, flu, or t...\n","18 positive ... And it’s a great listen in the car if you hav...\n","19 positive ... Please be smart and prepare. Here my take “St...\n","20 positive ... Lessons from COVID-19 Stock up food at home Al...\n","21 positive ... This is a really great selection of advice fro...\n","22 negative ... The worst thing about TP hoarders is that when...\n","23 positive ... Consumer psychology ? about which would result...\n","24 positive ... Helpful tips to prevent 19 from entering your ...\n","25 positive ... @jacksenwolf @scp1471wolf #memes #coronamemes ...\n","26 negative ... This grocery store worker says some customers ...\n","27 positive ... 
Don t panic Stay healthy protect yourself amp ...\n","28 negative ... 7 for lettuce is ridiculous Fruit and veg pric...\n","29 negative ... Quit harassing grocery store workers. WE KNOW ...\n","30 negative ... Scary Gun sales gone up in amid Fearing shorta...\n","31 positive ... I m temporarily working in a supermarket to he...\n","32 positive ... #horningsea is a caring community. Let’s ALL ...\n","33 negative ... @exxonmobil said today it is reducing its 2020...\n","34 positive ... 19 Using and a distance sensor to create a tou...\n","35 negative ... #British consumer confidence has fallen by the...\n","36 negative ... Isolation Illustration - For fuck sake... we’...\n","37 positive ... Clever, won't happen here though Supermarket I...\n","38 negative ... I dunno if this makes me a bad apocalypse comp...\n","39 positive ... Delivery companies and other services are adap...\n","40 negative ... Panic-buying is pushing up prices you dumb-ass...\n","41 positive ... With consumer concern over their finances due ...\n","42 positive ... How to queue safely for the bill in supermarke...\n","43 negative ... #COVID2019 Makhura: There are two areas where ...\n","44 positive ... Make sure to take every measure against #coron...\n","45 negative ... \"As shoppers shut their wallets, a consumer-dr...\n","46 negative ... If Corona virus ever comes to Uganda, some of ...\n","47 positive ... Dear supply chain, supermarket and pharmacies ...\n","48 positive ... Travel Portable Mini Hand Sanitizer Anti-Bacte...\n","49 negative ... So my sister has just told me at her @asda sto...\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# 7. 
Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"elapsed":208506,"status":"ok","timestamp":1613549303741,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"48f86d3a-8bdb-4f45-976f-52a84d5bc439"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark 
NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') 
returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model 
labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"elapsed":6329680,"status":"ok","timestamp":1614248732249,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"f93a4985-227a-4c71-ff67-652bb1f8d0bb"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.89 0.88 0.89 3989\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.91 0.86 0.88 4011\n","\n"," accuracy 0.87 8000\n"," macro avg 0.60 0.58 0.59 8000\n","weighted avg 0.90 0.87 0.88 8000\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 7.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"colab":{"background_save":true},"id":"Fxx4yNkNVGFl","outputId":"a941cd66-a89d-4a5c-dffa-b28c8cea6e3d"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.87 0.86 0.87 1011\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.88 0.84 0.86 989\n","\n"," accuracy 0.85 2000\n"," macro avg 0.58 0.57 0.57 2000\n","weighted avg 0.87 0.85 0.86 2000\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 8. 
Let's save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"elapsed":11096565,"status":"ok","timestamp":1613560191812,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"8cd5ccb6-8c20-47f3-e4b6-9ec5d58366c4"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 9. Let's load the model from HDD.\n","This makes Offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SO4uz45MoRgp","executionInfo":{"elapsed":11112108,"status":"ok","timestamp":1613560207359,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"1a9ceaad-9993-4aa3-f350-188ab3e82b29"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Everything is under control !')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimentsentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddingsdocument
origin_index
0negative0.948881[0.37780338525772095, 0.29955390095710754, 0.1...Everything is under control !
\n","
"],"text/plain":[" sentiment ... document\n","origin_index ... \n","0 negative ... Everything is under control !\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"elapsed":11112105,"status":"ok","timestamp":1613560207361,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"},"user_tz":-300},"outputId":"d31d41a8-8a15-4d83-f9db-52779f1eb993"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this SentimentDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb index 81f918c0..d36817e3 100644 --- a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_finanical_news.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class Finance News sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Finanical News Sentiment dataset \n","https://www.kaggle.com/ankurzing/sentiment-analysis-for-financial-news\n","\n","This dataset contains the sentiments for financial news headlines from the perspective of a retail investor. Further details about the dataset can be found in: Malo, P., Sinha, A., Takala, P., Korhonen, P. and Wallenius, J. 
(2014): “Good debt or bad debt: Detecting semantic orientations in economic texts.” Journal of the American Society for Information Science and Technology."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610788018304,"user_tz":-300,"elapsed":2399,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f494fab0-8f9c-4087-f554-31a21764a207"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 09:06:20-- http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 704799 (688K) [text/csv]\n","Saving to: ‘all-data.csv’\n","\n","all-data.csv 100%[===================>] 688.28K 1.09MB/s in 0.6s \n","\n","2021-01-16 09:06:21 (1.09 MB/s) - ‘all-data.csv’ saved [704799/704799]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610788018314,"user_tz":-300,"elapsed":660,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e1e2496a-8df8-4e5d-db53-63d62ef1f050"},"source":["import pandas as pd\n","train_path = '/content/all-data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neutral\"])]\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
1The international electronic industry company ...negative
2With the new production plant the company woul...positive
3According to the company 's updated strategy f...positive
4FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...positive
5For the last quarter of 2010 , Componenta 's n...positive
.........
4839HELSINKI Thomson Financial - Shares in Cargote...negative
4840LONDON MarketWatch -- Share prices ended lower...negative
4842Operating profit fell to EUR 35.4 mn from EUR ...negative
4843Net sales of the Paper segment decreased to EU...negative
4844Sales in Finland decreased by 10.5 % in Januar...negative
\n","

1967 rows × 2 columns

\n","
"],"text/plain":[" text y\n","1 The international electronic industry company ... negative\n","2 With the new production plant the company woul... positive\n","3 According to the company 's updated strategy f... positive\n","4 FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag... positive\n","5 For the last quarter of 2010 , Componenta 's n... positive\n","... ... ...\n","4839 HELSINKI Thomson Financial - Shares in Cargote... negative\n","4840 LONDON MarketWatch -- Share prices ended lower... negative\n","4842 Operating profit fell to EUR 35.4 mn from EUR ... negative\n","4843 Net sales of the Paper segment decreased to EU... negative\n","4844 Sales in Finland decreased by 10.5 % in Januar... negative\n","\n","[1967 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609264914996,"user_tz":-300,"elapsed":191025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6dc536e4-252e-4324-e070-cd477a79330d"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 1\n"," positive 0.98 1.00 0.99 49\n","\n"," accuracy 0.98 50\n"," macro avg 0.49 0.50 0.49 50\n","weighted avg 0.96 0.98 0.97 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentysentiment_confidencetextdefault_name_embeddings
origin_index
1The international electronic industry company ...positivenegative1.000000The international electronic industry company ...[0.002136496128514409, 0.07194118946790695, -0...
2With the new production plant the company woul...positivepositive1.000000With the new production plant the company woul...[0.05198746547102928, 0.03577739745378494, -0....
3According to the company 's updated strategy f...positivepositive1.000000According to the company 's updated strategy f...[0.03416536748409271, 0.04053246229887009, -0....
4FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...positivepositive1.000000FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...[0.07730763405561447, -0.045694783329963684, -...
5For the last quarter of 2010 , Componenta 's n...positivepositive1.000000For the last quarter of 2010 , Componenta 's n...[0.05603468790650368, 0.04817350581288338, -0....
6In the third quarter of 2010 , net sales incre...positivepositive1.000000In the third quarter of 2010 , net sales incre...[0.037710510194301605, 0.037198420614004135, -...
7Operating profit rose to EUR 13.1 mn from EUR ...positivepositive1.000000Operating profit rose to EUR 13.1 mn from EUR ...[0.04557091370224953, 0.0453636609017849, -0.0...
8Operating profit totalled EUR 21.1 mn , up fro...positivepositive1.000000Operating profit totalled EUR 21.1 mn , up fro...[0.05191247910261154, 0.059505216777324677, -0...
9TeliaSonera TLSN said the offer is in line wit...positivepositive1.000000TeliaSonera TLSN said the offer is in line wit...[0.07441692799329758, -0.0487477071583271, -0....
10STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...positivepositive1.000000STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN...[0.03200741112232208, 0.03773287683725357, -0....
11A purchase agreement for 7,200 tons of gasolin...positivepositive1.000000A purchase agreement for 7,200 tons of gasolin...[0.05590442568063736, 0.041032955050468445, -0...
12Finnish Talentum reports its operating profit ...positivepositive1.000000Finnish Talentum reports its operating profit ...[0.06596074998378754, 0.05897102504968643, -0....
13Clothing retail chain Sepp+Æl+Æ 's sales incre...positivepositive1.000000Clothing retail chain Sepp+Æl+Æ 's sales incre...[0.03395465016365051, 0.05171804875135422, 0.0...
14Consolidated net sales increased 16 % to reach...positivepositive1.000000Consolidated net sales increased 16 % to reach...[0.060446273535490036, 0.03799470514059067, -0...
15Foundries division reports its sales increased...positivepositive1.000000Foundries division reports its sales increased...[0.0494563989341259, 0.05158388614654541, -0.0...
16HELSINKI ( AFX ) - Shares closed higher , led ...positivepositive1.000000HELSINKI ( AFX ) - Shares closed higher , led ...[0.0629865899682045, -0.045351240783929825, -0...
17Incap Contract Manufacturing Services Pvt Ltd ...positivepositive1.000000Incap Contract Manufacturing Services Pvt Ltd ...[0.05365738272666931, -0.055247869342565536, -...
18Its board of directors will propose a dividend...positivepositive1.000000Its board of directors will propose a dividend...[0.0692642331123352, 0.02292279154062271, -0.0...
19Lifetree was founded in 2000 , and its revenue...positivepositive1.000000Lifetree was founded in 2000 , and its revenue...[0.0810408890247345, 0.039108917117118835, -0....
20( Filippova ) A trilateral agreement on invest...positivepositive0.999998( Filippova ) A trilateral agreement on invest...[0.05172618478536606, 0.02967883087694645, -0....
21MegaFon 's subscriber base increased 16.1 % in...positivepositive1.000000MegaFon 's subscriber base increased 16.1 % in...[0.03825156390666962, 0.001971189398318529, -0...
22Net income from life insurance doubled to EUR ...positivepositive1.000000Net income from life insurance doubled to EUR ...[0.05222763866186142, 0.05695151165127754, -0....
23Net sales increased to EUR193 .3 m from EUR179...positivepositive1.000000Net sales increased to EUR193 .3 m from EUR179...[0.02272764965891838, 0.016222774982452393, 0....
24Net sales surged by 18.5 % to EUR167 .8 m. Tel...positivepositive1.000000Net sales surged by 18.5 % to EUR167 .8 m. Tel...[0.05020830035209656, 0.03307913616299629, -0....
25Nordea Group 's operating profit increased in ...positivepositive1.000000Nordea Group 's operating profit increased in ...[0.0497022308409214, 0.023793146014213562, -0....
26Operating profit for the nine-month period inc...positivepositive1.000000Operating profit for the nine-month period inc...[0.04339126497507095, 0.024815633893013, -0.02...
27Operating profit for the nine-month period inc...positivepositive1.000000Operating profit for the nine-month period inc...[0.035663120448589325, 0.03037247434258461, -0...
28Operating profit for the three-month period in...positivepositive1.000000Operating profit for the three-month period in...[0.029575243592262268, 0.007764187641441822, -...
29The Brazilian unit of Finnish security solutio...positivepositive1.000000The Brazilian unit of Finnish security solutio...[0.047570426017045975, -0.023694489151239395, ...
30The company 's net profit rose 11.4 % on the y...positivepositive1.000000The company 's net profit rose 11.4 % on the y...[0.06896018236875534, 0.046189870685338974, -0...
31The Lithuanian beer market made up 14.41 milli...positivepositive0.999999The Lithuanian beer market made up 14.41 milli...[0.0020184037275612354, -0.044685497879981995,...
32Viking Line 's cargo revenue increased by 5.4 ...positivepositive1.000000Viking Line 's cargo revenue increased by 5.4 ...[-0.007756179664283991, -0.04868081212043762, ...
33The fair value of the property portfolio doubl...positivepositive1.000000The fair value of the property portfolio doubl...[0.06604734063148499, -0.025070184841752052, 0...
3410 February 2011 - Finnish media company Sanom...positivepositive1.00000010 February 2011 - Finnish media company Sanom...[0.05996786803007126, 0.03255663812160492, -0....
35A Helsinki : ELIiV today reported EPS of EUR1 ...positivepositive0.999999A Helsinki : ELIiV today reported EPS of EUR1 ...[0.051878154277801514, -0.03290269523859024, -...
36Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...positivepositive1.000000Aspo Plc STOCK EXCHANGE RELEASE February 11 , ...[0.03545805439352989, -0.04956813529133797, -0...
37Commission income increased by 22 % to EUR 4.4...positivepositive1.000000Commission income increased by 22 % to EUR 4.4...[0.05664118379354477, 0.004533933009952307, -0...
38In January , traffic , measured in revenue pas...positivepositive1.000000In January , traffic , measured in revenue pas...[-0.026962362229824066, 0.010590712539851665, ...
39In January-September 2010 , Fiskars ' net prof...positivepositive1.000000In January-September 2010 , Fiskars ' net prof...[0.056088510900735855, 0.0369233600795269, -0....
40Net income from life insurance rose to EUR 16....positivepositive1.000000Net income from life insurance rose to EUR 16....[0.05793088302016258, 0.06312950700521469, -0....
41Nyrstar has also agreed to supply to Talvivaar...positivepositive1.000000Nyrstar has also agreed to supply to Talvivaar...[0.004785533994436264, 0.004442625679075718, -...
42Sales for both the Department Store Division a...positivepositive1.000000Sales for both the Department Store Division a...[-0.050088364630937576, 0.04885219410061836, 0...
43Sales have risen in other export markets .positivepositive1.000000Sales have risen in other export markets .[0.058916959911584854, 0.018443405628204346, -...
44Sales increased due to growing market rates an...positivepositive1.000000Sales increased due to growing market rates an...[0.047733016312122345, 0.010620158165693283, 0...
45The agreement strengthens our long-term partne...positivepositive1.000000The agreement strengthens our long-term partne...[0.06433788686990738, 0.027824176475405693, -0...
46The agreement was signed with Biohit Healthcar...positivepositive1.000000The agreement was signed with Biohit Healthcar...[0.03612205758690834, 0.038267459720373154, -0...
47The company also estimates the already carried...positivepositive1.000000The company also estimates the already carried...[0.04304526373744011, 0.023360760882496834, -0...
48The company 's order book stood at 1.5 bln eur...positivepositive1.000000The company 's order book stood at 1.5 bln eur...[0.036210183054208755, -0.010278576985001564, ...
49The company said that paper demand increased i...positivepositive1.000000The company said that paper demand increased i...[0.06558039039373398, 0.04877239838242531, -0....
50The world 's second largest stainless steel ma...positivepositive1.000000The world 's second largest stainless steel ma...[0.04267223924398422, 0.03184577450156212, -0....
\n","
"],"text/plain":[" document ... default_name_embeddings\n","origin_index ... \n","1 The international electronic industry company ... ... [0.002136496128514409, 0.07194118946790695, -0...\n","2 With the new production plant the company woul... ... [0.05198746547102928, 0.03577739745378494, -0....\n","3 According to the company 's updated strategy f... ... [0.03416536748409271, 0.04053246229887009, -0....\n","4 FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag... ... [0.07730763405561447, -0.045694783329963684, -...\n","5 For the last quarter of 2010 , Componenta 's n... ... [0.05603468790650368, 0.04817350581288338, -0....\n","6 In the third quarter of 2010 , net sales incre... ... [0.037710510194301605, 0.037198420614004135, -...\n","7 Operating profit rose to EUR 13.1 mn from EUR ... ... [0.04557091370224953, 0.0453636609017849, -0.0...\n","8 Operating profit totalled EUR 21.1 mn , up fro... ... [0.05191247910261154, 0.059505216777324677, -0...\n","9 TeliaSonera TLSN said the offer is in line wit... ... [0.07441692799329758, -0.0487477071583271, -0....\n","10 STORA ENSO , NORSKE SKOG , M-REAL , UPM-KYMMEN... ... [0.03200741112232208, 0.03773287683725357, -0....\n","11 A purchase agreement for 7,200 tons of gasolin... ... [0.05590442568063736, 0.041032955050468445, -0...\n","12 Finnish Talentum reports its operating profit ... ... [0.06596074998378754, 0.05897102504968643, -0....\n","13 Clothing retail chain Sepp+Æl+Æ 's sales incre... ... [0.03395465016365051, 0.05171804875135422, 0.0...\n","14 Consolidated net sales increased 16 % to reach... ... [0.060446273535490036, 0.03799470514059067, -0...\n","15 Foundries division reports its sales increased... ... [0.0494563989341259, 0.05158388614654541, -0.0...\n","16 HELSINKI ( AFX ) - Shares closed higher , led ... ... [0.0629865899682045, -0.045351240783929825, -0...\n","17 Incap Contract Manufacturing Services Pvt Ltd ... ... 
[0.05365738272666931, -0.055247869342565536, -...\n","18 Its board of directors will propose a dividend... ... [0.0692642331123352, 0.02292279154062271, -0.0...\n","19 Lifetree was founded in 2000 , and its revenue... ... [0.0810408890247345, 0.039108917117118835, -0....\n","20 ( Filippova ) A trilateral agreement on invest... ... [0.05172618478536606, 0.02967883087694645, -0....\n","21 MegaFon 's subscriber base increased 16.1 % in... ... [0.03825156390666962, 0.001971189398318529, -0...\n","22 Net income from life insurance doubled to EUR ... ... [0.05222763866186142, 0.05695151165127754, -0....\n","23 Net sales increased to EUR193 .3 m from EUR179... ... [0.02272764965891838, 0.016222774982452393, 0....\n","24 Net sales surged by 18.5 % to EUR167 .8 m. Tel... ... [0.05020830035209656, 0.03307913616299629, -0....\n","25 Nordea Group 's operating profit increased in ... ... [0.0497022308409214, 0.023793146014213562, -0....\n","26 Operating profit for the nine-month period inc... ... [0.04339126497507095, 0.024815633893013, -0.02...\n","27 Operating profit for the nine-month period inc... ... [0.035663120448589325, 0.03037247434258461, -0...\n","28 Operating profit for the three-month period in... ... [0.029575243592262268, 0.007764187641441822, -...\n","29 The Brazilian unit of Finnish security solutio... ... [0.047570426017045975, -0.023694489151239395, ...\n","30 The company 's net profit rose 11.4 % on the y... ... [0.06896018236875534, 0.046189870685338974, -0...\n","31 The Lithuanian beer market made up 14.41 milli... ... [0.0020184037275612354, -0.044685497879981995,...\n","32 Viking Line 's cargo revenue increased by 5.4 ... ... [-0.007756179664283991, -0.04868081212043762, ...\n","33 The fair value of the property portfolio doubl... ... [0.06604734063148499, -0.025070184841752052, 0...\n","34 10 February 2011 - Finnish media company Sanom... ... [0.05996786803007126, 0.03255663812160492, -0....\n","35 A Helsinki : ELIiV today reported EPS of EUR1 ... ... 
[0.051878154277801514, -0.03290269523859024, -...\n","36 Aspo Plc STOCK EXCHANGE RELEASE February 11 , ... ... [0.03545805439352989, -0.04956813529133797, -0...\n","37 Commission income increased by 22 % to EUR 4.4... ... [0.05664118379354477, 0.004533933009952307, -0...\n","38 In January , traffic , measured in revenue pas... ... [-0.026962362229824066, 0.010590712539851665, ...\n","39 In January-September 2010 , Fiskars ' net prof... ... [0.056088510900735855, 0.0369233600795269, -0....\n","40 Net income from life insurance rose to EUR 16.... ... [0.05793088302016258, 0.06312950700521469, -0....\n","41 Nyrstar has also agreed to supply to Talvivaar... ... [0.004785533994436264, 0.004442625679075718, -...\n","42 Sales for both the Department Store Division a... ... [-0.050088364630937576, 0.04885219410061836, 0...\n","43 Sales have risen in other export markets . ... [0.058916959911584854, 0.018443405628204346, -...\n","44 Sales increased due to growing market rates an... ... [0.047733016312122345, 0.010620158165693283, 0...\n","45 The agreement strengthens our long-term partne... ... [0.06433788686990738, 0.027824176475405693, -0...\n","46 The agreement was signed with Biohit Healthcar... ... [0.03612205758690834, 0.038267459720373154, -0...\n","47 The company also estimates the already carried... ... [0.04304526373744011, 0.023360760882496834, -0...\n","48 The company 's order book stood at 1.5 bln eur... ... [0.036210183054208755, -0.010278576985001564, ...\n","49 The company said that paper demand increased i... ... [0.06558039039373398, 0.04877239838242531, -0....\n","50 The world 's second largest stainless steel ma... ... 
[0.04267223924398422, 0.03184577450156212, -0....\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609264917602,"user_tz":-300,"elapsed":193623,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8fe5b9aa-c87a-42d3-e00d-920e63ca6aa4"},"source":["fitted_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentsentiment_confidencedefault_name_embeddings
origin_index
0Bitcoin is going to the moon!positive0.999994[0.06468033790588379, -0.040837567299604416, -...
\n","
"],"text/plain":[" document ... default_name_embeddings\n","origin_index ... \n","0 Bitcoin is going to the moon! ... [0.06468033790588379, -0.040837567299604416, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609264917604,"user_tz":-300,"elapsed":193620,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ac9c8b1a-7fdd-4a6f-bdfd-1dbb823d9bf4"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. 
| Currently set to : neutral\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. 
| Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":753},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609264924472,"user_tz":-300,"elapsed":200484,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1dd94bc8-09c8-45db-ab81-bbd64acb8a4b"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 1\n"," positive 0.99 1.00 0.99 99\n","\n"," accuracy 0.99 100\n"," macro avg 0.49 0.50 0.50 100\n","weighted avg 0.98 0.99 0.99 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentysentiment_confidencetextdefault_name_embeddings
origin_index
1The international electronic industry company ...positivenegative1.000000The international electronic industry company ...[0.002136496128514409, 0.07194118946790695, -0...
2With the new production plant the company woul...positivepositive1.000000With the new production plant the company woul...[0.05198746547102928, 0.03577739745378494, -0....
3According to the company 's updated strategy f...positivepositive1.000000According to the company 's updated strategy f...[0.03416536748409271, 0.04053246229887009, -0....
4FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...positivepositive1.000000FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...[0.07730763405561447, -0.045694783329963684, -...
5For the last quarter of 2010 , Componenta 's n...positivepositive1.000000For the last quarter of 2010 , Componenta 's n...[0.05603468790650368, 0.04817350581288338, -0....
.....................
116Operating profit margin increased from 11.2 % ...positivepositive1.000000Operating profit margin increased from 11.2 % ...[0.01058729737997055, -0.008798183873295784, -...
117Operating profit rose to EUR 3.11 mn from EUR ...positivepositive1.000000Operating profit rose to EUR 3.11 mn from EUR ...[0.03610285371541977, 0.04256380349397659, -0....
118Operating profit rose to EUR 5mn from EUR 2.8 ...positivepositive1.000000Operating profit rose to EUR 5mn from EUR 2.8 ...[0.04815328121185303, 0.050376053899526596, -0...
119Operating profit was EUR 24.5 mn , up from EUR...positivepositive1.000000Operating profit was EUR 24.5 mn , up from EUR...[0.048205215483903885, 0.05145161226391792, -0...
120Ramirent 's net sales in the second quarterend...positivepositive1.000000Ramirent 's net sales in the second quarterend...[0.0638015866279602, 0.0272374227643013, -0.04...
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" document ... default_name_embeddings\n","origin_index ... \n","1 The international electronic industry company ... ... [0.002136496128514409, 0.07194118946790695, -0...\n","2 With the new production plant the company woul... ... [0.05198746547102928, 0.03577739745378494, -0....\n","3 According to the company 's updated strategy f... ... [0.03416536748409271, 0.04053246229887009, -0....\n","4 FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag... ... [0.07730763405561447, -0.045694783329963684, -...\n","5 For the last quarter of 2010 , Componenta 's n... ... [0.05603468790650368, 0.04817350581288338, -0....\n","... ... ... ...\n","116 Operating profit margin increased from 11.2 % ... ... [0.01058729737997055, -0.008798183873295784, -...\n","117 Operating profit rose to EUR 3.11 mn from EUR ... ... [0.03610285371541977, 0.04256380349397659, -0....\n","118 Operating profit rose to EUR 5mn from EUR 2.8 ... ... [0.04815328121185303, 0.050376053899526596, -0...\n","119 Operating profit was EUR 24.5 mn , up from EUR... ... [0.048205215483903885, 0.05145161226391792, -0...\n","120 Ramirent 's net sales in the second quarterend... ... [0.0638015866279602, 0.0272374227643013, -0.04...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609264924477,"user_tz":-300,"elapsed":200483,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e259763c-470b-4d46-b3d1-28cf545f5dcd"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266286092,"user_tz":-300,"elapsed":1562094,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4237752f-4fbe-4235-b33d-5d7b8ba29d48"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer 
and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(70) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.88 0.87 0.88 604\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.96 0.92 0.94 1363\n","\n"," accuracy 0.91 1967\n"," macro avg 0.62 0.60 0.61 1967\n","weighted avg 0.94 0.91 0.92 1967\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266449598,"user_tz":-300,"elapsed":1725594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b31b5e1e-3f09-4ab3-e97a-fb32ac87b319"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":124},"executionInfo":{"status":"ok","timestamp":1609266465229,"user_tz":-300,"elapsed":1741220,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5d9cc34a-693c-44d7-e50a-6e0ca5d4e024"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentsentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddings
origin_index
0Tesla plans to invest 10M into the ML sectorpositive0.999980[0.15737222135066986, 0.2598555386066437, 0.85...
\n","
"],"text/plain":[" document ... en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index ... \n","0 Tesla plans to invest 10M into the ML sector ... [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266465232,"user_tz":-300,"elapsed":1741218,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ec54f7c0-8174-4fd4-9db8-51c1d15be3eb"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_finanical_news.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_finanical_news.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class Finance News sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : 
\n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data:\n","\n","
\n","\n","![image.png]()\n","\n","\n","\n","\n","You can achieve these results or even better on this dataset with test data:\n","\n","\n","
\n","\n","\n","![image.png]()"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Financial News Sentiment dataset \n","https://www.kaggle.com/ankurzing/sentiment-analysis-for-financial-news\n","\n","This dataset contains the sentiments for financial news headlines from the perspective of a retail investor. Further details about the dataset can be found in: Malo, P., Sinha, A., Takala, P., Korhonen, P. and Wallenius, J. (2014): “Good debt or bad debt: Detecting semantic orientations in economic texts.” Journal of the American Society for Information Science and Technology."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614562864048,"user_tz":-300,"elapsed":60916,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a08ebe75-173f-4882-cc2b-c7ba5b85bc15"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-03-01 01:39:50-- http://ckl-it.de/wp-content/uploads/2021/01/all-data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 704799 (688K) [text/csv]\n","Saving to: ‘all-data.csv’\n","\n","all-data.csv 100%[===================>] 688.28K --.-KB/s in 0.1s \n","\n","2021-03-01 01:39:50 (5.78 MB/s) - ‘all-data.csv’ saved [704799/704799]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614562864051,"user_tz":-300,"elapsed":60812,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"75677d69-1c05-4504-9988-49d5a5d9cdd1"},"source":["import pandas as pd\n","train_path = '/content/all-data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must be named 'y' and be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neutral\"])]\n","from sklearn.model_selection import train_test_split\n","\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
4719The fair value of the company 's investment pr...negative
373The company 's scheduled traffic , measured in...positive
841Forestries were also higher , driven by yester...positive
2072H+_kan Dahlstr+¦m , head of mobility services ...positive
712Both loans will be used to finance strategic i...positive
.........
4395EMSA Deputy Chairman of the Board Juri Lember ...negative
4810Thanks to the internet , consumers compare pro...negative
34917 March 2011 - Finnish IT company Digia Oyj HE...positive
51Within the framework of the partnership , Noki...positive
2340Asian traffic declined by 3.4 per cent .negative
\n","

1573 rows × 2 columns

\n","
"],"text/plain":[" text y\n","4719 The fair value of the company 's investment pr... negative\n","373 The company 's scheduled traffic , measured in... positive\n","841 Forestries were also higher , driven by yester... positive\n","2072 H+_kan Dahlstr+¦m , head of mobility services ... positive\n","712 Both loans will be used to finance strategic i... positive\n","... ... ...\n","4395 EMSA Deputy Chairman of the Board Juri Lember ... negative\n","4810 Thanks to the internet , consumers compare pro... negative\n","3491 7 March 2011 - Finnish IT company Digia Oyj HE... positive\n","51 Within the framework of the partnership , Noki... positive\n","2340 Asian traffic declined by 3.4 per cent . negative\n","\n","[1573 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1614562993168,"user_tz":-300,"elapsed":189902,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"20a3b173-4be4-408a-9523-8bf21f18439b"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 14\n"," positive 0.72 1.00 0.84 36\n","\n"," accuracy 0.72 50\n"," macro avg 0.36 0.50 0.42 50\n","weighted avg 0.52 0.72 0.60 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextdocumentsentimentdefault_name_embeddingssentiment_confidence
origin_index
4719negativeThe fair value of the company 's investment pr...The fair value of the company 's investment pr...positive[0.036923788487911224, 0.06703860312700272, -0...0.985028
373positiveThe company 's scheduled traffic , measured in...The company 's scheduled traffic , measured in...positive[-0.02716548554599285, 0.041895847767591476, -...0.989986
841positiveForestries were also higher , driven by yester...Forestries were also higher , driven by yester...positive[0.07610785216093063, -0.017332645133137703, -...0.990836
2072positiveH+_kan Dahlstr+¦m , head of mobility services ...H+_kan Dahlstr+¦m , head of mobility services ...positive[0.059579234570264816, 0.01714557223021984, -0...0.987673
712positiveBoth loans will be used to finance strategic i...Both loans will be used to finance strategic i...positive[0.06402906030416489, 0.037328142672777176, -0...0.980973
1722positiveHELSINKI (Thomson Financial)- Kemira GrowHow s...HELSINKI (Thomson Financial)- Kemira GrowHow s...positive[0.07542752474546432, 0.03456662967801094, -0....0.988675
793positive` This is a repeat order to follow successfull...` This is a repeat order to follow successfull...positive[0.016312969848513603, 0.03869722783565521, -0...0.980360
1483positive`` We have significant experience in smartphon...`` We have significant experience in smartphon...positive[0.056398119777441025, -0.010660165920853615, ...0.980428
807positiveFinnish Metso Paper has won an order to supply...Finnish Metso Paper has won an order to supply...positive[0.03834717348217964, 0.05267363414168358, 0.0...0.977349
836positiveFinnish handling systems company Cargotec Oyj ...Finnish handling systems company Cargotec Oyj ...positive[0.0402568019926548, 0.02064136043190956, -0.0...0.987665
1077positiveTeliaSonera is the leading telecommunications ...TeliaSonera is the leading telecommunications ...positive[0.05541086569428444, 0.048524245619773865, -0...0.992725
4078positiveOperating profit excluding restructuring costs...Operating profit excluding restructuring costs...positive[0.06243692338466644, 0.054526831954717636, -0...0.991681
4135negativeThe number of bodily injury cases quadrupled i...The number of bodily injury cases quadrupled i...positive[0.009022029116749763, -0.026918234303593636, ...0.754257
834positiveFinnish consulting and engineering group Poyry...Finnish consulting and engineering group Poyry...positive[0.01760948821902275, -0.015370494686067104, -...0.987969
663positiveThe costs of the new ropax vessels are 30 % lo...The costs of the new ropax vessels are 30 % lo...positive[0.016379177570343018, 0.042399514466524124, 0...0.968588
2131positiveIt also turned in earnings per share ( EPS ) o...It also turned in earnings per share ( EPS ) o...positive[0.04963838681578636, 0.016262372955679893, -0...0.991324
24positiveNet sales surged by 18.5 % to EUR167 .8 m. Tel...Net sales surged by 18.5 % to EUR167 .8 m. Tel...positive[0.07095607370138168, 0.01779617927968502, -0....0.987843
696negativeFinnish power supply solutions and systems pro...Finnish power supply solutions and systems pro...positive[0.02955046109855175, 0.013048158958554268, -0...0.988096
2933positiveHELSINKI ( Thomson Financial ) - M-real said i...HELSINKI ( Thomson Financial ) - M-real said i...positive[0.05395246669650078, 0.032728634774684906, -0...0.993642
727positiveBy 14:29 CET on Monday , shares in Bavarian No...By 14:29 CET on Monday , shares in Bavarian No...positive[0.06754495203495026, 0.02371809259057045, -0....0.986807
3637positiveIn the autumn , it plans to expand service to ...In the autumn , it plans to expand service to ...positive[-0.02860916219651699, 0.03146557882428169, -0...0.988312
2285positiveADP News - Feb 25 , 2009 - Finnish printed cir...ADP News - Feb 25 , 2009 - Finnish printed cir...positive[0.07102368772029877, -0.005087006371468306, -...0.991177
4101positiveBy cutting the number of plants , the group wi...By cutting the number of plants , the group wi...positive[-0.005840806290507317, 0.06139799952507019, -...0.968257
4438negative`` We see that the market continues to be tigh...`` We see that the market continues to be tigh...positive[0.05384731665253639, 0.06735912710428238, -0....0.956433
1825positive`` We are delighted to welcome Elisa to our Bo...`` We are delighted to welcome Elisa to our Bo...positive[0.018601788207888603, 0.04971592128276825, -0...0.974901
950positiveFinnish mobile operator DNA will function as a...Finnish mobile operator DNA will function as a...positive[0.052730534225702286, -0.079906165599823, -0....0.986985
4548negativeThe administrators have indicated a need for 9...The administrators have indicated a need for 9...positive[-0.008651155978441238, 0.02552383951842785, -...0.923833
4716negativeThe company said that the fall in turnover had...The company said that the fall in turnover had...positive[-0.007754180580377579, -0.0018574600107967854...0.974364
639positiveIn the second quarter of 2010 , the group 's p...In the second quarter of 2010 , the group 's p...positive[0.049069344997406006, 0.05393636226654053, -0...0.990063
4550negativeThe total need for staff cuts corresponds to a...The total need for staff cuts corresponds to a...positive[-0.016216445714235306, 0.07977687567472458, -...0.908392
1704negativeNet sales of Finnish Sanoma Learning & Literat...Net sales of Finnish Sanoma Learning & Literat...positive[0.06210406869649887, 0.04862183332443237, -0....0.978863
780positiveTalvivaara also maintains its assumption of tu...Talvivaara also maintains its assumption of tu...positive[0.05548756942152977, 0.011896908283233643, -0...0.981222
165positiveBoth operating profit and net sales for the si...Both operating profit and net sales for the si...positive[0.0282859168946743, 0.02847444638609886, -0.0...0.987370
4615negativeThe airline 's share price closed down slightl...The airline 's share price closed down slightl...positive[0.03676854446530342, 0.04419051110744476, -0....0.988250
4191negativeFinnish Vaahto Group that provides paper-makin...Finnish Vaahto Group that provides paper-makin...positive[-0.023766979575157166, 0.02527744509279728, -...0.985640
4836negativeSales in Finland decreased by 2.0 % , and inte...Sales in Finland decreased by 2.0 % , and inte...positive[0.051058329641819, 0.03175395354628563, -0.03...0.980755
121positiveRevenue grew by 2 percent to x20ac 580 millio...Revenue grew by 2 percent to x20ac 580 million...positive[0.054190147668123245, -0.029659369960427284, ...0.982119
115positiveOperating profit increased by 145.1 % to EUR 8...Operating profit increased by 145.1 % to EUR 8...positive[0.04452420398592949, 0.046940360218286514, -0...0.986740
580positiveThe contract was signed in August with Papua N...The contract was signed in August with Papua N...positive[0.05882549658417702, -0.014190285466611385, 0...0.991058
685positiveIn August-October 2010 , the company 's result...In August-October 2010 , the company 's result...positive[0.02870381809771061, 0.04685402661561966, -0....0.983873
385positiveThe value of the firm 's forestry holdings inc...The value of the firm 's forestry holdings inc...positive[0.06179884076118469, 0.009074743837118149, 0....0.981110
188positiveEquity ratio was 60.9 % compared to 54.2 % In ...Equity ratio was 60.9 % compared to 54.2 % In ...positive[0.012241799384355545, 0.04907766357064247, -0...0.982970
3023positiveMartela said plans to expand its recycled furn...Martela said plans to expand its recycled furn...positive[0.06031253933906555, 0.02930644527077675, -0....0.984959
4663negativeComparable operating profit decreased to EUR 1...Comparable operating profit decreased to EUR 1...positive[0.041685257107019424, 0.030715754255652428, -...0.988551
4036negativeOperating loss totalled EUR 3.2 mn , compared ...Operating loss totalled EUR 3.2 mn , compared ...positive[0.056506961584091187, 0.03850569948554039, -0...0.984684
923positive`` Demand for sports equipment was good in 2005 .`` Demand for sports equipment was good in 2005 .positive[0.053851932287216187, 0.05388070270419121, -0...0.956324
268positivePreviously , the company had guided for EBIT a...Previously , the company had guided for EBIT a...positive[0.030418097972869873, -0.010177094489336014, ...0.986280
722positiveOperating profit was EUR 0.6 mn , up from a lo...Operating profit was EUR 0.6 mn , up from a lo...positive[0.052058979868888855, 0.0432511568069458, -0....0.991042
2208positiveThe company feels these leases are prime locat...The company feels these leases are prime locat...positive[0.013362281024456024, 0.03749179467558861, -0...0.982359
4661negativeCash flow after investments amounted to EUR45m...Cash flow after investments amounted to EUR45m...positive[0.055296167731285095, 0.0607072152197361, -0....0.963672
\n","
"],"text/plain":[" y ... sentiment_confidence\n","origin_index ... \n","4719 negative ... 0.985028\n","373 positive ... 0.989986\n","841 positive ... 0.990836\n","2072 positive ... 0.987673\n","712 positive ... 0.980973\n","1722 positive ... 0.988675\n","793 positive ... 0.980360\n","1483 positive ... 0.980428\n","807 positive ... 0.977349\n","836 positive ... 0.987665\n","1077 positive ... 0.992725\n","4078 positive ... 0.991681\n","4135 negative ... 0.754257\n","834 positive ... 0.987969\n","663 positive ... 0.968588\n","2131 positive ... 0.991324\n","24 positive ... 0.987843\n","696 negative ... 0.988096\n","2933 positive ... 0.993642\n","727 positive ... 0.986807\n","3637 positive ... 0.988312\n","2285 positive ... 0.991177\n","4101 positive ... 0.968257\n","4438 negative ... 0.956433\n","1825 positive ... 0.974901\n","950 positive ... 0.986985\n","4548 negative ... 0.923833\n","4716 negative ... 0.974364\n","639 positive ... 0.990063\n","4550 negative ... 0.908392\n","1704 negative ... 0.978863\n","780 positive ... 0.981222\n","165 positive ... 0.987370\n","4615 negative ... 0.988250\n","4191 negative ... 0.985640\n","4836 negative ... 0.980755\n","121 positive ... 0.982119\n","115 positive ... 0.986740\n","580 positive ... 0.991058\n","685 positive ... 0.983873\n","385 positive ... 0.981110\n","188 positive ... 0.982970\n","3023 positive ... 0.984959\n","4663 negative ... 0.988551\n","4036 negative ... 0.984684\n","923 positive ... 0.956324\n","268 positive ... 0.986280\n","722 positive ... 0.991042\n","2208 positive ... 0.982359\n","4661 negative ... 0.963672\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# 4. 
Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1614562995712,"user_tz":-300,"elapsed":192406,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b8d794ca-756b-44ad-b89c-f2b4d2f3f8bd"},"source":["fitted_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentdefault_name_embeddingssentiment_confidence
origin_index
0According to the most recent update there has ...positive[0.00991145521402359, 0.041628580540418625, -0...0.968396
\n","
"],"text/plain":[" document ... sentiment_confidence\n","origin_index ... \n","0 According to the most recent update there has ... ... 0.968396\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## 5. Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1614562995714,"user_tz":-300,"elapsed":192393,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e82d5e73-c9cc-4e73-be7b-f8fe6bbb767b"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setLoadSP(False) | Info: Whether to load SentencePiece ops file which is required only by multi-lingual models. This is not changeable after it's set with a pretrained model nor it is compatible with Windows. | Currently set to : False\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## 6. Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":793},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1614563007186,"user_tz":-300,"elapsed":203845,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d73a2b68-a719-4664-e4dd-6fedd31a70f7"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 30\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.74 1.00 0.85 70\n","\n"," accuracy 0.70 100\n"," macro avg 0.25 0.33 0.28 100\n","weighted avg 0.52 0.70 0.60 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextdocumentsentimentdefault_name_embeddingssentiment_confidence
origin_index
4719negativeThe fair value of the company 's investment pr...The fair value of the company 's investment pr...positive[0.036923788487911224, 0.06703860312700272, -0...0.790602
373positiveThe company 's scheduled traffic , measured in...The company 's scheduled traffic , measured in...positive[-0.02716548554599285, 0.041895847767591476, -...0.968895
841positiveForestries were also higher , driven by yester...Forestries were also higher , driven by yester...positive[0.07610785216093063, -0.017332645133137703, -...0.954134
2072positiveH+_kan Dahlstr+¦m , head of mobility services ...H+_kan Dahlstr+¦m , head of mobility services ...positive[0.059579234570264816, 0.01714557223021984, -0...0.978695
712positiveBoth loans will be used to finance strategic i...Both loans will be used to finance strategic i...positive[0.06402906030416489, 0.037328142672777176, -0...0.732152
.....................
542negativePharmaceuticals group Orion Corp reported a fa...Pharmaceuticals group Orion Corp reported a fa...positive[0.037232112139463425, 0.0271251630038023, -0....0.782454
2061positive` Very recommendable ' is the Nokian Z G2 acco...` Very recommendable ' is the Nokian Z G2 acco...positive[0.014071580022573471, 0.0036627694498747587, ...0.979991
565positiveThe growth of net sales has continued favourab...The growth of net sales has continued favourab...positive[0.05051519721746445, 0.05987134203314781, -0....0.953826
50positiveThe world 's second largest stainless steel ma...The world 's second largest stainless steel ma...positive[0.04267223924398422, 0.03184577450156212, -0....0.972509
692negativeAlso construction expenses have gone up in Rus...Also construction expenses have gone up in Rus...positive[0.015224196948111057, 0.0498930849134922, -0....0.807450
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" y ... sentiment_confidence\n","origin_index ... \n","4719 negative ... 0.790602\n","373 positive ... 0.968895\n","841 positive ... 0.954134\n","2072 positive ... 0.978695\n","712 positive ... 0.732152\n","... ... ... ...\n","542 negative ... 0.782454\n","2061 positive ... 0.979991\n","565 positive ... 0.953826\n","50 positive ... 0.972509\n","692 negative ... 0.807450\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["#7. Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614563007189,"user_tz":-300,"elapsed":203788,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d4ac9450-f6df-4139-a1bd-f4459e68115a"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614564053723,"user_tz":-300,"elapsed":1250308,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9520a8fb-0699-40e1-db20-6787de0b0f4b"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer 
and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(70) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.87 0.85 0.86 484\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.97 0.92 0.94 1089\n","\n"," accuracy 0.90 1573\n"," macro avg 0.61 0.59 0.60 1573\n","weighted avg 0.94 0.90 0.92 1573\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 7.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614564323006,"user_tz":-300,"elapsed":1519585,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"937933cc-9b84-4cf7-9610-8e79e221b648"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.80 0.76 0.78 120\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.96 0.88 0.92 274\n","\n"," accuracy 0.85 394\n"," macro avg 0.58 0.55 0.57 394\n","weighted avg 0.91 0.85 0.88 394\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 8. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266449598,"user_tz":-300,"elapsed":1725594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b31b5e1e-3f09-4ab3-e97a-fb32ac87b319"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 9. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":124},"executionInfo":{"status":"ok","timestamp":1609266465229,"user_tz":-300,"elapsed":1741220,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5d9cc34a-693c-44d7-e50a-6e0ca5d4e024"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('According to the most recent update there has been a major decrese in the rate of oil')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentsentiment_confidenceen_embed_sentence_small_bert_L12_768_embeddings
origin_index
0Tesla plans to invest 10M into the ML sectorpositive0.999980[0.15737222135066986, 0.2598555386066437, 0.85...
\n","
"],"text/plain":[" document ... en_embed_sentence_small_bert_L12_768_embeddings\n","origin_index ... \n","0 Tesla plans to invest 10M into the ML sector ... [0.15737222135066986, 0.2598555386066437, 0.85...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609266465232,"user_tz":-300,"elapsed":1741218,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ec54f7c0-8174-4fd4-9db8-51c1d15be3eb"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_natural_disasters.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_natural_disasters.ipynb new file mode 100644 index 00000000..da869354 --- /dev/null +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_natural_disasters.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_natural_disasters.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_natural_disasters.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 Class Natural Disasters Sentiment Classifer Training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data:\n","\n","\n","
\n","\n","![image.png]()\n","\n","You can achieve these results or even better on this dataset with test data:\n","\n","\n","
\n","\n","\n","![Screenshot 2021-02-25 142700.png]()"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614242445896,"user_tz":-300,"elapsed":94650,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a0a80772-71a7-410e-d8fe-2cd174162e51"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 63kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 19.5MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=3602214fb80c68afcb3d7c4ea233dcff49c3f269b66f32be9e3a461f9628b0e2\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Disaster Sentiment dataset \n","https://www.kaggle.com/vstepanenko/disaster-tweets\n","## Context\n","\n","The file contains over 11,000 tweets associated with disaster keywords like “crash”, “quarantine”, and “bush fires” as well as the location and keyword itself. The data structure was inherited from Disasters on social media\n","\n","The tweets were collected on Jan 14th, 2020.\n","\n","Some of the topics people were tweeting:\n","\n","- The eruption of Taal Volcano in Batangas, Philippines\n","- Coronavirus\n","- Bushfires in Australia\n","- Iran's downing of airplane flight PS752\n","\n","Disclaimer: The dataset contains text that may be considered profane, vulgar, or offensive."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614242446852,"user_tz":-300,"elapsed":95588,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d71ebb53-c808-40e0-a331-293a02d5a519"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/tweets.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-25 08:39:36-- http://ckl-it.de/wp-content/uploads/2021/02/tweets.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 1207952 (1.2M) [text/csv]\n","Saving to: ‘tweets.csv’\n","\n","tweets.csv 100%[===================>] 1.15M 1.56MB/s in 0.7s \n","\n","2021-02-25 08:39:37 (1.56 MB/s) - ‘tweets.csv’ saved [1207952/1207952]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614242447251,"user_tz":-300,"elapsed":95968,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"40cfe5ac-728d-4c83-f189-ac57491f1f6e"},"source":["import pandas as pd\n","train_path = '/content/tweets.csv'\n","\n","train_df = pd.read_csv(train_path,sep=\",\", encoding='latin-1')\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df.dropna()\n","positive = train_df[train_df['y']==(\"positive\")].iloc[:1500]\n","negative = train_df[train_df['y']==(\"negative\")].iloc[:1500]\n","positive = positive.append(negative, ignore_index = True)\n","positive = positive.sample(frac=1).reset_index(drop=True)\n","train_df = positive\n","from sklearn.model_selection import train_test_split\n","\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Unnamed: 0idkeywordlocationtexttargety
2326602.010216.0thunderstormatl•nola•nshThis thunderstorm is amazing rn 🥴1.0positive
220956.04783.0emergencyBharatThis idiot ws CM of UP. He is asking doc on em...0.0negative
321147.03775.0derailmentChiraq,DriLLinoisBe not endowed in the flesh of your ego nor dr...0.0negative
1412785.011154.0woundedIslamabad, PakistanSnow and Avalunch Update (SDMA) 18 people incl...1.0positive
11302726.01056.0blew%20upuk | ˣᵗʳᵃ ᵇᵖʷum good morning this blew up what https://t.co...0.0negative
........................
2630718.04232.0displacedGlobalJanuary 12 marks the anniversary of the 2010 #...1.0positive
18332645.03905.0destroymetz | Sarah♡But the story is this She’ll destroy with...0.0negative
6103400.010250.0thunderstormGreenwood, MississippiOne of the hardest-hit areas of he severe thun...1.0positive
11811552.05080.0evacuatedBtgGood News: Original Post: \"Residents had gone ...0.0negative
20562201.01350.0body%20bagIrelandHuman body parts found in bag outside houses i...1.0positive
\n","

2400 rows × 7 columns

\n","
"],"text/plain":[" Unnamed: 0 id ... target y\n","232 6602.0 10216.0 ... 1.0 positive\n","2209 56.0 4783.0 ... 0.0 negative\n","321 147.0 3775.0 ... 0.0 negative\n","141 2785.0 11154.0 ... 1.0 positive\n","1130 2726.0 1056.0 ... 0.0 negative\n","... ... ... ... ... ...\n","2630 718.0 4232.0 ... 1.0 positive\n","1833 2645.0 3905.0 ... 0.0 negative\n","610 3400.0 10250.0 ... 1.0 positive\n","1181 1552.0 5080.0 ... 0.0 negative\n","2056 2201.0 1350.0 ... 1.0 positive\n","\n","[2400 rows x 7 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1613549340589,"user_tz":-300,"elapsed":223159,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b32c4f24-da34-4f2d-e28b-571ccce3d6bc"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 1.00 0.40 0.57 25\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.94 0.64 0.76 25\n","\n"," accuracy 0.52 50\n"," macro avg 0.65 0.35 0.44 50\n","weighted avg 0.97 0.52 0.67 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
targetUnnamed: 0ysentimentkeywordsentiment_confidenceidtextdefault_name_embeddingslocationdocument
origin_index
00.01649.0negativenegativehail0.6200926069.0Ladies &amp; Gentleman Pls Welcome The Full Me...[0.0718693882226944, -0.018772797659039497, -0...Panama, INLadies &amp; Gentleman Pls Welcome The Full Me...
10.0999.0negativeneutraldrowning0.5211894509.0Emergency meeting for her millionaire grandson...[-0.06196709722280502, 0.03117111697793007, 0....BurntislandEmergency meeting for her millionaire grandson...
21.0475.0positiveneutralrescued0.5571768573.0Update from . Looks like they're getting rescu...[0.0428241565823555, -0.04131714627146721, -0....PhilippinesUpdate from . Looks like they're getting rescu...
30.02779.0negativeneutralevacuate0.5058615026.0Bridal couple were little affected in contrast...[-0.040058307349681854, 0.006242941599339247, ...JPBridal couple were little affected in contrast...
40.01517.0negativeneutralairplane%20accident0.564590163.0How exactly does one accidentally fire a rocke...[-0.016879839822649956, 0.06649093329906464, -...IowaHow exactly does one accidentally fire a rocke...
50.01400.0negativeneutralfamine0.5584665307.0President Edgar Lungu told journalists his buy...[0.05988643690943718, -0.03577134758234024, -0...EarthPresident Edgar Lungu told journalists his buy...
60.0215.0negativenegativeapocalypse0.666880338.0Yea, but let's be honest. Kids value pretty mu...[-0.029979687184095383, -0.016652535647153854,...Sword Fight On The HeightsYea, but let's be honest. Kids value pretty mu...
70.0704.0negativeneutraldanger0.5634723039.0Can anyone believe the cheek of calling a bush...[-0.04716842249035835, -0.03333980590105057, -...CanberraCan anyone believe the cheek of calling a bush...
81.03149.0positiveneutralapocalypse0.568612323.0no amount of sci-fi media could’ve prepar...[-0.03867451846599579, 0.027363918721675873, -...Madridno amount of sci-fi media could’ve prepar...
91.03861.0positivepositivesmoke0.6290869348.0Wildfire smoke to circle planet and return to ...[-0.04033537954092026, -0.006633501499891281, ...SydneyWildfire smoke to circle planet and return to ...
101.06698.0positiveneutralweapon0.59064210883.0\"Somehow or other, we've got to stop the Irani...[0.047660667449235916, -0.0322858951985836, -0...Hungary\"Somehow or other, we've got to stop the Irani...
111.05728.0positivepositivewild%20fires0.60380011016.0An estimated 2 million hectares of land burnt ...[0.006747904233634472, -0.05033961310982704, -...Global Citizen. NYC, L.A. etc,An estimated 2 million hectares of land burnt ...
121.04099.0positivepositiveflooding0.6097655908.0Storm surge from Storm Brendan flooding the ha...[-0.01745571382343769, -0.02025921642780304, 0...BallycastleStorm surge from Storm Brendan flooding the ha...
130.01221.0negativenegativebattle0.666491723.0*\"DMC3 Cerberus Battle\" playing* https://t.co/...[-0.05303790792822838, 0.029018903151154518, -...Украина*\"DMC3 Cerberus Battle\" playing* https://t.co/...
140.02082.0negativenegativehail0.6473616052.0ALL HAIL CHEETOS[0.025289693847298622, -0.006686831824481487, ...cheetosALL HAIL CHEETOS
151.0658.0positivepositivearson0.624684458.0Climate Change Or Arson Jihad? - https://t.co/...[-0.04567283019423485, 0.0007029167609289289, ...USAClimate Change Or Arson Jihad? - https://t.co/...
160.01405.0negativepositiveairplane%20accident0.614355144.0Mass anti-government protests erupt in Iran af...[0.007361058611422777, -1.4808237210672814e-05...NYCMass anti-government protests erupt in Iran af...
171.03461.0positiveneutraloutbreak0.5824958012.0He Greta you might want to raise the issue of ...[0.007896085269749165, -0.018556153401732445, ...Islamabad, PakistanHe Greta you might want to raise the issue of ...
181.05616.0positiveneutralcollision0.5983942546.0Police are seeking witnesses as they investiga...[-0.07210832089185715, -0.0818963497877121, -0...Hamilton OntarioPolice are seeking witnesses as they investiga...
191.06274.0positivepositiveinundated0.6119186926.0Indonesia 🇮🇩 is facing its own c...[0.021613234654068947, -0.006164511665701866, ...AfricaIndonesia 🇮🇩 is facing its own c...
201.01242.0positivepositivewild%20fires0.60645711046.0There's method to the madness with the Califor...[0.047039393335580826, 0.029535269364714622, -...OklahomaThere's method to the madness with the Califor...
210.01865.0negativeneutralcrash0.5395192678.0#NotMyPM on BBC says #ReleaseTheRussiaReport w...[-0.036455944180488586, -0.04565983638167381, ...United Kingdom, Europe#NotMyPM on BBC says #ReleaseTheRussiaReport w...
220.0298.0negativenegativeambulance0.693735203.0Jinu : Are you okay? You hit your head and fai...[-0.07107605040073395, 0.05997276306152344, 0....EverywhereJinu : Are you okay? You hit your head and fai...
230.02881.0negativeneutraldemolish0.5454023421.0We won’t demolish properties without enga...[0.08048725128173828, 0.04733644425868988, -0....Lagos, NigeriaWe won’t demolish properties without enga...
240.0809.0negativeneutralannihilated0.503223272.0A devastating and thorough refutation of and N...[0.03053135797381401, 0.018328236415982246, -0...USAA devastating and thorough refutation of and N...
251.05736.0positivepositivehazardous0.6241056279.0Updated air quality forecast for today and the...[-0.03537615016102791, -0.009700290858745575, ...Melbourne, AustraliaUpdated air quality forecast for today and the...
260.0490.0negativeneutralannihilated0.587514234.0Zero chance of any of those candidates winning...[0.016370240598917007, 0.015815315768122673, -...West Midlands, EnglandZero chance of any of those candidates winning...
271.02133.0positiveneutralderailment0.5276093739.0They tell lies. A train I get was showing as c...[-0.07224728167057037, 0.0546182319521904, 0.0...Welwyn, EastThey tell lies. A train I get was showing as c...
281.01170.0positiveneutralemergency%20services0.5909414865.0Emergency services at scene of multi-vehicle c...[-0.053703196346759796, -0.05499571934342384, ...DonegalEmergency services at scene of multi-vehicle c...
291.04017.0positiveneutralpolice0.5872938226.0In 2017, attack on police lines in Shopian. Da...[0.06364629417657852, -0.07933878153562546, -0...New Delhi, IndiaIn 2017, attack on police lines in Shopian. Da...
301.0284.0positivepositiveburned0.6080991819.0Thousands of churches in France and the UK hav...[0.027487952262163162, 0.052743472158908844, -...Limerick, IrelandThousands of churches in France and the UK hav...
310.0942.0negativenegativedesolation0.6655783823.0Musician or band contact us https://t.co/NKsqk...[-0.06283976882696152, 0.04715868458151817, -0...Rock PlanetMusician or band contact us https://t.co/NKsqk...
321.06823.0positivepositivearmy0.614401418.0Army IDs Two Paratroopers Killed by Roadside B...[-0.0569257028400898, -0.07342782616615295, -0...New York City & a VegetarianArmy IDs Two Paratroopers Killed by Roadside B...
331.01353.0positivepositivedestroyed0.6036053953.0Shot and killed in front of his wife, sister a...[-0.009694310836493969, -0.040014732629060745,...Houston, TXShot and killed in front of his wife, sister a...
341.0887.0positivepositivefamine0.6031175290.0Adolf Hitler took 12 years to murder 6 million...[0.017979739233851433, -0.019764618948101997, ...HyderabadAdolf Hitler took 12 years to murder 6 million...
350.02846.0negativeneutralbridge%20collapse0.5393491678.0Human error and poor engineering caused that b...[-0.02368932031095028, 0.05375693738460541, -0...Beacon Falls, CTHuman error and poor engineering caused that b...
361.06805.0positiveneutralcrashed0.5922702693.0Boeing employees mocked Lion Air staff for see...[0.013637347146868706, 0.040167536586523056, -...Tucson, AZBoeing employees mocked Lion Air staff for see...
371.04998.0positivepositivefatal0.6191925335.0U.S. Attorney General William Barr said a Saud...[0.029709110036492348, -0.06744032353162766, -...https://maps.app.goo.gl/YLrsVhU.S. Attorney General William Barr said a Saud...
380.02476.0negativenegativeattack0.664207553.0me having an anxiety attack trying to look nor...[-0.04544655606150627, -0.07944270968437195, -...Lao P.D.Rme having an anxiety attack trying to look nor...
390.01844.0negativenegativeannihilation0.611182288.0I loved and hated #UncutGems, like several mov...[-0.010108539834618568, 0.013281909748911858, ...Occupied Multnomah Chinook PDXI loved and hated #UncutGems, like several mov...
400.02323.0negativeneutralcollision0.5898312608.0Simple learning of kites. 1) As soon as you go...[-0.0300322026014328, 0.04086627438664436, 0.0...Gujarat, IndiaSimple learning of kites. 1) As soon as you go...
410.02068.0negativeneutralbody%20bags0.5090741459.0-stung and soldiers go home in body bags. IâÂ...[-0.02494075335562229, 0.044942084699869156, -...St.Paul Minnesota-stung and soldiers go home in body bags. IâÂ...
421.01692.0positivepositivedebris0.6202053279.0Hi , A737 south at Linwood cut off just passed...[-0.027976034209132195, -0.01899741217494011, ...DumbartonHi , A737 south at Linwood cut off just passed...
431.03255.0positivepositiveoil%20spill0.6134307940.0Livestock Carrier Spills Fuel Oil at Port of C...[-0.050569694489240646, 0.02741994708776474, -...Fort Lauderdale, FloridaLivestock Carrier Spills Fuel Oil at Port of C...
441.02253.0positivepositivehijacking0.6239016516.0LAW &amp; ORDER: Woman charged with DUI after ...[0.0002868208975996822, -0.07291881740093231, ...Atlanta, GA 770-578-2344LAW &amp; ORDER: Woman charged with DUI after ...
451.04046.0positivepositivecrash0.6238712628.0BREAKING: Iranian Judiciary spokesman says sev...[0.03708343580365181, -0.05643052980303764, -0...Russia. In the dense forests.BREAKING: Iranian Judiciary spokesman says sev...
460.0283.0negativenegativecrashed0.6580092695.0They hear ur having a bad time, no sympathy. h...[0.005220184568315744, 0.07696899026632309, 0....You know where I dey, na NaijaThey hear ur having a bad time, no sympathy. h...
470.02280.0negativenegativedestroyed0.6709743959.0She's spot on though. She's explaining how clo...[-0.016363494098186493, 0.05821079760789871, -...Selma, ALShe's spot on though. She's explaining how clo...
480.0236.0negativeneutralderail0.5451483628.0A subtle design feature of the AR-15 rifle has...[0.04045148566365242, -0.027726944535970688, -...Atlanta/Stone Mountain, GAA subtle design feature of the AR-15 rifle has...
490.02556.0negativeneutralcollapsed0.5992912412.0The wedding party all collapsed in the room So...[-0.010037141852080822, 0.006907069124281406, ...Wonderland🌹The wedding party all collapsed in the room So...
\n","
"],"text/plain":[" target ... document\n","origin_index ... \n","0 0.0 ... Ladies & Gentleman Pls Welcome The Full Me...\n","1 0.0 ... Emergency meeting for her millionaire grandson...\n","2 1.0 ... Update from . Looks like they're getting rescu...\n","3 0.0 ... Bridal couple were little affected in contrast...\n","4 0.0 ... How exactly does one accidentally fire a rocke...\n","5 0.0 ... President Edgar Lungu told journalists his buy...\n","6 0.0 ... Yea, but let's be honest. Kids value pretty mu...\n","7 0.0 ... Can anyone believe the cheek of calling a bush...\n","8 1.0 ... no amount of sci-fi media could’ve prepar...\n","9 1.0 ... Wildfire smoke to circle planet and return to ...\n","10 1.0 ... \"Somehow or other, we've got to stop the Irani...\n","11 1.0 ... An estimated 2 million hectares of land burnt ...\n","12 1.0 ... Storm surge from Storm Brendan flooding the ha...\n","13 0.0 ... *\"DMC3 Cerberus Battle\" playing* https://t.co/...\n","14 0.0 ... ALL HAIL CHEETOS\n","15 1.0 ... Climate Change Or Arson Jihad? - https://t.co/...\n","16 0.0 ... Mass anti-government protests erupt in Iran af...\n","17 1.0 ... He Greta you might want to raise the issue of ...\n","18 1.0 ... Police are seeking witnesses as they investiga...\n","19 1.0 ... Indonesia 🇮🇩 is facing its own c...\n","20 1.0 ... There's method to the madness with the Califor...\n","21 0.0 ... #NotMyPM on BBC says #ReleaseTheRussiaReport w...\n","22 0.0 ... Jinu : Are you okay? You hit your head and fai...\n","23 0.0 ... We won’t demolish properties without enga...\n","24 0.0 ... A devastating and thorough refutation of and N...\n","25 1.0 ... Updated air quality forecast for today and the...\n","26 0.0 ... Zero chance of any of those candidates winning...\n","27 1.0 ... They tell lies. A train I get was showing as c...\n","28 1.0 ... Emergency services at scene of multi-vehicle c...\n","29 1.0 ... In 2017, attack on police lines in Shopian. Da...\n","30 1.0 ... 
Thousands of churches in France and the UK hav...\n","31 0.0 ... Musician or band contact us https://t.co/NKsqk...\n","32 1.0 ... Army IDs Two Paratroopers Killed by Roadside B...\n","33 1.0 ... Shot and killed in front of his wife, sister a...\n","34 1.0 ... Adolf Hitler took 12 years to murder 6 million...\n","35 0.0 ... Human error and poor engineering caused that b...\n","36 1.0 ... Boeing employees mocked Lion Air staff for see...\n","37 1.0 ... U.S. Attorney General William Barr said a Saud...\n","38 0.0 ... me having an anxiety attack trying to look nor...\n","39 0.0 ... I loved and hated #UncutGems, like several mov...\n","40 0.0 ... Simple learning of kites. 1) As soon as you go...\n","41 0.0 ... -stung and soldiers go home in body bags. IâÂ...\n","42 1.0 ... Hi , A737 south at Linwood cut off just passed...\n","43 1.0 ... Livestock Carrier Spills Fuel Oil at Port of C...\n","44 1.0 ... LAW & ORDER: Woman charged with DUI after ...\n","45 1.0 ... BREAKING: Iranian Judiciary spokesman says sev...\n","46 0.0 ... They hear ur having a bad time, no sympathy. h...\n","47 0.0 ... She's spot on though. She's explaining how clo...\n","48 0.0 ... A subtle design feature of the AR-15 rifle has...\n","49 0.0 ... The wedding party all collapsed in the room So...\n","\n","[50 rows x 11 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# 4. Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1613549343044,"user_tz":-300,"elapsed":225599,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"da6c3c0b-085d-473e-d4f0-28bfcc165f8c"},"source":["fitted_pipe.predict(\"All the buildings in the capital were destroyed\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimentsentiment_confidencedefault_name_embeddingsdocument
origin_index
0neutral0.592779[0.01043090783059597, 0.06007970869541168, -0....All the buildings in the capital were destroyed
\n","
"],"text/plain":[" sentiment ... document\n","origin_index ... \n","0 neutral ... All the buildings in the capital were destroyed\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## 5. Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1613549343046,"user_tz":-300,"elapsed":225592,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2389fc73-ff47-43a4-a860-756ecb03509d"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setLoadSP(False) | Info: Whether to load SentencePiece ops file which is required only by multi-lingual models. This is not changeable after it's set with a pretrained model nor it is compatible with Windows. 
| Currently set to : False\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## 6. 
Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1613549349275,"user_tz":-300,"elapsed":231811,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bc2745cc-3e35-4100-8d91-48ea87382f17"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","# sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.96 0.96 0.96 25\n"," positive 0.96 0.96 0.96 25\n","\n"," accuracy 0.96 50\n"," macro avg 0.96 0.96 0.96 50\n","weighted avg 0.96 0.96 0.96 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
targetUnnamed: 0ysentimentkeywordsentiment_confidenceidtextdefault_name_embeddingslocationdocument
origin_index
00.01649.0negativenegativehail0.9696916069.0Ladies &amp; Gentleman Pls Welcome The Full Me...[0.0718693882226944, -0.018772797659039497, -0...Panama, INLadies &amp; Gentleman Pls Welcome The Full Me...
10.0999.0negativenegativedrowning0.8827124509.0Emergency meeting for her millionaire grandson...[-0.06196709722280502, 0.03117111697793007, 0....BurntislandEmergency meeting for her millionaire grandson...
21.0475.0positivepositiverescued0.7994558573.0Update from . Looks like they're getting rescu...[0.0428241565823555, -0.04131714627146721, -0....PhilippinesUpdate from . Looks like they're getting rescu...
30.02779.0negativenegativeevacuate0.9262485026.0Bridal couple were little affected in contrast...[-0.040058307349681854, 0.006242941599339247, ...JPBridal couple were little affected in contrast...
40.01517.0negativenegativeairplane%20accident0.799277163.0How exactly does one accidentally fire a rocke...[-0.016879839822649956, 0.06649093329906464, -...IowaHow exactly does one accidentally fire a rocke...
50.01400.0negativenegativefamine0.7766905307.0President Edgar Lungu told journalists his buy...[0.05988643690943718, -0.03577134758234024, -0...EarthPresident Edgar Lungu told journalists his buy...
60.0215.0negativenegativeapocalypse0.966522338.0Yea, but let's be honest. Kids value pretty mu...[-0.029979687184095383, -0.016652535647153854,...Sword Fight On The HeightsYea, but let's be honest. Kids value pretty mu...
70.0704.0negativenegativedanger0.8880853039.0Can anyone believe the cheek of calling a bush...[-0.04716842249035835, -0.03333980590105057, -...CanberraCan anyone believe the cheek of calling a bush...
81.03149.0positivepositiveapocalypse0.796345323.0no amount of sci-fi media could’ve prepar...[-0.03867451846599579, 0.027363918721675873, -...Madridno amount of sci-fi media could’ve prepar...
91.03861.0positivepositivesmoke0.9450419348.0Wildfire smoke to circle planet and return to ...[-0.04033537954092026, -0.006633501499891281, ...SydneyWildfire smoke to circle planet and return to ...
101.06698.0positivepositiveweapon0.65260310883.0\"Somehow or other, we've got to stop the Irani...[0.047660667449235916, -0.0322858951985836, -0...Hungary\"Somehow or other, we've got to stop the Irani...
111.05728.0positivepositivewild%20fires0.92294811016.0An estimated 2 million hectares of land burnt ...[0.006747904233634472, -0.05033961310982704, -...Global Citizen. NYC, L.A. etc,An estimated 2 million hectares of land burnt ...
121.04099.0positivepositiveflooding0.9328915908.0Storm surge from Storm Brendan flooding the ha...[-0.01745571382343769, -0.02025921642780304, 0...BallycastleStorm surge from Storm Brendan flooding the ha...
130.01221.0negativenegativebattle0.971089723.0*\"DMC3 Cerberus Battle\" playing* https://t.co/...[-0.05303790792822838, 0.029018903151154518, -...Украина*\"DMC3 Cerberus Battle\" playing* https://t.co/...
140.02082.0negativenegativehail0.9769466052.0ALL HAIL CHEETOS[0.025289693847298622, -0.006686831824481487, ...cheetosALL HAIL CHEETOS
151.0658.0positivepositivearson0.947676458.0Climate Change Or Arson Jihad? - https://t.co/...[-0.04567283019423485, 0.0007029167609289289, ...USAClimate Change Or Arson Jihad? - https://t.co/...
160.01405.0negativepositiveairplane%20accident0.759179144.0Mass anti-government protests erupt in Iran af...[0.007361058611422777, -1.4808237210672814e-05...NYCMass anti-government protests erupt in Iran af...
171.03461.0positivepositiveoutbreak0.8476318012.0He Greta you might want to raise the issue of ...[0.007896085269749165, -0.018556153401732445, ...Islamabad, PakistanHe Greta you might want to raise the issue of ...
181.05616.0positivepositivecollision0.8971202546.0Police are seeking witnesses as they investiga...[-0.07210832089185715, -0.0818963497877121, -0...Hamilton OntarioPolice are seeking witnesses as they investiga...
191.06274.0positivepositiveinundated0.9341496926.0Indonesia 🇮🇩 is facing its own c...[0.021613234654068947, -0.006164511665701866, ...AfricaIndonesia 🇮🇩 is facing its own c...
201.01242.0positivepositivewild%20fires0.87105411046.0There's method to the madness with the Califor...[0.047039393335580826, 0.029535269364714622, -...OklahomaThere's method to the madness with the Califor...
210.01865.0negativenegativecrash0.8882672678.0#NotMyPM on BBC says #ReleaseTheRussiaReport w...[-0.036455944180488586, -0.04565983638167381, ...United Kingdom, Europe#NotMyPM on BBC says #ReleaseTheRussiaReport w...
220.0298.0negativenegativeambulance0.985977203.0Jinu : Are you okay? You hit your head and fai...[-0.07107605040073395, 0.05997276306152344, 0....EverywhereJinu : Are you okay? You hit your head and fai...
230.02881.0negativenegativedemolish0.9635543421.0We won’t demolish properties without enga...[0.08048725128173828, 0.04733644425868988, -0....Lagos, NigeriaWe won’t demolish properties without enga...
240.0809.0negativenegativeannihilated0.922194272.0A devastating and thorough refutation of and N...[0.03053135797381401, 0.018328236415982246, -0...USAA devastating and thorough refutation of and N...
251.05736.0positivepositivehazardous0.9701646279.0Updated air quality forecast for today and the...[-0.03537615016102791, -0.009700290858745575, ...Melbourne, AustraliaUpdated air quality forecast for today and the...
260.0490.0negativenegativeannihilated0.966277234.0Zero chance of any of those candidates winning...[0.016370240598917007, 0.015815315768122673, -...West Midlands, EnglandZero chance of any of those candidates winning...
271.02133.0positivenegativederailment0.6407933739.0They tell lies. A train I get was showing as c...[-0.07224728167057037, 0.0546182319521904, 0.0...Welwyn, EastThey tell lies. A train I get was showing as c...
281.01170.0positivepositiveemergency%20services0.7650484865.0Emergency services at scene of multi-vehicle c...[-0.053703196346759796, -0.05499571934342384, ...DonegalEmergency services at scene of multi-vehicle c...
291.04017.0positivepositivepolice0.8243288226.0In 2017, attack on police lines in Shopian. Da...[0.06364629417657852, -0.07933878153562546, -0...New Delhi, IndiaIn 2017, attack on police lines in Shopian. Da...
301.0284.0positivepositiveburned0.7923801819.0Thousands of churches in France and the UK hav...[0.027487952262163162, 0.052743472158908844, -...Limerick, IrelandThousands of churches in France and the UK hav...
310.0942.0negativenegativedesolation0.9818443823.0Musician or band contact us https://t.co/NKsqk...[-0.06283976882696152, 0.04715868458151817, -0...Rock PlanetMusician or band contact us https://t.co/NKsqk...
321.06823.0positivepositivearmy0.916005418.0Army IDs Two Paratroopers Killed by Roadside B...[-0.0569257028400898, -0.07342782616615295, -0...New York City & a VegetarianArmy IDs Two Paratroopers Killed by Roadside B...
331.01353.0positivepositivedestroyed0.8493633953.0Shot and killed in front of his wife, sister a...[-0.009694310836493969, -0.040014732629060745,...Houston, TXShot and killed in front of his wife, sister a...
341.0887.0positivepositivefamine0.8532405290.0Adolf Hitler took 12 years to murder 6 million...[0.017979739233851433, -0.019764618948101997, ...HyderabadAdolf Hitler took 12 years to murder 6 million...
350.02846.0negativenegativebridge%20collapse0.8960281678.0Human error and poor engineering caused that b...[-0.02368932031095028, 0.05375693738460541, -0...Beacon Falls, CTHuman error and poor engineering caused that b...
361.06805.0positivepositivecrashed0.7281272693.0Boeing employees mocked Lion Air staff for see...[0.013637347146868706, 0.040167536586523056, -...Tucson, AZBoeing employees mocked Lion Air staff for see...
371.04998.0positivepositivefatal0.9271185335.0U.S. Attorney General William Barr said a Saud...[0.029709110036492348, -0.06744032353162766, -...https://maps.app.goo.gl/YLrsVhU.S. Attorney General William Barr said a Saud...
380.02476.0negativenegativeattack0.976725553.0me having an anxiety attack trying to look nor...[-0.04544655606150627, -0.07944270968437195, -...Lao P.D.Rme having an anxiety attack trying to look nor...
390.01844.0negativenegativeannihilation0.958863288.0I loved and hated #UncutGems, like several mov...[-0.010108539834618568, 0.013281909748911858, ...Occupied Multnomah Chinook PDXI loved and hated #UncutGems, like several mov...
400.02323.0negativenegativecollision0.9645932608.0Simple learning of kites. 1) As soon as you go...[-0.0300322026014328, 0.04086627438664436, 0.0...Gujarat, IndiaSimple learning of kites. 1) As soon as you go...
410.02068.0negativenegativebody%20bags0.9293611459.0-stung and soldiers go home in body bags. IâÂ...[-0.02494075335562229, 0.044942084699869156, -...St.Paul Minnesota-stung and soldiers go home in body bags. IâÂ...
421.01692.0positivepositivedebris0.9566623279.0Hi , A737 south at Linwood cut off just passed...[-0.027976034209132195, -0.01899741217494011, ...DumbartonHi , A737 south at Linwood cut off just passed...
431.03255.0positivepositiveoil%20spill0.9369867940.0Livestock Carrier Spills Fuel Oil at Port of C...[-0.050569694489240646, 0.02741994708776474, -...Fort Lauderdale, FloridaLivestock Carrier Spills Fuel Oil at Port of C...
441.02253.0positivepositivehijacking0.9269256516.0LAW &amp; ORDER: Woman charged with DUI after ...[0.0002868208975996822, -0.07291881740093231, ...Atlanta, GA 770-578-2344LAW &amp; ORDER: Woman charged with DUI after ...
451.04046.0positivepositivecrash0.9288402628.0BREAKING: Iranian Judiciary spokesman says sev...[0.03708343580365181, -0.05643052980303764, -0...Russia. In the dense forests.BREAKING: Iranian Judiciary spokesman says sev...
460.0283.0negativenegativecrashed0.9734632695.0They hear ur having a bad time, no sympathy. h...[0.005220184568315744, 0.07696899026632309, 0....You know where I dey, na NaijaThey hear ur having a bad time, no sympathy. h...
470.02280.0negativenegativedestroyed0.9720693959.0She's spot on though. She's explaining how clo...[-0.016363494098186493, 0.05821079760789871, -...Selma, ALShe's spot on though. She's explaining how clo...
480.0236.0negativenegativederail0.9002283628.0A subtle design feature of the AR-15 rifle has...[0.04045148566365242, -0.027726944535970688, -...Atlanta/Stone Mountain, GAA subtle design feature of the AR-15 rifle has...
490.02556.0negativenegativecollapsed0.9719112412.0The wedding party all collapsed in the room So...[-0.010037141852080822, 0.006907069124281406, ...Wonderland🌹The wedding party all collapsed in the room So...
\n","
"],"text/plain":[" target ... document\n","origin_index ... \n","0 0.0 ... Ladies & Gentleman Pls Welcome The Full Me...\n","1 0.0 ... Emergency meeting for her millionaire grandson...\n","2 1.0 ... Update from . Looks like they're getting rescu...\n","3 0.0 ... Bridal couple were little affected in contrast...\n","4 0.0 ... How exactly does one accidentally fire a rocke...\n","5 0.0 ... President Edgar Lungu told journalists his buy...\n","6 0.0 ... Yea, but let's be honest. Kids value pretty mu...\n","7 0.0 ... Can anyone believe the cheek of calling a bush...\n","8 1.0 ... no amount of sci-fi media could’ve prepar...\n","9 1.0 ... Wildfire smoke to circle planet and return to ...\n","10 1.0 ... \"Somehow or other, we've got to stop the Irani...\n","11 1.0 ... An estimated 2 million hectares of land burnt ...\n","12 1.0 ... Storm surge from Storm Brendan flooding the ha...\n","13 0.0 ... *\"DMC3 Cerberus Battle\" playing* https://t.co/...\n","14 0.0 ... ALL HAIL CHEETOS\n","15 1.0 ... Climate Change Or Arson Jihad? - https://t.co/...\n","16 0.0 ... Mass anti-government protests erupt in Iran af...\n","17 1.0 ... He Greta you might want to raise the issue of ...\n","18 1.0 ... Police are seeking witnesses as they investiga...\n","19 1.0 ... Indonesia 🇮🇩 is facing its own c...\n","20 1.0 ... There's method to the madness with the Califor...\n","21 0.0 ... #NotMyPM on BBC says #ReleaseTheRussiaReport w...\n","22 0.0 ... Jinu : Are you okay? You hit your head and fai...\n","23 0.0 ... We won’t demolish properties without enga...\n","24 0.0 ... A devastating and thorough refutation of and N...\n","25 1.0 ... Updated air quality forecast for today and the...\n","26 0.0 ... Zero chance of any of those candidates winning...\n","27 1.0 ... They tell lies. A train I get was showing as c...\n","28 1.0 ... Emergency services at scene of multi-vehicle c...\n","29 1.0 ... In 2017, attack on police lines in Shopian. Da...\n","30 1.0 ... 
Thousands of churches in France and the UK hav...\n","31 0.0 ... Musician or band contact us https://t.co/NKsqk...\n","32 1.0 ... Army IDs Two Paratroopers Killed by Roadside B...\n","33 1.0 ... Shot and killed in front of his wife, sister a...\n","34 1.0 ... Adolf Hitler took 12 years to murder 6 million...\n","35 0.0 ... Human error and poor engineering caused that b...\n","36 1.0 ... Boeing employees mocked Lion Air staff for see...\n","37 1.0 ... U.S. Attorney General William Barr said a Saud...\n","38 0.0 ... me having an anxiety attack trying to look nor...\n","39 0.0 ... I loved and hated #UncutGems, like several mov...\n","40 0.0 ... Simple learning of kites. 1) As soon as you go...\n","41 0.0 ... -stung and soldiers go home in body bags. IâÂ...\n","42 1.0 ... Hi , A737 south at Linwood cut off just passed...\n","43 1.0 ... Livestock Carrier Spills Fuel Oil at Port of C...\n","44 1.0 ... LAW & ORDER: Woman charged with DUI after ...\n","45 1.0 ... BREAKING: Iranian Judiciary spokesman says sev...\n","46 0.0 ... They hear ur having a bad time, no sympathy. h...\n","47 0.0 ... She's spot on though. She's explaining how clo...\n","48 0.0 ... A subtle design feature of the AR-15 rifle has...\n","49 0.0 ... The wedding party all collapsed in the room So...\n","\n","[50 rows x 11 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# 7. Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1613549349279,"user_tz":-300,"elapsed":231804,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cef428e9-17a3-43c2-c319-8f19f5779154"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1614244649135,"user_tz":-300,"elapsed":2297835,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"afaf09b1-9e5b-4514-87c6-4e2197974700"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer 
and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.88 0.84 0.86 1186\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.88 0.85 0.87 1214\n","\n"," accuracy 0.85 2400\n"," macro avg 0.59 0.56 0.58 2400\n","weighted avg 0.88 0.85 0.86 2400\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 7.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614245118829,"user_tz":-300,"elapsed":502828,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f967e12a-67d2-46ae-b454-99a3d539c3e7"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates some NaNs. 
let's drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.86 0.81 0.83 314\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.84 0.83 0.84 286\n","\n"," accuracy 0.82 600\n"," macro avg 0.57 0.55 0.56 600\n","weighted avg 0.85 0.82 0.83 600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 8. Let's save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1613551695340,"user_tz":-300,"elapsed":2577852,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"01e76247-814c-4ad9-bc52-76e52c94e59b"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 9. Let's load the model from HDD.\n","This makes offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1613551709867,"user_tz":-300,"elapsed":2592375,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"00046bb7-e3b2-4ba3-f6da-d96dc15202b3"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('All the buildings in the capital were destroyed')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimenten_embed_sentence_small_bert_L12_768_embeddingssentiment_confidencedocument
origin_index
0positive[-0.39346572756767273, 0.33815106749534607, -0...0.994965All the buildings in the capital were destroyed
\n","
"],"text/plain":[" sentiment ... document\n","origin_index ... \n","0 positive ... All the buildings in the capital were destroyed\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1613551709869,"user_tz":-300,"elapsed":2592373,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8daf218d-b381-44be-a807-9e9631e5a34a"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this SentimentDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_stock_market.ipynb b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_stock_market.ipynb new file mode 100644 index 00000000..cc4d800c --- /dev/null +++ b/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_stock_market.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_sentiment_classifier_demo_stock_market.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/binary_text_classification/NLU_training_sentiment_classifier_demo_stock_market.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 Class Demo Stock Market Sentiment Training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","\n","You can achieve these results or even better on this dataset with training data:\n","\n","\n","
\n","\n","\n","![image.png]()\n","\n","\n","\n","\n","You can achieve these results or even better on this dataset with test data:\n","\n","\n","
\n","\n","![img.png]()\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614242474079,"user_tz":-300,"elapsed":147747,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0237f7c1-bb21-40d8-faa2-947edc8a1750"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 68kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 20.5MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=7f0edfd295bb5c8ac57fdffbd78fc64f6f9b0a9bcf4d81f4960d6bd0ed20fc26\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Stock Market Sentiment dataset \n","https://www.kaggle.com/yash612/stockmarket-sentiment-dataset\n","## Context\n","\n","Stock news gathered from multiple Twitter handles regarding economic news, divided into two classes: negative and positive."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614242476002,"user_tz":-300,"elapsed":149669,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b94ee2f4-688e-4d18-af63-70a00ae55f26"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/stock_data.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-25 08:40:04-- http://ckl-it.de/wp-content/uploads/2021/02/stock_data.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 758217 (740K) [text/csv]\n","Saving to: ‘stock_data.csv’\n","\n","stock_data.csv 100%[===================>] 740.45K 461KB/s in 1.6s \n","\n","2021-02-25 08:40:06 (461 KB/s) - ‘stock_data.csv’ saved [758217/758217]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614242476014,"user_tz":-300,"elapsed":149660,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"87bea20a-581b-4b8c-838b-583a22fe3fed"},"source":["import pandas as pd\n","train_path = '/content/stock_data.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df[columns]\n","from sklearn.model_selection import train_test_split\n","\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
303DPS breaking out this am. looks good. earnings...positive
2921DDD SSY Stops honored. So be it. Now.... must...negative
2497user arge % owners on Morningstar rival positi...negative
3827FS obama speech tonight moving it on up in cha...positive
3659user: Fascinating email chain between user and...positive
.........
432The key is not to panic here. AAP is in the d...positive
1810ES_F Hedges rotating into Junk stocks like Sol...negative
569WH gap filledpositive
1873BCM I work in this industry. don't just look a...positive
3882MITK moving againpositive
\n","

3200 rows × 2 columns

\n","
"],"text/plain":[" text y\n","303 DPS breaking out this am. looks good. earnings... positive\n","2921 DDD SSY Stops honored. So be it. Now.... must... negative\n","2497 user arge % owners on Morningstar rival positi... negative\n","3827 FS obama speech tonight moving it on up in cha... positive\n","3659 user: Fascinating email chain between user and... positive\n","... ... ...\n","432 The key is not to panic here. AAP is in the d... positive\n","1810 ES_F Hedges rotating into Junk stocks like Sol... negative\n","569 WH gap filled positive\n","1873 BCM I work in this industry. don't just look a... positive\n","3882 MITK moving again positive\n","\n","[3200 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1613549314532,"user_tz":-300,"elapsed":213328,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"177ff1b2-8bda-4a2b-99d5-3e02758286b8"},"source":["import nlu \n","# load a trainable pipeline by specifying the train. prefix and fit it on a dataset with label and text columns\n","# by default the Universal Sentence Encoder (USE) Sentence embeddings are used for generation\n","trainable_pipe = nlu.load('train.sentiment')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","\n","# predict with the fitted pipeline on the dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","#sentence detector that is part of the pipe generates some NaNs. 
let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.00 0.00 0.00 23\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.69 1.00 0.82 27\n","\n"," accuracy 0.54 50\n"," macro avg 0.23 0.33 0.27 50\n","weighted avg 0.37 0.54 0.44 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextdefault_name_embeddingssentiment_confidencedocumentsentiment
origin_index
0positiveAAP 950 lot bid in the Feb 580 call at 9.50, m...[0.039819009602069855, -0.01317680161446333, -...0.652981AAP 950 lot bid in the Feb 580 call at 9.50, m...positive
1negativeuser: ADBE continues to weaken, not following ...[0.05989246815443039, 0.06275778263807297, -0....0.630831user: ADBE continues to weaken, not following ...positive
2negativepnra missed this one yest on the short side bu...[-0.0019929897971451283, -0.026955554261803627...0.701571pnra missed this one yest on the short side bu...positive
3negativePPO: 2) GM estimates 20% Y/Y unit growth for v...[0.0499078631401062, -0.019602565094828606, -0...0.577670PPO: 2) GM estimates 20% Y/Y unit growth for v...neutral
4positiveBAC avrg positive2.positive27[0.03407237306237221, 0.0034944196231663227, -...0.642415BAC avrg positive2.positive27positive
5negativeCMG While traveling through Kalamazoo, MI on F...[0.017684495076537132, -0.03009415790438652, -...0.567573CMG While traveling through Kalamazoo, MI on F...neutral
6positivei dont have a tech friend in world that owns t...[0.05138593912124634, 0.05815130099654198, -0....0.714708i dont have a tech friend in world that owns t...positive
7positiveSWI was a beaut[0.04824110120534897, -0.05754169449210167, -0...0.722410SWI was a beautpositive
8positiveAgain, this mkt is FAT while AAP is down 2.5% ...[0.050297632813453674, -0.0360858328640461, -0...0.670725Again, this mkt is FAT while AAP is down 2.5% ...positive
9positiveAPO good volume bounce; entry 22.positive9 sto...[-0.013337346725165844, -0.03307655081152916, ...0.733935APO good volume bounce; entry 22.positive9 sto...positive
10negativeike it or Not AAP will fill the gap B4 will s...[0.06327012926340103, 0.003341271774843335, -0...0.535553ike it or Not AAP will fill the gap B4 will se...neutral
11negative2020 was supposed to be the year everything ca...[0.1055862084031105, -0.003243181621655822, -0...0.5959692020 was supposed to be the year everything ca...neutral
12positiveong setup SM[0.045743703842163086, -0.048678476363420486, ...0.730938ong setup SMpositive
13positiveSWHC added more march 9's and positive0's here...[-0.008223574608564377, -0.03544619306921959, ...0.755500SWHC added more march 9's and positive0's here...positive
14positiveTrade Setup AA -[0.04049460217356682, -0.04978525638580322, -0...0.761686Trade Setup AA -positive
15positivedon't tell everyone ... user: That's a nice lo...[0.06689376384019852, -0.029914824292063713, -...0.723632don't tell everyone ... user: That's a nice lo...positive
16positiveSGMO monthly[-0.07218629121780396, -0.01272683497518301, -...0.721094SGMO monthlypositive
17positiveaapl in long per earlier tweet on topside brea...[0.057043224573135376, -0.012128235772252083, ...0.735970aapl in long per earlier tweet on topside brea...positive
18negativeBAC Will go to positive2 imho[0.05091923102736473, -0.04665602371096611, -0...0.601387BAC Will go to positive2 imhopositive
19negativeAAP 465 is resistance and heading to 435 in th...[0.03843201324343681, -0.0733688548207283, -0....0.602865AAP 465 is resistance and heading to 435 in th...positive
20negativeJust finished The Week Ahead Commentary and po...[-0.05284350365400314, 0.024635571986436844, -...0.698348Just finished The Week Ahead Commentary and po...positive
21positiveNew buys today from watch list all did well N...[0.02797831781208515, -0.07139448076486588, -0...0.756334New buys today from watch list all did well NX...positive
22positiveOVI on watch list has volume of positive0% of ...[0.04008709639310837, -0.03572438657283783, -0...0.749699OVI on watch list has volume of positive0% of ...positive
23negativeAAP Fib 50% retracement from 2006 low of 6.36 ...[0.011571422219276428, -0.058767061680555344, ...0.565362AAP Fib 50% retracement from 2006 low of 6.36 ...neutral
24negativeWatch NX under 24.70[0.03347814455628395, -0.015002097003161907, -...0.704729Watch NX under 24.70positive
25negativewill AAP go down?[0.02786560356616974, -0.04974859952926636, -0...0.563953will AAP go down?neutral
26positiveMarket Wrap Video + Additions to Watch ist in...[0.02167339436709881, 0.0070633664727211, -0.0...0.769903Market Wrap Video + Additions to Watch ist inc...positive
27negativeGreen Weekly Triangle on KTOS,....pdating[-0.0672115832567215, -0.007515197619795799, -...0.632066Green Weekly Triangle on KTOS,....pdatingpositive
28negativeNN looking quite bearish at the moment.[0.03382507339119911, -0.029242942109704018, -...0.677875NN looking quite bearish at the moment.positive
29positiveT user INVN ..lots of bullish technical discus...[0.06488332152366638, 0.005766488146036863, -0...0.706750T user INVN ..lots of bullish technical discus...positive
30negativeuser: BT Cortos en Peabody Energy si cae bajo ...[0.06904835999011993, 0.018765972927212715, -0...0.544308user: BT Cortos en Peabody Energy si cae bajo ...neutral
31positiveMarket Wrap Video + Additions to Watch ist in...[0.06079310178756714, -0.052379265427589417, -...0.730841Market Wrap Video + Additions to Watch ist inc...positive
32positiveHS Over 48.20 at first for a small trade to se...[0.03508533164858818, -0.013354266993701458, -...0.773186HS Over 48.20 at first for a small trade to se...positive
33negativeAMZN short 275.positive3[0.023175707086920738, 0.001034723361954093, -...0.626640AMZN short 275.positive3positive
34positiveVNG - positive2 mil shares shorts plus long bu...[0.008707035332918167, 0.03645578399300575, -0...0.695301VNG - positive2 mil shares shorts plus long bu...positive
35positiveMarket Wrap Video + Additions to Watch ist in...[0.03514407202601433, 0.01656987890601158, -0....0.707386Market Wrap Video + Additions to Watch ist inc...positive
36negativeGreen Weekly Triangle on PBY,....Open Sell Sho...[-0.004248426761478186, -0.0024954124819487333...0.678482Green Weekly Triangle on PBY,....Open Sell Sho...positive
37negativeCIE Seems geady to go South[0.03585235774517059, 0.048238031566143036, -0...0.553114CIE Seems geady to go Southneutral
38negativeuser: AAP people think growth slowing is not ...[0.05234825611114502, -0.011760670691728592, -...0.580370user: AAP people think growth slowing is not t...neutral
39positiveDECK this sucker moves both ways this time i l...[0.01123942993581295, -0.057358115911483765, -...0.688387DECK this sucker moves both ways this time i l...positive
40positiveNew Post: Shorts Will Make Dimes, ongs Will Ma...[0.052262794226408005, -0.053826283663511276, ...0.668805New Post: Shorts Will Make Dimes, ongs Will Ma...positive
41positiveMON - added[0.004127390217036009, -0.05779232084751129, -...0.685398MON - addedpositive
42positivewait to you see what they do w/schools in 20po...[0.03918222710490227, -0.004734774120151997, -...0.689349wait to you see what they do w/schools in 20po...positive
43negativeWhiting, one of top drillers in North Dakotaâ...[0.008908223360776901, 0.043478433042764664, -...0.563377Whiting, one of top drillers in North Dakotaâ...neutral
44negativeGold Demand Could Hit 3-Decade Low As COVIDneg...[0.04732766002416611, -0.086189404129982, 0.01...0.604626Gold Demand Could Hit 3-Decade Low As COVIDneg...positive
45negativeThe most bearish thing I have seen in a week, ...[0.0485176146030426, 0.003743809647858143, -0....0.621680The most bearish thing I have seen in a week, ...positive
46positiveNG - traditional and AT point and figure - bul...[0.06986340880393982, -0.010513159446418285, -...0.653522NG - traditional and AT point and figure - bul...positive
47positiveCBMX B/O smart money coming in this afternoon ...[0.08315714448690414, 0.015806563198566437, -0...0.720131CBMX B/O smart money coming in this afternoon ...positive
48positiveP approaching resistance area around positive2...[-0.05507417768239975, 0.026514528319239616, -...0.715011P approaching resistance area around positive2...positive
49negativeWFM - looks like a bear flag, we are short, wi...[-0.04197699576616287, -0.07297921925783157, -...0.514607WFM - looks like a bear flag, we are short, wi...neutral
\n","
"],"text/plain":[" y ... sentiment\n","origin_index ... \n","0 positive ... positive\n","1 negative ... positive\n","2 negative ... positive\n","3 negative ... neutral\n","4 positive ... positive\n","5 negative ... neutral\n","6 positive ... positive\n","7 positive ... positive\n","8 positive ... positive\n","9 positive ... positive\n","10 negative ... neutral\n","11 negative ... neutral\n","12 positive ... positive\n","13 positive ... positive\n","14 positive ... positive\n","15 positive ... positive\n","16 positive ... positive\n","17 positive ... positive\n","18 negative ... positive\n","19 negative ... positive\n","20 negative ... positive\n","21 positive ... positive\n","22 positive ... positive\n","23 negative ... neutral\n","24 negative ... positive\n","25 negative ... neutral\n","26 positive ... positive\n","27 negative ... positive\n","28 negative ... positive\n","29 positive ... positive\n","30 negative ... neutral\n","31 positive ... positive\n","32 positive ... positive\n","33 negative ... positive\n","34 positive ... positive\n","35 positive ... positive\n","36 negative ... positive\n","37 negative ... neutral\n","38 negative ... neutral\n","39 positive ... positive\n","40 positive ... positive\n","41 positive ... positive\n","42 positive ... positive\n","43 negative ... neutral\n","44 negative ... positive\n","45 negative ... positive\n","46 positive ... positive\n","47 positive ... positive\n","48 positive ... positive\n","49 negative ... neutral\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# 4. 
Test the fitted pipe on a new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1613549317638,"user_tz":-300,"elapsed":216343,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b77f4b28-b4ff-48f5-dd65-09ca429540c6"},"source":["fitted_pipe.predict(\"Bitcoin dropped by 50 percent!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
default_name_embeddingssentiment_confidencedocumentsentiment
origin_index
0[0.06509938091039658, -0.057081300765275955, -...0.687247Bitcoin dropped by 50 percent!positive
\n","
"],"text/plain":[" default_name_embeddings ... sentiment\n","origin_index ... \n","0 [0.06509938091039658, -0.057081300765275955, -... ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## 5. Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1613549317640,"user_tz":-300,"elapsed":216302,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"636f29b9-abd0-4ecd-a110-1d1afd74a9b0"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setMaxEpochs(2) | Info: Maximum number of epochs to train | Currently set to : 2\n","pipe['sentiment_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['sentiment_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['sentiment_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['sentiment_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setLoadSP(False) | Info: Whether to load SentencePiece ops file which is required only by multi-lingual models. This is not changeable after it's set with a pretrained model nor it is compatible with Windows. 
| Currently set to : False\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## 6. 
Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1613549325177,"user_tz":-300,"elapsed":223771,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"559b5176-6a41-4379-ebaf-bcdfcf3bd42c"},"source":["# Train longer!\n","trainable_pipe['sentiment_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50])\n","# predict with the fitted pipeline on the dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.93 0.61 0.74 23\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.86 0.89 0.87 27\n","\n"," accuracy 0.76 50\n"," macro avg 0.60 0.50 0.54 50\n","weighted avg 0.89 0.76 0.81 50\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextdefault_name_embeddingssentiment_confidencedocumentsentiment
origin_index
0positiveAAP 950 lot bid in the Feb 580 call at 9.50, m...[0.039819009602069855, -0.01317680161446333, -...0.612311AAP 950 lot bid in the Feb 580 call at 9.50, m...negative
1negativeuser: ADBE continues to weaken, not following ...[0.05989246815443039, 0.06275778263807297, -0....0.581842user: ADBE continues to weaken, not following ...neutral
2negativepnra missed this one yest on the short side bu...[-0.0019929897971451283, -0.026955554261803627...0.563239pnra missed this one yest on the short side bu...neutral
3negativePPO: 2) GM estimates 20% Y/Y unit growth for v...[0.0499078631401062, -0.019602565094828606, -0...0.684193PPO: 2) GM estimates 20% Y/Y unit growth for v...negative
4positiveBAC avrg positive2.positive27[0.03407237306237221, 0.0034944196231663227, -...0.845220BAC avrg positive2.positive27positive
5negativeCMG While traveling through Kalamazoo, MI on F...[0.017684495076537132, -0.03009415790438652, -...0.664491CMG While traveling through Kalamazoo, MI on F...negative
6positivei dont have a tech friend in world that owns t...[0.05138593912124634, 0.05815130099654198, -0....0.857243i dont have a tech friend in world that owns t...positive
7positiveSWI was a beaut[0.04824110120534897, -0.05754169449210167, -0...0.951524SWI was a beautpositive
8positiveAgain, this mkt is FAT while AAP is down 2.5% ...[0.050297632813453674, -0.0360858328640461, -0...0.582303Again, this mkt is FAT while AAP is down 2.5% ...neutral
9positiveAPO good volume bounce; entry 22.positive9 sto...[-0.013337346725165844, -0.03307655081152916, ...0.887402APO good volume bounce; entry 22.positive9 sto...positive
10negativeike it or Not AAP will fill the gap B4 will s...[0.06327012926340103, 0.003341271774843335, -0...0.830195ike it or Not AAP will fill the gap B4 will se...negative
11negative2020 was supposed to be the year everything ca...[0.1055862084031105, -0.003243181621655822, -0...0.6946232020 was supposed to be the year everything ca...negative
12positiveong setup SM[0.045743703842163086, -0.048678476363420486, ...0.957175ong setup SMpositive
13positiveSWHC added more march 9's and positive0's here...[-0.008223574608564377, -0.03544619306921959, ...0.894367SWHC added more march 9's and positive0's here...positive
14positiveTrade Setup AA -[0.04049460217356682, -0.04978525638580322, -0...0.964757Trade Setup AA -positive
15positivedon't tell everyone ... user: That's a nice lo...[0.06689376384019852, -0.029914824292063713, -...0.922478don't tell everyone ... user: That's a nice lo...positive
16positiveSGMO monthly[-0.07218629121780396, -0.01272683497518301, -...0.923514SGMO monthlypositive
17positiveaapl in long per earlier tweet on topside brea...[0.057043224573135376, -0.012128235772252083, ...0.896109aapl in long per earlier tweet on topside brea...positive
18negativeBAC Will go to positive2 imho[0.05091923102736473, -0.04665602371096611, -0...0.737084BAC Will go to positive2 imhonegative
19negativeAAP 465 is resistance and heading to 435 in th...[0.03843201324343681, -0.0733688548207283, -0....0.773113AAP 465 is resistance and heading to 435 in th...negative
20negativeJust finished The Week Ahead Commentary and po...[-0.05284350365400314, 0.024635571986436844, -...0.668573Just finished The Week Ahead Commentary and po...positive
21positiveNew buys today from watch list all did well N...[0.02797831781208515, -0.07139448076486588, -0...0.882660New buys today from watch list all did well NX...positive
22positiveOVI on watch list has volume of positive0% of ...[0.04008709639310837, -0.03572438657283783, -0...0.899480OVI on watch list has volume of positive0% of ...positive
23negativeAAP Fib 50% retracement from 2006 low of 6.36 ...[0.011571422219276428, -0.058767061680555344, ...0.830456AAP Fib 50% retracement from 2006 low of 6.36 ...negative
24negativeWatch NX under 24.70[0.03347814455628395, -0.015002097003161907, -...0.843018Watch NX under 24.70positive
25negativewill AAP go down?[0.02786560356616974, -0.04974859952926636, -0...0.837108will AAP go down?negative
26positiveMarket Wrap Video + Additions to Watch ist in...[0.02167339436709881, 0.0070633664727211, -0.0...0.973447Market Wrap Video + Additions to Watch ist inc...positive
27negativeGreen Weekly Triangle on KTOS,....pdating[-0.0672115832567215, -0.007515197619795799, -...0.527610Green Weekly Triangle on KTOS,....pdatingneutral
28negativeNN looking quite bearish at the moment.[0.03382507339119911, -0.029242942109704018, -...0.743928NN looking quite bearish at the moment.positive
29positiveT user INVN ..lots of bullish technical discus...[0.06488332152366638, 0.005766488146036863, -0...0.881713T user INVN ..lots of bullish technical discus...positive
30negativeuser: BT Cortos en Peabody Energy si cae bajo ...[0.06904835999011993, 0.018765972927212715, -0...0.601138user: BT Cortos en Peabody Energy si cae bajo ...negative
31positiveMarket Wrap Video + Additions to Watch ist in...[0.06079310178756714, -0.052379265427589417, -...0.944614Market Wrap Video + Additions to Watch ist inc...positive
32positiveHS Over 48.20 at first for a small trade to se...[0.03508533164858818, -0.013354266993701458, -...0.961685HS Over 48.20 at first for a small trade to se...positive
33negativeAMZN short 275.positive3[0.023175707086920738, 0.001034723361954093, -...0.629248AMZN short 275.positive3positive
34positiveVNG - positive2 mil shares shorts plus long bu...[0.008707035332918167, 0.03645578399300575, -0...0.849978VNG - positive2 mil shares shorts plus long bu...positive
35positiveMarket Wrap Video + Additions to Watch ist in...[0.03514407202601433, 0.01656987890601158, -0....0.908923Market Wrap Video + Additions to Watch ist inc...positive
36negativeGreen Weekly Triangle on PBY,....Open Sell Sho...[-0.004248426761478186, -0.0024954124819487333...0.551505Green Weekly Triangle on PBY,....Open Sell Sho...neutral
37negativeCIE Seems geady to go South[0.03585235774517059, 0.048238031566143036, -0...0.780726CIE Seems geady to go Southnegative
38negativeuser: AAP people think growth slowing is not ...[0.05234825611114502, -0.011760670691728592, -...0.794824user: AAP people think growth slowing is not t...negative
39positiveDECK this sucker moves both ways this time i l...[0.01123942993581295, -0.057358115911483765, -...0.902623DECK this sucker moves both ways this time i l...positive
40positiveNew Post: Shorts Will Make Dimes, ongs Will Ma...[0.052262794226408005, -0.053826283663511276, ...0.657548New Post: Shorts Will Make Dimes, ongs Will Ma...positive
41positiveMON - added[0.004127390217036009, -0.05779232084751129, -...0.928928MON - addedpositive
42positivewait to you see what they do w/schools in 20po...[0.03918222710490227, -0.004734774120151997, -...0.814769wait to you see what they do w/schools in 20po...positive
43negativeWhiting, one of top drillers in North Dakotaâ...[0.008908223360776901, 0.043478433042764664, -...0.719284Whiting, one of top drillers in North Dakotaâ...negative
44negativeGold Demand Could Hit 3-Decade Low As COVIDneg...[0.04732766002416611, -0.086189404129982, 0.01...0.637345Gold Demand Could Hit 3-Decade Low As COVIDneg...negative
45negativeThe most bearish thing I have seen in a week, ...[0.0485176146030426, 0.003743809647858143, -0....0.576141The most bearish thing I have seen in a week, ...neutral
46positiveNG - traditional and AT point and figure - bul...[0.06986340880393982, -0.010513159446418285, -...0.557736NG - traditional and AT point and figure - bul...neutral
47positiveCBMX B/O smart money coming in this afternoon ...[0.08315714448690414, 0.015806563198566437, -0...0.814244CBMX B/O smart money coming in this afternoon ...positive
48positiveP approaching resistance area around positive2...[-0.05507417768239975, 0.026514528319239616, -...0.871801P approaching resistance area around positive2...positive
49negativeWFM - looks like a bear flag, we are short, wi...[-0.04197699576616287, -0.07297921925783157, -...0.736266WFM - looks like a bear flag, we are short, wi...negative
\n","
"],"text/plain":[" y ... sentiment\n","origin_index ... \n","0 positive ... negative\n","1 negative ... neutral\n","2 negative ... neutral\n","3 negative ... negative\n","4 positive ... positive\n","5 negative ... negative\n","6 positive ... positive\n","7 positive ... positive\n","8 positive ... neutral\n","9 positive ... positive\n","10 negative ... negative\n","11 negative ... negative\n","12 positive ... positive\n","13 positive ... positive\n","14 positive ... positive\n","15 positive ... positive\n","16 positive ... positive\n","17 positive ... positive\n","18 negative ... negative\n","19 negative ... negative\n","20 negative ... positive\n","21 positive ... positive\n","22 positive ... positive\n","23 negative ... negative\n","24 negative ... positive\n","25 negative ... negative\n","26 positive ... positive\n","27 negative ... neutral\n","28 negative ... positive\n","29 positive ... positive\n","30 negative ... negative\n","31 positive ... positive\n","32 positive ... positive\n","33 negative ... positive\n","34 positive ... positive\n","35 positive ... positive\n","36 negative ... neutral\n","37 negative ... negative\n","38 negative ... negative\n","39 positive ... positive\n","40 positive ... positive\n","41 positive ... positive\n","42 positive ... positive\n","43 negative ... negative\n","44 negative ... negative\n","45 negative ... neutral\n","46 positive ... neutral\n","47 positive ... positive\n","48 positive ... positive\n","49 negative ... negative\n","\n","[50 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# 7. 
Try training with different Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nxWFzQOhjWC8","executionInfo":{"status":"ok","timestamp":1613549325181,"user_tz":-300,"elapsed":223750,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a771003c-4295-459f-9b21-43f9bd90c94b"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark 
NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') 
returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model 
labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1614244518434,"user_tz":-300,"elapsed":2188594,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8e62f5bb-0134-40ab-b753-411cb1a2beef"},"source":["trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(120) \n","trainable_pipe['sentiment_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.84 0.71 0.77 1596\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.82 0.76 0.79 1604\n","\n"," accuracy 0.74 3200\n"," macro avg 0.55 0.49 0.52 3200\n","weighted avg 0.83 0.74 0.78 3200\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 7.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614244944855,"user_tz":-300,"elapsed":408320,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"83917363-8c79-4d16-d291-f28ddfd81cd7"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.76 0.61 0.68 404\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.73 0.69 0.71 396\n","\n"," accuracy 0.65 800\n"," macro avg 0.50 0.43 0.46 800\n","weighted avg 0.75 0.65 0.69 800\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 8. 
Let's save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1613552484284,"user_tz":-300,"elapsed":176634,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e13b99a7-e11a-4889-edb8-24516bd3058c"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 9. Let's load the model from HDD.\n","This makes offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1613552775613,"user_tz":-300,"elapsed":46087,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9c120b5b-8e2d-4450-f4c4-a3e5230818af"},"source":["hdd_pipe = nlu.load(path=\"./models/classifier_dl_trained\")\n","\n","preds = hdd_pipe.predict('Bitcoin dropped by 50 percent!!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
en_embed_sentence_small_bert_L12_768_embeddingssentimentsentiment_confidencedocument
origin_index
0[0.20597101747989655, 0.16840755939483643, 0.0...negative0.761807Bitcoin dropped by 50 percent!!
\n","
"],"text/plain":[" en_embed_sentence_small_bert_L12_768_embeddings ... document\n","origin_index ... \n","0 [0.20597101747989655, 0.16840755939483643, 0.0... ... Bitcoin dropped by 50 percent!!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1613552782983,"user_tz":-300,"elapsed":1678,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"913230f0-8ff3-4ee1-89f5-7a911e73ab58"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this SentimentDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb index d259a24b..b784deae 100644 --- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb +++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null pyspark==2.4.7\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download news classification dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1607912618662,"user_tz":-60,"elapsed":94251,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4fe5c4cb-76ff-44a0-9936-dfbddfeb5140"},"source":["! 
wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2020-12-14 02:23:36-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.154.38\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.154.38|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 24032125 (23M) [text/csv]\n","Saving to: ‘news_category_train.csv’\n","\n","news_category_train 100%[===================>] 22.92M 21.7MB/s in 1.1s \n","\n","2020-12-14 02:23:37 (21.7 MB/s) - ‘news_category_train.csv’ saved [24032125/24032125]\n","\n","--2020-12-14 02:23:37-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.74.118\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.74.118|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1504408 (1.4M) [text/csv]\n","Saving to: ‘news_category_test.csv’\n","\n","news_category_test. 
100%[===================>] 1.43M 2.77MB/s in 0.5s \n","\n","2020-12-14 02:23:38 (2.77 MB/s) - ‘news_category_test.csv’ saved [1504408/1504408]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1607912619037,"user_tz":-60,"elapsed":94620,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1cf7867f-21ab-4ba1-9ab3-c95a191b0286"},"source":["import pandas as pd\n","test_path = '/content/news_category_test.csv'\n","train_df = pd.read_csv(test_path)\n","train_df.columns=['y','text']\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0BusinessUnions representing workers at Turner Newall...
1Sci/TechTORONTO, Canada A second team of rocketeer...
2Sci/TechA company founded by a chemistry researcher a...
3Sci/TechIt's barely dawn when Mike Fitzpatrick starts...
4Sci/TechSouthern California's smog fighting agency we...
.........
7595WorldUkrainian presidential candidate Viktor Yushch...
7596SportsWith the supply of attractive pitching options...
7597SportsLike Roger Clemens did almost exactly eight ye...
7598BusinessSINGAPORE : Doctors in the United States have ...
7599BusinessEBay plans to buy the apartment and home renta...
\n","

7600 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 Business Unions representing workers at Turner Newall...\n","1 Sci/Tech TORONTO, Canada A second team of rocketeer...\n","2 Sci/Tech A company founded by a chemistry researcher a...\n","3 Sci/Tech It's barely dawn when Mike Fitzpatrick starts...\n","4 Sci/Tech Southern California's smog fighting agency we...\n","... ... ...\n","7595 World Ukrainian presidential candidate Viktor Yushch...\n","7596 Sports With the supply of attractive pitching options...\n","7597 Sports Like Roger Clemens did almost exactly eight ye...\n","7598 Business SINGAPORE : Doctors in the United States have ...\n","7599 Business EBay plans to buy the apartment and home renta...\n","\n","[7600 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are beeing downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Emeddings in NLU tough!\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1607912857369,"user_tz":-60,"elapsed":332946,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"8bce881e-edb7-4d2b-cf61-b9f26a05ea4b"},"source":["# load a trainable pipeline by specifying the train. 
prefix and fit it on a datset with label and text columns\n","# Since there are no\n","fitted_pipe = nlu.load('train.classifier').fit(train_df)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ydefault_name_embeddingstextsentencecategory_confidencecategory
origin_index
0Business[0.012997539713978767, 0.019844762980937958, -...Unions representing workers at Turner Newall...Unions representing workers at Turner Newall s...0.999985Business
1Sci/Tech[0.023022323846817017, -0.01595703884959221, -...TORONTO, Canada A second team of rocketeer...TORONTO, Canada A second team of rocketeers co...1.000000Sports
1Sci/Tech[-0.010587693192064762, 0.011531050316989422, ...TORONTO, Canada A second team of rocketeer...10 million Ansari X Prize, a contest for priva...1.000000Sports
2Sci/Tech[0.038641855120658875, 0.02322080172598362, -0...A company founded by a chemistry researcher a...A company founded by a chemistry researcher at...0.744563Business
3Sci/Tech[-0.006857294123619795, 0.01967567577958107, -...It's barely dawn when Mike Fitzpatrick starts...It's barely dawn when Mike Fitzpatrick starts ...0.999360Sci/Tech
.....................
7596Sports[0.005107458680868149, -0.011805553920567036, ...With the supply of attractive pitching options....1.000000Sports
7596Sports[0.005107458680868149, -0.011805553920567036, ...With the supply of attractive pitching options....2.000000Sports
7597Sports[0.044696468859910965, 0.0015660696662962437, ...Like Roger Clemens did almost exactly eight ye...Like Roger Clemens did almost exactly eight ye...1.000000Sports
7598Business[0.05564942583441734, -0.021285761147737503, -...SINGAPORE : Doctors in the United States have ...SINGAPORE : Doctors in the United States have ...0.999433Business
7599Business[0.08172684907913208, -0.013251541182398796, -...EBay plans to buy the apartment and home renta...EBay plans to buy the apartment and home renta...0.820492Business
\n","

14399 rows × 6 columns

\n","
"],"text/plain":[" y ... category\n","origin_index ... \n","0 Business ... Business\n","1 Sci/Tech ... Sports\n","1 Sci/Tech ... Sports\n","2 Sci/Tech ... Business\n","3 Sci/Tech ... Sci/Tech\n","... ... ... ...\n","7596 Sports ... Sports\n","7596 Sports ... Sports\n","7597 Sports ... Sports\n","7598 Business ... Business\n","7599 Business ... Business\n","\n","[14399 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"djtoZVKBw2WU","executionInfo":{"status":"ok","timestamp":1607912858793,"user_tz":-60,"elapsed":334365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c41b52d9-2a4b-47ee-92e8-758399ef45cc"},"source":["from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," Business 0.76 0.81 0.78 3671\n"," Sci/Tech 0.80 0.79 0.79 3983\n"," Sports 0.86 0.92 0.89 3687\n"," World 0.89 0.77 0.83 3058\n","\n"," accuracy 0.82 14399\n"," macro avg 0.83 0.82 0.82 14399\n","weighted avg 0.82 0.82 0.82 14399\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. 
Lets try different Sentence Emebddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1607912858794,"user_tz":-60,"elapsed":334358,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"8021f8c3-d711-4d06-d184-88df1a29441e"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP 
model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns 
Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model 
sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ABHLgirmG1n9","executionInfo":{"status":"ok","timestamp":1607918642391,"user_tz":-60,"elapsed":6117950,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"fcc6f823-4332-471f-c2dc-201916ef1b97"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","# fitted_pipe = nlu.load('en.embed_sentence.bert_large_uncased train.classifier').fit(train_df)\n","fitted_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier').fit(train_df)\n","\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," Business 0.00 0.00 0.00 1900\n"," Sci/Tech 0.25 1.00 0.40 1900\n"," Sports 0.00 0.00 0.00 1900\n"," World 0.00 0.00 0.00 1900\n","\n"," accuracy 0.25 7600\n"," macro avg 0.06 0.25 0.10 7600\n","weighted avg 0.06 0.25 0.10 7600\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nbpdZGoZPslz","executionInfo":{"status":"ok","timestamp":1607918778139,"user_tz":-60,"elapsed":6253693,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1dcc8aa9-fd89-4b7a-d78d-c641c09f67d6"},"source":["# Load pipe 
with bert embeds\n","fitted_pipe = nlu.load('embed_sentence.bert train.classifier').fit(train_df)\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," Business 0.81 0.74 0.77 1900\n"," Sci/Tech 0.74 0.87 0.80 1900\n"," Sports 0.92 0.94 0.93 1900\n"," World 0.91 0.81 0.86 1900\n","\n"," accuracy 0.84 7600\n"," macro avg 0.85 0.84 0.84 7600\n","weighted avg 0.85 0.84 0.84 7600\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918802363,"user_tz":-60,"elapsed":6277910,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"79c442f9-959e-4b14-ae85-6ef9f654f297"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":128},"executionInfo":{"status":"ok","timestamp":1607918809822,"user_tz":-60,"elapsed":6285365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f755aaa0-974c-4c6f-c079-0f3d681dbc82"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifier_confidencedocumentclassifierembed_sentence_bert_embeddings
origin_index
00.997592Tesla plans to invest 10M into the ML sectorBusiness[-0.07111635059118271, 0.9532930850982666, -1....
\n","
"],"text/plain":[" classifier_confidence ... embed_sentence_bert_embeddings\n","origin_index ... \n","0 0.997592 ... [-0.07111635059118271, 0.9532930850982666, -1....\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918809824,"user_tz":-60,"elapsed":6285363,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"5e8b8c8a-5cd1-4d20-bde2-a4003d5687d0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128) | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['World', 'Sci/Tech', 'Sports', 'Business']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['World', 'Sci/Tech', 'Sports', 'Business']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text 
Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","\n","You can achieve these results or even better on this dataset with training data:\n","\n","
\n","\n","![image.png]()\n","\n","You can achieve these results or even better on this dataset with test data:\n","\n","\n","
\n","\n","![image.png]()\n","\n","\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null pyspark==2.4.7\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download news classification dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614563561978,"user_tz":-300,"elapsed":56441,"user":{"displayName":"Gammer Otaku","photoUrl":"","userId":"18042713576744284398"}},"outputId":"e9e6f7f8-2bae-471e-8e52-129be76b148d"},"source":["! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","! wget https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-03-01 01:51:28-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_train.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 54.231.98.139\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|54.231.98.139|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 24032125 (23M) [text/csv]\n","Saving to: ‘news_category_train.csv’\n","\n","news_category_train 100%[===================>] 22.92M 50.0MB/s in 0.5s \n","\n","2021-03-01 01:51:28 (50.0 MB/s) - ‘news_category_train.csv’ saved [24032125/24032125]\n","\n","--2021-03-01 01:51:28-- https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv\n","Resolving s3.amazonaws.com (s3.amazonaws.com)... 54.231.98.139\n","Connecting to s3.amazonaws.com (s3.amazonaws.com)|54.231.98.139|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1504408 (1.4M) [text/csv]\n","Saving to: ‘news_category_test.csv’\n","\n","news_category_test. 100%[===================>] 1.43M 6.82MB/s in 0.2s \n","\n","2021-03-01 01:51:29 (6.82 MB/s) - ‘news_category_test.csv’ saved [1504408/1504408]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614563562784,"user_tz":-300,"elapsed":57231,"user":{"displayName":"Gammer Otaku","photoUrl":"","userId":"18042713576744284398"}},"outputId":"ce49fae7-0be7-47c7-9ee4-ba1a495fbe4e"},"source":["import pandas as pd\n","test_path = '/content/news_category_test.csv'\n","train_df = pd.read_csv(test_path)\n","train_df.columns=['y','text']\n","from sklearn.model_selection import train_test_split\n","\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
989Sci/TechBy acquiring KVault, which makes e-mail-archiv...
6701Sci/TechEarly next year, the computer maker will begin...
5384Worldquot;Aeolus was keeper of the winds. He gave ...
1309SportsA late rally gave the San Diego Padres a rar...
4043SportsAs happy as Utah coach Urban Meyer was to hea...
.........
6925BusinessCola has decided not to sell its C2 brand in t...
3688Worldseeded Ivan Ljubicic of Croatia beat Victor Ha...
5132Sci/TechA zoo in the Gulf has bred a bird which is thr...
6058WorldThe 150 brokers and traders on the Iraq Stock ...
2846WorldTammy Hough is a life long Republican, a soci...
\n","

6080 rows × 2 columns

\n","
"],"text/plain":[" y text\n","989 Sci/Tech By acquiring KVault, which makes e-mail-archiv...\n","6701 Sci/Tech Early next year, the computer maker will begin...\n","5384 World quot;Aeolus was keeper of the winds. He gave ...\n","1309 Sports A late rally gave the San Diego Padres a rar...\n","4043 Sports As happy as Utah coach Urban Meyer was to hea...\n","... ... ...\n","6925 Business Cola has decided not to sell its C2 brand in t...\n","3688 World seeded Ivan Ljubicic of Croatia beat Victor Ha...\n","5132 Sci/Tech A zoo in the Gulf has bred a bird which is thr...\n","6058 World The 150 brokers and traders on the Iraq Stock ...\n","2846 World Tammy Hough is a life long Republican, a soci...\n","\n","[6080 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are being downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Embeddings in NLU though!\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":671},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1614563790900,"user_tz":-300,"elapsed":285335,"user":{"displayName":"Gammer Otaku","photoUrl":"","userId":"18042713576744284398"}},"outputId":"d0507200-05ac-450e-ecb0-01f5650fedd8"},"source":["# load a trainable pipeline by specifying the train. 
prefix and fit it on a dataset with label and text columns\n","# Since there are no\n","fitted_pipe = nlu.load('train.classifier').fit(train_df)\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencesentencedefault_name_embeddingstexty
origin_index
989Sci/Tech1.000000By acquiring KVault, which makes e-mail-archiv...[0.07190942019224167, -0.05929875373840332, -0...By acquiring KVault, which makes e-mail-archiv...Sci/Tech
989Sci/Tech1.000000s lead and rebuild investors #39;[0.053723689168691635, 0.019650151953101158, -...By acquiring KVault, which makes e-mail-archiv...Sci/Tech
989Sci/Tech1.000000confidence.[0.022217048332095146, -0.02411036007106304, -...By acquiring KVault, which makes e-mail-archiv...Sci/Tech
6701Sci/Tech1.000000Early next year, the computer maker will begin...[0.04153631627559662, -0.016046393662691116, -...Early next year, the computer maker will begin...Sci/Tech
5384Business0.611293quot;[0.05674704536795616, 0.05588801950216293, -0....quot;Aeolus was keeper of the winds. He gave ...World
.....................
3688Sports1.000000seeded Ivan Ljubicic of Croatia beat Victor Ha...[0.045119430869817734, -0.06392733752727509, -...seeded Ivan Ljubicic of Croatia beat Victor Ha...World
5132Sci/Tech0.998652A zoo in the Gulf has bred a bird which is thr...[0.06242712214589119, -0.05575601011514664, -0...A zoo in the Gulf has bred a bird which is thr...Sci/Tech
6058Business0.999789The 150 brokers and traders on the Iraq Stock ...[-0.03430037200450897, 0.03404074162244797, -0...The 150 brokers and traders on the Iraq Stock ...World
2846World0.996201Tammy Hough is a life long Republican, a socia...[-0.04292772337794304, 0.02602585218846798, -0...Tammy Hough is a life long Republican, a soci...World
2846World0.996201But she's not so sure.[0.03599904477596283, 0.003016551723703742, -0...Tammy Hough is a life long Republican, a soci...World
\n","

11480 rows × 6 columns

\n","
"],"text/plain":[" category ... y\n","origin_index ... \n","989 Sci/Tech ... Sci/Tech\n","989 Sci/Tech ... Sci/Tech\n","989 Sci/Tech ... Sci/Tech\n","6701 Sci/Tech ... Sci/Tech\n","5384 Business ... World\n","... ... ... ...\n","3688 Sports ... World\n","5132 Sci/Tech ... Sci/Tech\n","6058 Business ... World\n","2846 World ... World\n","2846 World ... World\n","\n","[11480 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"DL_5aY9b3jSd"},"source":["# 4. Evaluate the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"djtoZVKBw2WU","executionInfo":{"status":"ok","timestamp":1614563791442,"user_tz":-300,"elapsed":285867,"user":{"displayName":"Gammer Otaku","photoUrl":"","userId":"18042713576744284398"}},"outputId":"53373fa3-d843-43a9-99fd-b6cbfd6f31cf"},"source":["from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," Business 0.72 0.84 0.77 2941\n"," Sci/Tech 0.81 0.78 0.79 3179\n"," Sports 0.91 0.88 0.89 2848\n"," World 0.88 0.79 0.83 2512\n","\n"," accuracy 0.82 11480\n"," macro avg 0.83 0.82 0.82 11480\n","weighted avg 0.83 0.82 0.82 11480\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mhFKVN93o1ZO"},"source":["# 5. Let's try different Sentence Embeddings"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CzJd8omao0gt","executionInfo":{"status":"ok","timestamp":1614563791443,"user_tz":-300,"elapsed":285857,"user":{"displayName":"Gammer Otaku","photoUrl":"","userId":"18042713576744284398"}},"outputId":"ff9edbfd-2882-4989-c869-8c58b3da0120"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possible sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ABHLgirmG1n9","executionInfo":{"status":"ok","timestamp":1614568520891,"user_tz":-300,"elapsed":3997133,"user":{"displayName":"Gammer Otaku","photoUrl":"","userId":"18042713576744284398"}},"outputId":"b7d059b2-51b5-460f-ef2c-494ceec1985d"},"source":["# Load pipe with bert embeds\n","# using large embeddings can take a few hours..\n","# fitted_pipe = 
nlu.load('en.embed_sentence.bert_large_uncased train.classifier').fit(train_df)\n","fitted_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier').fit(train_df)\n","\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," Business 0.00 0.00 0.00 1518\n"," Sci/Tech 0.25 1.00 0.40 1515\n"," Sports 0.00 0.00 0.00 1489\n"," World 0.00 0.00 0.00 1558\n","\n"," accuracy 0.25 6080\n"," macro avg 0.06 0.25 0.10 6080\n","weighted avg 0.06 0.25 0.10 6080\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nbpdZGoZPslz","executionInfo":{"status":"ok","timestamp":1614568636958,"user_tz":-300,"elapsed":116078,"user":{"displayName":"Gammer Otaku","photoUrl":"","userId":"18042713576744284398"}},"outputId":"05191e5e-7eea-4e38-f288-23b3adf01234"},"source":["# Load pipe with bert embeds\n","fitted_pipe = nlu.load('embed_sentence.bert train.classifier').fit(train_df)\n","\n","# predict with the trained pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L2_128 download started this may take some time.\n","Approximate size to download 16.1 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," Business 0.69 0.87 0.77 1518\n"," Sci/Tech 0.84 0.73 0.78 1515\n"," Sports 0.93 0.93 0.93 1489\n"," World 0.91 0.80 0.85 1558\n","\n"," accuracy 0.83 6080\n"," macro avg 
0.84 0.83 0.83 6080\n","weighted avg 0.84 0.83 0.83 6080\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 5.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614568659263,"user_tz":-300,"elapsed":22318,"user":{"displayName":"Gammer Otaku","photoUrl":"","userId":"18042713576744284398"}},"outputId":"790414af-e841-48f1-c3fa-613404f5a867"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['category']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," Business 0.68 0.87 0.76 382\n"," Sci/Tech 0.84 0.71 0.77 385\n"," Sports 0.92 0.91 0.91 411\n"," World 0.91 0.78 0.84 342\n","\n"," accuracy 0.82 1520\n"," macro avg 0.83 0.82 0.82 1520\n","weighted avg 0.83 0.82 0.82 1520\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 6. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918802363,"user_tz":-60,"elapsed":6277910,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"79c442f9-959e-4b14-ae85-6ef9f654f297"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 7. 
Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":128},"executionInfo":{"status":"ok","timestamp":1607918809822,"user_tz":-60,"elapsed":6285365,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f755aaa0-974c-4c6f-c079-0f3d681dbc82"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Fitting on empty Dataframe, could not infer correct training method!\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifier_confidencedocumentclassifierembed_sentence_bert_embeddings
origin_index
00.997592Tesla plans to invest 10M into the ML sectorBusiness[-0.07111635059118271, 0.9532930850982666, -1....
\n","
"],"text/plain":[" classifier_confidence ... embed_sentence_bert_embeddings\n","origin_index ... \n","0 0.997592 ... [-0.07111635059118271, 0.9532930850982666, -1....\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1607918809824,"user_tz":-60,"elapsed":6285363,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"5e8b8c8a-5cd1-4d20-bde2-a4003d5687d0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(128) | Info: Number of embedding dimensions | Currently set to : 128\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['World', 'Sci/Tech', 'Sports', 'Business']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['World', 'Sci/Tech', 'Sports', 'Business']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L2_128') | Info: unique reference name for identification | Currently set to : sent_small_bert_L2_128\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"M1LjAwJVJxun"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb index 8255e3ed..4d44817d 100644 --- a/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb +++ b/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb @@ -1 +1 @@ 
-{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_amazon.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 3 class Amazon Phone review classifier training]\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! 
pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Amazon Unlocked mobile phones dataset \n","https://www.kaggle.com/PromptCloudHQ/amazon-reviews-unlocked-mobile-phones\n","\n","dataset with unlocked mobile phone reviews in 5 review classes\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1610787546042,"user_tz":-300,"elapsed":3459,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ca2d6419-7d62-400b-d3d7-9b16fa9bce2c"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-01-16 08:58:27-- http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 452621 (442K) [text/csv]\n","Saving to: ‘Amazon_Unlocked_Mobile.csv’\n","\n","Amazon_Unlocked_Mob 100%[===================>] 442.01K 308KB/s in 1.4s \n","\n","2021-01-16 08:58:29 (308 KB/s) - ‘Amazon_Unlocked_Mobile.csv’ saved [452621/452621]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1610787551525,"user_tz":-300,"elapsed":1188,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dfe55b6f-f33a-4bd2-a2ba-5b1a306e1ab4"},"source":["import pandas as pd\n","test_path = '/content/Amazon_Unlocked_Mobile.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
0poorBought it, turned it on, did not work. Opened ...
1averageCurrently it is 2014, the 3gs is discontinued....
2good100% recomendado
3averageIt's a good phone but if you use it to browse ...
4averageIt's nice that this phone has LTE and it funct...
.........
1495poorNot happy with this phone. Not able to get but...
1496goodgreat phablet for all general uses
1497poorHate this phone had it for one day
1498goodGreat cheap phone.
1499goodVery good
\n","

1500 rows × 2 columns

\n","
"],"text/plain":[" y text\n","0 poor Bought it, turned it on, did not work. Opened ...\n","1 average Currently it is 2014, the 3gs is discontinued....\n","2 good 100% recomendado\n","3 average It's a good phone but if you use it to browse ...\n","4 average It's nice that this phone has LTE and it funct...\n","... ... ...\n","1495 poor Not happy with this phone. Not able to get but...\n","1496 good great phablet for all general uses\n","1497 poor Hate this phone had it for one day\n","1498 good Great cheap phone.\n","1499 good Very good\n","\n","[1500 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":501},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1609621542716,"user_tz":-300,"elapsed":207913,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d0eb19cc-8849-43f7-9cdf-a88fd8f11676"},"source":["# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextcategorydefault_name_embeddingscategory_confidencesentence
origin_index
0poorBought it, turned it on, did not work. Opened ...average[0.020834514871239662, 0.03326118737459183, -0...0.763940Bought it, turned it on, did not work.
0poorBought it, turned it on, did not work. Opened ...average[0.030574046075344086, -0.009678893722593784, ...1.000000Opened up the back, made sure it was in right,...
0poorBought it, turned it on, did not work. Opened ...average[0.023421283811330795, 0.02294657751917839, -0...2.000000It was supposed to be new, but i it was used.
0poorBought it, turned it on, did not work. Opened ...average[0.06009713560342789, 0.046434734016656876, -0...3.000000Found scratches on cover.
1averageCurrently it is 2014, the 3gs is discontinued....average[0.04893391206860542, -0.010221654549241066, -...0.631228Currently it is 2014, the 3gs is discontinued.
.....................
47goodBought for my mom! She loves it!good[0.021471485495567322, -0.027823669835925102, ...0.656713Bought for my mom!
47goodBought for my mom! She loves it!good[0.0001737327256705612, -0.014630521647632122,...1.000000She loves it!
48goodGave the phone as a birthday gift. My friend s...good[0.03572574257850647, 0.013357092626392841, -0...0.701626Gave the phone as a birthday gift.
48goodGave the phone as a birthday gift. My friend s...good[0.08371475338935852, -0.01581401191651821, -0...1.000000My friend seems happy with it so far.
49goodGreat Productgood[0.03334435820579529, -0.05353177338838577, -0...0.593622Great Product
\n","

215 rows × 6 columns

\n","
"],"text/plain":[" y ... sentence\n","origin_index ... \n","0 poor ... Bought it, turned it on, did not work.\n","0 poor ... Opened up the back, made sure it was in right,...\n","0 poor ... It was supposed to be new, but i it was used.\n","0 poor ... Found scratches on cover.\n","1 average ... Currently it is 2014, the 3gs is discontinued.\n","... ... ... ...\n","47 good ... Bought for my mom!\n","47 good ... She loves it!\n","48 good ... Gave the phone as a birthday gift.\n","48 good ... My friend seems happy with it so far.\n","49 good ... Great Product\n","\n","[215 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1609621546162,"user_tz":-300,"elapsed":211344,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5dc268e6-e97f-4378-85d1-8319d3f7893f"},"source":["fitted_pipe.predict(\"It worked perfectly .\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydefault_name_embeddingscategory_confidencesentence
origin_index
0average[0.06468033790588379, -0.040837567299604416, -...0.460187Bitcoin is going to the moon!
\n","
"],"text/plain":[" category ... sentence\n","origin_index ... \n","0 average ... Bitcoin is going to the moon!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1609621546165,"user_tz":-300,"elapsed":211336,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c0633c00-9bfd-412b-ee55-0f6e5b150f39"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3) | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":620},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1609621557024,"user_tz":-300,"elapsed":222179,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"12b53152-fcdf-4180-91b8-cc150e5bb23a"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.00 0.00 0.00 29\n"," good 0.65 0.94 0.77 32\n"," poor 0.69 0.95 0.80 39\n","\n"," accuracy 0.67 100\n"," macro avg 0.45 0.63 0.52 100\n","weighted avg 0.48 0.67 0.56 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytextdocumentcategorydefault_name_embeddingscategory_confidence
origin_index
0poorBought it, turned it on, did not work. Opened ...Bought it, turned it on, did not work. Opened ...poor[0.059367865324020386, 0.05043933913111687, -0...0.952295
1averageCurrently it is 2014, the 3gs is discontinued....Currently it is 2014, the 3gs is discontinued....good[0.0046275281347334385, 0.012452688068151474, ...0.396265
2good100% recomendado100% recomendadogood[0.008266163989901543, 0.00396152026951313, -0...0.773682
3averageIt's a good phone but if you use it to browse ...It's a good phone but if you use it to browse ...poor[0.05291805788874626, 0.002292224671691656, -0...0.506015
4averageIt's nice that this phone has LTE and it funct...It's nice that this phone has LTE and it funct...good[0.03426238149404526, -0.024366019293665886, -...0.648859
.....................
95poorHola, compramos dos teléfonos y vienieron tot...Hola, compramos dos teléfonos y vienieron tot...poor[0.06324272602796555, -0.06387951225042343, -0...0.790492
96goodExcelenteExcelentegood[0.03246314451098442, -0.01719777286052704, -0...0.813424
97poorthe product is good but the English language s...the product is good but the English language s...poor[0.056343767791986465, -0.016822000965476036, ...0.940151
98poorSupposed to be a brand new unlock phone. The p...Supposed to be a brand new unlock phone. The p...poor[0.03210984170436859, 0.018154876306653023, -0...0.984983
99averageMinor, very annoying glitch when texting. Not ...Minor, very annoying glitch when texting. Not ...poor[-0.026854539290070534, 0.03769969940185547, 0...0.969512
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" y ... category_confidence\n","origin_index ... \n","0 poor ... 0.952295\n","1 average ... 0.396265\n","2 good ... 0.773682\n","3 average ... 0.506015\n","4 average ... 0.648859\n","... ... ... ...\n","95 poor ... 0.790492\n","96 good ... 0.813424\n","97 poor ... 0.940151\n","98 poor ... 0.984983\n","99 average ... 0.969512\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["# Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609621557034,"user_tz":-300,"elapsed":222174,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a690639-c397-4ced-c222-981776472766"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') 
returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') 
returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model 
sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622725629,"user_tz":-300,"elapsed":1390760,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9e8f7464-0bca-4a03-9212-2ab8ccb8f319"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90) \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.72 0.67 0.69 500\n"," good 0.85 0.87 0.86 500\n"," poor 0.78 0.83 0.80 500\n","\n"," accuracy 0.79 1500\n"," macro avg 0.78 0.79 0.79 1500\n","weighted avg 0.78 0.79 0.79 1500\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622897186,"user_tz":-300,"elapsed":1562308,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a3175762-9ea0-472e-a8bf-0a64fd1176c9"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! 
\n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1609622933158,"user_tz":-300,"elapsed":1598267,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a40c74fc-c2f1-4a58-ba4e-5d1e21e39da3"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It worked perfectly.')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifier_confidencedocumenten_embed_sentence_small_bert_L12_768_embeddingsclassifier
origin_index
00.950214It worked perfectly.[0.275971919298172, 0.4924655854701996, 0.2755...good
\n","
"],"text/plain":[" classifier_confidence ... classifier\n","origin_index ... \n","0 0.950214 ... good\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622963569,"user_tz":-300,"elapsed":903,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"662a1dc1-b3fc-4137-b95a-8d7f38326fd5"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'poor', 'good']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['average', 'poor', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_class_text_classifier_demo_amazon.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_class_text_classification/NLU_training_multi_class_text_classifier_demo_amazon.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 3 class Amazon Phone review classifier training]\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","\n","\n","\n","You can achieve these results or even better on this dataset with training 
data:\n","\n","\n","\n","
\n","\n","![image.png]()\n","\n","\n","\n","\n","You can achieve these results or even better on this dataset with test data:\n","\n","\n","
\n","\n","\n","![image.png]()"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614564189229,"user_tz":-300,"elapsed":67595,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fcf335a0-c53d-409a-832e-7fdb1e54da39"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 67kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 17.7MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=90dbb9e58f0f2c2d84f268ee8dd9f2f6334927c1a225a2bb63a9208d3133b1ae\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Amazon Unlocked mobile phones dataset \n","https://www.kaggle.com/PromptCloudHQ/amazon-reviews-unlocked-mobile-phones\n","\n","dataset with unlocked mobile phone reviews in 5 review classes\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614564189976,"user_tz":-300,"elapsed":68325,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d3605fca-f1df-43b3-9f52-0e2b96fdcdcb"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-03-01 02:01:56-- http://ckl-it.de/wp-content/uploads/2021/01/Amazon_Unlocked_Mobile.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 452621 (442K) [text/csv]\n","Saving to: ‘Amazon_Unlocked_Mobile.csv’\n","\n","Amazon_Unlocked_Mob 100%[===================>] 442.01K 817KB/s in 0.5s \n","\n","2021-03-01 02:01:57 (817 KB/s) - ‘Amazon_Unlocked_Mobile.csv’ saved [452621/452621]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":415},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614564190392,"user_tz":-300,"elapsed":68732,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"793154b5-f762-4ec4-d9d5-5b93b1a71b00"},"source":["import pandas as pd\n","test_path = '/content/Amazon_Unlocked_Mobile.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","from sklearn.model_selection import train_test_split\n","\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
1040goodNice phone. Easy to read screen. I am a senior...
157goodJust as promised. This a great SmartPhone. Ver...
331averageIt'sn a powerfull phone.
842good1- the manual don`t have english.just it. I li...
766averagethe iphone is good! but i have an issue the he...
.........
1307goodthis phone met all our expectations, my dad lo...
1396averageBy mistake, I bought a wrong iPhone. It can no...
538averageA nice inexpensive phone (in its day) bought u...
1460goodExcellent phone!
1352averageGood
\n","

1200 rows × 2 columns

\n","
"],"text/plain":[" y text\n","1040 good Nice phone. Easy to read screen. I am a senior...\n","157 good Just as promised. This a great SmartPhone. Ver...\n","331 average It'sn a powerfull phone.\n","842 good 1- the manual don`t have english.just it. I li...\n","766 average the iphone is good! but i have an issue the he...\n","... ... ...\n","1307 good this phone met all our expectations, my dad lo...\n","1396 average By mistake, I bought a wrong iPhone. It can no...\n","538 average A nice inexpensive phone (in its day) bought u...\n","1460 good Excellent phone!\n","1352 average Good\n","\n","[1200 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":671},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1614564324507,"user_tz":-300,"elapsed":202824,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"56b20328-bda0-4167-8194-bd137d2e0f89"},"source":["# load a trainable pipeline by specifying the train. prefix and fit it on a datset with label and text columns\n","# Since there are no\n","\n","trainable_pipe = nlu.load('train.classifier')\n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:50] )\n","\n","\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:50] )\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["tfhub_use download started this may take some time.\n","Approximate size to download 923.7 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory_confidenceysentencecategorydefault_name_embeddings
origin_index
1040Nice phone. Easy to read screen. I am a senior...0.898174goodNice phone.good[0.013208696618676186, -0.07603943347930908, -...
1040Nice phone. Easy to read screen. I am a senior...0.898174goodEasy to read screen.good[0.000669187749736011, -0.014507204294204712, ...
1040Nice phone. Easy to read screen. I am a senior...0.898174goodI am a senior and not a tech so the learning c...good[-0.007202684413641691, 0.02755507454276085, -...
1040Nice phone. Easy to read screen. I am a senior...0.898174goodThe support at Jethro, however, made it easy.good[0.05683695524930954, 0.025926098227500916, -0...
1040Nice phone. Easy to read screen. I am a senior...0.898174goodThey were knowledgeable, easy to understand, p...good[-8.556042303098366e-05, 0.017817411571741104,...
.....................
402I bought it for my mom who is in her late 70s ...0.916765goodI wish it had an alarm.When it comes to \"senio...good[0.06437729299068451, -0.06652847677469254, -0...
402I bought it for my mom who is in her late 70s ...0.916765goodThis one is half the price of the old one and ...good[0.0695129781961441, 0.001408637617714703, -0....
402I bought it for my mom who is in her late 70s ...0.916765goodSince most seniors are hard of hearing, they r...good[0.019210360944271088, 0.07541776448488235, -0...
834good quality phone0.851306goodgood quality phonegood[0.044256918132305145, -0.012881082482635975, ...
594Excellent Product.0.989440goodExcellent Product.good[0.04413348436355591, -0.07214203476905823, -0...
\n","

169 rows × 6 columns

\n","
"],"text/plain":[" text ... default_name_embeddings\n","origin_index ... \n","1040 Nice phone. Easy to read screen. I am a senior... ... [0.013208696618676186, -0.07603943347930908, -...\n","1040 Nice phone. Easy to read screen. I am a senior... ... [0.000669187749736011, -0.014507204294204712, ...\n","1040 Nice phone. Easy to read screen. I am a senior... ... [-0.007202684413641691, 0.02755507454276085, -...\n","1040 Nice phone. Easy to read screen. I am a senior... ... [0.05683695524930954, 0.025926098227500916, -0...\n","1040 Nice phone. Easy to read screen. I am a senior... ... [-8.556042303098366e-05, 0.017817411571741104,...\n","... ... ... ...\n","402 I bought it for my mom who is in her late 70s ... ... [0.06437729299068451, -0.06652847677469254, -0...\n","402 I bought it for my mom who is in her late 70s ... ... [0.0695129781961441, 0.001408637617714703, -0....\n","402 I bought it for my mom who is in her late 70s ... ... [0.019210360944271088, 0.07541776448488235, -0...\n","834 good quality phone ... [0.044256918132305145, -0.012881082482635975, ...\n","594 Excellent Product. ... [0.04413348436355591, -0.07214203476905823, -0...\n","\n","[169 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"lVyOE2wV0fw_"},"source":["# 4. Test the fitted pipe on new example"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"qdCUg2MR0PD2","executionInfo":{"status":"ok","timestamp":1614564327393,"user_tz":-300,"elapsed":205697,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9f027cce-4780-4634-a283-2a83553390af"},"source":["fitted_pipe.predict(\"It worked perfectly .\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencesentencecategorydefault_name_embeddings
origin_index
00.939155It worked perfectly .good[0.016563203185796738, 0.002423828700557351, -...
\n","
"],"text/plain":[" category_confidence ... default_name_embeddings\n","origin_index ... \n","0 0.939155 ... [0.016563203185796738, 0.002423828700557351, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"xflpwrVjjBVD"},"source":["## 5. Configure pipe training parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UtsAUGTmOTms","executionInfo":{"status":"ok","timestamp":1614564327397,"user_tz":-300,"elapsed":205690,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ef2083f2-899f-4afb-ba61-5d5db829655e"},"source":["trainable_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setMaxEpochs(3) | Info: Maximum number of epochs to train | Currently set to : 3\n","pipe['classifier_dl'].setLr(0.005) | Info: Learning Rate | Currently set to : 0.005\n","pipe['classifier_dl'].setBatchSize(64) | Info: Batch size | Currently set to : 64\n","pipe['classifier_dl'].setDropout(0.5) | Info: Dropout coefficient | Currently set to : 0.5\n","pipe['classifier_dl'].setEnableOutputLogs(True) | Info: Whether to use stdout in addition to Spark logs. | Currently set to : True\n",">>> pipe['default_tokenizer'] has settable params:\n","pipe['default_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['default_tokenizer'].setContextChars(['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]) | Info: character list used to separate from token boundaries | Currently set to : ['.', ',', ';', ':', '!', '?', '*', '-', '(', ')', '\"', \"'\"]\n","pipe['default_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['default_tokenizer'].setMinLength(0) | Info: Set the minimum allowed legth for each token | Currently set to : 0\n","pipe['default_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed legth for each token | Currently set to : 99999\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n",">>> pipe['default_name'] has settable params:\n","pipe['default_name'].setDimension(512) | Info: Number of embedding dimensions | Currently set to : 512\n","pipe['default_name'].setLoadSP(False) | Info: Whether to load SentencePiece ops file which is required only by multi-lingual models. This is not changeable after it's set with a pretrained model nor it is compatible with Windows. | Currently set to : False\n","pipe['default_name'].setStorageRef('tfhub_use') | Info: unique reference name for identification | Currently set to : tfhub_use\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2GJdDNV9jEIe"},"source":["## 6. Retrain with new parameters"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":793},"id":"mptfvHx-MMMX","executionInfo":{"status":"ok","timestamp":1614564343531,"user_tz":-300,"elapsed":221814,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7636f973-58f5-4463-d642-e3ddea15c0df"},"source":["# Train longer!\n","trainable_pipe['classifier_dl'].setMaxEpochs(5) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:100])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.43 0.90 0.58 29\n"," good 0.80 0.84 0.82 38\n"," poor 0.00 0.00 0.00 33\n","\n"," accuracy 0.58 100\n"," macro avg 0.41 0.58 0.47 100\n","weighted avg 0.43 0.58 0.48 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory_confidenceycategorydefault_name_embeddingsdocument
origin_index
1040Nice phone. Easy to read screen. I am a senior...0.986631goodgood[0.04795455560088158, 0.04623281955718994, 0.0...Nice phone. Easy to read screen. I am a senior...
157Just as promised. This a great SmartPhone. Ver...0.944038goodgood[0.07041990756988525, -0.04600438475608826, -0...Just as promised. This a great SmartPhone. Ver...
331It'sn a powerfull phone.0.941313averagegood[0.06664986908435822, -0.010421414859592915, -...It'sn a powerfull phone.
8421- the manual don`t have english.just it. I li...0.989852goodgood[0.035181764513254166, -0.06844104826450348, 0...1- the manual don`t have english.just it. I li...
766the iphone is good! but i have an issue the he...0.956226averageaverage[0.05243289843201637, -0.0002974773815367371, ...the iphone is good! but i have an issue the he...
.....................
165It's Ok.0.851562goodgood[0.010556062683463097, -0.0037851801607757807,...It's Ok.
1181you can download music, applications, comes un...0.976442goodgood[0.05078510940074921, -0.022081272676587105, -...you can download music, applications, comes un...
531Personally, I would NEVER purchase any Apple h...0.911216averageaverage[0.06057704612612724, 0.0446881465613842, -0.0...Personally, I would NEVER purchase any Apple h...
1298dynamic isn't working0.413555pooraverage[0.041587673127651215, -0.012929275631904602, ...dynamic isn't working
769The volume music cannot to stop when it's turn...0.643960pooraverage[-0.02557053416967392, -0.006043205037713051, ...The volume music cannot to stop when it's turn...
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" text ... document\n","origin_index ... \n","1040 Nice phone. Easy to read screen. I am a senior... ... Nice phone. Easy to read screen. I am a senior...\n","157 Just as promised. This a great SmartPhone. Ver... ... Just as promised. This a great SmartPhone. Ver...\n","331 It'sn a powerfull phone. ... It'sn a powerfull phone.\n","842 1- the manual don`t have english.just it. I li... ... 1- the manual don`t have english.just it. I li...\n","766 the iphone is good! but i have an issue the he... ... the iphone is good! but i have an issue the he...\n","... ... ... ...\n","165 It's Ok. ... It's Ok.\n","1181 you can download music, applications, comes un... ... you can download music, applications, comes un...\n","531 Personally, I would NEVER purchase any Apple h... ... Personally, I would NEVER purchase any Apple h...\n","1298 dynamic isn't working ... dynamic isn't working\n","769 The volume music cannot to stop when it's turn... ... The volume music cannot to stop when it's turn...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"qFoT-s1MjTSS"},"source":["#7. Try training with different Embeddings"]},{"cell_type":"code","metadata":{"id":"nxWFzQOhjWC8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614564343535,"user_tz":-300,"elapsed":221806,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b97384ba-67f5-425e-d75f-e26804308b0b"},"source":["# We can use nlu.print_components(action='embed_sentence') to see every possibler sentence embedding we could use. 
Lets use bert!\n","nlu.print_components(action='embed_sentence')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('en.embed_sentence') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model tfhub_use\n","nlu.load('en.embed_sentence.use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model tfhub_use_lg\n","nlu.load('en.embed_sentence.albert') returns Spark NLP model albert_base_uncased\n","nlu.load('en.embed_sentence.electra') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model sent_electra_small_uncased\n","nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model sent_electra_base_uncased\n","nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model sent_electra_large_uncased\n","nlu.load('en.embed_sentence.bert') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model sent_bert_base_uncased\n","nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model sent_bert_base_cased\n","nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model sent_bert_large_uncased\n","nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model sent_bert_large_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model sent_biobert_pubmed_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model sent_biobert_pubmed_large_cased\n","nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model sent_biobert_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model 
sent_biobert_pubmed_pmc_base_cased\n","nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model sent_biobert_clinical_base_cased\n","nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model sent_biobert_discharge_base_cased\n","nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model sent_covidbert_large_uncased\n","nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model sent_small_bert_L2_128\n","nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model sent_small_bert_L4_128\n","nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model sent_small_bert_L6_128\n","nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model sent_small_bert_L8_128\n","nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model sent_small_bert_L10_128\n","nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model sent_small_bert_L12_128\n","nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model sent_small_bert_L2_256\n","nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model sent_small_bert_L4_256\n","nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model sent_small_bert_L6_256\n","nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model sent_small_bert_L8_256\n","nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model sent_small_bert_L10_256\n","nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model sent_small_bert_L12_256\n","nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model sent_small_bert_L2_512\n","nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model sent_small_bert_L4_512\n","nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model sent_small_bert_L6_512\n","nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model 
sent_small_bert_L8_512\n","nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model sent_small_bert_L10_512\n","nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model sent_small_bert_L12_512\n","nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model sent_small_bert_L2_768\n","nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model sent_small_bert_L4_768\n","nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model sent_small_bert_L6_768\n","nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model sent_small_bert_L8_768\n","nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model sent_small_bert_L10_768\n","nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model sent_small_bert_L12_768\n","For language NLU provides the following Models : \n","nlu.load('fi.embed_sentence') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model sent_bert_finnish_cased\n","nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model sent_bert_finnish_uncased\n","For language NLU provides the following Models : \n","nlu.load('xx.embed_sentence') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model sent_bert_multi_cased\n","nlu.load('xx.embed_sentence.labse') returns Spark NLP model labse\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IKK_Ii_gjJfF","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614565560400,"user_tz":-300,"elapsed":1438661,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"21cfec6b-08a8-4fa8-cf9b-659edbf33a77"},"source":["from sklearn.metrics import classification_report\n","trainable_pipe = 
nlu.load('en.embed_sentence.small_bert_L12_768 train.classifier')\n","# We usually need to train longer and use a smaller LR for non-USE based sentence embeddings\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(90) \n","trainable_pipe['classifier_dl'].setLr(0.0005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","#preds\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["sent_small_bert_L12_768 download started this may take some time.\n","Approximate size to download 392.9 MB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.73 0.66 0.69 392\n"," good 0.84 0.87 0.86 408\n"," poor 0.79 0.83 0.81 400\n","\n"," accuracy 0.79 1200\n"," macro avg 0.79 0.79 0.79 1200\n","weighted avg 0.79 0.79 0.79 1200\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 7.1 Evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614565866281,"user_tz":-300,"elapsed":1744539,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f70f8379-66b2-4037-b0ac-209c1fd88b70"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\r\n","\r\n","#sentence detector that is part of the pipe generates some NaNs. 
Let's drop them first\r\n","preds.dropna(inplace=True)\r\n","print(classification_report(preds['y'], preds['category']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.70 0.66 0.68 108\n"," good 0.79 0.82 0.80 92\n"," poor 0.75 0.77 0.76 100\n","\n"," accuracy 0.74 300\n"," macro avg 0.74 0.75 0.75 300\n","weighted avg 0.74 0.74 0.74 300\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 8. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622897186,"user_tz":-300,"elapsed":1562308,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a3175762-9ea0-472e-a8bf-0a64fd1176c9"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 9. Let's load the model from HDD.\n","This makes offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1609622933158,"user_tz":-300,"elapsed":1598267,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a40c74fc-c2f1-4a58-ba4e-5d1e21e39da3"},"source":["hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It worked perfectly.')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifier_confidencedocumenten_embed_sentence_small_bert_L12_768_embeddingsclassifier
origin_index
00.950214It worked perfectly.[0.275971919298172, 0.4924655854701996, 0.2755...good
\n","
"],"text/plain":[" classifier_confidence ... classifier\n","origin_index ... \n","0 0.950214 ... good\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609622963569,"user_tz":-300,"elapsed":903,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"662a1dc1-b3fc-4137-b95a-8d7f38326fd5"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'poor', 'good']) | Info: get the tags used to trained this NerDLModel | Currently set to : ['average', 'poor', 'good']\n","pipe['classifier_dl'].setStorageRef('sent_small_bert_L12_768') | Info: unique reference name for identification | Currently set to : sent_small_bert_L12_768\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_apple_twitter.ipynb b/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_apple_twitter.ipynb new file mode 100644 index 00000000..c989bc34 --- /dev/null +++ b/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_apple_twitter.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_multi_lingual_training_sentiment_classifier_demo_apple_twitter.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"RIV-9vEqxTBB"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/multi_lingual_text_classification/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_apple_twitter.ipynb)\n","\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class Apple Tweets Sentiment Classifier Training\n","With the [SentimentDL 
model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data : \n","\n","
\n","\n","\n","![Screenshot 2021-02-25 192910.png]()\n","\n","You can achieve these results or even better on this dataset with test data :\n","\n","
\n","\n","![image.png]()"]},{"cell_type":"code","metadata":{"id":"05-mAOF6ol-0"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Apple Twitter Sentiment dataset \n","https://www.kaggle.com/seriousran/appletwittersentimenttexts\n","\n","This dataset contains tweets directed at Apple; today we are going to train our model to predict whether a tweet's sentiment is positive or negative!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614262217245,"user_tz":-300,"elapsed":78342,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b467da65-51e5-4412-b9ec-a152dd99bd38"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/apple-twitter-sentiment-texts_multi_lingual.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-25 14:09:06-- http://ckl-it.de/wp-content/uploads/2021/02/apple-twitter-sentiment-texts_multi_lingual.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 48565 (47K) [text/csv]\n","Saving to: ‘apple-twitter-sentiment-texts_multi_lingual.csv’\n","\n","apple-twitter-senti 100%[===================>] 47.43K 159KB/s in 0.3s \n","\n","2021-02-25 14:09:07 (159 KB/s) - ‘apple-twitter-sentiment-texts_multi_lingual.csv’ saved [48565/48565]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":416},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614262238347,"user_tz":-300,"elapsed":1238,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0313826c-0c30-4d88-eb45-f5fefd7fa59d"},"source":["import pandas as pd\n","train_path = '/content/apple-twitter-sentiment-texts_multi_lingual.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","columns=['text','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neuteral\"])]\n","from sklearn.model_selection import train_test_split\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
19Companies i admire : @3QDigital @vaynermedia @...positive
239waiting a week for a DUNS number is preventing...negative
64Thanks @Apple for the preview of your smartwat...positive
108@apple ur a disgracenegative
285Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive
.........
115@MhDaDon @Apple def gotta have it, I don't eve...positive
185Hey @apple are you even thinking about fixing ...negative
57Free s/o @apple for this nice iPadpositive
199@OneRepublic @Apple #AppleAddict ! The show in...positive
76NO @apple NO! When I make an I phone Album I W...negative
\n","

228 rows × 2 columns

\n","
"],"text/plain":[" text y\n","19 Companies i admire : @3QDigital @vaynermedia @... positive\n","239 waiting a week for a DUNS number is preventing... negative\n","64 Thanks @Apple for the preview of your smartwat... positive\n","108 @apple ur a disgrace negative\n","285 Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc... positive\n",".. ... ...\n","115 @MhDaDon @Apple def gotta have it, I don't eve... positive\n","185 Hey @apple are you even thinking about fixing ... negative\n","57 Free s/o @apple for this nice iPad positive\n","199 @OneRepublic @Apple #AppleAddict ! The show in... positive\n","76 NO @apple NO! When I make an I phone Album I W... negative\n","\n","[228 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":828},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1614262641584,"user_tz":-300,"elapsed":403989,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b2152731-4cf9-466a-b5cb-fc10b39507dc"},"source":["trainable_pipe = nlu.load('xx.embed_sentence.labse train.sentiment')\n","# We usually need to train longer and use a smaller LR for non-USE based sentence embeddings\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(60) \n","\n","trainable_pipe['sentiment_dl'].setLr(0.005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the 
pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["labse download started this may take some time.\n","Approximate size to download 1.7 GB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.97 0.97 0.97 118\n"," positive 0.97 0.97 0.97 110\n","\n"," accuracy 0.97 228\n"," macro avg 0.97 0.97 0.97 228\n","weighted avg 0.97 0.97 0.97 228\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencetextdocumentyxx_embed_sentence_labse_embeddingssentiment
origin_index
190.999924Companies i admire : @3QDigital @vaynermedia @...Companies i admire : @3QDigital @vaynermedia @...positive[0.003697995562106371, -0.033728744834661484, ...positive
2390.999974waiting a week for a DUNS number is preventing...waiting a week for a DUNS number is preventing...negative[0.010905967094004154, 0.04764788970351219, 0....negative
640.999988Thanks @Apple for the preview of your smartwat...Thanks @Apple for the preview of your smartwat...positive[0.0010029257973656058, -0.0327875055372715, 0...positive
1080.999997@apple ur a disgrace@apple ur a disgracenegative[-0.03643779084086418, -0.035430122166872025, ...negative
2850.999737Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...Yeeaaayyy....awesome OS X Yosemite 10.10.1 roc...positive[-0.05403903126716614, -0.05731330066919327, -...positive
.....................
1150.999779@MhDaDon @Apple def gotta have it, I don't eve...@MhDaDon @Apple def gotta have it, I don't eve...positive[-0.055206991732120514, 0.02291109785437584, -...negative
1850.999997Hey @apple are you even thinking about fixing ...Hey @apple are you even thinking about fixing ...negative[0.012110532261431217, -0.026404496282339096, ...negative
570.999070Free s/o @apple for this nice iPadFree s/o @apple for this nice iPadpositive[-0.022697972133755684, -0.03592884913086891, ...positive
1990.999985@OneRepublic @Apple #AppleAddict ! The show in...@OneRepublic @Apple #AppleAddict ! The show in...positive[-0.033660534769296646, 0.007074450608342886, ...positive
760.999806NO @apple NO! When I make an I phone Album I W...NO @apple NO! When I make an I phone Album I W...negative[-0.05417351797223091, -0.013795379549264908, ...negative
\n","

228 rows × 6 columns

\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","19 0.999924 ... positive\n","239 0.999974 ... negative\n","64 0.999988 ... positive\n","108 0.999997 ... negative\n","285 0.999737 ... positive\n","... ... ... ...\n","115 0.999779 ... negative\n","185 0.999997 ... negative\n","57 0.999070 ... positive\n","199 0.999985 ... positive\n","76 0.999806 ... negative\n","\n","[228 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 3.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Fxx4yNkNVGFl","executionInfo":{"status":"ok","timestamp":1614262683228,"user_tz":-300,"elapsed":445190,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0fec0148-00fe-45ab-97e1-75d050c20a1d"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.79 0.88 0.83 25\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.93 0.79 0.85 33\n","\n"," accuracy 0.83 58\n"," macro avg 0.57 0.56 0.56 58\n","weighted avg 0.87 0.83 0.84 58\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"BD5OKO4Umc5U"},"source":["#4. 
Test Model on 20 languages!"]},{"cell_type":"code","metadata":{"id":"OQ72hP9unML7","colab":{"base_uri":"https://localhost:8080/","height":793},"executionInfo":{"status":"ok","timestamp":1614262761504,"user_tz":-300,"elapsed":523025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8fe2964b-fb7c-4e69-9c9d-73de6913fe70"},"source":["import pandas as pd\n","\n","train_df = pd.read_csv('/content/apple-twitter-sentiment-texts_multi_lingual.csv')\n","columns=['test_sentences','y']\n","train_df = train_df[columns]\n","train_df = train_df[~train_df[\"y\"].isin([\"neuteral\"])]\n","train_df\n","preds = fitted_pipe.predict(train_df[[\"test_sentences\",\"y\"]].iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.94 0.94 0.94 47\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.96 0.94 0.95 53\n","\n"," accuracy 0.94 100\n"," macro avg 0.63 0.63 0.63 100\n","weighted avg 0.95 0.94 0.94 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencetextdocumentyxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999736@Apple, du skal sortere dine telefoner.@Apple, du skal sortere dine telefoner.negative[-0.06157918646931648, -0.006414669565856457, ...negative
10.509814వావ్. యాల్ నీడా స్టెప్ అప్ @ యాపిల్ ఆర్టి y హే...వావ్. యాల్ నీడా స్టెప్ అప్ @ యాపిల్ ఆర్టి y హే...negative[-0.06188170239329338, -0.0713067352771759, -0...neutral
20.947591আমি আশ্চর্য হয়েছি যে গতকাল # এএপএল-তে ফ্ল্যাশ...আমি আশ্চর্য হয়েছি যে গতকাল # এএপএল-তে ফ্ল্যাশ...negative[-0.009524056687951088, -0.0228135883808136, -...negative
30.804160Uvědomili jsme si, že @apple vyrábí obrovské t...Uvědomili jsme si, že @apple vyrábí obrovské t...negative[-0.05080720782279968, 0.035508088767528534, 0...negative
40.999854Apple Inc.'s administrerende direktør donerer ...Apple Inc.'s administrerende direktør donerer ...positive[-0.04884449020028114, -0.026541609317064285, ...positive
.....................
950.994841Təşəkkür edirəm @Apple İndi bir yerdə ünsiyyət...Təşəkkür edirəm @Apple İndi bir yerdə ünsiyyət...positive[-0.030292551964521408, -0.047181397676467896,...positive
960.999003. @ tim_cook Die woede wanneer hulle @ Apple G.... @ tim_cook Die woede wanneer hulle @ Apple G...negative[0.013441476970911026, -0.053970836102962494, ...negative
970.871066ছদ্মবেশের ধরণটি হ'ল এই @ অ্যাপল @ অটোকোরেক্ট @...ছদ্মবেশের ধরণটি হ'ল এই @ অ্যাপল @ অটোকোরেক্ট @...negative[-0.02878599800169468, -0.06805533915758133, 0...negative
980.819139Было бы неплохо, если бы я мог набрать более о...Было бы неплохо, если бы я мог набрать более о...negative[0.0006763554411008954, -0.0028165297117084265...positive
990.957305@OneRepublic @Apple Вы все готовы. #ColoradoLo...@OneRepublic @Apple Вы все готовы. #ColoradoLo...positive[-0.006210440769791603, -0.053716398775577545,...positive
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999736 ... negative\n","1 0.509814 ... neutral\n","2 0.947591 ... negative\n","3 0.804160 ... negative\n","4 0.999854 ... positive\n","... ... ... ...\n","95 0.994841 ... positive\n","96 0.999003 ... negative\n","97 0.871066 ... negative\n","98 0.819139 ... positive\n","99 0.957305 ... positive\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"RjtuNUcvuJTT"},"source":["# The Model understands English\n","![en](https://www.worldometers.info/img/flags/small/tn_nz-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"o0vu7PaWkcI7","executionInfo":{"status":"ok","timestamp":1614262857568,"user_tz":-300,"elapsed":5089,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6e5fa717-8d0f-45ee-a4cf-e35298e31501"},"source":["\n","fitted_pipe.predict(\"I hate the newest update!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.955099I hate the newest update!![-0.03925565630197525, -0.0203737560659647, -0...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.955099 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":43}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"1ykjRQhCtQ4w","executionInfo":{"status":"ok","timestamp":1614262860801,"user_tz":-300,"elapsed":8289,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c91d24bf-525d-4c61-94dd-619002e2b677"},"source":["fitted_pipe.predict(\"I love the newest update!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999108I love the newest update!![-0.03266981616616249, -0.03438195586204529, -...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999108 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":44}]},{"cell_type":"markdown","metadata":{"id":"vohym-XbuNHn"},"source":["# The Model understands German\n","![de](https://www.worldometers.info/img/flags/small/tn_gm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"BbhgTSBGtTtJ","executionInfo":{"status":"ok","timestamp":1614262862167,"user_tz":-300,"elapsed":9623,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9b1a9ca1-69d4-4bc1-d12b-055459617197"},"source":["# german for: 'I love the newest update!!'\n","fitted_pipe.predict(\"Ich liebe das neueste Update !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999108Ich liebe das neueste Update !![-0.03806369751691818, -0.03677768632769585, -...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999108 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":45}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"ZaPYBxeL33pH","executionInfo":{"status":"ok","timestamp":1614268187940,"user_tz":-300,"elapsed":5536,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ea16253a-5d73-47b3-dff3-bf1f5b130858"},"source":["# german for: 'Apple is the worst company ever , I hate it !'\n","fitted_pipe.predict(\"Apple ist das schlechteste Unternehmen aller Zeiten, ich hasse es! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999567Apple ist das schlechteste Unternehmen aller Z...[-0.06440500915050507, 0.011019396595656872, -...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999567 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":78}]},{"cell_type":"markdown","metadata":{"id":"a1JbtmWquQwj"},"source":["# The Model understands Chinese\n","![zh](https://www.worldometers.info/img/flags/small/tn_ch-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"kYSYqtoRtc-P","executionInfo":{"status":"ok","timestamp":1614262865405,"user_tz":-300,"elapsed":12837,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b10b07fa-dc6a-4717-bf54-0ad5b3dd5a6b"},"source":["# Chinese for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"我讨厌最新的更新! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.956440我讨厌最新的更新![-0.035996973514556885, -0.038595858961343765,...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.956440 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"06v9SD-QtlBU","executionInfo":{"status":"ok","timestamp":1614262866947,"user_tz":-300,"elapsed":14346,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"66c97cda-e25f-4e98-b532-3f4832e89fb7"},"source":["# Chinese for: 'I love the newest update!!'\n","fitted_pipe.predict(\"我喜欢最新的更新! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999690我喜欢最新的更新![-0.03270617872476578, -0.03804901987314224, -...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999690 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":47}]},{"cell_type":"markdown","metadata":{"id":"9h7CvN4uu9Pb"},"source":["# Model understands Afrikaans\n","\n","![af](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)\n","\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"VMPhbgw9twtf","executionInfo":{"status":"ok","timestamp":1614262869932,"user_tz":-300,"elapsed":17309,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cc187ddf-8473-4b01-fae7-fea173ea62d3"},"source":["\t\t\n","# Afrikaans for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"Ek haat die nuutste opdatering !! \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.973612Ek haat die nuutste opdatering !![-0.04320189356803894, -0.023186640813946724, ...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.973612 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":48}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"zWgNTIdkumhX","executionInfo":{"status":"ok","timestamp":1614262871645,"user_tz":-300,"elapsed":18996,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d54f1fa4-d68c-4a72-89d5-037ec59ef65e"},"source":["# Afrikaans for: 'I love the newest update!!'\n","fitted_pipe.predict(\"Ek is lief vir die nuutste opdatering !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999405Ek is lief vir die nuutste opdatering !![-0.031754832714796066, -0.03518301621079445, ...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999405 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"markdown","metadata":{"id":"rSEPkC-Bwnpg"},"source":["# The model understands Vietnamese\n","![vi](https://www.worldometers.info/img/flags/small/tn_vm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"M6giDPK-wm2G","executionInfo":{"status":"ok","timestamp":1614262874014,"user_tz":-300,"elapsed":21347,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"805186fb-5086-4e1d-8f77-a9792f5aba92"},"source":["\n","# Vietnamese for: 'I love the newest update!!'\n","fitted_pipe.predict(\"Tôi yêu bản cập nhật mới nhất !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.760163Tôi yêu bản cập nhật mới nhất !![-0.007813846692442894, -0.0691518485546112, -...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.760163 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"benoJUN_4i3Q","executionInfo":{"status":"ok","timestamp":1614268507970,"user_tz":-300,"elapsed":4272,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"49226903-3a90-4203-edd9-1f9f757589c1"},"source":["\n","# Vietnamese for: 'Apple is the worst company ever , I hate it !'\n","fitted_pipe.predict(\"Apple là công ty tồi tệ nhất từ ​​trước đến nay, tôi ghét nó!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999311Apple là công ty tồi tệ nhất từ ​​trước đến na...[-0.06147119030356407, 0.037075284868478775, 0...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999311 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":83}]},{"cell_type":"markdown","metadata":{"id":"IlkmAaMoxTuy"},"source":["# The model understands Japanese\n","![ja](https://www.worldometers.info/img/flags/small/tn_ja-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"1IfJu3q8wwUt","executionInfo":{"status":"ok","timestamp":1614262876373,"user_tz":-300,"elapsed":23684,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"65e29fd1-2465-48f7-8413-b5ace9d0e4a0"},"source":["# Japanese for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"私は最新のアップデートが嫌いです! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.969206私は最新のアップデートが嫌いです![-0.030076518654823303, -0.002390796784311533,...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.969206 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":51}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"h3k7_PFhxOve","executionInfo":{"status":"ok","timestamp":1614262879338,"user_tz":-300,"elapsed":26617,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4ed372ab-e712-4450-b92a-fc8493a6598d"},"source":["\t\t\n","# Japanese for: 'I love the newest update!!'\n","fitted_pipe.predict(\"私は最新のアップデートが大好きです! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999745私は最新のアップデートが大好きです![-0.015637995675206184, -0.022478939965367317,...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999745 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":52}]},{"cell_type":"markdown","metadata":{"id":"GITfT7FK0CGv"},"source":["# The model understands Zulu\n","![zu](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"DKnrkkXzzpd5","executionInfo":{"status":"ok","timestamp":1614262881655,"user_tz":-300,"elapsed":28907,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dc7e42d0-5881-4341-fac7-81cbea6700d6"},"source":["# Zulu for: 'I love the newest update!!'\n","fitted_pipe.predict(\"Ngiyasithanda isibuyekezo esisha !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.998697Ngiyasithanda isibuyekezo esisha !![-0.018757890909910202, -0.031556978821754456,...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.998697 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":53}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"HInPIW9A4rg2","executionInfo":{"status":"ok","timestamp":1614268528972,"user_tz":-300,"elapsed":4232,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3714fced-23cc-4147-aca1-d0942ea9e6ea"},"source":["# Zulu for: 'Apple is the worst company ever , I hate it !'\n","fitted_pipe.predict(\"I-Apple iyinkampani embi kunazo zonke ezake, ngiyayizonda!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.997667I-Apple iyinkampani embi kunazo zonke ezake, n...[-0.060404010117053986, 0.02596486359834671, 0...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.997667 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":84}]},{"cell_type":"markdown","metadata":{"id":"VGVvzl_30a0T"},"source":["# The Model understands Turkish\n","![tr](https://www.worldometers.info/img/flags/small/tn_tu-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"DRNnuEeQz2pd","executionInfo":{"status":"ok","timestamp":1614262883908,"user_tz":-300,"elapsed":31138,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"531debdc-2cad-4c29-aa11-cd42173463f0"},"source":["# Turkish for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"En yeni güncellemekten nefret ediyorum !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.928913En yeni güncellemekten nefret ediyorum !![-0.006802689284086227, -0.03453183174133301, ...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.928913 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":54}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"aOSsiK6J0jWs","executionInfo":{"status":"ok","timestamp":1614262886213,"user_tz":-300,"elapsed":33425,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"512ad665-fd78-4e44-85e3-04f078f05c27"},"source":["# Turkish for: 'I love the newest update!!'\n","fitted_pipe.predict(\"En yeni güncellemeyi seviyorum !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999000En yeni güncellemeyi seviyorum !![-0.013165177777409554, -0.04192955046892166, ...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999000 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":55}]},{"cell_type":"markdown","metadata":{"id":"803qL2gt0vlb"},"source":["# The Model understands Hebrew\n","![he](https://www.worldometers.info/img/flags/small/tn_is-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"XQ5VCtxw0pc0","executionInfo":{"status":"ok","timestamp":1614262888433,"user_tz":-300,"elapsed":35622,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"669b8693-d37b-41fd-9dfb-460ce51183c6"},"source":["# Hebrew for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"אני שונא את העדכון החדש ביותר! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.866008אני שונא את העדכון החדש ביותר![-0.02679138444364071, -0.02492380701005459, 0...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.866008 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":56}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"9w2ZHfns05A4","executionInfo":{"status":"ok","timestamp":1614262890929,"user_tz":-300,"elapsed":38093,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b0bd51ca-1020-4941-cd93-a613dac85116"},"source":["# Hebrew for: 'I love the newest update!!'\n","fitted_pipe.predict(\"אני אוהב את העדכון החדש ביותר !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999521אני אוהב את העדכון החדש ביותר !![-0.025626324117183685, -0.03748653084039688, ...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999521 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":57}]},{"cell_type":"markdown","metadata":{"id":"SDlpd33H1HIX"},"source":["# The Model understands Telugu\n","![te](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"-l-u6vrz1Obe","executionInfo":{"status":"ok","timestamp":1614262893235,"user_tz":-300,"elapsed":40381,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"26cd1766-952d-4dc5-e573-7bccdad52eee"},"source":["# Telugu for: 'I love the newest update!!'\n","fitted_pipe.predict(\"నేను సరికొత్త నవీకరణను ప్రేమిస్తున్నాను !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999573నేను సరికొత్త నవీకరణను ప్రేమిస్తున్నాను !![-0.035736508667469025, -0.04187411814928055, ...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999573 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":58}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"uuR3Reqc5JbT","executionInfo":{"status":"ok","timestamp":1614268552287,"user_tz":-300,"elapsed":3384,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"36bc87f6-5a40-4252-cfdc-e865ad0a382a"},"source":["# Telugu for: 'Apple is the worst company ever , I hate it !'\n","fitted_pipe.predict(\" ఆపిల్ ఎప్పుడూ చెత్త సంస్థ, నేను దానిని ద్వేషిస్తున్నాను! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.996478ఆపిల్ ఎప్పుడూ చెత్త సంస్థ, నేను దానిని ద్వేషిస...[-0.06062706932425499, 0.01440807431936264, -0...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.996478 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":85}]},{"cell_type":"markdown","metadata":{"id":"nziBUe8t1Zwn"},"source":["# Model understands Russian\n","![ru](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"Ckyjl3YQ1VFn","executionInfo":{"status":"ok","timestamp":1614262896010,"user_tz":-300,"elapsed":43137,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3dcb6bd3-c970-479d-ac01-db99353fceaa"},"source":["# Russian for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"Я ненавижу новейшее обновление !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.965200Я ненавижу новейшее обновление !![-0.05251258239150047, -0.02137315832078457, -...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.965200 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":59}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"GIdWkfGv1gFz","executionInfo":{"status":"ok","timestamp":1614262897937,"user_tz":-300,"elapsed":45043,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"38db6b30-7903-4267-ecb1-64681b306104"},"source":["\n","\n","# Russian for: 'I love the newest update!!'\n","fitted_pipe.predict(\"Я люблю новейшее обновление !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999347Я люблю новейшее обновление !![-0.04557504877448082, -0.0393301397562027, -0...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999347 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":60}]},{"cell_type":"markdown","metadata":{"id":"8R1j9mwz2Cm4"},"source":["# Model understands Urdu\n","![ur](https://www.worldometers.info/img/flags/small/tn_pk-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"j4zwvRV11pcG","executionInfo":{"status":"ok","timestamp":1614262900319,"user_tz":-300,"elapsed":47407,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"67f66556-f229-497a-b6d4-1c5082e98ca7"},"source":["# Urdu for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"مجھے تازہ ترین اپ ڈیٹ سے نفرت ہے !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.983793مجھے تازہ ترین اپ ڈیٹ سے نفرت ہے !![-0.04204098507761955, -0.040164727717638016, ...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.983793 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":61}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"SxzTuK4b2UKV","executionInfo":{"status":"ok","timestamp":1614262902393,"user_tz":-300,"elapsed":49460,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"383c15c3-2d6c-4f30-e442-fb80eced9e16"},"source":["# Urdu for: 'I love the newest update!!'\n","fitted_pipe.predict(\"مجھے تازہ ترین تازہ کاری پسند ہے !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.997037مجھے تازہ ترین تازہ کاری پسند ہے !![-0.020344946533441544, -0.050028394907712936,...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.997037 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":62}]},{"cell_type":"markdown","metadata":{"id":"RoNg-C3k1qcX"},"source":["# Model understands Hindi\n","![hi](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"QZ9RT5Wv1r1n","executionInfo":{"status":"ok","timestamp":1614262904782,"user_tz":-300,"elapsed":51830,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ee19971a-a5da-4008-e7ba-11975896e8bb"},"source":["# hindi for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"मुझे नवीनतम अपडेट से नफरत है !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.974632मुझे नवीनतम अपडेट से नफरत है !![-0.045306481420993805, -0.03867834806442261, ...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.974632 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":63}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"quM-IL2i12-B","executionInfo":{"status":"ok","timestamp":1614262906872,"user_tz":-300,"elapsed":53901,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a8d3f11d-d520-4a86-b50c-6687d641c64f"},"source":["# hindi for: 'I love the newest update!!'\n","fitted_pipe.predict(\"मैं नवीनतम अद्यतन प्यार करता हूँ !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.998251मैं नवीनतम अद्यतन प्यार करता हूँ !![-0.03802282363176346, -0.04779476672410965, -...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.998251 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":64}]},{"cell_type":"markdown","metadata":{"id":"R4ByHOZn35Lc"},"source":["# The model understands Tatar\n","![tt](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"2JrzusSQ18F5","executionInfo":{"status":"ok","timestamp":1614262909319,"user_tz":-300,"elapsed":56327,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5c507694-7ebd-454f-e304-6b58b2acec2b"},"source":["# Tatar for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"Мин яңа яңартуны нәфрәт итәм !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.633860Мин яңа яңартуны нәфрәт итәм !![-0.03812285140156746, -0.03321801871061325, -...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.633860 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":65}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"J06Xm_Ln4AYu","executionInfo":{"status":"ok","timestamp":1614262911597,"user_tz":-300,"elapsed":58583,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"881a5e3e-7b15-4556-ec1a-1aed7da87591"},"source":["\n","# Tatar for: 'I love the newest update!!'\n","fitted_pipe.predict(\"Мин иң яңа яңартуны яратам !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999220Мин иң яңа яңартуны яратам !![-0.022450298070907593, -0.04027741029858589, ...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999220 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":66}]},{"cell_type":"markdown","metadata":{"id":"HKj5yWwwMplH"},"source":["# The Model understands French\n","![fr](https://www.worldometers.info/img/flags/small/tn_fr-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"CUHcJZfJMplL","executionInfo":{"status":"ok","timestamp":1614262914092,"user_tz":-300,"elapsed":61055,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"91211d0f-5513-4c88-b4d1-d584d7008de1"},"source":["# French for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"Je déteste la nouvelle mise à jour !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.621008Je déteste la nouvelle mise à jour !![-0.0480484738945961, -0.019815556704998016, -...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.621008 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":67}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"57NY2XoTMplM","executionInfo":{"status":"ok","timestamp":1614262916485,"user_tz":-300,"elapsed":63427,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c06b7ae1-602f-4b8a-f516-f26d70401f09"},"source":["# French for: 'I love the newest update!!'\n","fitted_pipe.predict(\"J'adore la dernière mise à jour !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.998835J'adore la dernière mise à jour !![-0.03898211941123009, -0.04296712577342987, -...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.998835 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":68}]},{"cell_type":"markdown","metadata":{"id":"jD2TBgT0Nq6F"},"source":["# The Model understands Thai\n","![th](https://www.worldometers.info/img/flags/small/tn_th-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"ICp_qoAhNq6Q","executionInfo":{"status":"ok","timestamp":1614262918943,"user_tz":-300,"elapsed":65867,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4966ef89-e747-458b-cf0b-a71fed8e330d"},"source":["# Thai for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"ฉันเกลียดการอัปเดตใหม่ล่าสุด !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.989458ฉันเกลียดการอัปเดตใหม่ล่าสุด !![-0.05030808597803116, -0.03610168397426605, -...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.989458 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":69}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"gBp11S5GNq6S","executionInfo":{"status":"ok","timestamp":1614262921583,"user_tz":-300,"elapsed":68485,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5a1701eb-3ec0-4f29-bb77-d96a6f3e2e57"},"source":["# Thai for: 'I love the newest update!!'\n","fitted_pipe.predict(\"โดนใจอัพเดทใหม่ล่าสุด !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.996418โดนใจอัพเดทใหม่ล่าสุด !![-0.046895794570446014, -0.047694914042949677,...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.996418 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":70}]},{"cell_type":"markdown","metadata":{"id":"mLItI4KZOElB"},"source":["# The Model understands Khmer\n","![km](https://www.worldometers.info/img/flags/small/tn_cb-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"Fxh1gasROElC","executionInfo":{"status":"ok","timestamp":1614262924873,"user_tz":-300,"elapsed":71754,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"928d3123-3c2f-4170-9df2-8daa0f4d8c8a"},"source":["# Khmer for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"ខ្ញុំស្អប់ការអាប់ដេតថ្មីបំផុត !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.945463ខ្ញុំស្អប់ការអាប់ដេតថ្មីបំផុត !![-0.04094553366303444, -0.04082178324460983, -...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.945463 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":71}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"SWbqMgAwOElC","executionInfo":{"status":"ok","timestamp":1614262927582,"user_tz":-300,"elapsed":74439,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"485684e6-12ec-4bcb-864e-d659e229701a"},"source":["# Khmer for: 'I love the newest update!!'\n","fitted_pipe.predict(\"ខ្ញុំចូលចិត្តការអាប់ដេតថ្មីបំផុត !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999485ខ្ញុំចូលចិត្តការអាប់ដេតថ្មីបំផុត !![-0.0357854850590229, -0.035129521042108536, -...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999485 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":72}]},{"cell_type":"markdown","metadata":{"id":"lvE-LbNiPoBT"},"source":["# The Model understands Yiddish\n","![yi](https://www.worldometers.info/img/flags/small/tn_pl-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"5h-pha_nPoBc","executionInfo":{"status":"ok","timestamp":1614262930370,"user_tz":-300,"elapsed":77204,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f877466c-16d3-408b-dd6e-b74cdbfe6921"},"source":["\n","# Yiddish for: 'I love the newest update!!'\n","fitted_pipe.predict(\"איך ליבע דער נואַסט דערהייַנטיקן !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.999170איך ליבע דער נואַסט דערהייַנטיקן !![-0.030090758576989174, -0.060998495668172836,...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.999170 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":73}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"YcM5k6Ce5Vpo","executionInfo":{"status":"ok","timestamp":1614268459462,"user_tz":-300,"elapsed":4685,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6040c044-a368-40c1-e2c0-7b3c40e167a1"},"source":["\n","# Yiddish for: 'Apple is the worst company ever , I hate it !'\n","fitted_pipe.predict(\"עפּל איז די ערגסט פירמע טאָמיד, איך האַס עס!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.972911עפּל איז די ערגסט פירמע טאָמיד, איך האַס עס![-0.04640829935669899, 0.007680136244744062, 0...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.972911 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":82}]},{"cell_type":"markdown","metadata":{"id":"XSz4WzScaAHj"},"source":["# The Model understands Kyrgyz\n","![ky](https://www.worldometers.info/img/flags/small/tn_kg-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"DXz6fhJSaAHu","executionInfo":{"status":"ok","timestamp":1614262932759,"user_tz":-300,"elapsed":79574,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"883df516-eb8a-4ddc-ecec-7d74ce580570"},"source":["\t\t\n","# Kyrgyz for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"Мен жаңы жаңыртууну жек көрөм !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.945150Мен жаңы жаңыртууну жек көрөм !![-0.040810152888298035, -0.03718705102801323, ...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.945150 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":74}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"lh_ZSHlPaAHv","executionInfo":{"status":"ok","timestamp":1614262935091,"user_tz":-300,"elapsed":81887,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5fab48e2-9d27-4e57-fb81-3d319fca6c3b"},"source":["\t\t\n","# Kyrgyz for: 'I love the newest update!!'\n","fitted_pipe.predict(\"Мен жаңы жаңыртууну жакшы көрөм !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.998985Мен жаңы жаңыртууну жакшы көрөм !![-0.029403185471892357, -0.0417410284280777, -...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.998985 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":75}]},{"cell_type":"markdown","metadata":{"id":"DGMVMKaTdJFj"},"source":["# The Model understands Tamil\n","![ta](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"JWDr_LoCdJFn","executionInfo":{"status":"ok","timestamp":1614262937303,"user_tz":-300,"elapsed":84045,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8a30bd66-2143-45da-de42-43d7086182c5"},"source":["\t\t\n","# Tamil for: 'I hate the newest update!!'\n","fitted_pipe.predict(\"நான் புதிய புதுப்பிப்பை வெறுக்கிறேன் !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.678744நான் புதிய புதுப்பிப்பை வெறுக்கிறேன் !![-0.04580854997038841, -0.03527894988656044, -...negative
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.678744 ... negative\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":76}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":110},"id":"Q6C0BmTtdJFp","executionInfo":{"status":"ok","timestamp":1614262940088,"user_tz":-300,"elapsed":85956,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dee474ab-3613-4369-9762-298f30a7542a"},"source":["\n","# Tamil for: 'I love the newest update!!'\n","fitted_pipe.predict(\"நான் புதிய புதுப்பிப்பை விரும்புகிறேன் !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencedocumentxx_embed_sentence_labse_embeddingssentiment
origin_index
00.995342நான் புதிய புதுப்பிப்பை விரும்புகிறேன் !![-0.03591679036617279, -0.05217977613210678, -...positive
\n","
"],"text/plain":[" sentiment_confidence ... sentiment\n","origin_index ... \n","0 0.995342 ... positive\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":77}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"bZZpObLOtqo8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1613856549588,"user_tz":-300,"elapsed":774877,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2aa6cc83-35cc-4ee3-aca6-7766cf3f3eac"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":105},"executionInfo":{"status":"ok","timestamp":1613856853936,"user_tz":-300,"elapsed":108557,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"35add93b-9422-4018-f8a3-e8fb0908c252"},"source":["stored_model_path = './models/classifier_dl_trained' \n","hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('I hate the newest update')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
documentsentimentsentiment_confidencexx_embed_sentence_labse_embeddings
origin_index
0I hate the newest updatenegative0.998501[-0.04603004455566406, 0.032716553658246994, 0...
\n","
"],"text/plain":[" document ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 I hate the newest update ... [-0.04603004455566406, 0.032716553658246994, 0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1613856853939,"user_tz":-300,"elapsed":108546,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3f881695-6cf7-412c-e8a7-583febe8c9f0"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this SentimentDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_covid_19.ipynb b/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_covid_19.ipynb new file mode 100644 index 00000000..33f635b7 --- /dev/null +++ b/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_covid_19.ipynb @@ -0,0 +1 @@ 
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_multi_lingual_training_sentiment_classifier_demo_covid_19.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/multi_lingual_text_classification/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_covid_19.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 Class COVID19 Sentiment Classifier Training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data : \n","\n","
\n","\n","![image.png]()\n","\n","\n","You can achieve these results or even better on this dataset with test data : \n","\n","
\n","\n","![image.png]()"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614262357345,"user_tz":-300,"elapsed":69804,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a7e6eadb-f695-44df-b198-b9d8b80caa50"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java|\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n"," Using cached https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz\n","Collecting py4j==0.10.7\n"," Using cached https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl\n","Building wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=e3a3a2ae95f9fd10debe0af25e59e0dd814994c213a6bb2caeeb5aca138987eb\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download COVID-19 NLP Text Sentiment Classification dataset \n","https://www.kaggle.com/datatattle/covid-19-nlp-text-classification\n","#Context\n","\n","This is a dataset made of tweets about COVID-19 "]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614264195966,"user_tz":-300,"elapsed":67,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3d8a4cec-3478-4c75-e895-ef86c6818a5e"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/Corona_NLP_train_multi_lingual.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-25 14:42:05-- http://ckl-it.de/wp-content/uploads/2021/02/Corona_NLP_train_multi_lingual.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 838005 (818K) [text/csv]\n","Saving to: ‘Corona_NLP_train_multi_lingual.csv.1’\n","\n","Corona_NLP_train_mu 100%[===================>] 818.36K --.-KB/s in 0.1s \n","\n","2021-02-25 14:42:05 (7.33 MB/s) - ‘Corona_NLP_train_multi_lingual.csv.1’ saved [838005/838005]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614262357357,"user_tz":-300,"elapsed":40963,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5afff45a-cff9-4336-d97d-86584b35be44"},"source":["import pandas as pd\n","train_path = '/content/Corona_NLP_train_multi_lingual.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df[columns]\n","from sklearn.model_selection import train_test_split\n","train_df, test_df = 
train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
706Since many of us are not able to continue our ...positive
240Slum Areas of metro cities where people are de...positive
493#nofilter I don't care what you're offering - ...positive
776Great interview with a USMC Lt. General who's ...positive
538US sees panic buying of weapons with huge line...negative
.........
976Things have gone from bad to worse in the UK. ...negative
327My mother-in-law went to the supermarket this ...negative
440Who would have thought that something so basic...positive
822Very impressed with Village supermarket they h...positive
517credit card tapping, new technologies like Dys...positive
\n","

1200 rows × 2 columns

\n","
"],"text/plain":[" text y\n","706 Since many of us are not able to continue our ... positive\n","240 Slum Areas of metro cities where people are de... positive\n","493 #nofilter I don't care what you're offering - ... positive\n","776 Great interview with a USMC Lt. General who's ... positive\n","538 US sees panic buying of weapons with huge line... negative\n",".. ... ...\n","976 Things have gone from bad to worse in the UK. ... negative\n","327 My mother-in-law went to the supermarket this ... negative\n","440 Who would have thought that something so basic... positive\n","822 Very impressed with Village supermarket they h... positive\n","517 credit card tapping, new technologies like Dys... positive\n","\n","[1200 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":811},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1614265349806,"user_tz":-300,"elapsed":766927,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5aa296f7-62d9-453f-8ee0-61544cbe5285"},"source":["trainable_pipe = nlu.load('xx.embed_sentence.labse train.sentiment')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(60) \n","trainable_pipe['sentiment_dl'].setLr(0.005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["labse download started this may take some time.\n","Approximate size to download 1.7 GB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.96 0.97 0.96 602\n"," positive 0.97 0.96 0.96 598\n","\n"," accuracy 0.96 1200\n"," macro avg 0.96 0.96 0.96 1200\n","weighted avg 0.96 0.96 0.96 1200\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textxx_embed_sentence_labse_embeddingsysentimentdocumentsentiment_confidence
origin_index
387In the wake of COVID-19, we are working with a...[-0.016477668657898903, -0.01123256329447031, ...positivepositiveIn the wake of COVID-19, we are working with a...0.999895
581My first job was a stock boy at a grocery stor...[0.001647047232836485, 0.02078898437321186, -0...positivepositiveMy first job was a stock boy at a grocery stor...0.999992
559#Shipping outlook cut to ‘negative’ as #coro...[-0.037411317229270935, -0.04748864844441414, ...negativenegative#Shipping outlook cut to ‘negative’ as #coro...0.999918
1063@BBCNews Will never like these as more exposur...[-0.005866150837391615, 0.030684819445014, -0....negativenegative@BBCNews Will never like these as more exposur...0.999972
319Reviewing national reporting, I am now aware o...[-0.061537005007267, -0.023719770833849907, -0...negativenegativeReviewing national reporting, I am now aware o...1.000000
.....................
348Real Heroes in Covid 19 - Supermarket staff, #...[-0.002750938991084695, -0.015061184763908386,...positivepositiveReal Heroes in Covid 19 - Supermarket staff, #...0.999999
169Given the increasingly urgent emphasis on #Soc...[0.023976236581802368, -0.03232092410326004, 0...positivepositiveGiven the increasingly urgent emphasis on #Soc...0.999937
1435This government doesn't need anything research...[0.023141534999012947, -0.006682656239718199, ...positivepositiveThis government doesn't need anything research...0.993930
729@nuaansw has produced this COVID-19 Harm Reduc...[-0.022102229297161102, -0.05045003071427345, ...negativenegative@nuaansw has produced this COVID-19 Harm Reduc...0.999574
252After working for 48 hours tearful NHS nurse u...[-0.020220007747411728, 0.012734449468553066, ...negativenegativeAfter working for 48 hours tearful NHS nurse u...0.999954
\n","

1200 rows × 6 columns

\n","
"],"text/plain":[" text ... sentiment_confidence\n","origin_index ... \n","387 In the wake of COVID-19, we are working with a... ... 0.999895\n","581 My first job was a stock boy at a grocery stor... ... 0.999992\n","559 #Shipping outlook cut to ‘negative’ as #coro... ... 0.999918\n","1063 @BBCNews Will never like these as more exposur... ... 0.999972\n","319 Reviewing national reporting, I am now aware o... ... 1.000000\n","... ... ... ...\n","348 Real Heroes in Covid 19 - Supermarket staff, #... ... 0.999999\n","169 Given the increasingly urgent emphasis on #Soc... ... 0.999937\n","1435 This government doesn't need anything research... ... 0.993930\n","729 @nuaansw has produced this COVID-19 Harm Reduc... ... 0.999574\n","252 After working for 48 hours tearful NHS nurse u... ... 0.999954\n","\n","[1200 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":45}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 3.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Fxx4yNkNVGFl","executionInfo":{"status":"ok","timestamp":1614265643566,"user_tz":-300,"elapsed":293794,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4757bb5e-02e6-4978-b52c-a8245ae2dd9c"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.85 0.92 0.88 143\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.92 0.84 0.88 157\n","\n"," accuracy 0.88 300\n"," macro avg 0.59 0.59 0.59 300\n","weighted avg 0.89 0.88 0.88 300\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"BD5OKO4Umc5U"},"source":["# 4. 
Test Model on 20 languages!"]},{"cell_type":"code","metadata":{"id":"OQ72hP9unML7","colab":{"base_uri":"https://localhost:8080/","height":792},"executionInfo":{"status":"ok","timestamp":1614265758435,"user_tz":-300,"elapsed":114880,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f6d3595b-8abf-47a4-8d65-c002056ecfe0"},"source":["train_df = pd.read_csv(\"/content/Corona_NLP_train_multi_lingual.csv\")\n","preds = fitted_pipe.predict(train_df[[\"test_sentences\",\"y\"]].iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.89 0.93 0.91 44\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.96 0.89 0.93 56\n","\n"," accuracy 0.91 100\n"," macro avg 0.62 0.61 0.61 100\n","weighted avg 0.93 0.91 0.92 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textxx_embed_sentence_labse_embeddingsysentimentdocumentsentiment_confidence
origin_index
0#Cheerios Hersteller @GeneralMills hat am Mitt...[0.009235074743628502, -0.05358916148543358, 0...positivepositive#Cheerios Hersteller @GeneralMills hat am Mitt...0.999981
1ต้องไปเยี่ยมชมเมื่อเช้านี้ซึ่งพวกเขาได้เปลี่ยน...[-0.0758102685213089, 0.023602211847901344, -0...positivepositiveต้องไปเยี่ยมชมเมื่อเช้านี้ซึ่งพวกเขาได้เปลี่ยน...0.999974
2मुझे पहली बार Covid-19 के लक्षण होने के कारण ए...[0.0010586004937067628, 0.014147285372018814, ...negativenegativeमुझे पहली बार Covid-19 के लक्षण होने के कारण ए...0.999977
3公元前2周的封锁期间更有可能死于饥饿,由于恐慌的买家,我们没有食物了。 #新冠病毒[-0.012551132589578629, 0.034000080078840256, ...negativenegative公元前2周的封锁期间更有可能死于饥饿,由于恐慌的买家,我们没有食物了。 #新冠病毒1.000000
4Don't move around unnecessary \\r\\r\\r\\nStay at...[-0.026801805943250656, -0.0068460507318377495...positivepositiveDon't move around unnecessary Stay at home. Us...0.998435
.....................
95Киберкылмышты жөнөкөйлөштүрүү максатында COVID...[-0.016219811514019966, -0.07136885076761246, ...negativenegativeКиберкылмышты жөнөкөйлөштүрүү максатында COVID...1.000000
96Ich bin alles dafür, die Kurve zu glätten und ...[-0.054645996540784836, -0.028411071747541428,...negativenegativeIch bin alles dafür, die Kurve zu glätten und ...1.000000
97איר זוכט צו רעדן צו עמעצער וואָס אַרבעט אין אַ...[-0.053356610238552094, -0.020032551139593124,...positivepositiveאיר זוכט צו רעדן צו עמעצער וואָס אַרבעט אין אַ...0.999996
98Comment les bureaux de poste gèrent le boom de...[-0.01647106371819973, -0.004591759294271469, ...positivepositiveComment les bureaux de poste gèrent le boom de...0.999818
99Acabo de ver un artículo que dice que las pers...[-0.030190108343958855, 0.0009936955757439137,...positivepositiveAcabo de ver un artículo que dice que las pers...0.999787
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" text ... sentiment_confidence\n","origin_index ... \n","0 #Cheerios Hersteller @GeneralMills hat am Mitt... ... 0.999981\n","1 ต้องไปเยี่ยมชมเมื่อเช้านี้ซึ่งพวกเขาได้เปลี่ยน... ... 0.999974\n","2 मुझे पहली बार Covid-19 के लक्षण होने के कारण ए... ... 0.999977\n","3 公元前2周的封锁期间更有可能死于饥饿,由于恐慌的买家,我们没有食物了。 #新冠病毒 ... 1.000000\n","4 Don't move around unnecessary \\r\\r\\r\\nStay at... ... 0.998435\n","... ... ... ...\n","95 Киберкылмышты жөнөкөйлөштүрүү максатында COVID... ... 1.000000\n","96 Ich bin alles dafür, die Kurve zu glätten und ... ... 1.000000\n","97 איר זוכט צו רעדן צו עמעצער וואָס אַרבעט אין אַ... ... 0.999996\n","98 Comment les bureaux de poste gèrent le boom de... ... 0.999818\n","99 Acabo de ver un artículo que dice que las pers... ... 0.999787\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":47}]},{"cell_type":"markdown","metadata":{"id":"RjtuNUcvuJTT"},"source":["# The Model understands English\n","![en](https://www.worldometers.info/img/flags/small/tn_nz-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"o0vu7PaWkcI7","executionInfo":{"status":"ok","timestamp":1614265760808,"user_tz":-300,"elapsed":2399,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"38a60b34-c232-4952-fc72-9ac86197fa75"},"source":["\n","fitted_pipe.predict(\"5000 people died because of COVID 19 Today!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.025880252942442894, -0.031868163496255875,...negative5000 people died because of COVID 19 Today!!1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.025880252942442894, -0.031868163496255875,... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":48}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1ykjRQhCtQ4w","executionInfo":{"status":"ok","timestamp":1614265763275,"user_tz":-300,"elapsed":2480,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"42392194-8933-4852-de5e-2704e16911e1"},"source":["fitted_pipe.predict(\"We finally found a cure to COVID!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.016816433519124985, -0.048802293837070465,...positiveWe finally found a cure to COVID!!0.999656
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.016816433519124985, -0.048802293837070465,... ... 0.999656\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"markdown","metadata":{"id":"vohym-XbuNHn"},"source":["# The Model understands German\n","![de](https://www.worldometers.info/img/flags/small/tn_gm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"dzaaZrI4tVWc","executionInfo":{"status":"ok","timestamp":1614265765582,"user_tz":-300,"elapsed":2319,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"13aae876-e205-4190-f4d2-67cf57f8a909"},"source":["# german for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"5000 Menschen starben heute an COVID 19 !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.026059169322252274, -0.023131389170885086,...negative5000 Menschen starben heute an COVID 19 !!1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.026059169322252274, -0.023131389170885086,... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"BbhgTSBGtTtJ","executionInfo":{"status":"ok","timestamp":1614265768102,"user_tz":-300,"elapsed":2533,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7e812a0b-c9c5-4ff1-9916-4887821ec8ee"},"source":["# german for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"Wir haben endlich ein Heilmittel gegen COVID gefunden !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.019082775339484215, -0.04215678572654724, ...positiveWir haben endlich ein Heilmittel gegen COVID g...0.999515
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.019082775339484215, -0.04215678572654724, ... ... 0.999515\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":51}]},{"cell_type":"markdown","metadata":{"id":"a1JbtmWquQwj"},"source":["# The Model understands Chinese\n","![zh](https://www.worldometers.info/img/flags/small/tn_ch-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"kYSYqtoRtc-P","executionInfo":{"status":"ok","timestamp":1614265770629,"user_tz":-300,"elapsed":2541,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2e067e09-358a-4139-f379-45c12392394c"},"source":["# Chinese for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"5000人因今天的Covid 19人死了! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.008618051186203957, -0.01350542064756155, ...negative5000人因今天的Covid 19人死了!1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.008618051186203957, -0.01350542064756155, ... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":52}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"06v9SD-QtlBU","executionInfo":{"status":"ok","timestamp":1614265773351,"user_tz":-300,"elapsed":2732,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fad0fe6b-18f0-49aa-dc36-e7e06219f214"},"source":["# Chinese for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"我们终于找到了治愈COVID的方法! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.01916874758899212, -0.06377226114273071, -...positive我们终于找到了治愈COVID的方法!0.998607
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.01916874758899212, -0.06377226114273071, -... ... 0.998607\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":53}]},{"cell_type":"markdown","metadata":{"id":"9h7CvN4uu9Pb"},"source":["# Model understands Afrikaans\n","\n","![af](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)\n","\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"VMPhbgw9twtf","executionInfo":{"status":"ok","timestamp":1614265776019,"user_tz":-300,"elapsed":2686,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c8b43f59-8d0b-4afc-9fc9-d526d62e7835"},"source":["# Afrikaans for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"5000 mense is dood weens COVID 19 Vandag !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.02801789715886116, -0.04049520194530487, -...negative5000 mense is dood weens COVID 19 Vandag !!1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.02801789715886116, -0.04049520194530487, -... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":54}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"zWgNTIdkumhX","executionInfo":{"status":"ok","timestamp":1614265778161,"user_tz":-300,"elapsed":2158,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5acd6b80-3b97-4cd9-d9fe-8102d3f8e30c"},"source":["# Afrikaans for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"Ons het uiteindelik 'n geneesmiddel gevind om te covid !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.03133018687367439, -0.03976881504058838, -...positiveOns het uiteindelik 'n geneesmiddel gevind om ...0.999834
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.03133018687367439, -0.03976881504058838, -... ... 0.999834\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":55}]},{"cell_type":"markdown","metadata":{"id":"rSEPkC-Bwnpg"},"source":["# The model understands Vietnamese\n","![vi](https://www.worldometers.info/img/flags/small/tn_vm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"7ksJosuTOYpE","executionInfo":{"status":"ok","timestamp":1614265780694,"user_tz":-300,"elapsed":2546,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cd28f2ac-ee0a-4eec-f1fe-a24df5be7dd6"},"source":["# Vietnamese for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"Cuối cùng chúng tôi đã tìm ra cách chữa khỏi COVID !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.011992470361292362, -0.08133696019649506, ...positiveCuối cùng chúng tôi đã tìm ra cách chữa khỏi C...0.921375
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.011992470361292362, -0.08133696019649506, ... ... 0.921375\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":56}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"VfG3UaCTEZB_","executionInfo":{"status":"ok","timestamp":1614271551252,"user_tz":-300,"elapsed":5888,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d2986221-dc09-4775-fdda-29df1c00bbf8"},"source":["# Vietnamese for: 'Many people lost their lives to the virus'\n","fitted_pipe.predict(\"Nhiều người đã mất mạng vì virus\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[0.057947225868701935, -0.03248953819274902, -...negativeNhiều người đã mất mạng vì virus0.999957
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [0.057947225868701935, -0.03248953819274902, -... ... 0.999957\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":85}]},{"cell_type":"markdown","metadata":{"id":"IlkmAaMoxTuy"},"source":["# The model understands Japanese\n","![ja](https://www.worldometers.info/img/flags/small/tn_ja-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1IfJu3q8wwUt","executionInfo":{"status":"ok","timestamp":1614265784023,"user_tz":-300,"elapsed":3342,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"abd4a1d7-8192-4ebc-af01-e3503c8d3f61"},"source":["# Japanese for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"今日はCOVID19で5000人が亡くなりました!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.027568351477384567, -0.046357180923223495,...negative今日はCOVID19で5000人が亡くなりました!!0.999995
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.027568351477384567, -0.046357180923223495,... ... 0.999995\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":57}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":123},"id":"-RjXWbFIPvIs","executionInfo":{"status":"ok","timestamp":1614274380023,"user_tz":-300,"elapsed":5620,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a9c7496d-7710-4e96-c497-76fe532221da"},"source":["# Japanese for: 'We are finally free from the virus !! We can now live our lives the way we want !'\n","fitted_pipe.predict(\"ついにウイルスから解放されました!!私たちは今、私たちが望むように私たちの生活を送ることができます!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[0.040638964623212814, -0.03328423202037811, -...positiveついにウイルスから解放されました!!私たちは今、私たちが望むように私たちの生活を送ることがで...0.998569
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [0.040638964623212814, -0.03328423202037811, -... ... 0.998569\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":86}]},{"cell_type":"markdown","metadata":{"id":"GITfT7FK0CGv"},"source":["# The model understands Zulu\n","![zu](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"ifRhs6e7OcR3","executionInfo":{"status":"ok","timestamp":1614265785622,"user_tz":-300,"elapsed":1618,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"00f924b3-415d-4021-cff9-d810007b5148"},"source":["# Zulu for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"Ekugcineni sathola ikhambi le-Covion !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.028748247772455215, -0.05506820231676102, ...positiveEkugcineni sathola ikhambi le-Covion !!0.990476
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.028748247772455215, -0.05506820231676102, ... ... 0.990476\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":58}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"6uelDwq4xdWv","executionInfo":{"status":"ok","timestamp":1614265789157,"user_tz":-300,"elapsed":3546,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"030328e3-d339-4df7-dc69-9487327a12c5"},"source":["# Zulu for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"Kushone abantu abayi-5000 ngenxa ye-COVID 19 Namuhla !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.028865862637758255, -0.025487471371889114,...negativeKushone abantu abayi-5000 ngenxa ye-COVID 19 N...1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.028865862637758255, -0.025487471371889114,... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":59}]},{"cell_type":"markdown","metadata":{"id":"VGVvzl_30a0T"},"source":["# The Model understands Turkish\n","![tr](https://www.worldometers.info/img/flags/small/tn_tu-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"DRNnuEeQz2pd","executionInfo":{"status":"ok","timestamp":1614265790122,"user_tz":-300,"elapsed":971,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"11aa06ef-9718-40e0-ecfc-d8ef499c3a09"},"source":["# Turkish for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"Bugün Covid 19 yüzünden 5000 kişi öldü !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.019384166225790977, -0.01711968705058098, ...negativeBugün Covid 19 yüzünden 5000 kişi öldü !!0.999975
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.019384166225790977, -0.01711968705058098, ... ... 0.999975\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":60}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"aOSsiK6J0jWs","executionInfo":{"status":"ok","timestamp":1614265793803,"user_tz":-300,"elapsed":3696,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f150e53d-abf7-4e40-9989-9985840e5a4e"},"source":["# Turkish for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"Sonunda COVID'e bir tedavi bulduk !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.02832956053316593, -0.05583049729466438, -...positiveSonunda COVID'e bir tedavi bulduk !!0.998565
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.02832956053316593, -0.05583049729466438, -... ... 0.998565\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":61}]},{"cell_type":"markdown","metadata":{"id":"803qL2gt0vlb"},"source":["# The Model understands Hebrew\n","![he](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"XQ5VCtxw0pc0","executionInfo":{"status":"ok","timestamp":1614265795700,"user_tz":-300,"elapsed":1904,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f9768126-f481-47f3-ab96-da1e9a319751"},"source":["# Hebrew for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"Bugün Covid 19 yüzünden 5000 kişi öldü !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.019384166225790977, -0.01711968705058098, ...negativeBugün Covid 19 yüzünden 5000 kişi öldü !!0.999975
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.019384166225790977, -0.01711968705058098, ... ... 0.999975\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":62}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"9w2ZHfns05A4","executionInfo":{"status":"ok","timestamp":1614265797387,"user_tz":-300,"elapsed":1693,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"59c6461d-0d5b-4d6c-a61d-e72e2272be2b"},"source":["# Hebrew for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"סוף סוף מצאנו תרופה ל- COVID !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.03819584101438522, -0.056543394923210144, ...positiveסוף סוף מצאנו תרופה ל- COVID !!0.997294
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.03819584101438522, -0.056543394923210144, ... ... 0.997294\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":63}]},{"cell_type":"markdown","metadata":{"id":"SDlpd33H1HIX"},"source":["# The Model understands Telugu\n","![te](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Kc5n1bzv1BJT","executionInfo":{"status":"ok","timestamp":1614265801129,"user_tz":-300,"elapsed":3751,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"70a56069-bd6c-41fb-a113-61e3dbc5c04d"},"source":["# Turkish (not Telugu) for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"Bugün Covid 19 yüzünden 5000 kişi öldü !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.019384166225790977, -0.01711968705058098, ...negativeBugün Covid 19 yüzünden 5000 kişi öldü !!0.999975
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.019384166225790977, -0.01711968705058098, ... ... 0.999975\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":64}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"-l-u6vrz1Obe","executionInfo":{"status":"ok","timestamp":1614265802640,"user_tz":-300,"elapsed":1517,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"13aae8b0-0c0e-41ed-abbc-bd2af611835c"},"source":["# Telugu for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"చివరకు మేము కోవిడ్ కు నివారణను కనుగొన్నాము !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.0431729294359684, -0.054265137761831284, -...positiveచివరకు మేము కోవిడ్ కు నివారణను కనుగొన్నాము !!0.984499
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.0431729294359684, -0.054265137761831284, -... ... 0.984499\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":65}]},{"cell_type":"markdown","metadata":{"id":"nziBUe8t1Zwn"},"source":["# Model understands Russian\n","![ru](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Ckyjl3YQ1VFn","executionInfo":{"status":"ok","timestamp":1614265804129,"user_tz":-300,"elapsed":1504,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"196ecafe-781d-4bef-c12b-f900a7796c8a"},"source":["# Russian for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"Сегодня 5000 человек погибли из-за COVID 19 !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.0318034328520298, -0.03972546011209488, -0...negativeСегодня 5000 человек погибли из-за COVID 19 !!1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.0318034328520298, -0.03972546011209488, -0... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":66}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"GIdWkfGv1gFz","executionInfo":{"status":"ok","timestamp":1614265807135,"user_tz":-300,"elapsed":3022,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"483bd323-7b49-40a8-d34a-98e095ce6606"},"source":["# Russian for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"Мы наконец нашли лекарство от COVID !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.03489401191473007, -0.045090336352586746, ...positiveМы наконец нашли лекарство от COVID !!0.999111
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.03489401191473007, -0.045090336352586746, ... ... 0.999111\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":67}]},{"cell_type":"markdown","metadata":{"id":"8R1j9mwz2Cm4"},"source":["# Model understands Urdu\n","![ur](https://www.worldometers.info/img/flags/small/tn_pk-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"j4zwvRV11pcG","executionInfo":{"status":"ok","timestamp":1614265810199,"user_tz":-300,"elapsed":3077,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"084fe49e-011f-42f9-be6a-b61632d8b46a"},"source":["# Urdu for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"آج کوویڈ 19 کی وجہ سے 5000 افراد ہلاک ہوگئے !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.0472625195980072, -0.04384293034672737, -0...negativeآج کوویڈ 19 کی وجہ سے 5000 افراد ہلاک ہوگئے !!1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.0472625195980072, -0.04384293034672737, -0... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":68}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SxzTuK4b2UKV","executionInfo":{"status":"ok","timestamp":1614265811584,"user_tz":-300,"elapsed":1391,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"25cff301-9abc-4158-a501-5008d7fe581d"},"source":["# Russian (not Urdu) for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"Мы наконец нашли лекарство от COVID !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.03489401191473007, -0.045090336352586746, ...positiveМы наконец нашли лекарство от COVID !!0.999111
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.03489401191473007, -0.045090336352586746, ... ... 0.999111\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":69}]},{"cell_type":"markdown","metadata":{"id":"RoNg-C3k1qcX"},"source":["# Model understands Hindi\n","![hi](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"QZ9RT5Wv1r1n","executionInfo":{"status":"ok","timestamp":1614265813854,"user_tz":-300,"elapsed":2279,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"35d27ac7-0b7c-4f12-f450-8423f7e836c5"},"source":["# hindi for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"COVID 19 की वजह से 5000 लोग मारे गए !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.013016759417951107, -0.0392494760453701, -...negativeCOVID 19 की वजह से 5000 लोग मारे गए !!1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.013016759417951107, -0.0392494760453701, -... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":70}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"quM-IL2i12-B","executionInfo":{"status":"ok","timestamp":1614265816329,"user_tz":-300,"elapsed":2482,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"93240961-5ca4-465c-94f6-f891f9b439f5"},"source":["# hindi for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"हम अंत में कोविद को एक इलाज मिला !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.04375813901424408, -0.05282778665423393, -...positiveहम अंत में कोविद को एक इलाज मिला !!0.999338
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.04375813901424408, -0.05282778665423393, -... ... 0.999338\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":71}]},{"cell_type":"markdown","metadata":{"id":"R4ByHOZn35Lc"},"source":["# The model understands Tatar\n","![tt](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"2JrzusSQ18F5","executionInfo":{"status":"ok","timestamp":1614265820153,"user_tz":-300,"elapsed":3835,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8b29ffec-facc-43ef-f6c2-544ed6ba78e4"},"source":["# Tatar for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"COVID 19 аркасында 5000 кеше үлде Бүген !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.027416089549660683, -0.03223974257707596, ...negativeCOVID 19 аркасында 5000 кеше үлде Бүген !!1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.027416089549660683, -0.03223974257707596, ... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":72}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"J06Xm_Ln4AYu","executionInfo":{"status":"ok","timestamp":1614265821662,"user_tz":-300,"elapsed":1565,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"120a4b4a-8252-460d-b0be-6b952d1e6c53"},"source":["\t\n","# Tatar for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"Ниһаять, без COVID өчен дәвалау таптык !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.040308788418769836, -0.05435319244861603, ...positiveНиһаять, без COVID өчен дәвалау таптык !!0.999670
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.040308788418769836, -0.05435319244861603, ... ... 0.999670\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":73}]},{"cell_type":"markdown","metadata":{"id":"HKj5yWwwMplH"},"source":["# The Model understands French\n","![fr](https://www.worldometers.info/img/flags/small/tn_fr-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"CUHcJZfJMplL","executionInfo":{"status":"ok","timestamp":1614265823923,"user_tz":-300,"elapsed":2276,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"34a76371-9b08-4269-c469-fbe737b4a5d0"},"source":["\t\n","# French for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"5000 personnes sont mortes à cause du COVID 19 aujourd'hui !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.025422198697924614, -0.04044008627533913, ...negative5000 personnes sont mortes à cause du COVID 19...1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.025422198697924614, -0.04044008627533913, ... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":74}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"57NY2XoTMplM","executionInfo":{"status":"ok","timestamp":1614265826276,"user_tz":-300,"elapsed":2359,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"65f119bd-1173-42b3-a11d-cd8bf3cf5b93"},"source":["# French for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"Nous avons finalement trouvé un remède à Covid !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.03685029596090317, -0.04665077477693558, -...positiveNous avons finalement trouvé un remède à Covid !!0.999732
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.03685029596090317, -0.04665077477693558, -... ... 0.999732\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":75}]},{"cell_type":"markdown","metadata":{"id":"jD2TBgT0Nq6F"},"source":["# The Model understands Thai\n","![th](https://www.worldometers.info/img/flags/small/tn_th-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"gBp11S5GNq6S","executionInfo":{"status":"ok","timestamp":1614274481993,"user_tz":-300,"elapsed":5260,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a74007d6-5e65-426e-e1ce-a169ccebcd77"},"source":["# Thai for: 'Many people faced depression because of the virus '\n","fitted_pipe.predict(\"หลายคนต้องเผชิญกับภาวะซึมเศร้าเพราะไวรัส\")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.013120845891535282, -0.056096360087394714,...negativeหลายคนต้องเผชิญกับภาวะซึมเศร้าเพราะไวรัส1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.013120845891535282, -0.056096360087394714,... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":89}]},{"cell_type":"code","metadata":{"id":"R6nKI7C3QKa3"},"source":["# Thai for: 'Many people lost their lives to the virus'\n","fitted_pipe.predict(\"หลายคนเสียชีวิตให้กับไวรัส \")\n","\t"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"mLItI4KZOElB"},"source":["# The Model understands Khmer\n","![km](https://www.worldometers.info/img/flags/small/tn_cb-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SWbqMgAwOElC","executionInfo":{"status":"ok","timestamp":1614265831073,"user_tz":-300,"elapsed":2321,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3417d6f8-4f2b-4647-97fe-b798ae79f89e"},"source":["# Khmer for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"ទីបំផុតយើងបានរកឃើញការព្យាបាលដើម្បី covid !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.0293172188103199, -0.05483138933777809, -0...positiveទីបំផុតយើងបានរកឃើញការព្យាបាលដើម្បី covid !!0.999511
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.0293172188103199, -0.05483138933777809, -0... ... 0.999511\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":77}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"beoCtm4xQf2P","executionInfo":{"status":"ok","timestamp":1614274517614,"user_tz":-300,"elapsed":3570,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f7a12437-344b-4bb2-d8be-a1cbc80cf40e"},"source":["# Khmer for: 'Many people faced depression because of the virus '\n","fitted_pipe.predict(\"មនុស្សជាច្រើនប្រឈមនឹងជំងឺធ្លាក់ទឹកចិត្តដោយសារតែវីរុស\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[0.010295511223375797, -0.045677755028009415, ...negativeមនុស្សជាច្រើនប្រឈមនឹងជំងឺធ្លាក់ទឹកចិត្តដោយសារត...1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [0.010295511223375797, -0.045677755028009415, ... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":90}]},{"cell_type":"markdown","metadata":{"id":"lvE-LbNiPoBT"},"source":["# The Model understands Yiddish\n","![yi](https://www.worldometers.info/img/flags/small/tn_pl-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"sZlmLhajPoBb","executionInfo":{"status":"ok","timestamp":1614265833338,"user_tz":-300,"elapsed":2271,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f6ca485d-6916-4076-d8da-4fb4de76c312"},"source":["# Yiddish for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"5000 מענטשן געשטארבן ווייַל פון COVID 19 הייַנט !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.026097236201167107, -0.03218623995780945, ...negative5000 מענטשן געשטארבן ווייַל פון COVID 19 הייַנ...1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.026097236201167107, -0.03218623995780945, ... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":78}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"5h-pha_nPoBc","executionInfo":{"status":"ok","timestamp":1614265835936,"user_tz":-300,"elapsed":2608,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6dacd83b-2beb-4e81-f615-e47dd2d98f13"},"source":["# Yiddish for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"מיר לעסאָף געפֿונען אַ היילונג צו קאָוויד !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.04493892192840576, -0.054013773798942566, ...positiveמיר לעסאָף געפֿונען אַ היילונג צו קאָוויד !!0.999784
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.04493892192840576, -0.054013773798942566, ... ... 0.999784\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":79}]},{"cell_type":"markdown","metadata":{"id":"XSz4WzScaAHj"},"source":["# The Model understands Kyrgyz\n","![ky](https://www.worldometers.info/img/flags/small/tn_kg-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"DXz6fhJSaAHu","executionInfo":{"status":"ok","timestamp":1614265838103,"user_tz":-300,"elapsed":2176,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1575b738-14e9-46c1-b015-58bdaf7917c0"},"source":["\t\n","# Kyrgyz for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"COVID 19дун айынан 5000 адам каза болду !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.019092924892902374, -0.03979476913809776, ...negativeCOVID 19дун айынан 5000 адам каза болду !!1.000000
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.019092924892902374, -0.03979476913809776, ... ... 1.000000\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":80}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"lh_ZSHlPaAHv","executionInfo":{"status":"ok","timestamp":1614265840677,"user_tz":-300,"elapsed":2585,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e8ffbc0c-6d03-4ca0-f719-2423c0f2b137"},"source":["# Kyrgyz for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"Акыры, ковидди айыктырдык !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.030513573437929153, -0.06864563375711441, ...positiveАкыры, ковидди айыктырдык !!0.998759
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.030513573437929153, -0.06864563375711441, ... ... 0.998759\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":81}]},{"cell_type":"markdown","metadata":{"id":"DGMVMKaTdJFj"},"source":["# The Model understands Tamil\n","![ta](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"JWDr_LoCdJFn","executionInfo":{"status":"ok","timestamp":1614265842915,"user_tz":-300,"elapsed":2245,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"64ae5163-20df-4679-83af-213d0d36f80b"},"source":["# Tamil for: '5000 people died because of COVID19 Today!!'\n","fitted_pipe.predict(\"5000 பேர் இன்று Covid 19 இன்று இறந்தனர் !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.025186140090227127, -0.02476668730378151, ...negative5000 பேர் இன்று Covid 19 இன்று இறந்தனர் !!0.999998
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.025186140090227127, -0.02476668730378151, ... ... 0.999998\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":82}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Q6C0BmTtdJFp","executionInfo":{"status":"ok","timestamp":1614265845255,"user_tz":-300,"elapsed":2348,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5a0a7150-0eb5-400a-9dd3-b38f8f1ab49c"},"source":["# Tamil for: 'We finally found a cure to COVID!!'\n","fitted_pipe.predict(\"COVID க்கு ஒரு தீர்வைக் கண்டுபிடித்தோம் !! \")\n","\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentimentdocumentsentiment_confidence
origin_index
0[-0.025669323280453682, -0.05522870644927025, ...positiveCOVID க்கு ஒரு தீர்வைக் கண்டுபிடித்தோம் !!0.905109
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.025669323280453682, -0.05522870644927025, ... ... 0.905109\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":83}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1613911765181,"user_tz":-300,"elapsed":2799490,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ed7a1530-71f6-408c-b580-6c2a765d250b"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":105},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1613912046180,"user_tz":-300,"elapsed":140838,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2e961bfa-0cee-4669-d56f-2175a4015b5a"},"source":["stored_model_path = './models/classifier_dl_trained' \n","hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Everything is under control !')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentimentdocumentxx_embed_sentence_labse_embeddingssentiment_confidence
origin_index
0positiveEverything is under control ![-0.02810787223279476, -0.06088888645172119, -...0.999970
\n","
"],"text/plain":[" sentiment ... sentiment_confidence\n","origin_index ... \n","0 positive ... 0.999970\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UqXHHH-TQTuu","executionInfo":{"status":"ok","timestamp":1613855324504,"user_tz":-300,"elapsed":1038,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b7f1025d-db21-423f-b45e-6bf676d83d85"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this SentimentDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_reddit.ipynb b/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_reddit.ipynb new file mode 100644 index 00000000..a0e5f492 --- /dev/null +++ b/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_reddit.ipynb @@ -0,0 +1 @@ 
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_multi_lingual_training_sentiment_classifier_demo_reddit.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_reddit.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 class Reddit comments sentiment classifier training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data : \n","\n","
\n","\n","![image.png]()\n","\n","\n","You can achieve these results or even better on this dataset with training data : \n","\n","
\n","\n","![image.png]()"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614262739805,"user_tz":-300,"elapsed":89714,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7dbb19af-8e37-49da-bd1c-925ad3c1a763"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 77kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 19.7MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=32cc92322f5ad0116734a766394fc081f85f2ea384c673eb154fe7cee8b14f82\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Reddit Sentiment dataset \n","https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset\n","#Context\n","\n","This dataset was created as part of a university project on sentiment analysis of multi-source social media platforms using PySpark."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614262739814,"user_tz":-300,"elapsed":89672,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ce24e73a-6e5e-4e69-c0cd-11466b2b75ce"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/Reddit_Data_multi_lingual.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-25 14:17:49-- http://ckl-it.de/wp-content/uploads/2021/02/Reddit_Data_multi_lingual.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 193200 (189K) [text/csv]\n","Saving to: ‘Reddit_Data_multi_lingual.csv’\n","\n","Reddit_Data_multi_l 100%[===================>] 188.67K --.-KB/s in 0.07s \n","\n","2021-02-25 14:17:50 (2.51 MB/s) - ‘Reddit_Data_multi_lingual.csv’ saved [193200/193200]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614262739817,"user_tz":-300,"elapsed":89626,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"94a2db5c-8eff-4f6a-9664-0a8265e7502f"},"source":["import pandas as pd\n","train_path = '/content/Reddit_Data_multi_lingual.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df[columns]\n","from sklearn.model_selection import train_test_split\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
26tea partier expresses support for namo after ...negative
485tried being secular before india after seeing...positive
518your stage you should strong enough take the ...positive
299they were worried about the woman rights they...negative
107please don’ editorialise post titles you ha...negative
.........
32most religions have dogmatic orthodox well eso...positive
126keep fighting gif today twgok girls kusunoki k...positive
531take some notes valve this jpg really nice sla...positive
440great goalpositive
319modi the leader the indian neo nazi party bjp ...positive
\n","

480 rows × 2 columns

\n","
"],"text/plain":[" text y\n","26 tea partier expresses support for namo after ... negative\n","485 tried being secular before india after seeing... positive\n","518 your stage you should strong enough take the ... positive\n","299 they were worried about the woman rights they... negative\n","107 please don’ editorialise post titles you ha... negative\n",".. ... ...\n","32 most religions have dogmatic orthodox well eso... positive\n","126 keep fighting gif today twgok girls kusunoki k... positive\n","531 take some notes valve this jpg really nice sla... positive\n","440 great goal positive\n","319 modi the leader the indian neo nazi party bjp ... positive\n","\n","[480 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":642},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1614263361027,"user_tz":-300,"elapsed":710804,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"59ed578b-4a9e-418b-fddd-d8e82b5b8cfe"},"source":["trainable_pipe = nlu.load('xx.embed_sentence.labse train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(60) \n","trainable_pipe['sentiment_dl'].setLr(0.005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe 
generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["labse download started this may take some time.\n","Approximate size to download 1.7 GB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.98 0.99 0.98 229\n"," positive 0.99 0.98 0.98 251\n","\n"," accuracy 0.98 480\n"," macro avg 0.98 0.98 0.98 480\n","weighted avg 0.98 0.98 0.98 480\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textsentiment_confidencesentimentyxx_embed_sentence_labse_embeddingsdocument
origin_index
26tea partier expresses support for namo after ...0.999853negativenegative[0.010386765003204346, 0.024560054764151573, 0...tea partier expresses support for namo after e...
485tried being secular before india after seeing...0.999999positivepositive[-0.02900528907775879, 0.04157489538192749, -0...tried being secular before india after seeing ...
518your stage you should strong enough take the ...0.999873positivepositive[0.013262578286230564, 0.04397854581475258, -0...your stage you should strong enough take the b...
299they were worried about the woman rights they...0.999381negativenegative[0.04970186576247215, -0.02286474034190178, -0...they were worried about the woman rights they ...
107please don’ editorialise post titles you ha...0.996976negativenegative[-0.0039879390969872475, -0.013197453692555428...please don’ editorialise post titles you hav...
.....................
32most religions have dogmatic orthodox well eso...0.997794positivepositive[-0.01782401092350483, 0.008441080339252949, -...most religions have dogmatic orthodox well eso...
126keep fighting gif today twgok girls kusunoki k...0.999957positivepositive[0.06429395824670792, 0.006657848134636879, -0...keep fighting gif today twgok girls kusunoki k...
531take some notes valve this jpg really nice sla...0.999426positivepositive[-0.023985132575035095, -0.040043871849775314,...take some notes valve this jpg really nice sla...
440great goal1.000000positivepositive[0.028093935921788216, 0.007526209577918053, -...great goal
319modi the leader the indian neo nazi party bjp ...0.997143positivepositive[0.00526454858481884, 0.04812856763601303, 0.0...modi the leader the indian neo nazi party bjp ...
\n","

480 rows × 6 columns

\n","
"],"text/plain":[" text ... document\n","origin_index ... \n","26 tea partier expresses support for namo after ... ... tea partier expresses support for namo after e...\n","485 tried being secular before india after seeing... ... tried being secular before india after seeing ...\n","518 your stage you should strong enough take the ... ... your stage you should strong enough take the b...\n","299 they were worried about the woman rights they... ... they were worried about the woman rights they ...\n","107 please don’ editorialise post titles you ha... ... please don’ editorialise post titles you hav...\n","... ... ... ...\n","32 most religions have dogmatic orthodox well eso... ... most religions have dogmatic orthodox well eso...\n","126 keep fighting gif today twgok girls kusunoki k... ... keep fighting gif today twgok girls kusunoki k...\n","531 take some notes valve this jpg really nice sla... ... take some notes valve this jpg really nice sla...\n","440 great goal ... great goal\n","319 modi the leader the indian neo nazi party bjp ... ... modi the leader the indian neo nazi party bjp ...\n","\n","[480 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 3.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614263457855,"user_tz":-300,"elapsed":807627,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3cbba159-ce6d-47e9-dacb-24ffdf56753d"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.81 0.85 0.83 71\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.75 0.67 0.71 49\n","\n"," accuracy 0.78 120\n"," macro avg 0.52 0.51 0.51 120\n","weighted avg 0.79 0.78 0.78 120\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"BD5OKO4Umc5U"},"source":["# 4. Test Model with 20 languages!"]},{"cell_type":"code","metadata":{"id":"OQ72hP9unML7","colab":{"base_uri":"https://localhost:8080/","height":606},"executionInfo":{"status":"ok","timestamp":1614263547292,"user_tz":-300,"elapsed":897033,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"96b46d9f-59a5-49fb-b8bd-5d5517a9d10e"},"source":["train_df = pd.read_csv(\"Reddit_Data_multi_lingual.csv\")\n","preds = fitted_pipe.predict(train_df[[\"test_sentences\",\"y\"]].iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.80 0.83 0.82 48\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.84 0.79 0.81 52\n","\n"," accuracy 0.81 100\n"," macro avg 0.55 0.54 0.54 100\n","weighted avg 0.82 0.81 0.81 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textsentiment_confidencesentimentyxx_embed_sentence_labse_embeddingsdocument
origin_index
0je pravda, že přerušili moc, jaký kongres douc...0.901069negativepositive[0.04707138240337372, -0.02307470142841339, -0...je pravda, že přerušili moc, jaký kongres douc...
1今月のようにジルーをより良く仕上げる1.000000positivepositive[-0.016900546848773956, -0.02084471471607685, ...今月のようにジルーをより良く仕上げる
2נראה חרא עכשיו אבל עדיין גאה0.994826positivepositive[-0.02598225139081478, -0.02113635651767254, -...נראה חרא עכשיו אבל עדיין גאה
3פלור הבוער שונא את האל הרע הטוב ביותר0.999867negativenegative[-0.04441581293940544, -0.010804536752402782, ...פלור הבוער שונא את האל הרע הטוב ביותר
4पूछ सकते हैं कि आप इस शक्तिशाली चीज़ के साथ क्...0.853560positivepositive[0.04069403186440468, -0.027412423864006996, 0...पूछ सकते हैं कि आप इस शक्तिशाली चीज़ के साथ क्...
.....................
95这并不奇怪0.916389negativepositive[0.026199793443083763, -0.061731234192848206, ...这并不奇怪
96এই পোস্টটি বিধি লঙ্ঘন করে সর্বশেষে আপনার ছাগল ...0.999999negativenegative[0.03437671437859535, -0.03329189494252205, -0...এই পোস্টটি বিধি লঙ্ঘন করে সর্বশেষে আপনার ছাগল ...
97سنا ہے براہ راست لائن نریندر مودی ہے جو کسی بھ...0.999968positivepositive[-0.0582120418548584, 0.05610270798206329, 0.0...سنا ہے براہ راست لائن نریندر مودی ہے جو کسی بھ...
98allah lanet olsun bu şərhlərə hindistandan çox...0.686604positivenegative[-0.021423539146780968, 0.011710301972925663, ...allah lanet olsun bu şərhlərə hindistandan çox...
99อัห์มดาบาดมีโอกาสที่จะกลายเป็นเมืองรถไฟใต้ดินท...0.999983positivepositive[0.0035696288105100393, 0.017301443964242935, ...อัห์มดาบาดมีโอกาสที่จะกลายเป็นเมืองรถไฟใต้ดินท...
\n","

100 rows × 6 columns


"],"text/plain":[" text ... document\n","origin_index ... \n","0 je pravda, že přerušili moc, jaký kongres douc... ... je pravda, že přerušili moc, jaký kongres douc...\n","1 今月のようにジルーをより良く仕上げる ... 今月のようにジルーをより良く仕上げる\n","2 נראה חרא עכשיו אבל עדיין גאה ... נראה חרא עכשיו אבל עדיין גאה\n","3 פלור הבוער שונא את האל הרע הטוב ביותר ... פלור הבוער שונא את האל הרע הטוב ביותר\n","4 पूछ सकते हैं कि आप इस शक्तिशाली चीज़ के साथ क्... ... पूछ सकते हैं कि आप इस शक्तिशाली चीज़ के साथ क्...\n","... ... ... ...\n","95 这并不奇怪 ... 这并不奇怪\n","96 এই পোস্টটি বিধি লঙ্ঘন করে সর্বশেষে আপনার ছাগল ... ... এই পোস্টটি বিধি লঙ্ঘন করে সর্বশেষে আপনার ছাগল ...\n","97 سنا ہے براہ راست لائن نریندر مودی ہے جو کسی بھ... ... سنا ہے براہ راست لائن نریندر مودی ہے جو کسی بھ...\n","98 allah lanet olsun bu şərhlərə hindistandan çox... ... allah lanet olsun bu şərhlərə hindistandan çox...\n","99 อัห์มดาบาดมีโอกาสที่จะกลายเป็นเมืองรถไฟใต้ดินท... ... อัห์มดาบาดมีโอกาสที่จะกลายเป็นเมืองรถไฟใต้ดินท...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"RjtuNUcvuJTT"},"source":["# The Model understands English\n","![en](https://www.worldometers.info/img/flags/small/tn_nz-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"o0vu7PaWkcI7","executionInfo":{"status":"ok","timestamp":1614269411545,"user_tz":-300,"elapsed":4682,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cc445a5c-e32e-4c40-92f0-564837ce3032"},"source":["fitted_pipe.predict(\"I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away... \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999973negative[-0.023605991154909134, -0.028282877057790756,...I am depressed because of my recent break up a...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999973 ... I am depressed because of my recent break up a...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":44}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1ykjRQhCtQ4w","executionInfo":{"status":"ok","timestamp":1614269309123,"user_tz":-300,"elapsed":5162,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"29b6a1ae-cb7a-4ca3-8acc-c0d99167cfc6"},"source":["fitted_pipe.predict(\"The love of my life proposed me , I feel like the happiest person alive!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999999positive[-0.005035023204982281, 0.022905221208930016, ...The love of my life proposed me , I feel like ...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999999 ... The love of my life proposed me , I feel like ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":42}]},{"cell_type":"markdown","metadata":{"id":"vohym-XbuNHn"},"source":["# The Model understands German\n","![de](https://www.worldometers.info/img/flags/small/tn_gm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"dzaaZrI4tVWc","executionInfo":{"status":"ok","timestamp":1614263557729,"user_tz":-300,"elapsed":907370,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c81fa909-983a-4e15-8964-6859ce69ac98"},"source":["# German for:'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict(\"Die Liebe meines Lebens schlug mich vor, ich fühle mich wie die glücklichste Person am Leben!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999998positive[0.006932579912245274, -0.00897424016147852, -...Die Liebe meines Lebens schlug mich vor, ich f...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999998 ... Die Liebe meines Lebens schlug mich vor, ich f...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"BbhgTSBGtTtJ","executionInfo":{"status":"ok","timestamp":1614269430599,"user_tz":-300,"elapsed":6817,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"58f6bcdf-2d1b-4be7-a2c5-4ddccc7db7dd"},"source":["# German for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"Ich bin wegen meiner kürzlichen Trennung depressiv und verbringe meine ganze Zeit damit zu weinen. Ich möchte, dass die Schmerzen verschwinden ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999994negative[-0.04037580266594887, -0.01992807351052761, 0...Ich bin wegen meiner kürzlichen Trennung depre...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999994 ... Ich bin wegen meiner kürzlichen Trennung depre...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":45}]},{"cell_type":"markdown","metadata":{"id":"a1JbtmWquQwj"},"source":["# The Model understands Chinese\n","![zh](https://www.worldometers.info/img/flags/small/tn_ch-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"kYSYqtoRtc-P","executionInfo":{"status":"ok","timestamp":1614269439782,"user_tz":-300,"elapsed":5547,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"78a3b5ec-9f29-44a6-ae6d-6d19090f97fd"},"source":["\n","# Chinese for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"由于最近的分手,我感到沮丧,我花了所有的时间哭泣,我希望痛苦能够消失... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999958negative[-0.04107382893562317, -0.04438792169094086, 0...由于最近的分手,我感到沮丧,我花了所有的时间哭泣,我希望痛苦能够消失...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999958 ... 由于最近的分手,我感到沮丧,我花了所有的时间哭泣,我希望痛苦能够消失...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"06v9SD-QtlBU","executionInfo":{"status":"ok","timestamp":1614263566192,"user_tz":-300,"elapsed":915722,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b4e7e02f-10fa-4b6c-f54c-753954a5ca3f"},"source":["# Chinese for : \"The love of my life proposed me , I feel like the happiest person alive!\"\n","fitted_pipe.predict(\"我一生的爱向我提出了我,我感觉自己是最幸福的人!\")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
01.000000positive[-0.0253176037222147, -0.04454462602734566, -0...我一生的爱向我提出了我,我感觉自己是最幸福的人!
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 1.000000 ... 我一生的爱向我提出了我,我感觉自己是最幸福的人!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"markdown","metadata":{"id":"9h7CvN4uu9Pb"},"source":["# Model understands Afrikaans\n","\n","![af](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)\n","\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"VMPhbgw9twtf","executionInfo":{"status":"ok","timestamp":1614263569180,"user_tz":-300,"elapsed":918692,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e4b6df23-1e7f-45e1-fd80-0cf97ac868ef"},"source":["# Afrikaans for 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict(\"Die liefde van my lewe het my voorgestel, ek voel soos die gelukkigste persoon wat lewendig is!\")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999999positive[-0.017393898218870163, 0.005962143652141094, ...Die liefde van my lewe het my voorgestel, ek v...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999999 ... Die liefde van my lewe het my voorgestel, ek v...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"zWgNTIdkumhX","executionInfo":{"status":"ok","timestamp":1614269451031,"user_tz":-300,"elapsed":5672,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a8616e7c-c69b-4af3-fea9-2793be1223b9"},"source":["# Afrikaans for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"Ek is depressief as gevolg van my onlangse breuk en ek spandeer al my tyd om te huil, ek wil hê dat die pyn moet verdwyn ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999960negative[-0.042933885008096695, -0.033448245376348495,...Ek is depressief as gevolg van my onlangse bre...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999960 ... Ek is depressief as gevolg van my onlangse bre...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":47}]},{"cell_type":"markdown","metadata":{"id":"rSEPkC-Bwnpg"},"source":["# The model understands Vietnamese\n","![vi](https://www.worldometers.info/img/flags/small/tn_vm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"wCcTS5gIu511","executionInfo":{"status":"ok","timestamp":1614263574837,"user_tz":-300,"elapsed":924308,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8f17d9c1-fac7-4937-8ccf-f3405100cd57"},"source":["# Vietnamese for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict('Tình yêu của đời tôi đề xuất tôi, tôi cảm thấy như người hạnh phúc nhất còn sống!')\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999998positive[-0.03576834872364998, -0.0172688327729702, -0...Tình yêu của đời tôi đề xuất tôi, tôi cảm thấy...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999998 ... Tình yêu của đời tôi đề xuất tôi, tôi cảm thấy...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"lpQmMRA59REb","executionInfo":{"status":"ok","timestamp":1614269458982,"user_tz":-300,"elapsed":4708,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ae88f6a5-a2fd-4eb3-c4db-797ec74d8445"},"source":["# Vietnamese for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"Tôi chán nản vì cuộc chia tay gần đây và tôi dành toàn bộ thời gian để khóc, tôi muốn nỗi đau qua đi ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999738negative[-0.00911672506481409, -0.007585278246551752, ...Tôi chán nản vì cuộc chia tay gần đây và tôi d...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999738 ... Tôi chán nản vì cuộc chia tay gần đây và tôi d...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":48}]},{"cell_type":"markdown","metadata":{"id":"IlkmAaMoxTuy"},"source":["# The model understands Japanese\n","![ja](https://www.worldometers.info/img/flags/small/tn_ja-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1IfJu3q8wwUt","executionInfo":{"status":"ok","timestamp":1614263578142,"user_tz":-300,"elapsed":927584,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1122a4a9-1d2c-4458-ddfb-deb349640f83"},"source":["\n","# Japanese for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict('私の人生の愛は私を提案しました、私は生きている最も幸せな人のように感じます!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999999positive[-0.011561025865375996, 0.009653945453464985, ...私の人生の愛は私を提案しました、私は生きている最も幸せな人のように感じます!
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999999 ... 私の人生の愛は私を提案しました、私は生きている最も幸せな人のように感じます!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":16}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"h3k7_PFhxOve","executionInfo":{"status":"ok","timestamp":1614269471153,"user_tz":-300,"elapsed":4315,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"89d98b4e-c67a-4676-d6cb-d7c73a8afa86"},"source":["# Japanese for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"私は最近の別れのために落ち込んでいて、私はずっと泣いて過ごしています、私は痛みを取り除きたいです... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999992negative[-0.0351201631128788, -0.03604917600750923, 0....私は最近の別れのために落ち込んでいて、私はずっと泣いて過ごしています、私は痛みを取り除きたい...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999992 ... 私は最近の別れのために落ち込んでいて、私はずっと泣いて過ごしています、私は痛みを取り除きたい...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"markdown","metadata":{"id":"GITfT7FK0CGv"},"source":["# The model understands Zulu\n","![zu](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"6uelDwq4xdWv","executionInfo":{"status":"ok","timestamp":1614263582705,"user_tz":-300,"elapsed":932102,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cc72aa25-f496-4988-f912-524d427870b6"},"source":["# Zulu for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict('Uthando lwempilo yami lungihlongosile, ngizwa sengathi umuntu ojabule kunabo bonke ephila!')\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999999positive[-0.011278754100203514, -0.0050370218232274055...Uthando lwempilo yami lungihlongosile, ngizwa ...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999999 ... Uthando lwempilo yami lungihlongosile, ngizwa ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"VS5JE0tC9W-h","executionInfo":{"status":"ok","timestamp":1614269483014,"user_tz":-300,"elapsed":4164,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"952dce5c-338a-48da-b84c-460f59c52b0f"},"source":["# Zulu for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"Ngicindezelekile ngenxa yokuhlukana kwami kwakamuva futhi ngichitha sonke isikhathi sami ngikhala, ngifuna ubuhlungu buphele ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999986negative[-0.01392589695751667, -0.0321076363325119, 0....Ngicindezelekile ngenxa yokuhlukana kwami kwak...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999986 ... Ngicindezelekile ngenxa yokuhlukana kwami kwak...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"markdown","metadata":{"id":"VGVvzl_30a0T"},"source":["# The Model understands Turkish\n","![tr](https://www.worldometers.info/img/flags/small/tn_tu-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"aOSsiK6J0jWs","executionInfo":{"status":"ok","timestamp":1614263586051,"user_tz":-300,"elapsed":935430,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"230a6b79-542e-4dcb-f363-084556560f58"},"source":["# Turkish for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict('Hayatımın aşkı bana teklif etti, yaşayan en mutlu insan gibi hissediyorum! ')\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999998positive[0.0015444562304764986, 0.01713963784277439, -...Hayatımın aşkı bana teklif etti, yaşayan en mu...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999998 ... Hayatımın aşkı bana teklif etti, yaşayan en mu...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":19}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"RFsJ9tZs9aCX","executionInfo":{"status":"ok","timestamp":1614269495682,"user_tz":-300,"elapsed":4986,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3d2facd9-a9f2-44f4-b717-5857d200254a"},"source":["# Turkish for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"Son ayrılığımdan dolayı depresyondayım ve tüm zamanımı ağlayarak geçiriyorum, acının gitmesini istiyorum ... \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999989negative[-0.048465922474861145, -0.041023433208465576,...Son ayrılığımdan dolayı depresyondayım ve tüm ...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999989 ... Son ayrılığımdan dolayı depresyondayım ve tüm ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":51}]},{"cell_type":"markdown","metadata":{"id":"803qL2gt0vlb"},"source":["# The Model understands Hebrew\n","![he](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"XQ5VCtxw0pc0","executionInfo":{"status":"ok","timestamp":1614269506031,"user_tz":-300,"elapsed":5670,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7e58fbe8-ebb5-4440-ba1c-59303e720387"},"source":["# Hebrew for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"אני בדיכאון בגלל הפרידה האחרונה שלי ואני מבלה את כל זמני בבכי, אני רוצה שהכאב ייעלם ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999986negative[-0.04138199985027313, -0.05670205503702164, 0...אני בדיכאון בגלל הפרידה האחרונה שלי ואני מבלה ...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999986 ... אני בדיכאון בגלל הפרידה האחרונה שלי ואני מבלה ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":52}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"9w2ZHfns05A4","executionInfo":{"status":"ok","timestamp":1614263590763,"user_tz":-300,"elapsed":940099,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"32af7a1a-66e4-4909-9fcd-ef877a4467a9"},"source":["# Hebrew for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict('אהבת חיי הציעה אותי, אני מרגישה כמו האדם המאושר ביותר בחיים!')\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
01.000000positive[-0.021271968260407448, -0.021334702149033546,...אהבת חיי הציעה אותי, אני מרגישה כמו האדם המאוש...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 1.000000 ... אהבת חיי הציעה אותי, אני מרגישה כמו האדם המאוש...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":21}]},{"cell_type":"markdown","metadata":{"id":"SDlpd33H1HIX"},"source":["# The Model understands Telugu\n","![te](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Kc5n1bzv1BJT","executionInfo":{"status":"ok","timestamp":1614263594054,"user_tz":-300,"elapsed":943367,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2b73ea87-55b3-48d3-82a7-0a468060a1a5"},"source":["# Telugu for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict('నా జీవితం యొక్క ప్రేమ నన్ను ప్రతిపాదించింది, సజీవంగా ఉన్న వ్యక్తిగా నేను భావిస్తున్నాను!' )"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999574positive[-0.011076054535806179, -0.022865092381834984,...నా జీవితం యొక్క ప్రేమ నన్ను ప్రతిపాదించింది, స...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999574 ... నా జీవితం యొక్క ప్రేమ నన్ను ప్రతిపాదించింది, స...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":22}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"-l-u6vrz1Obe","executionInfo":{"status":"ok","timestamp":1614269518012,"user_tz":-300,"elapsed":4784,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7db0efd3-cc9d-4d60-873c-735bdf2cbe12"},"source":["\n","# Telugu for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"నా ఇటీవలి విడిపోవడం వల్ల నేను నిరాశకు గురయ్యాను మరియు నా సమయాన్ని ఏడుస్తూనే ఉన్నాను, నొప్పి పోవాలని నేను కోరుకుంటున్నాను ... \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999970negative[-0.025107212364673615, -0.03688900172710419, ...నా ఇటీవలి విడిపోవడం వల్ల నేను నిరాశకు గురయ్యాన...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999970 ... నా ఇటీవలి విడిపోవడం వల్ల నేను నిరాశకు గురయ్యాన...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":53}]},{"cell_type":"markdown","metadata":{"id":"nziBUe8t1Zwn"},"source":["# Model understands Russian\n","![ru](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Ckyjl3YQ1VFn","executionInfo":{"status":"ok","timestamp":1614269530474,"user_tz":-300,"elapsed":3872,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"670b3ef3-8f01-4303-a28a-42352b9ac3b3"},"source":["# Russian for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"Я в депрессии из-за моего недавнего разрыва, и я все время плачу, я хочу, чтобы боль ушла ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999982negative[-0.04230489954352379, -0.02028733491897583, 0...Я в депрессии из-за моего недавнего разрыва, и...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999982 ... Я в депрессии из-за моего недавнего разрыва, и...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":54}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"GIdWkfGv1gFz","executionInfo":{"status":"ok","timestamp":1614263601297,"user_tz":-300,"elapsed":950555,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"856c24dd-7ae0-4ffc-d692-2ebb3b072ea3"},"source":["# Russian for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict('Этот фильм был отличным!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
01.000000positive[-0.03249521926045418, -0.04056306555867195, -...Этот фильм был отличным!
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 1.000000 ... Этот фильм был отличным!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":25}]},{"cell_type":"markdown","metadata":{"id":"8R1j9mwz2Cm4"},"source":["# Model understands Urdu\n","![ur](https://www.worldometers.info/img/flags/small/tn_pk-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"j4zwvRV11pcG","executionInfo":{"status":"ok","timestamp":1614269539385,"user_tz":-300,"elapsed":3977,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"34f52c5b-f57a-4410-8010-015fd8ba1bbe"},"source":["# Urdu for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"میں حالیہ بریک اپ کی وجہ سے افسردہ ہوں اور میں اپنا سارا وقت روتے ہوئے گزارتا ہوں ، میں چاہتا ہوں کہ درد دور ہو ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999937negative[-0.03198534622788429, -0.04250375181436539, 0...میں حالیہ بریک اپ کی وجہ سے افسردہ ہوں اور میں...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999937 ... میں حالیہ بریک اپ کی وجہ سے افسردہ ہوں اور میں...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":55}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SxzTuK4b2UKV","executionInfo":{"status":"ok","timestamp":1614263606277,"user_tz":-300,"elapsed":955487,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0769fa63-688d-41d8-e2bd-1e4d726fd643"},"source":["# Urdu for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict('میری زندگی کی محبت نے مجھے پیش کیا، مجھے سب سے خوشگوار شخص زندہ لگتا ہے!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999997positive[-0.014539845287799835, -0.022852811962366104,...میری زندگی کی محبت نے مجھے پیش کیا، مجھے سب سے...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999997 ... میری زندگی کی محبت نے مجھے پیش کیا، مجھے سب سے...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"markdown","metadata":{"id":"RoNg-C3k1qcX"},"source":["# Model understands Hindi\n","![hi](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"QZ9RT5Wv1r1n","executionInfo":{"status":"ok","timestamp":1614263608674,"user_tz":-300,"elapsed":957866,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"91e6075f-2d8b-49ff-d18f-3194d9d4b74f"},"source":["# Hindi for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict('मेरे जीवन के प्यार ने मुझे प्रस्तावित किया, मुझे लगता है कि सबसे खुश व्यक्ति जीवित है!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999987positive[-0.007552078925073147, 0.003644779557362199, ...मेरे जीवन के प्यार ने मुझे प्रस्तावित किया, मु...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999987 ... मेरे जीवन के प्यार ने मुझे प्रस्तावित किया, मु...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":28}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"quM-IL2i12-B","executionInfo":{"status":"ok","timestamp":1614269548900,"user_tz":-300,"elapsed":3962,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6f43b703-81f5-4e88-f823-c359cffb987b"},"source":["# Hindi for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"मेरे हालिया ब्रेक अप के कारण मैं उदास हूं और मैं अपना सारा समय रोने में बिताता हूं, मैं चाहता हूं कि दर्द दूर हो जाए ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999950negative[-0.03387507051229477, -0.03928767517209053, 0...मेरे हालिया ब्रेक अप के कारण मैं उदास हूं और म...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999950 ... मेरे हालिया ब्रेक अप के कारण मैं उदास हूं और म...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":56}]},{"cell_type":"markdown","metadata":{"id":"R4ByHOZn35Lc"},"source":["# The model understands Tartar\n","![tt](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"2JrzusSQ18F5","executionInfo":{"status":"ok","timestamp":1614269588311,"user_tz":-300,"elapsed":5382,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"77c7ea06-3ffe-47c8-df2e-939a29a9779f"},"source":["# Tartar for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"Соңгы аерылышуым аркасында мин депрессияләнәм һәм бөтен вакытымны елыйм, авыртуның китүен телим ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999960negative[-0.04341090843081474, -0.044361814856529236, ...Соңгы аерылышуым аркасында мин депрессияләнәм ...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999960 ... Соңгы аерылышуым аркасында мин депрессияләнәм ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":57}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"J06Xm_Ln4AYu","executionInfo":{"status":"ok","timestamp":1614269589794,"user_tz":-300,"elapsed":6597,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"001ee135-f80f-49af-ab8f-32c3bc7af1e4"},"source":["# Tartar for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict('Тормышымның мәхәббәте миңа тәкъдим итте, мин үземне иң бәхетле кеше кебек хис итәм!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999999positive[-0.002561990637332201, -0.02031663805246353, ...Тормышымның мәхәббәте миңа тәкъдим итте, мин ү...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999999 ... Тормышымның мәхәббәте миңа тәкъдим итте, мин ү...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":58}]},{"cell_type":"markdown","metadata":{"id":"HKj5yWwwMplH"},"source":["# The Model understands French\n","![fr](https://www.worldometers.info/img/flags/small/tn_fr-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"CUHcJZfJMplL","executionInfo":{"status":"ok","timestamp":1614269600749,"user_tz":-300,"elapsed":3832,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8cd69f2b-62cc-4f1d-aca6-3a08d73e6a59"},"source":["# French for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"Je suis déprimé à cause de ma récente rupture et je passe tout mon temps à pleurer, je veux que la douleur disparaisse ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999986negative[-0.0261949822306633, -0.031189769506454468, 0...Je suis déprimé à cause de ma récente rupture ...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999986 ... Je suis déprimé à cause de ma récente rupture ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":59}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"57NY2XoTMplM","executionInfo":{"status":"ok","timestamp":1614263621796,"user_tz":-300,"elapsed":970887,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ea4c2c51-a9ee-407b-f4fc-5962fcb0df8e"},"source":["# French for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict(\"L'amour de ma vie m'a proposé, je me sens comme la personne la plus heureuse en vie!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
01.000000positive[-0.01895563304424286, -0.02439437434077263, -...L'amour de ma vie m'a proposé, je me sens comm...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 1.000000 ... L'amour de ma vie m'a proposé, je me sens comm...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"markdown","metadata":{"id":"jD2TBgT0Nq6F"},"source":["# The Model understands Thai\n","![th](https://www.worldometers.info/img/flags/small/tn_th-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"gBp11S5GNq6S","executionInfo":{"status":"ok","timestamp":1614263624599,"user_tz":-300,"elapsed":973662,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0e15f11a-826c-428b-fad6-235f24d8be44"},"source":["# Thai for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict(\"ความรักในชีวิตของฉันเสนอให้ฉันฉันรู้สึกเหมือนคนที่มีความสุขที่สุดที่มีชีวิตอยู่!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999998positive[-0.031056925654411316, -0.04219935089349747, ...ความรักในชีวิตของฉันเสนอให้ฉันฉันรู้สึกเหมือนค...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999998 ... ความรักในชีวิตของฉันเสนอให้ฉันฉันรู้สึกเหมือนค...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":34}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"uvlK8HEZ92qr","executionInfo":{"status":"ok","timestamp":1614269612674,"user_tz":-300,"elapsed":4183,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7430180d-0619-4cda-e578-7a03470527e6"},"source":["# Thai for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"ฉันรู้สึกหดหู่ใจเพราะเพิ่งเลิกกันและฉันใช้เวลาร้องไห้ตลอดเวลาฉันอยากให้ความเจ็บปวดหายไป ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999848negative[-0.014413643628358841, -0.05407709628343582, ...ฉันรู้สึกหดหู่ใจเพราะเพิ่งเลิกกันและฉันใช้เวลา...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999848 ... ฉันรู้สึกหดหู่ใจเพราะเพิ่งเลิกกันและฉันใช้เวลา...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":60}]},{"cell_type":"markdown","metadata":{"id":"mLItI4KZOElB"},"source":["# The Model understands Khmer\n","![km](https://www.worldometers.info/img/flags/small/tn_cb-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SWbqMgAwOElC","executionInfo":{"status":"ok","timestamp":1614263627208,"user_tz":-300,"elapsed":976253,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4b75a42d-dd42-4df1-9832-1bd63440d224"},"source":["# Khmer for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict(\"សេចក្តីស្រឡាញ់នៃជីវិតរបស់ខ្ញុំបានស្នើខ្ញុំខ្ញុំមានអារម្មណ៍ថាដូចជាមនុស្សដែលសប្បាយរីករាយបំផុតនៅរស់! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999999positive[0.0028431271202862263, -0.021134141832590103,...សេចក្តីស្រឡាញ់នៃជីវិតរបស់ខ្ញុំបានស្នើខ្ញុំខ្ញុ...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999999 ... សេចក្តីស្រឡាញ់នៃជីវិតរបស់ខ្ញុំបានស្នើខ្ញុំខ្ញុ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":35}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"YAKYMOJD95Ep","executionInfo":{"status":"ok","timestamp":1614269622635,"user_tz":-300,"elapsed":4537,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3ef0dfb2-c904-4ca6-ba03-a3b65b1e8ac2"},"source":["# Khmer for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"ខ្ញុំក្រៀមក្រំណាស់ដោយសារតែការបែកបាក់ថ្មីៗនេះហើយខ្ញុំចំណាយពេលវេលាយំអស់មួយជីវិតខ្ញុំចង់អោយការឈឺចាប់បាត់ទៅវិញ ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999994negative[-0.02485613152384758, -0.053151313215494156, ...ខ្ញុំក្រៀមក្រំណាស់ដោយសារតែការបែកបាក់ថ្មីៗនេះហើ...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999994 ... ខ្ញុំក្រៀមក្រំណាស់ដោយសារតែការបែកបាក់ថ្មីៗនេះហើ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":61}]},{"cell_type":"markdown","metadata":{"id":"lvE-LbNiPoBT"},"source":["# The Model understands Yiddish\n","![yi](https://www.worldometers.info/img/flags/small/tn_pl-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"5h-pha_nPoBc","executionInfo":{"status":"ok","timestamp":1614263629835,"user_tz":-300,"elapsed":978860,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bb8abb6e-e0f4-4293-8361-c26c1a00f10c"},"source":["# Yiddish for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict(\"די ליבע פון מיין לעבן פארגעלייגט מיר, איך פילן ווי די כאַפּיאַסט מענטש לעבעדיק!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999996positive[-0.019913040101528168, 0.015512144193053246, ...די ליבע פון מיין לעבן פארגעלייגט מיר, איך פילן...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999996 ... די ליבע פון מיין לעבן פארגעלייגט מיר, איך פילן...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":36}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"yNRa1bWt97rg","executionInfo":{"status":"ok","timestamp":1614269632057,"user_tz":-300,"elapsed":3854,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2fcb6082-6225-4a39-854e-96febe228442"},"source":["# Yiddish for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"איך בין דערשלאָגן ווייַל פון מיין לעצטנס ברעכן זיך און איך פאַרברענגען אַלע מיין צייט וויינען, איך ווילן די ווייטיק וועט גיין אַוועק ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999903negative[-0.02142147719860077, -0.04973935708403587, 0...איך בין דערשלאָגן ווייַל פון מיין לעצטנס ברעכן...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999903 ... איך בין דערשלאָגן ווייַל פון מיין לעצטנס ברעכן...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":62}]},{"cell_type":"markdown","metadata":{"id":"XSz4WzScaAHj"},"source":["# The Model understands Kygrgyz\n","![ky](https://www.worldometers.info/img/flags/small/tn_kg-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"lh_ZSHlPaAHv","executionInfo":{"status":"ok","timestamp":1614263632263,"user_tz":-300,"elapsed":981263,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"58be7a4e-3fa4-443e-ee23-00b9e6905d44"},"source":["# Kygrgyz for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict(\"Менин жашоомдун сүйүүсү мени сунуш кылды, мен өзүмдү бактылуу адамдай сезип жатам!|\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999970positive[-0.030747272074222565, -0.025966472923755646,...Менин жашоомдун сүйүүсү мени сунуш кылды, мен ...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999970 ... Менин жашоомдун сүйүүсү мени сунуш кылды, мен ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"2kfUZ33P9-hX","executionInfo":{"status":"ok","timestamp":1614269644369,"user_tz":-300,"elapsed":4200,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"911c7868-7d10-444a-b27c-ede6353ffed2"},"source":["\n","# Kygrgyz for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"Менин акыркы ажырашуумдан улам депрессияга кабылып, бардык убактымды ыйлап өткөрөм, азаптын басылышын каалайм ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999924negative[-0.04204170033335686, -0.02293466404080391, 0...Менин акыркы ажырашуумдан улам депрессияга каб...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999924 ... Менин акыркы ажырашуумдан улам депрессияга каб...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":63}]},{"cell_type":"markdown","metadata":{"id":"DGMVMKaTdJFj"},"source":["# The Model understands Tamil\n","![ta](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"JWDr_LoCdJFn","executionInfo":{"status":"ok","timestamp":1614269650912,"user_tz":-300,"elapsed":3677,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3613ee44-7855-415c-9a42-995f14a374cc"},"source":["# Tamil for: 'I am depressed because of my recent break up and I spend all my time crying, I want the pain to go away...'\n","fitted_pipe.predict(\"நான் சமீபத்தில் பிரிந்ததால் மனச்சோர்வடைந்து, என் நேரத்தை அழுதபடி செலவிடுகிறேன், வலி நீங்க வேண்டும் என்று நான் விரும்புகிறேன் ... \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999895negative[-0.023217450827360153, -0.035981468856334686,...நான் சமீபத்தில் பிரிந்ததால் மனச்சோர்வடைந்து, எ...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999895 ... நான் சமீபத்தில் பிரிந்ததால் மனச்சோர்வடைந்து, எ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":64}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Q6C0BmTtdJFp","executionInfo":{"status":"ok","timestamp":1614263637431,"user_tz":-300,"elapsed":986397,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4d93ddce-d9b5-46e0-b3de-df7ec8370667"},"source":["# Tamil for : 'The love of my life proposed me , I feel like the happiest person alive!'\n","fitted_pipe.predict(\"என் வாழ்க்கையின் அன்பு என்னை முன்மொழிந்தது, உயிருடன் இருக்கும் மகிழ்ச்சியான நபராக நான் உணர்கிறேன்! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentxx_embed_sentence_labse_embeddingsdocument
origin_index
00.999990positive[-0.0001591969921719283, -0.012185919098556042...என் வாழ்க்கையின் அன்பு என்னை முன்மொழிந்தது, உய...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999990 ... என் வாழ்க்கையின் அன்பு என்னை முன்மொழிந்தது, உய...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":39}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1613854901863,"user_tz":-300,"elapsed":958785,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"76502277-986b-4ccc-b2ca-47ea42eaee08"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":103},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1613855310959,"user_tz":-300,"elapsed":104671,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1a9cb18e-30a9-47f5-f04b-7c153b4936de"},"source":["stored_model_path = './models/classifier_dl_trained' \n","hdd_pipe = nlu.load(path=stored_model_path)\n","preds = hdd_pipe.predict('It was one of the best films i have ever watched in my entire life !!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencexx_embed_sentence_labse_embeddingssentimentdocument
origin_index
00.999920[0.011100736446678638, -0.0029744331259280443,...positiveIt was one of the best films i have ever watch...
\n","
"],"text/plain":[" sentiment_confidence ... document\n","origin_index ... \n","0 0.999920 ... It was one of the best films i have ever watch...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UqXHHH-TQTuu","executionInfo":{"status":"ok","timestamp":1613855324504,"user_tz":-300,"elapsed":1038,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b7f1025d-db21-423f-b45e-6bf676d83d85"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this SentimentDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_stock_market.ipynb b/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_stock_market.ipynb new file mode 100644 index 00000000..3eba6c20 --- /dev/null +++ b/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_stock_market.ipynb @@ -0,0 +1 @@ 
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_multi_lingual_training_sentiment_classifier_demo_stock_market.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/multi_lingual_text_classification/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_stock_market.ipynb)\n","\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 Class Stock Market Sentiment Classifier Training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data : \n","\n","
\n","\n","![image.png]()\n","\n","\n","You can achieve these results or even better on this dataset with test data : \n","\n","
\n","\n","![image.png]()\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614262836694,"user_tz":-300,"elapsed":95672,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b194f851-e5fb-4fee-c157-afa76c932d7e"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 60kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 20.5MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=c56d2544e96e924a9a3b4a33eadae4c854f9cb0d05b22a95a93c95196f61a481\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Stock Market Sentiment dataset \n","https://www.kaggle.com/yash612/stockmarket-sentiment-dataset\n","#Context\n","\n","Gathered Stock news from Multiple twitter Handles regarding Economic news dividing into two parts : Negative and positive."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614262837473,"user_tz":-300,"elapsed":96365,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"05e37136-a568-42fc-ffbb-a564cff4859d"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/stock_data_multi_lingual.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-25 14:19:27-- http://ckl-it.de/wp-content/uploads/2021/02/stock_data_multi_lingual.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 564444 (551K) [text/csv]\n","Saving to: ‘stock_data_multi_lingual.csv’\n","\n","stock_data_multi_li 100%[===================>] 551.21K 954KB/s in 0.6s \n","\n","2021-02-25 14:19:27 (954 KB/s) - ‘stock_data_multi_lingual.csv’ saved [564444/564444]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614262838185,"user_tz":-300,"elapsed":97043,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4f3e4460-ea62-45fb-9e58-cc0275e63a17"},"source":["import pandas as pd\n","train_path = '/content/stock_data_multi_lingual.csv'\n","\n","train_df = pd.read_csv(train_path)\n","# the text data to use for classification should be in a column named 'text'\n","columns=['text','y']\n","train_df = train_df[columns]\n","from sklearn.model_selection import train_test_split\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
1764Green Weekly Triangle on CB,....Open Sell Shor...negative
5155Prices for debt from companies including Ford ...positive
2042user: EXC = undervalued here. Where is the yi...positive
106VNG - 12 mil shares shorts plus long buying = ...positive
131CEE Over 35.12positive
.........
5461India's 2019-20 Fuel Demand Growth Worst In Ov...negative
5727Weak Listing For SBI Cards; Shares Fall 12% At...negative
5508Sensex Opens Over 350 Points Lower, Nifty Belo...negative
4216EN wants higherpositive
1659Has anyone shorted KEX? What price?negative
\n","

4632 rows × 2 columns

\n","
"],"text/plain":[" text y\n","1764 Green Weekly Triangle on CB,....Open Sell Shor... negative\n","5155 Prices for debt from companies including Ford ... positive\n","2042 user: EXC = undervalued here. Where is the yi... positive\n","106 VNG - 12 mil shares shorts plus long buying = ... positive\n","131 CEE Over 35.12 positive\n","... ... ...\n","5461 India's 2019-20 Fuel Demand Growth Worst In Ov... negative\n","5727 Weak Listing For SBI Cards; Shares Fall 12% At... negative\n","5508 Sensex Opens Over 350 Points Lower, Nifty Belo... negative\n","4216 EN wants higher positive\n","1659 Has anyone shorted KEX? What price? negative\n","\n","[4632 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","You dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":811},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1614266068001,"user_tz":-300,"elapsed":1486353,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dfb3a4c8-7836-48db-abf9-5f2b6d7bdff1"},"source":["trainable_pipe = nlu.load('xx.embed_sentence.labse train.sentiment')\n","# We need to train longer and user smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(60) \n","trainable_pipe['sentiment_dl'].setLr(0.005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["labse download started this may take some time.\n","Approximate size to download 1.7 GB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.92 0.81 0.86 1705\n"," positive 0.90 0.96 0.93 2927\n","\n"," accuracy 0.90 4632\n"," macro avg 0.91 0.88 0.89 4632\n","weighted avg 0.91 0.90 0.90 4632\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ysentiment_confidencesentimentdocumenttextxx_embed_sentence_labse_embeddings
origin_index
1764negative0.999999negativeGreen Weekly Triangle on CB,....Open Sell Shor...Green Weekly Triangle on CB,....Open Sell Shor...[-0.007076278328895569, -0.021284468472003937,...
5155positive1.000000positivePrices for debt from companies including Ford ...Prices for debt from companies including Ford ...[-0.01115849893540144, -0.009219897910952568, ...
2042positive0.997436positiveuser: EXC = undervalued here. Where is the yie...user: EXC = undervalued here. Where is the yi...[-0.036913227289915085, -0.06303010880947113, ...
106positive0.999727positiveVNG - 12 mil shares shorts plus long buying = ...VNG - 12 mil shares shorts plus long buying = ...[-0.06516604125499725, 0.03032655082643032, 0....
131positive1.000000positiveCEE Over 35.12CEE Over 35.12[-0.03404775634407997, -0.014357814565300941, ...
.....................
5461negative0.999992negativeIndia's 2019-20 Fuel Demand Growth Worst In Ov...India's 2019-20 Fuel Demand Growth Worst In Ov...[0.03744948282837868, -0.040541376918554306, -...
5727negative1.000000negativeWeak Listing For SBI Cards; Shares Fall 12% At...Weak Listing For SBI Cards; Shares Fall 12% At...[-0.0797317773103714, 0.0019395140698179603, 0...
5508negative1.000000negativeSensex Opens Over 350 Points Lower, Nifty Belo...Sensex Opens Over 350 Points Lower, Nifty Belo...[-0.07855619490146637, -0.012150928378105164, ...
4216positive1.000000positiveEN wants higherEN wants higher[0.018729722127318382, -0.010736167430877686, ...
1659negative0.999878negativeHas anyone shorted KEX? What price?Has anyone shorted KEX? What price?[-0.00179337989538908, -0.04223024472594261, -...
\n","

4632 rows × 6 columns

\n","
"],"text/plain":[" y ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","1764 negative ... [-0.007076278328895569, -0.021284468472003937,...\n","5155 positive ... [-0.01115849893540144, -0.009219897910952568, ...\n","2042 positive ... [-0.036913227289915085, -0.06303010880947113, ...\n","106 positive ... [-0.06516604125499725, 0.03032655082643032, 0....\n","131 positive ... [-0.03404775634407997, -0.014357814565300941, ...\n","... ... ... ...\n","5461 negative ... [0.03744948282837868, -0.040541376918554306, -...\n","5727 negative ... [-0.0797317773103714, 0.0019395140698179603, 0...\n","5508 negative ... [-0.07855619490146637, -0.012150928378105164, ...\n","4216 positive ... [0.018729722127318382, -0.010736167430877686, ...\n","1659 negative ... [-0.00179337989538908, -0.04223024472594261, -...\n","\n","[4632 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 3.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614266690310,"user_tz":-300,"elapsed":622369,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7a4fcc3e-6a14-4322-b7d3-47b754e2ce5e"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.67 0.60 0.63 401\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.80 0.84 0.82 758\n","\n"," accuracy 0.75 1159\n"," macro avg 0.49 0.48 0.48 1159\n","weighted avg 0.75 0.75 0.75 1159\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"BD5OKO4Umc5U"},"source":["# 4. 
Test Model on 20 languages!"]},{"cell_type":"code","metadata":{"id":"OQ72hP9unML7","colab":{"base_uri":"https://localhost:8080/","height":775},"executionInfo":{"status":"ok","timestamp":1614266766715,"user_tz":-300,"elapsed":76428,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5e1ffd4e-edc7-4955-e282-8c0471f27345"},"source":["train_df = pd.read_csv(\"/content/stock_data_multi_lingual.csv\")\n","preds = fitted_pipe.predict(train_df[[\"test_sentences\",\"y\"]].iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.56 0.77 0.65 13\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.96 0.90 0.93 87\n","\n"," accuracy 0.88 100\n"," macro avg 0.51 0.56 0.52 100\n","weighted avg 0.91 0.88 0.89 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ysentiment_confidencesentimentdocumenttextxx_embed_sentence_labse_embeddings
origin_index
0positive1.000000positiveİzləmə siyahımdakı Kickers XIDE TIT SOQ PNK CP...İzləmə siyahımdakı Kickers XIDE TIT SOQ PNK CP...[-0.0005844637053087354, 0.012249339371919632,...
1positive1.000000positiveउपयोगकर्ता: AAP MOVIE। वर्ष के लिए FEA / GEED ...उपयोगकर्ता: AAP MOVIE। वर्ष के लिए FEA / GEED ...[-0.02774488739669323, -0.03464013338088989, -...
2positive0.839511negativeמשתמש אני מפחד לקצר את AMZN - הם נראים כמו מונ...משתמש אני מפחד לקצר את AMZN - הם נראים כמו מונ...[-0.029554717242717743, -0.002601801883429289,...
3positive1.000000positive12.00 से अधिक MNTA12.00 से अधिक MNTA[-0.03708070144057274, 0.05491333454847336, 0....
4positive1.000000positiveOI 21.37ден жогоруOI 21.37ден жогору[-0.043793778866529465, 0.031909242272377014, ...
.....................
95positive0.768944positiveNG nhod - kyk na die weeklikse - teiken vorige...NG nhod - kyk na die weeklikse - teiken vorige...[-0.03982162848114967, -0.07269032299518585, -...
96positive1.000000positiveNG nhod - ¿qué ves? consulte el semanario - ob...NG nhod - ¿qué ves? consulte el semanario - ob...[-0.04516296088695526, -0.04459896311163902, 0...
97negative0.999901negativeএআইজি আমেরিকান ইন্টারন্যাশনাল গ্রুপ অপশন ট্রেড...এআইজি আমেরিকান ইন্টারন্যাশনাল গ্রুপ অপশন ট্রেড...[-0.07162036746740341, -0.009962097741663456, ...
98positive1.000000positiveP out balance +.32P out balance +.32[-0.041263166815042496, -0.0414082333445549, -...
99positive1.000000positiveVNG ซื้อเทียบกับขาย?VNG ซื้อเทียบกับขาย?[-0.038816630840301514, 0.04581353813409805, -...
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" y ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 positive ... [-0.0005844637053087354, 0.012249339371919632,...\n","1 positive ... [-0.02774488739669323, -0.03464013338088989, -...\n","2 positive ... [-0.029554717242717743, -0.002601801883429289,...\n","3 positive ... [-0.03708070144057274, 0.05491333454847336, 0....\n","4 positive ... [-0.043793778866529465, 0.031909242272377014, ...\n","... ... ... ...\n","95 positive ... [-0.03982162848114967, -0.07269032299518585, -...\n","96 positive ... [-0.04516296088695526, -0.04459896311163902, 0...\n","97 negative ... [-0.07162036746740341, -0.009962097741663456, ...\n","98 positive ... [-0.041263166815042496, -0.0414082333445549, -...\n","99 positive ... [-0.038816630840301514, 0.04581353813409805, -...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"RjtuNUcvuJTT"},"source":["# The Model understands English\n","![en](https://www.worldometers.info/img/flags/small/tn_nz-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"o0vu7PaWkcI7","executionInfo":{"status":"ok","timestamp":1614266770177,"user_tz":-300,"elapsed":3488,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"50708056-cdfb-40b4-b204-777621a010df"},"source":["fitted_pipe.predict(\"Bitcoin dropped by 50 percent !!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999990negativeBitcoin dropped by 50 percent !!![-0.05163612216711044, -0.029772669076919556, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999990 ... [-0.05163612216711044, -0.029772669076919556, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1ykjRQhCtQ4w","executionInfo":{"status":"ok","timestamp":1614266774637,"user_tz":-300,"elapsed":4476,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"28343112-7210-4c7e-ca0a-4a29a60e7d17"},"source":["fitted_pipe.predict(\"Bitcoin went up by 50 percent !!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999999positiveBitcoin went up by 50 percent !!![-0.028688771650195122, -0.026630524545907974,...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999999 ... [-0.028688771650195122, -0.026630524545907974,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"vohym-XbuNHn"},"source":["# The Model understands German\n","![de](https://www.worldometers.info/img/flags/small/tn_gm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"dzaaZrI4tVWc","executionInfo":{"status":"ok","timestamp":1614266777152,"user_tz":-300,"elapsed":2529,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4bc1b720-33ea-4051-d2bc-b3446965b0f4"},"source":["# German for:'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict(\"Bitcoin stieg um 50 Prozent auf !!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999999positiveBitcoin stieg um 50 Prozent auf !!![-0.03815653175115585, -0.0247296504676342, -0...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999999 ... [-0.03815653175115585, -0.0247296504676342, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"BbhgTSBGtTtJ","executionInfo":{"status":"ok","timestamp":1614270421116,"user_tz":-300,"elapsed":6055,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2cb51bbf-c4d6-47d6-920c-bf5a3de71246"},"source":["# German for: 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict(\"Bitcoin fiel um 50 Prozent !!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999986negativeBitcoin fiel um 50 Prozent !!![-0.05688923969864845, -0.02544567361474037, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999986 ... [-0.05688923969864845, -0.02544567361474037, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":44}]},{"cell_type":"markdown","metadata":{"id":"a1JbtmWquQwj"},"source":["# The Model understands Chinese\n","![zh](https://www.worldometers.info/img/flags/small/tn_ch-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"kYSYqtoRtc-P","executionInfo":{"status":"ok","timestamp":1614266784362,"user_tz":-300,"elapsed":3794,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ce1e5c7c-a9ba-4c23-8e3c-dda000c89a17"},"source":["# Chinese for: \"Bitcoin dropped by 50 percent !!!\"\n","fitted_pipe.predict(\"比特币下跌了50%!!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999996negative比特币下跌了50%!!![-0.07537585496902466, -0.027679990977048874, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999996 ... [-0.07537585496902466, -0.027679990977048874, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"06v9SD-QtlBU","executionInfo":{"status":"ok","timestamp":1614266786930,"user_tz":-300,"elapsed":2581,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"39bc82ae-e7ac-4903-e405-67164c4a3c73"},"source":["# Chinese for : \"Bitcoin went up by 50 percent !!!\"\n","fitted_pipe.predict(\"比特币上涨了50%!\")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
01.000000positive比特币上涨了50%![-0.041133344173431396, -0.009412097744643688,...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 1.000000 ... [-0.041133344173431396, -0.009412097744643688,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"markdown","metadata":{"id":"9h7CvN4uu9Pb"},"source":["# Model understands Afrikaans\n","\n","![af](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)\n","\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"VMPhbgw9twtf","executionInfo":{"status":"ok","timestamp":1614266790118,"user_tz":-300,"elapsed":3196,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"03e742bd-17e9-47d9-8933-f988a5ea3b2a"},"source":["# Afrikaans for 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict(\"Bitcoin het met 50 persent toegeneem !!!\")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999865positiveBitcoin het met 50 persent toegeneem !!![-0.03586146980524063, -0.03901936113834381, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999865 ... [-0.03586146980524063, -0.03901936113834381, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"zWgNTIdkumhX","executionInfo":{"status":"ok","timestamp":1614266793172,"user_tz":-300,"elapsed":3072,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3612b8a2-eaab-4cb8-f747-f3a952b75a71"},"source":["# Afrikaans for :'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict('Bitcoin het met 50 persent gedaal !!! |')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999794negativeBitcoin het met 50 persent gedaal !!! |[-0.05142545327544212, -0.03921075537800789, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999794 ... [-0.05142545327544212, -0.03921075537800789, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"rSEPkC-Bwnpg"},"source":["# The model understands Vietnamese\n","![vi](https://www.worldometers.info/img/flags/small/tn_vm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"wCcTS5gIu511","executionInfo":{"status":"ok","timestamp":1614266796368,"user_tz":-300,"elapsed":3202,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e45b90be-d284-4d18-8c19-31a5017f62c8"},"source":["# Vietnamese for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict('Bitcoin đã tăng 50% !!! ')\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999934positiveBitcoin đã tăng 50% !!![-0.03511587902903557, -0.053469520062208176, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999934 ... [-0.03511587902903557, -0.053469520062208176, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"3Y-kLeGp5uc0","executionInfo":{"status":"ok","timestamp":1614266800065,"user_tz":-300,"elapsed":3711,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"331fd1e6-c8e5-4b14-940c-a48d2038cba4"},"source":["# Vietnamese for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict('Bitcoin giảm 50% !!! ')\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999608negativeBitcoin giảm 50% !!![-0.013879713602364063, -0.0543162003159523, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999608 ... [-0.013879713602364063, -0.0543162003159523, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":16}]},{"cell_type":"markdown","metadata":{"id":"IlkmAaMoxTuy"},"source":["# The model understands Japanese\n","![ja](https://www.worldometers.info/img/flags/small/tn_ja-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1IfJu3q8wwUt","executionInfo":{"status":"ok","timestamp":1614266803007,"user_tz":-300,"elapsed":2962,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"94b5edc1-cf8b-44cb-dafb-2b7ae92b3b46"},"source":["\n","# Japanese for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict('ビットコインは50%上昇しました!!! ')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
01.000000positiveビットコインは50%上昇しました!!![-0.044070836156606674, -0.013696333393454552,...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 1.000000 ... [-0.044070836156606674, -0.013696333393454552,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":17}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"h3k7_PFhxOve","executionInfo":{"status":"ok","timestamp":1614266805072,"user_tz":-300,"elapsed":2078,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b3d5f8b1-0e8c-4983-c2c3-758e439d2f94"},"source":["\n","# Japanese for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict('Bitcoinは50%減少しました!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999990negativeBitcoinは50%減少しました![-0.056898221373558044, -0.04127807542681694, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999990 ... [-0.056898221373558044, -0.04127807542681694, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"markdown","metadata":{"id":"GITfT7FK0CGv"},"source":["# The model understands Zulu\n","![zu](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"6uelDwq4xdWv","executionInfo":{"status":"ok","timestamp":1614266808086,"user_tz":-300,"elapsed":3025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"72cb4270-948c-4754-cf3a-d58c34fb72f5"},"source":["# Zulu for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict('I-Bitcoin inyuke ngamaphesenti ama-50 !!!')\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.998867positiveI-Bitcoin inyuke ngamaphesenti ama-50 !!![-0.042268361896276474, -0.029177553951740265,...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.998867 ... [-0.042268361896276474, -0.029177553951740265,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":19}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"NY_D-USEBeFe","executionInfo":{"status":"ok","timestamp":1614270582162,"user_tz":-300,"elapsed":5309,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7910970b-3cbd-40fd-e635-c1cf26cfba63"},"source":["# Zulu for : 'The whole crypto system crashed!!! '\n","fitted_pipe.predict('Lonke uhlelo lwe-crypto luphahlazeka !!!')\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999740negativeLonke uhlelo lwe-crypto luphahlazeka !!![0.014095775783061981, -0.06292989104986191, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999740 ... [0.014095775783061981, -0.06292989104986191, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":45}]},{"cell_type":"markdown","metadata":{"id":"VGVvzl_30a0T"},"source":["# The Model understands Turkish\n","![tr](https://www.worldometers.info/img/flags/small/tn_tu-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"DRNnuEeQz2pd","executionInfo":{"status":"ok","timestamp":1614266811332,"user_tz":-300,"elapsed":3259,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dd9f438c-9960-4f87-d02e-68c735775528"},"source":["# Turkish for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict('Bitcoin yüzde 50 düştü !!! ')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999880negativeBitcoin yüzde 50 düştü !!![-0.02441185712814331, -0.041124653071165085, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999880 ... [-0.02441185712814331, -0.041124653071165085, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":20}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"aOSsiK6J0jWs","executionInfo":{"status":"ok","timestamp":1614266813997,"user_tz":-300,"elapsed":2694,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b0fe734e-375a-44b8-ee44-c7212b965588"},"source":["# Turkish for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict('Bitcoin yüzde 50 arttı !!!')\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999990positiveBitcoin yüzde 50 arttı !!![-0.02950633130967617, -0.02281446009874344, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999990 ... [-0.02950633130967617, -0.02281446009874344, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":21}]},{"cell_type":"markdown","metadata":{"id":"803qL2gt0vlb"},"source":["# The Model understands Hebrew\n","![he](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"XQ5VCtxw0pc0","executionInfo":{"status":"ok","timestamp":1614266817408,"user_tz":-300,"elapsed":3422,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8524838b-c43b-44c6-ca3d-dad32571204e"},"source":["# Hebrew for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict('Bitcoin ירד ב -50% !!! ')\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999989negativeBitcoin ירד ב -50% !!![-0.058318842202425, -0.04578538239002228, -0....
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999989 ... [-0.058318842202425, -0.04578538239002228, -0....\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":22}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"9w2ZHfns05A4","executionInfo":{"status":"ok","timestamp":1614266820476,"user_tz":-300,"elapsed":3079,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"77515e2d-011b-4d20-e8bc-2ae851e8cc02"},"source":["# Hebrew for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict('Bitcoin עלה ב -50% !!! ')\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999998positiveBitcoin עלה ב -50% !!![-0.038753289729356766, -0.04096424579620361, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999998 ... [-0.038753289729356766, -0.04096424579620361, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":23}]},{"cell_type":"markdown","metadata":{"id":"SDlpd33H1HIX"},"source":["# The Model understands Telugu\n","![te](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Kc5n1bzv1BJT","executionInfo":{"status":"ok","timestamp":1614266823316,"user_tz":-300,"elapsed":2852,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"fd41d8da-30f5-4fee-9c92-cdaacdaacb49"},"source":["# Telugu for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict('బిట్\\u200cకాయిన్ 50 శాతం పెరిగింది !!!' )"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
01.000000positiveబిట్‌కాయిన్ 50 శాతం పెరిగింది !!![-0.046535082161426544, 0.020184200257062912, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 1.000000 ... [-0.046535082161426544, 0.020184200257062912, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"-l-u6vrz1Obe","executionInfo":{"status":"ok","timestamp":1614266826384,"user_tz":-300,"elapsed":3077,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"89bafb24-6c35-4a22-e025-ff6012eea311"},"source":["# Telugu for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict('బిట్\\u200cకాయిన్ 50 శాతం పడిపోయింది !!! ')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999992negativeబిట్‌కాయిన్ 50 శాతం పడిపోయింది !!![-0.07144765555858612, 0.004319730680435896, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999992 ... [-0.07144765555858612, 0.004319730680435896, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":25}]},{"cell_type":"markdown","metadata":{"id":"nziBUe8t1Zwn"},"source":["# Model understands Russian\n","![ru](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Ckyjl3YQ1VFn","executionInfo":{"status":"ok","timestamp":1614266829257,"user_tz":-300,"elapsed":2888,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6d9b33c1-82bd-44d3-a117-1b55ffbca20c"},"source":["# Russian for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict('Биткойн упал на 50 процентов !!! ')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999980negativeБиткойн упал на 50 процентов !!![-0.05514690652489662, -0.025273717939853668, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999980 ... [-0.05514690652489662, -0.025273717939853668, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":26}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"GIdWkfGv1gFz","executionInfo":{"status":"ok","timestamp":1614266831825,"user_tz":-300,"elapsed":2574,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7d9baaf2-a41c-455f-a2cd-f41354d73122"},"source":["# Russian for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict('Биткойн поднялся на 50 процентов !!!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
01.000000positiveБиткойн поднялся на 50 процентов !!![-0.0347013995051384, -0.016639329493045807, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 1.000000 ... [-0.0347013995051384, -0.016639329493045807, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"markdown","metadata":{"id":"8R1j9mwz2Cm4"},"source":["# Model understands Urdu\n","![ur](https://www.worldometers.info/img/flags/small/tn_pk-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"j4zwvRV11pcG","executionInfo":{"status":"ok","timestamp":1614266834444,"user_tz":-300,"elapsed":2629,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"54e3d81e-1b12-4624-94bf-35b80062a655"},"source":["# Urdu for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict('Bitcoin 50 فیصد کی طرف سے گرا دیا !!!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999964negativeBitcoin 50 فیصد کی طرف سے گرا دیا !!![-0.04297984763979912, -0.042187489569187164, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999964 ... [-0.04297984763979912, -0.042187489569187164, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":28}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"_EwpYn_hBpzt","executionInfo":{"status":"ok","timestamp":1614270688693,"user_tz":-300,"elapsed":5023,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"57e4be02-181e-4c7b-c847-a245f7443874"},"source":["# Urdu for : 'Dollar rates skyrocketed!!'\n","fitted_pipe.predict('ڈالر کے نرخ آسمان چھائے ہوئے ہیں !!!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999181positiveڈالر کے نرخ آسمان چھائے ہوئے ہیں !!![-0.0676291286945343, -0.02299042046070099, -0...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999181 ... [-0.0676291286945343, -0.02299042046070099, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":48}]},{"cell_type":"markdown","metadata":{"id":"RoNg-C3k1qcX"},"source":["# Model understands Hindi\n","![hi](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"QZ9RT5Wv1r1n","executionInfo":{"status":"ok","timestamp":1614266837357,"user_tz":-300,"elapsed":2925,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8852f61a-7d92-4990-b1de-5aaa3d703235"},"source":["# Hindi for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict('बिटकॉइन 50 प्रतिशत चढ़ गया !!! ')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999517positiveबिटकॉइन 50 प्रतिशत चढ़ गया !!![-0.03370288014411926, -0.027637386694550514, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999517 ... [-0.03370288014411926, -0.027637386694550514, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":29}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"quM-IL2i12-B","executionInfo":{"status":"ok","timestamp":1614266840344,"user_tz":-300,"elapsed":3025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e41d63f8-4cbc-4e72-c570-72702aac89bc"},"source":["# Hindi for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict('बिटकॉइन में 50 प्रतिशत की गिरावट !!!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999991negativeबिटकॉइन में 50 प्रतिशत की गिरावट !!![-0.05678804963827133, -0.04536443203687668, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999991 ... [-0.05678804963827133, -0.04536443203687668, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":30}]},{"cell_type":"markdown","metadata":{"id":"R4ByHOZn35Lc"},"source":["# The model understands Tartar\n","![tt](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"2JrzusSQ18F5","executionInfo":{"status":"ok","timestamp":1614266844127,"user_tz":-300,"elapsed":3794,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ac92838e-a36c-44b5-bc5f-67995f875c2f"},"source":["# Tartar for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict('Bitcoin 50 процентка төште !!!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999964negativeBitcoin 50 процентка төште !!![-0.04669538140296936, -0.04520617797970772, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999964 ... [-0.04669538140296936, -0.04520617797970772, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":31}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"J06Xm_Ln4AYu","executionInfo":{"status":"ok","timestamp":1614266845575,"user_tz":-300,"elapsed":1454,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"00d03c3d-6b31-49d8-e539-20d0212e1a11"},"source":["# Tartar for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict('Биткойн 50 процентка артты !!!')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999982positiveБиткойн 50 процентка артты !!![-0.015944577753543854, -0.024691367521882057,...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999982 ... [-0.015944577753543854, -0.024691367521882057,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":32}]},{"cell_type":"markdown","metadata":{"id":"HKj5yWwwMplH"},"source":["# The Model understands French\n","![fr](https://www.worldometers.info/img/flags/small/tn_fr-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"CUHcJZfJMplL","executionInfo":{"status":"ok","timestamp":1614266848529,"user_tz":-300,"elapsed":2973,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"274fb581-6bef-4a67-c03d-e55457ee39cc"},"source":["# French for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict(\"Bitcoin a chuté de 50% !!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999962negativeBitcoin a chuté de 50% !!![-0.05751338228583336, -0.05539099499583244, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999962 ... [-0.05751338228583336, -0.05539099499583244, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"57NY2XoTMplM","executionInfo":{"status":"ok","timestamp":1614266852247,"user_tz":-300,"elapsed":3728,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"03f99a3f-0424-4113-bf2d-888aabb5c983"},"source":["# French for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict(\"Le Bitcoin a augmenté de 50% !!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
01.000000positiveLe Bitcoin a augmenté de 50% !!![-0.036177001893520355, -0.03910716623067856, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 1.000000 ... [-0.036177001893520355, -0.03910716623067856, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":34}]},{"cell_type":"markdown","metadata":{"id":"jD2TBgT0Nq6F"},"source":["# The Model understands Thai\n","![th](https://www.worldometers.info/img/flags/small/tn_th-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"gBp11S5GNq6S","executionInfo":{"status":"ok","timestamp":1614266853764,"user_tz":-300,"elapsed":1530,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f2be617c-245c-48e6-9ebd-04da8abce99c"},"source":["# Thai for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict(\"Bitcoin เพิ่มขึ้น 50 เปอร์เซ็นต์ !!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
01.000000positiveBitcoin เพิ่มขึ้น 50 เปอร์เซ็นต์ !!![-0.03620089590549469, -0.027675965800881386, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 1.000000 ... [-0.03620089590549469, -0.027675965800881386, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":35}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Fxh1gasROElC","executionInfo":{"status":"ok","timestamp":1614266856703,"user_tz":-300,"elapsed":2986,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"999f6a48-dcc4-4b21-8dcd-fc9c0fee8eb3"},"source":["# Thai for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict(\"Bitcoin ลดลง 50 เปอร์เซ็นต์ !!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999993negativeBitcoin ลดลง 50 เปอร์เซ็นต์ !!![-0.05988080054521561, -0.04294700548052788, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999993 ... [-0.05988080054521561, -0.04294700548052788, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":36}]},{"cell_type":"markdown","metadata":{"id":"mLItI4KZOElB"},"source":["# The Model understands Khmer\n","![km](https://www.worldometers.info/img/flags/small/tn_cb-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Mv0KkUxo7Lh_","executionInfo":{"status":"ok","timestamp":1614266859361,"user_tz":-300,"elapsed":2667,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3d43f248-a01a-47b4-e1ed-b7adc17a89e1"},"source":["# Khmer for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict(\"Bitcoin បានធ្លាក់ចុះ 50 ភាគរយ !!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999993negativeBitcoin បានធ្លាក់ចុះ 50 ភាគរយ !!![-0.05702926591038704, -0.04252532869577408, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999993 ... [-0.05702926591038704, -0.04252532869577408, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SWbqMgAwOElC","executionInfo":{"status":"ok","timestamp":1614266862667,"user_tz":-300,"elapsed":3311,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"718b8f7d-e3f7-4377-dd06-7848162e3b08"},"source":["# Khmer for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict(\"Bitcoin បានកើនឡើង 50 ភាគរយ !!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
01.000000positiveBitcoin បានកើនឡើង 50 ភាគរយ !!![-0.0398690328001976, -0.03344429284334183, -0...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 1.000000 ... [-0.0398690328001976, -0.03344429284334183, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":38}]},{"cell_type":"markdown","metadata":{"id":"lvE-LbNiPoBT"},"source":["# The Model understands Yiddish\n","![yi](https://www.worldometers.info/img/flags/small/tn_pl-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"sZlmLhajPoBb","executionInfo":{"status":"ok","timestamp":1614266865000,"user_tz":-300,"elapsed":2360,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7f68992f-36d6-4c27-b7b8-d9f93c72c42d"},"source":["# Yiddish for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict(\"ביטקאָין דראַפּט דורך 50 פּראָצענט !!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999909negativeביטקאָין דראַפּט דורך 50 פּראָצענט !!![-0.05685276538133621, -0.04757661744952202, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999909 ... [-0.05685276538133621, -0.04757661744952202, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":39}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"5h-pha_nPoBc","executionInfo":{"status":"ok","timestamp":1614266868647,"user_tz":-300,"elapsed":3661,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e8e826c6-350f-4246-e650-dc3e08dc6fc7"},"source":["# Yiddish for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict(\"ביטקאָין איז אַרויף מיט 50 פּראָצענט !!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
01.000000positiveביטקאָין איז אַרויף מיט 50 פּראָצענט !!![-0.049434419721364975, -0.051430195569992065,...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 1.000000 ... [-0.049434419721364975, -0.051430195569992065,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":40}]},{"cell_type":"markdown","metadata":{"id":"XSz4WzScaAHj"},"source":["# The Model understands Kygrgyz\n","![ky](https://www.worldometers.info/img/flags/small/tn_kg-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"DXz6fhJSaAHu","executionInfo":{"status":"ok","timestamp":1614266870434,"user_tz":-300,"elapsed":1800,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d48bcfa9-ac3b-40d0-accb-aee5720ee0b5"},"source":["# Kygrgyz for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict(\"Bitcoin 50 пайызга төмөндөдү !!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999997negativeBitcoin 50 пайызга төмөндөдү !!![-0.0606391541659832, -0.02077414281666279, -0...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999997 ... [-0.0606391541659832, -0.02077414281666279, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":41}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"lh_ZSHlPaAHv","executionInfo":{"status":"ok","timestamp":1614266873377,"user_tz":-300,"elapsed":2949,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2e237a39-adc0-4d50-9320-95c7d4ac141c"},"source":["# Kygrgyz for : 'Bitcoin went up by 50 percent !!!'\n","fitted_pipe.predict(\"Bitcoin 50 пайызга көтөрүлдү !!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.982491positiveBitcoin 50 пайызга көтөрүлдү !!![-0.03188328444957733, -0.00919796247035265, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.982491 ... [-0.03188328444957733, -0.00919796247035265, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":42}]},{"cell_type":"markdown","metadata":{"id":"DGMVMKaTdJFj"},"source":["# The Model understands Tamil\n","![ta](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"JWDr_LoCdJFn","executionInfo":{"status":"ok","timestamp":1614266875989,"user_tz":-300,"elapsed":2628,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2c1b68d5-f035-4276-d1a9-7822938f6c23"},"source":["# Tamil for : 'Bitcoin dropped by 50 percent !!!'\n","fitted_pipe.predict(\"பிட்காயின் 50 சதவீதம் குறைந்தது !!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
00.999998negativeபிட்காயின் 50 சதவீதம் குறைந்தது !!![-0.06173098459839821, -0.03497230261564255, -...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999998 ... [-0.06173098459839821, -0.03497230261564255, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":43}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"J8dGo9wLCFSa","executionInfo":{"status":"ok","timestamp":1614270739604,"user_tz":-300,"elapsed":5733,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9749e70e-cd05-4890-c45c-070dc2cc786d"},"source":["# Tamil for : 'Dollar rates skyrocketed!!'\n","fitted_pipe.predict(\"ڈالر کے نرخ آسمان چھائے ہوئے ہیں !!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
sentiment_confidencesentimentdocumentxx_embed_sentence_labse_embeddings
origin_index
01.000000positiveڈالر کے نرخ آسمان چھائے ہوئے ہیں !![-0.06327780336141586, -0.029234571382403374, ...
\n","
"],"text/plain":[" sentiment_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 1.000000 ... [-0.06327780336141586, -0.029234571382403374, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1613916852307,"user_tz":-300,"elapsed":813964,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b79573d0-c4a2-45c9-c13e-109a00579273"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":107},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1613917010265,"user_tz":-300,"elapsed":100792,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0d80ba87-0cd9-4eab-a6bf-8dbc19780b97"},"source":["stored_model_path = './models/classifier_dl_trained' \n","\n","hdd_pipe = nlu.load(path=\"./models/classifier_dl_trained\")\n","\n","preds = hdd_pipe.predict('Bitcoin dropped by 50 percent!!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.04765033349394798, -0.03717166185379028, -...0.999998negativeBitcoin dropped by 50 percent!!
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.04765033349394798, -0.03717166185379028, -... ... Bitcoin dropped by 50 percent!!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"e0CVlkk9v6Qi","executionInfo":{"status":"ok","timestamp":1613917010268,"user_tz":-300,"elapsed":100781,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a78d4d8d-7132-4c5f-853a-8634934a288f"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this SentimentDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_twitter.ipynb b/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_twitter.ipynb new file mode 100644 index 00000000..e0b0f188 --- /dev/null +++ b/examples/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_twitter.ipynb @@ -0,0 +1 @@ 
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_multi_lingual_training_sentiment_classifier_demo_twitter.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/multi_lingual_text_classification/colab/Training/multi_lingual/binary_text_classification/NLU_multi_lingual_training_sentiment_classifier_demo_twitter.ipynb)\n","\n","# Training a Sentiment Analysis Classifier with NLU \n","## 2 Class Twitter Sentiment Classifier Training\n","With the [SentimentDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#sentimentdl-multi-class-sentiment-analysis-annotator) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","\n","* List item\n","* List item\n","\n","\n","You can achieve these results or even better on this dataset with training data : \n","\n","
\n","\n","![image.png]()\n","\n","You can achieve these results or even better on this dataset with test data : \n","\n","
\n","\n","![image.png]()"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hFGnBCHavltY","executionInfo":{"status":"ok","timestamp":1614275046135,"user_tz":-300,"elapsed":92799,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c57a711f-c1bc-4b42-b8ff-2b4a75df669e"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 62kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 18.8MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=7236adf9353cce5912057eb957aa7a21314e0f9753237475e8fd609b90e438e8\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download twitter Sentiment dataset \n","https://www.kaggle.com/cosmos98/twitter-and-reddit-sentimental-analysis-dataset\n","#Context\n","\n","This dataset was created as part of a university project on sentiment analysis of multi-source social media platforms using PySpark."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614275047576,"user_tz":-300,"elapsed":93891,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6bde9b8e-1d6c-4d44-ab49-761ffd1d185b"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/twitter_data_multi_lang.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-25 17:42:56-- http://ckl-it.de/wp-content/uploads/2021/02/twitter_data_multi_lang.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 125908 (123K) [text/csv]\n","Saving to: ‘twitter_data_multi_lang.csv’\n","\n","twitter_data_multi_ 100%[===================>] 122.96K 232KB/s in 0.5s \n","\n","2021-02-25 17:42:57 (232 KB/s) - ‘twitter_data_multi_lang.csv’ saved [125908/125908]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614275047581,"user_tz":-300,"elapsed":93734,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"24c76468-7102-4e64-f610-59347bd94897"},"source":["import pandas as pd\n","train_path = '/content/twitter_data_multi_lang.csv'\n","\n","train_df = pd.read_csv(train_path)\n","train_df.test_sentences = train_df.test_sentences.astype(str)\n","# the text data to use for classification should be in a column named 'text'\n","# the label column must have name 'y' name be of type str\n","train_df= train_df[[\"text\",\"y\"]]\n","from sklearn.model_selection import train_test_split\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
texty
101more than million additional farmers will get ...positive
440bjp struggles find candidates west bengal graf...negative
28know going into dirty details nehru family its...negative
317apke yar modi message our people celebrate pak...positive
477why cant write one single tweet without blamin...negative
.........
330governor kalyan singh aligarh 23rd march all a...positive
142after going thru all the comedy speeches shri ...positive
38tag this fast growing youtuber cared abt this ...negative
205not huge resulting bloodshed there will protes...negative
369this the new india modi trying build with thes...negative
\n","

480 rows × 2 columns

\n","
"],"text/plain":[" text y\n","101 more than million additional farmers will get ... positive\n","440 bjp struggles find candidates west bengal graf... negative\n","28 know going into dirty details nehru family its... negative\n","317 apke yar modi message our people celebrate pak... positive\n","477 why cant write one single tweet without blamin... negative\n",".. ... ...\n","330 governor kalyan singh aligarh 23rd march all a... positive\n","142 after going thru all the comedy speeches shri ... positive\n","38 tag this fast growing youtuber cared abt this ... negative\n","205 not huge resulting bloodshed there will protes... negative\n","369 this the new india modi trying build with thes... negative\n","\n","[480 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.sentiment')\n","\n","Your dataset's label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":811},"id":"IKK_Ii_gjJfF","executionInfo":{"status":"ok","timestamp":1614275723265,"user_tz":-300,"elapsed":769150,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"757e83a5-e517-46a4-d73f-303d334c6f43"},"source":["trainable_pipe = nlu.load('xx.embed_sentence.labse train.sentiment')\n","# We usually need to train longer and use a smaller LR for non-USE based sentence embeddings\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['sentiment_dl'].setMaxEpochs(60) \n","trainable_pipe['sentiment_dl'].setLr(0.005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = 
fitted_pipe.predict(train_df,output_level='document')\n","\n","# The sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["labse download started this may take some time.\n","Approximate size to download 1.7 GB\n","[OK!]\n"," precision recall f1-score support\n","\n"," negative 0.97 0.98 0.97 237\n"," positive 0.98 0.97 0.98 243\n","\n"," accuracy 0.97 480\n"," macro avg 0.98 0.98 0.97 480\n","weighted avg 0.98 0.97 0.98 480\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textsentimentxx_embed_sentence_labse_embeddingssentiment_confidenceydocument
origin_index
101more than million additional farmers will get ...positive[0.007220160216093063, 0.02621868997812271, 0....0.999999positivemore than million additional farmers will get ...
440bjp struggles find candidates west bengal graf...negative[-0.0011240983149036765, 0.01309733372181654, ...0.997356negativebjp struggles find candidates west bengal graf...
28know going into dirty details nehru family its...negative[-0.0208485908806324, 0.02829776145517826, 0.0...0.999963negativeknow going into dirty details nehru family its...
317apke yar modi message our people celebrate pak...positive[-0.0184789951890707, 0.011275646276772022, -0...1.000000positiveapke yar modi message our people celebrate pak...
477why cant write one single tweet without blamin...negative[-0.005628153216093779, 0.017803337424993515, ...0.999919negativewhy cant write one single tweet without blamin...
.....................
330governor kalyan singh aligarh 23rd march all a...positive[0.006282471120357513, -0.027118012309074402, ...1.000000positivegovernor kalyan singh aligarh 23rd march all a...
142after going thru all the comedy speeches shri ...positive[-0.051873888820409775, 0.007939846254885197, ...0.999910positiveafter going thru all the comedy speeches shri ...
38tag this fast growing youtuber cared abt this ...negative[0.01988791488111019, -0.011552331037819386, 0...0.999895negativetag this fast growing youtuber cared abt this ...
205not huge resulting bloodshed there will protes...negative[-0.029743511229753494, -0.0021075434051454067...0.994726negativenot huge resulting bloodshed there will protes...
369this the new india modi trying build with thes...negative[-0.02394954301416874, 0.0441671684384346, 0.0...0.999804negativethis the new india modi trying build with thes...
\n","

480 rows × 6 columns

\n","
"],"text/plain":[" text ... document\n","origin_index ... \n","101 more than million additional farmers will get ... ... more than million additional farmers will get ...\n","440 bjp struggles find candidates west bengal graf... ... bjp struggles find candidates west bengal graf...\n","28 know going into dirty details nehru family its... ... know going into dirty details nehru family its...\n","317 apke yar modi message our people celebrate pak... ... apke yar modi message our people celebrate pak...\n","477 why cant write one single tweet without blamin... ... why cant write one single tweet without blamin...\n","... ... ... ...\n","330 governor kalyan singh aligarh 23rd march all a... ... governor kalyan singh aligarh 23rd march all a...\n","142 after going thru all the comedy speeches shri ... ... after going thru all the comedy speeches shri ...\n","38 tag this fast growing youtuber cared abt this ... ... tag this fast growing youtuber cared abt this ...\n","205 not huge resulting bloodshed there will protes... ... not huge resulting bloodshed there will protes...\n","369 this the new india modi trying build with thes... ... this the new india modi trying build with thes...\n","\n","[480 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 3.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Fxx4yNkNVGFl","executionInfo":{"status":"ok","timestamp":1614275818852,"user_tz":-300,"elapsed":863091,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"632c6d07-dd25-455c-89fd-6c08836e2840"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. 
Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.71 0.70 0.70 63\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.69 0.65 0.67 57\n","\n"," accuracy 0.68 120\n"," macro avg 0.46 0.45 0.46 120\n","weighted avg 0.70 0.68 0.69 120\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"BD5OKO4Umc5U"},"source":["# 4. Test Model on 20 languages!"]},{"cell_type":"code","metadata":{"id":"OQ72hP9unML7","colab":{"base_uri":"https://localhost:8080/","height":826},"executionInfo":{"status":"ok","timestamp":1614275917241,"user_tz":-300,"elapsed":98353,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b051fdcd-cbbc-4d11-f070-8679d7e9e040"},"source":["train_df = pd.read_csv(\"/content/twitter_data_multi_lang.csv\")\n","preds = fitted_pipe.predict(train_df[[\"test_sentences\",\"y\"]].iloc[:100],output_level='document')\n","\n","# The sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['sentiment']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," negative 0.91 0.80 0.85 49\n"," neutral 0.00 0.00 0.00 0\n"," positive 0.85 0.92 0.89 51\n","\n"," accuracy 0.86 100\n"," macro avg 0.59 0.57 0.58 100\n","weighted avg 0.88 0.86 0.87 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textsentimentxx_embed_sentence_labse_embeddingssentiment_confidenceydocument
origin_index
0how narendra modi has almost killed the indian...negative[-0.055935658514499664, 0.05042083561420441, -...0.999031negativehow narendra modi has almost killed the indian...
1تعتقد أنه كان مودي وراء هذا الحادثnegative[0.00735881645232439, -0.0520767942070961, 0.0...0.950884negativeتعتقد أنه كان مودي وراء هذا الحادث
2カマル・ハサーンがチョウキダール・モディを連れて行くカマル・ハサーン・モディの金持ちが貧しい...negative[-0.012155445292592049, -0.020653903484344482,...0.999994negativeカマル・ハサーンがチョウキダール・モディを連れて行くカマル・ハサーン・モディの金持ちが貧しい...
3связанное имя с фамилией, а не bcz религия, св...negative[-0.006620911415666342, 0.025743944570422173, ...0.999951negativeсвязанное имя с фамилией, а не bcz религия, св...
4kdokoli lepší než modi, když nehruji vypršela,...positive[-0.04917776957154274, 0.01752305217087269, -0...1.000000positivekdokoli lepší než modi, když nehruji vypršela,...
.....................
95lol qui va épouser son hippopotame tous les ho...positive[-0.010019153356552124, -0.03171534836292267, ...0.999032positivelol qui va épouser son hippopotame tous les ho...
96拉贾斯坦邦州长卡莉安·辛格·阿里加3月23日全都是bjp工人,希望bjp胜利,希望莫迪再次成...positive[0.009000579826533794, -0.021888382732868195, ...0.999999positive拉贾斯坦邦州长卡莉安·辛格·阿里加3月23日全都是bjp工人,希望bjp胜利,希望莫迪再次成...
97మోడీ భక్తులు రాహుల్ గురించి అబద్ధాలు చెబుతున్న...positive[-0.05518202483654022, -0.004170998930931091, ...0.891503positiveమోడీ భక్తులు రాహుల్ గురించి అబద్ధాలు చెబుతున్న...
98lol neha, je to jako dát hlavu zabít těm, kteř...positive[-0.019701899960637093, -0.019368575885891914,...0.748650positivelol neha, je to jako dát hlavu zabít těm, kteř...
99por favor venda nuestro bosque por favor haga ...positive[-0.03966624662280083, -0.019480157643556595, ...0.999935positivepor favor venda nuestro bosque por favor haga ...
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" text ... document\n","origin_index ... \n","0 how narendra modi has almost killed the indian... ... how narendra modi has almost killed the indian...\n","1 تعتقد أنه كان مودي وراء هذا الحادث ... تعتقد أنه كان مودي وراء هذا الحادث\n","2 カマル・ハサーンがチョウキダール・モディを連れて行くカマル・ハサーン・モディの金持ちが貧しい... ... カマル・ハサーンがチョウキダール・モディを連れて行くカマル・ハサーン・モディの金持ちが貧しい...\n","3 связанное имя с фамилией, а не bcz религия, св... ... связанное имя с фамилией, а не bcz религия, св...\n","4 kdokoli lepší než modi, když nehruji vypršela,... ... kdokoli lepší než modi, když nehruji vypršela,...\n","... ... ... ...\n","95 lol qui va épouser son hippopotame tous les ho... ... lol qui va épouser son hippopotame tous les ho...\n","96 拉贾斯坦邦州长卡莉安·辛格·阿里加3月23日全都是bjp工人,希望bjp胜利,希望莫迪再次成... ... 拉贾斯坦邦州长卡莉安·辛格·阿里加3月23日全都是bjp工人,希望bjp胜利,希望莫迪再次成...\n","97 మోడీ భక్తులు రాహుల్ గురించి అబద్ధాలు చెబుతున్న... ... మోడీ భక్తులు రాహుల్ గురించి అబద్ధాలు చెబుతున్న...\n","98 lol neha, je to jako dát hlavu zabít těm, kteř... ... lol neha, je to jako dát hlavu zabít těm, kteř...\n","99 por favor venda nuestro bosque por favor haga ... ... por favor venda nuestro bosque por favor haga ...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"RjtuNUcvuJTT"},"source":["# The Model understands English\n","![en](https://www.worldometers.info/img/flags/small/tn_nz-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"o0vu7PaWkcI7","executionInfo":{"status":"ok","timestamp":1614276122523,"user_tz":-300,"elapsed":5715,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9e51df5c-25b0-4981-c631-133852a6b1fd"},"source":["fitted_pipe.predict(\"Congress's new policies made many people sad \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.004380677826702595, -0.002109142020344734, ...0.993856negativeCongress's new policies made many people sad
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.004380677826702595, -0.002109142020344734, ... ... Congress's new policies made many people sad\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1ykjRQhCtQ4w","executionInfo":{"status":"ok","timestamp":1614276130115,"user_tz":-300,"elapsed":4295,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1e733dfe-f096-4c2d-d09e-768b78fda799"},"source":["fitted_pipe.predict(\"Congress's new policies made many people happy \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.02597951516509056, -0.007445341441780329, -...0.999998positiveCongress's new policies made many people happy
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.02597951516509056, -0.007445341441780329, -... ... Congress's new policies made many people happy\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"vohym-XbuNHn"},"source":["# The Model understands German\n","![de](https://www.worldometers.info/img/flags/small/tn_gm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"dzaaZrI4tVWc","executionInfo":{"status":"ok","timestamp":1614276631990,"user_tz":-300,"elapsed":5147,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4b873b8b-ada8-47dd-cec8-16ca0da2ee77"},"source":["# German for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"Die neue Politik des Kongresses machte viele Menschen arm, traurig und depressiv \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.02746930904686451, 0.015148884616792202, -...0.994498negativeDie neue Politik des Kongresses machte viele M...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.02746930904686451, 0.015148884616792202, -... ... Die neue Politik des Kongresses machte viele M...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":16}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"BbhgTSBGtTtJ","executionInfo":{"status":"ok","timestamp":1614276144053,"user_tz":-300,"elapsed":6494,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5abf1136-27ad-409d-8691-1cf8911a250c"},"source":["# German for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"Die neue Politik des Kongresses machte viele Menschen glücklich \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.008141150698065758, -0.009829352609813213, ...0.999969positiveDie neue Politik des Kongresses machte viele M...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.008141150698065758, -0.009829352609813213, ... ... Die neue Politik des Kongresses machte viele M...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"markdown","metadata":{"id":"a1JbtmWquQwj"},"source":["# The Model understands Chinese\n","![zh](https://www.worldometers.info/img/flags/small/tn_ch-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"kYSYqtoRtc-P","executionInfo":{"status":"ok","timestamp":1614276310814,"user_tz":-300,"elapsed":4049,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e444f669-80aa-4fa0-8a53-09b7bc8f74da"},"source":["# Chinese for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"国会的新政策使许多人感到高兴 \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.009464382193982601, -0.012016323395073414, ...0.999999positive国会的新政策使许多人感到高兴
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.009464382193982601, -0.012016323395073414, ... ... 国会的新政策使许多人感到高兴\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"06v9SD-QtlBU","executionInfo":{"status":"ok","timestamp":1614276660518,"user_tz":-300,"elapsed":4765,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b86574ad-5089-477b-a123-f75fcf83703e"},"source":["# Chinese for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"国会的新政策使许多人变得贫穷,悲伤和沮丧 \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.05506608635187149, -0.0026403777301311493,...0.999973negative国会的新政策使许多人变得贫穷,悲伤和沮丧
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.05506608635187149, -0.0026403777301311493,... ... 国会的新政策使许多人变得贫穷,悲伤和沮丧\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":17}]},{"cell_type":"markdown","metadata":{"id":"9h7CvN4uu9Pb"},"source":["# Model understands Afrikaans\n","\n","![af](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)\n","\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"VMPhbgw9twtf","executionInfo":{"status":"ok","timestamp":1614276670548,"user_tz":-300,"elapsed":5405,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5cb8f03b-9264-482a-a60e-e7a82abe4e51"},"source":["# Afrikaans for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"Die Kongres se nuwe beleid het baie mense arm, hartseer en depressief gemaak \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.023684455081820488, 0.0034083002246916294,...0.991269negativeDie Kongres se nuwe beleid het baie mense arm,...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.023684455081820488, 0.0034083002246916294,... ... Die Kongres se nuwe beleid het baie mense arm,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"zWgNTIdkumhX","executionInfo":{"status":"ok","timestamp":1614276678805,"user_tz":-300,"elapsed":4776,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2bb8ff94-704e-4a74-af7d-9b8e2ece9234"},"source":["# Afrikaans for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"Die Kongres se nuwe beleid het baie mense gelukkig gemaak \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.005836580414324999, -0.029826413840055466, ...0.999999positiveDie Kongres se nuwe beleid het baie mense gelu...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.005836580414324999, -0.029826413840055466, ... ... Die Kongres se nuwe beleid het baie mense gelu...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":19}]},{"cell_type":"markdown","metadata":{"id":"IlkmAaMoxTuy"},"source":["# The model understands Japanese\n","![ja](https://www.worldometers.info/img/flags/small/tn_ja-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1IfJu3q8wwUt","executionInfo":{"status":"ok","timestamp":1614276737307,"user_tz":-300,"elapsed":4592,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"23723a2e-fb2a-4b12-892a-c6d9372c4df6"},"source":["# Japanese for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"議会の新しい政策は多くの人々を貧しく、悲しくそして落ち込んだものにしました \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.04006955772638321, 0.0033476173412054777, ...0.999940negative議会の新しい政策は多くの人々を貧しく、悲しくそして落ち込んだものにしました
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.04006955772638321, 0.0033476173412054777, ... ... 議会の新しい政策は多くの人々を貧しく、悲しくそして落ち込んだものにしました\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":22}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"h3k7_PFhxOve","executionInfo":{"status":"ok","timestamp":1614276745466,"user_tz":-300,"elapsed":4167,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d42f719b-7bb4-42db-c331-05ac48b2b2be"},"source":["\n","\t\t\n","# Japanese for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"議会の新しい政策は多くの人々を幸せにしました \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.017957257106900215, -0.015919474884867668,...0.999990positive議会の新しい政策は多くの人々を幸せにしました
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.017957257106900215, -0.015919474884867668,... ... 議会の新しい政策は多くの人々を幸せにしました\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":23}]},{"cell_type":"markdown","metadata":{"id":"VGVvzl_30a0T"},"source":["# The Model understands Turkish\n","![tr](https://www.worldometers.info/img/flags/small/tn_tu-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"DRNnuEeQz2pd","executionInfo":{"status":"ok","timestamp":1614276801871,"user_tz":-300,"elapsed":5304,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0dce3103-5b29-466c-bede-daa4c862326c"},"source":["# Turkish for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"Kongrenin yeni politikaları birçok insanı fakir, hüzünlü ve depresif hale getirdi \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.02755211666226387, 0.012688509188592434, -...0.999879negativeKongrenin yeni politikaları birçok insanı faki...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.02755211666226387, 0.012688509188592434, -... ... Kongrenin yeni politikaları birçok insanı faki...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":26}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"aOSsiK6J0jWs","executionInfo":{"status":"ok","timestamp":1614276808413,"user_tz":-300,"elapsed":4536,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d519ed93-88b1-4bac-a359-057ba477707b"},"source":["# Turkish for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"Kongrenin yeni politikaları birçok insanı mutlu etti \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.01936783455312252, -0.00632244348526001, -0...0.999999positiveKongrenin yeni politikaları birçok insanı mutl...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.01936783455312252, -0.00632244348526001, -0... ... Kongrenin yeni politikaları birçok insanı mutl...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"markdown","metadata":{"id":"803qL2gt0vlb"},"source":["# The Model understands Hebrew\n","![he](https://www.worldometers.info/img/flags/small/tn_is-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"XQ5VCtxw0pc0","executionInfo":{"status":"ok","timestamp":1614276815919,"user_tz":-300,"elapsed":5264,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"53e868b3-8ac3-4168-cd40-1cb8979b4339"},"source":["# Hebrew for: 'Congress's newest policies made many people poor, sad and depressed '\n","fitted_pipe.predict(\"המדיניות החדשה של הקונגרס גרמה לאנשים רבים להיות עניים, עצובים ומדוכאים \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.03273191303014755, -0.016592293977737427, ...0.999585negativeהמדיניות החדשה של הקונגרס גרמה לאנשים רבים להי...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.03273191303014755, -0.016592293977737427, ... ... המדיניות החדשה של הקונגרס גרמה לאנשים רבים להי...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":28}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"9w2ZHfns05A4","executionInfo":{"status":"ok","timestamp":1614276821060,"user_tz":-300,"elapsed":5120,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d3345bd6-7968-4505-e274-00e52920629a"},"source":["# Hebrew for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"המדיניות החדשה של הקונגרס שימחה אנשים רבים \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.0014839960495010018, -0.01997889205813408, ...0.999985positiveהמדיניות החדשה של הקונגרס שימחה אנשים רבים
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.0014839960495010018, -0.01997889205813408, ... ... המדיניות החדשה של הקונגרס שימחה אנשים רבים\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":29}]},{"cell_type":"markdown","metadata":{"id":"SDlpd33H1HIX"},"source":["# The Model understands Telugu\n","![te](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Kc5n1bzv1BJT","executionInfo":{"status":"ok","timestamp":1614276827423,"user_tz":-300,"elapsed":4897,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"de685667-717c-4289-e1cb-1b51f05328c9"},"source":["# Telugu for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"కాంగ్రెస్ కొత్త విధానాలు చాలా మందిని పేదలుగా, విచారంగా, నిరాశకు గురి చేశాయి \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.029074613004922867, -0.022254789248108864,...0.999552negativeకాంగ్రెస్ కొత్త విధానాలు చాలా మందిని పేదలుగా, ...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.029074613004922867, -0.022254789248108864,... ... కాంగ్రెస్ కొత్త విధానాలు చాలా మందిని పేదలుగా, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":30}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"-l-u6vrz1Obe","executionInfo":{"status":"ok","timestamp":1614276833539,"user_tz":-300,"elapsed":5247,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2ae6765e-4194-47ee-d8c0-c1200179ff1a"},"source":["# Telugu for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"కాంగ్రెస్ కొత్త విధానాలు చాలా మందికి సంతోషాన్నిచ్చాయి \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.0038313723634928465, -0.0348954014480114, -...1.000000positiveకాంగ్రెస్ కొత్త విధానాలు చాలా మందికి సంతోషాన్న...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.0038313723634928465, -0.0348954014480114, -... ... కాంగ్రెస్ కొత్త విధానాలు చాలా మందికి సంతోషాన్న...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":31}]},{"cell_type":"markdown","metadata":{"id":"nziBUe8t1Zwn"},"source":["# Model understands Russian\n","![ru](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Ckyjl3YQ1VFn","executionInfo":{"status":"ok","timestamp":1614276840964,"user_tz":-300,"elapsed":7048,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8806483f-8b77-409b-c6a0-7f529a87d397"},"source":["# Russian for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"Новая политика Конгресса сделала многих людей бедными, грустными и подавленными \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.029941828921437263, 0.016272377222776413, ...0.999666negativeНовая политика Конгресса сделала многих людей ...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.029941828921437263, 0.016272377222776413, ... ... Новая политика Конгресса сделала многих людей ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":32}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"GIdWkfGv1gFz","executionInfo":{"status":"ok","timestamp":1614276847675,"user_tz":-300,"elapsed":6667,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"38e1e9af-ab6d-4b32-ad68-0e9d4f547b1c"},"source":["\n","\t\t\n","# Russian for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"Новая политика Конгресса порадовала многих людей \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.002074663760140538, 0.014204155653715134, ...0.999997positiveНовая политика Конгресса порадовала многих людей
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.002074663760140538, 0.014204155653715134, ... ... Новая политика Конгресса порадовала многих людей\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"markdown","metadata":{"id":"8R1j9mwz2Cm4"},"source":["# Model understands Urdu\n","![ur](https://www.worldometers.info/img/flags/small/tn_pk-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"j4zwvRV11pcG","executionInfo":{"status":"ok","timestamp":1614276852721,"user_tz":-300,"elapsed":5020,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"673c85b7-ad3b-41c5-ddce-d18d59e5cb24"},"source":["\n","\t\t\n","# Urdu for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"کانگریس کی نئی پالیسیوں نے بہت سارے لوگوں کو غریب ، افسردہ اور افسردہ کردیا \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.03277842700481415, -0.019150162115693092, ...0.999972negativeکانگریس کی نئی پالیسیوں نے بہت سارے لوگوں کو غ...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.03277842700481415, -0.019150162115693092, ... ... کانگریس کی نئی پالیسیوں نے بہت سارے لوگوں کو غ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":34}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SxzTuK4b2UKV","executionInfo":{"status":"ok","timestamp":1614276861784,"user_tz":-300,"elapsed":4774,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5810c92e-af41-4d65-9033-016e8d5fa37f"},"source":["# Urdu for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"کانگریس کی نئی پالیسیوں نے بہت سارے لوگوں کو خوش کیا \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.0033543522004038095, -0.03387867286801338, ...0.994481positiveکانگریس کی نئی پالیسیوں نے بہت سارے لوگوں کو خ...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.0033543522004038095, -0.03387867286801338, ... ... کانگریس کی نئی پالیسیوں نے بہت سارے لوگوں کو خ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":35}]},{"cell_type":"markdown","metadata":{"id":"RoNg-C3k1qcX"},"source":["# Model understands Hindi\n","![hi](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"QZ9RT5Wv1r1n","executionInfo":{"status":"ok","timestamp":1614276868665,"user_tz":-300,"elapsed":5227,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c06bbde5-273c-4d41-ded2-90f77c38b94e"},"source":["# hindi for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"कांग्रेस की नई नीतियों ने कई लोगों को गरीब, दुखी और उदास बना दिया \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.030935170128941536, -0.011918678879737854,...0.999578negativeकांग्रेस की नई नीतियों ने कई लोगों को गरीब, दु...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.030935170128941536, -0.011918678879737854,... ... कांग्रेस की नई नीतियों ने कई लोगों को गरीब, दु...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":36}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"quM-IL2i12-B","executionInfo":{"status":"ok","timestamp":1614276872834,"user_tz":-300,"elapsed":4549,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"74941ce5-b5a6-4bda-aa63-1fc47a10c6ee"},"source":["# hindi for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"कांग्रेस की नई नीतियों ने कई लोगों को खुश किया \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.005392682272940874, -0.024082256481051445, ...0.999996positiveकांग्रेस की नई नीतियों ने कई लोगों को खुश किया
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.005392682272940874, -0.024082256481051445, ... ... कांग्रेस की नई नीतियों ने कई लोगों को खुश किया\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"markdown","metadata":{"id":"R4ByHOZn35Lc"},"source":["# The model understands Tartar\n","![tt](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"2JrzusSQ18F5","executionInfo":{"status":"ok","timestamp":1614276882648,"user_tz":-300,"elapsed":5587,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"873e687b-4d01-4350-c9a0-cac340f803cb"},"source":["# Tartar for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"Конгрессның яңа политикасы күп кешеләрне ярлы, моңсу һәм депрессиягә китерде \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.003766064764931798, -0.0006652609445154667...0.982602negativeКонгрессның яңа политикасы күп кешеләрне ярлы,...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.003766064764931798, -0.0006652609445154667... ... Конгрессның яңа политикасы күп кешеләрне ярлы,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":38}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"J06Xm_Ln4AYu","executionInfo":{"status":"ok","timestamp":1614276887515,"user_tz":-300,"elapsed":4845,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"beebc172-0b4b-4d99-9b1b-3d35cde9f24d"},"source":["\n","\t\t\n","# Tartar for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"Конгрессның яңа политикасы күпләрне сөендерде \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.00829650741070509, -0.007502756081521511, -...0.999998positiveКонгрессның яңа политикасы күпләрне сөендерде
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.00829650741070509, -0.007502756081521511, -... ... Конгрессның яңа политикасы күпләрне сөендерде\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":39}]},{"cell_type":"markdown","metadata":{"id":"HKj5yWwwMplH"},"source":["# The Model understands French\n","![fr](https://www.worldometers.info/img/flags/small/tn_fr-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"CUHcJZfJMplL","executionInfo":{"status":"ok","timestamp":1614276895800,"user_tz":-300,"elapsed":4891,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"58e401f4-da4a-4774-9723-2e75655f7527"},"source":["\t\t\n","# French for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"Les nouvelles politiques du Congrès ont rendu de nombreuses personnes pauvres, tristes et déprimées \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.017834072932600975, 0.011118757538497448, ...0.999893negativeLes nouvelles politiques du Congrès ont rendu ...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.017834072932600975, 0.011118757538497448, ... ... Les nouvelles politiques du Congrès ont rendu ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":40}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"57NY2XoTMplM","executionInfo":{"status":"ok","timestamp":1614276901842,"user_tz":-300,"elapsed":3925,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8a81e6d2-f19a-4d6c-a321-f7ae83b7a1d4"},"source":["# French for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"Les nouvelles politiques du Congrès ont rendu de nombreuses personnes heureuses \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.01951567642390728, -0.010051790624856949, -...1.000000positiveLes nouvelles politiques du Congrès ont rendu ...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.01951567642390728, -0.010051790624856949, -... ... Les nouvelles politiques du Congrès ont rendu ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":41}]},{"cell_type":"markdown","metadata":{"id":"jD2TBgT0Nq6F"},"source":["# The Model understands Thai\n","![th](https://www.worldometers.info/img/flags/small/tn_th-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"gBp11S5GNq6S","executionInfo":{"status":"ok","timestamp":1614276907324,"user_tz":-300,"elapsed":4211,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"210c560f-5dcb-4f62-ce19-885fff57ed25"},"source":["\t\t\n","# Thai for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"นโยบายใหม่ของสภาคองเกรสทำให้หลายคนยากจนเศร้าและหดหู่ \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[3.636999394984741e-07, -0.0028806282207369804...0.983253negativeนโยบายใหม่ของสภาคองเกรสทำให้หลายคนยากจนเศร้าแล...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [3.636999394984741e-07, -0.0028806282207369804... ... นโยบายใหม่ของสภาคองเกรสทำให้หลายคนยากจนเศร้าแล...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":42}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"KgatiiyuZumz","executionInfo":{"status":"ok","timestamp":1614276920252,"user_tz":-300,"elapsed":5061,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"11068004-0c0c-432e-dcc0-3938eac4c60a"},"source":["# Thai for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"นโยบายใหม่ของสภาคองเกรสทำให้หลายคนพอใจ \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.012098119594156742, 0.006513879634439945, -...1.000000positiveนโยบายใหม่ของสภาคองเกรสทำให้หลายคนพอใจ
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.012098119594156742, 0.006513879634439945, -... ... นโยบายใหม่ของสภาคองเกรสทำให้หลายคนพอใจ\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":43}]},{"cell_type":"markdown","metadata":{"id":"mLItI4KZOElB"},"source":["# The Model understands Khmer\n","![km](https://www.worldometers.info/img/flags/small/tn_cb-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Fxh1gasROElC","executionInfo":{"status":"ok","timestamp":1614276930093,"user_tz":-300,"elapsed":4481,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cf434d7b-7a32-499f-d094-24da1dbca7b6"},"source":["# Khmer for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"គោលនយោបាយថ្មីរបស់សភាបានធ្វើឱ្យប្រជាជនជាច្រើនក្រីក្រក្រៀមក្រំនិងធ្លាក់ទឹកចិត្ត \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.04521242156624794, 0.010355296544730663, -...0.999896negativeគោលនយោបាយថ្មីរបស់សភាបានធ្វើឱ្យប្រជាជនជាច្រើនក្...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.04521242156624794, 0.010355296544730663, -... ... គោលនយោបាយថ្មីរបស់សភាបានធ្វើឱ្យប្រជាជនជាច្រើនក្...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":44}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SWbqMgAwOElC","executionInfo":{"status":"ok","timestamp":1614276936826,"user_tz":-300,"elapsed":3746,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"25de67bf-e27c-458f-e904-f27cbca7efb5"},"source":["# Khmer for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"គោលនយោបាយថ្មីរបស់សភាបានធ្វើឱ្យមនុស្សជាច្រើនសប្បាយរីករាយ \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.025576740503311157, -0.020313693210482597,...0.999995positiveគោលនយោបាយថ្មីរបស់សភាបានធ្វើឱ្យមនុស្សជាច្រើនសប្...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.025576740503311157, -0.020313693210482597,... ... គោលនយោបាយថ្មីរបស់សភាបានធ្វើឱ្យមនុស្សជាច្រើនសប្...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":45}]},{"cell_type":"markdown","metadata":{"id":"lvE-LbNiPoBT"},"source":["# The Model understands Yiddish\n","![yi](https://www.worldometers.info/img/flags/small/tn_pl-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"sZlmLhajPoBb","executionInfo":{"status":"ok","timestamp":1614276944538,"user_tz":-300,"elapsed":4110,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2601e901-778b-4703-d8ca-3e3134d0a773"},"source":["\t\t\n","# Yiddish for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"קאָנגרעס ס נייַ פּאַלאַסיז געמאכט פילע מענטשן נעבעך, טרויעריק און דערשלאָגן \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.007056358736008406, -0.0033369245938956738...0.940492negativeקאָנגרעס ס נייַ פּאַלאַסיז געמאכט פילע מענטשן ...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.007056358736008406, -0.0033369245938956738... ... קאָנגרעס ס נייַ פּאַלאַסיז געמאכט פילע מענטשן ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"5h-pha_nPoBc","executionInfo":{"status":"ok","timestamp":1614276958250,"user_tz":-300,"elapsed":4657,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"85c44aaf-128a-4abc-ba8c-c2d027734963"},"source":["# Yiddish for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"קאָנגרעס ס נייַ פּאַלאַסיז געמאכט פילע מענטשן צופרידן \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.002619842765852809, -0.018449867144227028, ...0.999999positiveקאָנגרעס ס נייַ פּאַלאַסיז געמאכט פילע מענטשן ...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.002619842765852809, -0.018449867144227028, ... ... קאָנגרעס ס נייַ פּאַלאַסיז געמאכט פילע מענטשן ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":47}]},{"cell_type":"markdown","metadata":{"id":"XSz4WzScaAHj"},"source":["# The Model understands Kygrgyz\n","![ky](https://www.worldometers.info/img/flags/small/tn_kg-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"DXz6fhJSaAHu","executionInfo":{"status":"ok","timestamp":1614276964712,"user_tz":-300,"elapsed":4135,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3b20d7ec-a3c8-46ad-a2b7-da5d2e2b787e"},"source":["# Kygrgyz for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"Конгресстин жаңы саясаты көптөгөн адамдарды жакыр, кайгыга чөгүп, көңүл чөгөттү \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.0002845969283953309, -0.002948887180536985...0.999439negativeКонгресстин жаңы саясаты көптөгөн адамдарды жа...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.0002845969283953309, -0.002948887180536985... ... Конгресстин жаңы саясаты көптөгөн адамдарды жа...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":48}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"lh_ZSHlPaAHv","executionInfo":{"status":"ok","timestamp":1614276969186,"user_tz":-300,"elapsed":3853,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cc925ae4-a8ab-4a71-9e68-fa9c15d649ec"},"source":["# Kygrgyz for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"Конгресстин жаңы саясаты көпчүлүктү кубандырды \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.01854480803012848, -0.0032602460123598576, ...1.000000positiveКонгресстин жаңы саясаты көпчүлүктү кубандырды
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.01854480803012848, -0.0032602460123598576, ... ... Конгресстин жаңы саясаты көпчүлүктү кубандырды\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"markdown","metadata":{"id":"DGMVMKaTdJFj"},"source":["# The Model understands Tamil\n","![ta](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"JWDr_LoCdJFn","executionInfo":{"status":"ok","timestamp":1614276978399,"user_tz":-300,"elapsed":4262,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2b01c26a-1523-4319-f90a-a936a055dcf5"},"source":["# Tamil for: 'Congress's newest polices made many people poor, sad and depressed '\n","fitted_pipe.predict(\"காங்கிரசின் புதிய கொள்கைகள் பலரை ஏழைகளாகவும், சோகமாகவும், மனச்சோர்வடையச் செய்தன \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[-0.0065728831104934216, -0.000398304604459553...0.999544negativeகாங்கிரசின் புதிய கொள்கைகள் பலரை ஏழைகளாகவும், ...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [-0.0065728831104934216, -0.000398304604459553... ... காங்கிரசின் புதிய கொள்கைகள் பலரை ஏழைகளாகவும், ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Q6C0BmTtdJFp","executionInfo":{"status":"ok","timestamp":1614276984526,"user_tz":-300,"elapsed":5457,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4e3e4bab-76f8-45ea-927e-625ad97b81bb"},"source":["# Tamil for: 'Congress's newest polices made many people happy '\n","fitted_pipe.predict(\"காங்கிரசின் புதிய கொள்கைகள் பலரை மகிழ்ச்சியடையச் செய்தன \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingssentiment_confidencesentimentdocument
origin_index
0[0.01883488893508911, -0.01959705352783203, -0...1.000000positiveகாங்கிரசின் புதிய கொள்கைகள் பலரை மகிழ்ச்சியடைய...
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.01883488893508911, -0.01959705352783203, -0... ... காங்கிரசின் புதிய கொள்கைகள் பலரை மகிழ்ச்சியடைய...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":51}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eLex095goHwm","executionInfo":{"status":"ok","timestamp":1613854901863,"user_tz":-300,"elapsed":958785,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"76502277-986b-4ccc-b2ca-47ea42eaee08"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":103},"id":"SO4uz45MoRgp","executionInfo":{"status":"ok","timestamp":1613855192012,"user_tz":-300,"elapsed":102760,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"63ed9cc5-2640-4cc4-9600-9a7bf4979e84"},"source":["stored_model_path = './models/classifier_dl_trained' \n","hdd_pipe = nlu.load(path=stored_model_path)\n","preds = hdd_pipe.predict('I am extremly depressed and down cause of school and just feel like ending my life...')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingsdocumentsentimentsentiment_confidence
origin_index
0[-0.02176278457045555, -0.04837987199425697, -...I am extremly depressed and down cause of scho...negative0.999979
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... sentiment_confidence\n","origin_index ... \n","0 [-0.02176278457045555, -0.04837987199425697, -... ... 0.999979\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"btTWUdsDNhfx","executionInfo":{"status":"ok","timestamp":1613855223311,"user_tz":-300,"elapsed":1136,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d5bb5163-e944-48e0-e509-6cbbebaf255d"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. 
Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. | Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. 
| Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. | Currently set to : False\n","pipe['glove'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n",">>> pipe['sentiment_dl'] has settable params:\n","pipe['sentiment_dl'].setThreshold(0.6) | Info: The minimum threshold for the final result otheriwse it will be neutral | Currently set to : 0.6\n","pipe['sentiment_dl'].setThresholdLabel('neutral') | Info: In case the score is less than threshold, what should be the label. Default is neutral. | Currently set to : neutral\n","pipe['sentiment_dl'].setClasses(['positive', 'negative']) | Info: get the tags used to trained this SentimentDLModel | Currently set to : ['positive', 'negative']\n","pipe['sentiment_dl'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo.ipynb b/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo.ipynb new file mode 100644 index 00000000..5e68aa88 --- /dev/null +++ b/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo.ipynb @@ -0,0 +1 @@ 
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_lingual_multi_class_text_classifier_demo.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo.ipynb)\n","\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","With the [ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data :\n","\n","![image.png]()\n","\n","
\n","\n","\n","You can achieve these results or even better on this dataset with test data :\n","\n","
\n","\n","![image.png]()\n"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu > /dev/null pyspark==2.4.7\n","\n","import nlu"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download news classification dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614340737448,"user_tz":-300,"elapsed":76356,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"76004150-a292-4e20-9e81-5010a1180f99"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/news_category_test_multi_lingual.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-26 11:57:45-- http://ckl-it.de/wp-content/uploads/2021/02/news_category_test_multi_lingual.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 1592801 (1.5M) [text/csv]\n","Saving to: ‘news_category_test_multi_lingual.csv’\n","\n","news_category_test_ 100%[===================>] 1.52M 1.40MB/s in 1.1s \n","\n","2021-02-26 11:57:47 (1.40 MB/s) - ‘news_category_test_multi_lingual.csv’ saved [1592801/1592801]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614340738498,"user_tz":-300,"elapsed":77384,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"813c3dc3-554c-4d60-8691-366c67eae387"},"source":["import pandas as pd\n","test_path = '/content/news_category_test_multi_lingual.csv'\n","train_df = pd.read_csv(test_path)\n","from sklearn.model_selection import train_test_split\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Unnamed: 0ytexttest_sentences
10311031Sci/TechAether Systems Inc., a Maryland wireless data...
65666566BusinessRenee McDonald remembers the Christmas trees o...
51225122WorldPersonal freedoms in Canada are being eroded b...
73127312WorldThe DUP was last night reconsidering its boyco...
12311231Sci/TechMaking games for the future consoles is going ...
...............
21892189BusinessThe Ontario Securities Commission is warning ...
15441544WorldA US military intelligence soldier in Iraq has...
25352535BusinessTroubled carrier US Airways has asked a US ban...
73837383SportsManchester City chairman John Wardle has not ...
19201920BusinessConsumer prices barely budged in August, sugg...
\n","

6080 rows × 4 columns

\n","
"],"text/plain":[" Unnamed: 0 ... test_sentences\n","1031 1031 ... \n","6566 6566 ... \n","5122 5122 ... \n","7312 7312 ... \n","1231 1231 ... \n","... ... ... ...\n","2189 2189 ... \n","1544 1544 ... \n","2535 2535 ... \n","7383 7383 ... \n","1920 1920 ... \n","\n","[6080 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","By default, the Universal Sentence Encoder Embeddings (USE) are being downloaded to provide embeddings for the classifier. You can use any of the 50+ other sentence Embeddings in NLU though!\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"id":"3ZIPkRkWftBG","colab":{"base_uri":"https://localhost:8080/","height":875},"executionInfo":{"status":"ok","timestamp":1614342255869,"user_tz":-300,"elapsed":1594738,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4ecfe4e4-bef0-43dc-9a13-5845f4b4add8"},"source":["trainable_pipe = nlu.load('xx.embed_sentence.labse train.classifier')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(60) \n","trainable_pipe['classifier_dl'].setLr(0.005) \n","fitted_pipe = trainable_pipe.fit(train_df.iloc[:1500])\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df.iloc[:1500],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","from sklearn.metrics import classification_report\n","print(classification_report(preds['y'], preds['category']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["labse download started this may take some time.\n","Approximate size to download 1.7 GB\n","[OK!]\n"," precision recall f1-score support\n","\n"," Business 0.90 0.92 0.91 378\n"," Sci/Tech 0.94 0.92 0.93 391\n"," Sports 0.95 0.99 0.97 352\n"," World 0.95 0.92 0.93 379\n","\n"," accuracy 0.93 1500\n"," macro avg 0.94 0.94 0.94 1500\n","weighted avg 0.93 0.93 0.93 1500\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorytest_sentencesdocumentyUnnamed: 0xx_embed_sentence_labse_embeddingscategory_confidencetext
origin_index
1031Sci/TechAether Systems Inc., a Maryland wireless data ...Sci/Tech1031[-0.018817514181137085, -0.018930165097117424,...0.992930Aether Systems Inc., a Maryland wireless data...
6566SportsRenee McDonald remembers the Christmas trees o...Business6566[-0.033650271594524384, 0.04151156544685364, 0...0.999996Renee McDonald remembers the Christmas trees o...
5122WorldPersonal freedoms in Canada are being eroded b...World5122[-0.00797173660248518, -0.05635803937911987, -...0.999978Personal freedoms in Canada are being eroded b...
7312WorldThe DUP was last night reconsidering its boyco...World7312[-0.04457780346274376, -0.013035906478762627, ...0.998248The DUP was last night reconsidering its boyco...
1231Sci/TechMaking games for the future consoles is going ...Sci/Tech1231[0.00020110986952204257, -0.04935045540332794,...1.000000Making games for the future consoles is going ...
...........................
1518SportsWhile Rafael Furcal #39;s DUI arrest on Friday...Sports1518[0.03824271634221077, -0.050272781401872635, 0...1.000000While Rafael Furcal #39;s DUI arrest on Friday...
2631SportsThe Redskins and Cowboys are underway from Fed...Sports2631[0.03182480111718178, -0.027464378625154495, -...1.000000The Redskins and Cowboys are underway from Fed...
809Sportsseeded Russian Elena Bovina won her first titl...Sports809[-0.017956998199224472, 0.05119836702942848, 0...1.000000seeded Russian Elena Bovina won her first titl...
3414Sci/TechMusician Brian Eno, who has been turning ideas...Sci/Tech3414[-0.0022161505185067654, -0.030591804534196854...0.999987Musician Brian Eno, who has been turning ideas...
7202Sci/TechHollywood movie powerhouse Walt Disney has tak...Sci/Tech7202[-0.05487370863556862, 0.003584994236007333, 0...0.998174Hollywood movie powerhouse Walt Disney has tak...
\n","

1500 rows × 8 columns

\n","
"],"text/plain":[" category ... text\n","origin_index ... \n","1031 Sci/Tech ... Aether Systems Inc., a Maryland wireless data...\n","6566 Sports ... Renee McDonald remembers the Christmas trees o...\n","5122 World ... Personal freedoms in Canada are being eroded b...\n","7312 World ... The DUP was last night reconsidering its boyco...\n","1231 Sci/Tech ... Making games for the future consoles is going ...\n","... ... ... ...\n","1518 Sports ... While Rafael Furcal #39;s DUI arrest on Friday...\n","2631 Sports ... The Redskins and Cowboys are underway from Fed...\n","809 Sports ... seeded Russian Elena Bovina won her first titl...\n","3414 Sci/Tech ... Musician Brian Eno, who has been turning ideas...\n","7202 Sci/Tech ... Hollywood movie powerhouse Walt Disney has tak...\n","\n","[1500 rows x 8 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 3.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Fxx4yNkNVGFl","executionInfo":{"status":"ok","timestamp":1614343276557,"user_tz":-300,"elapsed":1176,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"073cd72f-66cb-4e33-9852-013aa8bb2fec"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," Business 0.79 0.81 0.80 364\n"," Sci/Tech 0.83 0.80 0.82 388\n"," Sports 0.93 0.95 0.94 392\n"," World 0.84 0.83 0.83 376\n","\n"," accuracy 0.85 1520\n"," macro avg 0.85 0.85 0.85 1520\n","weighted avg 0.85 0.85 0.85 1520\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"BD5OKO4Umc5U"},"source":["# 4. 
Test Model with 20 languages!"]},{"cell_type":"code","metadata":{"id":"OQ72hP9unML7","colab":{"base_uri":"https://localhost:8080/","height":793},"executionInfo":{"status":"ok","timestamp":1614187334235,"user_tz":-300,"elapsed":1774970,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d79bed47-2868-4a7b-b6d5-f44e3630ee3c"},"source":["train_df = pd.read_csv(\"news_category_test_multi_lingual.csv\")\n","preds = fitted_pipe.predict(train_df[[\"test_sentences\",\"y\"]].iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," Business 0.65 0.92 0.76 12\n"," Sci/Tech 0.94 0.78 0.85 37\n"," Sports 0.81 1.00 0.89 21\n"," World 0.96 0.83 0.89 30\n","\n"," accuracy 0.86 100\n"," macro avg 0.84 0.88 0.85 100\n","weighted avg 0.88 0.86 0.86 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textxx_embed_sentence_labse_embeddingscategory_confidenceydocumentcategory
origin_index
0టర్నర్ నెవాల్ వద్ద కార్మికులకు ప్రాతినిధ్యం వహ...[-0.05777165666222572, -0.011031209491193295, ...0.999994Businessటర్నర్ నెవాల్ వద్ద కార్మికులకు ప్రాతినిధ్యం వహ...Business
1Торонто, Канада # 36; 10 миллион Ансари X прем...[-0.039270948618650436, -0.05998489260673523, ...0.999874Sci/TechТоронто, Канада # 36; 10 миллион Ансари X прем...Sci/Tech
2Une société fondée par un chercheur en chimie ...[-0.047125183045864105, -0.025509396567940712,...0.999995Sci/TechUne société fondée par un chercheur en chimie ...Sci/Tech
3সবেমাত্র ভোর যখন মাইক ফিৎসপ্যাট্রিক রঙিন মানচি...[-0.04609032720327377, -0.05127093940973282, -...0.999889Sci/Techসবেমাত্র ভোর যখন মাইক ফিৎসপ্যাট্রিক রঙিন মানচি...Sci/Tech
4Көньяк Калифорниянең томанга каршы көрәш агент...[-0.02939729019999504, -0.040420372039079666, ...0.314981Sci/TechКөньяк Калифорниянең томанга каршы көрәш агент...Sports
.....................
95ఫుట్‌బాల్ అసోసియేషన్ ప్రతిష్టను దెబ్బతీసిన కుం...[0.02515975944697857, -0.026320775970816612, -...0.999963Sportsఫుట్‌బాల్ అసోసియేషన్ ప్రతిష్టను దెబ్బతీసిన కుం...Sports
96Hücumçu Emile Heskey, Çərşənbə # 39-un Çərşənb...[0.04458567127585411, 0.03187408298254013, -0....1.000000SportsHücumçu Emile Heskey, Çərşənbə # 39-un Çərşənb...Sports
97Staples Inc. & lt; A HREF = \"http://www.invest...[-0.016342557966709137, -0.004877099301666021,...0.999987BusinessStaples Inc. & lt; A HREF = \"http://www.invest...Business
98គណៈប្រតិភូនៃប្រទេសអ៊ីរ៉ាក់ត្រូវបានពន្យារពេលដោយ...[0.030007358640432358, -0.0027152197435498238,...0.999985Worldគណៈប្រតិភូនៃប្រទេសអ៊ីរ៉ាក់ត្រូវបានពន្យារពេលដោយ...World
99امریکی صارفین کی قیمتوں میں جولائی میں پہلی با...[-0.04715617746114731, -0.04999865964055061, -...0.999984Businessامریکی صارفین کی قیمتوں میں جولائی میں پہلی با...Business
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" text ... category\n","origin_index ... \n","0 టర్నర్ నెవాల్ వద్ద కార్మికులకు ప్రాతినిధ్యం వహ... ... Business\n","1 Торонто, Канада # 36; 10 миллион Ансари X прем... ... Sci/Tech\n","2 Une société fondée par un chercheur en chimie ... ... Sci/Tech\n","3 সবেমাত্র ভোর যখন মাইক ফিৎসপ্যাট্রিক রঙিন মানচি... ... Sci/Tech\n","4 Көньяк Калифорниянең томанга каршы көрәш агент... ... Sports\n","... ... ... ...\n","95 ఫుట్‌బాల్ అసోసియేషన్ ప్రతిష్టను దెబ్బతీసిన కుం... ... Sports\n","96 Hücumçu Emile Heskey, Çərşənbə # 39-un Çərşənb... ... Sports\n","97 Staples Inc. & lt; A HREF = \"http://www.invest... ... Business\n","98 គណៈប្រតិភូនៃប្រទេសអ៊ីរ៉ាក់ត្រូវបានពន្យារពេលដោយ... ... World\n","99 امریکی صارفین کی قیمتوں میں جولائی میں پہلی با... ... Business\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"RjtuNUcvuJTT"},"source":["# The Model understands English\n","![en](https://www.worldometers.info/img/flags/small/tn_nz-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"o0vu7PaWkcI7","executionInfo":{"status":"ok","timestamp":1614343762090,"user_tz":-300,"elapsed":4257,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"568e618d-8116-4f8a-fd81-f6e9f76cb8f5"},"source":["fitted_pipe.predict(\"There have been a great increase in businesses over the last decade \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0BusinessThere have been a great increase in businesses...[0.012169226072728634, -0.002660397905856371, ...0.996130
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.996130\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1ykjRQhCtQ4w","executionInfo":{"status":"ok","timestamp":1614343770849,"user_tz":-300,"elapsed":3981,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6b26a567-f1a3-4a35-fdf3-4740447a413a"},"source":["fitted_pipe.predict(\"Science has advanced rapidly over the last century \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/TechScience has advanced rapidly over the last cen...[0.022739626467227936, -0.03467154502868652, -...0.999995
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999995\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"vohym-XbuNHn"},"source":["# The Model understands German\n","![de](https://www.worldometers.info/img/flags/small/tn_gm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"dzaaZrI4tVWc","executionInfo":{"status":"ok","timestamp":1614344150287,"user_tz":-300,"elapsed":3394,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b953df9a-d3b5-41d4-85cf-2eb9b7b0d6df"},"source":["# German for: 'Businesses are the best way of making profit'\n","fitted_pipe.predict(\"Unternehmen sind der beste Weg, um Gewinn zu erzielen\")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0BusinessUnternehmen sind der beste Weg, um Gewinn zu e...[-0.048822492361068726, -0.0071628582663834095...0.999487
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.999487\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"BbhgTSBGtTtJ","executionInfo":{"status":"ok","timestamp":1614343796141,"user_tz":-300,"elapsed":4033,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"441a3ddf-5676-4d65-ced0-a256a8fb2f77"},"source":["# German for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"Die Wissenschaft hat im letzten Jahrhundert rasante Fortschritte gemacht \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/TechDie Wissenschaft hat im letzten Jahrhundert ra...[0.035708025097846985, -0.04514779895544052, -...0.999999
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999999\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"markdown","metadata":{"id":"a1JbtmWquQwj"},"source":["# The Model understands Chinese\n","![zh](https://www.worldometers.info/img/flags/small/tn_ch-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"kYSYqtoRtc-P","executionInfo":{"status":"ok","timestamp":1614343808581,"user_tz":-300,"elapsed":4307,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2918ac56-9319-41cf-9a65-258533a278ff"},"source":["# Chinese for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"在过去的十年中,业务有了很大的增长 \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Business在过去的十年中,业务有了很大的增长[0.007143490947782993, -0.003197029000148177, ...0.574210
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.574210\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"06v9SD-QtlBU","executionInfo":{"status":"ok","timestamp":1614343818476,"user_tz":-300,"elapsed":4201,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"74bf506f-9589-47dd-9570-5fde4f3055ec"},"source":["# Chinese for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"在上个世纪,科学发展迅速 \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/Tech在上个世纪,科学发展迅速[0.01899210549890995, -0.05363348498940468, -0...0.999645
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999645\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"markdown","metadata":{"id":"9h7CvN4uu9Pb"},"source":["# Model understands Afrikaans\n","\n","![af](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)\n","\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"VMPhbgw9twtf","executionInfo":{"status":"ok","timestamp":1614343823492,"user_tz":-300,"elapsed":3950,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0f60a3a8-8160-4881-881f-5bedb2f657c8"},"source":["# Afrikaans for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"Daar het die afgelope dekade 'n groot toename in besighede plaasgevind \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0BusinessDaar het die afgelope dekade 'n groot toename ...[0.028091464191675186, -0.016515646129846573, ...0.929858
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.929858\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"zWgNTIdkumhX","executionInfo":{"status":"ok","timestamp":1614343830946,"user_tz":-300,"elapsed":3528,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6c75c15c-759d-4e70-a73a-ebc0ec0a6bbc"},"source":["# Afrikaans for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"Die wetenskap het die afgelope eeu vinnig gevorder \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/TechDie wetenskap het die afgelope eeu vinnig gevo...[0.02647087350487709, -0.04339253529906273, -0...0.999996
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999996\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"markdown","metadata":{"id":"rSEPkC-Bwnpg"},"source":["# The model understands Vietnamese\n","![vi](https://www.worldometers.info/img/flags/small/tn_vm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"7ksJosuTOYpE","executionInfo":{"status":"ok","timestamp":1614343837756,"user_tz":-300,"elapsed":4195,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a8f10ff8-3342-4e9f-abfa-b335b2b432d4"},"source":["# Vietnamese for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"Đã có sự gia tăng đáng kể trong các doanh nghiệp trong thập kỷ qua \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0BusinessĐã có sự gia tăng đáng kể trong các doanh nghi...[0.002593805780634284, -0.03647594526410103, -...0.979494
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.979494\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":16}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"VfG3UaCTEZB_","executionInfo":{"status":"ok","timestamp":1614343844799,"user_tz":-300,"elapsed":3810,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d0047d6e-27a8-4933-ffff-3fe9f4670c92"},"source":["# Vietnamese for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"Khoa học đã phát triển nhanh chóng trong thế kỷ qua \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/TechKhoa học đã phát triển nhanh chóng trong thế k...[0.006926446221768856, -0.0595879964530468, -0...0.999535
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999535\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":17}]},{"cell_type":"markdown","metadata":{"id":"IlkmAaMoxTuy"},"source":["# The model understands Japanese\n","![ja](https://www.worldometers.info/img/flags/small/tn_ja-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1IfJu3q8wwUt","executionInfo":{"status":"ok","timestamp":1614344208848,"user_tz":-300,"elapsed":3931,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d15d595a-1bb8-4bb8-e191-6a5fa6c76804"},"source":["# Japanese for: 'Businesses are the best way of making profit'\n","fitted_pipe.predict(\"ビジネスは利益を上げるための最良の方法です\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Businessビジネスは利益を上げるための最良の方法です[-0.029112381860613823, -0.022607827559113503,...0.683041
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.683041\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"-RjXWbFIPvIs","executionInfo":{"status":"ok","timestamp":1614343862887,"user_tz":-300,"elapsed":3656,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"766defa2-eb2a-4bec-afdd-aa22a45f8f89"},"source":["# Japanese for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"科学は前世紀にわたって急速に進歩しました \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/Tech科学は前世紀にわたって急速に進歩しました[0.01969727873802185, -0.04392292723059654, -0...0.999981
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999981\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":19}]},{"cell_type":"markdown","metadata":{"id":"GITfT7FK0CGv"},"source":["# The model understands Zulu\n","![zu](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"ifRhs6e7OcR3","executionInfo":{"status":"ok","timestamp":1614343872273,"user_tz":-300,"elapsed":3506,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"47a28575-8ea7-4251-9d72-23a96e71dda3"},"source":["# Zulu for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"Kube nokwanda okukhulu emabhizinisini kule minyaka eyishumi edlule \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0BusinessKube nokwanda okukhulu emabhizinisini kule min...[0.011455212719738483, -0.019759127870202065, ...0.998235
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.998235\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":20}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"6uelDwq4xdWv","executionInfo":{"status":"ok","timestamp":1614343880558,"user_tz":-300,"elapsed":3702,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a69e5a50-585f-4a5f-e599-e5e97b4c4270"},"source":["# Zulu for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"Isayensi ithuthuke ngokushesha ngekhulu leminyaka elidlule \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/TechIsayensi ithuthuke ngokushesha ngekhulu leminy...[0.033070385456085205, -0.04442666471004486, -...0.999992
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999992\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":21}]},{"cell_type":"markdown","metadata":{"id":"VGVvzl_30a0T"},"source":["# The Model understands Turkish\n","![tr](https://www.worldometers.info/img/flags/small/tn_tu-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"DRNnuEeQz2pd","executionInfo":{"status":"ok","timestamp":1614344234044,"user_tz":-300,"elapsed":3703,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6d7cb27f-8d75-45f4-acac-0f8a131da284"},"source":["# Turkish for: 'Businesses are the best way of making profit'\n","fitted_pipe.predict(\"İşletmeler kar elde etmenin en iyi yoludur \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Businessİşletmeler kar elde etmenin en iyi yoludur[-0.023345213383436203, 0.0005469206953421235,...0.997994
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.997994\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":51}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"aOSsiK6J0jWs","executionInfo":{"status":"ok","timestamp":1614343901968,"user_tz":-300,"elapsed":4356,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"72de7e46-ed44-43a4-cb8c-df30acc68210"},"source":["# Turkish for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"Bilim, geçen yüzyılda hızla ilerledi \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/TechBilim, geçen yüzyılda hızla ilerledi[0.01670285314321518, -0.050043221563100815, -...0.999998
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999998\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":23}]},{"cell_type":"markdown","metadata":{"id":"803qL2gt0vlb"},"source":["# The Model understands Hebrew\n","![he](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"XQ5VCtxw0pc0","executionInfo":{"status":"ok","timestamp":1614343908732,"user_tz":-300,"elapsed":4167,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2359b0fa-ad0c-49fb-8141-42b607007449"},"source":["# Hebrew for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"חלה עלייה גדולה בעסקים בעשור האחרון \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Businessחלה עלייה גדולה בעסקים בעשור האחרון[0.030628306791186333, -0.022280622273683548, ...0.999873
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.999873\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"9w2ZHfns05A4","executionInfo":{"status":"ok","timestamp":1614343914907,"user_tz":-300,"elapsed":5181,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f31d38fb-3797-48be-f49c-d1558b04f9fa"},"source":["# Hebrew for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"המדע התקדם במהירות במהלך המאה האחרונה \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/Techהמדע התקדם במהירות במהלך המאה האחרונה[-0.0030932428780943155, -0.05540185421705246,...0.999986
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999986\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":25}]},{"cell_type":"markdown","metadata":{"id":"SDlpd33H1HIX"},"source":["# The Model understands Telugu\n","![te](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Kc5n1bzv1BJT","executionInfo":{"status":"ok","timestamp":1614343923847,"user_tz":-300,"elapsed":5307,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e2b93d0b-d0e4-4d6b-e731-af26f3fa31dd"},"source":["# Telugu for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"గత దశాబ్దంలో వ్యాపారాలలో గొప్ప పెరుగుదల ఉంది \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Businessగత దశాబ్దంలో వ్యాపారాలలో గొప్ప పెరుగుదల ఉంది[0.00526750274002552, -0.022807607427239418, -...0.999657
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.999657\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":26}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"-l-u6vrz1Obe","executionInfo":{"status":"ok","timestamp":1614343944661,"user_tz":-300,"elapsed":5025,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a01f802f-0260-4bc5-cebd-b47ff1cd326d"},"source":["# Telugu for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"గత శతాబ్దంలో సైన్స్ వేగంగా అభివృద్ధి చెందింది \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/Techగత శతాబ్దంలో సైన్స్ వేగంగా అభివృద్ధి చెందింది[-0.015292842872440815, -0.03326159343123436, ...0.998944
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.998944\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"markdown","metadata":{"id":"nziBUe8t1Zwn"},"source":["# Model understands Russian\n","![ru](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Ckyjl3YQ1VFn","executionInfo":{"status":"ok","timestamp":1614344256517,"user_tz":-300,"elapsed":3363,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"57adae43-f900-4030-86af-d3f483e76bae"},"source":["# Russian for: 'Businesses are the best way of making profit'\n","fitted_pipe.predict(\"Бизнес - лучший способ получения прибыли\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0BusinessБизнес - лучший способ получения прибыли[-0.016974015161395073, -0.024397598579525948,...0.999988
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.999988\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":52}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"GIdWkfGv1gFz","executionInfo":{"status":"ok","timestamp":1614343955256,"user_tz":-300,"elapsed":3471,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4b46c5b2-c989-4f67-f762-da6f47cc907d"},"source":["# Russian for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"Наука стремительно развивалась за последнее столетие \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/TechНаука стремительно развивалась за последнее ст...[0.0139895835891366, -0.04563469812273979, -0....0.999999
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999999\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":29}]},{"cell_type":"markdown","metadata":{"id":"8R1j9mwz2Cm4"},"source":["# Model understands Urdu\n","![ur](https://www.worldometers.info/img/flags/small/tn_pk-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"j4zwvRV11pcG","executionInfo":{"status":"ok","timestamp":1614343961759,"user_tz":-300,"elapsed":3567,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"41b47ae4-218b-4ef2-a8fe-711c58b5ae78"},"source":["# Urdu for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"پچھلے ایک دہائی کے دوران کاروباروں میں زبردست اضافہ ہوا ہے \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Businessپچھلے ایک دہائی کے دوران کاروباروں میں زبردست ...[-0.004565550480037928, -0.008193295449018478,...0.999968
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.999968\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":30}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SxzTuK4b2UKV","executionInfo":{"status":"ok","timestamp":1614343969034,"user_tz":-300,"elapsed":3719,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8c139479-3960-4819-ca7f-78b91491be97"},"source":["# Urdu for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"سائنس گذشتہ صدی کے دوران تیزی سے ترقی کرچکی ہے \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/Techسائنس گذشتہ صدی کے دوران تیزی سے ترقی کرچکی ہے[-0.013339908793568611, -0.0262106005102396, -...0.999644
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999644\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":31}]},{"cell_type":"markdown","metadata":{"id":"RoNg-C3k1qcX"},"source":["# Model understands Hindi\n","![hi](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"QZ9RT5Wv1r1n","executionInfo":{"status":"ok","timestamp":1614343977885,"user_tz":-300,"elapsed":3502,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9f4fc14d-24ae-46e5-c045-071984c4152a"},"source":["# hindi for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"पिछले दशक में व्यवसायों में बहुत वृद्धि हुई है \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Businessपिछले दशक में व्यवसायों में बहुत वृद्धि हुई है[-0.003939660266041756, -0.0293721966445446, -...0.938786
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.938786\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":32}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"quM-IL2i12-B","executionInfo":{"status":"ok","timestamp":1614343982199,"user_tz":-300,"elapsed":3446,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"03c51089-8220-4421-ef55-22ab9b131816"},"source":["\t\t\n","# hindi for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"विज्ञान पिछली सदी में तेजी से आगे बढ़ा है \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/Techविज्ञान पिछली सदी में तेजी से आगे बढ़ा है[-0.0006327364826574922, -0.047755494713783264...0.999998
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999998\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"markdown","metadata":{"id":"R4ByHOZn35Lc"},"source":["# The model understands Tartar\n","![tt](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"2JrzusSQ18F5","executionInfo":{"status":"ok","timestamp":1614343989723,"user_tz":-300,"elapsed":3402,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"44954405-7a54-4f42-db5f-33c2a314334f"},"source":["# Tartar for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"Соңгы ун елда бизнеста зур үсеш булды \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0BusinessСоңгы ун елда бизнеста зур үсеш булды[0.023730691522359848, -0.02879856713116169, -...0.867039
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.867039\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":34}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"J06Xm_Ln4AYu","executionInfo":{"status":"ok","timestamp":1614343995715,"user_tz":-300,"elapsed":3713,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"db26c9df-2a1a-443c-d03d-40818303b9e1"},"source":["# Tartar for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"Соңгы гасырда фән тиз үсә \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/TechСоңгы гасырда фән тиз үсә[0.02118440717458725, -0.046850722283124924, -...0.999993
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999993\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":35}]},{"cell_type":"markdown","metadata":{"id":"HKj5yWwwMplH"},"source":["# The Model understands French\n","![fr](https://www.worldometers.info/img/flags/small/tn_fr-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"CUHcJZfJMplL","executionInfo":{"status":"ok","timestamp":1614344002345,"user_tz":-300,"elapsed":3760,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ed229c88-b7f3-4160-d8c9-625b1220a8fe"},"source":["# French for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"Il y a eu une forte augmentation des entreprises au cours de la dernière décennie \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0BusinessIl y a eu une forte augmentation des entrepris...[0.007794329896569252, -0.012789410538971424, ...0.999476
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.999476\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":36}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"57NY2XoTMplM","executionInfo":{"status":"ok","timestamp":1614344007052,"user_tz":-300,"elapsed":3359,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d26c8d1c-af14-497c-f8c4-49c823917179"},"source":["# French for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"La science a progressé rapidement au cours du siècle dernier \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/TechLa science a progressé rapidement au cours du ...[0.01239328645169735, -0.04608025401830673, -0...0.999996
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999996\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"markdown","metadata":{"id":"jD2TBgT0Nq6F"},"source":["# The Model understands Thai\n","![th](https://www.worldometers.info/img/flags/small/tn_th-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"gBp11S5GNq6S","executionInfo":{"status":"ok","timestamp":1614344013213,"user_tz":-300,"elapsed":3752,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ff11eb24-684b-4220-c781-78cf4512eeb8"},"source":["\t\t\n","# Thai for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"มีธุรกิจเพิ่มขึ้นอย่างมากในช่วงทศวรรษที่ผ่านมา \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Businessมีธุรกิจเพิ่มขึ้นอย่างมากในช่วงทศวรรษที่ผ่านมา[0.008413508534431458, -0.024852056056261063, ...0.583319
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.583319\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":38}]},{"cell_type":"code","metadata":{"id":"R6nKI7C3QKa3","colab":{"base_uri":"https://localhost:8080/","height":106},"executionInfo":{"status":"ok","timestamp":1614344018215,"user_tz":-300,"elapsed":3606,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8db8c572-5600-4e76-e599-87dd82181e9d"},"source":["# Thai for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"วิทยาศาสตร์ก้าวหน้าอย่างรวดเร็วในช่วงศตวรรษที่ผ่านมา \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/Techวิทยาศาสตร์ก้าวหน้าอย่างรวดเร็วในช่วงศตวรรษที่...[0.007343577221035957, -0.04965793341398239, -...0.999865
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999865\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":39}]},{"cell_type":"markdown","metadata":{"id":"mLItI4KZOElB"},"source":["# The Model understands Khmer\n","![km](https://www.worldometers.info/img/flags/small/tn_cb-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SWbqMgAwOElC","executionInfo":{"status":"ok","timestamp":1614344025699,"user_tz":-300,"elapsed":4221,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a1615359-750b-423e-b547-8ba75edf98c4"},"source":["# Khmer for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"មានការរីកចម្រើនយ៉ាងខ្លាំងនៅក្នុងអាជីវកម្មក្នុងរយៈពេលមួយទសវត្សចុងក្រោយនេះ \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Businessមានការរីកចម្រើនយ៉ាងខ្លាំងនៅក្នុងអាជីវកម្មក្នុង...[0.02500440925359726, -0.037305932492017746, -...0.772556
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.772556\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":40}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"beoCtm4xQf2P","executionInfo":{"status":"ok","timestamp":1614344044938,"user_tz":-300,"elapsed":4767,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"09bf5b1f-b2b9-4d22-901e-ac778c717c9e"},"source":["\t\t\n","# Khmer for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"វិទ្យាសាស្ត្របានជឿនលឿនយ៉ាងលឿនក្នុងរយៈពេលមួយសតវត្សចុងក្រោយនេះ \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/Techវិទ្យាសាស្ត្របានជឿនលឿនយ៉ាងលឿនក្នុងរយៈពេលមួយសតវ...[0.008467243984341621, -0.05188147351145744, -...0.999984
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999984\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":42}]},{"cell_type":"markdown","metadata":{"id":"lvE-LbNiPoBT"},"source":["# The Model understands Yiddish\n","![yi](https://www.worldometers.info/img/flags/small/tn_pl-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"sZlmLhajPoBb","executionInfo":{"status":"ok","timestamp":1614344041811,"user_tz":-300,"elapsed":3640,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a5e70a7b-2447-4d67-eb99-76c768b112f8"},"source":["\n","# Yiddish for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"די לעצטע יאָרצענדלינג איז געווען אַ גרויס פאַרגרעסערן אין געשעפטן \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Businessדי לעצטע יאָרצענדלינג איז געווען אַ גרויס פאַר...[0.0017607753397896886, -0.03173191845417023, ...0.998951
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.998951\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":41}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"5h-pha_nPoBc","executionInfo":{"status":"ok","timestamp":1614344052015,"user_tz":-300,"elapsed":3453,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"62e63271-c366-4537-eb86-0480cf4ced09"},"source":["# Yiddish for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"וויסנשאַפֿט איז ראַפּאַדלי אַוואַנסירטע איבער די לעצטע יאָרהונדערט \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/Techוויסנשאַפֿט איז ראַפּאַדלי אַוואַנסירטע איבער ...[-0.020669342949986458, -0.05547677353024483, ...0.999996
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999996\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":43}]},{"cell_type":"markdown","metadata":{"id":"XSz4WzScaAHj"},"source":["# The Model understands Kyrgyz\n","![ky](https://www.worldometers.info/img/flags/small/tn_kg-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"DXz6fhJSaAHu","executionInfo":{"status":"ok","timestamp":1614344284942,"user_tz":-300,"elapsed":3274,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"41b17b0d-4da6-4258-bb66-2ad52ee2e42c"},"source":["# Kyrgyz for: 'Businesses are the best way of making profit'\n","fitted_pipe.predict(\"Бизнес - бул киреше табуунун эң мыкты жолу \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0BusinessБизнес - бул киреше табуунун эң мыкты жолу[-0.028402332216501236, -0.02759084478020668, ...0.994321
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.994321\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":53}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"lh_ZSHlPaAHv","executionInfo":{"status":"ok","timestamp":1614344066211,"user_tz":-300,"elapsed":4652,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"445c9691-1718-43f5-9972-d8125a581694"},"source":["# Kyrgyz for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"Илим акыркы кылымда тездик менен өнүккөн \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/TechИлим акыркы кылымда тездик менен өнүккөн[0.025420300662517548, -0.044107213616371155, ...0.999996
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999996\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":45}]},{"cell_type":"markdown","metadata":{"id":"DGMVMKaTdJFj"},"source":["# The Model understands Tamil\n","![ta](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"JWDr_LoCdJFn","executionInfo":{"status":"ok","timestamp":1614344073072,"user_tz":-300,"elapsed":3530,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"802d85b9-262d-4567-fee3-58419f72f3ec"},"source":["# Tamil for: 'There have been a great increase in businesses over the last decade'\n","fitted_pipe.predict(\"கடந்த தசாப்தத்தில் வணிகங்களில் பெரும் அதிகரிப்பு ஏற்பட்டுள்ளது \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Businessகடந்த தசாப்தத்தில் வணிகங்களில் பெரும் அதிகரிப்...[0.0057315402664244175, -0.030773166567087173,...0.999775
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Business ... 0.999775\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Q6C0BmTtdJFp","executionInfo":{"status":"ok","timestamp":1614344077686,"user_tz":-300,"elapsed":3345,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"63ba0cfd-f20f-4ee2-f4c2-51397b726fb3"},"source":["\t\t\n","# Tamil for: 'Science has advanced rapidly over the last century'\n","fitted_pipe.predict(\"கடந்த நூற்றாண்டில் அறிவியல் வேகமாக முன்னேறியுள்ளது \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorydocumentxx_embed_sentence_labse_embeddingscategory_confidence
origin_index
0Sci/Techகடந்த நூற்றாண்டில் அறிவியல் வேகமாக முன்னேறியுள...[0.009729371406137943, -0.04586023837327957, -...0.999998
\n","
"],"text/plain":[" category ... category_confidence\n","origin_index ... \n","0 Sci/Tech ... 0.999998\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":47}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes Offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":110},"executionInfo":{"status":"ok","timestamp":1613867457298,"user_tz":-300,"elapsed":102453,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"552a5445-e5fe-49bf-fcec-b8f6beeb0cb1"},"source":["stored_model_path = './models/classifier_dl_trained'\n","hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('Tesla plans to invest 10M into the ML sector')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifier_confidencexx_embed_sentence_labse_embeddingsdocumentclassifier
origin_index
00.919902[0.0207071490585804, -0.03154003247618675, 0.0...Tesla plans to invest 10M into the ML sectorSci/Tech
\n","
"],"text/plain":[" classifier_confidence ... classifier\n","origin_index ... \n","0 0.919902 ... Sci/Tech\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1613867457299,"user_tz":-300,"elapsed":102225,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e567e913-9de3-4165-da9f-757da3d96c6a"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['World', 'Sci/Tech', 'Sports', 'Business']) | Info: get the tags used to trained this ClassifierDLModel | Currently set to : ['World', 'Sci/Tech', 'Sports', 'Business']\n","pipe['classifier_dl'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo_amazon.ipynb b/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo_amazon.ipynb new file mode 100644 index 00000000..75ede144 --- /dev/null +++ b/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo_amazon.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_lingual_multi_class_text_classifier_demo_amazon.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo_amazon.ipynb)\n","\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 3 class Amazon Phone review classifier training]\n","With the 
[ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","\n","You can achieve these results or even better on this dataset with training data :\n","\n","
\n","\n","\n","![image.png]()\n","\n","\n","\n","You can achieve these results or even better on this dataset with test data :\n","\n","
\n","\n","![image.png]()"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"id":"hFGnBCHavltY","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614335654870,"user_tz":-300,"elapsed":89896,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2ec596cc-2ee6-416e-8362-a517bf39db2c"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 65kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 17.4MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=0b2e93a0248005e8642f90e550b22eda27ab90e4b7c2bd16beeab6f4c8eb0550\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. Download Amazon Unlocked mobile phones dataset \n","https://www.kaggle.com/PromptCloudHQ/amazon-reviews-unlocked-mobile-phones\n","\n","dataset with unlocked mobile phone reviews in 5 review classes\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614335656339,"user_tz":-300,"elapsed":91301,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"065fc929-13eb-440a-d783-5f05da20a258"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/Amazon_Unlocked_Mobile_multi_lingual.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-26 10:33:04-- http://ckl-it.de/wp-content/uploads/2021/02/Amazon_Unlocked_Mobile_multi_lingual.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 511871 (500K) [text/csv]\n","Saving to: ‘Amazon_Unlocked_Mobile_multi_lingual.csv’\n","\n","Amazon_Unlocked_Mob 100%[===================>] 499.87K 622KB/s in 0.8s \n","\n","2021-02-26 10:33:05 (622 KB/s) - ‘Amazon_Unlocked_Mobile_multi_lingual.csv’ saved [511871/511871]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614335656356,"user_tz":-300,"elapsed":91252,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"852da78e-7ac3-4131-e77c-5e1514f54f33"},"source":["import pandas as pd\n","test_path = '/content/Amazon_Unlocked_Mobile_multi_lingual.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","from sklearn.model_selection import train_test_split\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
937goodGood phone, good qualty, the only that can bet...
697averagePhone was unlocked, but didn't come in a box a...
1014averageThe camera on this phone was foggy from the fi...
308goodLike phone its nice for price sometimes it get...
971goodLove it.
.........
48goodGave the phone as a birthday gift. My friend s...
1174averageNo internet
1422poorI bought this phone to give as a gift to a fri...
873poorPhone would not turn on after charge all night
1032averageGood
\n","

1200 rows × 2 columns

\n","
"],"text/plain":[" y text\n","937 good Good phone, good qualty, the only that can bet...\n","697 average Phone was unlocked, but didn't come in a box a...\n","1014 average The camera on this phone was foggy from the fi...\n","308 good Like phone its nice for price sometimes it get...\n","971 good Love it.\n","... ... ...\n","48 good Gave the phone as a birthday gift. My friend s...\n","1174 average No internet\n","1422 poor I bought this phone to give as a gift to a fri...\n","873 poor Phone would not turn on after charge all night\n","1032 average Good\n","\n","[1200 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":828},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1614337076613,"user_tz":-300,"elapsed":1511438,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1ed7e2f5-94d2-42fb-9390-775c2ec5024a"},"source":["trainable_pipe = nlu.load('xx.embed_sentence.labse train.classifier')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(60) \n","trainable_pipe['classifier_dl'].setLr(0.005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. 
lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["labse download started this may take some time.\n","Approximate size to download 1.7 GB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.87 0.78 0.82 399\n"," good 0.86 0.92 0.89 404\n"," poor 0.90 0.93 0.91 397\n","\n"," accuracy 0.88 1200\n"," macro avg 0.88 0.88 0.87 1200\n","weighted avg 0.88 0.88 0.87 1200\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidenceydocumentxx_embed_sentence_labse_embeddingstext
origin_index
937good0.997852goodGood phone, good qualty, the only that can bet...[-0.04583406820893288, -0.02492734231054783, -...Good phone, good qualty, the only that can bet...
697average0.994527averagePhone was unlocked, but didn't come in a box a...[-0.05288608372211456, 0.06084609776735306, -0...Phone was unlocked, but didn't come in a box a...
1014poor0.993759averageThe camera on this phone was foggy from the fi...[-0.007337542716413736, 0.06280891597270966, -...The camera on this phone was foggy from the fi...
308average0.992863goodLike phone its nice for price sometimes it get...[-0.05627260357141495, 0.020682260394096375, -...Like phone its nice for price sometimes it get...
971good1.000000goodLove it.[-0.008187997154891491, -0.06332288682460785, ...Love it.
.....................
48good0.999999goodGave the phone as a birthday gift. My friend s...[0.02145097777247429, 0.023429783061146736, -0...Gave the phone as a birthday gift. My friend s...
1174poor0.999460averageNo internet[-0.03171629458665848, -0.048338472843170166, ...No internet
1422poor0.999944poorI bought this phone to give as a gift to a fri...[-0.05380842089653015, 0.04326845705509186, -0...I bought this phone to give as a gift to a fri...
873poor0.999999poorPhone would not turn on after charge all night[-0.05948413163423538, -0.0034415803384035826,...Phone would not turn on after charge all night
1032good1.000000averageGood[-0.005856278818100691, 0.01673833839595318, -...Good
\n","

1200 rows × 6 columns

\n","
"],"text/plain":[" category ... text\n","origin_index ... \n","937 good ... Good phone, good qualty, the only that can bet...\n","697 average ... Phone was unlocked, but didn't come in a box a...\n","1014 poor ... The camera on this phone was foggy from the fi...\n","308 average ... Like phone its nice for price sometimes it get...\n","971 good ... Love it.\n","... ... ... ...\n","48 good ... Gave the phone as a birthday gift. My friend s...\n","1174 poor ... No internet\n","1422 poor ... I bought this phone to give as a gift to a fri...\n","873 poor ... Phone would not turn on after charge all night\n","1032 good ... Good\n","\n","[1200 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 3.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"id":"Fxx4yNkNVGFl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614337408914,"user_tz":-300,"elapsed":914,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e21dba8c-dcb8-42a7-eed3-4f3642ffbf0c"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.82 0.74 0.78 101\n"," good 0.81 0.85 0.83 96\n"," poor 0.86 0.90 0.88 103\n","\n"," accuracy 0.83 300\n"," macro avg 0.83 0.83 0.83 300\n","weighted avg 0.83 0.83 0.83 300\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"yUkk_L8MGcRg"},"source":["#4. 
Test Model with 20 languages!"]},{"cell_type":"code","metadata":{"id":"q2s6nsZZGcRm","colab":{"base_uri":"https://localhost:8080/","height":606},"executionInfo":{"status":"ok","timestamp":1614183101782,"user_tz":-300,"elapsed":1744726,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3a03154b-7093-4132-b8cf-4ae51ebc6b5d"},"source":["train_df = pd.read_csv(\"Amazon_Unlocked_Mobile_multi_lingual.csv\")\n","preds = fitted_pipe.predict(train_df[[\"test_sentences\",\"y\"]].iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.63 0.76 0.69 29\n"," good 0.88 0.88 0.88 32\n"," poor 0.88 0.74 0.81 39\n","\n"," accuracy 0.79 100\n"," macro avg 0.79 0.79 0.79 100\n","weighted avg 0.81 0.79 0.79 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategoryxx_embed_sentence_labse_embeddingsdocumentytext
origin_index
00.999397poor[0.023933328688144684, 0.03197603300213814, -0...Alındı, onu yandırdı, işləmədi. Geri açıldı, b...poorAlındı, onu yandırdı, işləmədi. Geri açıldı, b...
10.999097average[-0.019562775269150734, -0.036460112780332565,...דערווייַל עס איז 2014, די 3gs איז דיסקאַנטיניו...averageדערווייַל עס איז 2014, די 3gs איז דיסקאַנטיניו...
20.999882good[0.02229706011712551, -0.03892051428556442, -0...100% recommendedgood100% recommended
30.999841good[0.02347300760447979, -0.0566491037607193, 0.0...Đó là một chiếc điện thoại tốt nhưng nếu bạn s...averageĐó là một chiếc điện thoại tốt nhưng nếu bạn s...
40.999962average[-0.04908802732825279, 0.006140722427517176, -...វាល្អដែលទូរស័ព្ទនេះមានប្រព័ន្ធ LTE ហើយវាដំណើរក...averageវាល្អដែលទូរស័ព្ទនេះមានប្រព័ន្ធ LTE ហើយវាដំណើរក...
.....................
950.999811poor[-0.055938538163900375, 0.049839287996292114, ...העלא, מיר געקויפט צוויי פאָנעס און זיי געקומען...poorהעלא, מיר געקויפט צוויי פאָנעס און זיי געקומען...
961.000000good[0.017209621146321297, 0.013231038115918636, -...UitstekendgoodUitstekend
970.998939average[-0.04478548839688301, 0.037118781358003616, -...پروڈکٹ اچھی ہے لیکن انگریزی زبان میں اب بھی چی...poorپروڈکٹ اچھی ہے لیکن انگریزی زبان میں اب بھی چی...
980.999995poor[-0.0475904606282711, 0.046305101364851, -0.04...Veronderstel om 'n splinternuwe ontsluitfoon t...poorVeronderstel om 'n splinternuwe ontsluitfoon t...
990.998944average[-0.027637168765068054, 0.004834047518670559, ...خلل بسيط ومزعج للغاية عند إرسال الرسائل النصية...averageخلل بسيط ومزعج للغاية عند إرسال الرسائل النصية...
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" category_confidence ... text\n","origin_index ... \n","0 0.999397 ... Alındı, onu yandırdı, işləmədi. Geri açıldı, b...\n","1 0.999097 ... דערווייַל עס איז 2014, די 3gs איז דיסקאַנטיניו...\n","2 0.999882 ... 100% recommended \n","3 0.999841 ... Đó là một chiếc điện thoại tốt nhưng nếu bạn s...\n","4 0.999962 ... វាល្អដែលទូរស័ព្ទនេះមានប្រព័ន្ធ LTE ហើយវាដំណើរក...\n","... ... ... ...\n","95 0.999811 ... העלא, מיר געקויפט צוויי פאָנעס און זיי געקומען...\n","96 1.000000 ... Uitstekend \n","97 0.998939 ... پروڈکٹ اچھی ہے لیکن انگریزی زبان میں اب بھی چی...\n","98 0.999995 ... Veronderstel om 'n splinternuwe ontsluitfoon t...\n","99 0.998944 ... خلل بسيط ومزعج للغاية عند إرسال الرسائل النصية...\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"RjtuNUcvuJTT"},"source":["# The Model understands English\n","![en](https://www.worldometers.info/img/flags/small/tn_nz-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"o0vu7PaWkcI7","executionInfo":{"status":"ok","timestamp":1614337420355,"user_tz":-300,"elapsed":4354,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8bef625d-e82c-473d-d6f3-610de65b7502"},"source":["fitted_pipe.predict(\"It was like brand new \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999587It was like brand new[0.024492498487234116, -0.003671384882181883, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.024492498487234116, -0.003671384882181883, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"1ykjRQhCtQ4w","executionInfo":{"status":"ok","timestamp":1614337428170,"user_tz":-300,"elapsed":4098,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"91ebb8b2-7044-4f54-b842-1137d91d3cf5"},"source":["fitted_pipe.predict(\"It stopped working on the first day \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999982It stopped working on the first day[-0.004823732189834118, 0.020508425310254097, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [-0.004823732189834118, 0.020508425310254097, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"vohym-XbuNHn"},"source":["# The Model understands German\n","![de](https://www.worldometers.info/img/flags/small/tn_gm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"dzaaZrI4tVWc","executionInfo":{"status":"ok","timestamp":1614339309225,"user_tz":-300,"elapsed":4257,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c51f1da7-1b02-4b82-f654-36a11ff37e76"},"source":["# German for: 'It worked perfectly '\n","fitted_pipe.predict(\"Es hat perfekt funktioniert\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999932Es hat perfekt funktioniert[-0.0051118237897753716, -0.048203449696302414...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [-0.0051118237897753716, -0.048203449696302414...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":51}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"BbhgTSBGtTtJ","executionInfo":{"status":"ok","timestamp":1614337516864,"user_tz":-300,"elapsed":5152,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d60e28bb-e42b-41bd-94cc-053ee9d6d8bf"},"source":["# German for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"Am ersten Tag hörte es auf zu arbeiten \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.997735Am ersten Tag hörte es auf zu arbeiten[0.020860085263848305, -0.011390600353479385, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [0.020860085263848305, -0.011390600353479385, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"markdown","metadata":{"id":"a1JbtmWquQwj"},"source":["# The Model understands Chinese\n","![zh](https://www.worldometers.info/img/flags/small/tn_ch-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"kYSYqtoRtc-P","executionInfo":{"status":"ok","timestamp":1614337526524,"user_tz":-300,"elapsed":5489,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6b685ef4-342e-4d1d-9d5c-37003bc807d9"},"source":["# Chinese for: 'It was like brand new'\n","fitted_pipe.predict(\"就像全新 \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999987就像全新[-0.01862957887351513, -0.02357419952750206, -...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [-0.01862957887351513, -0.02357419952750206, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"06v9SD-QtlBU","executionInfo":{"status":"ok","timestamp":1614337535041,"user_tz":-300,"elapsed":6116,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a4c54735-2451-4ccc-8f35-d025831cc8f6"},"source":["# Chinese for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"第一天停止工作 \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999527第一天停止工作[-0.002283927286043763, 0.012262673117220402, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [-0.002283927286043763, 0.012262673117220402, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"markdown","metadata":{"id":"9h7CvN4uu9Pb"},"source":["# Model understands Afrikaans\n","\n","![af](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)\n","\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"VMPhbgw9twtf","executionInfo":{"status":"ok","timestamp":1614339342112,"user_tz":-300,"elapsed":4289,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bce1cb12-85d8-4a97-b5bd-8e50ea62e1fe"},"source":["\n","# Afrikaans for: 'It worked perfectly '\n","fitted_pipe.predict(\"Dit het perfek gewerk\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.992191Dit het perfek gewerk[-0.0018796967342495918, -0.043611422181129456...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [-0.0018796967342495918, -0.043611422181129456...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":52}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"zWgNTIdkumhX","executionInfo":{"status":"ok","timestamp":1614337548497,"user_tz":-300,"elapsed":5379,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7de1a4fc-0dde-4028-ceaf-1c548a6e70b8"},"source":["# Afrikaans for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"Dit het op die eerste dag opgehou werk \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999818Dit het op die eerste dag opgehou werk[0.00801787432283163, -0.013427305966615677, -...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [0.00801787432283163, -0.013427305966615677, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"GITfT7FK0CGv"},"source":["# The model understands Zulu\n","![zu](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"ifRhs6e7OcR3","executionInfo":{"status":"ok","timestamp":1614339479847,"user_tz":-300,"elapsed":4948,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2382c482-4576-40f8-c3c4-fae26437bfdb"},"source":["# Zulu for: 'It worked perfectly '\n","fitted_pipe.predict(\"Kusebenze ngokuphelele\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.998644Kusebenze ngokuphelele[0.010045883245766163, -0.051708437502384186, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.010045883245766163, -0.051708437502384186, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":57}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"6uelDwq4xdWv","executionInfo":{"status":"ok","timestamp":1614337565836,"user_tz":-300,"elapsed":14127,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"67db0cee-df39-4078-823d-d3aaa77a1289"},"source":["# Zulu for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"Iyeke ukusebenza ngosuku lokuqala \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.996274Iyeke ukusebenza ngosuku lokuqala[0.004491214174777269, 0.01870339922606945, 0....
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [0.004491214174777269, 0.01870339922606945, 0....\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":20}]},{"cell_type":"markdown","metadata":{"id":"VGVvzl_30a0T"},"source":["# The Model understands Turkish\n","![tr](https://www.worldometers.info/img/flags/small/tn_tu-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"DRNnuEeQz2pd","executionInfo":{"status":"ok","timestamp":1614339507969,"user_tz":-300,"elapsed":4507,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5243eedd-2d72-4d7f-8fe4-13765975a733"},"source":["\n","# Turkish for: 'It It worked perfectly '\n","fitted_pipe.predict(\"Mükemmel çalıştı\")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999999Mükemmel çalıştı[0.06036875769495964, 0.0009111057734116912, -...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.06036875769495964, 0.0009111057734116912, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":58}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"aOSsiK6J0jWs","executionInfo":{"status":"ok","timestamp":1614339059278,"user_tz":-300,"elapsed":3491,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7d417dde-d49c-4a79-d1f6-b2fab60d1fcd"},"source":["\n","# Turkish for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"İlk gün çalışmayı bıraktı \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.997740İlk gün çalışmayı bıraktı[0.05094106122851372, 0.02071293629705906, 0.0...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [0.05094106122851372, 0.02071293629705906, 0.0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"markdown","metadata":{"id":"803qL2gt0vlb"},"source":["# The Model understands Hebrew\n","![he](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"XQ5VCtxw0pc0","executionInfo":{"status":"ok","timestamp":1614339528341,"user_tz":-300,"elapsed":4778,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dd6a13f6-78bd-4cab-d836-9da007704e18"},"source":["# Hebrew for: 'It worked perfectly '\n","fitted_pipe.predict(\"זה עבד בצורה מושלמת\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.997647זה עבד בצורה מושלמת[-0.013386869803071022, -0.05498736351728439, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [-0.013386869803071022, -0.05498736351728439, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":59}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"9w2ZHfns05A4","executionInfo":{"status":"ok","timestamp":1614339073628,"user_tz":-300,"elapsed":3501,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2c1537a5-b481-4557-f4c8-251efbba0e5d"},"source":["\t\t\n","# Hebrew for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"זה הפסיק לעבוד ביום הראשון \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999990זה הפסיק לעבוד ביום הראשון[-0.013081037439405918, -0.02689044177532196, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [-0.013081037439405918, -0.02689044177532196, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":25}]},{"cell_type":"markdown","metadata":{"id":"SDlpd33H1HIX"},"source":["# The Model understands Telugu\n","![te](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"Kc5n1bzv1BJT","executionInfo":{"status":"ok","timestamp":1614339084745,"user_tz":-300,"elapsed":6206,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c5fca5a8-56b4-47a0-d56f-0f8ab2f461a6"},"source":["\t\t\n","# Telugu for: 'It was like brand new'\n","fitted_pipe.predict(\"ఇది సరికొత్తది \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.992611ఇది సరికొత్తది[0.0202536229044199, -0.04585985466837883, -0....
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.0202536229044199, -0.04585985466837883, -0....\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"-l-u6vrz1Obe","executionInfo":{"status":"ok","timestamp":1614339093673,"user_tz":-300,"elapsed":5306,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"beec8261-3c5c-439b-f5fd-3dcd8e69820f"},"source":["\n","# Telugu for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"ఇది మొదటి రోజు పనిచేయడం మానేసింది \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999991ఇది మొదటి రోజు పనిచేయడం మానేసింది[0.0002221895701950416, -0.018764713779091835,...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [0.0002221895701950416, -0.018764713779091835,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":28}]},{"cell_type":"markdown","metadata":{"id":"nziBUe8t1Zwn"},"source":["# Model understands Russian\n","![ru](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"Ckyjl3YQ1VFn","executionInfo":{"status":"ok","timestamp":1614339547750,"user_tz":-300,"elapsed":4086,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"068dd623-583f-452d-e6ab-e4befe490b84"},"source":["\t\t\n","# Russian for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"Перестал работать в первый же день \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999996Это сработало отлично[0.008218087255954742, -0.05058329179883003, -...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.008218087255954742, -0.05058329179883003, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":60}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"GIdWkfGv1gFz","executionInfo":{"status":"ok","timestamp":1614339558570,"user_tz":-300,"elapsed":4095,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c7ec4069-584c-4072-c878-7b7a87b0af37"},"source":["# Russian for: 'It worked perfectly '\n","fitted_pipe.predict(\"Это сработало отлично\")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999996Это сработало отлично[0.008218087255954742, -0.05058329179883003, -...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.008218087255954742, -0.05058329179883003, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":61}]},{"cell_type":"markdown","metadata":{"id":"8R1j9mwz2Cm4"},"source":["# Model understands Urdu\n","![ur](https://www.worldometers.info/img/flags/small/tn_pk-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"j4zwvRV11pcG","executionInfo":{"status":"ok","timestamp":1614339609069,"user_tz":-300,"elapsed":4234,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"4954fd5e-0918-42bc-89d3-516afa5db0d1"},"source":["# Urdu for: 'It was the best ever!'\n","fitted_pipe.predict(\"یہ اب تک کا سب سے اچھا تھا!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999999یہ اب تک کا سب سے اچھا تھا![0.030929533764719963, -0.05265289545059204, -...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.030929533764719963, -0.05265289545059204, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":63}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"SxzTuK4b2UKV","executionInfo":{"status":"ok","timestamp":1614339583461,"user_tz":-300,"elapsed":3986,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0ae8cafc-bb28-41a2-e25d-46d6f4b5dcd5"},"source":["# Urdu for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"اس نے پہلے دن کام کرنا چھوڑ دیا \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.866228یہ بالکل کام کیا[0.03534713014960289, -0.06895627826452255, -0...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [0.03534713014960289, -0.06895627826452255, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":62}]},{"cell_type":"markdown","metadata":{"id":"RoNg-C3k1qcX"},"source":["# Model understands Hindi\n","![hi](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"QZ9RT5Wv1r1n","executionInfo":{"status":"ok","timestamp":1614339668113,"user_tz":-300,"elapsed":3348,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"736cd07b-97fb-414b-ddf1-10688c156e4c"},"source":["# hindi for: 'It was the best ever !!'\n","fitted_pipe.predict(\"यह सबसे अच्छा कभी था !!\")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999092यह सबसे अच्छा कभी था !![0.002181761898100376, -0.05553552508354187, -...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.002181761898100376, -0.05553552508354187, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":66}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"quM-IL2i12-B","executionInfo":{"status":"ok","timestamp":1614339166747,"user_tz":-300,"elapsed":4190,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e23e028b-282e-4155-9481-2738e7b1cb99"},"source":["# hindi for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"इसने पहले दिन काम करना बंद कर दिया \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999991इसने पहले दिन काम करना बंद कर दिया[0.009801734238862991, -0.026267804205417633, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [0.009801734238862991, -0.026267804205417633, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":36}]},{"cell_type":"markdown","metadata":{"id":"R4ByHOZn35Lc"},"source":["# The model understands Tartar\n","![tt](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"2JrzusSQ18F5","executionInfo":{"status":"ok","timestamp":1614339700868,"user_tz":-300,"elapsed":3311,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"93c8850e-9b1e-4a9a-cbe7-e54c8d150107"},"source":["# Tartar for: 'It was It was the best ever !!'\n","fitted_pipe.predict(\"Бу иң яхшысы иде !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999988Бу иң яхшысы иде !![0.03979770094156265, -0.05419746786355972, -0...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.03979770094156265, -0.05419746786355972, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":67}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"J06Xm_Ln4AYu","executionInfo":{"status":"ok","timestamp":1614339183512,"user_tz":-300,"elapsed":4326,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"70ea097f-0496-430f-ece8-a1ed59f24332"},"source":["# Tartar for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"Беренче көнне эшләүне туктатты \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999926Беренче көнне эшләүне туктатты[0.030974868685007095, 0.03395547345280647, 0....
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [0.030974868685007095, 0.03395547345280647, 0....\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":38}]},{"cell_type":"markdown","metadata":{"id":"HKj5yWwwMplH"},"source":["# The Model understands French\n","![fr](https://www.worldometers.info/img/flags/small/tn_fr-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"CUHcJZfJMplL","executionInfo":{"status":"ok","timestamp":1614339723420,"user_tz":-300,"elapsed":3620,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"8bcd1ae8-1a35-421a-b063-7e0a29190c3d"},"source":["# French for: 'It was the best ever !!'\n","fitted_pipe.predict(\"C'était la meilleure chose que je n'ai jamais faite !!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.998638C'était la meilleure chose que je n'ai jamais ...[0.02751990035176277, -0.047827284783124924, -...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.02751990035176277, -0.047827284783124924, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":68}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"57NY2XoTMplM","executionInfo":{"status":"ok","timestamp":1614339207129,"user_tz":-300,"elapsed":4127,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7bd6b282-0d7c-4077-fbe0-ec3ec03677fa"},"source":["\t\t\n","# French for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"Il a cessé de fonctionner le premier jour \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999981Il a cessé de fonctionner le premier jour[-0.027476917952299118, -0.006572211626917124,...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [-0.027476917952299118, -0.006572211626917124,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":40}]},{"cell_type":"markdown","metadata":{"id":"jD2TBgT0Nq6F"},"source":["# The Model understands Thai\n","![th](https://www.worldometers.info/img/flags/small/tn_th-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"gBp11S5GNq6S","executionInfo":{"status":"ok","timestamp":1614339747805,"user_tz":-300,"elapsed":4390,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d3efa9b3-a289-47b2-a782-91244edfcee7"},"source":["# Thai for: 'It was the best ever !!'\n","fitted_pipe.predict(\"มันดีที่สุดเท่าที่เคยมีมา !!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good1.000000มันดีที่สุดเท่าที่เคยมีมา !![-0.00953331496566534, -0.05253230035305023, -...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [-0.00953331496566534, -0.05253230035305023, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":69}]},{"cell_type":"code","metadata":{"id":"R6nKI7C3QKa3","colab":{"base_uri":"https://localhost:8080/","height":111},"executionInfo":{"status":"ok","timestamp":1614339220524,"user_tz":-300,"elapsed":4798,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"33f60654-b70c-4449-942e-43ded452995b"},"source":["# Thai for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"มันหยุดทำงานในวันแรก \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999795มันหยุดทำงานในวันแรก[-0.028285449370741844, -0.02508157305419445, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [-0.028285449370741844, -0.02508157305419445, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":42}]},{"cell_type":"markdown","metadata":{"id":"mLItI4KZOElB"},"source":["# The Model understands Khmer\n","![km](https://www.worldometers.info/img/flags/small/tn_cb-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"SWbqMgAwOElC","executionInfo":{"status":"ok","timestamp":1614339225700,"user_tz":-300,"elapsed":4849,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"21c43d6c-085e-4c67-a45c-e7366c32e75c"},"source":["\n","# Khmer for: 'It was like brand new'\n","fitted_pipe.predict(\"វាដូចជាម៉ាកថ្មី \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999677វាដូចជាម៉ាកថ្មី[-0.013914551585912704, 0.011598535813391209, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [-0.013914551585912704, 0.011598535813391209, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":43}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"beoCtm4xQf2P","executionInfo":{"status":"ok","timestamp":1614339231428,"user_tz":-300,"elapsed":4320,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f4eec1f0-7f10-45a5-9eeb-f007738e758c"},"source":["\t\t\n","# Khmer for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"វាឈប់ធ្វើការនៅថ្ងៃដំបូង \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999771វាឈប់ធ្វើការនៅថ្ងៃដំបូង[-0.012111756019294262, -0.025659380480647087,...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [-0.012111756019294262, -0.025659380480647087,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":44}]},{"cell_type":"markdown","metadata":{"id":"lvE-LbNiPoBT"},"source":["# The Model understands Yiddish\n","![yi](https://www.worldometers.info/img/flags/small/tn_pl-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"sZlmLhajPoBb","executionInfo":{"status":"ok","timestamp":1614339787003,"user_tz":-300,"elapsed":5612,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"96a0eea3-0680-48fb-c880-f9492c762e4e"},"source":["# Yiddish for: 'It was the best ever !!'\n","fitted_pipe.predict(\"עס איז געווען דער בעסטער טאָמיד !!\")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999999עס איז געווען דער בעסטער טאָמיד !![0.01722850650548935, -0.04829198867082596, -0...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.01722850650548935, -0.04829198867082596, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":70}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"5h-pha_nPoBc","executionInfo":{"status":"ok","timestamp":1614339242470,"user_tz":-300,"elapsed":5022,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a643a8f4-7906-4aed-8d29-dcf52d58ed65"},"source":["# Yiddish for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"אויף דער ערשטער טאָג עס סטאַפּט ארבעטן \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999583אויף דער ערשטער טאָג עס סטאַפּט ארבעטן[-0.03324508294463158, -0.02061191014945507, 0...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [-0.03324508294463158, -0.02061191014945507, 0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"markdown","metadata":{"id":"XSz4WzScaAHj"},"source":["# The Model understands Kyrgyz\n","![ky](https://www.worldometers.info/img/flags/small/tn_kg-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"DXz6fhJSaAHu","executionInfo":{"status":"ok","timestamp":1614339809996,"user_tz":-300,"elapsed":4078,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"886fc3a5-5899-457b-c968-d0ba6a3afe79"},"source":["\t\t\n","# Kyrgyz for: 'It was the best ever !!'\n","fitted_pipe.predict(\"Бул эң мыкты болду !!\")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good1.000000Бул эң мыкты болду !![0.036575060337781906, -0.056231312453746796, ...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.036575060337781906, -0.056231312453746796, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":71}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"lh_ZSHlPaAHv","executionInfo":{"status":"ok","timestamp":1614339257197,"user_tz":-300,"elapsed":8374,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a91748c2-2e5b-41ec-e14f-dbb6a85c5365"},"source":["\n","# Kyrgyz for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"Биринчи күнү эле иштебей калды \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999976Биринчи күнү эле иштебей калды[0.012193235568702221, 0.014580748975276947, -...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [0.012193235568702221, 0.014580748975276947, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":48}]},{"cell_type":"markdown","metadata":{"id":"DGMVMKaTdJFj"},"source":["# The Model understands Tamil\n","![ta](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"JWDr_LoCdJFn","executionInfo":{"status":"ok","timestamp":1614339829359,"user_tz":-300,"elapsed":4124,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6f1c92b6-35bc-4aca-aa8f-e771036196ad"},"source":["# Tamil for: 'It was the best ever !!'\n","fitted_pipe.predict(\"இது எப்போதும் சிறந்தது !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good1.000000இது எப்போதும் சிறந்தது !![-0.030394665896892548, -0.058778341859579086,...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [-0.030394665896892548, -0.058778341859579086,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":72}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":111},"id":"Q6C0BmTtdJFp","executionInfo":{"status":"ok","timestamp":1614339264676,"user_tz":-300,"elapsed":3898,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d8547920-9db4-4112-e87a-033ed247e540"},"source":["\t\t\n","# Tamil for: 'It stopped working on the first day'\n","fitted_pipe.predict(\"இது முதல் நாளில் வேலை செய்வதை நிறுத்தியது \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
categorycategory_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0poor0.999954இது முதல் நாளில் வேலை செய்வதை நிறுத்தியது[0.02203391306102276, -0.009059661068022251, 0...
\n","
"],"text/plain":[" category ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 poor ... [0.02203391306102276, -0.009059661068022251, 0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":50}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Let's save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1613872379873,"user_tz":-300,"elapsed":934849,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f88514ee-d24f-41a5-da45-784ec6894a34"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Let's load the model from HDD.\n","This makes offline NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":110},"executionInfo":{"status":"ok","timestamp":1613872792433,"user_tz":-300,"elapsed":107385,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"560796d4-e889-4a94-cc63-3cfbadae7f5c"},"source":["stored_model_path = './models/classifier_dl_trained'\n","hdd_pipe = nlu.load(path=stored_model_path)\n","preds = hdd_pipe.predict('It worked perfectly.')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
classifierclassifier_confidencedocumentxx_embed_sentence_labse_embeddings
origin_index
0good0.999984It worked perfectly.[0.006914846133440733, -0.04569808393716812, -...
\n","
"],"text/plain":[" classifier ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 good ... [0.006914846133440733, -0.04569808393716812, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1613872792435,"user_tz":-300,"elapsed":103880,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ed2a6c7c-3fa5-482a-e9b1-327eb171a8ab"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'poor', 'good']) | Info: get the tags used to trained this ClassifierDLModel | Currently set to : ['average', 'poor', 'good']\n","pipe['classifier_dl'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo_hotel_reviews.ipynb b/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo_hotel_reviews.ipynb new file mode 100644 index 00000000..b8254535 --- /dev/null +++ b/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo_hotel_reviews.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_training_multi_lingual_multi_class_text_classifier_demo_hotel_reviews.ipynb","provenance":[],"collapsed_sections":["zkufh760uvF3"]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zkufh760uvF3"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/Training/multi_lingual/multi_class_text_classification/NLU_training_multi_lingual_multi_class_text_classifier_demo_hotel_reviews.ipynb)\n","\n","\n","\n","# Training a Deep Learning Classifier with NLU \n","## ClassifierDL (Multi-class Text Classification)\n","## 3 class Tripadvisor Hotel review classifier training\n","With the 
[ClassifierDL model](https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdl-multi-class-text-classification) from Spark NLP you can achieve State Of the Art results on any multi class text classification problem \n","\n","This notebook showcases the following features : \n","\n","- How to train the deep learning classifier\n","- How to store a pipeline to disk\n","- How to load the pipeline from disk (Enables NLU offline mode)\n","\n","You can achieve these results or even better on this dataset with training data :\n","\n","
\n","\n","![image.png]()\n","\n","\n","You can achieve these results or even better on this dataset with test data :\n","\n","
\n","\n","\n","![image.png]()"]},{"cell_type":"markdown","metadata":{"id":"dur2drhW5Rvi"},"source":["# 1. Install Java 8 and NLU"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"dTyEf8lNWMi7","executionInfo":{"status":"ok","timestamp":1614343559957,"user_tz":-300,"elapsed":52036,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ced6de18-fa33-401a-ca04-d8e4469b27c8"},"source":["import os\n","from sklearn.metrics import classification_report\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install pyspark==2.4.7 \n","! pip install nlu > /dev/null \n","\n","\n","import nlu"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: pyspark==2.4.7 in /usr/local/lib/python3.7/dist-packages (2.4.7)\n","Requirement already satisfied: py4j==0.10.7 in /usr/local/lib/python3.7/dist-packages (from pyspark==2.4.7) (0.10.7)\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"f4KkTfnR5Ugg"},"source":["# 2. 
Download hotel reviews dataset \n","https://www.kaggle.com/andrewmvd/trip-advisor-hotel-reviews\n","\n","Hotels play a crucial role in traveling and with the increased access to information new pathways of selecting the best ones emerged.\n","With this dataset, consisting of 20k reviews crawled from Tripadvisor, you can explore what makes a great hotel and maybe even use this model in your travels!\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OrVb5ZMvvrQD","executionInfo":{"status":"ok","timestamp":1614340264850,"user_tz":-300,"elapsed":96138,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"50abf627-2a43-4a06-c142-ee9fd1cb2eb3"},"source":["! wget http://ckl-it.de/wp-content/uploads/2021/02/tripadvisor_hotel_reviews_multi_lingual.csv\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["--2021-02-26 11:49:52-- http://ckl-it.de/wp-content/uploads/2021/02/tripadvisor_hotel_reviews_multi_lingual.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 5332717 (5.1M) [text/csv]\n","Saving to: ‘tripadvisor_hotel_reviews_multi_lingual.csv’\n","\n","tripadvisor_hotel_r 100%[===================>] 5.08M 4.03MB/s in 1.3s \n","\n","2021-02-26 11:49:54 (4.03 MB/s) - ‘tripadvisor_hotel_reviews_multi_lingual.csv’ saved [5332717/5332717]\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":399},"id":"y4xSRWIhwT28","executionInfo":{"status":"ok","timestamp":1614343559964,"user_tz":-300,"elapsed":52023,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f19fba1c-ae73-47c3-affc-e1006a48e8ec"},"source":["import pandas as pd\n","test_path = '/content/tripadvisor_hotel_reviews_multi_lingual.csv'\n","train_df = pd.read_csv(test_path,sep=\",\")\n","cols = [\"y\",\"text\"]\n","train_df = train_df[cols]\n","from sklearn.model_selection import train_test_split\n","train_df = train_df.iloc[:1500]\n","train_df, test_df = train_test_split(train_df, test_size=0.2)\n","train_df\n","\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ytext
37greatexceeded expectations, stayed bavaro princess ...
1236averagenot bad just got week punta cana grand, like s...
1177poorhotel did not let know advance pool area close...
893greatprivacy french quarter feel spent nights dauph...
917averagenot bad, wife celebrated 10 year anniversay ba...
.........
1400poordisapointed hotel stayed hotel 2 days november...
1468greatwonderful hotel 5* plus emphasis customer serv...
554averagebasic good hotel fair distance city centre clo...
353greatcool, staff location hotel great, reasonably p...
118averageoverall good experience great place relax, hus...
\n","

1200 rows × 2 columns

\n","
"],"text/plain":[" y text\n","37 great exceeded expectations, stayed bavaro princess ...\n","1236 average not bad just got week punta cana grand, like s...\n","1177 poor hotel did not let know advance pool area close...\n","893 great privacy french quarter feel spent nights dauph...\n","917 average not bad, wife celebrated 10 year anniversay ba...\n","... ... ...\n","1400 poor disapointed hotel stayed hotel 2 days november...\n","1468 great wonderful hotel 5* plus emphasis customer serv...\n","554 average basic good hotel fair distance city centre clo...\n","353 great cool, staff location hotel great, reasonably p...\n","118 average overall good experience great place relax, hus...\n","\n","[1200 rows x 2 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"0296Om2C5anY"},"source":["# 3. Train Deep Learning Classifier using nlu.load('train.classifier')\n","\n","Your dataset label column should be named 'y' and the feature column with text data should be named 'text'"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":828},"id":"3ZIPkRkWftBG","executionInfo":{"status":"ok","timestamp":1614345941723,"user_tz":-300,"elapsed":2433763,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"db12de1a-f07d-40a6-eee0-26113a57e911"},"source":["trainable_pipe = nlu.load('xx.embed_sentence.labse train.classifier')\n","# We need to train longer and use a smaller LR for NON-USE based sentence embeddings usually\n","# We could tune the hyperparameters further with hyperparameter tuning methods like gridsearch\n","# Also longer training gives more accuracy\n","trainable_pipe['classifier_dl'].setMaxEpochs(60) \n","trainable_pipe['classifier_dl'].setLr(0.005) \n","fitted_pipe = trainable_pipe.fit(train_df)\n","# predict with the trainable pipeline on dataset and get predictions\n","preds = fitted_pipe.predict(train_df,output_level='document')\n","\n","#sentence 
detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":["labse download started this may take some time.\n","Approximate size to download 1.7 GB\n","[OK!]\n"," precision recall f1-score support\n","\n"," average 0.79 0.76 0.77 389\n"," great 0.87 0.88 0.88 402\n"," poor 0.84 0.86 0.85 409\n","\n"," accuracy 0.83 1200\n"," macro avg 0.83 0.83 0.83 1200\n","weighted avg 0.83 0.83 0.83 1200\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencetextycategorydocumentxx_embed_sentence_labse_embeddings
origin_index
370.999785exceeded expectations, stayed bavaro princess ...greatgreatexceeded expectations, stayed bavaro princess ...[-0.04549850523471832, 0.0030662992503494024, ...
12360.928466not bad just got week punta cana grand, like s...averageaveragenot bad just got week punta cana grand, like s...[-0.059047382324934006, 0.010260380804538727, ...
11770.999902hotel did not let know advance pool area close...poorpoorhotel did not let know advance pool area close...[-0.01514238677918911, 0.055598434060811996, -...
8930.845497privacy french quarter feel spent nights dauph...greatgreatprivacy french quarter feel spent nights dauph...[-0.00845617987215519, 0.01713375374674797, -0...
9170.786963not bad, wife celebrated 10 year anniversay ba...averageaveragenot bad, wife celebrated 10 year anniversay ba...[-0.022421982139348984, 0.03690879046916962, -...
.....................
14000.999878disapointed hotel stayed hotel 2 days november...poorpoordisapointed hotel stayed hotel 2 days november...[-0.055554818361997604, 0.058676544576883316, ...
14680.999970wonderful hotel 5* plus emphasis customer serv...greatgreatwonderful hotel 5* plus emphasis customer serv...[-0.0014703000197187066, -0.002986577339470386...
5540.998763basic good hotel fair distance city centre clo...averageaveragebasic good hotel fair distance city centre clo...[-0.02877921611070633, 0.0037026735953986645, ...
3530.751464cool, staff location hotel great, reasonably p...greatgreatcool, staff location hotel great, reasonably p...[0.015865614637732506, 0.04452160745859146, -0...
1180.999480overall good experience great place relax, hus...averagegreatoverall good experience great place relax, hus...[-0.013611174188554287, 0.022284582257270813, ...
\n","

1200 rows × 6 columns

\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","37 0.999785 ... [-0.04549850523471832, 0.0030662992503494024, ...\n","1236 0.928466 ... [-0.059047382324934006, 0.010260380804538727, ...\n","1177 0.999902 ... [-0.01514238677918911, 0.055598434060811996, -...\n","893 0.845497 ... [-0.00845617987215519, 0.01713375374674797, -0...\n","917 0.786963 ... [-0.022421982139348984, 0.03690879046916962, -...\n","... ... ... ...\n","1400 0.999878 ... [-0.055554818361997604, 0.058676544576883316, ...\n","1468 0.999970 ... [-0.0014703000197187066, -0.002986577339470386...\n","554 0.998763 ... [-0.02877921611070633, 0.0037026735953986645, ...\n","353 0.751464 ... [0.015865614637732506, 0.04452160745859146, -0...\n","118 0.999480 ... [-0.013611174188554287, 0.022284582257270813, ...\n","\n","[1200 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"markdown","metadata":{"id":"_1jxw3GnVGlI"},"source":["# 3.1 evaluate on Test Data"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Fxx4yNkNVGFl","executionInfo":{"status":"ok","timestamp":1614346462745,"user_tz":-300,"elapsed":2954773,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"80d001de-5876-477b-f5cc-0a7c9fd3fc95"},"source":["preds = fitted_pipe.predict(test_df,output_level='document')\n","\n","#sentence detector that is part of the pipe generates sone NaNs. lets drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.63 0.52 0.57 105\n"," great 0.75 0.84 0.79 92\n"," poor 0.73 0.78 0.75 103\n","\n"," accuracy 0.71 300\n"," macro avg 0.70 0.71 0.70 300\n","weighted avg 0.70 0.71 0.70 300\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"yUkk_L8MGcRg"},"source":["#4. 
Test Model with 20 languages!"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":706},"id":"q2s6nsZZGcRm","executionInfo":{"status":"ok","timestamp":1614183708224,"user_tz":-300,"elapsed":2367533,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a2d42116-d186-4be3-b7bd-c8a5494262e0"},"source":["train_df = pd.read_csv(\"tripadvisor_hotel_reviews_multi_lingual.csv\")\n","preds = fitted_pipe.predict(train_df[[\"test_sentences\",\"y\"]].iloc[:100],output_level='document')\n","\n","#sentence detector that is part of the pipe generates some NaNs. Let's drop them first\n","preds.dropna(inplace=True)\n","print(classification_report(preds['y'], preds['category']))\n","\n","preds"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" precision recall f1-score support\n","\n"," average 0.48 0.42 0.45 33\n"," great 0.68 0.77 0.72 35\n"," poor 0.71 0.69 0.70 32\n","\n"," accuracy 0.63 100\n"," macro avg 0.62 0.63 0.62 100\n","weighted avg 0.62 0.63 0.62 100\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingscategory_confidencedocumentytextcategory
origin_index
0[-0.013124355114996433, -0.010088321752846241,...0.989939Tolles Hotel 5 Nächte Ende August 2005. Reserv...greatTolles Hotel 5 Nächte Ende August 2005. Reserv...average
1[0.0211328137665987, 0.06491417437791824, -0.0...0.685842தூண்டில் மற்றும் சுவிட்ச் அறை விகிதங்கள், ஏற்ற...poorதூண்டில் மற்றும் சுவிட்ச் அறை விகிதங்கள், ஏற்ற...average
2[-0.01487706322222948, 0.08078613132238388, -0...0.993675បន្ទប់ឆែកល្អចូលចិត្តសណ្ឋាគារទីតាំងល្អមិត្តភាព។...averageបន្ទប់ឆែកល្អចូលចិត្តសណ្ឋាគារទីតាំងល្អមិត្តភាព។...average
3[-0.05104789137840271, 0.0495218001306057, -0....0.999985beste Lage Wert Eigenschaften Waikiki Kopf Hot...greatbeste Lage Wert Eigenschaften Waikiki Kopf Hot...great
4[-0.04287628084421158, 0.011480966582894325, -...0.999809botel not recommended little disappointed hone...poorbotel not recommended little disappointed hone...poor
.....................
95[-0.0008096436504274607, 0.03655293956398964, ...0.801106موقع رائع قضى 7 أيام في نزل القلعة في بداية ال...greatموقع رائع قضى 7 أيام في نزل القلعة في بداية ال...great
96[-0.005492127034813166, 0.055038247257471085, ...0.987334super emplacement les lits durs ont vraiment a...averagesuper emplacement les lits durs ont vraiment a...great
97[0.009459462948143482, 0.0067734019830822945, ...0.994116ทำเลที่ตั้งดีเยี่ยมโรงแรมที่สมบูรณ์แบบตรงกลางน...greatทำเลที่ตั้งดีเยี่ยมโรงแรมที่สมบูรณ์แบบตรงกลางน...average
98[-0.06341181695461273, 0.03327463939785957, -0...0.976167gerade anfangen, Glanz zu verlieren, blieb Kan...greatgerade anfangen, Glanz zu verlieren, blieb Kan...poor
99[0.028746405616402626, -0.009642810560762882, ...0.984000bittersweet यादें शानदार अतीत में हाल ही में र...poorbittersweet यादें शानदार अतीत में हाल ही में र...poor
\n","

100 rows × 6 columns

\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... category\n","origin_index ... \n","0 [-0.013124355114996433, -0.010088321752846241,... ... average\n","1 [0.0211328137665987, 0.06491417437791824, -0.0... ... average\n","2 [-0.01487706322222948, 0.08078613132238388, -0... ... average\n","3 [-0.05104789137840271, 0.0495218001306057, -0.... ... great\n","4 [-0.04287628084421158, 0.011480966582894325, -... ... poor\n","... ... ... ...\n","95 [-0.0008096436504274607, 0.03655293956398964, ... ... great\n","96 [-0.005492127034813166, 0.055038247257471085, ... ... great\n","97 [0.009459462948143482, 0.0067734019830822945, ... ... average\n","98 [-0.06341181695461273, 0.03327463939785957, -0... ... poor\n","99 [0.028746405616402626, -0.009642810560762882, ... ... poor\n","\n","[100 rows x 6 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"RjtuNUcvuJTT"},"source":["# The Model understands English\n","![en](https://www.worldometers.info/img/flags/small/tn_nz-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"o0vu7PaWkcI7","executionInfo":{"status":"ok","timestamp":1614346500989,"user_tz":-300,"elapsed":5090,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ad0ae855-2aba-4f2d-f449-9b5a533dcff6"},"source":["fitted_pipe.predict(\"It was the best stay of my life, I loved it!! \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.999093greatIt was the best stay of my life, I loved it!![0.025105193257331848, -0.04443410038948059, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999093 ... [0.025105193257331848, -0.04443410038948059, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1ykjRQhCtQ4w","executionInfo":{"status":"ok","timestamp":1614346504428,"user_tz":-300,"elapsed":8332,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"b40b0e9c-3576-43c4-ec4c-df7195b33f7f"},"source":["fitted_pipe.predict(\"It was the worst stay of my life, I hated it!! \")\n"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.895561poorIt was the worst stay of my life, I hated it!![0.029578806832432747, -0.042081184685230255, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.895561 ... [0.029578806832432747, -0.042081184685230255, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"vohym-XbuNHn"},"source":["# The Model understands German\n","![de](https://www.worldometers.info/img/flags/small/tn_gm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"dzaaZrI4tVWc","executionInfo":{"status":"ok","timestamp":1614346507368,"user_tz":-300,"elapsed":10904,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3a55ba80-9c2d-434f-bc94-3735b6aded46"},"source":["# German for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"Es war der beste Aufenthalt meines Lebens, ich habe es geliebt !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.998235greatEs war der beste Aufenthalt meines Lebens, ich...[0.025617875158786774, -0.05000672861933708, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.998235 ... [0.025617875158786774, -0.05000672861933708, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"BbhgTSBGtTtJ","executionInfo":{"status":"ok","timestamp":1614346510969,"user_tz":-300,"elapsed":14359,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f840584b-9fef-4bd2-960b-d9d4b8cb1704"},"source":["\t\t\n","# German for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"Es war der schlimmste Aufenthalt meines Lebens, ich hasste es !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.467292poorEs war der schlimmste Aufenthalt meines Lebens...[0.022492459043860435, -0.05308642238378525, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.467292 ... [0.022492459043860435, -0.05308642238378525, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"a1JbtmWquQwj"},"source":["# The Model understands Chinese\n","![zh](https://www.worldometers.info/img/flags/small/tn_ch-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"kYSYqtoRtc-P","executionInfo":{"status":"ok","timestamp":1614346514096,"user_tz":-300,"elapsed":17068,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"ef123584-4111-435c-9ac7-1faf86208d52"},"source":["# Chinese for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"这是我一生中最美好的时光,我喜欢它!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.999551great这是我一生中最美好的时光,我喜欢它!![0.011552252806723118, -0.06316247582435608, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999551 ... [0.011552252806723118, -0.06316247582435608, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"06v9SD-QtlBU","executionInfo":{"status":"ok","timestamp":1614346967460,"user_tz":-300,"elapsed":3255,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"bb9b52b5-7218-4aaa-cb3d-4df7f906eba4"},"source":["# Chinese for: 'It was awful!! '\n","fitted_pipe.predict(\"太糟糕了!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.984005poor太糟糕了!![-0.025874078273773193, -0.0665956661105156, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.984005 ... [-0.025874078273773193, -0.0665956661105156, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":49}]},{"cell_type":"markdown","metadata":{"id":"9h7CvN4uu9Pb"},"source":["# Model understands Afrikaans\n","\n","![af](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)\n","\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"VMPhbgw9twtf","executionInfo":{"status":"ok","timestamp":1614346520196,"user_tz":-300,"elapsed":22686,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"76c2e934-846e-4b57-befa-67a3a5c454cf"},"source":["\t\t\n","# Afrikaans for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"Dit was die beste verblyf in my lewe, ek was mal daaroor !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.996766greatDit was die beste verblyf in my lewe, ek was m...[0.027523042634129524, -0.034727904945611954, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.996766 ... [0.027523042634129524, -0.034727904945611954, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"zWgNTIdkumhX","executionInfo":{"status":"ok","timestamp":1614346523225,"user_tz":-300,"elapsed":25265,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"56effef4-2b5a-4e2d-d538-e687497117f6"},"source":["# Afrikaans for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"Dit was die slegste verblyf in my lewe, ek het dit gehaat !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.982654poorDit was die slegste verblyf in my lewe, ek het...[0.020533405244350433, -0.04479401186108589, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.982654 ... [0.020533405244350433, -0.04479401186108589, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"markdown","metadata":{"id":"rSEPkC-Bwnpg"},"source":["# The model understands Vietnamese\n","![vi](https://www.worldometers.info/img/flags/small/tn_vm-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"7ksJosuTOYpE","executionInfo":{"status":"ok","timestamp":1614346526321,"user_tz":-300,"elapsed":28024,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"9e2c7e5e-218e-46a0-d374-87e51664e57f"},"source":["# Vietnamese for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"Đó là kỳ nghỉ tuyệt vời nhất trong đời tôi, tôi yêu nó !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.800499greatĐó là kỳ nghỉ tuyệt vời nhất trong đời tôi, tô...[0.022074760869145393, -0.06135038658976555, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.800499 ... [0.022074760869145393, -0.06135038658976555, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"VfG3UaCTEZB_","executionInfo":{"status":"ok","timestamp":1614346932664,"user_tz":-300,"elapsed":3251,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"90063bf5-a504-4ccb-f1cd-c5bcbed20f14"},"source":["# Vietnamese for: 'It was awful!! '\n","fitted_pipe.predict(\"Nó quá tệ!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.993273poorNó quá tệ!![-0.011885236017405987, -0.06412354856729507, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.993273 ... [-0.011885236017405987, -0.06412354856729507, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":48}]},{"cell_type":"markdown","metadata":{"id":"IlkmAaMoxTuy"},"source":["# The model understands Japanese\n","![ja](https://www.worldometers.info/img/flags/small/tn_ja-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"1IfJu3q8wwUt","executionInfo":{"status":"ok","timestamp":1614346532495,"user_tz":-300,"elapsed":33764,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1713a75b-e523-4b19-fa14-0c8941f4f2e8"},"source":["# Japanese for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"それは私の人生で最高の滞在でした、私はそれを愛していました!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.999605greatそれは私の人生で最高の滞在でした、私はそれを愛していました!![0.03218996152281761, -0.04521138221025467, -0...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999605 ... [0.03218996152281761, -0.04521138221025467, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"-RjXWbFIPvIs","executionInfo":{"status":"ok","timestamp":1614346535433,"user_tz":-300,"elapsed":36523,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"cf009458-299a-425e-df0e-0f037e6407b4"},"source":["# Japanese for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"人生最悪の滞在でした、嫌いでした!! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.726080poor人生最悪の滞在でした、嫌いでした!![0.0015585115179419518, -0.04200109466910362, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.726080 ... [0.0015585115179419518, -0.04200109466910362, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":16}]},{"cell_type":"markdown","metadata":{"id":"GITfT7FK0CGv"},"source":["# The model understands Zulu\n","![zu](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"ifRhs6e7OcR3","executionInfo":{"status":"ok","timestamp":1614346538101,"user_tz":-300,"elapsed":38857,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"87a1d188-adda-4977-fb8a-5b421456f7ed"},"source":["# Zulu for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"Kwakungukuhlala okuhle kakhulu empilweni yami, ngangikuthanda !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.996036greatKwakungukuhlala okuhle kakhulu empilweni yami,...[0.020903199911117554, -0.05406733229756355, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.996036 ... [0.020903199911117554, -0.05406733229756355, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":17}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"6uelDwq4xdWv","executionInfo":{"status":"ok","timestamp":1614346540885,"user_tz":-300,"elapsed":41490,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"81fc9d11-c2bc-43ee-dd8e-2570d441b19e"},"source":["# Zulu for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"Kwakuwukuhlala okubi kakhulu empilweni yami, ngangikuzonda !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.974802poorKwakuwukuhlala okubi kakhulu empilweni yami, n...[0.012615667656064034, -0.05433851107954979, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.974802 ... [0.012615667656064034, -0.05433851107954979, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"markdown","metadata":{"id":"VGVvzl_30a0T"},"source":["# The Model understands Turkish\n","![tr](https://www.worldometers.info/img/flags/small/tn_tu-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"DRNnuEeQz2pd","executionInfo":{"status":"ok","timestamp":1614346543620,"user_tz":-300,"elapsed":43890,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"f5359d0a-11f1-47a9-afbc-c667d7420ce0"},"source":["\n","# Turkish for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"Hayatımın en iyi kalışıydı, onu sevdim! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.998495greatHayatımın en iyi kalışıydı, onu sevdim![0.01813080720603466, -0.018600456416606903, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.998495 ... [0.01813080720603466, -0.018600456416606903, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":19}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"aOSsiK6J0jWs","executionInfo":{"status":"ok","timestamp":1614346909801,"user_tz":-300,"elapsed":3979,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1e71ac31-6d05-472a-eae3-3725b43b62d6"},"source":["# Turkish for: 'It was awful!! '\n","fitted_pipe.predict(\"Berbattı!!\")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.993295poorBerbattı!![0.004018284380435944, -0.057571493089199066, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.993295 ... [0.004018284380435944, -0.057571493089199066, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":47}]},{"cell_type":"markdown","metadata":{"id":"803qL2gt0vlb"},"source":["# The Model understands Hebrew\n","![he](https://www.worldometers.info/img/flags/small/tn_sf-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"XQ5VCtxw0pc0","executionInfo":{"status":"ok","timestamp":1614346549067,"user_tz":-300,"elapsed":48742,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"afcf8675-dc8e-4b45-f492-113af2023909"},"source":["\t\t\n","# Hebrew for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"זה היה השהייה הכי טובה בחיי, אהבתי את זה !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.999557greatזה היה השהייה הכי טובה בחיי, אהבתי את זה !![0.021775633096694946, -0.0414351262152195, -0...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999557 ... [0.021775633096694946, -0.0414351262152195, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":21}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"9w2ZHfns05A4","executionInfo":{"status":"ok","timestamp":1614346551851,"user_tz":-300,"elapsed":51401,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"a266a4ce-dd04-4172-8c89-d4d1a97e6995"},"source":["# Hebrew for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"זה היה השהייה הגרועה בחיי, שנאתי את זה !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.672065poorזה היה השהייה הגרועה בחיי, שנאתי את זה !![0.020672744140028954, -0.04731827974319458, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.672065 ... [0.020672744140028954, -0.04731827974319458, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":22}]},{"cell_type":"markdown","metadata":{"id":"SDlpd33H1HIX"},"source":["# The Model understands Telugu\n","![te](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Kc5n1bzv1BJT","executionInfo":{"status":"ok","timestamp":1614346554511,"user_tz":-300,"elapsed":53649,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"266ab13e-9de7-4cce-d7cd-48c5b86102bf"},"source":["# Telugu for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"ఇది నా జీవితంలో ఉత్తమమైన కాలం, నేను దానిని ఇష్టపడ్డాను !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.994647greatఇది నా జీవితంలో ఉత్తమమైన కాలం, నేను దానిని ఇష్...[0.018417492508888245, -0.05317000299692154, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.994647 ... [0.018417492508888245, -0.05317000299692154, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":23}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"-l-u6vrz1Obe","executionInfo":{"status":"ok","timestamp":1614346557176,"user_tz":-300,"elapsed":56033,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"d54c5e5a-864b-402b-eef7-10cec058096a"},"source":["\t\t\n","# Telugu for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"ఇది నా జీవితంలో చెత్తగా ఉంది, నేను అసహ్యించుకున్నాను !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.971539poorఇది నా జీవితంలో చెత్తగా ఉంది, నేను అసహ్యించుకు...[-0.000955148774664849, -0.05593333765864372, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.971539 ... [-0.000955148774664849, -0.05593333765864372, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"markdown","metadata":{"id":"nziBUe8t1Zwn"},"source":["# Model understands Russian\n","![ru](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Ckyjl3YQ1VFn","executionInfo":{"status":"ok","timestamp":1614346560265,"user_tz":-300,"elapsed":58241,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"5a57e3d0-9a0d-4824-9476-126884f01639"},"source":["# Russian for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"Это был лучший отдых в моей жизни, мне очень понравилось !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.999264greatЭто был лучший отдых в моей жизни, мне очень п...[0.024515371769666672, -0.03857886791229248, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999264 ... [0.024515371769666672, -0.03857886791229248, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":25}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"GIdWkfGv1gFz","executionInfo":{"status":"ok","timestamp":1614346562655,"user_tz":-300,"elapsed":60083,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0be2cac8-9e26-44a2-d30b-6b3f4126090a"},"source":["# Russian for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"Это было худшее пребывание в моей жизни, я ненавидел его !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.894004poorЭто было худшее пребывание в моей жизни, я нен...[-0.006281813140958548, -0.03218897059559822, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.894004 ... [-0.006281813140958548, -0.03218897059559822, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":26}]},{"cell_type":"markdown","metadata":{"id":"8R1j9mwz2Cm4"},"source":["# Model understands Urdu\n","![ur](https://www.worldometers.info/img/flags/small/tn_pk-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"j4zwvRV11pcG","executionInfo":{"status":"ok","timestamp":1614346566536,"user_tz":-300,"elapsed":63285,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"524b8550-cc34-4df7-e1fe-c5eb974be9b9"},"source":["\t\t\n","# Urdu for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"یہ میری زندگی کا بہترین قیام تھا ، مجھے اس سے پیار تھا !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.998025greatیہ میری زندگی کا بہترین قیام تھا ، مجھے اس سے ...[0.016523482277989388, -0.04361935704946518, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.998025 ... [0.016523482277989388, -0.04361935704946518, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":27}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SxzTuK4b2UKV","executionInfo":{"status":"ok","timestamp":1614346568197,"user_tz":-300,"elapsed":64721,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2b276f5b-476b-43e5-e2be-24d9f338102a"},"source":["\n","# Urdu for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"یہ میری زندگی کا بدترین قیام تھا ، مجھے اس سے نفرت تھی !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.947351poorیہ میری زندگی کا بدترین قیام تھا ، مجھے اس سے ...[0.010332305915653706, -0.032978497445583344, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.947351 ... [0.010332305915653706, -0.032978497445583344, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":28}]},{"cell_type":"markdown","metadata":{"id":"RoNg-C3k1qcX"},"source":["# Model understands Hindi\n","![hi](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"QZ9RT5Wv1r1n","executionInfo":{"status":"ok","timestamp":1614346572863,"user_tz":-300,"elapsed":69071,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2ddc360e-056d-4d5b-ff50-bfc5c8e81067"},"source":["\n","\t\t\n","# hindi for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"यह मेरे जीवन का सबसे अच्छा प्रवास था, मुझे यह पसंद था !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.998548greatयह मेरे जीवन का सबसे अच्छा प्रवास था, मुझे यह ...[0.02085617184638977, -0.03468615561723709, -0...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.998548 ... [0.02085617184638977, -0.03468615561723709, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":29}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"quM-IL2i12-B","executionInfo":{"status":"ok","timestamp":1614346575324,"user_tz":-300,"elapsed":71369,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"1187b9af-b4b8-42f6-fff7-4eef2d16f6e4"},"source":["\t\t\n","# hindi for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"यह मेरे जीवन का सबसे बुरा पड़ाव था, मुझे इससे नफरत थी !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.941456poorयह मेरे जीवन का सबसे बुरा पड़ाव था, मुझे इससे ...[0.01690828427672386, -0.031606461852788925, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.941456 ... [0.01690828427672386, -0.031606461852788925, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":30}]},{"cell_type":"markdown","metadata":{"id":"R4ByHOZn35Lc"},"source":["# The model understands Tartar\n","![tt](https://www.worldometers.info/img/flags/small/tn_rs-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"2JrzusSQ18F5","executionInfo":{"status":"ok","timestamp":1614346578887,"user_tz":-300,"elapsed":74650,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7acec658-1683-4ffa-b388-8e49f6f507cd"},"source":["# Tartar for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"Бу минем тормышымның иң яхшы торышы иде, мин аны яраттым !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.995180greatБу минем тормышымның иң яхшы торышы иде, мин а...[0.0361848883330822, -0.039926111698150635, -0...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.995180 ... [0.0361848883330822, -0.039926111698150635, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":31}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"J06Xm_Ln4AYu","executionInfo":{"status":"ok","timestamp":1614346581079,"user_tz":-300,"elapsed":76674,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"210fda0b-ff92-4794-ace7-b1707a588886"},"source":["# Tartar for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"Бу минем тормышымның иң начар торышы иде, мин аны нәфрәт иттем !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.973673poorБу минем тормышымның иң начар торышы иде, мин ...[0.023216629400849342, -0.029130196198821068, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.973673 ... [0.023216629400849342, -0.029130196198821068, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":32}]},{"cell_type":"markdown","metadata":{"id":"HKj5yWwwMplH"},"source":["# The Model understands French\n","![fr](https://www.worldometers.info/img/flags/small/tn_fr-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"CUHcJZfJMplL","executionInfo":{"status":"ok","timestamp":1614346583438,"user_tz":-300,"elapsed":78665,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"7da1d34a-79ff-4316-cdbc-13c04caf3e1b"},"source":["# French for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"C'était le meilleur séjour de ma vie, j'ai adoré !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.999693greatC'était le meilleur séjour de ma vie, j'ai ado...[0.016066204756498337, -0.042281411588191986, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.999693 ... [0.016066204756498337, -0.042281411588191986, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":33}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"57NY2XoTMplM","executionInfo":{"status":"ok","timestamp":1614346586277,"user_tz":-300,"elapsed":81148,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"c2e87a71-3fbd-4873-d412-6f1562585e8a"},"source":["# French for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"C'était le pire séjour de ma vie, je l'ai détesté !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.625446poorC'était le pire séjour de ma vie, je l'ai déte...[0.016935674473643303, -0.04099534824490547, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.625446 ... [0.016935674473643303, -0.04099534824490547, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":34}]},{"cell_type":"markdown","metadata":{"id":"jD2TBgT0Nq6F"},"source":["# The Model understands Thai\n","![th](https://www.worldometers.info/img/flags/small/tn_th-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"gBp11S5GNq6S","executionInfo":{"status":"ok","timestamp":1614346588839,"user_tz":-300,"elapsed":83317,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"22df6a5d-f77b-4c70-c453-980c108c55a3"},"source":["\t\t\n","# Thai for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"เป็นการพักที่ดีที่สุดในชีวิตฉันชอบมาก !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.991230greatเป็นการพักที่ดีที่สุดในชีวิตฉันชอบมาก !![-0.009586167521774769, -0.03715922310948372, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.991230 ... [-0.009586167521774769, -0.03715922310948372, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":35}]},{"cell_type":"code","metadata":{"id":"R6nKI7C3QKa3","colab":{"base_uri":"https://localhost:8080/","height":106},"executionInfo":{"status":"ok","timestamp":1614346591489,"user_tz":-300,"elapsed":85525,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"6298a66a-c81e-4a1c-9e82-e1f4be5f8b3c"},"source":["\t\t\n","# Thai for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"เป็นการพักที่แย่ที่สุดในชีวิตฉันเกลียดมัน !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.992197poorเป็นการพักที่แย่ที่สุดในชีวิตฉันเกลียดมัน !![-0.01785094477236271, -0.04203420504927635, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.992197 ... [-0.01785094477236271, -0.04203420504927635, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":36}]},{"cell_type":"markdown","metadata":{"id":"mLItI4KZOElB"},"source":["# The Model understands Khmer\n","![km](https://www.worldometers.info/img/flags/small/tn_cb-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"SWbqMgAwOElC","executionInfo":{"status":"ok","timestamp":1614346594266,"user_tz":-300,"elapsed":87513,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"48d23fdb-d7c0-4393-ef1a-62f7525991ce"},"source":["# Khmer for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"វាជាការស្នាក់នៅដ៏ល្អបំផុតក្នុងជីវិតខ្ញុំស្រឡាញ់វាណាស់ !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.996174greatវាជាការស្នាក់នៅដ៏ល្អបំផុតក្នុងជីវិតខ្ញុំស្រឡាញ...[0.0050583695992827415, -0.05372818186879158, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.996174 ... [0.0050583695992827415, -0.05372818186879158, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"beoCtm4xQf2P","executionInfo":{"status":"ok","timestamp":1614346609923,"user_tz":-300,"elapsed":5422,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"644cd909-2753-4620-ff05-3ad51c0653b5"},"source":["# Khmer for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"វាជាការស្នាក់នៅដ៏អាក្រក់បំផុតក្នុងជីវិតខ្ញុំស្អប់វាណាស់ !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.994846poorវាជាការស្នាក់នៅដ៏អាក្រក់បំផុតក្នុងជីវិតខ្ញុំស្...[-0.01400109101086855, -0.046681348234415054, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.994846 ... [-0.01400109101086855, -0.046681348234415054, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":39}]},{"cell_type":"markdown","metadata":{"id":"lvE-LbNiPoBT"},"source":["# The Model understands Yiddish\n","![yi](https://www.worldometers.info/img/flags/small/tn_pl-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"sZlmLhajPoBb","executionInfo":{"status":"ok","timestamp":1614346615967,"user_tz":-300,"elapsed":4656,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"e9868050-4a7b-43e1-f845-870671d797a4"},"source":["\t\t\n","# Yiddish for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"דאָס איז געווען דער בעסטער בלייַבן פון מיין לעבן, איך ליב געהאט עס !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.998024greatדאָס איז געווען דער בעסטער בלייַבן פון מיין לע...[0.016376741230487823, -0.04827779531478882, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.998024 ... [0.016376741230487823, -0.04827779531478882, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":40}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"5h-pha_nPoBc","executionInfo":{"status":"ok","timestamp":1614346872786,"user_tz":-300,"elapsed":4483,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"0bcce17c-7820-4c21-b3fc-fdeeac244664"},"source":["# Yiddish for: 'It was awful!! '\n","fitted_pipe.predict(\"עס איז געווען שרעקלעך !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.751072poorעס איז געווען שרעקלעך !![0.0001469508424634114, -0.057936470955610275,...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.751072 ... [0.0001469508424634114, -0.057936470955610275,...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":46}]},{"cell_type":"markdown","metadata":{"id":"XSz4WzScaAHj"},"source":["# The Model understands Kygrgyz\n","![ky](https://www.worldometers.info/img/flags/small/tn_kg-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"DXz6fhJSaAHu","executionInfo":{"status":"ok","timestamp":1614346621291,"user_tz":-300,"elapsed":9515,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"907f11f4-6995-40e1-bd7d-41c3e380e3e6"},"source":["\n","# Kygrgyz for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"Бул менин жашоомдогу эң жакшы жашоо болду, мен аны жакшы көрчүмүн !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.996750greatБул менин жашоомдогу эң жакшы жашоо болду, мен...[0.032868802547454834, -0.04612768813967705, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.996750 ... [0.032868802547454834, -0.04612768813967705, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":42}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"lh_ZSHlPaAHv","executionInfo":{"status":"ok","timestamp":1614346623853,"user_tz":-300,"elapsed":11412,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2848f44b-e5ca-4393-d85b-53f309f4d607"},"source":["# Kygrgyz for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"Бул менин жашоомдогу эң жаман калуу болду, мен аны жек көрдүм !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.981754poorБул менин жашоомдогу эң жаман калуу болду, мен...[0.023127004504203796, -0.04409905895590782, -...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.981754 ... [0.023127004504203796, -0.04409905895590782, -...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":43}]},{"cell_type":"markdown","metadata":{"id":"DGMVMKaTdJFj"},"source":["# The Model understands Tamil\n","![ta](https://www.worldometers.info/img/flags/small/tn_in-flag.gif)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"JWDr_LoCdJFn","executionInfo":{"status":"ok","timestamp":1614346626603,"user_tz":-300,"elapsed":13407,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"76068b9e-f503-46f0-fcfd-1bd5b0f88e39"},"source":["# Tamil for: 'It was the best stay of my life, I loved it!!'\n","fitted_pipe.predict(\"இது என் வாழ்க்கையின் சிறந்த தங்குமிடம், நான் அதை நேசித்தேன் !! \")\n","\t\t"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.994477greatஇது என் வாழ்க்கையின் சிறந்த தங்குமிடம், நான் அ...[0.0208477433770895, -0.049386993050575256, -0...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.994477 ... [0.0208477433770895, -0.049386993050575256, -0...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":44}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":106},"id":"Q6C0BmTtdJFp","executionInfo":{"status":"ok","timestamp":1614346629260,"user_tz":-300,"elapsed":15640,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"86761e33-ef65-4df8-de0b-817c39712229"},"source":["# Tamil for: 'It was the worst stay of my life, I hated it!!'\n","fitted_pipe.predict(\"இது என் வாழ்க்கையின் மிக மோசமான காலம், நான் அதை வெறுத்தேன் !! \")"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
category_confidencecategorydocumentxx_embed_sentence_labse_embeddings
origin_index
00.796923poorஇது என் வாழ்க்கையின் மிக மோசமான காலம், நான் அத...[0.009537279605865479, -0.053919799625873566, ...
\n","
"],"text/plain":[" category_confidence ... xx_embed_sentence_labse_embeddings\n","origin_index ... \n","0 0.796923 ... [0.009537279605865479, -0.053919799625873566, ...\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":45}]},{"cell_type":"markdown","metadata":{"id":"2BB-NwZUoHSe"},"source":["# 5. Lets save the model"]},{"cell_type":"code","metadata":{"id":"eLex095goHwm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1613907018382,"user_tz":-300,"elapsed":4090664,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"2394b2b7-795e-43f2-de73-048ed4ee2ecc"},"source":["stored_model_path = './models/classifier_dl_trained' \n","fitted_pipe.save(stored_model_path)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Stored model in ./models/classifier_dl_trained\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"e_b2DPd4rCiU"},"source":["# 6. Lets load the model from HDD.\n","This makes Offlien NLU usage possible! \n","You need to call nlu.load(path=path_to_the_pipe) to load a model/pipeline from disk."]},{"cell_type":"code","metadata":{"id":"SO4uz45MoRgp","colab":{"base_uri":"https://localhost:8080/","height":105},"executionInfo":{"status":"ok","timestamp":1613907286544,"user_tz":-300,"elapsed":134966,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"3d6a2ee6-d5dd-405f-dafe-35bf67fcc232"},"source":["stored_model_path = './models/classifier_dl_trained' \n","hdd_pipe = nlu.load(path=stored_model_path)\n","\n","preds = hdd_pipe.predict('It was a good experince!')\n","preds"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
xx_embed_sentence_labse_embeddingsclassifierclassifier_confidencedocument
origin_index
0[0.059547875076532364, -0.03929056599736214, -...great0.999993It was a good experince!
\n","
"],"text/plain":[" xx_embed_sentence_labse_embeddings ... document\n","origin_index ... \n","0 [0.059547875076532364, -0.03929056599736214, -... ... It was a good experince!\n","\n","[1 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"code","metadata":{"id":"e0CVlkk9v6Qi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1613907286546,"user_tz":-300,"elapsed":134953,"user":{"displayName":"ahmed lone","photoUrl":"","userId":"02458088882398909889"}},"outputId":"dc9fe771-a4aa-4ee3-fdbf-7ad1ef7d031d"},"source":["hdd_pipe.print_info()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["The following parameters are configurable for this NLU pipeline (You can copy paste the examples) :\n",">>> pipe['document_assembler'] has settable params:\n","pipe['document_assembler'].setCleanupMode('shrink') | Info: possible values: disabled, inplace, inplace_full, shrink, shrink_full, each, each_full, delete_full | Currently set to : shrink\n",">>> pipe['sentence_detector'] has settable params:\n","pipe['sentence_detector'].setCustomBounds([]) | Info: characters used to explicitly mark sentence bounds | Currently set to : []\n","pipe['sentence_detector'].setDetectLists(True) | Info: whether detect lists during sentence detection | Currently set to : True\n","pipe['sentence_detector'].setExplodeSentences(False) | Info: whether to explode each sentence into a different row, for better parallelization. Defaults to false. | Currently set to : False\n","pipe['sentence_detector'].setMaxLength(99999) | Info: Set the maximum allowed length for each sentence | Currently set to : 99999\n","pipe['sentence_detector'].setMinLength(0) | Info: Set the minimum allowed length for each sentence. 
| Currently set to : 0\n","pipe['sentence_detector'].setUseAbbreviations(True) | Info: whether to apply abbreviations at sentence detection | Currently set to : True\n","pipe['sentence_detector'].setUseCustomBoundsOnly(False) | Info: Only utilize custom bounds in sentence detection | Currently set to : False\n",">>> pipe['regex_tokenizer'] has settable params:\n","pipe['regex_tokenizer'].setCaseSensitiveExceptions(True) | Info: Whether to care for case sensitiveness in exceptions | Currently set to : True\n","pipe['regex_tokenizer'].setTargetPattern('\\S+') | Info: pattern to grab from text as token candidates. Defaults \\S+ | Currently set to : \\S+\n","pipe['regex_tokenizer'].setMaxLength(99999) | Info: Set the maximum allowed length for each token | Currently set to : 99999\n","pipe['regex_tokenizer'].setMinLength(0) | Info: Set the minimum allowed length for each token | Currently set to : 0\n",">>> pipe['glove'] has settable params:\n","pipe['glove'].setBatchSize(32) | Info: Batch size. Large values allows faster processing but requires more memory. | Currently set to : 32\n","pipe['glove'].setCaseSensitive(False) | Info: whether to ignore case in tokens for embeddings matching | Currently set to : False\n","pipe['glove'].setDimension(768) | Info: Number of embedding dimensions | Currently set to : 768\n","pipe['glove'].setMaxSentenceLength(128) | Info: Max sentence length to process | Currently set to : 128\n","pipe['glove'].setIsLong(False) | Info: Use Long type instead of Int type for inputs buffer - Some Bert models require Long instead of Int. 
| Currently set to : False\n","pipe['glove'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n",">>> pipe['classifier_dl'] has settable params:\n","pipe['classifier_dl'].setClasses(['average', 'great', 'poor']) | Info: get the tags used to trained this ClassifierDLModel | Currently set to : ['average', 'great', 'poor']\n","pipe['classifier_dl'].setStorageRef('labse') | Info: unique reference name for identification | Currently set to : labse\n"],"name":"stdout"}]}]} \ No newline at end of file diff --git a/examples/colab/component_examples/named_entity_recognition_(NER)/NER_aspect_airline_ATIS.ipynb b/examples/colab/component_examples/named_entity_recognition_(NER)/NER_aspect_airline_ATIS.ipynb index dcfc4c79..cae6b67f 100644 --- a/examples/colab/component_examples/named_entity_recognition_(NER)/NER_aspect_airline_ATIS.ipynb +++ b/examples/colab/component_examples/named_entity_recognition_(NER)/NER_aspect_airline_ATIS.ipynb @@ -1,576 +1 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "NER_aspect_airline_ATIS.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "NYQRU3pRO146" - }, - "source": [ - "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", - "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/component_examples/named_entity_recognition_(NER)/NER_aspect_airline_ATIS.ipynb)\n", - "\n", - "\n", - "Named entities are phrases that contain the names of persons, organizations, locations, times and quantities. Example:\n", - "
\n", - "
\n", - "\n", - "#Content\n", - "ATIS dataset provides large number of messages and their associated intents that can be used in training a classifier. Within a chatbot, intent refers to the goal the customer has in mind when typing in a question or comment. While entity refers to the modifier the customer uses to describe their issue, the intent is what they really mean. For example, a user says, ‘I need new shoes.’ The intent behind the message is to browse the footwear on offer. Understanding the intent of the customer is key to implementing a successful chatbot experience for end-user.\n", - "https://www.kaggle.com/hassanamin/atis-airlinetravelinformationsystem\n", - "
\n", - "
\n", - "\n", - "|Tags predicted by this model | \t\n", - "|------|\n", - " | O|\n", - " | I-depart_time.end_time|\n", - " | B-arrive_date.date_relative|\n", - " | I-fromloc.state_name|\n", - " | B-depart_date.date_relative|\n", - " | B-fromloc.state_code|\n", - " | B-meal_description|\n", - " | B-depart_time.time_relative|\n", - " | I-fare_amount|\n", - " | I-fromloc.city_name|\n", - " | B-booking_class|\n", - " | I-arrive_time.end_time|\n", - " | B-return_date.today_relative|\n", - " | B-fromloc.state_name|\n", - " | B-round_trip|\n", - " | B-depart_date.today_relative|\n", - " | I-return_date.day_number|\n", - " | I-depart_time.start_time|\n", - " | B-period_of_day|\n", - " | B-arrive_date.day_number|\n", - " | B-flight_stop|\n", - " | B-depart_date.day_name|\n", - " | I-stoploc.city_name|\n", - " | I-return_date.today_relative|\n", - " | B-class_type|\n", - " | B-stoploc.state_code|\n", - " | B-economy|\n", - " | B-depart_time.end_time|\n", - " | B-return_date.date_relative|\n", - " | I-fromloc.airport_name|\n", - " | B-arrive_date.month_name|\n", - " | I-flight_mod|\n", - " | B-toloc.airport_code|\n", - " | I-depart_time.end_time|\n", - " | B-airline_code|\n", - " | B-flight_mod|\n", - " | B-cost_relative|\n", - " | B-state_name|\n", - " | B-fromloc.city_name|\n", - " | B-depart_time.period_of_day|\n", - " | I-city_name|\n", - " | B-depart_time.period_mod|\n", - " | B-city_name|\n", - " | B-meal|\n", - " | B-return_date.day_number|\n", - " | I-airline_name|\n", - " | I-restriction_code|\n", - " | B-airline_name|\n", - " | B-restriction_code|\n", - " | B-flight|\n", - " | B-transport_type|\n", - " | B-time_relative|\n", - " | B-arrive_time.time_relative|\n", - " | B-fromloc.airport_code|\n", - " | B-time|\n", - " | I-toloc.city_name|\n", - " | B-toloc.state_name|\n", - " | B-meal_code|\n", - " | I-arrive_date.day_number|\n", - " | B-depart_time.start_time|\n", - " | B-month_name|\n", - " | B-fromloc.airport_name|\n", - " | B-flight_number|\n", - " | 
B-days_code|\n", - " | I-meal_description|\n", - " | B-fare_basis_code|\n", - " | I-cost_relative|\n", - " | I-time|\n", - " | B-return_time.period_of_day|\n", - " | I-depart_time.time|\n", - " | B-depart_date.day_number|\n", - " | I-economy|\n", - " | B-arrive_time.start_time|\n", - " | B-return_date.day_name|\n", - " | B-return_time.period_mod|\n", - " | B-airport_code|\n", - " | B-stoploc.airport_code|\n", - " | B-flight_time|\n", - " | I-transport_type|\n", - " | B-depart_date.month_name|\n", - " | I-toloc.airport_name|\n", - " | B-today_relative|\n", - " | I-arrive_time.period_of_day|\n", - " | B-day_name|\n", - " | B-toloc.city_name|\n", - " | B-connect|\n", - " | I-round_trip|\n", - " | B-depart_time.time|\n", - " | B-airport_name|\n", - " | B-arrive_time.period_of_day|\n", - " | B-stoploc.airport_name|\n", - " | I-class_type|\n", - " | B-aircraft_code|\n", - " | I-return_date.date_relative|\n", - " | B-toloc.country_name|\n", - " | I-flight_number|\n", - " | B-state_code|\n", - " | B-or|\n", - " | I-depart_date.today_relative|\n", - " | B-toloc.airport_name|\n", - " | I-arrive_time.time|\n", - " | I-flight_time|\n", - " | I-state_name|\n", - " | I-airport_name|\n", - " | I-depart_time.period_of_day|\n", - " | B-arrive_time.time|\n", - " | B-depart_date.year|\n", - " | I-flight_stop|\n", - " | I-toloc.state_name|\n", - " | B-arrive_date.day_name|\n", - " | B-compartment|\n", - " | I-depart_date.day_number|\n", - " | I-meal_code|\n", - " | B-arrive_time.end_time|\n", - " | I-today_relative|\n", - " | I-arrive_time.start_time|\n", - " | B-toloc.state_code|\n", - " | B-day_number|\n", - " | I-arrive_time.time_relative|\n", - " | I-fare_basis_code|\n", - " | I-depart_time.time_relative|\n", - " | B-return_date.month_name|\n", - " | B-stoploc.city_name|\n", - " | B-arrive_time.period_mod|\n", - " | B-fare_amount|\n", - " | B-mod|\n", - " | B-arrive_date.today_relative|\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ] 
- }, - { - "cell_type": "code", - "metadata": { - "id": "M2-GiYL6xurJ", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "c254f482-0097-471f-89a4-e73c025d71d1" - }, - "source": [ - "import os\n", - "! apt-get update -qq > /dev/null \n", - "# Install java\n", - "! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n", - "os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n", - "os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n", - "!pip install nlu==1.1.1.rc1 pyspark==2.4.7 > /dev/null\n", - "! wget http://ckl-it.de/wp-content/uploads/2021/01/atis_intents.csv" - ], - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "text": [ - "--2021-02-02 13:11:15-- http://ckl-it.de/wp-content/uploads/2021/01/atis_intents.csv\n", - "Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n", - "Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 391936 (383K) [text/csv]\n", - "Saving to: ‘atis_intents.csv’\n", - "\n", - "atis_intents.csv 100%[===================>] 382.75K --.-KB/s in 0.1s \n", - "\n", - "2021-02-02 13:11:15 (3.87 MB/s) - ‘atis_intents.csv’ saved [391936/391936]\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Gph8XOL1Pzpl" - }, - "source": [ - "# NLU makes NER easy. \n", - "\n", - "You just need to load the NER model via ner.load() and predict on some dataset. \n", - "It could be a pandas dataframe with a column named text or just an array of strings." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 552 - }, - "id": "pmpZSNvGlyZQ", - "outputId": "ff099ddb-e4cf-4480-9d4b-fe8bc4207ff0" - }, - "source": [ - "import nlu \n", - "import pandas as pd\n", - "\n", - "df = pd.read_csv(\"atis_intents.csv\")\n", - "df.columns = [\"flight\",\"text\"]\n", - "ner_df = nlu.load('en.ner.aspect.airline',).predict(df[\"text\"],output_level='chunk')\n", - "ner_df" - ], - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "text": [ - "nerdl_atis_840b_300d download started this may take some time.\n", - "Approximate size to download 14.5 MB\n", - "[OK!]\n", - "glove_840B_300 download started this may take some time.\n", - "Approximate size to download 2.3 GB\n", - "[OK!]\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
entitiesword_embeddingsner_confidenceentities_confidence
origin_index
0pittsburgh[[-0.038548000156879425, 0.5425199866294861, -...[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...fromloc.city_name
0baltimore[[-0.038548000156879425, 0.5425199866294861, -...[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...toloc.city_name
0thursday[[-0.038548000156879425, 0.5425199866294861, -...[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...depart_date.day_name
0morning[[-0.038548000156879425, 0.5425199866294861, -...[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...depart_time.period_of_day
1arrival time[[-0.038548000156879425, 0.5425199866294861, -...[1.0, 1.0, 0.9991999864578247, 0.8240000009536...flight_time
...............
4975san francisco[[-0.13562999665737152, 0.3321700096130371, -0...[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998000264167...toloc.city_name
4975denver[[-0.13562999665737152, 0.3321700096130371, -0...[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998000264167...stoploc.city_name
4976delta[[-0.08496099710464478, 0.5019999742507935, 0....[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...airline_name
4976denver[[-0.08496099710464478, 0.5019999742507935, 0....[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...fromloc.city_name
4976san francisco[[-0.08496099710464478, 0.5019999742507935, 0....[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...toloc.city_name
\n", - "

16673 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " entities ... entities_confidence\n", - "origin_index ... \n", - "0 pittsburgh ... fromloc.city_name\n", - "0 baltimore ... toloc.city_name\n", - "0 thursday ... depart_date.day_name\n", - "0 morning ... depart_time.period_of_day\n", - "1 arrival time ... flight_time\n", - "... ... ... ...\n", - "4975 san francisco ... toloc.city_name\n", - "4975 denver ... stoploc.city_name\n", - "4976 delta ... airline_name\n", - "4976 denver ... fromloc.city_name\n", - "4976 san francisco ... toloc.city_name\n", - "\n", - "[16673 rows x 4 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 2 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "STc7iOwtljGo" - }, - "source": [ - "## Lets explore our data which the predicted NER tags and visalize them! \n", - "\n", - "We specify [1:] so we dont see the count for the O-tag wich is the most common, since most words in a sentence are not named entities and thus not part of a chunk" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 919 - }, - "id": "UDSAYjadlfdK", - "outputId": "5778826a-5998-486e-98ad-16b66ccfd4f3" - }, - "source": [ - "ner_df['entities'].value_counts()[0:50].plot.bar(title='Occurence of Named Entities in dataset', figsize=(20,14))" - ], - "execution_count": 3, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 3 - }, - { - "output_type": "display_data", - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "tags": [], - "needs_background": "light" - } - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YO6d6VYi4aJQ" - }, - "source": [ - "## Most occurding `fromloc.city_name` tagged entities" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 386 - }, - "id": "rlcEvP9tOSiy", - "outputId": "64129ead-b568-43ab-b52d-da2b8d96c54f" - }, - "source": [ - "ner_type_to_viz = 'fromloc.city_name'\n", - "ner_df[ner_df.entities_confidence == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring fromloc.city_name labeled entities in the dataset')" - ], - "execution_count": 4, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 4 - }, - { - "output_type": "display_data", - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbEAAAFgCAYAAAA1skc7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOydd7weRfX/358UamiBiBBKABFEpRmagg1RihSRItJEFP2KAlawIlbkpyJYQASBKCpIEQQUEek9CRC6hN4JSIn0cn5/nLN59tm7+9ybwL3JxvN+vZ7Xszu7Ozs7OzNn5syZszIzkiRJkqSNDJvdCUiSJEmSWSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaWi/EJH1I0r2S/itprdmdnsFG0pGSvjlIcQ9ZXkoaJ8kkjRjM+5Tu9zdJuw/FvWY3kj4m6ZIBnvttSb+fxfvMlmtn4V5fk3R0j+M7S/rHa3zP1zROSRdI+sRrFV8l7uMkfW8w4h4K+hViku6S9IKkJSrh10QjNO7VJCDieMOriOLHwGfNbBTw+FA2jLMDM/u0mX13kKKfkZdmds0g3WO2YGabmdnxMHONfNIuJL1b0n3lMDP7gZl9Io736TyZ2Qlm9v7XMh2vJs6hFPAzy2AK01m9z0BHYncCO5Vu8FZggVlI22CwPHDj7E7EUCBp+CDfojEv5+aOQZIkLcbMev6Au4BvAFeXwn4MfB0wYFyELQJMAKYBd8c1w+LYG4ALgSeBR4ETI/yiiONp4L/AjjX3HxZx3Q08EvdYBJg3rimuvx24J/b/G78NIo6PAzcDjwPnAMuX4jfg08BtwBPALwE15MW8wM+AB+L3M2De0vGtgWuBpyI9m0b4aODYuOZx4C8R/jHgkso9DHhDbB8HHAGcHc/4vgj7Xhx/N3Af8MXImweBPUpxLQ78NdJzNfC96v1Kz9WVl6V3vz8wBXgeGAFshQu6J4ALgDdVysqX4/yngWOAJYG/AdOBfwKLxbnj4n4jYn9p4AzgP8BU4JOleIcDX4s8nQ
5MApZteEdN7+AC4BPAm4DngJfjmZ8A1gEeBoaX4tkWuK6fuvFt4CS8TE6PfBlfOn5AKc03AR8qHfsYcClwaKThDuDtEX5vvM/dK+/ox3gZfxg4Epi/IV0fK79n4LCI86nIu40qz3AycGKkczKwRun40sApeL2+E9incu3vS/vrA5fF81wHvLt0bAW8DZgOnAv8onxtzTN8MN7jExHn6pVy9iW8nD0ZaZ8PWBB4FniFThuwdDmd1LQRNfm1aqTxP8CtwA6lY5vHu5wO3A98aYDvYEDtDLAp8ALwYqTvulL5/W6UmenAP4AlBpL3NfdYK97z9Mi7P9FpUxYDzoz3/XhsLxPHvo/Xm+cibb8YQPlaF5gYxx4Gftpfmpvu0/g8vQ6WCsz74mW+CW9Q7sN77WUhNgE4HVgIb6D+DewZx/6IC71heGHbsK7Rbrj/x/FGbUVgFHAq8LuGRn8cpYax1KhNjbSPwAXiZZXrzwQWBZaLl7dpQ1q+A1wBvA4YEy/gu6WX9SSwSTznWGDVOHZWFJbFgJHAu+oKes3zHBdxvqOUd8fRLcReinSNxCvYM3QExZ/itwCwGl7Q+gixpncR7/5aYFlgfuCNuHDaJO73lcjbeUrnX4ELrrF4QzwZrzTzAf8CDqx7V3iH5ldx3prxHt4bx74MXA+sAghYA1i8Jv293sEFwCd65PtNwGal/dOAL/ZTN76NV7TN8XrxQ+CK0vHt8UZ0GLBj5N1SpTS8BOwR134Pb2B/iQus9+ONzKg4/1BcyI/G69hfgR8OsAHdBe/QjMA7PA8B85We4UVgu3inX8KF1chI9yTgW8A8eB28A/hA6dpCOIwFHou8GBbv4DFgTBy/HPhpPNs749lqhViUl0eA9SJvdsfL1rylcnZV5O1ovIP66VKduK/mPRXpHEffNmJGfuGC8N54LyMiLY8Cq8XxB4lGGq/Paw/wHcxMOzMjvaWwC/AO0RvxungBcPBA8r4Szzz4gODz8Y63i/dftCmLAx/G24yFgD8Tne5qPRpg+boc2DW2RwHrD7C89LlPYz3s94SOEPsGXkk3xXspI+LFjMML2gvFi47rPgVcENsTgKMIid6r4aw5fh7wmdL+KpHpI6rXU19A/0YI09gfhjf0y5euLwvVk4ADGtJyO7B5af8DwF2x/Wvg0JprlsJ7hov1V9Brnuc4YELl+HF0C7FnK8/7CN7DGR75tErpWO1IrOldxLv/eGn/m8BJlby8n04P6i5g59LxU4AjSvufozMKnfGucCH5MrBQ6dwfAsfF9q3A1gMoq7XvoFopGvJ9f+CE2B4dZWSpfu73beCfpf3VgGd7nH9t8RyRhttKx94a+bFkKewxXKALF4ArlY5tANzZcJ8+z1c5/jgx2opnKAveYURDjQuReyrXfhU4tnRtIRz2p9S5jLBzcAG0HC6wFywd+wPNQuwIonNYCruVTufvLmCX0rFDgCNLdeLVCLEdgYtrytWBsX0P3rYt3E/Z6HoHzFw7MyO9lfL7jdL+Z4C/95f3NXG/E9cIqRR2GdGm1Jy/JvB4XT0aYPm6CDiI0qhxIGkeyH2K38xYJ/4O+Gi8nAmVY0vgUv3uUtjduLQF77ELuErSjZI+PhP3Xbom3hF4b38gLA8cJukJSU/gKgKV0gbecyh4Bu8xDDQtS8f2sriQq7Is8B8ze3yA6a1ybz/HHzOzl0r7RfrH4PlUvr6/uPq7f9fzm9krcbyclw+Xtp+t2a/L26XxPJpeCiuXn6a8rTLQ8+r4PbClpAWBHfCG7MEBXFctO/MV84eSdpN0bansvQWvKwXVvMHM6vJrDN4znlSK6+8R3i+SviTpZklPxrWLVNIx4x3HO70PfyfLA0sX94xrv0Z93Vse2L5y7oZ4J25pvCF8unT+3TVxlOP6YiWuZenUNRh4nZ1ZlgfWq9x7Z+D1cfzD+OjhbkkXStpgJuJ+tWluur5X3ldZGrjfQlIEM96FpAUk/VrS3ZKewoXQor3m4/spX3
vio8dbJF0t6YOzkOaeDHiy3szulnQn/gL3rBx+FO/1L4+rZcB7X/fHtQ8BnwSQtCHwT0kXmdnUAdz6gYi3oOjVPVxzrtWE3Qt838xOGMC9BpqWwvhhuQgr7rNSw/1HS1rUzJ6oHHuakoGMpNfTl7pnGgjT8HxaBlftgjcEM0v5/g/gIwYAJCnivH8W01iOd7SkhUqCbEb5oZO3N/QTT9M7qNInT83sfkmX43Nhu+KjgVlG0vLAb4CNgcvN7GVJ1+IdqJnlUVygvdnMZiqvJW2EdyI3Bm40s1ckPV5Jx7Kl84fhZeYBvPzcaWYrD+BW9+I960/WpGF5YDFJC5YE2XI0l+2izn5/APet0l996e/4vcCFZrZJ7cVmVwNbSxoJfBYfUc1KverFzNb5xryv4UFgrCSVBNlydDp/X8S1XeuZ2UOS1gSuoVNeutLWX/kys9uAnaJcbQucLGnxAaR5wHkws+vE9sTnKco9KszsZfxlfl/SQlFov4D3bpG0vaRl4vTHI4GvxP7DuK69iT8Cn5e0gqRRwA9ww5CXas6dFvGW4zsS+KqkN0daFpG0/YCfuG9aviFpTCw5+BbxjLgRwx6SNpY0TNJYSatGb/5vwK8kLSZppKR3xjXXAW+WtKak+XA1wmtCvJNTgW9H72pVYLdXGe1JwBbxjCPxAv88ro54NWm9N+L4oaT5JK2Ol7Uib48GvitpZTmrR0WoUvsOas57GFhG0jyV8Al4hXwrnnevhgXxcj4NQNIe+EhsponR0W+AQyW9LuIbK+kDA7h8IVwYTQNGSPoWsHDlnLdJ2jZGkPvh7/QKfN5puqT9Jc0vabikt0hap+Y+xUj2A3HefHJz92XM7G58cv8gSfNER3bLHmn+DfBpSevF+15Q0haSFhrA8z4MLC5pkYbjdW1EmTOBN0raNerqSEnrSHpTpH1nSYuY2Yu4scIrDfG8Gh4GxkXDPxAa877m3Mvx8rBPPNu2+FxywUJ4h+kJSaOBA2vStmLl/MbyJWkXSWOiDBed+FcGkOb+5MIMZkqImdntZjax4fDn8JHFHcAluM77t3FsHeBKSf/FJ6f3NbM74ti3geNjSLlDTby/xVWZF+ETzs/FverS9wxu2XJpxLe+mZ0G/Aj4k3x4fAOw2Uw8dpnv4ZVxCm5oMDnCMLOr8MngQ3HjggvpjCB3xUeqt+BzVvvFNf/GjTL+iVstvdZrlz6LD+0fwvPwj3gDNUuY2a34JO7P8dHBlsCWZvbCq08qO+HzFQ/gRhUHmtk/49hPcQH6D7zhOAaf3Eaunt450tfrHZT5Fz6afkjSo6Xw0+L806IszTJmdhPwE7zReBgXjJe+iij3x41orohy/E+8x9wf5+Cqx3/jaqPn6KtWPh2fC3ocL6vbmtmL0RH6ID4vcif+zo/Gy1QX0RHZGlc3Tot7fJlOG/NRfI7tP3jDWJ2SKMc1Edfc/CLSNBWfxugXM7sFL+d3RBuwdOV4nzaicnw6blTzEbwsPoS3H/PGKbsCd8U7+DSuanyt+XP8PyZpcn8nDyDvy+e+gI+IPoa/ix3p7rD9DK9bj+Idmb9XojgM2E7S45IOp//ytSlwY7T9hwEfMbNnB5Dm6n0aUbdqNJmbkfQj4PVmtvvsTsuciqTbgU+VBGiSJHMwrXc7lTQjadVQvUnSuriK7rTZna45FUkfxlWA/5rdaUmSZGCkF4a5m4Vw1crSuErrJ7jqKKkg6QLcRH7X0N8X4X/Dzc2r/MDMfjBEyUuSpIFUJyZJkiStJdWJSZIkSWuZK9SJSyyxhI0bN252JyNJkqRVTJo06VEzG9Ci+TmVuUKIjRs3jokTmyz/kyRJkjok9fKc0gpSnZgkSZK0lhRiSZIkSWtJIZYkSZK0lhRiSZIkSWtJIZYkSZK0lhRiSZIkSWtJIZYkSZK0lhRiSZIkSWtJIZYkSZK0lrnCY0fBuAPOmrF918FbzMaUJEmSJENBjsSSJEmS1pJCLEmSJGktKcSSJEmS1j
LoQkzSKpKuLf2ekrSfpNGSzpV0W/wvFudL0uGSpkqaImntwU5jkiRJ0k4GXYiZ2a1mtqaZrQm8DXgGOA04ADjPzFYGzot9gM2AleO3F3DEYKcxSZIkaSdDrU7cGLjdzO4GtgaOj/DjgW1ie2tggjlXAItKWmqI05kkSZK0gKEWYh8B/hjbS5rZg7H9ELBkbI8F7i1dc1+EdSFpL0kTJU2cNm3aYKU3SZIkmYMZMiEmaR5gK+DP1WNmZoDNTHxmdpSZjTez8WPGtPrr2kmSJMksMpQjsc2AyWb2cOw/XKgJ4/+RCL8fWLZ03TIRliRJkiRdDKUQ24mOKhHgDGD32N4dOL0UvltYKa4PPFlSOyZJkiTJDIbE7ZSkBYFNgE+Vgg8GTpK0J3A3sEOEnw1sDkzFLRn3GIo0JkmSJO1jSISYmT0NLF4Jewy3Vqyea8DeQ5GuJEmSpN2kx44kSZKktaQQS5IkSVpLCrEkSZKktaQQS5IkSVpLCrEkSZKktaQQS5IkSVpLCrEkSZKktaQQS5IkSVpLCrEkSZKktaQQS5IkSVpLCrEkSZKktaQQS5IkSVpLCrEkSZKktaQQS5IkSVpLCrEkSZKktaQQS5IkSVpLCrEkSZKktaQQS5IkSVpLCrEkSZKktQyJEJO0qKSTJd0i6WZJG0gaLelcSbfF/2JxriQdLmmqpCmS1h6KNCZJkiTtY6hGYocBfzezVYE1gJuBA4DzzGxl4LzYB9gMWDl+ewFHDFEakyRJkpYx6EJM0iLAO4FjAMzsBTN7AtgaOD5OOx7YJra3BiaYcwWwqKSlBjudSZIkSfsYipHYCsA04FhJ10g6WtKCwJJm9mCc8xCwZGyPBe4tXX9fhCVJkiRJF0MhxEYAawNHmNlawNN0VIcAmJkBNjORStpL0kRJE6dNm/aaJTZJkiRpD0MhxO4D7jOzK2P/ZFyoPVyoCeP/kTh+P7Bs6fplIqwLMzvKzMab2fgxY8YMWuKTJEmSOZdBF2Jm9hBwr6RVImhj4CbgDGD3CNsdOD22zwB2CyvF9YEnS2rHJEmSJJnBiCG6z+eAEyTNA9wB7IEL0JMk7QncDewQ554NbA5MBZ6Jc5MkSZKkD0MixMzsWmB8zaGNa841YO9BT1SSJEnSetJjR5IkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrWVIhJikuyRdL+laSRMjbLSkcyXdFv+LRbgkHS5pqqQpktYeijQmSZIk7WMoR2LvMbM1zWx87B8AnGdmKwPnxT7AZsDK8dsLOGII05gkSZK0iNmpTtwaOD62jwe2KYVPMOcKYFFJS82OBCZJkiRzNkMlxAz4h6RJkvaKsCXN7MHYfghYMrbHAveWrr0vwrqQtJekiZImTps2bbDSnSRJkszBjBii+2xoZvdLeh1wrqRbygfNzCTZzERoZkcBRwGMHz++57XjDjira/+ug7eYmVslSZIkcyhDMhIzs/vj/xHgNGBd4OFCTRj/j8Tp9wPLli5fJsKSJEmSpItBF2KSFpS0ULENvB+4ATgD2D1O2x04PbbPAHYLK8X1gSdLasckSZIkmcFQqBOXBE6TVNzvD2b2d0lXAydJ2hO4G9ghzj8b2ByYCjwD7DEEaUySJElayKALMTO7A1ijJvwxYOOacAP2Hux0JUmSJO0nPXYkSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJIkSdJaUoglSZIkrSWFWJ
IkSdJaUoglSZIkrSWFWJIkSdJahkyISRou6RpJZ8b+CpKulDRV0omS5onweWN/ahwfN1RpTJIkSdrFUI7E9gVuLu3/CDjUzN4APA7sGeF7Ao9H+KFxXpIkSZL0YUiEmKRlgC2Ao2NfwHuBk+OU44FtYnvr2CeObxznJ0mSJEkXQzUS+xnwFeCV2F8ceMLMXor9+4CxsT0WuBcgjj8Z53chaS9JEyVNnDZt2mCmPUmSJJlDGXQhJumDwCNmNum1jNfMjjKz8WY2fsyYMa9l1EmSJElLGDEE93gHsJWkzYH5gIWBw4BFJY2I0dYywP1x/v3AssB9kkYAiwCPDUE6kyRJkpYx6CMxM/uqmS1jZuOAjwD/MrOdgfOB7eK03YHTY/uM2CeO/8vMbLDTmSRJkrSP2blObH/gC5Km4nNex0T4McDiEf4F4IDZlL4kSZJkDmco1IkzMLMLgAti+w5g3ZpzngO2H8p0JUmSJO0kPXYkSZIkrSWFWJIkSdJahlSdOCcy7oCzZmzfdfAWszElSZIkycySI7EkSZKktfzPj8R6kaO0JEmSOZsciSVJkiStJYVYkiRJ0lpSiCVJkiStJYVYkiRJ0lpSiCVJkiStJYVYkiRJ0lpSiCVJkiStJYVYkiRJ0lpSiCVJkiStJYVYkiRJ0lpSiCVJkiStJYVYkiRJ0lpSiCVJkiStJYVYkiRJ0loGXYhJmk/SVZKuk3SjpIMifAVJV0qaKulESfNE+LyxPzWOjxvsNCZJkiTtZChGYs8D7zWzNYA1gU0lrQ/8CDjUzN4APA7sGefvCTwe4YfGeUmSJEnSh0EXYub8N3ZHxs+A9wInR/jxwDaxvXXsE8c3lqTBTmeSJEnSPoZkTkzScEnXAo8A5wK3A0+Y2Utxyn3A2NgeC9wLEMefBBaviXMvSRMlTZw2bdpgP0KSJEkyBzIkQszMXjazNYFlgHWBVV+DOI8ys/FmNn7MmDGvOo1JkiRJ+xhS60QzewI4H9gAWFTSiDi0DHB/bN8PLAsQxxcBHhvKdCZJkiTtYCisE8dIWjS25wc2AW7Ghdl2cdruwOmxfUbsE8f/ZWY22OlMkiRJ2seI/k951SwFHC9pOC40TzKzMyXdBPxJ0veAa4Bj4vxjgN9Jmgr8B/jIEKQxSZIkaSGDLsTMbAqwVk34Hfj8WDX8OWD7wU5XkiRJ0n7SY0eSJEnSWlKIJUmSJK0lhViSJEnSWlKIJUmSJK0lhViSJEnSWlKIJUmSJK0lhViSJEnSWoZisfNcx7gDzpqxfdfBW8zGlCRJkvxvk0LsNSYFXJIkydCR6sQkSZKkteRIbIgoj9AgR2lJkiSvBSnE5gBSBZkkSTJrpDoxSZIkaS0pxJIkSZLWkkIsSZIkaS0pxJIkSZLWkkIsSZIkaS0pxJIkSZLWkkIsSZIkaS0pxJIkSZLWMuhCTNKyks6XdJOkGyXtG+GjJZ0r6bb4XyzCJelwSVMlTZG09mCnMUmSJGknQzESewn4opmtBqwP7C1pNeAA4DwzWxk4L/YBNgNWjt9ewBFDkMYkSZKkhQy6EDOzB81scmxPB24GxgJbA8fHaccD28T21sAEc64AFpW01GCnM0mSJGkfQzonJmkcsBZwJbCkmT0Yhx4CloztscC9pcvui7BqXHtJmihp4rRp0wYtzUmSJMmcy5AJMUmjgFOA/czsqfIxMzPAZiY+MzvKzMab2fgxY8a8hilNkiRJ2sKQCDFJI3EBdoKZnRrBDxdqwvh/JMLvB5YtXb5MhCVJkiRJF0NhnSjgGOBmM/tp6dAZwO6xvTtweil8t7BSXB94sqR2TJIkSZIZDMX3xN4B7ApcL+naCPsacDBwkqQ9gbuBHeLY2cDmwFTgGWCPIUhjkiRJ0kIGXYiZ2SWAGg5vXHO+AXsPaqKSJEmSuYL02JEkSZK0lhRiSZIkSWtJIZYkSZK0lhRiSZIkSWtJIZYkSZK0lhRiSZIkSWtJIZYkSZK0lhRiSZIkSWtJIZ
YkSZK0lhRiSZIkSWtJIZYkSZK0lhRiSZIkSWsZCi/2ySwy7oCzuvbvOniL2ZSSJEmSOZMciSVJkiStJYVYkiRJ0lpSiCVJkiStJYVYkiRJ0lpSiCVJkiStJYVYkiRJ0loGXYhJ+q2kRyTdUAobLelcSbfF/2IRLkmHS5oqaYqktQc7fUmSJEl7GYqR2HHAppWwA4DzzGxl4LzYB9gMWDl+ewFHDEH6kiRJkpYy6ELMzC4C/lMJ3ho4PraPB7YphU8w5wpgUUlLDXYakyRJknYyu+bEljSzB2P7IWDJ2B4L3Fs6774I64OkvSRNlDRx2rRpg5fSJEmSZI5ltht2mJkBNgvXHWVm481s/JgxYwYhZUmSJMmczuwSYg8XasL4fyTC7weWLZ23TIQlSZIkSR9mlxA7A9g9tncHTi+F7xZWiusDT5bUjkmSJEnSxaB7sZf0R+DdwBKS7gMOBA4GTpK0J3A3sEOcfjawOTAVeAbYY7DTlyRJkrSXQRdiZrZTw6GNa841YO/BTVGSJEkytzDbDTuSJEmSZFbJj2K2lPIHM6sfy+x1LEmSZG4iR2JJkiRJa0khliRJkrSWFGJJkiRJa8k5sf8hcq4sSZK5jRyJJUmSJK0lR2IJ0DxKK4dXjyVJksxuUogls8ysCL4UlkmSvJakEEvmeHIuL0mSJlKIJa0mBVyS/G+TQiyZK0nhliT/G6R1YpIkSdJaciSW/M+RxiVJMveQQixJBsBraYmZJMlrRwqxJJkNDEQo5tcJkqR/UoglSct5rdflpbBM2kQKsSRJBsxrOYLMUWfyWpBCLEmS1jCUo84UpO0ghViSJMlMkMJtzmKOFWKSNgUOA4YDR5vZwbM5SUmSJD3JOcihZ44UYpKGA78ENgHuA66WdIaZ3TR7U5YkSTI05LrFgTFHCjFgXWCqmd0BIOlPwNZACrEkSf7nybm8DjKz2Z2GPkjaDtjUzD4R+7sC65nZZ0vn7AXsFburALeWolgCeLQm6qbwtl4zJ6Qh0z13XjMnpOF/Kd2z61mXN7MxDee1AzOb437Advg8WLG/K/CLmbh+4syEt/WaOSENme6585o5IQ3/S+meE561rb851QHw/cCypf1lIixJkiRJZjCnCrGrgZUlrSBpHuAjwBmzOU1JkiTJHMYcadhhZi9J+ixwDm5i/1szu3EmojhqJsPbes2ckIZM99x5zZyQhv+ldM8Jz9pK5kjDjiRJkiQZCHOqOjFJkiRJ+iWFWJIkSdJaUoi1DEmja8JWmB1paSOSRkkaNbvT8VogaUtJWYeT/2myAswikhaYhWuGSVo4tkdK2kfSyfH7nKSRA4jmr0UcEc9qwF8HM901cfQRmpLWif95JL0lfgN5nlm5/08kvXkmr3mrpGuAG4GbJE2S9JYBXPcOSQvG9i6Sfipp+VlL+Yw455e0yquJI9gRuE3SIZJWrdxjuKSlJS1X/F7NjSSNlfR2Se8sfhF+iKSFozyfJ2mapF1mMu7FJK3+KtKmeDffiv3lJK07K+VkKJGkmrB5+7nmc5IWazj2dkkflbRb8Xut0jonM1cYdkh6B/BtYHnc4lKAASsDN5rZqg3XbQG8GZivCDOz70gaA3wSGEfJgtPMPi7p7cDRwCgzW07SGsCngC8Ae1bji+1PAy/jSwcWxh0brwKMBI6P83YFXjazTzQ9j5mtGGn+CrBFxDEBX33/ZFP+mNlWPdK9ipltXMmX88xsY0n7AscC0+PatYADgIOBLc3s/jj/XcAvgM/F89wVaV4W2N3MLorz1gA2ittcbGbXRfhqVvGLKendZnZBCIyVzeyfkuaP/NgR2CO2jwX+aGZPSlrVzG6RtHZNNhwL7Gdm5xfxAz8ws7fH/juAa83s6WiE14739FdgDWB14LjIh88A3+yR36dGnK+ju2zdI2lL4MfAPGa2gqQ1ge+Y2VZxzdvplLu9cR+iX8bL84zs8ehs9ejQ7BT5YfGcC+Pv6WHglc7tbfW4x/WV+MDLz0
Tge2b2WPmApB/heX4TXo6L+LaSdK2ZrSnpQ8AH8XpwkZmtEZ1jmTwAACAASURBVB2mLwLLmdknJa2Ml7czJV0AbBXPOQl4BLgUuBz4u5lNl/SNeA/fM7PJkpYE1on7X2Vmj0T6jojnfK+ZvSka+X8Av6a+nPy15vm78sLMdq3kwe+Az+L1sijDF+Lv7sk45y3AanS/8wk96tFHzOzjpXuMAk4v18dqGcK9FH0EmAz8FjjHzCzStxJwLd3vaJ+Ip7FN6ZEP7WB2r7Z+LX7ALcBmwOuAxYtfHDsdr0TVa47EBcC9wIHA9cAxcewy4EfADsCHi18cuxJvnK8pxXUD8Gfgu8DtwO54JToMbxgBdgZ+gguuKcB1NWm6rr/niePbRBqvB94IvCt+hwEnAlvG7w/AoXXpxivGzcB1wGLA6PiNA26ppOcDwKm4gJ6MNyRXA68HNo84lsUbo1VK6XwjMCm29418+k78rgc+V8q//fGKNT/wc7wx+2Tc5/Y4b2XgvFL8q+AC9e541r9G+Pk1v+lN+R3bU+L+awDX4ALkQmByHP8WsGdsP4Y3SmcBjwOnxO8/wJl443wb8DRwJ97A3hjXTgIWobv8XB//v4v3+qvIg2OAw/GGp8+vdP3iwH545+FvwAvA/j3qyyHAD4G3xu/5+D0HvAg8Fb/p8X8rMG9DXDfE/9G4q7hyuTkR73AV5yxApz4U5fATwEGldzAltjcELsA7a1fidfFuvJM0IfJ1uzh3cjnOmndbLSf70akzdb/JlWccjgvwU4CDgBXjdyBwapxzIF7OHo6y8RBwcj/16DvAr+LYYvHu94j9XmVIEdefgKnAD+Jfs9JGtv032xPwmjwEXNnj2EVRGc/DF0wXv6KyFP+j8NEBRUXrda9qhSlVyiK+kcAVuPpqJC7k3lU6fzKwUimOFUuVsc/z4I3a4aXfjfEchwOHxzl93MkUYdV040KlaLzuiIpyR6Rt78qzHAZ8qHL9BnijcxUwpnx+5f4z8hlYsBS+YOnYgvhI7nJcoH0VV3VfC8xDfYM/HHcK/RdcMOyPj5r+1PDeTsNHT+Pi9w3gtNLxOmE1GRdkXwX+jQvtYaU0/ANYqhTHUvjaxuvwRqLIq/fQ6SBdUVN+iny4mZqGCPhRXVg8/2l4h+DLwOvi2IXAXT3K8OSmsOLZKsf+ho/g6+I6GG8gr8HL+ZhSWSvKXh/hEmleKvJwnVIZKfLsh8BHi+sjT19XimdMKa4rozxMLh0r4hlwOYn3PB14iW5B/likp0+7QEcoXx9lo0jTksC5A6hHh+Ad6quJjnKpjagtQ7G/BvCzyPsj8M7UL/trt+bG3xy52HkWOF/S/8N7Oc8XgWY2mWa1zyHx/4ykpfGCulSEnSlpczM7u+a6e0PlYzHnsy/e+BTD8idCrfAQ3uv5Gd5Dvg64KNRjT+FD+/Ml3YH3rJbHVR9NzzMN70kWTKpJ24KSVrSO9/8VcAFRl+6ReAN4E/AzM3tK0jdx9c3lxT0k/QNYAfiqpLOBFSQV3lMWwNVQx4R6f6Kko4Hfx/GdcRUV8YyFmoPYLuYEXgSexUdh8wF3mtkrkp43sxeKqQNJIyL9h+Kqq3/hKsGrIp4fSbpV/imfLehWB0/EG5ZTY/9iYIYqB5gu6avALsA75QYTI3E12kdxwfZQzC39v7hmWTN7sBTHw8BywNNm9ph8DnSYmZ0v6Wdxzo2SPgoMD/XaPngPHFyAvx4oxwn+SaL9K2GbxbmHWkdd+4XIq6nA6Hiecn34aWwOl7RukW/y+czhceylkgrrIFzt9gxwraTzKvHtY2YHSDoEV8G9LOkZXGgAvBAqYIv7rFS6/ju4wL/EzK6WtCI+8phX0q/jmX8Uc0TDgGEW6sPgMTpz+ofjZfl1kr6P+179xgDKyZ30VStOA040s69WwpH0bkkbmtklsf8OvNwCPBtl9qVQ8T5Cx3VetR59FFhY0ra4AP4m3hk0Sduaq6NfrCtDoZrcDZ9COBp4Az
AW7/B9WtIbKu9oq9js1Ua2mrllTuz8mmAzs/fG8eXpzKssgFfY/fDRzcb4vIPh6rTn8cZ1wdh+kY7+eGFJS+A9qvdF+D9wQfYhXN2wOq5OGAV8y8yOrEnvCHOvJPPiqg6AW83s+V7Pg1fsCWa2c0M+bIqvxi8Lxk+Z2Tk90n2++dzKhrg69MeR7vWiIV8TuMPMnoj5nCUi/jquwNVwG8b+xbi65HlJX8DVrKfFsW2A483sUEnX4Wrf70b8R+IqsTuBJ/BK+zl8LuomvJE+ycyersmDRYA/4qqx6+nMCWFmBzWkG0mvx4XV1WZ2cQird5vZhB7X/AJXcf4xgnaMtL0pnu9gvDf9CD7aeHuUv68D78ffwznAd83suXjva+IN2vP4+1sBL6+3l269ED5/dL+ZzRBukg7Ey8i5Nck1M/tOnDeeThkFH23siQv9w/E5tUfi/g9EeusiPD6e5wu4yn6vyrzXJviIdzW8vL0D+JiZXdCQpYXh0ab4iPA2SUvhKs9N8LpVzuspxfPLDVs2jjw9z8xulrQHvctJuRM/H152X8JHbXW8gqszF4n7/Cee5zpJvwK+hs9XfRH4Lz5K26OmHv0B7wA+Xp+t9nFJ/8TL0A/xOvEIrsY/F/dgdHc8x7tK1y4H3FOJ7MI4r2cb2WbmCiHWC0mfxCdDR5vZSlHJjgQ2LwmNefFC/FwR1hDXcHoIkR7X9TEgwXuSJ5jZE3HOYsBOZvarfuK6BJ/AfqHh+LxAYchyS6/nifOvMbO1JP0Qbzj+IOkmM1tN9QYSw4D/Z2bv6RVvw73WpiTgzOyaCB9vZhMr5+4KnIA3rjMafDP7jcLwpHL+jDBJUyyMGErH6ybzC2OGX5vZcw1pXh/v7LwJV20OB/5rZovE8W3pTPRfZGanya0Zn4s074w3eidYxWCi5l7vqgQtiAusbXBDgILpZvYfSZPNrOsdSZqCC8U/V8K3N7M/RxneJzoPiwBYxzDhOuC9wD+jTLwHH5nug9eNl+O84fgc2TOSTsS1AruZ2VtCCF1mZmvGuYsD60deXAF83MwOkfRz6o0rflyXN+ZGMR/GBSF4+Tkt7rEScF90lt6NC7sJwCm9ykkVSUcBb8M1JTVJmNEpXjgC6s5D0ji8I/CCNRsa9RwFNZUhXGtSZ3TyQLlDE+E/qobNjcwVQiwq44HAOyNohtWQpGvxj2xeaWZrxfnX48P1agMw2czWbih0T+LqvAsoCRFJu5jZ72OkUccb8V7Xe/Dh/3Z4T/ttRUUv3b8QKL2eZwLeoJ6BT/oCHVWRuq3bimMTJL0R150vGY3N6vjk8fr4FwI2wVWJz+IVYvEeI0IDti0av1L6e1m9LW1mO1bO/5OZfUQ1a9+CXc3ssNL58wGfx3u776ajjlwYt2hbNc77Ed4b/0fp2sPwuZJyT/4pXECcF//ltBcWrv+O+/0ZGI+PCt9Yp26qPNvr8XJn+HzHb6hvtIGO2kcNFnhxrFDz7RLpWA4f+RUUI7TVmsp2bF9lZuvWpHmimY0PYbZWqMeuw8vE+8zsv3HeKOAfMbIsrrmmVL+KPKtjIzM7TNLuDce/hOeT4llXwLUUjabyUcfH4+X+rPi9FVfnv5vmclLOo2ERx/+Z2RqV+HvW8VLdW53uuvdpM3t/j3q0Iw1W0D2etavjEh2K6/FORp8OTbkzV9eZLkbnbWZumRP7LT6fsEPs74qrS7YFqvMqY/GXOELSWnQX8GIN1a/wBv362H9rxL8Irkq7VD4v9DSwQTQuCzWk7e2hrptiZgdJ+gk+UT5ckix6EVEY5xnA89wev2HVe6rBzBbvlf4Gn/z/NYCZTQm1xrq4+ubHoepYCjfZpmm0Jel04HpJ51ISpPFcL+MWYOAN2QL4/OAelTiKeSvwnnzRcBUYrko8rBT2KXwuEdzoouAp3DCk4ArgtFDjFOrgBcxseOmcv0q62syWlnSjmdW+v2ikp0oaHiORY+Xrzb4ao7Af4Y2l6Ai+L+AGIv+KsJ
/jlmR/q7tH6V474PNtFxTXSfoyrlr8KbA0HTXfVFwFWx6hrYt3fMZKOrwUvjCuJiu4VK4KPZHu9/dECKiLgRMkPRLHFygEGICZ/Ved9YZ1816vxy1x6zDgMDM7vuF4V7ikp/EvWtSOesxsYeAVc/X8tng5GIarBqF3OSmn8SV87noH+Zzx/9HpRL4YI86mOo6k3+IjwBvpqLDvizQ21aPL8Lz+J536+nv5R4GrzIu3Dy+X8qIoc88Ao2IUXlB0aIp7HUl9Z7r1zC0jsWtrRjXF+pVD6J5XKeYpFqdjdAA+L3CcmZ0q6VTgmxae8+ULir+Dmwtfiqsju7CG+RZJV5rPL12BC6HH8IJ+Gt4Y/TpO/RRwr5l9sdfzlPZHxX3/Wwq7Ge+F93mp0WCvU+kx97lPhG9b9ywlmirzvjW9wfvxRnQUnseFoHoBOKpuRCNpJ3x+akO8kpfv+wrwFzP7eVPi5BP2W+Pq0aJxvRn4gJndE/vL4erJN5VGwMNx449y5+73+Dzi0bgwfhCfB1lD0lR8vdzNlfvfindeHov9xXEVW88FzjHq2cQ665/G4A0cdKv5tsDL0pcrUbwZ73DtjwvRgun43OfjEW/TyGBLfNQ1jG4V1hn4cojJcf3b8I/UbqAe816S5quqaSWdRbeBT3ciOoYI5Wuux+epHsSXIRQqtqXM7FuSrsQNqL6Ov487Jd2Aq4kby0kTcuOk6hrOV/ClAoc2XHOTma3WcKxp0fEX6upfP2n7YbnOhNZmMXzurI/KuXTelFJnevVoP/5mZhvRcuaWkdizarYaOgCfV7keFxRH4g3StmZ2SkN8b7TSp1/M7Cb5Qto7JD0cI6ouIaKGBdK4peOieA97Mt5YHI2rC/fCe3zgE7ZH9/c8csvH3+FGKEh6FJ+PuJFm6zaAR6OXXDTq2zWcB96YNWFN6g65dVzV6u1RMxsr6SEze31TpDFCXh7Pu5fw/FiC7t7yqvjaqa3qBK3FImN87d8NFWH+ReASSbfjjeAKwGfkcw/HS/oc/k66FghHXgzHF7p+Hrc4+3Acf7gqwILHcMFRMB1YptJTrqZ9dZot8J63kqUaXmaKdXl1I9hLe4x0GkcGQNkI6nh1G0H9WdIDca/X46owzOxcSZPpzHvta2aPRnSX4RqNMivj9aTp/mWV3bC4/gFgq4qa74gQ+t/CR/mfBr4fAuyjeB25v59y0jRfvU7lXv+Ke70VqBViwOWqWbRfxFfang83QJlMbyvoWszsq5W6Ap5Pa1kYezRQtId11titZm4Zia2Bq8wWiaDHcU8RU6IQn2UVAwe5AcSH6auP/o6kk/CX/KcI3hFvUHfFG44XCSGCm7ruhqvrLo7jL5fimyEo457zWd+5pNHAMmY2ZQDPcxnwdavxPKG+1m1FGraSmzAfBbw94rsT2Lmfgl+L3Djmh1S8E0Q+/RYfdQlX33wCH3lugY9iy5UPM7tIDR4hgJut2/ruoLjf62qSNUO4SjoOX/Lwt3I+4FaohdHLreVRQoyq1rN+jC8q+XAY3qD/pXKfbfAG7/R4jq1xFfAtcfxoKpjZ3XIT6KoF3vV4Q97HUs3C20hNui4GNrZm459v1YXjnZo+RlDm3ltGUrKkjedrYnHccu/3+Ii6rLI/0jpzUvPgc8bg7+NFuYVlQaHiOwVXzf4Sr5OGq7z3to7HlXJcO5rZNyUdW5O2cjlpUrGtBWxvZrfHeSsCJ+Nz0yOpqGHNvYm8Cx+xPkTHwtmsYmAU8S0az/EOGqyga3PVrz0YV9NX64rho+V7Gq77Jn2tsY82s0bPM21hbhFiK0QPbIbVUCnsWFwVcxFe+P4e+vO/40YHVaHzE7mO/zN0LOkuxefJnsN7l1+tChF83qCs7utPJbcPfd3uXGZmn+/nea6zvhPP14V6q2rdVjzThZLeZmaTYuQxzNytzwfN7Mwe+VprYIJPnB+I90q3xHvCw8zsW6XrZli9RVht5QsBeyuwek1Ho9b6rq5hqJxzYE
3wSPz9LW8VF0hxzfm4Kq88d9SfscqSdAsv4tyeHQPrbeq/Ld0WnI3Wjvh7+Zd1rAsXxQ0ZtqW38c8XS7ecD19LdTMuLPsYQZnZW9XXpVLh1qqOJePYeNyopRBiT+HLKk6NetPLRdnCnmSbHvvj8PnRdxCjTdyN2F39xdVEk4oNL9vH0b1UZQ+6VbQFZmbvjU7QF+i7rKNPWYgOwQ3Wj3q5Ic1NdeUiXPheRfc7r1PP1nam28rcIsTqGrtJZva22B6JLw7dEW8gzgXWN7M+DmDl8yL/bFK5NAkR3AvAZYVqoKEXWGD48H8tSZ/AF80eWKpMjc8j6TRcFfG7OLQLbun4oR73I1Q+u5nZDbH/EeDzZrZej2tOwVWU5bmBNXBB8Laigaukr8kfZW3li2v/hvd8C9Xs/+GdiJWot767BzjEupcnfNHMvtHjWWpNwfERL5HmVXABXU7j62k2VtnQzHqpXsv3v8TMNpQ0nXo/iAurxiS6Lqx0rG7u9Boa1jk1Cc9o1M4B5jefvy3mCEfgZe0UXDiuBpyN16VLzKzOAKEc71fM7JBKWNEZm4R75Lg1wt+Ij0A/hRsxFfOuT+ILzbuWYFTibIrrHHqUEzXPV/8S77T2WcPZIw2Xm9kGDcfKyzuG4x2Mk8wXii+Gq1jL9aVR+FbrSim8sQMbx+s61U/i88aP1BxrDa2eE5MvcHwzsEjlJS1Md6F4MV6+4V4htgFOlfRWM7u+HKe514FXJC3S0FO5I4bmhRC5FddJrwh8TdILuGogoqtXDUi6Xm4JuAOdxaTzytfC9Hqej+OeFE6N55nheUL1a5qejjRsB5wsny/YCFeBvr8ubSVWMrMPl/YPkpszPyOfm7lN0mdxE/1RPdQz4L3akfQduUBfjxDz4COdG6hfH3WNmX2tCDSzxyVtjhsYFPOTX6FbmI43sx3lRiOYr3ESncbynvjNQ8dKFNy0vNyhuF6dpRi7StqbvnMqv8LfaZfqNO7baOFGg2cOueFCnRXkXTVxjCiElWqMfxpYAFgGrxNfA+aXG2x8Bu+cbUf4lDRfvLsk4ZVFPRY74wL/kMq9TsbXYo0shE6k8d/R2fwt8Bkzuzji3xD4raSNaTZHb4prs17lhM589SF0POAcja/XPBR3g9VFUycNuEZu7ftXulX5p9K99u0l4G4zuy86sPtG3l+LzytejmuOmmj0nqIeyzNwu4ANcP+O4J2SScAKkr5jZr+jpbRaiOE9pQ8Ci9JtjDCdmDyWVIzA3o2bLh+NC44pwMfklmxVHfZ/qTEhN/cIXRYi4IYi37aw/CpQTFCref1Yndudx/p7HnzubJ+GOH9BzZqmSPsdMfr6C95Yv9/Mnm2Ip6DJwGRfvOHbB/ey8R7cG8dRVr+cAHpUPjr+LLswNy7o8nyPz7UMlzSvdRarz4+bIBecgKuOP4hP+O8OrKIaF0jV0Yn6qrG2VbOLprH4SO0D+PvcGVfLnYBbDnapluL6w3Fv6peXwoqR54qqN5M+hHoryN9K+ik+cgA3Ky8+MdNk/FNVkQ7H1899J+L5BB0jqLPx+rKJNbtUOhZvDIv5ufuBM2J016sz1uSibK1CgAGY2SWSXsLnF6vm6PQT1/h+ysmPcSOZjXDhcTG+lnKU6pcg7EVzJ21+vFyXO4aGOwi+sCJgbov/fSPsCjN7T3TKf0BvauuKGpZnmNnJccoI4E1m9nCcvySuhVgPn2pprRCbW9SJG5QbhcqxP+KF8W9llYAavgllPsFeuxDTelh8RZzFfIbha3puaTq3SbUT8fR6novxings8AfrnncqFp7OmDeS9CydSgPem3+SECTWY35J/pmQqpud3fHR3e0159eqZ8zsDf3laTQwy5V71JL2wRuOosPwIdw4ZQFcyBcq2z2AMwrVlTqqzXI+3IqrAGtdIKnjiqmsxvo4PsoujFXAOxSFscoN5p9TKdTAI/GG8CUzK+a1qnm0O96pWgVfZvEn/P00mklLutTM3lET14K437
334WXuXOD78V9r/BP75bL/Ej5/ZTR8tki9XSrVLXa+E29Mt6K7wZ2OO969LIRc2UXZHXjnaydcIPyRzoLg53AHA7Xm6DVxXYyPhvejdzk5KdJUCL+P4mV9TM1tDFjCZsFMvUbAbIR3cvY3X/ZyLW5U9Lx8zeJMfwNNDcszLKY+VFkCEFqIG80988x4d21kbhFihwDfw0cJf8ctvD5vZr/veaFf2+ebT/2cPx6v0OPoHslegjvjLFuW3W5me1eu/4r1cLsTaoG6Y0/iXsFPl+v89wC2x3uCx5qbOl9E3zVNn8TnMGqxAVgnquJmR9KFuArkarzBuMjMrlc/FlCqsUaL8NpvbOF5vIGF77totC+PBmTTeFZwb+HnlNJ7hZmtL+kc3BfgA7gaa11KLpCsYwpOjID2rqixflUSgnXGKleZ2bqR75+JPL8q8nwn3BNIVbVUXDsat479CC68V1az+6SDqLGCLMdXeV+Nxj+l/cIFmOHagGvki9gbLdziunHAwtaxpL0Mf9+XmqtYV8JHmuv26ozFtfPgqu9X8HLT1BgZPn85Y855oPRTTvqs7ZJ0E/BBCyfapfAV47maOmnLxDPMcIuFLze4r0nA4IJ7D1zYvhe3Gh5pZpv3eJ47qc+npy3mp+O8wqN+MWf9K9zDS+GObDt8KcqXgTNtFtzIzSnMLUKs14f5mrwq7IKvQSp7QbjZzN7cVFDMP0p5K/WqonPw4XqhrhqG99bXovtjmcvihefCumcJFdpRuCl4UeA+jJvEL447Ed1PboCyDd5IPxXPdSje6I3E1zQtgjfEU9XwZd9+GqzFcUutGY0d7v7qsWiA1sHVtJ/CP9UxunRtlwWUeliQySfm3wtcUOrN3xD3XMfCFF7udurqcmVtSPcH8UZkWXxd2QhcXdJnwtw6C3j79EblxjCb4Sqepc1sM/nC9w3M7Bj5nEYfp8+RX6vS7b3BrPsDiOviHZ2t8XK3pbrdJ52Nq9DeTL0VYFd8lXT3NP6Rm9hvT2eEuw1e1t5PjYVbpHFnYEVzI53lgNeb2VXqZ7EzNR+KNXdwuwWuii+v2/uUmdV6NZEbxFTN0eczs5GqtyA1XHPwMzM7vSHO3+OLtq+I/fXwEd1brMawCu9E1HbS5FMPf6A7z3c2s01UMoCKuLoETIS9C6+vf7eGpRFx3uKl3fnw9zgaH732cpAsXPCWLa5PKdqrNjO3CLEbQ/gcjX+I7u/qmJ03eVWodXZqZns2FRRz7wCX1KmKJJ2J9+QL79LL43NUz+FqxY9Smjcxs317PM8VwDus43B1BN4ob4j78jsDX3d1Lv6NocnyBYyXm1mtmrRU0WfGJ925eMNfnmt4N52v226Ez99dG+kbTYNTYzVYkJmr/YqRU1klNQU3c96dbs/3x5lZ8VmTfpH0G3OT+lovFdZx6voz6tVY6+H5/dEoTyNwA4dGQSrpVmswnw6twYfwxvtE/JtmRX4VBiNfwT/t8fNZUfVEvh9Ep8G6CP/wZOGx41ZgjVLnYH78He7VEOVHqPlyspmtE9d3OfktRriS/kxD2Zd0Cz7imRrnroRbhq6Pz+WOo9uAo888sKSlzOxBNUwN4GvqTrDmL7vfjKt1i47ccviofQyuxSjmsxfGO65rW4PTcPX2GlS3/m+Kme1fGRFfarPwaRR11Od9lmdUzluSjj/PquFHa2m7YUfBX6NSPAv8XwzXi4WsTV4Var/XA2B9F7z+LBrhbwEHhrDsUhXhcyk3S7oKLyTr4pPL78TnsJ6OUdYfgItDQGxfafD/ZGYfwOdHRuEqRPAe6Ghzy8nX4b3sr1kYZpQFlGq8QpjZ6tVGNyrPZxpz1FnKzL5b2v+epB1x3f4kfA7nbOs4Q77WzAojg8Ia7JP4/ESTBRk0fGPLzH4q/4x9UTH3sPB8X4fqVbXPyo0prq9rCEsUqrbqGrPiy8evRLpfkjRazQY7AJep2XvD7fhI7tGaYy/KrSd3o2PYM1LN3mC+ZCXXQgDyb8
gN6+dZHyAa4NifFzfI2NxqTPzx+Zq15eb7xXudJ44XI5bC+8tyctXr3cAbzGx7SVuXy36cN70QYMEd+PzU2bjvyzqjmD7m6MCD1qwSv1tSry9ObFrZ3wQfjW6Cj0aLd1AYVl1OeCAJYfZ8jNbXBh6TtAsdQbUTrm7EzL6sbg/8R5mv/6uOiI+V9Gcz+15TglXvtHiEpD1x7VOTirk/w4/WMlcIMev7Yb6n6XyYb6J8jVDVq0Lh7PQiup2dNhaU2N8DVxWNpNs9UZMXhCNxteV31f2xzPkKARbP8HgIKHBrtGujARcuCH8gnxM60vqaw36wOXfqidFb4xqx4B9yi8aTYn87XG26EV4h3wnsI+kVvIL3cmo8Sc0fzPwcbpL+PN4IXBjPOxpXP95VJEjSaOv2CbcYvs5uCm4ZSMRbCLG3lc5tdEFkzesCL8AbmnNjf31cndXLVH59/P31sXw1s19LGiv/2kCX5xL6uk9aAVdPNVnm/VXSZtaZq1wNf1ePyedoqnOWhXB/Eu84nBv7RaPd9PHNZ+NdFu91DJ2yXzjLnhLP+RZcjboInaUm1Q/FgtfLsyO9Fnl8NS5YLqk2xpo1c3SAmyStUu5AFdQIv6OBo1WZy5N/kWAsvvSgyWn4x3FV46F05vBmOL0299xTdXO3M90j4oPj2RqFGA1Oi/HR7q/lc5aT8HbtYjO7Ns79Oq6ar87LtV6IzS3qxKrX6Qvxxv5FNbiewRvO2m8+VVRPRUH5sZnd2ktV1JC2Yt7krbh6bBRuUbYX/qnywiHt8rhqqfhcxlL4aA58HuiBAdxrM6vMKUj6tJkdqXqfdIvHyK8pvmIe4pXSdcVcifDPZmyEm1ffgzeEy1Pv1LjWgsy6LUYLC8Cy6hM6AqmYz7yHvt5OLjWzYlnDOnSMbwpV0whcAPwr9t+Dj/Y+GNcsSo0aC5/HOxxvnG/AVU3bhdBsyrdelq+Nnkt6xNfkqHkLfD3cFrha994ICwAAIABJREFUbAI+D3OtauYs8XnSOt4bv9H0/fjmZfjoaEe8Q3Ac8eVk8++T9XKWfQE+wu3zodiGegk+t/YiLtTKnc4L6Zijr6kwRzezRs84ajAY6pXXcV31s0Vfx+f2xtDgNLwhnurC9hmHInwS3gYU2phFcZP8Wf5QpVw1/Em8bo61+HKDBjAv11bmFiFW53X6ZTP7xCDc61j8o5A3xX5TQS1GIF8rhZUb5Ztwc/ELI3wjYC8L6yn1dfLZcyV/XHMZ3rj8K/a/ArzH3CCh1iedNXwMsp/73IHPdVyC9/iuMv/czTC8wSw+Olh2alxrvh3xvRVvgMv+KHcHnrIawxN1PEr08XYSx+uMb36H+9R7MM5ZCm+APhD7l1Gjxgo12AhcSIhuq8rab7QV6iDVWL6qt+eSJsuzP9BgmSdpG1xgLAR8ONS0G1IzZ2lmf6y5fjHcQvABenhCV+fLyeCurm6O8Bus4vmmCGsSvr2QLx7/Pv7liSIvDHjMZtIcXfUGQ12NecN1FxKfLbJuQ6MDrcFpuKTjcWvE8vTAT6zG+KY0Il6OzteaZ4yI+xHMTa7gPodrR0YB1+B18+JSeW+cl+uVF21grlAn0ux1uq6heRpf4Pkize5/Gq3y6KsquovOIulOZB2hMR4vqMV6mS3xgvrTUFuuH+H7WWdCvHCI22XdBlwkaSWrWaMVbIV7IfgyrpZZlVCrWo91ab1Q/TzEG8zsleq5EXZE/Krx3CppuTqhhI/cvmDd65qOwhvmugZnhPp6Oykzzcy6FoRKGlNU6OBhvBEpmK8YyVWum4Kv5TqxJt9rv9EW1/SxfMWt9Hp5LhlfTg8dy7PP495gCsu8+fDyUIx4F8FHUJ+VfzfvM9TMWZae6QLqR7I7qftzNKMkjYp3Vni0L7zeFNwo6Qi6nWXfFCNvk3QM9ZadtZaL+MjxDVaZM5R0WoxU/gKcK+lxwkelfBH+t+l0+oqRziPmH5ItRzWQXvsC5paX5bCX8Lr1UWqchu
Mdk+r0QJNBTjGam0THaAl85NofTd8aHBdpPAsXbJeXO0rWMC83gPvN8cwtQuzlcuMuX9NRqGqqDc2Cdb3HCn/CRxiFy6WdcUuy99F3MngG0QNe2cyOxU1wF8JHh2tbxwPEt/GCBj6h/h/8PawmqRhtbYO77qlr6H6rmvmOeLZHJW2F67on4Wqv8jxG1RUTvVQXap6H+HQ0XEXH4JxIy47UL01YHTdWuVFu+FJ1ULpgIcAi7AL5/N9kSeuY2dWVKOu8nZQXdNcZ39wZ6Sz3RP9ZuuZ3ciOUM+kWMFvGuSfJ5/5OxP3e3UNzY/fdyKuy5esv5AYmjZ5LrMGgyCquqtR34fikyv4S1MxZWsdj+SLmTqU/AUwoRrJyF2LfpvI5Gkkn4wL1FFxAlA0QPoYLzf3i/EtxVdaLwDT8PRUdjX9H/h2Dj4xvoa+3k6mRR11Yxzfot+Xq/kXwNaFEfJ+n4swbOEQ1BkPVuGto+mzR6XSchlfr5jBJi1nHAnQ0De1rjO6H43nfy/CkjlpXcKFiXRh/75sAR0l6xEqW1FY/L9d65hYh9mXg/FBzgfdKiknV2oZG0u/MbNdyYCmszipvp9gufyeqfO2BeE96FbxnNA9uxLAE/gHIgheAJXuNtujRWzezd6l7vuMsSctW0jUP7stxO7mdxcLUu2KaVvcsJZrc4nR1DPCG7Fw6X2quo9cnH6r+KHfB82A9YGdJd+OCr+warFhDh/nC1HLFrjO+eQj3a1eoYao90Rdw662vU1JjmdmKuKHNIdEQfhNfdzic5sZujPW1fF0Ib/wmUeM2KK6vGhRtgY86q9/kuj6Of76pEYy6sCzeAXl75EVB00h2P7zz1CVM1dckf4YBgrmF7E+o+ZJzNOonSfoqzLDsLIRMk+Xig7iQPx8v/4WbqPLHUwt/p6PwTuCTVrO+TP6NuMJg6A/4Orbv1OVXhb1xTcCq8o+63okL2bPMrKkT+xP8m2J/xsvpdrhatBZzA7TlJc1THSn3Q60rOLnhzEbAu/B26F7cCrrnvJz1+OxLW5hbhNileIO6Ma5LPwcfMUBzQ9OlS5fPexSWbHVWecXopelDhE/hC0UnA5jZA9FwTQCuki9AhVjrhDf8TaOtXk4+q/MdZ9Iw31Fh8VDj7Gvu2fpCSdURTpXnzOw5Sch90N0iaRXgmXLHwHytzkvWw/uH9fUfV16nUvVHWTg1bnKe3LiINjbXsWbjmyYVyhepUWPF/ZbHOxw74j39r8ShpsbuGPW1fL3XKm7L1G1ZCX0tz5bBy0ufL4njZW6eukZQnTnLwhfgHpVzDqJ+JDuazrKOMk0m+XWqPE+cC/+n5ar5ou6tX4q/yXLxCLq98P84rq/6JCxUhiviHdj/h5efcn1ay8y+TklQhwAuz/tROlZWJ5+NO8stjJk+jC+d6OM0PJ53gnwOrrBy3dbql1iUuQO4VFLtJ3Ma+D/8I67lbw1+DP9i/UW4EdLVFvO2NFtNzzXMLUJsAi5EitFT8WXX7enb0LyIj47mk/RUKY4X8TmAwvvFfnQ+hT4M9xeHma1QlwC5CyKTVFTYBeP878s96Bf+1fYwd/HzTprnRmqdfAYX0Hu+o+nTDkWhflBu1fYAHUOKJu5T/TyE6joGavCOYj7P2LhOJVQwdeuaCtVMl4EEzaqogl7rtJqoVWPJPciPxDs021u3O6K7zex9Kn2jLa65LuL6PB3L11Fx7AIq81Fy34hfsGbXP03m/xOoaQRpmLOMa4bjgnPGHG4xkpXPX10gqfgczYfoeL6oM8mHZlUe+GLhM4CVJF1KWHbGsaOirH4zziksF6v+SY+X95aWtWbvMsVSkfKcogHPSXrOzE6IZ/8F3fN5VQq17Sp4Z+t0vKzuGs+7NrBHdBL6fPjSzG6UNI0oq2qeAy64PX7D6L1ko/NQbjK/hiqu4JiFZTZzC3OLdWKdD7SH6P4MxPx0m4iPieNvpNNAmvVvAXiemW1cDcO9ta+MV/Af4iOJP5jZzx
viOQU3P67z6t7r/ovSme9YB1eXXW7u+qZ2Dsv8o31lV0w/x0c5B1nFAKLHfWe4xYn4+3wlOp6lj3eUuL7RQanc+OZL9DVv/xn1BhIvxFxTl+NdM1s/4r4Z/xZZn3VaPZ7vNHxkV6ixCn5pNWuM4pp7Ij9OxC32CqHe+DFP9bCsVLPl2S7UeEKh3lFtv0Y80eFatya8utC7MJaqddsUasArreGbdJK2x0d8y+IjmfVwc/xGrxQaoG/AgSA3Nz8DN4bYFHjCenjKKV13EbBFqVOyED6P/f/bO/NouaoqD38/JgMog6IiYivEXiAiLIMIAmFQEReIyKACRgbRFlBRFGwnaFAUIXFAXCIgBDHSShxAUJAWJcyQhFFaaFdrFLOaVulFEgFBYPcf+9zUrVvn3Hr1XuXdqnrnWysrr27VrXtqOvucPfz2u/HYbrEgvT485x/kseiojF0vYx7D2L5Aj730Rp1R2YndIWlHa9dAewhf3aRWVQvwL2FH8WRwkdxlZo/Kq/Bn4G6Oh4GNwhenXPD4YjObI9eRWx6uebKZ/UfNmGO7rWPD+GNacIXyxiNKxzvqWjv8IsQ0lpFY2cdQK1vt9+HQxrgraS4+4T83vObDSaujgO9UyjI3D+OLCvD41jfxdPzyav5COhMkZtHKWIwV0UJN8k0NlxFpJpkyYIEt8RXwB3AX4hL8c3mR4i1VoD6zMpV5trlFlFCslf491r5hBTcp0mrEeu9DBglXXjBUJ5nXkm2If+fm4L+jHYJruUOTknSG5ksVT/IhjLnc56vYbc3GOw5chr//p6pSLJ/ghUTi2Lhr973htQr3CJyPLwpjyTyz6i4ij/vFfud1dWLdeqRNOYZ6J1aa7NekpYFm+CrofvM2A6lV1YYkiifDBFQUaV6ET67H48H8TXBXXMFy4Hwz+3ofXk+tFlxY8UVrtML5Cy1RSyPXkPxffDd2Ax4PqW1PLg+M/xuVbDX89T+Cx/+epmVUlpFQW5crqmxLXD9uZRfuyvWLNh934/GNZ8LfZ9NZQH6ymcViR8VzRdOwQ+ymeExHO5ixEibqb+AG6FLS9VZvx11oN5rZsfJ41GwzO1AJ/T38e7dNaae3Oq6Q8U5KfcPw+rrD8Jjl7yvPs1lxTAkdSdyl2/F8FgqZE687qUlZ2nWejst+XVI6dhVuoD9tXTQp5bGmdXFPxxIqST7qbMb6aTx+9zCR+HX5M0+8pk/jn2M5jv19fPeb6qoQ/a5apZtA5Trl7/w0fLf6lJl9PHFKUfKxvbX3SFuE9zScX3ns26vHRpFhN2LRyb4gTPptxaXy+pV78ELa1IRfCLGeDCw1T4gojn2o7CJUOvunGEMqOeGfcbfjVrTHr7r9wFazdLzjx3hm3ofxJJe21g5y9fFCMmpv3BWSLEYNhm8H68xWaytRUEt9oTphhJdk75FnY95Gu2LH7niSwXF4pmQ1MH8pPoGcjscx/4z/gHeiR+Tamh2xm+K1afzqDrvhxuTN+GTyfUsUxI5hjLcAJ1p75tkcfMHSoYSC71w6+obhNW8dKuyxhULlMTfHnm8873c4/0p8174n7s14HF90bVtacJVFn+8idCkPFJJvx+BxxJQrr3DH9tTnq8vYZ5SvZR7HvpdEVwVJv6Dzu/oai/SB63LdqKu3dP+/EumRBhwc+cw73NqjyFC7E20MvbBIZwe+VoniSWCFPC14FrCrXIniefLEhaVq1+AranZejWc9Fskg7wJeVDOuufgu5yv4CvJIgntNNQkSwMvVXqO1UiXCzPaXd0J+GHfRbYBnLSGvLdsZ/2Fui6f239jlvXuQeLZaW5aWmR0ZrhFVLQjn7GmuDlDuqfV9/AdZGL6PVa7zKnxFXU6Q+KykD4f3bwXuzpkBfMLMrql5LdE07BKn4DJf14XXdJekzcPO6WozWyHpM+Fap5lrTy7B1REuxY3Po9FnLqH6zMpY5tnhuIvx/eF+aCmh3GGl+jrcrboJXjeZ6qhMjSsvVa/X7T
WVXXnFuZ/FdzNvxiXbHpG7UU8MD0llLqa0AetceUWH8sfk3RweJvz25O7m6kLx4m6vKbhDq7G7ucBtlbnkgvB3MpknhbyWrKAw2OsnHl6M64yw2yt6pF2Fu7BfLK9DLFgPf/9GnqHeiY2V2Kqqcv/KpAVz+aSN8QzHhWZ2Q9jBzKNdV66M4f2aahsRVu4r2ieslMEpHYu2jwmPiUriBINWdEIuilL3x+uhzpYXvC7EV9bRQH3kWhfgbtoiW63gvXgD0LbECVzqq9qT60Hc9bg5nbp8N5nZrOASOZaWQsoNuPbl40RQq83OXnjN22eA79StOuWp1asTj92gdDsYwup+F1yYdTbuutxB0nrWyg4bE6pvT/IsPHtvOr4AWeaXt2htkzr7hp2Gf8+fTaKjcjgv6srDMzQ7+pDhi93qRLEM33lujafcF668g/Dd1lFd3ocZuAEakyZl+CxSrrxoM1Z8Et8dN2I/w8WMbzSzgzqvMDbUap0Cnkx0ZzieTOapea5yEkthsD9b7MTHOJ5tgaKJbDmdfgXwKwvF16PMUO/ExkpiVVW+f0Hl0PFW0hQz17y7xWp0xiTdLG/78D38i3kI7WnPVZ4IO7zfypUSltJaudUlSKRUIsCNyw6lH/oZeLLK2fhOcRfgUEmfwOuCFpjZBaT5Y/i3Fi0tSEh3iv6JOlULluO7raQuH65qshyvcQGf4L8t6XvEG5ouCY/bG1c9uE+VNyRCKg27CKJH28GUztsHXxD8VFKhMr5xMCRR7cQEde1JLqcVa1xanKC0rNJ2tOrrCuM/A28uWST0xNjI4kXIRb1e4Q69AfcQnIobmXI8cwWe2XuQmW0YJuxTJX0J3x3UEnayu9HSpHwz8EZJb0ycItqTfp4Ox7CWMMEPgwtzmrnc1L241+FOMzsy7EC7dnvvNm5Kc4mkY/AF2HSlk3lSbEXn4m1R7Rmd47lbruu4l3WWJ0wJpoQRGweplhRFl9QO9wk+8Z4V/hn+BT605hofxoPRx+GZTXvgcj4HkGgfY66WnSrehvof+t2SirqUmfgqezda7pAOrEe9xTCBFaoF4JllnzezJbhRT7G1tZdI/EreJv5M4g1N50q6BtgM+KQ8WScaJyy9lm4ZmVV1h5/jn8sPJZ2LfyfOCLulIqsyqp1IfSuNuvYkm1pEEUKJeF54fKxZ5NtoZaXGSLnypuOuqdXwueENuJF/wkIDzMAVasW0CmWcDlfeGHgtrbKKIu52d+KxSVde2eVLaF4p6XN4Y9FnJD0lr6v6c3h9/eQS3GjXLdJSxBZvRX3rmDFX/3iJelf/GAmyESvRZVVVuGKqmVCF+2QJrR5m3a6zOq6ofgJeRF3ElObiK1Nw/3pVpeBHxFUiilTeuh/6ItztczO+4tu1W0xRPeotmqsWLKK1uxmLagHESyQW4TpxsR3pUbgL5Xdm9liYkI+MPK76elKxG/BV8Vb4b2IN/LN8K542nYrr1O2KUxRFvp+hVeRbSHKlFCFSskqXBaM6F69JLOKXqRT6YgcRK0J+Ox7bOwF375UXBVerVLgb3OuF1+AReWx5Nr5DMdy41yLpO7jRvAs3zEt9iOmFk9INUotU/l3wWNFsvGRjURjb+fgC4G+0lHz6QnjPl1G/SEuRWrxFUb3e4u/pXf1jJJgSMbGxIg+ob0h9S4poJhS+Uq+TQqpe61YLxbnjHGubSkTpeMpn/3wz66aVWL3GNfhEeAIlvcU6t+p4ULpN/DTcoBxDZ8p+T61qahYfR4X7HyAygReGXvG2KlcBHwTmm2euHgQcZWYpd2uRHXsgvgMp6vuOxcsY1sDTyNsUIfB4TjSeJy8UPxI3QrfjBq3c/qf0cF98hDE8Tam9DL77utZKgrGlMe+NG4X/Do/fLIz5OuB9ZvbV0vNOsy6lG+GxvwG2ssoEJE9AOpuW2voNeLLQn2qeK5nKX3rMy4D1UjG3JpA0D/h6ZfH2ATM7rOacG4HXV3dc6ixUB3r3pgwj2YhFUI
04sIJCgaRbgQNw98l9eGA8GrBPXOMcvFvsfNpXTkVN1Wl41tXVeL3a8WY2rzIJVttBxK7T0V6kTN1KTa1Ek3KvroUV19KEUWepxOxiePikWY4tGp6O/056ayxZm4Yt6cbEBF5VYvgnvAbxlfIar6pyyaywK0+N42paSujF2DekfvdyUeRY2Sitju+6v4a7pwR8ytLNGmNJCHfgO8xD6FSR+VH43hX94B6wcfShq1xvPnCctbfHQS5tdQntySXvMrM9a56rmsq/RTj/4NjjrUYxZDLQGOpba869GO//Ft1xqffC96EnuxPj1IkDXxlcFGfSaoHxLeAtNQH7GNNwA1h2zRUuwzeZ2ccl7Y8nMRyA18XMo74dRIwxabIlGI/eYs9E3JrvgNqU/bpWNSmSadiBWPsW8DKIqBKDueZgh3ZiF6Jxry6ktBO3wXdh++Bp9/uG3dkWwEJJxY6skK9aG184rS3vdVVWnVmHuPp/8Z3cjtbCaVt526Cuqeo1bIT3Hbud9vf7+eatjAoukvQR6qmm8n8Zd8d3KOvTnszTFBPROYzqLYb46spCdUldC9VHhWzESsiztT6F/8iL1GnhsjPnhdtzcPfWTNy/XiiF7xXuT0khFdc4I7jjfmbpavric9kHd1WVG/v1NAlO0J1wWnCxfoyW3mKqxf2qINVosK6xZIpi8VGO3XyrdH9qAv+HdbZVmRfb4aql6l8Xh0gqoUeeb1bYfad20/uF1/Apay9H+AJujIu6u0K+6nJc8XxToDzG5fj3/nSLqP9H4lfg781EjNgpieMny6XeikzIQ/AFRxIze4xS/aH1WGA82XSLRXc5t5AGWye8bsLtm+lsLHs+rYSZkSW7EyNIOt3MPpm471I8vbhI1T0UrzG7Bk9N3gafMKJSSMGVsA2wuOrSKT3mi/hu43E8g2sD4MrgxjwPOHssk2B4ro+b2ZlqtURvw7oIDjeJvKhzd2tP2V+AN1fsWTy59LwdsRtJDyQm8JgSw/54wkxKoSTZsyoE7jtq7CxSTyTp/WZ2biLekbyOEvJVxTFJB1pEVUSeWDTbKsk4qfhVuC+mM3rWeCfq4Fo+Gy++NjwR6UNm9uB4nm/UkPQ6/Lv3bDP7J3md2PuBna2HOtVRIu/ESkja0szuB+arsxFh4UuPZhSZWSHaugAv7E1xNR4/Kdq+rLw8QZXDzD4R4mLLzNNnH6WV+bgLcIS8UHIsCu1Fdt8iauSxygyQ4Yum7OOulDGp7xcokYZtrQy3VPuW/ehUDXl12J3VKZSkSCZ9VDGzQmZq8x6vE22cWLr/Jnkhe1WxY0e8j121iP3XuCZmW/wqcA7uXtwW3/l9C9+h7Vb32pRQpcH1Cg+vLFzm0C5HNZX5Ku71+QmsLJ3ZFa83jTWWHXmyEWvno7jixZdon7yLH9jr6UwH/ypedxN1+VRdS2Z2InCipMvNrC4lfxM83lKuRbuYHibBcL0rwp//ibuMXkbrc0+5hMqGrzFs/Cn7MVJp2EUxc3QCrywOqsWkKXdn3Wsazw6l1+scDVysTvmqgrnhX6Gi/194FmrKTX0RkfhVSKR5ysxM0n54pt0FkmrVOgKpGsA7raQyYWb/1+09nWqY2YNqL+soF6qXC9+nhOHPRqyEmf1L+HNvOivpj5LXjq2Jr9rLGUV/pccEijoDFtxHu1ORy8FrRMbrT5+H70DupXth8BXh/8YVAILRqrq3xiOeXMRyYsob0DmB3wdYZbcMpR0zsJo6FUpWxW8qeh1J081spZSXvGv3WbirKdY4sSCq2JH6bkk6pWZshc7ou4GZchWaNWseX5BSpenbe6pE77/qsSHjQUk74d/NNYFb8UXnYYMcGliVZCMWJ1ZJvwQvNI4ykWBthIPos1wOXt811gaYV1DjerQuyu6TQFI8uYalSitvxD6/WvHWQMrd2W9S17lQXle1EF9oHYMrxpwNzIgYr4KUYkcUM1sQvoNFacXt1uoN90789/EeM3tIXgg9O/Y8Fa
KqNDWvdcwE78U6JHr/9fJcA8jR+Gf8YrysYC18AfHv8vT7ti2adVcNGXpyYkcExTtFdxwLx/seP1JL0mcxPkmvwGvOtuxyat1zvoFEDVDksUU84wA8FlIY0EPwFfRkZih2oBrx5Jpz1sF3W/ea2W/lyhuvsnrl+7GMZSta7s5fTsDdOa7rSFoLNy674+7iaXgMryy23OYaVUt895X4jrOb+O47cMN0XXiumbhq/w/C/SkDV/d65kYOm3nbngm9p/IuBx9hFfb+awpJO5vZTaXbx+FhkBfhRq2n3mmjQDZiEdRDJb2kfc3sCkmHV++D8bnkJH0Dn5AOxoPlf8MzwLpKK9U85zw8hfw+SinkllAUCecsMrPXdDs22cjTiXcBfgD8Ev/xftEi2YWjTIjvzQz/NsBT4O+h1X+rDWupj0zDlUb2whdIt+AZr9ECZnmW6J6FcZLLkf0iuCxrDVyTqNL7bxRQulD9NjM7JnHaSJONWATFZZAewHXx6jIBe7lGstNwMDgLcBfR3+mDXI4SKeRdzvkN3hX7d+H2Znh92ysmMpaJIml7PA6wAS7Suz5whpnd1uS4JhtJT+FF76fjn8uYxF/lZSLLge+GQ4cCG5hZVHi2vOMNt1cD7jZvBpk0cInnWuWZr2rvpdZBzPsw6MhT63fCd5hfKd21HrB/6v2eCuSYWJxeFRWQa9idQKccVEod4ALiyuTFfTNxl8904E5J14eg/XhJpZDXcTxwnaTf4Ub2pXhNStM8Yy6rUxZPnogKwrCyEa4xuCtwnLxn3C1mdlL9ab0Jz+ICwD+nvRXLz8Lfq1Xchw9TH5/sueRjHOxbc1+hQDJsrIXHadegPYlsOR5Dn7LknVifCCvSb1IxSma2OPH428xsh9h94f7V8TjDHngw9/EJxsR+gxvEsdaXFeeVNfPut96knlYJwX1ymJn9Otw+GNeWTL6fo4qkV+A1WTPxlfofzaxbjdZ4hGcPpCTKa2Y/Dsdn48X7ZQN3j3URiQ676Y6Sj354OUYZSS8tuYVXwzNRe2rMOmpkI9YnuiUWRB6f7DQs6VpgXVqyVjeOJVje5XpVkd3iet3asexE5+5yInJDE0YuvPsD3A02EzgM167sqp4+SoQd8v14+cX1eFJFVd28Y6Lrt7s8ZeC6nPMAkZKPPmf5ovr2O0OHXJP1aHyhvBB3J55lZmPJCB1JshGbIKGOBby55V/oNErRFFdJv4ocNjN7vaSv4IKrT+DNNa/H3USPR85ZZSihmTcI9SjBfXsZPhHvP9nvzSAg13LsqPnrNtGlFjQFpZX+CuIuv3Kt3HjHHu0a0E/Upf3OMKIgHybvIj8Dbxm1eCrvYLMRmyByhYeyhl7bGzqRFFd5x+Ij8Fjbxmb2rPE+1zivn9TMawK1WlgUvACvb3oCYKr9kIMhPwd4oZltLVe1fyueLr9KJ7qJGrheSj4mMMba9jvDiKT78Iawl+Au4QWaIhqJKXJixwQxs80AJK1Np8rHN2tOTbo6JH0Qd5NthxdZX0h9W5dVRZ1mXhNMxeSNOs7HXXLnApjZPWEXJrmaw9vwie4fkvq6EDGzibT4gfq2L/2iW/udYeRcfE64G7g+7KqndEwsG7H+EVP5+DahN1aVlKsj3D0Nb5Wx2My6tbtflUR7PllDih39jpeMAOuY2e1q19F7Cl/0LGGwJ7rtJ6Gur1v7naHDzL5Ga44B+IO8x92UJbsT+4R6UPkI9w28q0Mt5Y42zGzBZI8l04mkq/Ci5flmNkPSQcBRZtYhEi1pjYYXRG0o0fZlFV6vo/3OMCJXR/kClQ4EZnZBw0NrjLwT6x9VdfsdqFeBH3hXRzZWA88H8GatW0paipdPzIK4qxrv7jwojKVrwIRQ9/Y7w8hFxDsQZCOWmTDb0VK3h5C2XCQjRH6cA+vqKDLHIsH7CWdrSDYkAAAE5klEQVSlZfpHUFJ5o6R18aLjFdDVVT0o9CwoMA66td8ZRqIdCJoeVJNkI9Y/evpRmt
nnwp8/lHQlA+TqKFKf+xC8z6xCgovsQEIdXyk2tlPJVX2qpC8BVzU0zCiTFN/s1n5nGOmpA8FUIBuxPtHrj3JEXR2ZyeVyfAJbTHs7k4F3VU8Ste13hpSP4l2dp0u6idCBoNkhNUs2Ys0xiq6OzOSyqZl1eAAkrTOorupJ5h24h2SOmT0ib79zYsNjmhBB0Wc3XHFFwANm9o+Gh9UoOTuxIeRt2F8t6XS8x9UlxbGmx5YZDiSdh7dQubfmMSORlZdpMYhScE2SjVhDhDjYUtzVMQN3Ad0+lSvvM70hV55/OS1R5/XxXddHY4/vpxpGphkGWQquKbIRawitok7DmalDRAOxEIF9NPJws5oGqJnhYNCk4AaBbMQymUxmSJA0HzjOzAZFCq5xcmJHJjMiSIq6EQvM7MuTNZZMf5F0Be4qfg4DJAU3CGQjlsmMDrmub3SZ0/QABpXsTsxkMpkhQdJmwP+Y2d/D7bXxVjxLGh1Yg2QjlsmMGJKmAUfR2eYnJ3YMOZIW4YosT4bbawE3mdn2zY6sOYa9ej2TyXTyHbwP3F7AAmBTYEWjI8r0izUKAwYQ/l6rwfE0TjZimczo8XIzOwl41My+jWsHZiWY0eAvklYmcUjaD/hrg+NpnJzYkcmMHoUM0SOStgYeAl7Q4Hgy/eNo4LuSvh5u/wl4d4PjaZxsxDKZ0eM8SRsCJ+Fisc8GTm52SJk+8YyZ7Ria6GJmfwvJHlOWnNiRyWQyQ4KkO8xsRuXYYjPbrqkxNU3eiWUyI0Iudh5dJG2JZ5uuL+mA0l3r0d69e8qRjVgmMzoUxc5bANvjrkSAfRm8zs6Z3tgCeAuwAf55FqwA3tfIiAaE7E7MZEYMSdcD+4SGq0h6DvBTM9u12ZFlJoqk15nZLU2PY5DIKfaZzOjxQuDJ0u0nw7HM8LO/pPUkrSnpWkl/kTSr6UE1STZimczocTFwu6RTJJ0C3AZc1OiIMv3iTWa2HHctLsH7yQ11t+qJkmNimcyIYWafl3QVMDMcOtLM7mxyTJm+sWb4fx9gvpktk9TkeBonG7FMZgQxszuAO5oeR6bvXCHpfrwT/DGSng/8veExNUpO7MhkMpkhQtJzgWVm9rSkdYHnmNlDTY+rKXJMLJPJZIYESesAxwLnhEObAK9pbkTNk41YJpPJDA9z8WzTncLtpcBpzQ2nebIRy2QymeFhupmdSRB5NrPHgCmd2ZGNWCaTyQwPT4ZuzgYgaTrwRLNDapacnZjJZDLDwynA1cBLJH0X2Bk4oskBNU3OTsxkMpkhQtLzgB1xN+KtZpabYmYymUxm8JE0D1gA3GBm9zc9nkEg78QymUxmSJC0B67EMhOYDtwJXG9mZzU6sAbJRiyTyWSGCEmr46129gCOBh43sy2bHVVzZHdiJpPJDAmSrgXWBW4BbgC2N7M/NzuqZskp9plMJjM83IMXO28NbANsHVLupyzZnZjJZDJDRmh0egRwArCxmT2r2RE1R3YnZjKZzJAg6YN4Usd2eD+xC3G34pQlG7FMJpMZHqYBXwYWm9lTTQ9mEMjuxEwmk8kMLTmxI5PJZDJDSzZimUwmkxlashHLZDKZzNCSjVgmk8lkhpb/B4i3UEk6z0/gAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "tags": [], - "needs_background": "light" - } - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6R2-0v5Z4hMJ" - }, - "source": [ - "## Most occurding `flight_time` tagged entities" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ks6NDXg7RXG3", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 368 - }, - "outputId": "dbf2e7ee-84fa-4d7d-abee-c5a11babfc5d" - }, - "source": [ - "ner_type_to_viz = 'flight_time'\n", - "ner_df[ner_df.entities_confidence == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring ORG labeled entities in the dataset')" - ], - "execution_count": 5, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 5 - }, - { - "output_type": "display_data", - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "tags": [], - "needs_background": "light" - } - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "67MNeUed5W0y" - }, - "source": [ - "" - ], - "execution_count": 5, - "outputs": [] - } - ] -} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NER_aspect_airline_ATIS.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"NYQRU3pRO146"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/component_examples/named_entity_recognition_(NER)/NER_aspect_airline_ATIS.ipynb)\n","\n","\n","Named entities are phrases that contain the names of persons, organizations, locations, times and quantities. Example:\n","
\n","
\n","\n","#Content\n","ATIS dataset provides large number of messages and their associated intents that can be used in training a classifier. Within a chatbot, intent refers to the goal the customer has in mind when typing in a question or comment. While entity refers to the modifier the customer uses to describe their issue, the intent is what they really mean. For example, a user says, ‘I need new shoes.’ The intent behind the message is to browse the footwear on offer. Understanding the intent of the customer is key to implementing a successful chatbot experience for end-user.\n","https://www.kaggle.com/hassanamin/atis-airlinetravelinformationsystem\n","
\n","
\n","\n","|Tags predicted by this model | \t\n","|------|\n"," | O|\n"," | I-depart_time.end_time|\n"," | B-arrive_date.date_relative|\n"," | I-fromloc.state_name|\n"," | B-depart_date.date_relative|\n"," | B-fromloc.state_code|\n"," | B-meal_description|\n"," | B-depart_time.time_relative|\n"," | I-fare_amount|\n"," | I-fromloc.city_name|\n"," | B-booking_class|\n"," | I-arrive_time.end_time|\n"," | B-return_date.today_relative|\n"," | B-fromloc.state_name|\n"," | B-round_trip|\n"," | B-depart_date.today_relative|\n"," | I-return_date.day_number|\n"," | I-depart_time.start_time|\n"," | B-period_of_day|\n"," | B-arrive_date.day_number|\n"," | B-flight_stop|\n"," | B-depart_date.day_name|\n"," | I-stoploc.city_name|\n"," | I-return_date.today_relative|\n"," | B-class_type|\n"," | B-stoploc.state_code|\n"," | B-economy|\n"," | B-depart_time.end_time|\n"," | B-return_date.date_relative|\n"," | I-fromloc.airport_name|\n"," | B-arrive_date.month_name|\n"," | I-flight_mod|\n"," | B-toloc.airport_code|\n"," | I-depart_time.end_time|\n"," | B-airline_code|\n"," | B-flight_mod|\n"," | B-cost_relative|\n"," | B-state_name|\n"," | B-fromloc.city_name|\n"," | B-depart_time.period_of_day|\n"," | I-city_name|\n"," | B-depart_time.period_mod|\n"," | B-city_name|\n"," | B-meal|\n"," | B-return_date.day_number|\n"," | I-airline_name|\n"," | I-restriction_code|\n"," | B-airline_name|\n"," | B-restriction_code|\n"," | B-flight|\n"," | B-transport_type|\n"," | B-time_relative|\n"," | B-arrive_time.time_relative|\n"," | B-fromloc.airport_code|\n"," | B-time|\n"," | I-toloc.city_name|\n"," | B-toloc.state_name|\n"," | B-meal_code|\n"," | I-arrive_date.day_number|\n"," | B-depart_time.start_time|\n"," | B-month_name|\n"," | B-fromloc.airport_name|\n"," | B-flight_number|\n"," | B-days_code|\n"," | I-meal_description|\n"," | B-fare_basis_code|\n"," | I-cost_relative|\n"," | I-time|\n"," | B-return_time.period_of_day|\n"," | I-depart_time.time|\n"," | B-depart_date.day_number|\n"," | 
I-economy|\n"," | B-arrive_time.start_time|\n"," | B-return_date.day_name|\n"," | B-return_time.period_mod|\n"," | B-airport_code|\n"," | B-stoploc.airport_code|\n"," | B-flight_time|\n"," | I-transport_type|\n"," | B-depart_date.month_name|\n"," | I-toloc.airport_name|\n"," | B-today_relative|\n"," | I-arrive_time.period_of_day|\n"," | B-day_name|\n"," | B-toloc.city_name|\n"," | B-connect|\n"," | I-round_trip|\n"," | B-depart_time.time|\n"," | B-airport_name|\n"," | B-arrive_time.period_of_day|\n"," | B-stoploc.airport_name|\n"," | I-class_type|\n"," | B-aircraft_code|\n"," | I-return_date.date_relative|\n"," | B-toloc.country_name|\n"," | I-flight_number|\n"," | B-state_code|\n"," | B-or|\n"," | I-depart_date.today_relative|\n"," | B-toloc.airport_name|\n"," | I-arrive_time.time|\n"," | I-flight_time|\n"," | I-state_name|\n"," | I-airport_name|\n"," | I-depart_time.period_of_day|\n"," | B-arrive_time.time|\n"," | B-depart_date.year|\n"," | I-flight_stop|\n"," | I-toloc.state_name|\n"," | B-arrive_date.day_name|\n"," | B-compartment|\n"," | I-depart_date.day_number|\n"," | I-meal_code|\n"," | B-arrive_time.end_time|\n"," | I-today_relative|\n"," | I-arrive_time.start_time|\n"," | B-toloc.state_code|\n"," | B-day_number|\n"," | I-arrive_time.time_relative|\n"," | I-fare_basis_code|\n"," | I-depart_time.time_relative|\n"," | B-return_date.month_name|\n"," | B-stoploc.city_name|\n"," | B-arrive_time.period_mod|\n"," | B-fare_amount|\n"," | B-mod|\n"," | B-arrive_date.today_relative|\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n"]},{"cell_type":"code","metadata":{"id":"M2-GiYL6xurJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614376587174,"user_tz":-60,"elapsed":68019,"user":{"displayName":"Christian Kasim 
Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"047d7d04-a754-4a32-ccdc-92efa7dae102"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null\n","! wget http://ckl-it.de/wp-content/uploads/2021/01/atis_intents.csv"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Looking in indexes: https://test.pypi.org/simple/, https://pypi.org/simple\n","Collecting nlu_test==1.1.3rc2\n","\u001b[?25l Downloading https://test-files.pythonhosted.org/packages/5c/84/241410ba610c9281afc8e1cffaa352f5ca83fe6e2574f1cfcdf3334dc81f/nlu_test-1.1.3rc2-py3-none-any.whl (158kB)\n","\u001b[K |████████████████████████████████| 163kB 4.6MB/s \n","\u001b[?25hCollecting spark-nlp<2.8,>=2.7.1\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/8d/a5/a5130215b43f3bd0e98bd16c471d36dafeab8855ca17789d4927337fa7dc/spark_nlp-2.7.4-py2.py3-none-any.whl (139kB)\n","\u001b[K |████████████████████████████████| 143kB 5.2MB/s \n","\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (1.19.5)\n","Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (3.0.0)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (1.1.5)\n","Collecting dataclasses\n"," Downloading https://files.pythonhosted.org/packages/26/2f/1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d/dataclasses-0.6-py3-none-any.whl\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from 
pandas->nlu_test==1.1.3rc2) (2.8.1)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->nlu_test==1.1.3rc2) (2018.9)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->nlu_test==1.1.3rc2) (1.15.0)\n","Installing collected packages: spark-nlp, dataclasses, nlu-test\n","Successfully installed dataclasses-0.6 nlu-test-1.1.3rc2 spark-nlp-2.7.4\n","Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 58kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 18.6MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=9c99372c9e98f2f158460e074286800e94eadb7de413c968d281cede0811628f\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n","--2021-02-26 21:56:25-- http://ckl-it.de/wp-content/uploads/2021/01/atis_intents.csv\n","Resolving ckl-it.de (ckl-it.de)... 217.160.0.108, 2001:8d8:100f:f000::209\n","Connecting to ckl-it.de (ckl-it.de)|217.160.0.108|:80... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 391936 (383K) [text/csv]\n","Saving to: ‘atis_intents.csv’\n","\n","atis_intents.csv 100%[===================>] 382.75K 693KB/s in 0.6s \n","\n","2021-02-26 21:56:26 (693 KB/s) - ‘atis_intents.csv’ saved [391936/391936]\n","\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"Gph8XOL1Pzpl"},"source":["# NLU makes NER easy. \n","\n","You just need to load the NER model via ner.load() and predict on some dataset. \n","It could be a pandas dataframe with a column named text or just an array of strings."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":736},"id":"pmpZSNvGlyZQ","executionInfo":{"status":"ok","timestamp":1614377034668,"user_tz":-60,"elapsed":515492,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"19c3fb2b-6be4-4fc3-8aab-e836ef632fb5"},"source":["import nlu \n","import pandas as pd\n","\n","df = pd.read_csv(\"atis_intents.csv\")\n","df.columns = [\"flight\",\"text\"]\n","ner_df = nlu.load('en.ner.aspect.airline',).predict(df[\"text\"],output_level='chunk')\n","ner_df"],"execution_count":null,"outputs":[{"output_type":"stream","text":["nerdl_atis_840b_300d download started this may take some time.\n","Approximate size to download 14.5 MB\n","[OK!]\n","glove_840B_300 download started this may take some time.\n","Approximate size to download 2.3 GB\n","[OK!]\n","\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ner_confidenceword_embeddingsentitiesentities_class
origin_index
0[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...[[-0.038548000156879425, 0.5425199866294861, -...pittsburghfromloc.city_name
0[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...[[-0.038548000156879425, 0.5425199866294861, -...baltimoretoloc.city_name
0[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...[[-0.038548000156879425, 0.5425199866294861, -...thursdaydepart_date.day_name
0[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...[[-0.038548000156879425, 0.5425199866294861, -...morningdepart_time.period_of_day
1[1.0, 1.0, 0.9991999864578247, 0.8240000009536...[[-0.038548000156879425, 0.5425199866294861, -...arrival timeflight_time
...............
4975[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998000264167...[[-0.13562999665737152, 0.3321700096130371, -0...san franciscotoloc.city_name
4975[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998000264167...[[-0.13562999665737152, 0.3321700096130371, -0...denverstoploc.city_name
4976[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...[[-0.08496099710464478, 0.5019999742507935, 0....deltaairline_name
4976[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...[[-0.08496099710464478, 0.5019999742507935, 0....denverfromloc.city_name
4976[1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ...[[-0.08496099710464478, 0.5019999742507935, 0....san franciscotoloc.city_name
\n","

16673 rows × 4 columns

\n","
"],"text/plain":[" ner_confidence ... entities_class\n","origin_index ... \n","0 [1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ... ... fromloc.city_name\n","0 [1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ... ... toloc.city_name\n","0 [1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ... ... depart_date.day_name\n","0 [1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ... ... depart_time.period_of_day\n","1 [1.0, 1.0, 0.9991999864578247, 0.8240000009536... ... flight_time\n","... ... ... ...\n","4975 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998000264167... ... toloc.city_name\n","4975 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998000264167... ... stoploc.city_name\n","4976 [1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ... ... airline_name\n","4976 [1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ... ... fromloc.city_name\n","4976 [1.0, 0.9998999834060669, 1.0, 1.0, 1.0, 1.0, ... ... toloc.city_name\n","\n","[16673 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"STc7iOwtljGo"},"source":["## Lets explore our data which the predicted NER tags and visalize them! 
\n","\n","We specify [1:] so we dont see the count for the O-tag wich is the most common, since most words in a sentence are not named entities and thus not part of a chunk"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":938},"id":"UDSAYjadlfdK","executionInfo":{"status":"ok","timestamp":1614377035646,"user_tz":-60,"elapsed":516465,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"66af0ab4-45f1-4041-b0ff-e79f94745bfb"},"source":["ner_df['entities'].value_counts()[0:50].plot.bar(title='Occurence of Named Entities in dataset', figsize=(20,14))"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":3},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"YO6d6VYi4aJQ"},"source":["## Most occurding `fromloc.city_name` tagged entities"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":385},"id":"rlcEvP9tOSiy","executionInfo":{"status":"ok","timestamp":1614377131170,"user_tz":-60,"elapsed":1690,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"3b87033d-7691-4e83-ab77-16c771f178c4"},"source":["ner_type_to_viz = 'fromloc.city_name'\n","ner_df[ner_df.entities_class == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring fromloc.city_name labeled entities in the dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":6},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"6R2-0v5Z4hMJ"},"source":["## Most occurding `flight_time` tagged entities"]},{"cell_type":"code","metadata":{"id":"ks6NDXg7RXG3","colab":{"base_uri":"https://localhost:8080/","height":367},"executionInfo":{"status":"ok","timestamp":1614377131171,"user_tz":-60,"elapsed":778,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"30e830d7-154c-4d63-c921-8468e4be4c1f"},"source":["ner_type_to_viz = 'flight_time'\n","ner_df[ner_df.entities_class == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring ORG labeled entities in the dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":7},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"67MNeUed5W0y"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_CONLL_2003_5class_example.ipynb b/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_CONLL_2003_5class_example.ipynb index a0556d8b..096288bb 100644 --- a/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_CONLL_2003_5class_example.ipynb +++ b/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_CONLL_2003_5class_example.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_ner_CONLL_2003_5class_example.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"NYQRU3pRO146"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_CONLL_2003_5class_example.ipynb)\n","\n","\n","Named entities are phrases that contain the names of persons, organizations, locations, times and quantities. Example:\n","
\n","
\n","[ORG **U.N.** ] official [PER **Ekeus** ] heads for [LOC **Baghdad** ] . \n","
\n","\n","https://www.aclweb.org/anthology/W03-0419.pdf \n","CoNLL-2003 is a NER dataset that available in English and German. NLU provides pretrained languages for both of these languages.\n","\n","It features **5 classes** of tags, **LOC (location)** , **ORG(Organisation)**, **PER(Persons)** and the forth which describes all the named entities which do not belong to any of the thre previously mentioned tags **(MISC)**. \n","The fifth class **(O)** is used for tokens which belong to no named entity.\n","\n","\n","\n","\n","\n","|Tag | \tDescription |\n","|------|--------------|\n","|PER | A person like **Jim** or **Joe** |\n","|ORG | An organisation like **Microsoft** or **PETA**|\n","|LOC | A location like **Germany**|\n","|MISC | Anything else like **Playstation** |\n","|O| Everything that is not an entity. | \n","\n","\n","The shared task of [CoNLL-2003 concerns](https://www.clips.uantwerpen.be/conll2003/) language-independent named entity recognition. We will concentrate on four types of named entities: persons, locations, organizations and names of miscellaneous entities that do not belong to the previous three groups. The participants of the shared task will be offered training and test data for two languages. They will use the data for developing a named-entity recognition system that includes a machine learning component. For each language, additional information (lists of names and non-annotated data) will be supplied as well. The challenge for the participants is to find ways of incorporating this information in their system.\n","\n","\n","\n","\n","\n","\n","\n","\n","\n"]},{"cell_type":"code","metadata":{"id":"M2-GiYL6xurJ"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! 
pip install nlu pyspark==2.4.7 > /dev/null\n"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Gph8XOL1Pzpl"},"source":["# NLU makes NER easy. \n","\n","You just need to load the NER model via ner.load() and predict on some dataset. \n","It could be a pandas dataframe with a column named text or just an array of strings."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":442},"id":"pmpZSNvGlyZQ","executionInfo":{"status":"ok","timestamp":1605842814212,"user_tz":-60,"elapsed":15191,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"0e995509-0682-4aa9-c704-adbde9954c94"},"source":["import nlu \n","\n","example_text = [\"A person like Jim or Joe\", \n"," \"An organisation like Microsoft or PETA\",\n"," \"A location like Germany\",\n"," \"Anything else like Playstation\", \n"," \"Person consisting of multiple tokens like Angela Merkel or Donald Trump\",\n"," \"Organisations consisting of multiple tokens like JP Morgan\",\n"," \"Locations consiting of multiple tokens like Los Angeles\", \n"," \"Anything else made up of multiple tokens like Super Nintendo\",]\n","\n","nlu.load('ner').predict(example_text)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["onto_recognize_entities_sm download started this may take some time.\n","Approx size to download 159 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
entities_confidenceembeddingsentities
origin_index
0PERSON[[-0.2708599865436554, 0.04400600120425224, -0...Jim
0PERSON[[-0.2708599865436554, 0.04400600120425224, -0...Joe
1ORG[[-0.4214000105857849, -0.18796999752521515, 0...Microsoft
1ORG[[-0.4214000105857849, -0.18796999752521515, 0...PETA
2GPE[[-0.2708599865436554, 0.04400600120425224, -0...Germany
3PRODUCT[[-0.029784999787807465, 0.08645900338888168, ...Playstation
4PERSON[[0.3870899975299835, 0.3262900114059448, 0.64...Angela Merkel
4PERSON[[0.3870899975299835, 0.3262900114059448, 0.64...Donald Trump
5ORG[[-0.19327999651432037, 0.6523399949073792, -1...JP Morgan
6GPE[[0.06345599889755249, -0.042142000049352646, ...Los Angeles
7PRODUCT[[-0.029784999787807465, 0.08645900338888168, ...Super Nintendo
\n","
"],"text/plain":[" entities_confidence ... entities\n","origin_index ... \n","0 PERSON ... Jim\n","0 PERSON ... Joe\n","1 ORG ... Microsoft\n","1 ORG ... PETA\n","2 GPE ... Germany\n","3 PRODUCT ... Playstation\n","4 PERSON ... Angela Merkel\n","4 PERSON ... Donald Trump\n","5 ORG ... JP Morgan\n","6 GPE ... Los Angeles\n","7 PRODUCT ... Super Nintendo\n","\n","[11 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"qgGdEUgkMika","executionInfo":{"status":"ok","timestamp":1605842820717,"user_tz":-60,"elapsed":21676,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"6df9b401-de99-476d-c6b4-ea9cff2fa2d0"},"source":["text = [\"Barclays misled shareholders and the public about one of the biggest investments in the bank's history, a BBC Panorama investigation has found.\",\n","\"The bank announced in 2008 that Manchester City owner Sheikh Mansour had agreed to invest more than £3bn.\",\n","\"But the BBC found that the money, which helped Barclays avoid a bailout by British taxpayers, actually came from the Abu Dhabi government.\",\n","\"Barclays said the mistake in its accounts was 'a drafting error'.\",\n","\"Unlike RBS and Lloyds TSB, Barclays narrowly avoided having to request a government bailout late in 2008 after it was rescued by £7bn worth of new investment, most of which came from the Gulf states of Qatar and Abu Dhabi.\",\n","\"The S&P 500's price to earnings multiple is 71% higher than Apple's, and if Apple were simply valued at the same multiple, its share price would be $840, which is 52% higher than its current price.\",\n","\"Alice has a cat named Alice and also a dog named Alice and also a parrot named Alice, it is her favorite name!\"\n","] + example_text\n","ner_df = nlu.load('ner').predict(text, output_level= 
'chunk')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["onto_recognize_entities_sm download started this may take some time.\n","Approx size to download 159 MB\n","[OK!]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"5nhKQZPpSRxv","executionInfo":{"status":"ok","timestamp":1605842822177,"user_tz":-60,"elapsed":23119,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"1bc02780-4849-48c7-e0b2-36db9c549df6"},"source":["ner_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
entities_confidenceembeddingsentities
origin_index
0ORG[[0.044123999774456024, -0.47940999269485474, ...Barclays
0CARDINAL[[0.044123999774456024, -0.47940999269485474, ...about one
0ORG[[0.044123999774456024, -0.47940999269485474, ...BBC Panorama
1DATE[[-0.03819400072097778, -0.24487000703811646, ...2008
1GPE[[-0.03819400072097778, -0.24487000703811646, ...Manchester City
1PERSON[[-0.03819400072097778, -0.24487000703811646, ...Sheikh Mansour
1MONEY[[-0.03819400072097778, -0.24487000703811646, ...more than £3bn
2ORG[[-0.05707800015807152, 0.3987399935722351, 0....BBC
2ORG[[-0.05707800015807152, 0.3987399935722351, 0....Barclays
2NORP[[-0.05707800015807152, 0.3987399935722351, 0....British
2GPE[[-0.05707800015807152, 0.3987399935722351, 0....Abu Dhabi
3ORG[[0.044123999774456024, -0.47940999269485474, ...Barclays
4ORG[[-0.32710000872612, 0.4879100024700165, 0.416...RBS
4ORG[[-0.32710000872612, 0.4879100024700165, 0.416...Lloyds TSB
4ORG[[-0.32710000872612, 0.4879100024700165, 0.416...Barclays
4DATE[[-0.32710000872612, 0.4879100024700165, 0.416...2008
4MONEY[[-0.32710000872612, 0.4879100024700165, 0.416...7bn
4LOC[[-0.32710000872612, 0.4879100024700165, 0.416...Gulf
4GPE[[-0.32710000872612, 0.4879100024700165, 0.416...Qatar
4GPE[[-0.32710000872612, 0.4879100024700165, 0.416...Abu Dhabi
5ORG[[-0.03819400072097778, -0.24487000703811646, ...S&P
5DATE[[-0.03819400072097778, -0.24487000703811646, ...500's
5PERCENT[[-0.03819400072097778, -0.24487000703811646, ...71%
5ORG[[-0.03819400072097778, -0.24487000703811646, ...Apple
5ORG[[-0.03819400072097778, -0.24487000703811646, ...Apple
5CARDINAL[[-0.03819400072097778, -0.24487000703811646, ...$840
5PERCENT[[-0.03819400072097778, -0.24487000703811646, ...52%
6PERSON[[0.28501999378204346, -0.4355500042438507, 0....Alice
6PERSON[[0.28501999378204346, -0.4355500042438507, 0....Alice
6PERSON[[0.28501999378204346, -0.4355500042438507, 0....Alice
6PERSON[[0.28501999378204346, -0.4355500042438507, 0....Alice
7PERSON[[-0.2708599865436554, 0.04400600120425224, -0...Jim
7PERSON[[-0.2708599865436554, 0.04400600120425224, -0...Joe
8ORG[[-0.4214000105857849, -0.18796999752521515, 0...Microsoft
8ORG[[-0.4214000105857849, -0.18796999752521515, 0...PETA
9GPE[[-0.2708599865436554, 0.04400600120425224, -0...Germany
10PRODUCT[[-0.029784999787807465, 0.08645900338888168, ...Playstation
11PERSON[[0.3870899975299835, 0.3262900114059448, 0.64...Angela Merkel
11PERSON[[0.3870899975299835, 0.3262900114059448, 0.64...Donald Trump
12ORG[[-0.19327999651432037, 0.6523399949073792, -1...JP Morgan
13GPE[[0.06345599889755249, -0.042142000049352646, ...Los Angeles
14PRODUCT[[-0.029784999787807465, 0.08645900338888168, ...Super Nintendo
\n","
"],"text/plain":[" entities_confidence ... entities\n","origin_index ... \n","0 ORG ... Barclays\n","0 CARDINAL ... about one\n","0 ORG ... BBC Panorama\n","1 DATE ... 2008\n","1 GPE ... Manchester City\n","1 PERSON ... Sheikh Mansour\n","1 MONEY ... more than £3bn\n","2 ORG ... BBC\n","2 ORG ... Barclays\n","2 NORP ... British\n","2 GPE ... Abu Dhabi\n","3 ORG ... Barclays\n","4 ORG ... RBS\n","4 ORG ... Lloyds TSB\n","4 ORG ... Barclays\n","4 DATE ... 2008\n","4 MONEY ... 7bn\n","4 LOC ... Gulf\n","4 GPE ... Qatar\n","4 GPE ... Abu Dhabi\n","5 ORG ... S&P\n","5 DATE ... 500's\n","5 PERCENT ... 71%\n","5 ORG ... Apple\n","5 ORG ... Apple\n","5 CARDINAL ... $840\n","5 PERCENT ... 52%\n","6 PERSON ... Alice\n","6 PERSON ... Alice\n","6 PERSON ... Alice\n","6 PERSON ... Alice\n","7 PERSON ... Jim\n","7 PERSON ... Joe\n","8 ORG ... Microsoft\n","8 ORG ... PETA\n","9 GPE ... Germany\n","10 PRODUCT ... Playstation\n","11 PERSON ... Angela Merkel\n","11 PERSON ... Donald Trump\n","12 ORG ... JP Morgan\n","13 GPE ... Los Angeles\n","14 PRODUCT ... Super Nintendo\n","\n","[42 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"markdown","metadata":{"id":"STc7iOwtljGo"},"source":["## Lets explore our data which the predicted NER tags and visalize them! 
\n","\n","We specify [1:] so we dont se the count for the O-tag wich is the most common, since most words in a sentence are not named entities and thus not part of a chunk"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":370},"id":"UDSAYjadlfdK","executionInfo":{"status":"ok","timestamp":1605842822185,"user_tz":-60,"elapsed":23112,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"19a5af50-6e11-4692-b4a1-4a35b00f4a86"},"source":["ner_df['entities'].value_counts()[1:].plot.bar(title='Occurence of Named Entity tokens in dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":12},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":310},"id":"rlcEvP9tOSiy","executionInfo":{"status":"ok","timestamp":1605842822570,"user_tz":-60,"elapsed":23484,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"46f15995-e4e3-43a2-91f9-dba065d92b7b"},"source":["ner_type_to_viz = 'LOC'\n","ner_df[ner_df.entities_confidence == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring LOC labeled tokens in the dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":13},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":363},"id":"ks6NDXg7RXG3","executionInfo":{"status":"ok","timestamp":1605842822587,"user_tz":-60,"elapsed":23460,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b2995949-a229-416a-f780-c8cffc83c5cb"},"source":["ner_type_to_viz = 'ORG'\n","ner_df[ner_df.entities_confidence == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring ORG labeled tokens in the dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":14},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXQAAAFICAYAAABA2wWFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3debgcVZnH8e8vC4sQEiTXkS0JCCMIw2bYhmUQdGQTRgQBFQXB6CgibiOgItsILoOiKBhlFwEFVEBWlYCoICEkLIIaNgFZQghLWA2888c5TSqdvrf7Jt3VfSu/z/P0c7uWrvPe7uq3q06dOkcRgZmZDX3Duh2AmZm1hxO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTiht0jSuyU9KGmupI27HU+nSTpV0pe7HUc7Sdpf0g0trnuUpB8vYjldeW2DbW0n6aF2bGsRy3+/pKvbuL0pkg5q1/bqtn2mpOM6se0y9UxCl3S/pJclja2bf6ukkDRhMbcfktZajE18Ezg4IpYH5uTtjVicmHpZRHwsIo7txLYlLS3peEl/l/SCpL9J+rwkFdaZIunF/AP6hKSLJa1ct521JZ0vaZakZ/J2vitptU7E3W3dTtCDFRHnRsR/Lspr2/nD1m6d/GFZ3HJ6JqFn9wH71iYk/Rvwuu6Fs4DxwJ3dDqIMkoZ3uIifATsAOwOjgP2AScBJdevVfkDXApYn/ajWYlwLuAn4B7BxRKwAbAXcA2zd4fjNelNE9MQDuB/4EnBzYd43gS8CAUzI80YDZwOzgAfya4blZWsB1wFPA08AF+T51+dtPAfMBfZuUP6wvK0HgMdzGaOBpfNraq+/B/h7np6bH1vmbXwYuAuYA1wFjC9sP4CPAX8DngK+B6if92Jp4NukZPWP/HzpwvLdgenAMzmeHfP81wNn5NfMAX6R5+8P3FBXRgBr5ednAqcAl+f/8e153nF5+XbAQ8Bn83vzCHBAYVsrAZfmeG4Gjqsvr7DuDsCLwOp18zcHXinENAU4qLD848CdhekfA5cOch9b4H0g/YA8mOO+BdimsOwo4ELgAuBZYBqwYWH5KsBFpP3wPuCQutf+uDC9BfCH/LnPALYrLF
uDtM8+C1wDnFx8bWG95YAXgFeZv9+tMtC+UvvcCts4BPgzsFp+3TdJ+/JjwKnAsi1+3jvn7TwLPAx8rsX3u6XvALAj8DLwz/x/zijsE8cCv89lXw2MbeV9blDGxvkzfTZ/xuczf39fEbgsf7Zz8vPV8rL/Je2nL+bYTm5hX9oMmJqXPQac2Czm/sppuo8vbiJu14OU0N8O/AVYFxied6rxLJjQzwZ+STqymwD8FTgwLzuP9AMwDFgG2LpRAuun/A8DM4E1SUeDFwPnNHp9LjeAEYXlu+fXrwuMIP04/KHu9ZcBY4BxeWfZsZ9YjgFuBN4A9OUP/NjCzvE08I78f64KrJOX/SrvnCsCI4H/aPTFavD/nJm3uVXhvTuTBRP6vBzXSNIX+nlgxbz8/Px4HfAW0o7dX0I/Abiun2UPAB8tfHkPys9XAn4N/LKw7qPA/oPcxxZ4H4AP5G2PICWvR4Fl8rKjSAllz/w/f46UuEfm9+gW4EhgqbzP3Au8s/DaH+fnqwKz83s2LH9us4G+vPyPwImkBLstKcEslNALn8NDdfMG2ldeWz/HOq1Q7reAS0gHAaNIP8jHt/h5P0JOWKR9bZMW3+/BfAdeew8L86aQDmD+FVg2T5/Qyvtct52lSPvap/P/t2f+rGv7+0rAe0j78yjSGeUv6uI4qG6bA+1LfwT2y8+XB7Zocd9YqJym+/hgVu7kg/kJ/UvA8aRf6WvyGxSkJDqc9Mv9lsLrPgpMyc/PBiaTf03rtt8sof8G+Hhh+s35Qx5R/3oaJ/QryD8seXoY6UswvvD64g/MT4HD+onlHmDnwvQ7gfvz8x8A32rwmpVJR28rNvtiNfh/zgTOrlt+Jgsm9Bfq/t/HSUcXw/P79ObCsoGO0H8EnN/PshuBLxZ25udJPzRBOiMZV1h3HoVkABxMOsqZC/ywn+0v9D7ULZ9DPgonJZQb6z7PR4BtSGcTf6977eHAGYXX1hL6FygcGOR5VwEfIiW1ecByhWU/YXAJfaB9ZTvSEfSJwA3A6DxfpDOxNxVetyVwX7PPOz//O+l7t0KT7/QC7zeD+w689h4W5k0BvlSY/jhwZbP3ucG2tyWdzagw7w/k/b3B+hsBc+riGDDR1u1L1wNHUzibaCXmVsqpf/RaHTrAOcD7SDvD2XXLxpJ+UR8ozHuA9EsH8D+knfVPku6U9OFBlLtKg+2OAP6lxdePB06S9JSkp4AncyyrFtZ5tPD8edKvdauxrJKfr076EtdbHXgyIua0GG+9B5ssnx0R8wrTtfj7SO9T8fUDbesJ0o9PIyvn5TWHRMRoYAPSkWDxYufs4nYi4uSIGEOqchg58L+SSPqcpLskPZ0/s9GkfWyh/yMiXiWdMa5C+qxXqX3W+bVH0HhfGQ/sVbfu1jn2VUiJ4rnC+g802MZABtpXIB0NTyIdfT+d5/WRjj5vKcR0ZZ5f09/nDenodWfgAUnXSdpyEPG2+h0Y7OsHep/rrQI8HDlrZq+9h5JeJ+kHkh6Q9AwpIY8Z6NpSk33pQNJZxd2Sbpa06yLE3JKeS+gR8QDp1HZnUrVH0ROko8HxhXnjSEchRMSjEfGRiFiFdATx/UG0bPlHg+3OI9V5LRRmg3kPkqoLxhQey0bEH1osv1ks/yiU86Z+yn+9pDENlj1H4eKypDc2WKfR/9SKWaT3qZhsVx9g/V8Dm0taYB1Jm+fX/XahwCJuJx31f6/QEuY3wB6LGDOStiEdALyXdFYzhnQ2oMJqqxfWH0b6H/9Beq/vq/usR0XEzg2KepB0FFZcd7mIOIF0xL+ipOUK648bIOxGn9FA+wqkI8VdgTMkbZXnPUE6Al+vENPoSBegm4qImyNid1I1zy9IR9rtNtj9caD3ud4jwKrFVlUs+L5/lnSGvnmki+3b5vm19ReIrdm+FBF/i4h9Se/X14AL82feLOZBfyd7LqFnBwLb1x25EBGvkHae/5U0StJ44DOkC2RI2qvQZG0O6Q15NU8/Rqrr7M
95wKclrSFpeeCrpIuq8xqsOytvt7i9U4HDJa2XYxktaa+W/+OFY/mSpL7cjPNI8v8InAYcIGkHScMkrSppnYh4hFTt831JK0oaKam2I84A1pO0kaRlSKezbZE/k4uBo/KRzTrABwdY/9ekZHyRpPUkDZe0Rf7/TomIv/Xz0rNIR8C75emjgG0knShpVYD8Xq3bYuijSD9Es4ARko4EVqhb562S9sjNUw8FXiJVC/0JeFbSFyQtm/+H9SVt2qCcHwPvkvTOvN4yufnhavngZSpwtKSlJG0NvGuAmB8DVpI0ujBvoH0FgIiYArwfuFjSZvls44fAtyS9ASDvR+9s9qblON8vaXRE/JN0oe/VZq9bBI8BE/IPaSv6fZ8brPtH0md/SP6e7EG6NlUzivSD95Sk1wNfaRDbmnXr97svSfqApL78vj+VZ7/aQszNctZCejKhR8Q9ETG1n8WfJB1x3kuqF/wJcHpetilwk6S5pAs+n4qIe/Oyo4Cz8qnNexts93RSdc/1pDOEF3NZjeJ7nnQV+vd5e1tExM9Jv77n59O0O4CdBvFvFx1H+qLfBtxOuph1XC77T8ABpItaT5NaSNSO0PYjncHcTarzPDS/5q+kC1y/JrUwaOnmmkE4mHSK+SjpPTyPlPz68x7gWtJp/lzSjn0a/bzfABHxMqklwZfz9F9JddmrATMkPUtq/fCP2jpNXJXL/yvpdPtFFq4q+iWwN+ngYD9gj4j4Z/4R25VUt3of6Yj3R/k9qI/7QdIF8yNIX/gHgc8z/7v3vvx/PElKHPXVjMVt3U16b+/N+90qDLCv1L32GtKF/0slbUKqv50J3Jj311+TjkpbsR9wf37dx0g/Fu32s/x3tqRpzVZu4X0urvsy6exuf9L7vjcL1gZ8m3TR9QnSD/iVdZs4CdhT0hxJ36H5vrQjcGfOSycB+0TECy3EXF9OU1qwGsls8Un6GvDGiPhQt2MxW5L05BG6DS2S1pG0gZLNSFVmP+92XGZLmsreum6lGkWqCliFVO/3f6TqCjMrkatczMwqwlUuZmYV4YRuZlYRXatDHzt2bEyYMKFbxZuZDUm33HLLExHR12hZ1xL6hAkTmDq1v6bmZmbWiKR+u4dwlYuZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFtJzQc/eOt0q6rMGypSVdIGmmpJskTWhnkGZm1txgjtA/RRoAuZEDSSOvrEXq1vVrixuYmZkNTksJPXe4vgupz+dGdicNQABppPQd6kYDMTOzDmv1xqJvk4ZYGtXP8lXJHbpHxDxJT5NGwC6OD4mkSaTxDRk3bqCRtpIJh/2qxfAau/+EXRbr9WZmQ0nTI3SlAU0fj4hbFrewiJgcERMjYmJfX8M7V83MbBG1UuWyFbCbpPuB84HtJf24bp2HyQPq5vEXR5NGZTczs5I0TegRcXhErBYRE4B9gN9GxAfqVrsEqA03tmdexx2tm5mVaJE755J0DDA1Ii4hDfB7jqSZpEFX92lTfGZm1qJBJfSImAJMyc+PLMx/EdirnYGZmdng+E5RM7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczq4hWBoleRtKfJM2QdKekoxuss7+kWZKm58dBnQnXzMz608qIRS8B20fEXEkjgRskXRERN9atd0FEHNz+EM3MrBVNE3oe7HlunhyZHx4A2sysx7RUhy5puKTpwOPANRFxU4PV3iPpNkkXSlq9rVGamVlTLSX0iHglIjYCVgM2k7R+3SqXAhMiYgPgGuCsRtuRNEnSVElTZ82atThxm5lZnUG1comIp4BrgR3r5s+OiJfy5I+At/bz+skRMTEiJvb19S1KvGZm1o9WWrn0SRqTny8LvAO4u26dlQuTuwF3tTNIMzNrrpVWLisDZ0kaTvoB+GlEXCbpGGBqRFwCHCJpN2
Ae8CSwf6cCNjOzxlpp5XIbsHGD+UcWnh8OHN7e0MzMbDB8p6iZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFtDKm6DKS/iRphqQ7JR3dYJ2lJV0gaaakmyRN6ESwZmbWv1aO0F8Cto+IDYGNgB0lbVG3zoHAnIhYC/gW8LX2hmlmZs00TeiRzM2TI/Mj6lbbHTgrP78Q2EGS2halmZk11VIduqThkqYDjwPXRMRNdausCjwIEBHzgKeBldoZqJmZDaylhB4Rr0TERsBqwGaS1l+UwiRNkjRV0tRZs2YtyibMzKwfg2rlEhFPAdcCO9YtehhYHUDSCGA0MLvB6ydHxMSImNjX17doEZuZWUOttHLpkzQmP18WeAdwd91qlwAfys/3BH4bEfX17GZm1kEjWlhnZeAsScNJPwA/jYjLJB0DTI2IS4DTgHMkzQSeBPbpWMRmZtZQ04QeEbcBGzeYf2Th+YvAXu0NzczMBsN3ipqZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVVEK2OKri7pWkl/lnSnpE81WGc7SU9Lmp4fRzbalpmZdU4rY4rOAz4bEdMkjQJukXRNRPy5br3fRcSu7Q/RzMxa0fQIPSIeiYhp+fmzwF3Aqp0OzMzMBmdQdeiSJpAGjL6pweItJc2QdIWk9doQm5mZDUIrVS4ASFoeuAg4NCKeqVs8DRgfEXMl7Qz8Ali7wTYmAZMAxo0bt8hBm5nZwlo6Qpc0kpTMz42Ii+uXR8QzETE3P78cGClpbIP1JkfExIiY2NfXt5ihm5lZUSutXAScBtwVESf2s84b83pI2ixvd3Y7AzUzs4G1UuWyFbAfcLuk6XneEcA4gIg4FdgT+G9J84AXgH0iIjoQr5mZ9aNpQo+IGwA1Wedk4OR2BWVmZoPnO0XNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqopUxRVeXdK2kP0u6U9KnGqwjSd+RNFPSbZI26Uy4ZmbWn1bGFJ0HfDYipkkaBdwi6ZqI+HNhnZ2AtfNjc+CU/NfMzErS9Ag9Ih6JiGn5+bPAXcCqdavtDpwdyY3AGEkrtz1aMzPr16Dq0CVNADYGbqpbtCrwYGH6IRZO+mZm1kGtVLkAIGl54CLg0Ih4ZlEKkzQJmAQwbty4RdlE6SYc9qvF3sb9J+zShkjMzAbW0hG6pJGkZH5uRFzcYJWHgdUL06vleQuIiMkRMTEiJvb19S1KvGZm1o9WWrkIOA24KyJO7Ge1S4AP5tYuWwBPR8QjbYzTzMyaaKXKZStgP+B2SdPzvCOAcQARcSpwObAzMBN4Hjig/aGamdlAmib0iLgBUJN1AvhEu4IyM7PB852iZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFdHKmKKnS3pc0h39LN9O0tOSpufHke0P08zMmmllTNEzgZOBswdY53cRsWtbIjIzs0XS9Ag9Iq4HniwhFjMzWwztqkPfUtIMSVdIWq9N2zQzs0FopcqlmWnA+IiYK2ln4BfA2o1WlDQJmAQwbty4NhRtZmY1i32EHhHPRMTc/PxyYKSksf2sOzkiJkbExL6+vsUt2szMChY7oUt6oyTl55vlbc5e3O2amdngNK1ykXQesB0wVtJDwFeAkQARcSqwJ/DfkuYBLwD7RE
R0LGIzM2uoaUKPiH2bLD+Z1KzRzMy6yHeKmplVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVUTThC7pdEmPS7qjn+WS9B1JMyXdJmmT9odpZmbNtHKEfiaw4wDLdwLWzo9JwCmLH5aZmQ1W04QeEdcDTw6wyu7A2ZHcCIyRtHK7AjQzs9Y0HSS6BasCDxamH8rzHqlfUdIk0lE848aNa0PRS44Jh/1qsV5//wm7dD2GdsTRCzH0Shy9EEOvxNELMfRCHKVeFI2IyRExMSIm9vX1lVm0mVnltSOhPwysXpheLc8zM7MStSOhXwJ8MLd22QJ4OiIWqm4xM7POalqHLuk8YDtgrKSHgK8AIwEi4lTgcmBnYCbwPHBAp4I1M7P+NU3oEbFvk+UBfKJtEZmZ2SLxnaJmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV0VJCl7SjpL9IminpsAbL95c0S9L0/Dio/aGamdlAWhlTdDjwPeAdwEPAzZIuiYg/1616QUQc3IEYzcysBa0coW8GzIyIeyPiZeB8YPfOhmVmZoPVSkJfFXiwMP1QnlfvPZJuk3ShpNUbbUjSJElTJU2dNWvWIoRrZmb9addF0UuBCRGxAXANcFajlSJickRMjIiJfX19bSrazMygtYT+MFA84l4tz3tNRMyOiJfy5I+At7YnPDMza1UrCf1mYG1Ja0haCtgHuKS4gqSVC5O7AXe1L0QzM2tF01YuETFP0sHAVcBw4PSIuFPSMcDUiLgEOETSbsA84Elg/w7GbGZmDTRN6AARcTlwed28IwvPDwcOb29oZmY2GL5T1MysIpzQzcwqwgndzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwinNDNzCrCCd3MrCJaSuiSdpT0F0kzJR3WYPnSki7Iy2+SNKHdgZqZ2cCaJnRJw4HvATsBbwH2lfSWutUOBOZExFrAt4CvtTtQMzMbWCtH6JsBMyPi3oh4GTgf2L1und2Bs/LzC4EdJKl9YZqZWTOKiIFXkPYEdoyIg/L0fsDmEXFwYZ078joP5el78jpP1G1rEjApT74Z+Mtixj8WeKLpWp3VCzFAb8TRCzFAb8TRCzFAb8TRCzFAb8TRjhjGR0RfowUjFnPDgxIRk4HJ7dqepKkRMbFd2xuqMfRKHL0QQ6/E0Qsx9EocvRBDr8TR6RhaqXJ5GFi9ML1antdwHUkjgNHA7HYEaGZmrWklod8MrC1pDUlLAfsAl9Stcwnwofx8T+C30awux8zM2qpplUtEzJN0MHAVMBw4PSLulHQMMDUiLgFOA86RNBN4kpT0y9C26pvF0AsxQG/E0QsxQG/E0QsxQG/E0QsxQG/E0dEYml4UNTOzocF3ipqZVYQTuplZRTihm5lVRKnt0G3JIOlfIuKxksvcKiJ+32xeB8vvAz4CTKDwvYqID5dRfo5hBPBKRISk1YHNgXsi4tayYugl+TP5AqnLkmVq8yNi+64F1WFD7ghd0tclrSBppKTfSJol6QMllv8vkk6TdEWefoukA8sqP5f5BknflnSZpOMlrVBm+f3ENEbSgZJ+A3QjgXy3xXmd8kvS/Re/Bn5VeJRC0keAx4EH8vPfkJoQny/pC2XF0SCulSS9W9Jbu1D8ucBdwBrA0cD9pGbYpZD0DUkfbTD/o5JO6EiZQ62Vi6TpEbGRpHcDuwKfAa6PiA1LKv8K4AzgixGxYT4qujUi/q2M8nMMVwK3ANeT3oNREbF/WeUX4liW1I/P+4CNgVHAf5E+j1dLim
FL4N+BQ0kdw9WsALy7xP1iekRsVEZZ/ZR/J7A16TO4i3R7+BOSXgfcHBHrlRTHZcBhEXGHpJWBacBU4E3A5Ij4dhlx5FhuiYi3SrotIjbI826OiE3LKh+YWH9PjqRhwG0RsX67yxxyR+jMP53dBfhZRDxdcvljI+KnwKuQ2ukDr5Qcw8oR8cWIuCoiPglsUHL5SPoJ8FfgHaQj4QmkHjenlJXMs5HA8qT9YlTh8QzpCLUsl0naucTy6r0cEXMi4u+kzvSeAIiI54GXS4xjjYi4Iz8/ALgmIt5Fqv4prfop+2f++4ikXSRtDLy+xPKXbnSDZf5+dKTzwqFYh36ZpLuBF4D/zvVkL5ZY/nOSVgICQNIWQNk/Kkhakfk7xfDidEQ8WUIIbwHmkI4G74qIVyR143TvKxGxg6T1IuLoLpRf8yngCEkvkRKJgIiIsqrDls0JaxiwVH6u/FhmwFe21z8Lz3cAfggQEc9KKvOHHuA4SaOBz5IOOlYAPl1i+S9IWjsi/lacKWltUv5quyFX5QIg6fXA0zmJLEeqcni0pLI3Ie0c6wN3AH3AnhFxWxnl5xjuJ50hNPqVj4hYs6Q41gH2BfYm9SD3ZmD9Mi+ISvozcBDpbuX3UfeeRMS0smLpJklTyAcZjUTE20qK41LgauAh4HTSEftTuXpuallVP71A0k6kXHEcqYoUYCJwOHBoRFze9jKHWkLP9VKnAedFxJwuxTCClLwE/CUi/tnkJZUjaYuIuLEw/VZScn8v8FBE/HtJcexJGmBla1JdbVGU2aIhnyWtzYItKq4vq/xeIOkNwDHAysD3IuLqPP9twFsj4pslxrIG8EkWbnm0W4kxrA98nnQACHAn8I2IuL0j5Q3BhL4WqW5ub9IX+Azg6k53BiZpj4GWR8TFnSx/ILnu9tqIeEHSHmXEImlaRGzSYL6AbcpOZJK+HBHHlllmXfkHkapdVgOmA1sAfyzrB0XSpsCDtTNVSR8E3gM8ABxVUjVcT5E0g3Twdzv5mhdARFzXtaA6bMgl9Jp8pXhX4BTSRckzgJM6teNKOmOAxVFme+N6kk4mncpNA7ZolGg7UGbDhN5NknYDts2TUyLishLLvh3YFLgxt8JaB/hqRAx4INDG8qcBb4+IJyVtSxpZ7JPARsC6EVHKBeJcP3wE6frKiaQ69G2BmcCBEVF/FtXJWG6KiM3LKq9B+WsDXyR1WFh7L7YB7gEOioi2N6EcihdFkbQB6Sh9Z+AiUnvTrYHfknbgtouIAzqx3UUhaXPg3oiYBRARB0s6knSE+ImSwlhTUn03yq8p87QWQNLxpOESz82zPiXp3yPiiJJCeDEiXpSEpKUj4m5Jby6pbIDhhYOZvUlNBC8CLpI0vcQ4zgDOJl2AvInUnPTdpET2PVJrl7KcJOkrpDr9l2ozS7yuMtB7cTIdeC+G3BF6rkN/inQqdVFEvFRYdnGnj4hyC5evkH5AArgBOCYiShvQI59Kblb73yWdSKon/Cjw84jYuoQY/ka6GNlQ2ae1km4DNqo1mVQa3PzWWvvjEsr/Oekg41Bge9IR6siIKKUpo9IwkBtF6u76bmBSrdpL0h2daPPcTxyvtceXNDPSwPELLSspluOB/UhHxLUql9Kuq3TjvRiKR+h7RcS9jRaUdHp7PumGnvfk6fcDFwBvL6HsmhER8VK+OHsmqQnUnhHxar6RpAxze7Aucgzp9BbSXZuliYh356dHSbo2l39liSGcB1wn6QnS/vA7eO2aU5nNaotNE58ZYFkZ9gLWjDS4fTeU/l4MuYQeEfdK2gVYjwVbExxTUggr1118O07S3iWVXXOD0i32byTdVLNtTubb0aH2rQ3cV1I5rToeuDUnU5HqbQ8rq/DclLam1oKhzNPfr5Fu91+ZBRsJDCPVpZdlnXy2JOBN+Tl5upTmtAV3kH7kHy+53JrS34shl9AlnQq8Dngb8CPS3YB/KjGEqyXtA/w0T+9JGs2pNBHxUUiEmk4AAAyLSU
RBVElbk+4AfAy4MFcFiflnDp1WbLK4V0T8rDD91RLrrgGIiPNyW+zabd1fKOvehGwaaVzdOaTPYQzwqKTHgI9ExC0DvbgN/tToInVE/LXD5dZbt+TyBjIGuFvSzSxYh17W9Z3S34uhWId+W0RsUPi7PHBFRGxTUvnPAssx/5RpGPBcfl7KnYENmqh9iHQh7B7SnZMdb6JWbOVS3+KlGy1gJG0FTI+I55Q6a9uE1OrpgZLK/yFwYURclaf/k/TjWmt91dGLgZJujYiNO1nG4sit0vaNiHObrty+Mv+j0fxuVhVKGgvM7lQz6yF3hM78KoXnJa0CzCadZpYiIkaVVdYAfkCus89N1I5nfhO1yZTTh4n6ed5ougynABtK2pDUYdtppBYGDb/UHbBFRHykNhERV0v6Zj6bWrqE8vskfaa/hRFxYgkxoNTz5yeAVUmDx18DHEy6/X4G81shdVy3r/EodQtyAum6zrHAOcBYYJikD0ZE26+xDMWEfpmkMcA3SKe5Qap6KU2+yajWyuV3EfGLMsunN5qoRT/PG02XYV5EhKTdSXconqZyuzV+RKmb2vPz9N7AY7m1TRkXA4eTrqd048e06BxStdMfSa2gjiDF9F8RUWbzyVpC/S6p6mMp0nv0XBln0dnJpP9/NKlJ9U4RcWO+R+E8OnDRfMhVuRTlI59losQeFyV9H1iL9IFAruqIiLLaf/dEEzVJr5CqmgQsCzxfW0T6TEZ2Ooa6eK4jfUE+TGrn+zgwI0rq1jifShebs/6edAv808C4iJjZ4fJ74kYvSbfX3vP8Y/YI6f8vswO9WixTgX2An5FuvPsg8K8RcXhJ5RebLd4VEesWlnWkimzIHKEPdOu9pDJvvd+edOddrbfFs0j9M5Sp603UImJ4GeUMwt6kzrk+HBGPShpHOovruJy4ToqI9/ezSkeTeS2MEspoxWv9GkXqPO+hbiTzQgwzJQ2PiFeAMyTdSuocqwzFM7P61mdLfB36uwZYFkBZCX0mMI7URwaklg1lfGFfExH/m5stdq2JmqRlgI+RzlZuA06P1Dd8V+Qkfi6wqaRdSa0+zi6p7FckjZe0VBfbPO/QpXLrbSip1uZapG59n8nPS2k0UPC8pKWA6ZK+TjpbKHMMiA0L//uyde9LR7o0HtJVLt2QT+03ZX5TyU1Jw1o9A+Xf8t4tki4gHY39DtgJeCAiPtXFeN5LOiKfQvrCbAN8PiIuLKn8s0l1tZcwv9VTaRcjbWGSxpOa9S5F6gd9NPD9Tld/ddOQS+iSvgp8PSKeytMrAp+NiC+VVH6x1UQtcewDfBy6f2W9LHV1pSPopx10ifHMAN4REY/n6T7g11HeEHRfaTQ/ujvoxhIrV4OdPUA1WCUNpSqXmp2KN61ExByl7mNLSegRcZ3SaDDvI91afB9w6pKSyAuKdaXzpK5X4Q6rJfNsNiWeXtcSd74vgoiYW1bZRUoDvryQ7xz+V2Ad0n0aS1Sf/T1SDVa6oZjQhyv1ZlfrmGpZoOPtfPOXY9/8eILUf4uipJFgelAv1ZUCXCnpKhZsfdT2EWH6ozSQwTnkMSvzBesPRkTZF8yvB7bJZ65Xk6oD9yb1ObSkuRf4vVKvoEtENdhQTOjnAr/R/P7JDwDOKqHcu0n1xbvW6uAklTk+YU/ppVYuSqcH3yFdz6j1NDk5In5eYhiTgc9ExLU5pu1I/V+XMnJTgSLi+dwG//sR8fUS703oNffkxzDSwOGVN6QSev7i/oR0x1mtd8Nja7dbd9gepLryayVdSbqBpOv1DJZOByRdnuv0uzVy1HK1ZJ5jmpKrP8omSVuSjshrN1b1zI9vmXqlGqxMQyqh131xy+yalHw36C/yl3R3Ur/Xb5B0CqkP8qvLjMcWMk3SptGBUWBadK+kL5OqXQA+QDrlL9uhpHbWP4+IOyWtCVzb5DWV1EPVYKUZiq1czgJO7uIXtxjLiqQLo3tHRK+0A14i5Ttm1yLdH1C7gzWivAEuVgSOZn
6Vz+9IY3l2ZSBzA0l/AL5YVw321ShpAPNuGIoJvatfXOtNuc3xQsrqbbHbJF3KAHcfLin3RxRJmlHfbLXRvCoZUlUu2Tu7HYD1pJWBOyPiWXit1791mX9Hb0dogHFVodRE+s38dw/SwCc/ztP7km6uWRL1SjVYaYbcEXqNpDew4IhFf+9iONZluY+OTQp97AwDpnb6ZidJs4AHSc0lb6LuQnnZ9ydImhoRE5vNWxIsidVgQ+4IXdJuwP8Bq5B61BsP3EUaks6WXCr0aUO+saaM/fuNwDtIR8LvA34FnNfFC2/LSVoz8ri7ktYgDciyxMmJ+5Bux1GmIZfQSR3Fb0G6rXtjSW8jnUrZku1eSYeQBrqA1BVDx0+vcy9+V5JubFqalNinSDo6Ik7udPkNfDqXfy/pbGE8MKkLcXRdvhnwc8AECrkuIrbvVkydNuSqXGqnj7nvjo3zkVilL3RYc7kK7juk7o2DNGDyoXXdAXSq7KWBXUjJfAKpg67TI+LhTpc9QDzr5Mm7a3dVL2lyjjgVuAV4pTY/Oj++a9cMxSP0p/KNAtcD50p6nMJtvbZkyol7n7LLzb0srk/qZuDoiLij7Bjq4rmFNPzeeVWuK27RvIg4pflq1TEUj9CXI3UWP4x0N9xo4NyImN3VwKwrJP1Pvr39uzRothcRHa1DlfQqhUHCi4voQp82eZCTA0j9t0wlDVJd7DN/iSHpKNJ1tp8Dr52lRAmDqHfLkEvoRerwCNrW+yS9KyIulfShRssjoox+fnpObuWzK+mawiukxH5SlZNZPUn3NZgdEbFm6cGUZMgkdA0wgjbpdt5SuwIw61WSNiAdpe8MXEXq0G5rYL/IY1xaNQ2lhD6V+SNoT6ZuBO3owICr1vt66MaenpDr0J8i1aNfVLwgKuniiOh3bN4qyv25vIUF71kpZWjCbhhKCb30EbSt9/XajT3dVmyDvqTLo0htR0rol5OGSrwhIvbsZlydVOaAqYur9BG0bUh4I+nMbX3gJNJNPk9ExHVlJXNJ/yXpc5J6oVuK2ZJOlDQ1P/5P0uhuB9Ule5IGz340Ig4ANiSd4VfWUEroG0p6RtKzwAb5eW3637odnHVHRLwSEVdGxIdIN5zNJN1Yc3AZ5Uv6PulmnpWAY3PfId10OvAs8N78eIZ0QXRJ9EJEvArMy337PA6s3uWYOmrItEPvpRFyrLc0uLHnO6SmamXYFtgwj2H5OlJ/IceWVHYjb4qI9xSmj16CRyyaKmkMaeSoW4C5wB+7G1JnDZmEbtZID9zY83K+/Z889Fu3R7F6QdLWEXEDgKStWLiKsvIk9ZHOVoiIU/MoYytExG3djayzhsxFUbNGun1jj6QXgL8x/2Lsm0jVPl3pp1/SRqQxdkfnGJ4E9o+IGWXG0U2SDgK+ShpPdA1gUkQM2BqqKpzQzRZDHlhjoIElutKtc64zJiKe6Ub53STpDuBtETErD8F3bkRs2e24yuAqF7PFcwf9J/SXJN1DGgbtN50MQtJn+pkPQESc2Mnye8zLETELICLuzddYlghO6GaLISJG9bdM0nBS/f65+W8n9RsHS16z3tUkfae/6U7379NNTuhmHZIvls7IHYd1uqyj+1sm6dBOl99jPl83Xdnucuu5Dt2s4iT9PSLGdTsO67yhdGORmS2abjeltJI4oZtVn0/DlxCuQzergNwFRqPELWDZksOxLnEduplViqRvADMj4gd18z8KrBERh3Unss5zQjezSsl9wk+sH8ksj+J0W0R0uglp17gO3cyqZulGw1LmnhcrfYHYCd3MquYFSWvXz8zzKt1RmS+KmlnVHAlcIek45t9UNBE4HKj0TVauQzezysljiX6e+V0u3Al8IyJu715UneeEbmZWEa5DN7NKkbS2pDPz2KqrSbpC0lxJMyRt2u34OskJ3cyq5gzgD8A/gJtIIxeNBT4HnNzFuDrOVS5mVimSpkfERvn5zIhYq9GyKvIRuplVzauF5/UjNr1KhfkI3c
wqRdLzzB/XtTbGK3l6zYhYrluxdZrboZtZ1azb7QC6xUfoZlZ5ksYCsxt1CVAlrkM3s0qRtIWkKZIulrSxpDtIg3k/JmnHbsfXST5CN7NKkTQVOAIYDUwGdoqIGyWtA5wXERt3NcAO8hG6mVXNiIi4OiJ+BjwaETcCRMTdXY6r45zQzaxqik0T63tXrHSVhKtczKxSJL0CPMf84feery0ClomIkd2KrdOc0M3MKsJVLmZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhH/D3NhtZPd2pofAAAAAElFTkSuQmCC\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"aBZM4_boe7DF"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_ner_CONLL_2003_5class_example.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","name":"python3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"NYQRU3pRO146"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_CONLL_2003_5class_example.ipynb)\n","\n","\n","Named entities are phrases that contain the names of persons, organizations, locations, times and quantities. Example:\n","
\n","
\n","[ORG **U.N.** ] official [PER **Ekeus** ] heads for [LOC **Baghdad** ] . \n","
\n","\n","https://www.aclweb.org/anthology/W03-0419.pdf \n","CoNLL-2003 is a NER dataset that available in English and German. NLU provides pretrained languages for both of these languages.\n","\n","It features **5 classes** of tags, **LOC (location)** , **ORG(Organisation)**, **PER(Persons)** and the forth which describes all the named entities which do not belong to any of the thre previously mentioned tags **(MISC)**. \n","The fifth class **(O)** is used for tokens which belong to no named entity.\n","\n","\n","\n","\n","\n","|Tag | \tDescription |\n","|------|--------------|\n","|PER | A person like **Jim** or **Joe** |\n","|ORG | An organisation like **Microsoft** or **PETA**|\n","|LOC | A location like **Germany**|\n","|MISC | Anything else like **Playstation** |\n","|O| Everything that is not an entity. | \n","\n","\n","The shared task of [CoNLL-2003 concerns](https://www.clips.uantwerpen.be/conll2003/) language-independent named entity recognition. We will concentrate on four types of named entities: persons, locations, organizations and names of miscellaneous entities that do not belong to the previous three groups. The participants of the shared task will be offered training and test data for two languages. They will use the data for developing a named-entity recognition system that includes a machine learning component. For each language, additional information (lists of names and non-annotated data) will be supplied as well. The challenge for the participants is to find ways of incorporating this information in their system.\n","\n","\n","\n","\n","\n","\n","\n","\n","\n"]},{"cell_type":"code","metadata":{"id":"M2-GiYL6xurJ","executionInfo":{"status":"ok","timestamp":1614376103147,"user_tz":-60,"elapsed":622,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}}},"source":["# import os\n","# ! 
apt-get update -qq > /dev/null \n","# # Install java\n","# ! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","# os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","# os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","# ! pip install nlu pyspark==2.4.7 > /dev/null\n"],"execution_count":1,"outputs":[]},{"cell_type":"code","metadata":{"id":"RaFQ13IDzHFH","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614376168385,"user_tz":-60,"elapsed":65851,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"e9a8747c-46b1-413a-c029-77cf75f2c155"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.3.4 \n","\n"],"execution_count":2,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.3.4\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/0f/6ae297667dad605a3bccc5581a4a7e3b4e409235b17b6e91f1d80ba8a04a/pyspark-2.3.4.tar.gz (212.3MB)\n","\u001b[K |████████████████████████████████| 212.3MB 64kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 18.1MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.3.4-py2.py3-none-any.whl size=212742814 sha256=eaeafb20427c3d270903d9b9125291a3ffaf02390a704a1f8cd6cdc87a95ea03\n"," Stored in directory: /root/.cache/pip/wheels/28/43/b9/dd30cad8012aa654f5688534e626e6f8b262d4425bb3530fb1\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.3.4\n","Looking in indexes: https://test.pypi.org/simple/, https://pypi.org/simple\n","Collecting nlu_test==1.1.3rc2\n","\u001b[?25l Downloading https://test-files.pythonhosted.org/packages/5c/84/241410ba610c9281afc8e1cffaa352f5ca83fe6e2574f1cfcdf3334dc81f/nlu_test-1.1.3rc2-py3-none-any.whl (158kB)\n","\u001b[K |████████████████████████████████| 163kB 5.3MB/s \n","\u001b[?25hCollecting dataclasses\n"," Downloading https://files.pythonhosted.org/packages/26/2f/1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d/dataclasses-0.6-py3-none-any.whl\n","Collecting spark-nlp<2.8,>=2.7.1\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/8d/a5/a5130215b43f3bd0e98bd16c471d36dafeab8855ca17789d4927337fa7dc/spark_nlp-2.7.4-py2.py3-none-any.whl (139kB)\n","\u001b[K |████████████████████████████████| 143kB 6.9MB/s \n","\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (1.1.5)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (1.19.5)\n","Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (3.0.0)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->nlu_test==1.1.3rc2) (2018.9)\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->nlu_test==1.1.3rc2) (2.8.1)\n","Requirement already satisfied: six>=1.5 in 
/usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->nlu_test==1.1.3rc2) (1.15.0)\n","Installing collected packages: dataclasses, spark-nlp, nlu-test\n","Successfully installed dataclasses-0.6 nlu-test-1.1.3rc2 spark-nlp-2.7.4\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"Gph8XOL1Pzpl"},"source":["# NLU makes NER easy. \n","\n","You just need to load the NER model via ner.load() and predict on some dataset. \n","It could be a pandas dataframe with a column named text or just an array of strings."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"pmpZSNvGlyZQ","executionInfo":{"status":"ok","timestamp":1614376217409,"user_tz":-60,"elapsed":114869,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"fb7aa88a-7854-48f0-f368-befdf4d48b9b"},"source":["import nlu \n","\n","example_text = [\"A person like Jim or Joe\", \n"," \"An organisation like Microsoft or PETA\",\n"," \"A location like Germany\",\n"," \"Anything else like Playstation\", \n"," \"Person consisting of multiple tokens like Angela Merkel or Donald Trump\",\n"," \"Organisations consisting of multiple tokens like JP Morgan\",\n"," \"Locations consiting of multiple tokens like Los Angeles\", \n"," \"Anything else made up of multiple tokens like Super Nintendo\",]\n","\n","nlu.load('ner').predict(example_text)"],"execution_count":3,"outputs":[{"output_type":"stream","text":["onto_recognize_entities_sm download started this may take some time.\n","Approx size to download 159 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
tokenentitiesner_confidenceembeddingsentities_confidence
origin_index
0A[Jim, Joe]0.999700[-0.2708599865436554, 0.04400600120425224, -0....[PERSON, PERSON]
0person[Jim, Joe]0.844200[0.3870899975299835, 0.3262900114059448, 0.645...[PERSON, PERSON]
0like[Jim, Joe]0.973300[-0.2687000036239624, 0.817080020904541, 0.698...[PERSON, PERSON]
0Jim[Jim, Joe]0.996600[-0.5946000218391418, 0.24015000462532043, 0.8...[PERSON, PERSON]
0or[Jim, Joe]0.997800[0.31038999557495117, 0.6485900282859802, 0.28...[PERSON, PERSON]
0Joe[Jim, Joe]0.678500[-0.008324000053107738, -0.22608999907970428, ...[PERSON, PERSON]
1An[Microsoft, PETA]0.999600[-0.4214000105857849, -0.18796999752521515, 0....[ORG, ORG]
1organisation[Microsoft, PETA]0.828700[0.2066899985074997, 0.11495999991893768, -0.7...[ORG, ORG]
1like[Microsoft, PETA]0.995100[-0.2687000036239624, 0.817080020904541, 0.698...[ORG, ORG]
1Microsoft[Microsoft, PETA]0.998100[0.292959988117218, -0.2003300040960312, 0.647...[ORG, ORG]
1or[Microsoft, PETA]0.999900[0.31038999557495117, 0.6485900282859802, 0.28...[ORG, ORG]
1PETA[Microsoft, PETA]0.981300[0.23658999800682068, 0.09654799848794937, -0....[ORG, ORG]
2A[Germany]0.999700[-0.2708599865436554, 0.04400600120425224, -0....[GPE]
2location[Germany]0.834800[-0.15413999557495117, -0.4418500065803528, 0....[GPE]
2like[Germany]0.879700[-0.2687000036239624, 0.817080020904541, 0.698...[GPE]
2Germany[Germany]0.960200[0.6208900213241577, 0.7105100154876709, 0.495...[GPE]
3Anything[Playstation]0.997300[-0.029784999787807465, 0.08645900338888168, 0...[PRODUCT]
3else[Playstation]0.865000[0.07139399647712708, 0.5581200122833252, 1.03...[PRODUCT]
3like[Playstation]0.816900[-0.2687000036239624, 0.817080020904541, 0.698...[PRODUCT]
3Playstation[Playstation]0.681800[-0.13797999918460846, -0.2239599972963333, 1....[PRODUCT]
4Person[Angela Merkel, Donald Trump]0.999500[0.3870899975299835, 0.3262900114059448, 0.645...[PERSON, PERSON]
4consisting[Angela Merkel, Donald Trump]0.670700[-0.8197299838066101, 0.6228200197219849, 0.21...[PERSON, PERSON]
4of[Angela Merkel, Donald Trump]0.956800[-0.15289999544620514, -0.24278999865055084, 0...[PERSON, PERSON]
4multiple[Angela Merkel, Donald Trump]0.700400[-0.16572999954223633, 0.5437099933624268, -0....[PERSON, PERSON]
4tokens[Angela Merkel, Donald Trump]0.530900[0.1645199954509735, 0.6764900088310242, -0.50...[PERSON, PERSON]
4like[Angela Merkel, Donald Trump]0.988500[-0.2687000036239624, 0.817080020904541, 0.698...[PERSON, PERSON]
4Angela[Angela Merkel, Donald Trump]0.959800[-0.563759982585907, 0.26958999037742615, 0.35...[PERSON, PERSON]
4Merkel[Angela Merkel, Donald Trump]0.987700[-1.000499963760376, 0.41997000575065613, 0.59...[PERSON, PERSON]
4or[Angela Merkel, Donald Trump]0.999900[0.31038999557495117, 0.6485900282859802, 0.28...[PERSON, PERSON]
4Donald[Angela Merkel, Donald Trump]0.947000[-0.5496799945831299, -0.488319993019104, 0.59...[PERSON, PERSON]
4Trump[Angela Merkel, Donald Trump]0.859300[-0.15730999410152435, -0.7550299763679504, 0....[PERSON, PERSON]
5Organisations[JP Morgan]0.999600[-0.19327999651432037, 0.6523399949073792, -1....[ORG]
5consisting[JP Morgan]0.952000[-0.8197299838066101, 0.6228200197219849, 0.21...[ORG]
5of[JP Morgan]0.995200[-0.15289999544620514, -0.24278999865055084, 0...[ORG]
5multiple[JP Morgan]0.971600[-0.16572999954223633, 0.5437099933624268, -0....[ORG]
5tokens[JP Morgan]0.953100[0.1645199954509735, 0.6764900088310242, -0.50...[ORG]
5like[JP Morgan]0.994100[-0.2687000036239624, 0.817080020904541, 0.698...[ORG]
5JP[JP Morgan]0.816400[-0.4920400083065033, 0.42118000984191895, -0....[ORG]
5Morgan[JP Morgan]0.386300[0.017304999753832817, -0.045906998217105865, ...[ORG]
6Locations[Los Angeles]0.999300[0.06345599889755249, -0.042142000049352646, 0...[GPE]
6consiting[Los Angeles]0.868300[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...[GPE]
6of[Los Angeles]0.958900[-0.15289999544620514, -0.24278999865055084, 0...[GPE]
6multiple[Los Angeles]0.675400[-0.16572999954223633, 0.5437099933624268, -0....[GPE]
6tokens[Los Angeles]0.822400[0.1645199954509735, 0.6764900088310242, -0.50...[GPE]
6like[Los Angeles]0.893900[-0.2687000036239624, 0.817080020904541, 0.698...[GPE]
6Los[Los Angeles]0.741900[0.9132099747657776, -0.6833699941635132, 0.43...[GPE]
6Angeles[Los Angeles]0.769100[0.7376400232315063, -0.31266000866889954, 0.4...[GPE]
7Anything[Super Nintendo]0.999800[-0.029784999787807465, 0.08645900338888168, 0...[PRODUCT]
7else[Super Nintendo]0.945700[0.07139399647712708, 0.5581200122833252, 1.03...[PRODUCT]
7made[Super Nintendo]0.700100[-0.19820000231266022, -0.28404998779296875, 0...[PRODUCT]
7up[Super Nintendo]0.828700[0.21468999981880188, 0.4336700141429901, 0.33...[PRODUCT]
7of[Super Nintendo]0.810000[-0.15289999544620514, -0.24278999865055084, 0...[PRODUCT]
7multiple[Super Nintendo]0.627900[-0.16572999954223633, 0.5437099933624268, -0....[PRODUCT]
7tokens[Super Nintendo]0.483400[0.1645199954509735, 0.6764900088310242, -0.50...[PRODUCT]
7like[Super Nintendo]0.920500[-0.2687000036239624, 0.817080020904541, 0.698...[PRODUCT]
7Super[Super Nintendo]0.320700[-0.4453299939632416, -0.4496900141239166, 1.2...[PRODUCT]
7Nintendo[Super Nintendo]0.446900[0.25946998596191406, -0.5758200287818909, 1.3...[PRODUCT]
\n","
"],"text/plain":[" token ... entities_confidence\n","origin_index ... \n","0 A ... [PERSON, PERSON]\n","0 person ... [PERSON, PERSON]\n","0 like ... [PERSON, PERSON]\n","0 Jim ... [PERSON, PERSON]\n","0 or ... [PERSON, PERSON]\n","0 Joe ... [PERSON, PERSON]\n","1 An ... [ORG, ORG]\n","1 organisation ... [ORG, ORG]\n","1 like ... [ORG, ORG]\n","1 Microsoft ... [ORG, ORG]\n","1 or ... [ORG, ORG]\n","1 PETA ... [ORG, ORG]\n","2 A ... [GPE]\n","2 location ... [GPE]\n","2 like ... [GPE]\n","2 Germany ... [GPE]\n","3 Anything ... [PRODUCT]\n","3 else ... [PRODUCT]\n","3 like ... [PRODUCT]\n","3 Playstation ... [PRODUCT]\n","4 Person ... [PERSON, PERSON]\n","4 consisting ... [PERSON, PERSON]\n","4 of ... [PERSON, PERSON]\n","4 multiple ... [PERSON, PERSON]\n","4 tokens ... [PERSON, PERSON]\n","4 like ... [PERSON, PERSON]\n","4 Angela ... [PERSON, PERSON]\n","4 Merkel ... [PERSON, PERSON]\n","4 or ... [PERSON, PERSON]\n","4 Donald ... [PERSON, PERSON]\n","4 Trump ... [PERSON, PERSON]\n","5 Organisations ... [ORG]\n","5 consisting ... [ORG]\n","5 of ... [ORG]\n","5 multiple ... [ORG]\n","5 tokens ... [ORG]\n","5 like ... [ORG]\n","5 JP ... [ORG]\n","5 Morgan ... [ORG]\n","6 Locations ... [GPE]\n","6 consiting ... [GPE]\n","6 of ... [GPE]\n","6 multiple ... [GPE]\n","6 tokens ... [GPE]\n","6 like ... [GPE]\n","6 Los ... [GPE]\n","6 Angeles ... [GPE]\n","7 Anything ... [PRODUCT]\n","7 else ... [PRODUCT]\n","7 made ... [PRODUCT]\n","7 up ... [PRODUCT]\n","7 of ... [PRODUCT]\n","7 multiple ... [PRODUCT]\n","7 tokens ... [PRODUCT]\n","7 like ... [PRODUCT]\n","7 Super ... [PRODUCT]\n","7 Nintendo ... 
[PRODUCT]\n","\n","[57 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"qgGdEUgkMika","executionInfo":{"status":"ok","timestamp":1614376226664,"user_tz":-60,"elapsed":124119,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"da600ecd-8f40-440f-fe0a-035b7e878896"},"source":["text = [\"Barclays misled shareholders and the public about one of the biggest investments in the bank's history, a BBC Panorama investigation has found.\",\n","\"The bank announced in 2008 that Manchester City owner Sheikh Mansour had agreed to invest more than £3bn.\",\n","\"But the BBC found that the money, which helped Barclays avoid a bailout by British taxpayers, actually came from the Abu Dhabi government.\",\n","\"Barclays said the mistake in its accounts was 'a drafting error'.\",\n","\"Unlike RBS and Lloyds TSB, Barclays narrowly avoided having to request a government bailout late in 2008 after it was rescued by £7bn worth of new investment, most of which came from the Gulf states of Qatar and Abu Dhabi.\",\n","\"The S&P 500's price to earnings multiple is 71% higher than Apple's, and if Apple were simply valued at the same multiple, its share price would be $840, which is 52% higher than its current price.\",\n","\"Alice has a cat named Alice and also a dog named Alice and also a parrot named Alice, it is her favorite name!\"\n","] + example_text\n","ner_df = nlu.load('ner').predict(text, output_level= 'chunk')"],"execution_count":4,"outputs":[{"output_type":"stream","text":["onto_recognize_entities_sm download started this may take some time.\n","Approx size to download 159 
MB\n","[OK!]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"5nhKQZPpSRxv","executionInfo":{"status":"ok","timestamp":1614376229431,"user_tz":-60,"elapsed":126880,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"3f2dff25-64af-47ae-9b0e-59e12d3e0ee2"},"source":["ner_df"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
entitiesentities_classner_confidenceembeddings
origin_index
0BarclaysORG[0.9980999827384949, 1.0, 0.9994999766349792, ...[[0.044123999774456024, -0.47940999269485474, ...
0about oneCARDINAL[0.9980999827384949, 1.0, 0.9994999766349792, ...[[0.044123999774456024, -0.47940999269485474, ...
0BBC PanoramaORG[0.9980999827384949, 1.0, 0.9994999766349792, ...[[0.044123999774456024, -0.47940999269485474, ...
12008DATE[0.9997000098228455, 0.848800003528595, 0.9908...[[-0.03819400072097778, -0.24487000703811646, ...
1Manchester CityGPE[0.9997000098228455, 0.848800003528595, 0.9908...[[-0.03819400072097778, -0.24487000703811646, ...
1Sheikh MansourPERSON[0.9997000098228455, 0.848800003528595, 0.9908...[[-0.03819400072097778, -0.24487000703811646, ...
1more than £3bnMONEY[0.9997000098228455, 0.848800003528595, 0.9908...[[-0.03819400072097778, -0.24487000703811646, ...
2BBCORG[0.9998999834060669, 0.9825999736785889, 1.0, ...[[-0.05707800015807152, 0.3987399935722351, 0....
2BarclaysORG[0.9998999834060669, 0.9825999736785889, 1.0, ...[[-0.05707800015807152, 0.3987399935722351, 0....
2BritishNORP[0.9998999834060669, 0.9825999736785889, 1.0, ...[[-0.05707800015807152, 0.3987399935722351, 0....
2Abu DhabiGPE[0.9998999834060669, 0.9825999736785889, 1.0, ...[[-0.05707800015807152, 0.3987399935722351, 0....
3BarclaysORG[0.9987999796867371, 1.0, 0.9980000257492065, ...[[0.044123999774456024, -0.47940999269485474, ...
4RBSORG[0.9997000098228455, 0.9998999834060669, 0.999...[[-0.32710000872612, 0.4879100024700165, 0.416...
4Lloyds TSBORG[0.9997000098228455, 0.9998999834060669, 0.999...[[-0.32710000872612, 0.4879100024700165, 0.416...
4BarclaysORG[0.9997000098228455, 0.9998999834060669, 0.999...[[-0.32710000872612, 0.4879100024700165, 0.416...
42008DATE[0.9997000098228455, 0.9998999834060669, 0.999...[[-0.32710000872612, 0.4879100024700165, 0.416...
47bnMONEY[0.9997000098228455, 0.9998999834060669, 0.999...[[-0.32710000872612, 0.4879100024700165, 0.416...
4GulfLOC[0.9997000098228455, 0.9998999834060669, 0.999...[[-0.32710000872612, 0.4879100024700165, 0.416...
4QatarGPE[0.9997000098228455, 0.9998999834060669, 0.999...[[-0.32710000872612, 0.4879100024700165, 0.416...
4Abu DhabiGPE[0.9997000098228455, 0.9998999834060669, 0.999...[[-0.32710000872612, 0.4879100024700165, 0.416...
5S&PORG[0.9994999766349792, 0.9878000020980835, 0.865...[[-0.03819400072097778, -0.24487000703811646, ...
5500'sDATE[0.9994999766349792, 0.9878000020980835, 0.865...[[-0.03819400072097778, -0.24487000703811646, ...
571%PERCENT[0.9994999766349792, 0.9878000020980835, 0.865...[[-0.03819400072097778, -0.24487000703811646, ...
5AppleORG[0.9994999766349792, 0.9878000020980835, 0.865...[[-0.03819400072097778, -0.24487000703811646, ...
5AppleORG[0.9994999766349792, 0.9878000020980835, 0.865...[[-0.03819400072097778, -0.24487000703811646, ...
5$840CARDINAL[0.9994999766349792, 0.9878000020980835, 0.865...[[-0.03819400072097778, -0.24487000703811646, ...
552%PERCENT[0.9994999766349792, 0.9878000020980835, 0.865...[[-0.03819400072097778, -0.24487000703811646, ...
6AlicePERSON[0.9970999956130981, 0.9984999895095825, 0.922...[[0.28501999378204346, -0.4355500042438507, 0....
6AlicePERSON[0.9970999956130981, 0.9984999895095825, 0.922...[[0.28501999378204346, -0.4355500042438507, 0....
6AlicePERSON[0.9970999956130981, 0.9984999895095825, 0.922...[[0.28501999378204346, -0.4355500042438507, 0....
6AlicePERSON[0.9970999956130981, 0.9984999895095825, 0.922...[[0.28501999378204346, -0.4355500042438507, 0....
7JimPERSON[0.9997000098228455, 0.8442000150680542, 0.973...[[-0.2708599865436554, 0.04400600120425224, -0...
7JoePERSON[0.9997000098228455, 0.8442000150680542, 0.973...[[-0.2708599865436554, 0.04400600120425224, -0...
8MicrosoftORG[0.9995999932289124, 0.8287000060081482, 0.995...[[-0.4214000105857849, -0.18796999752521515, 0...
8PETAORG[0.9995999932289124, 0.8287000060081482, 0.995...[[-0.4214000105857849, -0.18796999752521515, 0...
9GermanyGPE[0.9997000098228455, 0.8348000049591064, 0.879...[[-0.2708599865436554, 0.04400600120425224, -0...
10PlaystationPRODUCT[0.9973000288009644, 0.8650000095367432, 0.816...[[-0.029784999787807465, 0.08645900338888168, ...
11Angela MerkelPERSON[0.9994999766349792, 0.6707000136375427, 0.956...[[0.3870899975299835, 0.3262900114059448, 0.64...
11Donald TrumpPERSON[0.9994999766349792, 0.6707000136375427, 0.956...[[0.3870899975299835, 0.3262900114059448, 0.64...
12JP MorganORG[0.9995999932289124, 0.9520000219345093, 0.995...[[-0.19327999651432037, 0.6523399949073792, -1...
13Los AngelesGPE[0.9993000030517578, 0.8683000206947327, 0.958...[[0.06345599889755249, -0.042142000049352646, ...
14Super NintendoPRODUCT[0.9998000264167786, 0.9456999897956848, 0.700...[[-0.029784999787807465, 0.08645900338888168, ...
\n","
"],"text/plain":[" entities ... embeddings\n","origin_index ... \n","0 Barclays ... [[0.044123999774456024, -0.47940999269485474, ...\n","0 about one ... [[0.044123999774456024, -0.47940999269485474, ...\n","0 BBC Panorama ... [[0.044123999774456024, -0.47940999269485474, ...\n","1 2008 ... [[-0.03819400072097778, -0.24487000703811646, ...\n","1 Manchester City ... [[-0.03819400072097778, -0.24487000703811646, ...\n","1 Sheikh Mansour ... [[-0.03819400072097778, -0.24487000703811646, ...\n","1 more than £3bn ... [[-0.03819400072097778, -0.24487000703811646, ...\n","2 BBC ... [[-0.05707800015807152, 0.3987399935722351, 0....\n","2 Barclays ... [[-0.05707800015807152, 0.3987399935722351, 0....\n","2 British ... [[-0.05707800015807152, 0.3987399935722351, 0....\n","2 Abu Dhabi ... [[-0.05707800015807152, 0.3987399935722351, 0....\n","3 Barclays ... [[0.044123999774456024, -0.47940999269485474, ...\n","4 RBS ... [[-0.32710000872612, 0.4879100024700165, 0.416...\n","4 Lloyds TSB ... [[-0.32710000872612, 0.4879100024700165, 0.416...\n","4 Barclays ... [[-0.32710000872612, 0.4879100024700165, 0.416...\n","4 2008 ... [[-0.32710000872612, 0.4879100024700165, 0.416...\n","4 7bn ... [[-0.32710000872612, 0.4879100024700165, 0.416...\n","4 Gulf ... [[-0.32710000872612, 0.4879100024700165, 0.416...\n","4 Qatar ... [[-0.32710000872612, 0.4879100024700165, 0.416...\n","4 Abu Dhabi ... [[-0.32710000872612, 0.4879100024700165, 0.416...\n","5 S&P ... [[-0.03819400072097778, -0.24487000703811646, ...\n","5 500's ... [[-0.03819400072097778, -0.24487000703811646, ...\n","5 71% ... [[-0.03819400072097778, -0.24487000703811646, ...\n","5 Apple ... [[-0.03819400072097778, -0.24487000703811646, ...\n","5 Apple ... [[-0.03819400072097778, -0.24487000703811646, ...\n","5 $840 ... [[-0.03819400072097778, -0.24487000703811646, ...\n","5 52% ... [[-0.03819400072097778, -0.24487000703811646, ...\n","6 Alice ... [[0.28501999378204346, -0.4355500042438507, 0....\n","6 Alice ... 
[[0.28501999378204346, -0.4355500042438507, 0....\n","6 Alice ... [[0.28501999378204346, -0.4355500042438507, 0....\n","6 Alice ... [[0.28501999378204346, -0.4355500042438507, 0....\n","7 Jim ... [[-0.2708599865436554, 0.04400600120425224, -0...\n","7 Joe ... [[-0.2708599865436554, 0.04400600120425224, -0...\n","8 Microsoft ... [[-0.4214000105857849, -0.18796999752521515, 0...\n","8 PETA ... [[-0.4214000105857849, -0.18796999752521515, 0...\n","9 Germany ... [[-0.2708599865436554, 0.04400600120425224, -0...\n","10 Playstation ... [[-0.029784999787807465, 0.08645900338888168, ...\n","11 Angela Merkel ... [[0.3870899975299835, 0.3262900114059448, 0.64...\n","11 Donald Trump ... [[0.3870899975299835, 0.3262900114059448, 0.64...\n","12 JP Morgan ... [[-0.19327999651432037, 0.6523399949073792, -1...\n","13 Los Angeles ... [[0.06345599889755249, -0.042142000049352646, ...\n","14 Super Nintendo ... [[-0.029784999787807465, 0.08645900338888168, ...\n","\n","[42 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"STc7iOwtljGo"},"source":["## Let's explore our data with the predicted NER tags and visualize them! 
\n","\n","We specify [1:] so we don't see the count for the O-tag, which is the most common, since most words in a sentence are not named entities and thus not part of a chunk"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":368},"id":"UDSAYjadlfdK","executionInfo":{"status":"ok","timestamp":1614376229735,"user_tz":-60,"elapsed":127177,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"cd41bd9c-d283-45f0-ae03-5a9c84bc5d6d"},"source":["ner_df['entities'].value_counts()[1:].plot.bar(title='Occurence of Named Entity tokens in dataset')"],"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":6},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":308},"id":"rlcEvP9tOSiy","executionInfo":{"status":"ok","timestamp":1614376390216,"user_tz":-60,"elapsed":575,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"86d229d3-3847-434c-cbce-eac44c324737"},"source":["ner_type_to_viz = 'LOC'\n","ner_df[ner_df.entities_class == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring LOC labeled tokens in the dataset')"],"execution_count":10,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":10},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"ks6NDXg7RXG3","colab":{"base_uri":"https://localhost:8080/","height":361},"executionInfo":{"status":"ok","timestamp":1614376396753,"user_tz":-60,"elapsed":579,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"2632ce57-f063-4bb3-c1c9-d2325134329b"},"source":["ner_type_to_viz = 'ORG'\n","ner_df[ner_df.entities_class == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring ORG labeled tokens in the dataset')"],"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":11},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXQAAAFICAYAAABA2wWFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZhcVZnH8e8vC4sQEiTtyJYEhBGEYTNswzIIOrIJI4KAioJgdBQRtxFQkW0El0FRFIyyKgIKqICsIgFRQUIIm6CGTUCWEMISVgPv/HFOJzeV6q7qdNWt6tu/z/PU03WXuvftqltvnXvuuecoIjAzs6FvRKcDMDOz1nBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczqwgn9CZJerekhyTNk7Rxp+NpN0mnSvpyp+NoJUn7S7qhyXWPkvSTJdxPR15bZ1vbSXq4Fdtawv2/X9JVLdzeNEkHtWp7Nds+U9Jx7dh2mbomoUt6QNIrksbXzL9VUkiaNMjth6S1BrGJbwIHR8TywNy8vVGDiambRcTHIuLYdmxb0tKSjpf0d0kvSvqbpM9LUmGdaZJeyj+gT0q6SNLKNdtZW9J5kmZLejZv57uSVmtH3J3W6QQ9UBFxTkT855K8tpU/bK3Wzh+Wwe6naxJ6dj+wb++EpH8DXte5cBYxEbir00GUQdLINu/i58AOwM7AGGA/YApwUs16vT+gawHLk35Ue2NcC7gJ+AewcUSsAGwF3Ats3eb4zbpTRHTFA3gA+BJwc2HeN4EvAgFMyvPGAmcDs4EH82tG5GVrAdcBzwBPAufn+dfnbTwPzAP2rrP/EXlbDwJP5H2MBZbOr+l9/b3A3/P0vPzYMm/jw8DdwFzgSmBiYfsBfAz4G/A08D1AfbwXSwPfJiWrf+TnSxeW7w7MBJ7N8eyY578eOCO/Zi7wyzx/f+CGmn0EsFZ+fiZwCnBZ/h/fnucdl5dvBzwMfDa/N48CBxS2tRJwSY7nZuC42v0V1t0BeAlYvWb+5sCrhZimAQcVln8cuKsw/RPgkgEeY4u8D6QfkIdy3LcA2xSWHQVcAJwPPAfMADYsLF8FuJB0HN4PHFLz2p8UprcA/pA/99uA7QrL1iAds88
BVwMnF19bWG854EXgNRYed6v0d6z0fm6FbRwC/BlYLb/um6Rj+XHgVGDZJj/vnfN2ngMeAT7X5Pvd1HcA2BF4Bfhn/j9vKxwTxwK/z/u+ChjfzPtcZx8b58/0ufwZn8fC431F4NL82c7Nz1fLy/6XdJy+lGM7uYljaTNgel72OHBio5j72k/DY3ywibhVD1JCfzvwF2BdYGQ+qCayaEI/G/gVqWQ3CfgrcGBedi7pB2AEsAywdb0E1sf+PwzMAtYklQYvAn5c7/V5vwGMKizfPb9+XWAU6cfhDzWvvxQYB0zIB8uOfcRyDHAj8AagJ3/gxxYOjmeAd+T/c1Vgnbzs1/ngXBEYDfxHvS9Wnf/nzLzNrQrv3ZksmtDn57hGk77QLwAr5uXn5cfrgLeQDuy+EvoJwHV9LHsQ+Gjhy3tQfr4S8BvgV4V1HwP2H+Axtsj7AHwgb3sUKXk9BiyTlx1FSih75v/5c6TEPTq/R7cARwJL5WPmPuCdhdf+JD9fFZiT37MR+XObA/Tk5X8ETiQl2G1JCWaxhF74HB6umdffsbJg/RzrjMJ+vwVcTCoEjCH9IB/f5Of9KDlhkY61TZp8vwfyHVjwHhbmTSMVYP4VWDZPn9DM+1yznaVIx9qn8/+3Z/6se4/3lYD3kI7nMaQzyl/WxHFQzTb7O5b+COyXny8PbNHksbHYfhoe4wNZuZ0PFib0LwHHk36lr85vUJCS6EjSL/dbCq/7KDAtPz8bmEr+Na3ZfqOEfg3w8cL0m/OHPKr29dRP6JeTf1jy9AjSl2Bi4fXFH5ifAYf1Ecu9wM6F6XcCD+TnPwC+Vec1K5NKbys2+mLV+X/OBM6uWX4miyb0F2v+3ydIpYuR+X16c2FZfyX0HwHn9bHsRuCLhYP5BdIPTZDOSCYU1p1PIRkAB5NKOfOAH/ax/cXeh5rlc8mlcFJCubHm83wU2IZ0NvH3mtceDpxReG1vQv8ChYJBnncl8CFSUpsPLFdY9lMGltD7O1a2I5WgTwRuAMbm+SKdib2p8Lotgfsbfd75+d9J37sVGnynF3m/Gdh3YMF7WJg3DfhSYfrjwBWN3uc6296WdDajwrw/kI/3OutvBMytiaPfRFtzLF0PHE3hbKKZmJvZT+2j2+rQAX4MvI90MJxds2w86Rf1wcK8B0m/dAD/QzpY/yTpLkkfHsB+V6mz3VHAvzT5+onASZKelvQ08FSOZdXCOo8Vnr9A+rVuNpZV8vPVSV/iWqsDT0XE3CbjrfVQg+VzImJ+Ybo3/h7S+1R8fX/bepL041PPynl5r0MiYiywAakkWLzYOae4nYg4OSLGkaocRvf/rySSPifpbknP5M9sLOkYW+z/iIjXSGeMq5A+61V6P+v82iOof6xMBPaqWXfrHPsqpETxfGH9B+tsoz/9HSuQSsNTSKXvZ/K8HlLp85ZCTFfk+b36+rwhlV53Bh6UdJ2kLQcQb7PfgYG+vr/3udYqwCORs2a24D2U9DpJP5D0oKRnSQl5XH/XlhocSweSzirukXSzpF2XIOamdF1Cj4gHSae2O5OqPYqeJJUGJxbmTSCVQoiIxyLiIxGxCqkE8f0BtGz5R53tzifVeS0WZp15D5GqC8YVHstGxB+a3H+jWP5R2M+b+tj/6yWNq7PseQoXlyW9sc469f6nZswmvU/FZLt6P+v/Bthc0iLrSNo8v+63iwUWcQep1P+9QkuYa4A9ljBmJG1DKgC8l3RWM450NqDCaqsX1h9B+h//QXqv76/5rMdExM51dvUQqRRWXHe5iDiBVOJfUdJyhfUn9BN2vc+ov2MFUklxV+AMSVvleU+SSuDrFWIaG+kCdEMRcXNE7E6q5vklqaTdagM9Hvt7n2s9CqxabFXFou/7Z0ln6JtHuti+bZ7fu/4isTU6liLibxGxL+n9+hpwQf7MG8U84O9k1yX07EBg+5qSCxHxKung+V9JYyRNBD5DukCGpL0KTdbmkt6Q1/L046S6zr6cC3xa0hq
Slge+SrqoOr/OurPzdovbOxU4XNJ6OZaxkvZq+j9ePJYvSerJzTiPJP+PwGnAAZJ2kDRC0qqS1omIR0nVPt+XtKKk0ZJ6D8TbgPUkbSRpGdLpbEvkz+Qi4KhcslkH+GA/6/+GlIwvlLSepJGStsj/3ykR8bc+XnoWqQS8W54+CthG0omSVgXI79W6TYY+hvRDNBsYJelIYIWadd4qaY/cPPVQ4GVStdCfgOckfUHSsvl/WF/SpnX28xPgXZLemddbJjc/XC0XXqYDR0taStLWwLv6iflxYCVJYwvz+jtWAIiIacD7gYskbZbPNn4IfEvSGwDycfTORm9ajvP9ksZGxD9JF/pea/S6JfA4MCn/kDajz/e5zrp/JH32h+TvyR6ka1O9xpB+8J6W9HrgK3ViW7Nm/T6PJUkfkNST3/en8+zXmoi5Uc5aTFcm9Ii4NyKm97H4k6QS532kesGfAqfnZZsCN0maR7rg86mIuC8vOwo4K5/avLfOdk8nVfdcTzpDeCnvq158L5CuQv8+b2+LiPgF6df3vHyadiew0wD+7aLjSF/024E7SBezjsv7/hNwAOmi1jOkFhK9JbT9SGcw95DqPA/Nr/kr6QLXb0gtDJq6uWYADiadYj5Geg/PJSW/vrwHuJZ0mj+PdGCfRh/vN0BEvEJqSfDlPP1XUl32asBtkp4jtX74R+86DVyZ9/9X0un2SyxeVfQrYG9S4WA/YI+I+Gf+EduVVLd6P6nE+6P8HtTG/RDpgvkRpC/8Q8DnWfjde1/+P54iJY7aasbitu4hvbf35eNuFfo5VmpeezXpwv8lkjYh1d/OAm7Mx+tvSKXSZuwHPJBf9zHSj0Wr/Tz/nSNpRqOVm3ifi+u+Qjq725/0vu/NorUB3yZddH2S9AN+Rc0mTgL2lDRX0ndofCztCNyV89JJwD4R8WITMdfupyEtWo1kNniSvga8MSI+1OlYzIaTriyh29AiaR1JGyjZjFRl9otOx2U23FT21nUr1RhSVcAqpHq//yNVV5hZiVzlYmZWEa5yMTOrCCd0M7OK6Fgd+vjx42PSpEmd2r2Z2ZB0yy23PBkRPfWWdSyhT5o0ienT+2pqbmZm9Ujqs3sIV7mYmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVRNMJPXfveKukS+ssW1rS+ZJmSbpJ0qRWBmlmZo0NpIT+KdIAyPUcSBp5ZS1St65fG2xgZmY2ME0l9Nzh+i6kPp/r2Z00AAGkkdJ3qBkNxMzM2qzZG4u+TRpiaUwfy1cld+geEfMlPUMaAbs4PiSSppDGN2TChP5G2komHfbrJsOr74ETdhnU683MhpKGJXSlAU2fiIhbBruziJgaEZMjYnJPT907V83MbAk1U+WyFbCbpAeA84DtJf2kZp1HyAPq5vEXx5JGZTczs5I0TOgRcXhErBYRk4B9gN9GxAdqVrsY6B1ubM+8jjtaNzMr0RJ3ziXpGGB6RFxMGuD3x5JmkQZd3adF8ZmZWZMGlNAjYhowLT8/sjD/JWCvVgZmZmYD4ztFzcwqwgndzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwinNDNzCrCCd3MrCKaGSR6GUl/knSbpLskHV1nnf0lzZY0Mz8Oak+4ZmbWl2ZGLHoZ2D4i5kkaDdwg6fKIuLFmvfMj4uDWh2hmZs1omNDzYM/z8uTo/PAA0GZmXaapOnRJIyXNBJ4Aro6Im+qs9h5Jt0u6QNLqLY3SzMwaaiqhR8SrEbERsBqwmaT1a1a5BJgUERsAVwNn1duOpCmSpkuaPnv27MHEbWZmNQbUyiUingauBXasmT8nIl7Okz8C3trH66dGxOSImNzT07Mk8ZqZWR+aaeXSI2lcfr4s8A7gnpp1Vi5M7gbc3cogzcyssWZauawMnCVpJOkH4GcRcamkY4DpEXExcIik3YD5wFPA/u0
K2MzM6mumlcvtwMZ15h9ZeH44cHhrQzMzs4HwnaJmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV0cyYostI+pOk2yTdJenoOussLel8SbMk3SRpUjuCNTOzvjVTQn8Z2D4iNgQ2AnaUtEXNOgcCcyNiLeBbwNdaG6aZmTXSMKFHMi9Pjs6PqFltd+Cs/PwCYAdJalmUZmbWUFN16JJGSpoJPAFcHRE31ayyKvAQQETMB54BVmploGZm1r+mEnpEvBoRGwGrAZtJWn9JdiZpiqTpkqbPnj17STZhZmZ9GFArl4h4GrgW2LFm0SPA6gCSRgFjgTl1Xj81IiZHxOSenp4li9jMzOpqppVLj6Rx+fmywDuAe2pWuxj4UH6+J/DbiKitZzczszYa1cQ6KwNnSRpJ+gH4WURcKukYYHpEXAycBvxY0izgKWCftkVsZmZ1NUzoEXE7sHGd+UcWnr8E7NXa0MzMbCB8p6iZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFNDOm6OqSrpX0Z0l3SfpUnXW2k/SMpJn5cWS9bZmZWfs0M6bofOCzETFD0hjgFklXR8Sfa9b7XUTs2voQzcysGQ1L6BHxaETMyM+fA+4GVm13YGZmNjADqkOXNIk0YPRNdRZvKek2SZdLWq8FsZmZ2QA0U+UCgKTlgQuBQyPi2ZrFM4CJETFP0s7AL4G162xjCjAFYMKECUsctJmZLa6pErqk0aRkfk5EXFS7PCKejYh5+fllwGhJ4+usNzUiJkfE5J6enkGGbmZmRc20chFwGnB3RJzYxzpvzOshabO83TmtDNTMzPrXTJXLVsB+wB2SZuZ5RwATACLiVGBP4L8lzQdeBPaJiGhDvGZm1oeGCT0ibgDUYJ2TgZNbFZSZmQ2c7xQ1M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOriGbGFF1d0rWS/izpLkmfqrOOJH1H0ixJt0vapD3hmplZX5oZU3Q+8NmImCFpDHCLpKsj4s+FdXYC1s6PzYFT8l8zMytJwxJ6RDwaETPy8+eAu4FVa1bbHTg7khuBcZJWbnm0ZmbWpwHVoUuaBGwM3FSzaFXgocL0wyye9M3MrI2aqXIBQNLywIXAoRHx7JLsTNIUYArAhAkTlmQTpZt02K8HvY0HTtilBZGYmfWvqRK6pNGkZH5ORFxUZ5VHgNUL06vleYuIiKkRMTkiJvf09CxJvGZm1odmWrkIOA24OyJO7GO1i4EP5tYuWwDPRMSjLYzTzMwaaKbKZStgP+AOSTPzvCOACQARcSpwGbAzMAt4ATig9aGamVl/Gib0iLgBUIN1AvhEq4IyM7OB852iZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFdHMmKKnS3pC0p19LN9O0jOSZubHka0P08zMGmlmTNEzgZOBs/tZ53cRsWtLIjIzsyXSsIQeEdcDT5UQi5mZDUKr6tC3lHSbpMslrdeibZqZ2QA0U+XSyAxgYkTMk7Qz8Etg7XorSpoCTAGYMGFCC3ZtZma9Bl1Cj4hnI2Jefn4ZMFrS+D7WnRoRkyNick9Pz2B3bWZmBYNO6JLeKEn5+WZ5m3MGu10zMxuYhlUuks4FtgPGS3oY+AowGiAiTgX2BP5b0nzgRWCfiIi2RWxmZnU
1TOgRsW+D5SeTmjWamVkH+U5RM7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzimiY0CWdLukJSXf2sVySviNplqTbJW3S+jDNzKyRZkroZwI79rN8J2Dt/JgCnDL4sMzMbKAaJvSIuB54qp9VdgfOjuRGYJyklVsVoJmZNafhINFNWBV4qDD9cJ73aO2KkqaQSvFMmDChBbsePiYd9utBvf6BE3bpeAytiKMbYuiWOLohhm6Joxti6IY4Sr0oGhFTI2JyREzu6ekpc9dmZpXXioT+CLB6YXq1PM/MzErUioR+MfDB3NplC+CZiFisusXMzNqrYR26pHOB7YDxkh4GvgKMBoiIU4HLgJ2BWcALwAHtCtbMzPrWMKFHxL4NlgfwiZZFZGZmS8R3ipqZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE7qZWUU4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVVEUwld0o6S/iJplqTD6izfX9JsSTPz46DWh2pmZv1pZkzRkcD3gHcADwM3S7o4Iv5cs+r5EXFwG2I0M7MmNFNC3wyYFRH3RcQrwHnA7u0Ny8zMBqqZhL4q8FBh+uE8r9Z7JN0u6QJJq9fbkKQpkqZLmj579uwlCNfMzPrSqouilwCTImID4GrgrHorRcTUiJgcEZN7enpatGszM4PmEvojQLHEvVqet0BEzImIl/Pkj4C3tiY8MzNrVjMJ/WZgbUlrSFoK2Ae4uLiCpJULk7sBd7cuRDMza0bDVi4RMV/SwcCVwEjg9Ii4S9IxwPSIuBg4RNJuwHzgKWD/NsZsZmZ1NEzoABFxGXBZzbwjC88PBw5vbWhmZjYQvlPUzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIppK6JJ2lPQXSbMkHVZn+dKSzs/Lb5I0qdWBmplZ/xomdEkjge8BOwFvAfaV9Jaa1Q4E5kbEWsC3gK+1OlAzM+tfMyX0zYBZEXFfRLwCnAfsXrPO7sBZ+fkFwA6S1LowzcysEUVE/ytIewI7RsRBeXo/YPOIOLiwzp15nYfz9L15nSdrtjUFmJIn3wz8ZZDxjweebLhWe3VDDNAdcXRDDNAdcXRDDNAdcXRDDNAdcbQihokR0VNvwahBbnhAImIqMLVV25M0PSImt2p7QzWGbomjG2Lolji6IYZuiaMbYuiWONodQzNVLo8AqxemV8vz6q4jaRQwFpjTigDNzKw5zST0m4G1Ja0haSlgH+DimnUuBj6Un+8J/DYa1eWYmVlLNaxyiYj5kg4GrgRGAqdHxF2SjgGmR8TFwGnAjyXNAp4iJf0ytKz6ZhC6IQbojji6IQbojji6IQbojji6IQbojjjaGkPDi6JmZjY0+E5RM7OKcEI3M6sIJ3Qzs4ootR26tYakHuAjwCQKn2FEfLgDsWwVEb9vNK/q8mfyBVL3GMv0zo+I7UuMYRTwakSEpNWBzYF7I+LWsmLoj6R/iYjHOx1HlQ25Erqkr0taQdJoSddImi3pAyXu/18knSbp8jz9FkkHlrX/7Fektv6/AX5deHTCd5uc1xaSviHpo3Xmf1TSCWXFAZwD3A2sARwNPEBq8lsKSR8BngAezM+vITUhPk/SF8qKo05c4yQdKOkaoGM/LJJWkvRuSW8tcZ9vkPRtSZdKOl7SCm3f51Br5SJpZkRsJOndwK7AZ4DrI2LDkvZ/OXAG8MWI2DCXim6NiH8rY/85hpkRsVFZ++sjhi2BfwcOJXXI1msF4N0lfh63AJNr73uQNAK4PSLWLyuOiHirpNs
jYoM87+aI2LSk/d8FbA2MIf2wTIyIJyW9Drg5ItYrI44cy7Kk/p3eB2ycY/ov0vf0tZJiuBQ4LCLulLQyMAOYDrwJmBoR3y4hhiuAW4DrSblqTETs3859DrkSOgurGHYBfh4Rz5S8//ER8TPgNUjt9IFXS47hUkk7l7zPWqOB5Umfx5jC41lSybAsS9e7iS0njjI7iPtn/vuopF0kbQy8vsT9vxIRcyPi76TO9J4EiIgXgFfKCkLST4G/Au8gnalNIvXEOq2sZJ6tERF35ucHAFdHxLtI1VBlVU2uHBFfjIgrI+KTwAbt3uFQrEO/VNI9wIvAf+e6y5dK3P/zklYCAkDSFkDZPyqfAo6Q9DIpkQiIiGj7KV3BVyJiB0nrRcTRJe631ouS1o6IvxVnSlqbdIyU5ThJY4HPkhLZCsCnS9z/svlHZASwVH6u/Fim31e21luAuaSzhLsj4lVJnagG+Gfh+Q7ADwEi4jlJpf2wSFqRhQWLkcXpiHiq5fsbalUuAJJeDzyTD5blSKcyj5W0701IX9j1gTuBHmDPiLi9jP13C0l/Bg4i3SX8PmpKwxExo6Q4diJ9HseRTm8BJgOHA4dGxGVlxNFpkqaRCxn1RMTbSoxlHWBfYG9Sz4JvBtYv84KopEuAq4CHgdNJJfanc3XQ9DKqoCQ9QDqTr3emGBGxZsv3OdQSeq4zPQ04NyLmdiiGUaSDVMBfIuKfDV7SjhhWBNZm0RYV15e4/z1JA5tsTaqbLIqSW3esD3ye9CMLcBfwjYi4o8QY1gA+yeItj3YrK4ZuIGmLiLixMP1WUnJ/L/BwRPx7SXG8ATgGWBn4XkRclee/DXhrRHyzjDjKNhQT+lqkOrG9SYnkDOCqdncGJmmP/pZHxEXt3H9NLAeRql1WA2YCWwB/LDOJFmL5ckQcW/Z+u42k20gFjTvI11cAIuK6kva/KfBQ75mqpA8C7wEeBI5qx+l9H3HMiIhN6swXsE2ZhY5uk697XRsRL0raox05Y8gl9F65FcOuwCmki5JnACe168CVdEY/i6PMNuCS7gA2BW7MLX7WAb4aEf3+6LQxnt2AbfPktIi4tMR9rw18kdQp3ImkutJtgHuBgyKilKaDkm6KiM3L2Fcf+58BvD0inpK0LWlksU8CGwHrRkQpF6r7Suhly8fFEaT6/N7jYltgFnBgRNSeVZYR08mk6sAZwBbteJ+G4kVRJG1AKqXvDFxIagO8NfBb0gHcchFxQDu2u4ReioiXJCFp6Yi4R9KbOxGIpONJwxSek2d9StK/R8QRJYVwBnA26SLkTaRmlO8mJfWTSa0aynCSpK+Q6m1f7p1Z1rUEYGShMLM3qWnehcCFkmaWFAPAmpJqu9deoMQqqP6Oi+9RwnEhaXPgvoiYDRARB0s6knR2/Ym27HOoldBzHfrTpNPbCyPi5cKyi9pdSs0tXL5C+gEJ4AbgmIgobUAPSb8g/aAdCmxPKoWMjojSmzJKuh3YqLdJmtKg4rf2tsUuYf8L2uRLmhVpoPLFlpUQx/HAfqQzg94ql9KuJSgNA7lRpO6u7wGm9FZvSLqzxPb4fyNdLK+rxCqojh8XuRpus94cJelE0jWWjwK/iIitW73PoVhC3ysi7qu3oKQqh/NINwq8J0+/HzgfeHsJ+wYgIt6dnx4l6VrSXaNXlLX/OsaRqjzIsZSp2ATt2X6WtdtewJqRBlLvhHOB6yQ9SWqu+TtYcM2pzGa188pK2g10w3ExKiJezo0oziR9LntGxGv5hq/W77AdG22niLhP0i7AeizawuOYkkJYueYi4HGS9i5p38CCZpu9eltydOpU63jg1vzDIlI95WEl7n+dfJYg4E35OXm65c3C+nEn6YftiRL3WfQ10u3+K7NoI4ERpLr0stxf4r760w3HxQ1KXR68kXQT3rY5mW9Hm+6RGHIJXdKpwOuAtwE/It2V+KcSQ7hK0j7Az/L0nqTRnMo0gzSG61zSAQQuRN8AAAyGSURBVDoOeEz
S48BHIuKW/l7cShFxbm4D3XuL+xfKuicgW7fEffVnHHCPpJtZtA69rDrjP9W7yBYRfy1p/72KTRb3ioifF6a/WuK1lY4fFxHxUUlbk+7UfRy4IFfZioVn+C01FOvQb4+IDQp/lwcuj4htStr/c8ByLDxtGwE8n5+XcrempB8CF0TElXn6P0kHSG9Ln9JaW0jaCpgZEc8rdZK2SY7hwbJiqBPTeGBOu5uy1uzzP+rNL7HO+NaI2LiMfTWIY0Erl9oWL93QAia3jts3Is5puPLg91XblPRDpAvW95LutG55i7whV0Jn4anKC5JWAeaQTjNLERFjytpXP7aIiI/0TkTEVZK+mUsES5ccyynAhpI2JHWUdhqpdUHdBNdqSl0vnECqwz8W+DEwHhgh6YMRUcq1hS6oN+6R9Jm+FkbEiSXFoT6e15tuXxCpZ8NPAKuSBrG/GjiY1DXDbSxsldVOPyBfW8tNSY9nYVPSqbShz6OhmNAvlTQO+Aap6iFIVS+lyTcZ9bZy+V1E/LLM/ZM6gPoC6QItpF/9x3MLkzIvBALMj4iQtDvpjrzTVG53wieT2huPJTVb3Skibsxt88+lpIvF+Yflu6RT/aVIA6o/X8YZWzaSVE9bZodk9UQfz+tNt9OPSVWSfyS1ujmC9N78V0SU1Yyz9KakQ67KpSiXRpeJEntclPR9YC1SsoB8ChURbWlX2kcM41m06eTvSbc5PwNMiIhZJcZyHSlpfpjUxvcJ4LYoqTvhmuZpd0fEuoVlpVVDSJoO7AP8nHTzyAeBf42Iw0vaf8erM3Icr5KqIAUsC7zQu4j0XR1dUhx39B6DuaDzKOm7UVpHfp1oSjpkSuj93Xovqcxb77cn3XnX29viWaS+Q0qRD86TIuL9faxSWjLP9iZ1zvXhiHhM0gTS2VNZimcktS0HSi2tRMQsSSMj4lXgDEm3kjoJK0OnS+YARMTITseQLehfKVInfg+Xmcyz0puSDpmEDryrn2UBlJXQZwETSH1kQGptUloSzQfnRElLdbDNczGexySdA2wqaVdSa4uzSwxhQ0nPkkuE+Tl5usxuY1+QtBQwU9LXSSXCMscb2KHEffVJ0jLAx0hnsbcDp0caM6BsG9YcC8sWjpNSGi9ExP/mZoulNSUd0lUunZCrGDZlYVPJTUlDjT0L5TRTk3Q2qa72Yha2sCnzwlcxlveSSuTTSF+WbYDPR8QFZcfSSZImkpqmLUXqB30s8P0yq7+6gaTzSaXj3wE7AQ9GxKc6G9XwMeQSuqSvAl+PiKfz9IrAZyPiSyXtv9h6ozeB7QN8HMpp7aDUZ8hiogMDTeTbm98REU/k6R7gN1HSEHTdIFeDnd1PNdiwUVN3PYo+2sdbewylKpdeOxVvToiIuUrdUpaS0CPiOqXRYN5Hut37fuDUMput9Sbu3AafiJhX1r7rGNGbzLM5DM2hDZdYN1WDKQ348mK+I/FfgXVI92mU1Wd/se56vtQVVfvDxlBM6COVehjs7fBmWaDtba/zl2Pf/HiS1H+LosSRYAqxrE9qlvX6PP0k8MGIKO3ibMEVkq5k0VY/w2KUoBr3Ab9X6mmwk9Vg1wPb5DPXq0jVgXuT+hwqQ8frroezoZjQzwGu0cL+yQ8Aziphv/eQ6gV37a0XlVTmmJFFU4HPRMS1OY7tSP09lzIaTC+l4td3SNcRenuOmxoRvygzji5xb36MIA2W3SmKiBfyvQDfj4ivt6vNcz1d1MplWBpSCT0nkJ+S7vTq7d3w2N5b4NtsD1Jd+bWSriDd1NOp88nlepM5QERMy6fapco3FF2W60xLG7GpG3VRNZgkbUkqkffe4OUkO0wMqYRek0BK7S423w36y5w4dyf1Rf4GSaeQ+ja+qsRw7pP0ZVK1C8AHSKf8nTBD0qZR0shA3aqLqsEOJbV9/0VE3CVpTeDaBq+xihiKrVzOAk7uhgSS6yn3AvaOiNLaAef9Hs3Cao7fkcaNLH3Q7HwH3Fqkdvm9dwhGlDT
ARbeQ9AfgizXVYF+NkgZFNoOhmdCdQLpIbn+9mOhgb4udIOm22qaa9ea1cf+X0M+dsWXcH2GdN6SqXLJ3djqATlE/YzVCx760KwN3RcRzsKCXu3VZeCftcNHparBv5r97kAZU+Eme3pd0w5MNA0OuhN5L0htYdMSiv3cwnFJImg08RGoieBM1F2U70YVr7q9kk0LfNiOA6cPtZpJuqQaTND0iJjeaZ9U05EroknYD/g9YhdSz30TgbtKQdFX3RuAdpFLX+4BfA+d2qP15LxX6qCDf0DLkjqvByon7kE7HASwnac3I4+5KWoM0IIsNA0Pxi3cssAXp9vKNJb2NdHpbebkXvytIN/MsTUrs0yQdHREndyis+yQdQhroAlIXCJ1qcdMx+cazz5FGdV/wvYqI7UsO5dOkY+I+0hncRGBKyTFYhwy5Kpfe08fch8jGuURY2sWnTsuJfBdSMp9E6qDr9Ih4pEPxvIF0c9H2pIty1wCH1nQHUHn5eDwVuAV4tXd+lDi+ayGWpUm3/APc03tXtVXfUCyhP51v3rgeOEfSExRuta6y3Mvi+qRb64+OiDs7HBI5ce/T6Ti6wPyIOKXxau0l6RbSMIDndqIZq3XWUCyhL0fqLH4E6W64scA5ETGno4GVQNJrFAakLi6i5H4yJP1Pvq38u9RpLhcR3VCfXBpJR5Gu6fwCWFAijjYMBNwgjrVI3WHsDUwnDRxe7IvbKmzIJfQidWB0d0skvSsiLlEayXwxEVFG/zpdQ9L9dWZHRKxZejAsaG20K+naxqukxH5S2T8wVq4hk9DVz+jupFusS+0KwKxbSdqAVErfGbiS1KHd1sB+kcdftWoaSgl9OgtHd59KzejuUdJgwJZ06U1OHZX7c3kLi94fUeZwfL116E+T6tEvLF4QlXRRRPQ5Nq8NfUMpoXfF6O6WdONNTp2UR5HajpTQLyMNv3ZDROxZchwL2qDb8DOURpbpmtHdO0nSf0n6nKROd4HwRtIZ0/rASaQbnp6MiOuGWzLP9iQN1PxYRBwAbEg6myzbHEknSpqeH/8nqRNxWAcMpYS+oaRnJT0HbJCf907/W6eDK4Ok75NuHFkJODb3HdIREfFqRFwRER8i3eg1i3RDy8GdiqnDXoyI14D5uT+bJ4DVOxDH6cBzwHvz41nSBVEbBoZMO3SPhALAtsCGeQzL15H6Czm2U8HUucnpO6Rme8PRdEnjSCNH3QLMA/7YgTjeFBHvKUwfXeaIRdZZQyahGwCv5Nv/ycOMdWwE3m68yalTJPWQSsZExKl5RKsVIuL2DoTzoqStI+KGHNtWLF5FaRU1ZC6KGkh6EfgbCy9AvolU1VF6n/DddJNTJ0k6CPgqaTzRNYApEdFvC6A2x7MRaYzdsaTP4ilg/4i4rVMxWXmc0IeQPJhEf4MYVL4L4W4j6U7gbRExOw/3dk5EbNkFca0AEBHPdjoWK4+rXIaWO+k7ob8s6V7SMGjXlBjTcPdKRMwGiIj78nWF0kn6TB/zAYiIE0sNyDrCCX0IiYgxfS2TNJJUp31O/mvlWE3Sd/qaLrFPmz6PDYZRs97hzgm9IvLF0ttyZ1lWns/XTJfeXS5ARBzd1zJJh5YZi3WO69DNKk7S3yNiQqfjsPYbSjcWmdmS6VjzViuXE7pZ9fk0fJhwHbpZBeQuMOolbgHLlhyOdYjr0M0GQdI3gFkR8YOa+R8F1oiIwzoTmQ1HTuhmg5D7H59cO2pWHjHo9ohwE1IrjevQzQZn6XpDIOaeF30x0krlhG42OC9KWrt2Zp7nTrGsVL4oajY4RwKXSzqOhTcVTQYOB3xDj5XKdehmg5THEv08C7tcuAv4RkTc0bmobDhyQjczqwjXoZsNgqS1JZ2Zx/FcTdLlkuZJuk3Spp2Oz4YXJ3SzwTkD+APwD+Am0shF44HPASd3MC4bhlzlYjYIkmZGxEb5+ayIWKveMrMyuIRuNjivFZ7Xjg70GmYlcgndbBAkvcDCcV17x3glT68
ZEct1KjYbftwO3Wxw1u10AGa9XEI3azFJ44E59boEMGsn16GbDYKkLSRNk3SRpI0l3UkazPtxSTt2Oj4bXlxCNxsESdOBI4CxwFRgp4i4UdI6wLkRsXFHA7RhxSV0s8EZFRFXRcTPgcci4kaAiLinw3HZMOSEbjY4xaaJtb0r+vTXSuUqF7NBkPQq8DwLh3p7oXcRsExEjO5UbDb8OKGbmVWEq1zMzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwi/h82CLWTMSCIHQAAAABJRU5ErkJggg==\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"BDe5P8ByVOU_"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_ONTO_18class_example.ipynb b/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_ONTO_18class_example.ipynb index 39dc408b..0c33620a 100644 --- a/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_ONTO_18class_example.ipynb +++ b/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_ONTO_18class_example.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_ner_ONTO_18class_example.ipynb","provenance":[{"file_id":"1CYzHfQyFCdvIOVO2Z5aggVI9c0hDEOrw","timestamp":1599267946314}],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"NYQRU3pRO146"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_ONTO_18class_example.ipynb.ipynb)\n","\n","# Named-entity recognition with Deep Learning ONTO NOTES\n","\n","Named-Entity recognition is a well-known technique in information extraction it is also known as entity identification, entity chunking and entity extraction. Knowing the relevant tags for each article help in automatically categorizing the articles in defined hierarchies and enable smooth content discovery. 
This pipeline is based on NerDLApproach annotator with Char CNN - BiLSTM and GloVe Embeddings on the OntoNotes corpus and supports the identification of 18 entities.\n","\n","\n","Following NER classes can be detected by this model\n","\n","\n","\n","\n","|Type | \tDescription |\n","|------|--------------|\n","| PERSON | \tPeople, including fictional like **Harry Potter** |\n","| NORP | \tNationalities or religious or political groups like the **Germans** |\n","| FAC | \tBuildings, airports, highways, bridges, etc. like **New York Airport** |\n","| ORG | \tCompanies, agencies, institutions, etc. like **Microsoft** |\n","| GPE | \tCountries, cities, states. like **Germany** |\n","| LOC | \tNon-GPE locations, mountain ranges, bodies of water. Like the **Sahara desert**|\n","| PRODUCT | \tObjects, vehicles, foods, etc. (Not services.) like **playstation** |\n","| EVENT | \tNamed hurricanes, battles, wars, sports events, etc. like **hurricane Katrina**|\n","| WORK_OF_ART | \tTitles of books, songs, etc. Like **Mona Lisa** |\n","| LAW | \tNamed documents made into laws. Like : **Declaration of Independence** |\n","| LANGUAGE | \tAny named language. Like **Turkish**|\n","| DATE | \tAbsolute or relative dates or periods. Like every second **friday**|\n","| TIME | \tTimes smaller than a day. Like **every minute**|\n","| PERCENT | \tPercentage, including ”%“. Like **55%** of workers enjoy their work |\n","| MONEY | \tMonetary values, including unit. Like **50$** for those pants |\n","| QUANTITY | \tMeasurements, as of weight or distance. Like this person weights **50kg** |\n","| ORDINAL | \t“first”, “second”, etc. Like David placed **first** in the tournament |\n","| CARDINAL | \tNumerals that do not fall under another type. Like **hundreds** of models are avaiable in NLU |"]},{"cell_type":"code","metadata":{"id":"M2-GiYL6xurJ"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! 
apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.4.7 > /dev/null\n","\n"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Gph8XOL1Pzpl"},"source":["# NLU makes NER easy. \n","\n","You just need to load the NER model via ner.load() and predict on some dataset. \n","It could be a pandas dataframe with a column named text or just an array of strings."]},{"cell_type":"code","metadata":{"id":"pmpZSNvGlyZQ","colab":{"base_uri":"https://localhost:8080/","height":757},"executionInfo":{"status":"ok","timestamp":1609628217826,"user_tz":-60,"elapsed":179196,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b7848f8f-2049-45dc-fa3c-c6c52c09a340"},"source":["import nlu \n","\n","example_text = ['People, including fictional like Harry Potter.',\n","'Nationalities or religious or political groups like Germans.',\n","'Buildings, airports, highways, bridges, etc. like New York Airport',\n","'Companies, agencies, institutions, etc. like Microsoft',\n","'Countries, cities, states. like Germany',\n","'Non-GPE locations, mountain ranges, bodies of water. Like Sahara Destert',\n","'Objects, vehicles, foods, etc. (Not services.) Like the a or playstation or Playstation',\n","'Named hurricanes, battles, wars, sports events, etc. like hurricane Katrina',\n","'Titles of books, songs, etc. Like the Mona Lisa',\n","'Named documents made into laws. Like the Declaration of Independence',\n","'Any named language. Like English',\n","'Absolute or relative dates or periods. Like every second friday',\n","'Times smaller than a day. Like every minute',\n","'Percentage, including ”%“. 
Like 55% of workers enjoy their work',\n","'Monetary values, including unit. Like 50$ for those pants',\n","'Measurements, as of weight or distance. Like this person weights 50kg',\n","'“first”, “second”, etc. Like David place first in the tournament',\n","'Numerals that do not fall under another type. Like hundreds of models are avaiable in NLU',]\n","nlu.load('ner.onto').predict(example_text)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["onto_recognize_entities_sm download started this may take some time.\n","Approx size to download 159 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
entities_confidenceembeddingsentities
origin_index
0PERSON[[0.2901900112628937, 0.8049700260162354, 0.31...Harry Potter
1NORP[[-0.02076599933207035, 0.5784800052642822, 0....Germans
2FAC[[0.058736998587846756, 0.6042199730873108, -0...New York Airport
3ORG[[0.39910998940467834, 0.23048000037670135, -0...Microsoft
4GPE[[-0.0445609986782074, 0.8070899844169617, 0.6...Germany
5ORG[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...Non-GPE
5LOC[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...Sahara Destert
6PRODUCT[[-0.12313000112771988, 1.027899980545044, -0....Playstation
7EVENT[[-0.3515700101852417, -0.1662600040435791, 0....hurricane Katrina
8PERSON[[0.5689799785614014, -0.38422998785972595, 0....Lisa
10LANGUAGE[[-0.2367600053548813, 0.15658999979496002, 0....English
11ORDINAL[[-0.0853630006313324, -0.5337499976158142, 1....second
12DATE[[-0.29739999771118164, 0.1302099972963333, 0....smaller than a day
13PERCENT[[0.06162400171160698, 0.6707599759101868, 0.3...55%
14MONEY[[0.3520300090312958, -0.1374099999666214, 0.2...50$
15PERSON[[-0.5554199814796448, 0.0024757999926805496, ...50kg
16ORDINAL[[-0.04256799817085266, -0.08424600213766098, ...first
16ORDINAL[[-0.04256799817085266, -0.08424600213766098, ...second
16PERSON[[-0.04256799817085266, -0.08424600213766098, ...David
16ORDINAL[[-0.04256799817085266, -0.08424600213766098, ...first
17CARDINAL[[-0.2671700119972229, 0.7479100227355957, -0....hundreds
17ORG[[-0.2671700119972229, 0.7479100227355957, -0....NLU
\n","
"],"text/plain":[" entities_confidence ... entities\n","origin_index ... \n","0 PERSON ... Harry Potter\n","1 NORP ... Germans\n","2 FAC ... New York Airport\n","3 ORG ... Microsoft\n","4 GPE ... Germany\n","5 ORG ... Non-GPE\n","5 LOC ... Sahara Destert\n","6 PRODUCT ... Playstation\n","7 EVENT ... hurricane Katrina\n","8 PERSON ... Lisa\n","10 LANGUAGE ... English\n","11 ORDINAL ... second\n","12 DATE ... smaller than a day\n","13 PERCENT ... 55%\n","14 MONEY ... 50$\n","15 PERSON ... 50kg\n","16 ORDINAL ... first\n","16 ORDINAL ... second\n","16 PERSON ... David\n","16 ORDINAL ... first\n","17 CARDINAL ... hundreds\n","17 ORG ... NLU\n","\n","[22 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"code","metadata":{"id":"qgGdEUgkMika","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1609628226448,"user_tz":-60,"elapsed":187809,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"87443e04-3cf8-4d1d-a434-9f924e22f49c"},"source":["text = [\"Barclays misled shareholders and the public about one of the biggest investments in the bank's history, a BBC Panorama investigation has found.\",\n","\"The bank announced in 2008 that Manchester City owner Sheikh Mansour had agreed to invest more than £3bn.\",\n","\"But the BBC found that the money, which helped Barclays avoid a bailout by British taxpayers, actually came from the Abu Dhabi government.\",\n","\"Barclays said the mistake in its accounts was 'a drafting error'.\",\n","\"Unlike RBS and Lloyds TSB, Barclays narrowly avoided having to request a government bailout late in 2008 after it was rescued by £7bn worth of new investment, most of which came from the Gulf states of Qatar and Abu Dhabi.\",\n","\"The S&P 500's price to earnings multiple is 71% higher than Apple's, and if Apple were simply valued at the same 
multiple, its share price would be $840, which is 52% higher than its current price.\",\n","\"Alice has a cat named Alice and also a dog named Alice and also a parrot named Alice, it is her favorite name!\"\n","] + example_text\n","ner_df = nlu.load('ner.onto').predict(text, output_level='chunk')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["onto_recognize_entities_sm download started this may take some time.\n","Approx size to download 159 MB\n","[OK!]\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"STc7iOwtljGo"},"source":["## Lets explore our data which the predicted NER tags and visalize them! \n","\n","We specify [1:] so we dont se the count for the O-tag wich is the most common, since most words in a sentence are not named entities and thus not part of a chunk"]},{"cell_type":"code","metadata":{"id":"UDSAYjadlfdK","colab":{"base_uri":"https://localhost:8080/","height":381},"executionInfo":{"status":"ok","timestamp":1609628227256,"user_tz":-60,"elapsed":188597,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"f97077e1-bb5a-4ab3-f714-c8fb0f859b75"},"source":["ner_df['entities'].value_counts()[1:].plot.bar(title='Occurence of Named Entity tokens in dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":4},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"rlcEvP9tOSiy","colab":{"base_uri":"https://localhost:8080/","height":361},"executionInfo":{"status":"ok","timestamp":1609628227258,"user_tz":-60,"elapsed":188575,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"3e93ce38-87eb-4411-a3f1-093a7479abde"},"source":["ner_type_to_viz = 'ORG'\n","ner_df[ner_df.entities_confidence == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring ORG labeled tokens in the dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":5},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXQAAAFICAYAAABA2wWFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3debwcVZn/8c83C4sQApKrAiEJCA4IIkvYhmVQ5CebMLIIqCgIEx1FRNERUBGQURwVBVGYyC4IKCADCAIii4gsSYCwqgFZZQk7AQQDz++Pcy6pNH1v9026q7vrft+vV79u19J1nttd/XTVqVPnKCIwM7PeN6LTAZiZWWs4oZuZVYQTuplZRTihm5lVhBO6mVlFOKGbmVWEE3qTJH1Y0kOS5khap9PxtJukEyR9o9NxtJKkvSRd1+S6h0k6YwHL6chr62xrC0kPt2JbC1j+xyRd3sLtXS1p31Ztr2bbp0o6sh3bLlPXJHRJ90t6VdK4mvm3SApJkxZy+yFplYXYxPeB/SJiSeCZvL1RCxNTN4uIz0TEt9qxbUmLSvqOpAclvSzpr5K+IkmFda6W9I/8A/qkpPMlLVeznVUlnS1ptqTn83Z+LGl8O+LutE4n6KGKiDMj4v8tyGtb+cPWau38YVnYcromoWd/A/bon5D0HuAtnQtnPhOBOzsdRBkkjWxzEb8CtgS2BcYAewJTgGNq1uv/AV0FWJL0o9of4yrAjcDfgXUiYilgE+BeYNM2x2/WnSKiKx7A/cDXgZsL874PfA0IYFKeNxY4HZgNPJBfMyIvWwW4BngOeBI4J8+/Nm/jRWAOsFud8kfkbT0APJHLGAssml/T//p7gQfz9Jz82Dhv41PA3cAzwGXAxML2A/gM8FfgWeAngAZ4LxYFfkRKVn/PzxctLN8RuBV4PsezdZ7/VuCU/JpngAvy/L2A62rKCGCV/PxU4Hjgkvw/fiDPOzIv3wJ4GDgwvzePAnsXtrUscFGO52bgyNryCutuCfwDWLFm/obAa4WYrgb2LSz/LHBnYfoM4KIh7mPzvQ+kH5CHctzTgc0Kyw4DzgXOAV4AZgDvLSxfHjiPtB/+Ddi/5rVnFKY3Aq7Pn/ttwB
aFZSuR9tkXgCuA44qvLay3BPAy8Drz9rvlB9tX+j+3wjb2B+4CxufXfZ+0Lz8OnAAs3uTnvW3ezgvAI8CXm3y/m/oOAFsDrwL/zP/nbYV94lvAH3PZlwPjmnmf65SxTv5MX8if8dnM29+XAS7On+0z+fn4vOy/SfvpP3JsxzWxL20ATMvLHgeObhTzQOU03McXNhG36kFK6B8A/gysDozMO9VE5k/opwP/RzqymwT8BdgnLzuL9AMwAlgM2LReAhug/E8Bs4CVSUeD5wM/r/f6XG4AowrLd8yvXx0YRfpxuL7m9RcDSwMT8s6y9QCxHAHcALwN6Msf+LcKO8dzwFb5/1wBWC0v+03eOZcBRgP/Vu+LVef/OTVvc5PCe3cq8yf0uTmu0aQv9EvAMnn52fnxFuDdpB17oIR+FHDNAMseAD5d+PLum58vC/wO+L/Cuo8Bew1xH5vvfQA+nrc9ipS8HgMWy8sOIyWUXfL//GVS4h6d36PpwKHAInmfuQ/4YOG1Z+TnKwBP5fdsRP7cngL68vI/AUeTEuzmpATzpoRe+Bwerpk32L7yxvo51hmFcn8IXEg6CBhD+kH+TpOf96PkhEXa19Zt8v0eynfgjfewMO9q0gHMu4DF8/RRzbzPNdtZhLSvfTH/f7vkz7p/f18W2Jm0P48hnVFeUBPHvjXbHGxf+hOwZ36+JLBRk/vGm8ppuI8PZeV2PpiX0L8OfIf0K31FfoOClERHkn6531143aeBq/Pz04Gp5F/Tmu03SuhXAp8tTP9L/pBH1b6e+gn9UvIPS54eQfoSTCy8vvgD80vgoAFiuRfYtjD9QeD+/Px/gR/Wec1ypKO3ZRp9ser8P6cCp9csP5X5E/rLNf/vE6Sji5H5ffqXwrLBjtBPBM4eYNkNwNcKO/NLpB+aIJ2RTCisO5dCMgD2Ix3lzAF+NsD23/Q+1Cx/hnwUTkooN9R8no8Cm5HOJh6see3BwCmF1/Yn9K9SODDI8y4DPklKanOBJQrLfsHQEvpg+8oWpCPoo4HrgLF5vkhnYu8svG5j4G+NPu/8/EHS926pBt/p+d5vhvYdeOM9LMy7Gvh6YfqzwG8bvc91tr056WxGhXnXk/f3OuuvDTxTE8egibZmX7oWOJzC2UQzMTdTTu2j2+rQAX4OfJS0M5xes2wc6Rf1gcK8B0i/dAD/RdpZb5J0p6RPDaHc5etsdxTw9iZfPxE4RtKzkp4Fns6xrFBY57HC85dIv9bNxrJ8fr4i6Utca0Xg6Yh4psl4az3UYPlTETG3MN0ffx/pfSq+frBtPUn68alnuby83/4RMRZYi3QkWLzY+VRxOxFxXEQsTapyGD34v5JI+rKkuyU9lz+zsaR97E3/R0S8TjpjXJ70WS/f/1nn1x5C/X1lIrBrzbqb5tiXJyWKFwvrP1BnG4MZbF+BdDQ8hXT0/Vye10c6+pxeiOm3eX6/gT5vSEev2wIPSLpG0sZDiLfZ78BQXz/Y+1xreeCRyFkze+M9lPQWSf8r6QFJz5MS8tKDXVtqsC/tQzqruEfSzZK2X4CYm9J1CT0iHiCd2m5LqvYoepJ0NDixMG8C6SiEiHgsIv4jIpYnHUH8dAgtW/5eZ7tzSXVebwqzzryHSNUFSxcei0fE9U2W3yiWvxfKeecA5b9V0tJ1lr1I4eKypHfUWafe/9SM2aT3qZhsVxxk/d8BG0qabx1JG+bX/f5NgUXcTjrq/0mhJcyVwE4LGDOSNiMdAHyEdFazNOlsQIXVViysP4L0P/6d9F7/reazHhMR29Yp6iHSUVhx3SUi4ijSEf8ykpYorD9hkLDrfUaD7SuQjhS3B06RtEme9yTpCHyNQkxjI12Abigibo6IHUnVPBeQjrRbbaj742Dvc61HgRWKraqY/30/kHSGvmGki+2b5/n9688XW6N9KSL+GhF7kN6v7wLn5s+8UcxD/k52XULP9gHeX3PkQkS8Rtp5/lvSGEkTgS+RLpAhaddCk7VnSG/I63
n6cVJd50DOAr4oaSVJSwLfJl1UnVtn3dl5u8XtnQAcLGmNHMtYSbs2/R+/OZavS+rLzTgPJf+PwEnA3pK2lDRC0gqSVouIR0nVPj+VtIyk0ZL6d8TbgDUkrS1pMdLpbEvkz+R84LB8ZLMa8IlB1v8dKRmfJ2kNSSMlbZT/v+Mj4q8DvPQ00hHwDnn6MGAzSUdLWgEgv1erNxn6GNIP0WxglKRDgaVq1llP0k65eeoBwCukaqGbgBckfVXS4vl/WFPS+nXKOQP4kKQP5vUWy80Px+eDl2nA4ZIWkbQp8KFBYn4cWFbS2MK8wfYVACLiauBjwPmSNshnGz8DfijpbQB5P/pgozctx/kxSWMj4p+kC32vN3rdAngcmJR/SJsx4PtcZ90/kT77/fP3ZCfStal+Y0g/eM9KeivwzTqxrVyz/oD7kqSPS+rL7/uzefbrTcTcKGe9SVcm9Ii4NyKmDbD486QjzvtI9YK/AE7Oy9YHbpQ0h3TB5wsRcV9edhhwWj61+Uid7Z5Mqu65lnSG8I9cVr34XiJdhf5j3t5GEfFr0q/v2fk07Q5gmyH820VHkr7oM4HbSRezjsxl3wTsTbqo9RyphUT/EdqepDOYe0h1ngfk1/yFdIHrd6QWBk3dXDME+5FOMR8jvYdnkZLfQHYGriKd5s8h7dgnMcD7DRARr5JaEnwjT/+FVJc9HrhN0guk1g9/71+ngcty+X8hnW7/gzdXFf0fsBvp4GBPYKeI+Gf+EdueVLf6N9IR74n5PaiN+yHSBfNDSF/4h4CvMO+799H8fzxNShy11YzFbd1Dem/vy/vd8gyyr9S89grShf+LJK1Lqr+dBdyQ99ffkY5Km7EncH9+3WdIPxat9qv89ylJMxqt3MT7XFz3VdLZ3V6k93035q8N+BHpouuTpB/w39Zs4hhgF0nPSDqWxvvS1sCdOS8dA+weES83EXNtOQ1p/moks4Un6bvAOyLik52OxWw46cojdOstklaTtJaSDUhVZr/udFxmw01lb123Uo0hVQUsT6r3+wGpusLMSuQqFzOzinCVi5lZRTihm5lVRMfq0MeNGxeTJk3qVPFmZj1p+vTpT0ZEX71lHUvokyZNYtq0gZqam5lZPZIG7B7CVS5mZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV0XRCz9073iLp4jrLFpV0jqRZkm6UNKmVQZqZWWNDOUL/AmkA5Hr2IY28sgqpW9fvLmxgZmY2NE0l9Nzh+nakPp/r2ZE0AAGkkdK3rBkNxMzM2qzZG4t+RBpiacwAy1cgd+geEXMlPUcaAbs4PiSSppDGN2TChMFG2komHfSbJsOr7/6jtluo15uZ9ZKGR+hKA5o+ERHTF7awiJgaEZMjYnJfX907V83MbAE1U+WyCbCDpPuBs4H3SzqjZp1HyAPq5vEXx5JGZTczs5I0TOgRcXBEjI+IScDuwO8j4uM1q10I9A83tktexx2tm5mVaIE755J0BDAtIi4kDfD7c0mzSIOu7t6i+MzMrElDSugRcTVwdX5+aGH+P4BdWxmYmZkNje8UNTOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4poZpDoxSTdJOk2SXdKOrzOOntJmi3p1vzYtz3hmpnZQJoZsegV4P0RMUfSaOA6SZdGxA01650TEfu1PkQzM2tGw4SeB3uekydH54cHgDYz6zJN1aFLGinpVuAJ4IqIuLHOajtLminpXEkrtjRKMzNrqKmEHhGvRcTawHhgA0lr1qxyETApItYCrgBOq7cdSVMkTZM0bfbs2QsTt5mZ1RhSK5eIeBa4Cti6Zv5TEfFKnjwRWG+A10+NiMkRMbmvr29B4jUzswE008qlT9LS+fniwFbAPTXrLFeY3AG4u5VBmplZY820clkOOE3SSNIPwC8j4mJJRwDTIu
JCYH9JOwBzgaeBvdoVsJmZ1ddMK5eZwDp15h9aeH4wcHBrQzMzs6HwnaJmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV0cyYootJuknSbZLulHR4nXUWlXSOpFmSbpQ0qR3BmpnZwJo5Qn8FeH9EvBdYG9ha0kY16+wDPBMRqwA/BL7b2jDNzKyRhgk9kjl5cnR+RM1qOwKn5efnAltKUsuiNDOzhpqqQ5c0UtKtwBPAFRFxY80qKwAPAUTEXOA5YNlWBmpmZoNrKqFHxGsRsTYwHthA0poLUpikKZKmSZo2e/bsBdmEmZkNYEitXCLiWeAqYOuaRY8AKwJIGgWMBZ6q8/qpETE5Iib39fUtWMRmZlZXM61c+iQtnZ8vDmwF3FOz2oXAJ/PzXYDfR0RtPbuZmbXRqCbWWQ44TdJI0g/ALyPiYklHANMi4kLgJODnkmYBTwO7ty1iMzOrq2FCj4iZwDp15h9aeP4PYNfWhmZmZkPhO0XNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqwgndzKwinNDNzCrCCd3MrCKc0M3MKsIJ3cysIpzQzcwqopkxRVeUdJWkuyTdKekLddbZQtJzkm7Nj0PrbcvMzNqnmTFF5wIHRsQMSWOA6ZKuiIi7atb7Q0Rs3/oQzcysGQ2P0CPi0YiYkZ+/ANwNrNDuwMzMbGiGVIcuaRJpwOgb6yzeWNJtki6VtEYLYjMzsyFopsoFAElLAucBB0TE8zWLZwATI2KOpG2BC4BV62xjCjAFYMKECQsctJmZvVlTR+iSRpOS+ZkRcX7t8oh4PiLm5OeXAKMljauz3tSImBwRk/v6+hYydDMzK2qmlYuAk4C7I+LoAdZ5R14PSRvk7T7VykDNzGxwzVS5bALsCdwu6dY87xBgAkBEnADsAvynpLnAy8DuERFtiNfMzAbQMKFHxHWAGqxzHHBcq4IyM7Oh852iZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFdHMmKIrSrpK0l2S7pT0hTrrSNKxkmZJmilp3faEa2ZmA2lmTNG5wIERMUPSGGC6pCsi4q7COtsAq+bHhsDx+a+ZmZWk4RF6RDwaETPy8xeAu4EValbbETg9khuApSUt1/JozcxsQEOqQ5c0CVgHuLFm0QrAQ4Xph3lz0jczszZqpsoFAElLAucBB0TE8wtSmKQpwBSACRMmLMgmSjfpoN8s9DbuP2q7FkRiZja4po7QJY0mJfMzI+L8Oqs8AqxYmB6f580nIqZGxOSImNzX17cg8ZqZ2QCaaeUi4CTg7og4eoDVLgQ+kVu7bAQ8FxGPtjBOMzNroJkql02APYHbJd2a5x0CTACIiBOAS4BtgVnAS8DerQ/VzMwG0zChR8R1gBqsE8DnWhWUmZkNne8UNTOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczqwgndDOzinBCNzOrCCd0M7OKcEI3M6sIJ3Qzs4pwQjczq4hmxhQ9WdITku4YYPkWkp6TdGt+HNr6MM3MrJFmxhQ9FTgOOH2Qdf4QEdu3JCIzM1sgDY/QI+Ja4OkSYjEzs4XQqjr0jSXdJulSSWu0aJtmZjYEzVS5NDIDmBgRcyRtC1wArFpvRUlTgCkAEyZMaEHRZmbWb6GP0CPi+YiYk59fAoyWNG6AdadGxOSImNzX17ewRZuZWcFCJ3RJ75Ck/HyDvM2nFna7ZmY2NA2rXCSdBWwBjJP0MPBNYDRARJwA7AL8p6
S5wMvA7hERbYvYzMzqapjQI2KPBsuPIzVrNDOzDvKdomZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXRMKFLOlnSE5LuGGC5JB0raZakmZLWbX2YZmbWSDNH6KcCWw+yfBtg1fyYAhy/8GGZmdlQNUzoEXEt8PQgq+wInB7JDcDSkpZrVYBmZtachoNEN2EF4KHC9MN53qO1K0qaQjqKZ8KECS0oeviYdNBvFur19x+1XcdjaEUc3RBDt8TRDTF0SxzdEEM3xFHqRdGImBoRkyNicl9fX5lFm5lVXisS+iPAioXp8XmemZmVqBUJ/ULgE7m1y0bAcxHxpuoWMzNrr4Z16JLOArYAxkl6GPgmMBogIk4ALgG2BWYBLwF7tytYMzMbWMOEHhF7NFgewOdaFpGZmS0Q3ylqZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU0ldElbS/qzpFmSDqqzfC9JsyXdmh/7tj5UMzMbTDNjio4EfgJsBTwM3Czpwoi4q2bVcyJivzbEaGZmTWjmCH0DYFZE3BcRrwJnAzu2NywzMxuqZhL6CsBDhemH87xaO0uaKelcSSvW25CkKZKmSZo2e/bsBQjXzMwG0qqLohcBkyJiLeAK4LR6K0XE1IiYHBGT+/r6WlS0mZlBcwn9EaB4xD0+z3tDRDwVEa/kyROB9VoTnpmZNauZhH4zsKqklSQtAuwOXFhcQdJyhckdgLtbF6KZmTWjYSuXiJgraT/gMmAkcHJE3CnpCGBaRFwI7C9pB2Au8DSwVxtjNjOzOhomdICIuAS4pGbeoYXnBwMHtzY0MzMbCt8pamZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXhhG5mVhFNJXRJW0v6s6RZkg6qs3xRSefk5TdKmtTqQM3MbHANE7qkkcBPgG2AdwN7SHp3zWr7AM9ExCrAD4HvtjpQMzMbXDNH6BsAsyLivoh4FTgb2LFmnR2B0/Lzc4EtJal1YZqZWSOKiMFXkHYBto6IffP0nsCGEbFfYZ078joP5+l78zpP1mxrCjAlT/4L8OeFjH8c8GTDtdqrG2KA7oijG2KA7oijG2KA7oijG2KA7oijFTFMjIi+egtGLeSGhyQipgJTW7U9SdMiYnKrtterMXRLHN0QQ7fE0Q0xdEsc3RBDt8TR7hiaqXJ5BFixMD0+z6u7jqRRwFjgqVYEaGZmzWkmod8MrCppJUmLALsDF9ascyHwyfx8F+D30agux8zMWqphlUtEzJW0H3AZMBI4OSLulHQEMC0iLgROAn4uaRbwNCnpl6Fl1TcLoRtigO6IoxtigO6IoxtigO6IoxtigO6Io60xNLwoamZmvcF3ipqZVYQTuplZRTihm5lVRKnt0K21JG0SEX9sNK/qJPUBXyV1TbFY//yIeH9J5Y8CXouIkLQisCFwb0TcUkb5Zv167ghd0v9IWkrSaElXSpot6eMllv92SSdJujRPv1vSPmWVX+PHTc6rujOBu4GVgMOB+0nNbdtO0n8ATwAP5OdXkpruni3pq2XEUIjle5I+XWf+pyUdVVIMqxWeL1qzbKOSYnibpB9JuljSdyQtVUa5jUhaVtKHJa3XtjJ6rZWLpFsjYm1JHwa2B74EXBsR7y2p/EuBU4CvRcR789HZLRHxnjLKzzFsDPwrcACpM7R+SwEfLuO9kPQCUNx5gnRL81XAVyOitBvLJE2PiPUkzYyItfK8myNi/RLKvhPYFBhD+lGZGBFPSn
oLcHNErNHuGAqxTAcm194DImkEMDMi1iwhhhkRsW7t83rTbYzht8B04FpSjhgTEXu1u9w6cVwMHBQRd0haDpgBTAPeCUyNiB+1usyeO0JnXjXRdsCvIuK5kssfFxG/BF6H1E4feK3kGEYDS5LeizGFx/Oko8O2i4gxEbFU4TEWmAzcCZxQRgwF/8x/H5W0naR1gLeWVParEfFMRDxI6sTuSYCIeAl4taQY+i1a74a+iHgdKKuzPA3wvN50uywXEV+LiMsi4vPAWiWVW2uliLgjP98buCIiPkSqkvtUOwrsxTr0iyXdA7wM/GeuP/1HieW/KGlZ8tFpPo0s+0flmxGxpaQ1IuLwksseUEQ8A/wwd+BWpiMljQUOJFU5LQV8saSyF88/ICOARfJz5cdig76y9V6WtGpE/LU4U9KqpO9LGWrP2gZa1laSlmHeD8jI4nREPF1SGP8sPN8S+Fku/wVJr7ejwJ6rcgGQ9FbguYh4TdISpFOqx0oqe11S0lgTuAPoA3aJiJlllJ9juAvYl3SH7kepOfKJiBllxVJL0mhgen/VR9VJuppBElVEvK/EWLYh7ZtHkqocIJ01HQwcEBGXlBDDE6QutgXslp+Tpz8SEW8vIYb7SWfQ9c4IIiJWbncMOY6LgMuBh4GTSUfsz0panHSXfcur43ouoed6wpOAs/IRYSdiGEXq/lfAnyPinw1e0urydyENKrIpqU6uKMpo3SFppzqzlyF9ia+LiCPaHUMhlpWAzwOTKJx1RsQOZcXQLSStCXyFdMABqQrsexFxe0nlf3Kw5RFx2mDLq0TS24AjgOWAn0TE5Xn++4D1IuL7LS+zBxP6KqT6qN1IyewU4PJ2dwY2QAJ7Q0Sc387y65H0jYj4Vtnl5rJPqZkVpB42r46I35Qcy22kH/nbydc2ACLimhLKXh94qP8MUdIngJ2BB4DDSjy97xq5GnQi6ZrCs10Qz7bAVRHxsqSdOvFdLUvPJfR++cr99sDxpIuSpwDHtOsLVCeBFUVEtOUiRyOSdgA2z5NXR8TFnYijSNKDETGhxPJujIgNyyqvpuwZwAci4mlJm5OqGD4PrA2sHhGlXKTOsawKfI3UQd7RpDrbzYB7gX0jou1NOSXtC3w7l7kSMCV34Ncxko4jVT3NADYqo6VNLndV4BDgGeZ9HpsDs4B9IqL27Hrhy+zFhC5pLdJR+rakXiDPJFU/7BkRa3cytjJJ+g5piMAz86w9SE3lDulcVCDpoYhYsfGaLSvvo8CqpPrKV/rnl3EtQdJt/c1EJf0EmB0Rh+XpW8vcHyVdB5zOvIvCBwAXkZL6kWX86CmNXva+iJgtaWXgzIjYuN3l1sSwIXBfRMwuzDsU+ALwuYg4e8AXtzaO8j+PiOipB+liz5Wki4GL1iw7v4TylwWOJf3aTweOAZbt0HsxExhRmB5Jam/c6c/owZLL+w7pwtM1pHbwV5H65C+j7DuAUfn5PcDmxWUlvw+3Fp7PGmhZm2OYMdh0STHcVswNpKPj80kNGK6r8ufRi80Wd42I++otiIhB67lb5GzSDQs75+mPAecAHyih7HqWJp1iQxopqhSSvjTQIlIb+TLtCqwcaRDzsp0FXCPpSVLTwD/AG9d6ym7OWmwK9/wgy9ppvKRjB5qOiP1LiGFURLySGy+cSvpcdomI1/MNX2Up/fPouYQeEfdJ2g5Yg/n77SirVcVyMf+FyCMl7VZS2bW+A9wi6SpSIt0cOKiksscMsuyYkmLodwfph+2JkssF+C7pjHE55r84P4JUl16m1STNJO0L78zPydOlNNUjtbApml53rfa6TtKVwDtIBxeb52S+BeW1x4cOfB49V4cu6QTgLcD7gBNJd0beFBGl9Kci6WjgJuCXedYuwAYR8eUyyq8Tz3JA/y3uN0VJ7fG7SW4Lvhap/5ZiHXrbmy2WdTt7MyRNHGx5RDxQViydJmlT0p26jwPnkqpKBewcJd2n0YnPoxcT+syIWAM1htgAAAxHSU
RBVKvwd0ng0ojYrKTyXwCWYN4p0wjgxfw8IqK0joAkbUKqi3tRqYOydUktfdr+xc0XmQYSUWJzSkn/NkAQZTRbvCUi1ml3OQtK0jjgqSjpi54T6coRcXqePpd53TAcGRG/LyGG2qaknyQ1c76XdJd1R5uS5hZ6e0TEmQ1XHuq2ezCh3xgRG0q6AdiJ1Pb5zohYpcOhlS6fwr2XdHR6Cqkt9kciom6Ca3HZB9aZvQTphqdlI6LsevSOkPQw6aJbXREx4LI2xLIRcBTpmsq3gJ8D40gHHZ+IiN+WEMOVwOcj4q48fTuwF2nfOCQiti4hhq5oSqrUy+PngBWAC4ErgP1IXVTcFhE7trrMnqtDJ/XlsjTwPVJLkyBVvZQm32S0aS77DxFxQZnlF8yNiJC0I+lOtJNUUle+EfGD/ueSxpCahO1N+vL8YKDXtUNOZD8GVgcWIbX2ebGks6WRpHrasjqeGsxxpHbPY4HfA9tExA1KXdqeBbQ9oQNL9Sfz7K8RMR3eaGZbhpGFo/DdSD0bngecJ+nWkmKA9IP6DPAnUlcdh5D2k3+PiLbE0XMJvXAqf55S95SLRYk9Lkr6KbAK6QsC8BlJW0XE58qKoeAFSQcDewKb5VO50WUVnvvU+RKppc9pwLrRme4YjgN2B35FuoHkE8C7Sir70RIvyDcyKubdXn5ERNwAEBH3SKX93ixdnKhpedb2flyykZJGReoJdUtgSmFZmTlv5cjdaks6EXgUmBARbetMsGcS+mC33ksiyrud9/2k07b+3hZPI/WX0Qm7kdrjfyoiHpM0gXTm0naSvkeq8poKvCci5pRR7kAiYpakkRHxGnCKpFtInVK1WzccmfcrNoWrbc1RVt3qPZK2i5ruHyRtD/y5pBi6pSnpG308RepI8OF2JnPooTr0brn1Pp8VfK7/wmO+kn1cpH6OSyfp7czfyqWUpntK3X++Asxl/mQhyr84fC3pPoATgcdIR0J7RTkDfby10xfZ+kl6jXSBXsDiwEv9i0hnsm0/e8tJ8zfA9aQqUYD1SAOybB8Rf2l3DDmOjZjXlPTFPO9dwJIltnLp/zxg/s+kbd+Rnkno3ULSNaQEelOetT6pudzzUG4Pf5I+Qjoiv5q0k2wGfCUizi0rhm6Qf1QfJ9Wff5FUh/zTiJjV0cCGKaWh5z5GulcE0hnsL9p9dGo9mNAlfRv4n8i9uCl1XH9gRHy9pPKLLUj6k+juwGehnKZyhVhuA7bqPypX6uXud2UcmXYLSSOB0yPiY52OxazTenEIum2i0CVnvgi3bVmF54T9PKmnx1NJdeonRMQ1ZSbzbERNFctT9OZnusBynflESYt0Mg5JS+SL0kh6l6QdlAb7GPZyM0IrQc9cFC0YKWnRiHgFQGn0j0UbvGah5fq3PfLjSVL/LYoSR6Sp47eSLmNei5vdgLaPStOF7gP+KOlC5tVZltoGnNS/z2b5jPFyUjXcbqSqh+Gumy4cV1ovJvQzgSsLF0n3JjWZa7d7SFfLt++vm5VU1riVb6LUDu1YUh3+pnn21Ij4dadi6qB782MEg/cx006KiJfyfQA/jYj/KbnNczcrdcCT4ayn6tBzEhtPutjS37vhFRFxWQll/zuprnwT0g0aZwMnRsRK7S57kJhu72/napC7gaATTShzM8nPAj8kDV5wpz8fK1tPHaHnuyIvyV+SMu56K5Z9AXCB0qDUO5I6q3+bpOOBX/ff0FGyGZLWjxJGoulmSuNo/pzcZ0huf/yJiCjz/oADSO3ef52T+cqkftmHpXzfyHeBt5GqXEpvzjoc9dQROrxxI89x3ZDEcn3prsBuEbFlB8q/h3TX6gPMa38cEbFW2bF0kqTrga9FxFV5egvg2xHxrx0NbBiTNAv4UETc3elYhpNeTOhOYtlA3XPGMOomFZhvGLjB5rWp7IsY5C7MMu9L6CaS/hgRm3Q6juGmp6pcsg92OoAushypp8kX4I3e3VYn/dgNJ/dJ+gap2gXg46SWL2
X4fv67E2lAhTPy9B6km52Gq2mSzgEuYP4+6svqomNY6rkj9H6S3sb8IxY92MFwOiJfiFu30K/MCGBadMmAC2XJVV+HM6+1zx+Aw8rsKEzStIiY3GjecDFAVx2lddExXPXcEbqkHUjdsy5PGnJsInA3824zHk4UhV/kSMNs9dxnurBy4i5jrMrBLCFp5cjj3UpaidQH+LAUEXt3OobhqBe//N8CNiLd4r6OpPeRTrGHo/sk7Q8cn6c/S3lVDV0j3/T1ZWAShX06It5fYhhfBK6WdB/pus5E5u+2dViRNJ7UR31/PfofgC9ExMOdi6r6eq7Kpf80Nvdjsk4+Ki3lAli3ydVOx5K6HwjSYMUHlNXjYrfI+8IJpAGJX+uf3z+wQolxLAqslifv6b+beTiSdAXwC+a/rvGxiNiqc1FVXy8m9N8B/04a8X4cqdplfTdRG74kTY+I9TodA2kIwLM6NMhHV5F0a0Ss3WietVYvJvQlSJ3WjyD1kzEWODMinupoYCWS9F/51vIfU6fJXER0uj65VJIOI/2w/5r5W1SU1k957gd8b1L/LdNIY7xeHr32BWsRpbFFT2FeP0N7AHt34n6N4aTnEnqRSh7RvFtI+lBEXKQ0mvmbREQZfdt0DUl/qzM7ImLlDsQygtQT5/Gk6p9TgGO6ZRCMsuR7JH4MbEw66Lge2H84tkYrU88kdHXBiOZmg5G0FukofVvgMlJHcpsCe7qqwcrQSwl9GvNGNJ9KzYjmEbFORwMsUe4mdkDD8e7E3J/Lu5n/3oTTSyx/OvAsqR79vOIFUUnnx/yDJVeWpEMHWRwxb5B3a4NeSuhvXFCRdHdErF5YdsswS+izgYdI9ZM3UtPfdAcG2ugoSd8EtiAl9EuAbYDrImKXEmN4ow36cCbpwDqzlwD2AZaNiCVLDmlY6aV26N0wonm3eAewFelC00dJ/U2fVXLvgt1kF+C9wC0RsXceOPuMBq9ptackHQ1snqevAY6IiDJHme+4iPhB/3NJY4AvkKqhzibdEGht1EvDlb1X0vOSXgDWys/7p4dVn9MR8VpE/DYiPkm6yWoW6aaW/TocWqe8HBGvA3NzfzZPACuWHMPJwAvAR/LjedIF0WFH0lslHQnMJB00rhsRXx1u90d0Qs8coUfEyE7H0E3yTSzbkY7SJ5FuMBqOoxVB6ghqaeBnpJuL5gB/KjmGd0bEzoXpw4fjiEWSvkfqqGwq8J5ODDYynPVMHbrNI+l0YE1SffHZEXFHh0PqGEl9pNvsZ0XEs5ImAUtFxMyS4/gT8JWIuC5PbwJ8PyI2LjOOTpP0OulegLnMXxXqAS5K4ITeg/KXpn8w5GH7pZG0L/Bt0niiKwFTImLQFkBtjGVt0ti2Y0mfw9PAXhFxWyfiseHJCd16lqQ7gPdFxOw85NuZnT4iznX4RMTznYzDhqeeqUM3q+PViJgNEBH35esKpZL0pQHmAxARR5cakA1rTujWy8ZLOnag6ZL6tBkzyDKf/lqpnNCtl32lZrrU7nIBIuLwgZZJOqDMWMxch27WJpIejIgJnY7Dho9eurHIrNeo8SpmreOEbtY+Pv21UrkO3Wwh5K4n6iVuAYuXHI4Nc65Dt56VbzOfFRH/WzP/08BKEXFQZyIz6wwndOtZuQ/yybUjVuVRg2ZGxJqdicysM1yHbr1s0XrDD+aeF31B0oYdJ3TrZS9LWrV2Zp5X22e+WeX5oqj1skOBS3Pf2/03FU0GDgZ8U48NO65Dt56WxxL9Cqk7YYA7ge9FxO2di8qsM5zQzcwqwnXo1rMkrSrpVElHSxov6VJJcyTdJmn9TsdnVjYndOtlpwDXA38HbiSN6zkO+DJwXAfjMusIV7lYz5J0a0SsnZ/PiohV6i0zGy58hG697PXC89oRgl7HbJjxEbr1LEkvAbNINxG9Mz8nT68cEUt0KjazTnA7dOtlq3c6ALNu4iN0qxRJ44Cn6nUJYFZ1rkO3niVpI0lXSzpf0jqS7gDuAB6XtH
Wn4zMrm4/QrWdJmgYcAowFpgLbRMQNklYDzoqIdToaoFnJfIRuvWxURFweEb8CHouIGwAi4p4Ox2XWEU7o1suKTRNre1f0qacNO65ysZ4l6TXgReYN9/ZS/yJgsYgY3anYzDrBCd3MrCJc5WJmVhFO6GZmFeGEbmZWEU7oZmYV4YRuZlYRTuhmZhXx/wFGZBr7R2bbIQAAAABJRU5ErkJggg==\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"ks6NDXg7RXG3","colab":{"base_uri":"https://localhost:8080/","height":363},"executionInfo":{"status":"ok","timestamp":1609628227683,"user_tz":-60,"elapsed":188986,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b6278d5d-ece7-4e76-e31f-ed2467c3bc81"},"source":["ner_type_to_viz = 'LOC'\n","ner_df[ner_df.entities_confidence == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring LOC labeled tokens in the dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":6},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]}]} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"NLU_ner_ONTO_18class_example.ipynb","provenance":[{"file_id":"1CYzHfQyFCdvIOVO2Z5aggVI9c0hDEOrw","timestamp":1599267946314}],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"NYQRU3pRO146"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/component_examples/named_entity_recognition_(NER)/NLU_ner_ONTO_18class_example.ipynb.ipynb)\n","\n","# Named-entity recognition with Deep Learning ONTO NOTES\n","\n","Named-Entity recognition is a well-known technique in information extraction it is also known as entity identification, entity chunking and entity extraction. Knowing the relevant tags for each article help in automatically categorizing the articles in defined hierarchies and enable smooth content discovery. This pipeline is based on NerDLApproach annotator with Char CNN - BiLSTM and GloVe Embeddings on the OntoNotes corpus and supports the identification of 18 entities.\n","\n","\n","Following NER classes can be detected by this model\n","\n","\n","\n","\n","|Type | \tDescription |\n","|------|--------------|\n","| PERSON | \tPeople, including fictional like **Harry Potter** |\n","| NORP | \tNationalities or religious or political groups like the **Germans** |\n","| FAC | \tBuildings, airports, highways, bridges, etc. like **New York Airport** |\n","| ORG | \tCompanies, agencies, institutions, etc. like **Microsoft** |\n","| GPE | \tCountries, cities, states. like **Germany** |\n","| LOC | \tNon-GPE locations, mountain ranges, bodies of water. Like the **Sahara desert**|\n","| PRODUCT | \tObjects, vehicles, foods, etc. 
(Not services.) like **playstation** |\n","| EVENT | \tNamed hurricanes, battles, wars, sports events, etc. like **hurricane Katrina**|\n","| WORK_OF_ART | \tTitles of books, songs, etc. Like **Mona Lisa** |\n","| LAW | \tNamed documents made into laws. Like : **Declaration of Independence** |\n","| LANGUAGE | \tAny named language. Like **Turkish**|\n","| DATE | \tAbsolute or relative dates or periods. Like every second **friday**|\n","| TIME | \tTimes smaller than a day. Like **every minute**|\n","| PERCENT | \tPercentage, including ”%“. Like **55%** of workers enjoy their work |\n","| MONEY | \tMonetary values, including unit. Like **50$** for those pants |\n","| QUANTITY | \tMeasurements, as of weight or distance. Like this person weights **50kg** |\n","| ORDINAL | \t“first”, “second”, etc. Like David placed **first** in the tournament |\n","| CARDINAL | \tNumerals that do not fall under another type. Like **hundreds** of models are avaiable in NLU |"]},{"cell_type":"code","metadata":{"id":"M2-GiYL6xurJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614375794950,"user_tz":-60,"elapsed":58377,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"b399183e-704a-42b1-e16f-697ab6795204"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n"," \n","! 
pip install nlu pyspark==2.4.7 > /dev/null\n","\n"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Looking in indexes: https://test.pypi.org/simple/, https://pypi.org/simple\n","Collecting nlu_test==1.1.3rc2\n","\u001b[?25l Downloading https://test-files.pythonhosted.org/packages/5c/84/241410ba610c9281afc8e1cffaa352f5ca83fe6e2574f1cfcdf3334dc81f/nlu_test-1.1.3rc2-py3-none-any.whl (158kB)\n","\u001b[K |████████████████████████████████| 163kB 5.7MB/s \n","\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (1.19.5)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (1.1.5)\n","Collecting dataclasses\n"," Downloading https://files.pythonhosted.org/packages/26/2f/1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d/dataclasses-0.6-py3-none-any.whl\n","Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (3.0.0)\n","Collecting spark-nlp<2.8,>=2.7.1\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/8d/a5/a5130215b43f3bd0e98bd16c471d36dafeab8855ca17789d4927337fa7dc/spark_nlp-2.7.4-py2.py3-none-any.whl (139kB)\n","\u001b[K |████████████████████████████████| 143kB 7.6MB/s \n","\u001b[?25hRequirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->nlu_test==1.1.3rc2) (2.8.1)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->nlu_test==1.1.3rc2) (2018.9)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->nlu_test==1.1.3rc2) (1.15.0)\n","Installing collected packages: dataclasses, spark-nlp, nlu-test\n","Successfully installed dataclasses-0.6 nlu-test-1.1.3rc2 spark-nlp-2.7.4\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"Gph8XOL1Pzpl"},"source":["# NLU makes 
NER easy. \n","\n","You just need to load the NER model via ner.load() and predict on some dataset. \n","It could be a pandas dataframe with a column named text or just an array of strings."]},{"cell_type":"code","metadata":{"id":"pmpZSNvGlyZQ","colab":{"base_uri":"https://localhost:8080/","height":486},"executionInfo":{"status":"ok","timestamp":1614375848849,"user_tz":-60,"elapsed":112266,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"e10d0a3e-7aed-4f50-813b-6d889365fd2f"},"source":["import nlu \n","\n","example_text = ['People, including fictional like Harry Potter.',\n","'Nationalities or religious or political groups like Germans.',\n","'Buildings, airports, highways, bridges, etc. like New York Airport',\n","'Companies, agencies, institutions, etc. like Microsoft',\n","'Countries, cities, states. like Germany',\n","'Non-GPE locations, mountain ranges, bodies of water. Like Sahara Destert',\n","'Objects, vehicles, foods, etc. (Not services.) Like the a or playstation or Playstation',\n","'Named hurricanes, battles, wars, sports events, etc. like hurricane Katrina',\n","'Titles of books, songs, etc. Like the Mona Lisa',\n","'Named documents made into laws. Like the Declaration of Independence',\n","'Any named language. Like English',\n","'Absolute or relative dates or periods. Like every second friday',\n","'Times smaller than a day. Like every minute',\n","'Percentage, including ”%“. Like 55% of workers enjoy their work',\n","'Monetary values, including unit. Like 50$ for those pants',\n","'Measurements, as of weight or distance. Like this person weights 50kg',\n","'“first”, “second”, etc. Like David place first in the tournament',\n","'Numerals that do not fall under another type. 
Like hundreds of models are avaiable in NLU',]\n","nlu.load('ner.onto').predict(example_text)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["onto_recognize_entities_sm download started this may take some time.\n","Approx size to download 159 MB\n","[OK!]\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
embeddingsentitiestokenentities_confidencener_confidence
origin_index
0[0.2901900112628937, 0.8049700260162354, 0.311...[Harry Potter]People[PERSON]0.999600
0[-0.10767000168561935, 0.11052999645471573, 0....[Harry Potter],[PERSON]0.995300
0[0.013899999670684338, 0.39162999391555786, 0....[Harry Potter]including[PERSON]0.953400
0[-0.20130999386310577, -0.14045000076293945, 0...[Harry Potter]fictional[PERSON]0.883800
0[-0.2687000036239624, 0.817080020904541, 0.698...[Harry Potter]like[PERSON]0.965100
..................
17[-0.453359991312027, 0.5522699952125549, 0.218...[hundreds, NLU]models[CARDINAL, ORG]0.681000
17[-0.5153300166130066, 0.8318600058555603, 0.22...[hundreds, NLU]are[CARDINAL, ORG]0.920500
17[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...[hundreds, NLU]avaiable[CARDINAL, ORG]0.841900
17[0.08570300042629242, -0.22201000154018402, 0....[hundreds, NLU]in[CARDINAL, ORG]0.973100
17[0.4970400035381317, -0.013454999774694443, 0....[hundreds, NLU]NLU[CARDINAL, ORG]0.936900
\n","

217 rows × 5 columns

\n","
"],"text/plain":[" embeddings ... ner_confidence\n","origin_index ... \n","0 [0.2901900112628937, 0.8049700260162354, 0.311... ... 0.999600\n","0 [-0.10767000168561935, 0.11052999645471573, 0.... ... 0.995300\n","0 [0.013899999670684338, 0.39162999391555786, 0.... ... 0.953400\n","0 [-0.20130999386310577, -0.14045000076293945, 0... ... 0.883800\n","0 [-0.2687000036239624, 0.817080020904541, 0.698... ... 0.965100\n","... ... ... ...\n","17 [-0.453359991312027, 0.5522699952125549, 0.218... ... 0.681000\n","17 [-0.5153300166130066, 0.8318600058555603, 0.22... ... 0.920500\n","17 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... ... 0.841900\n","17 [0.08570300042629242, -0.22201000154018402, 0.... ... 0.973100\n","17 [0.4970400035381317, -0.013454999774694443, 0.... ... 0.936900\n","\n","[217 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"code","metadata":{"id":"qgGdEUgkMika","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614375858341,"user_tz":-60,"elapsed":121752,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"c5753103-8853-46a2-dca7-41686799c2ef"},"source":["text = [\"Barclays misled shareholders and the public about one of the biggest investments in the bank's history, a BBC Panorama investigation has found.\",\n","\"The bank announced in 2008 that Manchester City owner Sheikh Mansour had agreed to invest more than £3bn.\",\n","\"But the BBC found that the money, which helped Barclays avoid a bailout by British taxpayers, actually came from the Abu Dhabi government.\",\n","\"Barclays said the mistake in its accounts was 'a drafting error'.\",\n","\"Unlike RBS and Lloyds TSB, Barclays narrowly avoided having to request a government bailout late in 2008 after it was rescued by £7bn worth of new investment, most of which came from the Gulf states of 
Qatar and Abu Dhabi.\",\n","\"The S&P 500's price to earnings multiple is 71% higher than Apple's, and if Apple were simply valued at the same multiple, its share price would be $840, which is 52% higher than its current price.\",\n","\"Alice has a cat named Alice and also a dog named Alice and also a parrot named Alice, it is her favorite name!\"\n","] + example_text\n","ner_df = nlu.load('ner.onto').predict(text, output_level='chunk')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["onto_recognize_entities_sm download started this may take some time.\n","Approx size to download 159 MB\n","[OK!]\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"STc7iOwtljGo"},"source":["## Lets explore our data which the predicted NER tags and visalize them! \n","\n","We specify [1:] so we dont se the count for the O-tag wich is the most common, since most words in a sentence are not named entities and thus not part of a chunk"]},{"cell_type":"code","metadata":{"id":"UDSAYjadlfdK","colab":{"base_uri":"https://localhost:8080/","height":381},"executionInfo":{"status":"ok","timestamp":1614375859096,"user_tz":-60,"elapsed":122501,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"51553f59-5c76-4946-ebbe-3f0acab5a246"},"source":["ner_df['entities'].value_counts()[1:].plot.bar(title='Occurence of Named Entity tokens in 
dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":4},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXUAAAFcCAYAAAA3Xi42AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOydd9gdRfXHP98UAtJLpIUkVFGQGpqgIojSBAuoWGgiWBBUfiqgAiJKUUERAelFpAkiCChIR6QkIRBCQAKEJiX0IqDA+f1xZvPO3Xfvvfu+703ecD2f59nn3t2ZnZ3dnT0zc+bMGZkZQRAEQXcwZLAzEARBEHSOEOpBEARdRAj1IAiCLiKEehAEQRcRQj0IgqCLCKEeBEHQRYRQD9oi6ROSHpH0sqQ1Bzs/fUHSxpIenU3XGp2e0dDZcb2K65ukFQbp2h2998G8l7c7/9NCXdLOkiZL+rekJyQdJ2mhwc7XHMjPgT3NbD4zu70cmD7AyZKGZMcOkXTa7Mxkf0h5fyUJpGL7bs1zp0v6cLFvZg+nZ/RmCr9W0m4DzNvbQrCV7312IWlsek7DuuE6neB/VqhL2gc4HPgOsCCwPjAGuFLSXLMpD3N8AUmMAaa0ibMU8NnZkJdZwepJIBXbEYOdoSDoN2b2P7cBCwAvA58uHZ8PmAHsmvaHAvsD9wMvAROAZVLYKsCVwLPAk8D+6fhpwCFZmhsDj2b704HvAXcCrwPD8ArlJuB54A5g4yz+tcCPgb+nPFwBLJaFb5Sd+wiwczo+Am9hP5zydzwwT5PnMQT4AfAQ8BRwBl7RjUjPyYBXgPubnG/pnu4DhqVjhwCnZXHOB54AXgCuB1bJwk4DjgUuT9f7O7AE8EvgOeAeYM0s/lLABeldPQjslYXNk9J7Drgbr7Qfrcp3lvcVmoQdBJyXnsdLeMU2LoWdCbwFvJry/F1gbEpvGPAT4E3gtRR+DPAb4Bela1wMfKvi2tdnz/1l4DPp+JeBaXi5uxhYqupeUrl4pChLwK7A1PRc/gqMKZ33lfT+nk/5VApbAbguvbengXObPKuZ916n3Fac/x3gceBfKa/5vWwF3A68mO7poOy8h1Pcl9O2AbA8cDXwTMrzWcBC2TnfAx5L+boX2DT7DvbFv/dn0rtfpNl1BluONX2Wg52BQblp2Bx4oyiApbDTgbOzgjYZeBcgYHVgUWD+VAD3AeZO++ulc06jvVCfBCyDC6ClUwHaMhWqzdL+yOzjuB9YKcW/FjgshY1JBXMHYHjK2xop7Cj8o18k5e8S4NAmz2NXXFAsh1dsFwJnZuFNBV8WviJe6e2WjpWF+q4pHyNwYT0pCzstfXxrp+d5NS6sd8Qr1kOAa1LcIek6BwBzpTw/AHw0hR8G3JDuexngLgYm1F9L72YocChwc+ldfjjbH0tvwbZbFr4uLrSGpP3FgH8Di9fJG7BJek5rpef4a+D6cny8fD8CrJuOb5ve77vxCucHwE2l8/4MLASMxivLzVPY2cD303OfG9ioSV6r7r2y3Db5Hp8EVgXmBX5Po1DfGHhvysNqKe7Hq66bjq2Af0cjgJF4BfnLFPau9GyWys5fPv3fG7gZGJXO/S09sqDXdebUbdAzMCg3DV8AnmgSdhhwZfp/L7BtRZwdgNubnH8a7YX6rtn+98gEaDr2V2Cn9P9a4AdZ2NeAv6T/+wF/rMiD8Bbe8tmxDYAHm+T5KuBr2f67gP9mH2gdob4CLvwewoVtg1AvxV8onbNg9sxOzMK/AUzN9t8LPJ/+rwc8XEpv
P+DU9P8BkkBK+7vTXqi/iLdQi62oIA4C/pbFfQ/wauld1hbq6dhUYLP0f0/gsnbPNds/GTgi258vvaexWfz90jtYNYt3OfClbH8IXpmMyc7bKAs/D9g3/T8DOAEY1eabqrr3ynJbce4pZAIfrwhaVba/BI6qum6T+B8nfa+pnD4FfBgYXvFuNs32l0zPd1id68wp2/+qTv1pYLEmOu0lUzh4S+/+ijjNjtflkez/GGB7Sc8XG951XjKL80T2/9/4x9wqHyOBdwATsjT/ko5XsRQuCAoewgvy4jXvBwAzuwx4FNgjPy5pqKTDJN0v6UVcGIK3VAuezP6/WrFf3PMYYKnS89o/y+tSND7f/L6asZaZLZRtf83Cys9+7gGOhZyONypIv2f24dyG92RmL+O9uqWzON8EzjOzu7JjY4BfZc/rWbziz89rVsa+m+LeKmmKpF37kN9maZZp+c4krSfpGkkzJL2Aq4ryskMp/uKSzpH0WCpvvyvim9k0/BkdBDyV4i2VTh0D/DF7TlNxFVqfvoPB5n9VqP8D12d/Mj8oaT5gC7zlCl7Qlq84/xG821/FK7hALViiIo6V0jqzJFTmNbPD2t9G0/w9jQvCVbI0FzSzZh/Vv/ACXTAaV089WR29Jd/HhWz+DD6HqwA+jOvqx6bj6kf6j+A9jvx5zW9mW6bwx/HKrmB0P65RF+tH+O+AbSWtjqtDLurD9Rrek6R5cZXbY1mc7YGPS9o7O/YIsEfpmc1jZje1u6CZPWFmXzazpfDK+thZYJHT7p39HlclLmNmC+LjQ0XZqXrGP03H32tmC+CV58yyZma/N7ON8GdpuMEE+HPaovSc5jazx5pcZ47kf1Kom9kLwI+AX0vaXNJwSWPxbuej9LSeTgJ+LGlFOatJWhTXPy4p6ZuSRkiaX9J66ZxJwJaSFpG0BN4qaMXvgI9J+mhq0c6dbKtH1biVs4APS/q0pGGSFpW0hpm9BZwIHCXpnQCSlpb00SbpnA18S9KyqWL7KT4g9kaNPDRgZtfieuydssPz45XoM7iw/2lf0824FXhJ0vckzZOe2aqS1knh5wH7SVo4PcNvDOBa7XiS5pV7ZbiZPQrchpexC8zs1T6cfzawi6Q1JI3An+MtZjY9i/MvYFNgb0lfTceOx5/JKgCSFpS0fbubS3G3z8ric7hwe6vOuX3gPGBnSe+R9A7gwFL4/MCzZvaapHXxRkLBjJSf5UrxXwZekLQ0PjYGgKR3SdokPb/X8MZPcT/HAz+RNCbFHSlp2xbXmSP5nxTqAOZma/vjFiIvArfgNfWmZvZ6inYkXuCuSHFOxi1IXsIHYj6GdzHvAz6UzjkTt2CZns47t00+HsFbsfvjBecRvBC2fTdm9jCux94H71JPwgdzwXX104CbUxf0b7iuvIpTUr6vxwcoX2NgwvAH+EBlwRl4l/ox3CLl5v4mbG4HvTWwBp7Xp/HKd8EU5UfpWg/iz7+OeuOOkp36L2tm51DgB6m7/n8V4b8CtpP0nKSjs+On4+ME7fJ2EHB6Sv/TZvY34Ie45c/jeC+tlxlpKhebAvtK2s3M/oi3Rs9JZeEuvEdah3WAWyS9jLeW9zazB2qeWwszuxzXk1+Nl9mrS1G+Bhws6SV8gPy87Nx/45ZGf0/PaX28DKyFW+xcig/8F4zAx82exr/dd+LjEODv62LginStm/ExnGbXmSMpzJaCIJhNSPoA3kMbY/EBBh3mf7alHgSDgaThuOncSSHQg1lBCPUgmE1IejduMrkkrm4Igo4T6pcgCIIuIlrqQRAEXUQI9SAIgi5i0LwELrbYYjZ27NjBunwQBMHbkgkTJjxtZs1mhw+eUB87dizjx48frMsHQRC8LZHU0vVFqF+CIAi6iBDqQRAEXUQI9SAIgi4ihHoQBEEXEUI9CIKgi6gt1JOL09sl/bkibISkcyVNk3RLcmMbBEEQzGb60lLfG18JpIovAc+Z2Qr42piHN4kXBEEQzEJqCfXkJH8r
3G91FdviPqIB/gBsKqk/q9oEQRAEA6Du5KNf4msVzt8kfGnSGoNm9kZaR3BRetb6BEDS7vhCwIwe7StWjd330oaEph+2Vc0sBUEQBGXattQlbQ08ZWYTBnoxMzvBzMaZ2biRI5vOcg2CIAj6SR31y4bANpKmA+cAm0j6XSnOY6SFY9NK6wvi61EGQRAEs5E662DuZ2ajzGwsvh7i1Wb2hVK0i+lZaHi7FCcctQdBEMxm+u3QS9LBwHgzuxhfkPlMSdPwBZB7LYYbBEEQzHr6JNTN7Frg2vT/gOz4a8D2ncxYEARB0HdiRmkQBEEXEUI9CIKgiwihHgRB0EWEUA+CIOgiQqgHQRB0ESHUgyAIuogQ6kEQBF1ECPUgCIIuIoR6EARBFxFCPQiCoIsIoR4EQdBFhFAPgiDoIkKoB0EQdBEh1IMgCLqIEOpBEARdRAj1IAiCLqLOwtNzS7pV0h2Spkj6UUWcnSXNkDQpbbvNmuwGQRAEraiz8tHrwCZm9rKk4cCNki43s5tL8c41sz07n8UgCIKgLm2FelpA+uW0Ozxtsah0EATBHEgtnbqkoZImAU8BV5rZLRXRPiXpTkl/kLRMR3MZBEEQ1KKWUDezN81sDWAUsK6kVUtRLgHGmtlqwJXA6VXpSNpd0nhJ42fMmDGQfAdBEAQV9Mn6xcyeB64BNi8df8bMXk+7JwFrNzn/BDMbZ2bjRo4c2Z/8BkEQBC2oY/0yUtJC6f88wGbAPaU4S2a72wBTO5nJIAiCoB51rF+WBE6XNBSvBM4zsz9LOhgYb2YXA3tJ2gZ4A3gW2HlWZTgIgiBoTh3rlzuBNSuOH5D93w/Yr7NZC4IgCPpKzCgNgiDoIkKoB0EQdBEh1IMgCLqIEOpBEARdRAj1IAiCLiKEehAEQRcRQj0IgqCLCKEeBEHQRYRQD4Ig6CJCqAdBEHQRIdSDIAi6iBDqQRAEXUQI9SAIgi4ihHoQBEEXEUI9CIKgiwihHgRB0EWEUA+CIOgi6qxROrekWyXdIWmKpB9VxBkh6VxJ0yTdImnsrMhsEARB0Jo6LfXXgU3MbHVgDWBzSeuX4nwJeM7MVgCOAg7vbDaDIAiCOrQV6ua8nHaHp81K0bYFTk///wBsKkkdy2UQBEFQi1o6dUlDJU0CngKuNLNbSlGWBh4BMLM3gBeARTuZ0SAIgqA9tYS6mb1pZmsAo4B1Ja3an4tJ2l3SeEnjZ8yY0Z8kgiAIghb0yfrFzJ4HrgE2LwU9BiwDIGkYsCDwTMX5J5jZODMbN3LkyP7lOAiCIGhKHeuXkZIWSv/nATYD7ilFuxjYKf3fDrjazMp69yAIgmAWM6xGnCWB0yUNxSuB88zsz5IOBsab2cXAycCZkqYBzwKfnWU5DoIgCJrSVqib2Z3AmhXHD8j+vwZs39msBUEQBH0lZpQGQRB0ESHUgyAIuogQ6kEQBF1ECPUgCIIuIoR6EARBFxFCPQiCoIsIoR4EQdBFhFAPgiDoIkKoB0EQdBEh1IMgCLqIEOpBEARdRAj1IAiCLiKEehAEQRcRQj0IgqCLCKEeBEHQRYRQD4Ig6CJCqAdBEHQRddYoXUbSNZLuljRF0t4VcTaW9IKkSWk7oCqtIAiCYNZSZ43SN4B9zGyipPmBCZKuNLO7S/FuMLOtO5/FIAiCoC5tW+pm9riZTUz/XwKmAkvP6owFQRAEfadPOnVJY/FFqG+pCN5A0h2SLpe0SgfyFgRBEPSROuoXACTNB1wAfNPMXiwFTwTGmNnLkrYELgJWrEhjd2B3gNGjR/c700EQBEE1tVrqkobjAv0sM7uwHG5mL5rZy+n/ZcBwSYtVxDvBzMaZ2biRI0cOMOtBEARBmTrWLwJOBqaa2ZFN4iyR4iFp3ZTuM53MaBAEQdCeOuqXDYEvApMlTUrH9gdGA5jZ8cB2wFclvQG8CnzWzGwW5DcIgiBoQVuhbmY3AmoT5xjgmE5lKgiCIOgfMaM0CIKgiwihHgRB0EWEUA+C
IOgiQqgHQRB0ESHUgyAIuogQ6kEQBF1ECPUgCIIuIoR6EARBFxFCPQiCoIsIoR4EQdBFhFAPgiDoIkKoB0EQdBEh1IMgCLqIEOpBEARdRAj1IAiCLiKEehAEQRcRQj0IgqCLqLNG6TKSrpF0t6QpkvauiCNJR0uaJulOSWvNmuwGQRAEraizRukbwD5mNlHS/MAESVea2d1ZnC2AFdO2HnBc+g2CIAhmI21b6mb2uJlNTP9fAqYCS5eibQucYc7NwEKSlux4boMgCIKW9EmnLmkssCZwSyloaeCRbP9Regv+IAiCYBZTR/0CgKT5gAuAb5rZi/25mKTdgd0BRo8eXeucsfte2rA//bCt+hxnVocHQRDMKdRqqUsajgv0s8zswooojwHLZPuj0rEGzOwEMxtnZuNGjhzZn/wGQRAELahj/SLgZGCqmR3ZJNrFwI7JCmZ94AUze7yD+QyCIAhqUEf9siHwRWCypEnp2P7AaAAzOx64DNgSmAb8G9il81kNgiAI2tFWqJvZjYDaxDHg653KVBAEQdA/YkZpEARBFxFCPQiCoIsIoR4EQdBFhFAPgiDoIkKoB0EQdBEh1IMgCLqIEOpBEARdRAj1IAiCLiKEehAEQRcRQj0IgqCLCKEeBEHQRYRQD4Ig6CJCqAdBEHQRIdSDIAi6iBDqQRAEXUQI9SAIgi4ihHoQBEEXUWeN0lMkPSXpribhG0t6QdKktB3Q+WwGQRAEdaizRulpwDHAGS3i3GBmW3ckR0EQBEG/adtSN7PrgWdnQ16CIAiCAdIpnfoGku6QdLmkVTqUZhAEQdBH6qhf2jERGGNmL0vaErgIWLEqoqTdgd0BRo8e3YFLB0EQBDkDbqmb2Ytm9nL6fxkwXNJiTeKeYGbjzGzcyJEjB3rpIAiCoMSAhbqkJSQp/V83pfnMQNMNgiAI+k5b9Yuks4GNgcUkPQocCAwHMLPjge2Ar0p6A3gV+KyZ2SzLcRAEQdCUtkLdzHZoE34MbvIYBEEQDDIxozQIgqCLCKEeBEHQRYRQD4Ig6CJCqAdBEHQRIdSDIAi6iBDqQRAEXUQI9SAIgi4ihHoQBEEXEUI9CIKgiwihHgRB0EWEUA+CIOgiQqgHQRB0ESHUgyAIuogQ6kEQBF1ECPUgCIIuIoR6EARBFxFCPQiCoItoK9QlnSLpKUl3NQmXpKMlTZN0p6S1Op/NIAiCoA51WuqnAZu3CN8CWDFtuwPHDTxbQRAEQX9oK9TN7Hrg2RZRtgXOMOdmYCFJS3Yqg0EQBEF92i48XYOlgUey/UfTscfLESXtjrfmGT16dAcuPWcwdt9LG/anH7ZVn8I7kcZgh88JeYh7mDPyEPcw+65RxWwdKDWzE8xsnJmNGzly5Oy8dBAEwf8EnRDqjwHLZPuj0rEgCIJgNtMJoX4xsGOyglkfeMHMeqlegiAIgllPW526pLOBjYHFJD0KHAgMBzCz44HLgC2BacC/gV1mVWaDIAiC1rQV6ma2Q5twA77esRwFQRAE/SZmlAZBEHQRIdSDIAi6iBDqQRAEXUQI9SAIgi4ihHoQBEEXEUI9CIKgiwihHgRB0EWEUA+CIOgiQqgHQRB0ESHUgyAIuogQ6kEQBF1ECPUgCIIuIoR6EARBFxFCPQiCoIsIoR4EQdBFhFAPgiDoIkKoB0EQdBG1hLqkzSXdK2mapH0rwneWNEPSpLTt1vmsBkEQBO2os0bpUOA3wGbAo8Btki42s7tLUc81sz1nQR6DIAiCmtRpqa8LTDOzB8zsP8A5wLazNltBEARBf6gj1JcGHsn2H03HynxK0p2S/iBpmaqEJO0uabyk8TNmzOhHdoMgCIJWdGqg9BJgrJmtBlwJnF4VycxOMLNxZjZu5MiRHbp0EARBUFBHqD8G5C3vUenYTMzsGTN7Pe2eBKzdmewFQRAEfaGOUL8NWFHSspLmAj4LXJxHkLRktrsNMLVzWQyCIAjq0tb6xczekLQn8FdgKHCKmU2RdDAw
3swuBvaStA3wBvAssPMszHMQBEHQhLZCHcDMLgMuKx07IPu/H7BfZ7MWBEEQ9JWYURoEQdBFhFAPgiDoIkKoB0EQdBEh1IMgCLqIEOpBEARdRAj1IAiCLiKEehAEQRcRQj0IgqCLCKEeBEHQRYRQD4Ig6CJCqAdBEHQRIdSDIAi6iBDqQRAEXUQI9SAIgi4ihHoQBEEXEUI9CIKgiwihHgRB0EXUEuqSNpd0r6RpkvatCB8h6dwUfouksZ3OaBAEQdCetkJd0lDgN8AWwHuAHSS9pxTtS8BzZrYCcBRweKczGgRBELSnTkt9XWCamT1gZv8BzgG2LcXZFjg9/f8DsKkkdS6bQRAEQR1kZq0jSNsBm5vZbmn/i8B6ZrZnFueuFOfRtH9/ivN0Ka3dgd3T7ruAe7PgxYCG+CXahXcijcEOnxPyEPcwZ+Qh7mHOyMOceA9jzGxk09hm1nIDtgNOyva/CBxTinMXMCrbvx9YrF3apTTGDyS8E2kMdvickIe4hzkjD3EPc0Ye3g73UN7qqF8eA5bJ9kelY5VxJA0DFgSeqZF2EARB0EHqCPXbgBUlLStpLuCzwMWlOBcDO6X/2wFXW6pigiAIgtnHsHYRzOwNSXsCfwWGAqeY2RRJB+PdgouBk4EzJU0DnsUFf185YYDhnUhjsMPnhDzEPcwZeYh7mDPy8Ha4hwbaDpQGQRAEbx9iRmkQBEEXEUI9CIKgiwihHnQ9kpatcyzoP5KGSjprsPMxUCS9t0XYEEmf7sA1tq9zrL90tVCXtHedYxVxFki/i1RtKWxYMWtW0jKStpO0ZqfvYU5A0oZ1jg0g/ZGSfi7pMklXF1un0gcuqDj2h1IeLpS0laR+fROS3iHph5JOTPsrStq6P2m9HTGzN4ExyULu7cyxkm6V9DVJC+YBZvYW8N0OXGO/qmOSzoR6MqoVba1fZiWSjgAOAV4F/gKsBnzLzH4n6QPAk2Z2bxIgGwBTzexSSZcATUd4zWyb9Hcn4Fel4J2LY5IWB34KLGVmWySfNhsAnwC2Biak6+QuD0zSobh/m5cl/Rj4DjARWFPSKWY20/eNpOWBR83sdUkbp3s8A5gL2B9YAZgMHGpmL7Z5XiOBvYF5gONpMxfAzJ5N540APgWMJXvnZnZwlvb7KsLPSH9/DaxVSv7XwFqSfoa7kfhtKa97AMua2b6SVsZdSSydgh8DLjazqWn/LOBcYCvgK/h7m5GltX663rvx5zYUeMXMFmh275JGA6sA7wAWlPTJLHgBYO7SKccCuwBHSzofONXMZs54TucfDrwTLw/yRzQzD6fi5WWD7B7PB/5ckbeG92hm96XjKwKH4j6WZubPzJbLzq18T5JWNrN7UpwRZvZ6ds76ZnZzqzwADwBTzGzlcn5L520ITDKzVyR9AS8XvzKzh1Iaf5d0MfBKlr8jW6VZSn974C9m9pKkH6T0DzGziaV4Q4HFS8/h4RQ2md7y4QVgfErrmRRvUeADwMNmNiGl8f70HnYFJki6FS8LV6Z0/ibp//Dymt/js1nevgH8zsyeK+V5C2BLYGlJR2dBCwBvAGtLWgrYVdIZNMqdhmu0YlCtXyRNMrM1JBVC9NvA9cA1uM+ZYbgp5abA5cAHgdvp+VA+CSwB/C7t7wA8CdwKfA7YCLghu+QCwJtmtmm6/uX4x/h9M1s9TZy63cyadsHSeVNS2vMDU/Fpu09Legdwm5mtkt8jMA7/EC8D/oQLmyG4ELg+3fv8ZrZzm+ueAZyIF9ij8OnDRaUzGngu/V8IL6jLpvP+ghfqCcCbRXpm9osUfiawPDApCzfgbOB9wDfT9fLn+In0zCYA48rzElKL907gTPy9nAM8moJH4Wav55jZYZImmNnaku40s9XS+beZ2Trp//gU//z0LHcEVjKz/bLrHQacYWZ3S/oUcCTwFvBiul4+t+KldO2bKp7xgim/3wceSc/7d/h7/lhWEZXPG29m4yTdbmZrpmN3mNnqFXEb3mN2nzcC
B6Zn/TG8khliZgek8Mr3ZGZ7SZpoZmuleDP/V+03y4OkPwHfKIRjk/u8E1gdb5ycBpwEfNrMPijpwKpzzOxHkl6iUdAaPvX9GuB7maC908xWk7QR3uD7GXCAma2X5eEb6Tk9ib/j4jkUZeeI9Hx+n8I+i1fum+MNxY0lLYk3xManZ3qCmf0yu8ZQ4OPA0XgZEt4I+0X1LTZUvIeka04ETgH+amYmaXVgDeBg4IDs/JfSc/gi8FVgObxR0NCYzK/Rkr5MP+30BtyVfk/CfccA3AFMSTf0DlxQvSOFDS/OsSbTZ9NLGgNsDPwDrwiKbS1gWBb3tvR7e3ZsUim9T+IC4hfAxyvi31GKf3tpf2L6/Q7+wYBXTOXzJlbcy1+BD2T75+Azd0cBd2bHTwS2zPa3AH5bfs4t3sNUUgVfOv4B/ON5PP0W27eBFdulnd7jP4HhFWFzAfel/zdn97sVsCZwf/k9l+65/JwnZf9vxH0LDU3laf+a5XFRvPU6Hq8EPoP3EK4F/t7m3JvwVm/xvpcHbu3je5yQfieXj7V6TxVlsvxsbq+TB7yB8RJwVbr/i/EeVVV5PgD4UrOyW/N5Lwx8Czi/nHe8x/K5JvczDVi0RbpV39LEVB4np/398UYAeOOseAar4ZXqP3HvtGul40sBD/Xh3gR8ND3nabhGYPlUJn/f5tzj+vM8i21Q1S/AnyXdg6tfvpq6hK8B85qZSZpZC6fft2gcB5hX0nJm9gDMHPya17wr+JCkDwOvmtlbklYCVsZVHQWvpC6YpfPXx1u0pP1jcfXI2enQVyRtBsyT9OdDgLnS/6JLXu7W/1fSDrhK4WPp2HDgTUkL01MbD833zbtanwZ+IOmrwA+AH+KFfR7ga9k11jezLxc7ZnZ5aq0U3CTpvWaW33vOXXiP5/HS8QPNbFNJq5jZj5qc+6qkFS2pEApSF/ZVvBAvBTxUOm9JelpZh6QW8j64EF0A/9gL/p10tZPSfT1OVg5SC3FxSQfgz2Z5XCALd1nxNUk3mtn1Te4BSX/EK4Iz8RZ58SzOTT2Fv0s6F7gImKnaMLMLi2eFqxCXkQ8Yboir+qD+e3w99XDuk0/4ewyYLwtv9p6gdyu4HFYnDz+sSLfMS5L2A74AfCDldziApGsqro2ZbVKVkLl64ii5k8CCxyT9FtgMODypDsvjHI+QfacVDJW0rpndmvK1Dl4O/5vlb1O8MYS5qqcoi7/GG5n7m9mrWV7/JekHqTf+bWC0me2eyvm7zKxBzZbk1xPAE7hqZWF8HOdKvIzMZe71diZK43XA97P/eZpzvvoFZt7IC2b2ptgJ8VgAACAASURBVKR58VrzW3i3f268lbQycDPe2n7AzL6Szt0cn231AP4BjwH2MLO/pvAJwPvxB/p33OXBf8zs8yl8Lfwlrop/MCOB7czszhR+D/BuK6peL8BT8G5fK53+h7L7ew+uJ/6HmZ2dKp5P492styjpzXqSaOjOLQf8BPgX8GMze770DP+Kq5kKNdTn8Vb2Uimfw4AV03N6nR59cNFdvQbvFt5KJrDwCm03fMbw58p5NbOJSU/4a7yrPCEFjcMHg76Z7vEY4D78YwRXFa0A7Glmf6m4/wYkjcGf+Vx42VgQONbMpmVxzgVexiuEB8zse6kiuCbd13B660EnZudvaWaXla47Uzct6dSKrJmZ7ZrFXxRYH39ON1tvL6Xt3uM6eGt8IeDH6T6PwFVBhn8bvd6TmW0j6Sm8VSi8QjunSBZXjyxeJw/tkLQEXhZuM7Mb5GMXG5vr9dfOos6Nj+O8YWZNBxclDcd7I0VZLNQkk83svqQmea+ZXZGdczJeAV9aeg5HpvB1cLVHUSG+hJfjg3Ahe0kKX9bMnpc0D94bnKk2bZHfc/FyvqOZrZrye5OZrZHF2RtXET6NVxAXmdl/iwobl0XvxntCr2TJf4MeuVKWCw0yoWUeB1OoJ6F7MnC29R5U2AC/kZvlg42fAB4G/mA+Cl3EG4ELfYB7
rHGAaKKZrZV0cPOY2RFKevwszjC8gAi418z+m4X9Gfh6avkXwuUYMyta3LOUdN9fBf6DC8bl8VbWpcBvzC0OiorxQFyQg3ejf4QLgaZk9/XBJlFG4gugbISrJEqnewtM0qq4emnVFDYF+FnRM0iFeV0aB0pvy/K/LF6gx9I48FUMeLclNQh2xJ/VGekjWgEvG/tUnDIz/+n8Kr1zr2Nt8rA03rDI7+H6uu+xlNYQYD4ze7HF+ymucZ2knVrFwVVSLfOgfgxIt0PSrWa2rhoHqgsWxiugG61x0L7pIGgKb6q7T+EjzA0TFkzHX0jfyDBcn71kuucrUvwPAWub2c/VZrBaNcZOJP0Id6dS7p0i6d14o65p/gdMf3Q2ndrw1tpPcJ3TObgOSln44rgefC1g8SZpvA9vOexYbFnY7bg1ws3AKunYZFxP3mq7BK9FrwP+jfcWrsn+rwMskV1nR3wA9GhgkXTsGuBqvBKq8yy2xCsegE+m31vT/W0GXFW63lV10k3xlwdGpP8bA3sBC5XiLI4P2G4NvLMU9sMOv/evlfbvSHn6ENkYSBa+Id5t/Sfe23gAb413Ii9LAGvjLeQ1s/K2Md5IKOItl8rFDOCp9L6XzcIPB6bjQvKSogz15T3iA3sLAPMCd+MDy9/JwufFB04BVgK2IRuvwCvhceV3WzcPeMW9Av7dDMUHag9NYS/hA4aVW4qzSLYthn/P96awU0vbKfgg6FalfH4Db+FOwb/VyWTjDqW4C+AGBuXjl9I4drYE2dhEm/JwI66auROvoA8CDs7Cm46dlNIp1I6ji60iznx4xV0+/oGqrXaZ7uTHOoAPa0gqoI/hrfHjcVXJVOBvabsHF85rZeedmR7ysXgL49fA0aWHczE+ug7+YR5dUcDKhe2DbbaJ9AjvD+Bd2U/hXeY/pONj0jaq5jM4Jt3fsVmBuQNvVayIq2/y+PMAv0z/i0qoYcviTsJbKSvggvFnwGVZ+KdxnffpuLnlg7gaKr/eNsDP07Z1dnxF3AriSHzQ7XJcDXIHXvl9u2J7uvif0rilzbO5Bx/8fSc+mLko2UAZrqY4LMV7Fjf1nJqOLYRXWCcDl6f476FnkG8nvAIuLBCK7WJS5Zri3YxbJwxL2xfyfOMLvoxokv+W7zF/T+n38/jA/HBKA6m48cDSeAVyPnBWCtsNr2z+gasYtulrHqg3IP1jXAc/Py5Uv0oSeqncPJB+7wOuADaqUfYfzv63HARNccbhwn562u7AW9pF+JeBP+KCdSwuoD+S7v1UGsvqK+n8ccUzTr/NBqs/gjf2ZuCmuNOBD5XytyctKia8R3s7/s09lN7rKln4Jdl2JT5+cHVtedpXAdzpjZ7R5ntxgbseLtz/WRF3fTKrEVpbAwwFfj6L8pzn4TfAQeUPM9uv7G2k+xxZinsALpA+m/bfh0+cORtYvSIfa6ffysoni1dUEt8ls8DJ74esdY63+PJ7PBS3iNg1bVcCP01hN+KrWf1fem/b493WzYBbcGF5brq3A9P2XPE/pfG5tL9B9qzyyrud0P8r8D0ae09LpGNX4B/vp4t7woXy5FIan2pzjV6txdIzupyKVled95jFm4IL8vOL91e6RvEevwF8Nw8njQml/8vRW3C3zQOutpsLr9iPwMcvylZad1Sc1+tYH7+nR7L/15C1spu9C+D92f5G5fcDfB0XipOB99UpqynOTXgj80JcOH+C1NvI0l4Ut9LamorFgGhvnXMTWUWA9wpvahF/GeCC2s9zIC9joBteQ12VPuoR2fH7gAubnDMt+38+sGSL9G9uc/1F8YpkYsrLr2hsAa6P9xhexnWRb+LdzbuKgoe3DnNTscJMcw28dVfZ28AFaX7PR6aCNBLXMfblOX6SJq3EFH4Lbnt9F0llQKNpaFnADaGxpXInqduf9ofSYwKWmxJOK6UzCe96no+rJwrT1AdK8Q7FVQ3X0dNSvjoLPwzvXTQT+ve2uPd7aWG6Cnwh/e5Dda+i
UCccDuyLt/zG4BXkoVl6F+Af829TmTqarNdY8z3uhQuby+gZ+L8hC69UJ6bfiaW0+mxmmK43N94CPzCVyeVLcW7CexJDUzn5PEkg4RXSXriVxx9wodjLnLXiunlL/WRc+O6Xv4dS/Nsr0phYem/74N/YmdmxlmU1/a6Dq0VG4a36C3HrsiJeL7Vn+RhtKib6WDGmsnB33fc42CaN21syRyxxObC8pM/QYzGxDK7/y60lFgPuls/6arAGSH9vl89uO59Gq4fCDO0cvHXyqbT/ebxV+eG0fwwVk15wIX+dpKdxs70bANLAXGFqdRpuiXNLfmNpMOpU/KW/ngZqT0vpbGdufvmOFHcBvHCPwlUHv8/SOdbMClO0j+GmYden/P/FzN7ILrsLboHzEzN7MA1MnpmF/yVZ0BSmm5/BBUvOQrhqA1zdUfBW9r88I/Yt8wGu7SVtC1wp6Sh6sz2wnJVMvDKKiSfjsmMGFAOdD0n6LnC6mT0JFLOFd8bLzzA1N12dN/3mpoM55VnFe5TyUEyAKtRevZC0uSUrnzR4dyQuPO7CZ1A/CWBmRWVQ8FAaxCv4ZrreH83XNFgOFyAAo9Q4S7Fh38z2anJ/SLrczLbA52H8CjcrLgYd96ZxVvbn0v6v0v3/PR0DOA4X7Mem/S+mY7tJ+nazy9P47B9O21xpy/NZDFpfl8wez055+Aw+1lU2DCi+8+J4y7IKYGa3pf2X8e+muPbcuOprMTWaIi9AjwFAwQPAtZIqrXOAByT9kJ5v8AvpnOJav6bHCmYIPtbTMKO2FXOCSeNW+AzLfKT54GQqVzW1/LLs3A9WpWlm16XwU6uD3QxN0l1mtmoeKGmypRml2Uh3PtPxdvyDHIfrKK8ws1dS2Ep4F3yipPvMbMUm9zwN76GsgKsJ5sNb+w/JXQn8xMw2lHQB3mu5GVd7/BefkPF62TIjmYZtgRfwjYArLS0WXgf5LMzCn8sNZvbHLGwHvLV8DV6YPwDsa2bnSvo33kIVPmhUmBkKF9TzZunMiw88rWdmH8iOXwTsbmZP1c1vKe8L463obXF1l+EmkBfjLeyx9DZd3d7M7ujP9VrkYy680ofMkkqNsz1PwnXeJ+I9rA/i4zC/ayb4rMY0+xrWL83mKAj4s5kt2cQCaKaVR4089JpBWxxrZrFSYDUsP+Smty2SqLaHz85vVVbfTe+GTM5VeKW6FI2zPV8ETjSzY7LrVN6r9VjnLIxXmhuloBtwFe5zKbx4l4bbuE+3itnPTe9zMIW6pOPx2u9DuD3ndvhI8pf6kMbiuJAlnVtbMEg6ErcKOC8d2g5Y18z+L4Vfj7faiw/xcbz192a58FekfTRecM6gd2/jQTPbUz4V+j+4APoDrg4Srt+dWGF++X3cSmYbXGiXP8DhuI3vLrjgvdrMPq1qXxgUFVUd5PbC+XN+Ih0f0+o8qzbrWswyG25J1+JjK7fRaCd/Xn+EnaT34yaUk83sCrnZ65tkpqu4Oul11fNds65fzm6TzzvYHPdDdHkWf2N8oHl6usYywE7mJo25UC+/00n4DMLfNhEGhg/Kf1NNfB5ZDdNPSW/i6q2qeREb4uMSZbca8+O9rU2zdFbCW9+Lm9tpr4YPyh4iaSJeWd6f4i6HV1h9MQsdiau2yg29TVKv4UIze6TJuQviPZmP44PqRo+l0mE09jDLrId/h5WuR8zsW+kae6UeVX7dBl87dUn5fcvMXkr72+KGFb9J+7fiDRDDx1D+0DSxPN1BFuqFn4fidz5c9bI1/nLyltfMl2NpwoTcDebP8K6X8IlG38FbiEeUujEzKbqicn8U89LTLRtCj5rGgPdSMekFn9bctvXSqrchnyDxSCYcd8Jb2ffjA4jPSpqK605zu/yd0z3OZ2Zjsut8Bh9wuRavpK7AB84ebyF4zzKzjdTbL0eDsyq1duJUdd+LAc+YmaW8HZvu/Rv4xzIC/2B3MrOrmvW4gJVbCLu85XOrma2b/u+GD5JdhFsqXIJP
vqm0QVd73zXn4z2gYfgA8Xp4j2Uz3KfHT1L8CXgv6t60vxI+/2JtSY/iKhelvC1fXC/vBTZ5lt/Ee04TWvVMUwNhOUtO2CT9AR8LAJ8YdjTur+e+8vmS/oWrUA7FezwFL+FjJ29kca/Dy99vrcdO+64k4DfFVYv5ZMBdzOwa+WzfZpiZ/TildQWuQvw/Mudu5pPJXsC/z/tx1ct5pcbBX3Ez4tOz72oJvCG2iZl9pOri6V3vYGZnFb3zUvjMY016M+Vec9OKKYUXk6MKtdALeE/8aNxI4pEUbxKuYpwPdyo2s3JtifVxMKWTGz0jzjfj3ZoReJeomTXDvri6Y+bgAhVWG7hAAe8u7VTeOpDvR6keVOs1qNMijTpmkUcAH644d3OS35S0fzbeOmk6WDrA+70T/0hXT/n+OnBdClsfr0guxHV/d+G9mqdSPifhXdsNcMue9dN576bGYB4+IPetNnHyAdDb6LECWRZXXzW1Qae975rJKQ/vwLvaC6SweWg0U6uyjikGkw8sbUX+liD5H2mRh3wQce+K8L3T71XAe7Ljk3H7+w/g41Db4dPZq67x8T6UhZb+kvBveLW05YYA+1RsB+AmfS9n8QqTwjurrok3vD6CD6jOSPe2Ey4g2w2YF2NUx6Q0hDc0pgN/SvGm4pVjcd6y6Vit+QzpnCvwSXtTcfXaKcDhpe+pl/VOcZ/Z8WOy/y2NPhrO68RH398N9zWxEC7MCvXGj9u9nLzglsKGpMJ8N15J3IHPWssnRSxSOueTlBx2ZWGVk15SPnMTvYYtnVvYTk+l2na6tllk6eV/G/hIzefbbMLIS6QJIynemRXnnpn9b+rECZ+w8hF8sPM5eoT2yvhHODFL55HSNQqLg0oroyxer8kdpXSK97womZM3/GN/KW1XU2GDnq67YkWaK6Z7a+UoKxdmp+Bquo3TdiI+q3BA75FGc78qR1WFA6yyQLgw+/933CplmTbXavkeUpzLcbVi8f63o8f+f3vSZCB8tuqFZFZKWRrzp/AHSe6Ms7Cmzt3K948Pym6DN2pm4ML0uzSaDi+ONxD/hvf0T8MHu8/DGyPXAWtk8TfHB2qLsOl4+d6JGvMZUhpNK6aqclTcGyWLnFL4/c3CesWtG3FWb3gNv2D63/LlZMd+ll7+zmm7HG/dfgMXoK/TKIwfJDOnw9UCV+A66F3wWv83WXjlpJdy4WpyPy17G9Qzi7w1O/ZlvNV7IP6R7tuXj7FNXssfyzAyE6pUuPfDW71LkJk80ijYppbSuR0XpnvgXfY7cTXW0ukjuTHFazqTMYUfhbeu3k+1SeP0/P2SzFzxbuskWtigp/c7LZWf96ZtF7wi3xI3By1MMXOzzgVprLBG4IL6wrR9i55ZvPl73K3Ze2ySv4dxve4leKWZTzC7hp7ZoPe1SGMa3sX/F64z/xqlORJ13kOKsxwuIP+Nq9RuxF1PQ0/PZKOUt61onKC1CK4KehAfMF+4Ig9bp2e7akpjAmkiFRXCMDvvHXjFfjj+PT2HN6ampmOL0GimOxTvTc7dRBatnrYRpbCW8xlSnHZeR3+Jm75ujLfkj8Ublpfh/njK6e2Bq/LmXKFO+2n6LV9ORVpHpu0TpbCWLixT+rlbgiFkgokmk15aFa4sTruu4Pfxj/pP6SMqxjdWILl5pblaYd5SAW0nFBep2IbjgvolfIQ9b8U/Uzp/CVxgvT/tjya5Y6BRsPWylcYHDH+LD64tgQu7yfhU7ncX+U+/lTMZaWwZFVvbGXb4h74s7k53Aby7fVLK10eyeKvig5wT0nY67kQKms8SXayIUyMfLd8jrXtUb1DDlTQu9LequPbW6Vm3VF3UfA8zJ/SlvM9fdZ9UuM3FG2D34w2dyklaNZ7jSn2M/35czfORZuWzH3lYCJc149P2C1JjtPTMKyumFuX5GryCfCH9/0Xark3vvdJNStU2KAOlTUwNC8zMdpWvljMKr/Vezs7NbX6XBR43s9fS/jz4zU+vmY9K
h114Kwp8FuJQvOWVj25PtzZuMNOAz9+otp3ezMw+LLeXbmUWeQf+MQ/BB+XGZennDoUqTS+z8Om4cM0X0XgCHwT+Mm4fX7XEVn4/lVZGyarilZTuPHgLjrQ/t5kNT+e+YmYvp3e0D96K/pX5QG6llZEl8zhl7pWz/PQ61iLvhVndR/HBtx/g6qXaVhlN0l3YSo7oml2fGu9xgHlZARfeN9Fj07w2PpN0a3xRkCoT2B3wcZuR7d5DOu9mM1u/SR7+jLfeN8MrnFfxsrK63LXt63gllQud8qB8v5271Rgw/wk9hhB5eS2vYtXqGhfgvezT06Ev4jN0P9nu3HT+EPx7O69FnE3wQVbw1aj6tLTjoNupVyFpL/yFTMVnZu5tZn9KYbl52Hh8CvB/0v5ceCt3neqUe13nOlxQ3ZoOrYO3pArb9SrbXrM29rAp7Za20+0qhZTGdHrc8xqwYRKC8+GqizVSvHZC8UR88LVwSfwRfBzjVFywrpfyuyKNo/XXp/iVVkZW18TK1xvd2cwelvtDH4n3kjY3sw+pjWvdJhYHE8xsbWqQWVf9CrjWzP5YV5imMvZR61mZZx1LE1Sq8tUkjenUeI8tzr/R6lkpjcAn0M0UCPiCDK+1ul9J7zCzf8vNNRfAe3Fbpd/jrNHF8XG4+qzXhD7VcJvbjlQBnox/ezOtvizNPWlzbt6QuQ1fOGaGfH7EzdZmRbOa+ZtUfl/lY+0qpioLm47S1+5HJzd8NZCFsv2FcZ3bZFIXLT2Y8fSM8jddpSgdq+2Hgsau7Mb4wO0USr5TOnSvDV3BAab1DmjwEDiG3tO7V8jCJ1ekMXOaP67nnYy35K/BW1j5NP2WvmHa5HUn3MJhx+z/19P/B9LvpU3OXRmvfO6nUT23M96Cqfu8TsXHMe5Lz25+6nvta7pCVaksbl9xbq9jrd7jrNxoobrAhfcvcTXnxLT9C9gvhecDiadWbKdk4W29E7bJZ0s/P+3eFRUD5uV3VSOdhfF5Dr08JOKqkI2y/Q3p7WenndfRw3CTzWVoYsAxoHc9OwpUi4fXbBR4SunYfLj+70gaB+aupFFXtS19cEmbzlkTb4VOxwVa4fDqXbhO69K0/bz8YdDCFSq9B8dup+bgWD+f5UgqBr9S2BW4LnMMPX5Lrkwf4ERcoM9NjzXKyjRaT7T0DdMmX2Nwa6Q1cJemt6aPfUz2/x/AXBXnbpuExjM0CpGjSU6aauZhCK4OWCjtLwqsloU3NZvEB99/jruL+CQ9FcynaBz8qlxCrYPvdygl07ka59S6fnqeJ5HpyPEGwgn4WMiDNdOp7Ta3RRotnbu1OXc6LQbMa6bRroGzOi60p6ft9rwspTjtHNA9WLF1xJW02eD7fhmqxtVl5sFHnp+UtIaZTQIw18VujZuN5V2orwBnSfoN3i19FG8RtiTprXdI29P4ZAdZWrFIvkDHhXihPgHv5q6J+3P4pPWszH498P6kurgCV918Bu8CD88uuQfeQp8h6ee4Xf5hfXhOze5D+AewJy64JOkN4NeWLTpAz4dyUdov/HUMxccNzjbvohez4+6R9K7s/Dq+YSoxd33wa9wS4C3gy+ZqmNH4fIKHJd1H81Xo/yRpAzP7R/0n0zsbuLvdrfFFEualUc30ptwVQpVfms/iA88fTOcslNIRsKDarxDfEVIe75U02losDF2iavZoFVviZp0zVTvmi3N8Ff8+tlCTiXxZ/L3wAel3WVJV9ZP34nrqTcgWlabHz09TzGxsk6C3cG+LddgbV8PebK4aXBnXKCBpDdwg4bP42AFmVvYhA/Ar+YS5K2j0/TIx/S5bMy/9YrCF+lnAVdnA6S74AMTvKX0Q5rPadpQ78imO3Q+sn3STWDag2oZ7cNOura1Hb5uviXkAPsPs2uzYRUk3fCA+wAReEfxb0pdwHfARaRYYwJAk7IekeDNSHl9JgrcTfAvv/q1jZg+m+1gOOE7St8zsqHTNp/FWVBXTJD0qaSFc6F8p6TnS
mqKp4jgaL+iFr4oTLPMN0w4zO07SmfiU6GIg9Rm8UgVv/d6PP6uq1Zpul/R1es/Q27UibhXH4h/2JrhQfwn3qpiPvfxd0jFUL3mXrzK/v5kVH/npuJpiPN5Lm5Cl9xKN66x2goWBKfLp43kemw0iXloz3bdygZ6l+6akGearjxWV/IZ4BXlu2t8e74lB+7VD69DOuVufSWXuwZrRX6tq4MhnxH4Bf8dH4NZhJzZJo7JiknSImV2t6lWgsB5HgwNi0AZKk7AYhX+ohVfEKy0N5tVMY3G8Fl3KzLZIAz0bmNnJbc77OF7bboirdc4BTipqUEn/NLOVmpx7r5m9K/2/Hbf5PQqflDNFySHYQAfHat7/7bglTXktzJG4RU0xaLQSrsMbS+PATa/Wj3wq+oL4hJLCIdVMJ2ezkmaVs6Tz8Yr4c7hQ/jxuerp3zXQLlwCtliC7puJUq3pGTa4xzBo9Y3YctXFgN4B0L8LVbWeUjn8BHxfYNjt2M65TfiPtD8fdGKyvNmuH9iEv/XbuNlDkC5Dvgs9G3wRXwwzH1YXrpEbcorgn1EqDDLnDvveUKyZJPzKzA1VjvduBMGgtdTMzSZclYdF28eEmnIbrWL+f9v+JtyBaCnUzuwhvec+L622/Cbwzjez/EW9lNSNfKLapK9QOdQXbMbws0NO1Z6SPreB8fDWpk/CJSQ1IOtPMvpjOLTxcnom3NgAm5lYfnUa+xumZJF8lcpfGO5rZlBRlBTPbXtK2Zna6pN/T6HiqHf+Vr3tpKf2RNLphxbLFwvvJfZKqWru1FguuQy68lfnXyeOkVuDh+IQ5Uc9c7+vAhZJ2pXHx8HnoXVYXxlVLhfXWfOkYtHCb2wcWAu5J1itV7rRnKWZW3O9BqaJfEJdPNxe9TDN7Rm6a2Iy78PtoqJjM7MD0u0v5hNRA7QiDrX4ZqLBYzMzOk7QfuIpGbjddC3Pb8N8Dv0+qku3xAcVlSvrRApH5Tk4f2XXZ/gP4qHera/alK9iOVl3UPOwNMzuuRdyGVdSTAMzNBdcDPi/pIXps0s364OWxDSfgPnOuSdffGJ9m/74UXiwG/nyqAJ7AhVZdjsYr63dK+gk+tf0HeYT+9voychO1ufGytEiTuH1CPp/hMFyQ/hivABfDVXw7Wpq3kTgC+JiZTa2bvpk9BqynRvvoy8zsqoroh+HqsGtgphvmg1I6nVg4+cAOpNERSpXocmnMB/y+l8/2y5VOrYopqTw/hfdA341bDQ2YwfbSeA8+8NAvYSF32fopkhvaVPgPN7PKbmof8rVTmyjb0XrQaLa0KtQz8adXEGniT4p3EN5q+CONk6j2APan96Sh/+B68/3S+WOqrm9NvDT2lbIqpHxMPpHkAlxXeRreOvyhldzltrnGyrj1jXALqaml8MtJvT7zyTLDcOusfqud1Adb+jbpjMff04J4BbhF0nOvjA9yr5nF/buZbdgkqY4g93xYLFxyi/V4RGzpnbBGukNxy7eVO5vjgdNM9VVQqgCaqsnkxiDb4oJ8TXwM6ePA9ZZ5Yx1QXgdZqA9IWMhXQikvfrCdmd3ZsUxWX7d4aS19L88pSKrqGVihGpB0qLWYUZoqyynW4/d5AXyK/y3Nzulj/v6Im1bmK8GsDexlzX1nb21mf66Z/ntxM01wXfxdFXFuM7N1Snr3mZNKksD6Mr3HJYoFV/JJSEPwlvtXy5VVfyjlY6qZvTsLu70k1H+Fl8mLaGwldmQQLl1jaVzHnD+H69XCbW4f0v4TblZc18JnjqJVxZTUhu/HrWLOwf0iTbMOW8MMqvrFeqbnv5OsZu8Dy+OWKMvgLfb1mA33lOmdf2GNM8MuSa2qOYoahebPkua15v7Sj0vHCl6uODYQdsVXgikEzw3p2D/kbiGm55El7YKrT1oKdfkiBH/Cy0fhPvi9kh4GtrVGc7RX1HzJO1I6N+CuH6pUfL/I/r+B2zB/ulX++kDegnu1FFZulS2A97o+UorTGcsK6XDcpHUK
jSaH1+Pr+54sae9CNZlUEH2hrxY+cxTW2vT0PfjA61S8cfFm1ThMJzIxaBtuBnYf/vIexAtJX2YKtvQKNxvyX+l7eTCfaSl/383+b18K+2n+HGGmv/Tbyfylp/Cqmbt9mlTSz/xviQ9+r5gd2w+fHDKqxvlH4xOHyotmH4Hb8udx18Lt919Iv//EfXo0fQaz8T0WC55XOV/772zOy700d3LW0jthzfQ/WLXNxvv7ZJJJL1DhprpmGten864i86qZXpF6nQAAGHhJREFUwlbGGzD34A68ZtAHZ121rj87C0TFzd+Bz+4rPLl9CDi5D+c39Qo3m/Jf6Xt5MJ9pKX8tPSiW/1PhLz39vxAfAB6etr2BizqYz5VwXfEVeJf0atIsPlwPPg1Xsf0Sd1jVy2Vrk3TvpmJVd7w3V3YTPCIdXyVdaziNizwcgvsSaXatYkHppt77ZtM7H4WPnTyVtguoUQH2If3LaeJlkTbeCftwjTGkxWFIbh1m4/ObRvIe2iS8rWdOalZMuIrx50mG3NSpexhsnXrhXfAOYE0ze6tq0KzF+U29wnUof6dSvRzerlmcEfToa++xfqxVOKso6YfLutc87DrcbKtY2/Qp3OdJsQD3O/FW7yb487gK+KZ1yJY4vf/jcSEwU7VhZhNS+PtxQXUTvjTdazXT7eV8qVmY2ixTpp6lD1/HrXHKzrQG5L2vU0i6ErfoyscnPm9mm3Uo/QvwHt1VNOrsW1p99SH9LwO7475Qlpe0InC81V3KbeDXbznQLOkGvAFwGr4cZOVkqzReuKKZ/U3u6GyopTGpirjC3VpfP+AbYPBNGp+XTzi5Hp/u/xTV1hzN+DTeWv65mT0v9wr3nQ7mL9fZzo3b7P6rOCBfl/Jk3AKhrQvWQcCa/C/vfwYfjf+SmT0hn8L/s5kRXXh/dpblsonJpXq8Egr/kDYFnkofwUyB2oK5Ja2Zzm9IOqVXWHIsDcxTirsA3koEv1jVTNec5c3sU9n+j9Qzu3h2MtLM8sktp8nXOe0UhToh5/NNTICBPgv8r+POtG5J596XGhWzi/GSzqXJQLOZvT9VNLsCE5Lu/1Qzu7KIm1dM+Ljf0nijpbJiMm9Zd0Sgw+Bbv8yLt66H4LMEF8Rrv4H4jphlyCcc3Ghm70v7K+Ct28/gXe5T8Zmcg/dQM1TD13mK9w3gd+WKSdJ3rcYC3h3I50FUmFxaDffEbdKtmiU6E3PfHjvhXh/H4b57CqH+EnCaZVYjau2e+B+4O+Ib0/6GeGNjg4HcQ1+RdBVeDgs/PTvgiz/PspaupP/gvZTz8EZPQyVqZqdXndckrVvMXUHfbmZrJtPSida5ORHtrl9rtmeycvk43oN9Eb/n/c1dEE8iVUxZb3i2zMqGQRbqOWoyQ25OQu7/4lIzW6F0fAiuTzwOVx+ciluPDEgozS4kHYK3xCfiTtP+amYm6WNmdoma2O335WNtc/2WJpezA0mfMrMLWoTvho8ljMLdFa+Pu1wtVohfA1e9LIh/4M/ii5zPUvPainyOwc18N8Ar4ptw09COmAimd1X+Rofgk5I+gw/knov773++H+kfATyPO+b7Bu6G424z+37LE2cTklbDG3Jb4Z5OTzZf0GYpvDyMaVcxKXNimKW7SMfkRaeU833ZaLMC/WDkqUk+i2XGit9/UlqjEF81/SjcKuBo3KxyHwbRWqKf9yrgo7j97DR8duXyg52v2Xj/7Za8a+meOIu3ALDAYN/PLHxO+Vq9S+OuMg7Owkfhdur/Ar7Yj/SH4PMBzk/bbrP5/ubGVUDH4g2cU2j0F38dPl4yT8W5X0y/R+CTxe7Bx/v+CPwki3cpyUV32l+Smv7962yDpVM/hp4ZcldTmiFH/33BdBRro0dNOvXncb36vtZT+96Sut9vG8zMJD2BV65v4PbCEyXNoMcLX/mcjtkOy6f/v4dG1cYZzc/oOLua2a/kS94tin+4Z+IWOdDEe1+W/0XxKe4b4R75bsSF3WxRJcq9
CDbDzOzHnbhOxf38Mn0HB8gnYO2AC7LLafRa2RJJ2+JWOr8BTkx66ZHA2pKet5qrbHWAM3Fh/FEy53Epj0OBx8zszKoTs+P7Al/CGwK74737k7KoFwHnSdoOn0NxMV4RdoTBEurDLC1xJelgS/7J04cySFmqRu4gaSO8y3mDuTOwgu2tyTqZNputHgaCpL3x7u7TeCv1O2b236zQ/RYfuJolL0fue3pjXKhfhk8ouxE4I4uTWxPMg5ehVo7X+pyN9LslcIa5g7b8fpu6J06cgw92FYOln8fVEB9m9lBlYDAvLlwWxX3GDBhVz5xdOgn2qfhz2M/67rHyuzQOxs+Fm/zNh6szZ5dQb+o8znyy0DKS5rIK18B1KyYzO1G+9OZF+AzlPczspk7dwGAJ9b7MkBs0JB2L+6YpBp2+ImkzM/t62n9G0pG4GSB41+xga2LmNAezCPBJ6+2eYQl8lH8j3DrmUtzSZwqdZTvSxCcz20XuXKtwvVBlTTCKFtYEVbQa5ExMkE9zXxbYT9L8NK6R2cx7X8GSpdbwIZI+Uzd/A8XMZs5oTXnfG9f9nkPjbNeB8gt6vtFi5uw78Upl9bT9NNWHffHlNJc1uoS40VzH/GwyqJhdtHMe9yDNF3RpWTElq7IC4at+TcLXhFjf+uCiuBWDJdRXl1SMGM+T/pP2++MuYFaxCT4RoZg6fjo+PbrgFHw8oJgO/kW8VfG2aaVDj0tQldw1mA+unYi3OkbgXetr5X6hj+lgFl41n6PwhtyvzFN4D6FgQGZuzQY5aVxN50v4knsPWI/P7F1K6QzFFxEvBnaXwCeOAFwh6bO4BQh4RVV7bYBOIGkR4Nt4L+F0fBm4TpvaboH3RsbSIz+OwseTBsLC+Y6Z7Zntjhxg2n3hhNQA+CGuFpkPn5RX0GpBl3YVUzl+YVnVzly2TwyKUDezoYNx3X4wDa9NixbsMulYwZximzwgJH0Mnw25FC5Qx+Bd6VWSMN8KF+hj6XFj20nGJ9XGibge9mVc6Ba8bmb/KbQhyZqgLz26pkuUZRgtlrxLZp8HAk/SuJrNsvTY0n+Tnh7GkHQfHdOVtkLSz/DGxAn4rMe6q4D1lYvwcaSJQDEJ7OWKXl5fuUXSl620mpCkPfC1bGcLme77OqDB+ipV6iuZ2eebnN6yYjKzH6U0DjezWVYu5hiTxjkJSZfgH+qCuDC4Ne2vh89Y3TjFmyNskweKfEbnJsDfzE2wPoTPRByOT/m+DDjHKrwbduDaI/FKZJr5BLKxuPXInVmcAZm5qccD4yRgPTN7XdIUM1sli3Mcack7M3t3aq1dYWl1G/lqNuvNroHPviLpLdzG/w0aK7y6E7XqXucuM1u1E2mV0n0nPRN+JqbDa+OTxD5uZk92+ppN8jGC3j0RLK35mwbAN2miUz8LuLZJxbSxme2Q9v8xK2XEYM8onVP5eYuw/IP5KnC63BtgYZu88yzM16ziv5ZWc5E0xMyukfRL3H/5K3hLd69s3LAjgiKpRX6Kd2eXlbS7mZVnK0KjNcEeeCVzUkW8ZrQb5AQX2GvJlwjEzJ5Lg1kFbdffrKG3n2WYWauVeDrJTZLea2aTO5mo+azl96lxoY5LzezqTl6nBn/C3/MEGtceKHiA5jr1b+Erqn2OioopS2NSOv/8Uhpv7zVK345I2ghfkPrrpeMLAFj1yuJzPJL+hhe6Q/EVdZ7C12N8X8sTB37du4APmS+/txw+m3iW9nLUswbrX/LWlqRb8JWWbkvCfSRuzliYqa1Ci/U3m+ntreYCEXM6kibjDZpheMX1AP4cOr0K1qDSrieSLLV6YdmqT6WKaUq5YtIsXqM0hHob5P5APocvT/Yg7vWu5fqLnRrFnl2kQZzX8A90trlrUMmJVnk/O74hvmTaGFyoFIKk7YxT1VxNR9Ln8RmRa+GDjMWSd+9pcZpl3fLJ9Ojt1yj09m8n09ZWqMmCNgVm9tBA3tOcgqQTcLfMHe2JzE5C/VKBpJXwgcEd
cNvtc/EK8EMpvLK2TrztaknztVoLOjL1vyaj1OgIqmHfenzLnIx3bRu8ONbBWi9akMc7S25rXSx593HLlryTtL2ZnZ+fI2n7bLfl5KS3OzUHQvv9ngabUk9kF0mVPRENcMm+lMYo3JVDMUHxBmBvM3u0A7cSQr0J9+APemszmwYgaeYSddZigV111iPeLEWNXhChp0Lq6OBaC8oeNZvNQHzBzC4fwHXqrqZzH+4OYhj/397Zx0hWVmn898yIjiO6cWWNiiK4uIKoMIIYRXRF8WPBb8C44kTEBEUDhkh0HEVHo7IBFbMowrISICP4xYeIsLiKjoQwKqDMsgTBDyCIQSAYFVFgHv84b3Xdqq7qvtV9q+ujzy/pTN9b97739nTXqfee95znAbo+CNYROdAq1X118vbTzmJ/T6PkwJrHbSQmeQdSsewb8FpnEPLIrUnBoWVfM/LImX6ZjaTXE00E+xANJucCp7uGl6CkW23vMN9xSX0kHU84Fp1HZz77mr4ndZ7/kl773WkWXC1ZfIjywQZ8gOgyPYR4M7d4DPBM23v3ud6svP20onaX6SEs4vc0Dkg62/bb+u1TMROXdF1l9v6TVpVUzWvM0vnvtW+h5Ey9Bw4pgAtKrvl1RP3x40vZ2/kuEgd9GC+dgzmQtIqYbexMWNp92YO3dy8FLef6qh+s6Wwe6ks1eM/B0cAzutcRJO1OyCq/ls4niT8SqYaFXm+a6O5YXdDvaUzYrbpR1mT2rOxqdZzeIekAQrjsHwe8xt0KL+CqPHJj61c5U69JKVc7GHiz59CmnqSZusIM4AEi1fRq4BbbR4/2rppHYSL9n8CuxCL3SuDP1fSSovV//34fapK2sf1Ar9eSQNIqd7lSSXrcuNb2V5G0jhAZ7PYd+Btwmu115bgDiffLU4i/qccAG/qU4nZf4xTiye+xDFMeOYP64FRy0bNeIiQ5J+IJSBXhfkWX5o97VZ+MA2VW1L049fGa5/6USKd9nZhFriU6A9dJOqYcNmfJYjI/ki4GXtf6YFS4Sl1se8+5zxwfJH26FcCHMPaxhIbRR21/ZRjXgEy/LAjPb202KczMPG0/qCVWyFS0tt9s+9Su/UcAO9n+YNn+EmEt91Ki6eggBmwdt32zpJW2HyLEla4lFjpbv8tby9fDmadkNenLBcDXNSRJ2aWgX0BXH/evynnzuoDZPkGh+vhZSe8gROmqonHZfJQsDrXt7oAOy7slqX4pJYR7ueuPUOEkdV2rCaS1KFX5d1vgEtv71rzOJkIC93RCde8O4O1egEG5pNW275v/yOWJpPcQvsE70rCk7ChRp/vXBmJRfQYPZtm3Fvgk4SUxoyPUVPNRztSXMR69sNojugM6gEOxsfrY0JJnvk9hG3Y34RZTl7cRAlvvJRY3n0Jb9xwASd8l9PHvLduPJfRuXlm2X0h8KGwL7FAWUI+wfeQA9zGVVFJYwPAkZUdJNWhLet8gQbxy3m6E5eVvgb1t39HgLc6QQT0ZJX+R9HTbN1V3Ktzaqzr73y414CcQmhpmAO2XSuPM/cQsqxf/5IqnpkP7pSrv+znCDedb5fWfS3oxCSyRpOywqdt9zMIbDL9BNBnNVT23aDKoJ6PkOOAShfF1q1xwLyLXPdPE5bb5xDclfRtY5QGMSHq0r7fGrbavP1RtNipt8R1vXtu3da07TFTX5LCYqxlvkqjbfbwI9nCX4fQwyKCejAzbl5RGr2MJSV0IE5I3VbU3Sp52o+17HbK5qyUdafuLNS9Vp319PXCFpB8SKYR9iUqFFreVFIwlbUPUtd8we5jlh6STbL9PbcnqDnp07o4zPbuPiUX61s+2Wp3GPrXWn5YioEMulCYTQJ8OvGttr6l5/mbbz69x3HaEuiKEMNddXa99nlhwFaHgePQk1GAPG0l72r66TufuuDMVP0MG9WRUlNz5ekKH/rOE89G+hL76O23/pBy3BXhOa1G15D6vc8Xkos/4A7WvS9qe2SmaTeV6Z7m/
401SUJiC72D7xlHfy0JRp8n5amClGzQ5l/Qy4Erb3f7MjZDpl2SUnAGcRXTlbSby6G8gAvvJtOUBLgW+KqlVz34EnabP/ajdvi7pPwjp3euplJkBm0qu9anq4yKfBApbxBOJOv+dJO1BGLFPTPpFs03Ot2dAk/MarAVOkXQP0Z26ifAzbcRPNmfqycioplUk3Wx75z6vrSACeeuN9V1CYK2xhUpJNxJPAz3znpLOImQGejneJMz0HexHWLqtKftmupYnAYXl4d7A5mH/DKU89yCiQetJTXWi50w9GSVbK993u0ZVO+22EvW9pyz0QjVkBn5FeLL2W8yay0U+CR6w/YeuCqFJmzUu1uR8XoqY176EXeRdxFPpj5oaP4N6Mkp2kXQdsfD4z+V7yvbTJH3N9iFqGxh04JoWajVlBu4jvCO/R2fe/ajy74Yy1rZl+0+1f8rlw/UKf86VZb3kKEKsapL4oaQPAY+UtD9hcn5Rw9c4iZggfAm43PZvmhw80y/JyNA8FmnA32zf0e8413PjqSUz0NUGXr3GmeX1ZxF+pS2Z1buAtbavr3MPy4GyqLgeeAXxwfw/wCfcpdw4zpRU3+F0/gyn9+p8XuR1dgNeDLyI8Hy90V067gseO4N6Mk6U0sG7uypd/tfFSnCBY262/XxJVwFvJGQGrq/m8GuMcSWw3vblZftfCQ/SoZpzJ9OHwqh+H+AlRBpmO6KEtufEYlAy/ZKMDIXO+fFESeMniJnwdsAKSWttX1oqT7ZK+odBuki76CUz8F9d9/J04NOE0XQ1797qOn1UK6CX/T9QmKgkBYW37/sJMa9qWejYm2T0S/G1qJvqq8kVla+T3ZA3aYucqScjQ6Fz/iHC+u004NW2r5K0C3BOpfrgQmANUfVSrTyZV+60xzUfQQ+ZAUlXEMp7nwNeAxwGrLB9XHn9fOID4exyyqHAnrbfMOg9TCuSfk7kiTs6d233854dG+ZLBdZN9Q14zaEofmZQT0ZGV9niDbZ3rbx2bSWoz5nvnmP85wG32f5d2V5LqDPeAnzM9j2VY1vek1XjkKtdDB4Uqo0biBwoRLXCx5qqLZ4Gqv9fSX8kvYCQrtjWduOKn5l+SUZJtaSxu7uumlN/+wJz6qcSbf0URcXjCY2ZPYgng4Mqx/61LJLdJOm9wO2EzG7cTATvgZ8MlgOSWovHF0k6Ejifzgqie3qeOEZobjezWtouA3ASQ1T8zKCejJLdizCSiBKyqkjSKphRzltoTn1lJaC8mfCa/Cah9vizrmOPJsoejyLy+/sBM08IkvYiUkU70pkvbjLXOqlcTQTEVoH6sZXXDDxt1hljhpfYzcxDVPzMoJ6MDNc36fgTsEVhZDFITn2lpIc5PDNfRqfqYsfffktnplzrsB5jbSSC1RY6nzCWPbZ3GvU9NI1CS7+6YN6kFO9QFT8zqCeTwHm0jRcG4RyimeQuIr3zIwBJOwN/KN/P6QJf0S35vWs4xi9HBlm7GHckvZbQDHoScCch8HYD0Y3cFO8iFD+3J9J8lwHvaWrwXChNpppSNvlE4DLbfy77/oVYpLpG0u+B24gPgM20UwhAW3K1KOu9BejuOG3ELHiSkXQN8HLb95Tc8Lm01y52tX3QnAOMEaWCZz+iN2KNpJcCh9o+fMS3VpucqSdjj6Rf01smYN5cre2reuz7RWXzCcD+RMD+d+Biopyyu1P0MGAXQh+mquK47IM6g61djDsP2L5b0gpJK2xfLumkJgaWdNwcL9tth69FkUE9mQSqkrmrgINpt+sviqL0eClwaalhfwvwA0kbbJ9cOfR5tp/RxDWnkNprFxPAvUVGYhOwUdKddDogLYZe4zyKkCV4HLFAv2gy/ZJMJE3WRJdgfgAR0HckSs2+bPv2yjFnACfY/v8mrjlNSFoP/Buhh7MD8FzbLmsXZ9reZ6Q3OAClS/h+Ig33VqIxbqMbdriS9GhigfRw4GvAZ2zf2cjYGdSTcUdtByMI6du9gHfb3r2B
sc8CngV8BzjX9v/1Oe4GwjTh10ROvVW/nCWNzL92MdKbWwBFn6VautrIYm+p6T+G+MA4E/h80w1sGdSTsUfS5ZXNB4HfACe6Acs0SVtpPxZX3wwdTSeLVYpMJgNJRxCdw/cTayetv4NF19pLOoEQlDsN+MKw5JszqCdJkhQk3QS8wBXT8QbH3ko85T3IHBOIxTJpixjJMqTkvN/E7G7Oj/c7J0kWyC8Jw5TGsb1iGON2k0E9mQQuJJqFrqa/3VySNME64EpJm+nhgDUJZFBPJoEn237VqG8iWRacCnyfCZaDyKCeTAJXSnq27S2jvpFk6tnG9jGjvonFkAulydhScaN5GOHj+CuynDAZIpI+RVRXXcSEyQe3yKCejC2jcKNJljdFkqKbRkoal4oM6kmSJFPEkpTYJEmSTAKSDi4t/Ej6sKTzJK0Z9X0NQgb1JEmSNh+x/UdJLyKsEP+bMNOeGDKoJ0mStGnZyh1ASAhfDDx8hPczMBnUkyRJ2twu6VRCF/47pZt5ouJkLpQmSZIUJK0GXgVssX2TpCcCz7Z92YhvrTYZ1JMkSaaIiXqsSJIkSeYmg3qSJMkUkUE9SZJkisigniRJMkVkUE+SJJki/g4k1wwM55RifgAAAABJRU5ErkJggg==\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"oRH8yxMcTdv9","executionInfo":{"status":"ok","timestamp":1614375940210,"user_tz":-60,"elapsed":2569,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"9a895ec5-db3a-43f6-aeb3-6679e7e2b52e"},"source":["ner_df"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
entities_classembeddingsentitiesner_confidence
origin_index
0ORG[[0.044123999774456024, -0.47940999269485474, ...Barclays[0.9980999827384949, 1.0, 0.9994999766349792, ...
0CARDINAL[[0.044123999774456024, -0.47940999269485474, ...about one[0.9980999827384949, 1.0, 0.9994999766349792, ...
0ORG[[0.044123999774456024, -0.47940999269485474, ...BBC Panorama[0.9980999827384949, 1.0, 0.9994999766349792, ...
1DATE[[-0.03819400072097778, -0.24487000703811646, ...2008[0.9997000098228455, 0.848800003528595, 0.9908...
1GPE[[-0.03819400072097778, -0.24487000703811646, ...Manchester City[0.9997000098228455, 0.848800003528595, 0.9908...
1PERSON[[-0.03819400072097778, -0.24487000703811646, ...Sheikh Mansour[0.9997000098228455, 0.848800003528595, 0.9908...
1MONEY[[-0.03819400072097778, -0.24487000703811646, ...more than £3bn[0.9997000098228455, 0.848800003528595, 0.9908...
2ORG[[-0.05707800015807152, 0.3987399935722351, 0....BBC[0.9998999834060669, 0.9825999736785889, 1.0, ...
2ORG[[-0.05707800015807152, 0.3987399935722351, 0....Barclays[0.9998999834060669, 0.9825999736785889, 1.0, ...
2NORP[[-0.05707800015807152, 0.3987399935722351, 0....British[0.9998999834060669, 0.9825999736785889, 1.0, ...
2GPE[[-0.05707800015807152, 0.3987399935722351, 0....Abu Dhabi[0.9998999834060669, 0.9825999736785889, 1.0, ...
3ORG[[0.044123999774456024, -0.47940999269485474, ...Barclays[0.9987999796867371, 1.0, 0.9980000257492065, ...
4ORG[[-0.32710000872612, 0.4879100024700165, 0.416...RBS[0.9997000098228455, 0.9998999834060669, 0.999...
4ORG[[-0.32710000872612, 0.4879100024700165, 0.416...Lloyds TSB[0.9997000098228455, 0.9998999834060669, 0.999...
4ORG[[-0.32710000872612, 0.4879100024700165, 0.416...Barclays[0.9997000098228455, 0.9998999834060669, 0.999...
4DATE[[-0.32710000872612, 0.4879100024700165, 0.416...2008[0.9997000098228455, 0.9998999834060669, 0.999...
4MONEY[[-0.32710000872612, 0.4879100024700165, 0.416...7bn[0.9997000098228455, 0.9998999834060669, 0.999...
4LOC[[-0.32710000872612, 0.4879100024700165, 0.416...Gulf[0.9997000098228455, 0.9998999834060669, 0.999...
4GPE[[-0.32710000872612, 0.4879100024700165, 0.416...Qatar[0.9997000098228455, 0.9998999834060669, 0.999...
4GPE[[-0.32710000872612, 0.4879100024700165, 0.416...Abu Dhabi[0.9997000098228455, 0.9998999834060669, 0.999...
5ORG[[-0.03819400072097778, -0.24487000703811646, ...S&P[0.9994999766349792, 0.9878000020980835, 0.865...
5DATE[[-0.03819400072097778, -0.24487000703811646, ...500's[0.9994999766349792, 0.9878000020980835, 0.865...
5PERCENT[[-0.03819400072097778, -0.24487000703811646, ...71%[0.9994999766349792, 0.9878000020980835, 0.865...
5ORG[[-0.03819400072097778, -0.24487000703811646, ...Apple[0.9994999766349792, 0.9878000020980835, 0.865...
5ORG[[-0.03819400072097778, -0.24487000703811646, ...Apple[0.9994999766349792, 0.9878000020980835, 0.865...
5CARDINAL[[-0.03819400072097778, -0.24487000703811646, ...$840[0.9994999766349792, 0.9878000020980835, 0.865...
5PERCENT[[-0.03819400072097778, -0.24487000703811646, ...52%[0.9994999766349792, 0.9878000020980835, 0.865...
6PERSON[[0.28501999378204346, -0.4355500042438507, 0....Alice[0.9970999956130981, 0.9984999895095825, 0.922...
6PERSON[[0.28501999378204346, -0.4355500042438507, 0....Alice[0.9970999956130981, 0.9984999895095825, 0.922...
6PERSON[[0.28501999378204346, -0.4355500042438507, 0....Alice[0.9970999956130981, 0.9984999895095825, 0.922...
6PERSON[[0.28501999378204346, -0.4355500042438507, 0....Alice[0.9970999956130981, 0.9984999895095825, 0.922...
7PERSON[[0.2901900112628937, 0.8049700260162354, 0.31...Harry Potter[0.9995999932289124, 0.9952999949455261, 0.953...
8NORP[[-0.02076599933207035, 0.5784800052642822, 0....Germans[0.9599000215530396, 0.9986000061035156, 0.931...
9FAC[[0.058736998587846756, 0.6042199730873108, -0...New York Airport[0.9984999895095825, 0.9846000075340271, 0.864...
10ORG[[0.39910998940467834, 0.23048000037670135, -0...Microsoft[0.9997000098228455, 0.9990000128746033, 0.998...
11GPE[[-0.0445609986782074, 0.8070899844169617, 0.6...Germany[0.9965000152587891, 0.996999979019165, 0.9298...
12ORG[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...Non-GPE[0.9894000291824341, 0.9937000274658203, 0.984...
12LOC[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...Sahara Destert[0.9894000291824341, 0.9937000274658203, 0.984...
13PRODUCT[[-0.12313000112771988, 1.027899980545044, -0....Playstation[0.9980000257492065, 0.9919999837875366, 0.975...
14EVENT[[-0.3515700101852417, -0.1662600040435791, 0....hurricane Katrina[0.9976999759674072, 0.2168000042438507, 0.997...
15PERSON[[0.5689799785614014, -0.38422998785972595, 0....Lisa[0.9997000098228455, 0.9491999745368958, 0.618...
17LANGUAGE[[-0.2367600053548813, 0.15658999979496002, 0....English[0.9994999766349792, 0.45260000228881836, 0.70...
18ORDINAL[[-0.0853630006313324, -0.5337499976158142, 1....second[0.9986000061035156, 0.953000009059906, 0.6958...
19DATE[[-0.29739999771118164, 0.1302099972963333, 0....smaller than a day[0.641700029373169, 0.8838000297546387, 0.4259...
20PERCENT[[0.06162400171160698, 0.6707599759101868, 0.3...55%[0.9894000291824341, 0.9973999857902527, 0.984...
21MONEY[[0.3520300090312958, -0.1374099999666214, 0.2...50$[0.9984999895095825, 0.9837999939918518, 0.995...
22PERSON[[-0.5554199814796448, 0.0024757999926805496, ...50kg[0.9990000128746033, 0.9778000116348267, 0.961...
23ORDINAL[[-0.04256799817085266, -0.08424600213766098, ...first[0.996999979019165, 0.9940000176429749, 0.9404...
23ORDINAL[[-0.04256799817085266, -0.08424600213766098, ...second[0.996999979019165, 0.9940000176429749, 0.9404...
23PERSON[[-0.04256799817085266, -0.08424600213766098, ...David[0.996999979019165, 0.9940000176429749, 0.9404...
23ORDINAL[[-0.04256799817085266, -0.08424600213766098, ...first[0.996999979019165, 0.9940000176429749, 0.9404...
24CARDINAL[[-0.2671700119972229, 0.7479100227355957, -0....hundreds[0.9789000153541565, 0.9932000041007996, 0.816...
24ORG[[-0.2671700119972229, 0.7479100227355957, -0....NLU[0.9789000153541565, 0.9932000041007996, 0.816...
\n","
"],"text/plain":[" entities_class ... ner_confidence\n","origin_index ... \n","0 ORG ... [0.9980999827384949, 1.0, 0.9994999766349792, ...\n","0 CARDINAL ... [0.9980999827384949, 1.0, 0.9994999766349792, ...\n","0 ORG ... [0.9980999827384949, 1.0, 0.9994999766349792, ...\n","1 DATE ... [0.9997000098228455, 0.848800003528595, 0.9908...\n","1 GPE ... [0.9997000098228455, 0.848800003528595, 0.9908...\n","1 PERSON ... [0.9997000098228455, 0.848800003528595, 0.9908...\n","1 MONEY ... [0.9997000098228455, 0.848800003528595, 0.9908...\n","2 ORG ... [0.9998999834060669, 0.9825999736785889, 1.0, ...\n","2 ORG ... [0.9998999834060669, 0.9825999736785889, 1.0, ...\n","2 NORP ... [0.9998999834060669, 0.9825999736785889, 1.0, ...\n","2 GPE ... [0.9998999834060669, 0.9825999736785889, 1.0, ...\n","3 ORG ... [0.9987999796867371, 1.0, 0.9980000257492065, ...\n","4 ORG ... [0.9997000098228455, 0.9998999834060669, 0.999...\n","4 ORG ... [0.9997000098228455, 0.9998999834060669, 0.999...\n","4 ORG ... [0.9997000098228455, 0.9998999834060669, 0.999...\n","4 DATE ... [0.9997000098228455, 0.9998999834060669, 0.999...\n","4 MONEY ... [0.9997000098228455, 0.9998999834060669, 0.999...\n","4 LOC ... [0.9997000098228455, 0.9998999834060669, 0.999...\n","4 GPE ... [0.9997000098228455, 0.9998999834060669, 0.999...\n","4 GPE ... [0.9997000098228455, 0.9998999834060669, 0.999...\n","5 ORG ... [0.9994999766349792, 0.9878000020980835, 0.865...\n","5 DATE ... [0.9994999766349792, 0.9878000020980835, 0.865...\n","5 PERCENT ... [0.9994999766349792, 0.9878000020980835, 0.865...\n","5 ORG ... [0.9994999766349792, 0.9878000020980835, 0.865...\n","5 ORG ... [0.9994999766349792, 0.9878000020980835, 0.865...\n","5 CARDINAL ... [0.9994999766349792, 0.9878000020980835, 0.865...\n","5 PERCENT ... [0.9994999766349792, 0.9878000020980835, 0.865...\n","6 PERSON ... [0.9970999956130981, 0.9984999895095825, 0.922...\n","6 PERSON ... [0.9970999956130981, 0.9984999895095825, 0.922...\n","6 PERSON ... 
[0.9970999956130981, 0.9984999895095825, 0.922...\n","6 PERSON ... [0.9970999956130981, 0.9984999895095825, 0.922...\n","7 PERSON ... [0.9995999932289124, 0.9952999949455261, 0.953...\n","8 NORP ... [0.9599000215530396, 0.9986000061035156, 0.931...\n","9 FAC ... [0.9984999895095825, 0.9846000075340271, 0.864...\n","10 ORG ... [0.9997000098228455, 0.9990000128746033, 0.998...\n","11 GPE ... [0.9965000152587891, 0.996999979019165, 0.9298...\n","12 ORG ... [0.9894000291824341, 0.9937000274658203, 0.984...\n","12 LOC ... [0.9894000291824341, 0.9937000274658203, 0.984...\n","13 PRODUCT ... [0.9980000257492065, 0.9919999837875366, 0.975...\n","14 EVENT ... [0.9976999759674072, 0.2168000042438507, 0.997...\n","15 PERSON ... [0.9997000098228455, 0.9491999745368958, 0.618...\n","17 LANGUAGE ... [0.9994999766349792, 0.45260000228881836, 0.70...\n","18 ORDINAL ... [0.9986000061035156, 0.953000009059906, 0.6958...\n","19 DATE ... [0.641700029373169, 0.8838000297546387, 0.4259...\n","20 PERCENT ... [0.9894000291824341, 0.9973999857902527, 0.984...\n","21 MONEY ... [0.9984999895095825, 0.9837999939918518, 0.995...\n","22 PERSON ... [0.9990000128746033, 0.9778000116348267, 0.961...\n","23 ORDINAL ... [0.996999979019165, 0.9940000176429749, 0.9404...\n","23 ORDINAL ... [0.996999979019165, 0.9940000176429749, 0.9404...\n","23 PERSON ... [0.996999979019165, 0.9940000176429749, 0.9404...\n","23 ORDINAL ... [0.996999979019165, 0.9940000176429749, 0.9404...\n","24 CARDINAL ... [0.9789000153541565, 0.9932000041007996, 0.816...\n","24 ORG ... 
[0.9789000153541565, 0.9932000041007996, 0.816...\n","\n","[53 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"code","metadata":{"id":"rlcEvP9tOSiy","colab":{"base_uri":"https://localhost:8080/","height":361},"executionInfo":{"status":"ok","timestamp":1614375965327,"user_tz":-60,"elapsed":711,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"ad4a7e55-288e-4e5b-ac28-98920489a30e"},"source":["ner_type_to_viz = 'ORG'\n","ner_df[ner_df.entities_class == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring ORG labeled tokens in the dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":7},{"output_type":"display_data","data":{"image/png":"\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"ks6NDXg7RXG3","colab":{"base_uri":"https://localhost:8080/","height":363},"executionInfo":{"status":"ok","timestamp":1614375965582,"user_tz":-60,"elapsed":675,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"320d148a-8f22-474d-d48c-9e3fffe85916"},"source":["ner_type_to_viz = 'LOC'\n","ner_df[ner_df.entities_class == ner_type_to_viz]['entities'].value_counts().plot.bar(title='Most often occuring LOC labeled tokens in the dataset')"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{"tags":[]},"execution_count":8},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXQAAAFKCAYAAAANE6SOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAcPUlEQVR4nO3de7hcdX3v8feHREABA5pYhZAEFYUottiI9NgqR/QUaAuntSh41Hqp1HMO1qO2PbRaj+KlauulFyrgURG1INVq04oFq1SqRbkoUgPSRoQmKMq9XKoIfvvHWlsmw+y9J2GSlVn7/Xqe/exZ9++sWfOZ3/zWmplUFZKk6bdD1wVIkibDQJeknjDQJaknDHRJ6gkDXZJ6wkCXpJ4w0DdDkl9OsiHJ7UkO7LqerS3JyUl+v+s67q8khyTZOOa8L0zyhS3cTifLjljXqiSVZPEk1rcF2/+5JFdOcH2nJXnTpNY3tO7XJ/nw1lh3F7arQE9ydZK7kiwdGv/V9gBddT/XX0kefT9W8UfA8VW1K3Bzl0+abaGqXlZVb5z0eucLnDbc/jnJnUmuS/KeJLsPzfOYJH+Z5IYktya5LMmrkiyadL1d6zqgN1dV/WNVPXZLlp3kC9ukbc0XlkltZ7sK9Na3gGNnBpIcADyou3I2sRJY13UR20JXwZjk1cDbgN8GlgAH0+z3zyTZsZ3nUcCXgQ3AAVW1BDgaWAPs1kXd0nahqrabP+Bq4LXARQPj/gh4DVDAqnbcEuB04HrgmnaZHdppjwY+D9wK3AB8tB1/fruOO4DbgeeM2P4O7bquAb7XbmMJsFO7zMzy3wT+rR2+vf37mXYdLwauAG4GzgFWDqy/gJcB/wrcApwEZJZ9sRPwbuDb7d+7gZ0Gph8FXAr8e1vPYe34hwAfaJe5GfhkO/6FwBeGtlHAo9vbpwHvAc5u7+Mz2nFvaqcfAmwEXt3um+8ALxpY10OBv2nruQh40/D2BuZd1W578dD4B7f78tlD43dtH+sXt8MfBj61GcfVIcDGgeET2n12G3A58MsD014IfBH4s/YY+gZw6MD0JcD72vt/bXs/F43ax8B+wGeAm4ArB+9Xu7/WtvvrQuCNc+yv+xxrzH
Ksjtq/wLNonluPb5ebuf83AmcBDxla7tfabd4AvGagjoOAi9uavwu8c8z9fTXwW8Bl7T79KLDziOX2B74P3NPez1sGjs2TgE+1j9mXgUeNs59HbGMfmny4rV3mz4APD0z/S+C6ts7zgce1448Dfgjc1db2N2McSyOzaK6aZ9vO2Mf6/Q3hSf61D/wz2ju4P7CIJkRWsmmgnw78NU1rbBXwL8BL2mln0LwA7ADsDPzsqACbZfsvBtYDj6QJkb8CPjRqeUaEEk3Irm9rX0zzhPunoeX/FtgdWEETUofNUsuJwJeAhwHLgH8C3jjwxLoVeGZ7P/cC9munfYrmCbMH8ADgaaPCZsT9Oa1d51MG9t1pbBrod7d1PQA4ArgT2KOdfmb79yBgNU3reXMD/bB2G4tHLPNB4Iz29nUMvJiMcVwdwqYBczSwZ3s/n0PzAvaIgf10N/DK9n4+p90vM6H3CeAUYJf2sbkQ+I3hfdxO3wC8qD0WDqR5Uq8e2F9ntfM9nubFYez9xRzH6uD87fbXDzzOr6A5rpbTNBpOGdivM8u9F3gg8JPAD4D92+kXAM9vb+8KHDzm/r663U970jQ4rgBeNsuyP96HA+NOo3nxOai9Tx8BzhxnP49Y/wXAO9v7/lSaIP7w0H7djXsbVJcO1fGmofXNdSyNzKIxjo37bGfsY31LFtpaf9wb6K8F/oDmCf6Z9k5Xe8Atonn1Wj2w3G8A/9DePh04FVg+Yv3zBfpngf81MPxYmlfLxcPLM/pJ9mnaF5Z2eAea0Fs5sPzgC8xZwAmz1PJN4IiB4Z8Hrm5vnwK8a8QyjwB+RBuyYzxRhgP99BFPpMFA/4+h+/s9mi6RRe1+euzAtC1poT8PuG6WZd4KfKa9/UNmeSGcZdlDGAiYEdMvBY4a2E/fZuCdE00YPR/4CZqAe+DAtGOB84b3Mc2T+x+HtnMK8P8G9td+A9Pesjn7a65jdWD+36JpNS4fmO8KNn3H8YgRyy0fuu/HtLfPB94ALN2c/U3zvH7ewPDbgZNnWXbUcXoa8P8Hho8AvjHffh6x7hU0L9a7DIz7CwYCfWj+3dv9sWSgjjmDduhYGplF89U8znZm+9se+9ABPgQ8l+bBPX1o2lKaltM1A+OuoWmlAvwOEODCJOuSvHgztrvniPUupnkij2Ml8MdJbklyC83bqQzUBk3rcsadNC2dcWvZs729N03gD9sbuKmqbh6z3mEb5pl+Y1XdPTA8U/8ymv00uPx86xrlBmDpLCf/HtFOh6a19ogtWD8ASV6Q5NKBx+nxNMfVjGurfWa1Zvb9Sppj7zsDy55C01IfthJ48sx87bz/A3g4o/fXNSPWMZdxjtXfBk6qqsErfFYCnxio6QqaLo7B5WY7Rl8CPAb4RpKLkvziZtQ77nG/ucvPtZ+H7QncXFV3DIz78T5MsijJW5N8M8m/07wQwabHxibmOZZmy6LNqXmzbJdnzavqmiTfonklfsnQ5BtoWhQraVof0LzyXtsuex3wUoAkPwv8fZLzq2r9GJv+drveGTOv6N8dVeaIcRuAN1fVR8bY1ri1zJyEXdGOm9nOo2bZ/kOS7F5VtwxNu4OBk8tJRh08o+7TOK6n2U/Labq/oHlx2VwX0LSAf4Xm3QsASXYFDgd+rx319zT9wh/Y3A0kWUnTpXAocEFV3ZPkUpon3oy9kmQg1FfQ9HdvaOtbOvTCNsoG4PNV9cwRNSyi2V970/TRz2xjNqMel7mO1eXtuP8G/F2S66rq4wN1vbiqvjiirlVz1EBV/StwbJIdaB6jjyV56FBA3l+bewzOup9H+A6wR5JdBmpeMbDN59J0mz6DJsyX0JyHmjk2NqltvmNptiwao+YtfR5uty10aIL86cMHS1XdQ/Nkf3OS3dqd+iqaE2UkOTrJzAF9M83O+VE7/F2aPsfZnAG8Msk+bYi8heZExqgn7/XtegfXdzLwu0ke19ayJMnRY9/j+9by2iTL2ss4X0d7H2lOyr0oya
FJdkiyV5L9quo7NN0+f55kjyQPSPLUdpmvAY9L8lNJdgZev4V13Uf7mPwV8PokD0qyH/CCMRbdKcnOM380/ZlvAP40yWFt/atoHu+NNO/coOm2+C9J/nDmhSnJo5N8ePjyxhF2oTkmrm+XexFNq2rQw4DfbLd/NM05kbPb/Xsu8I4kD273/aOSPG3Edv4WeEyS57freUCSJyXZf8T+Wk1zInI2o461cY7VdTTdliclObIddzLNc2dle/+XJTlqrh02I8nzkiyrqh/RnNSHe59bk/JdYPnMFU1jmHU/D89YVdfQnNR9Q5Id25D9pYFZdqN5wb6RpvHzlhG1DT4Gcx5Lc2TRfDXPl1Oz2m4Dvaq+WVUXzzL55TQtzquAL9D0g72/nfYk4MtJbqdpVb2iqq5qp70e+GD7NufZI9b7fprQOJ/m8snvt9saVd+dwJuBL7brO7iqPkFzyd2Z7Vu2r9O0LLfEm2gOvsuAfwa+0o6jqi6kOaHyLpoTdp/n3tba82newXyDpo/7/7TL/AvNCc2/p7nKZtLX+h5P06K5jmYfnkHz5JjL7TT98jN/T6+qt9O0xP+I5mqKmcsTD62qH7T35Zs0V3qsAtYluRX4OM3+um2uDVbV5cA7aN4NfBc4gOaqlkFfBvaleTf4ZuBXq+rGdtoLgB1p3h3eDHyMEd0/VXUbTQv5GJrW9HU0x8ZO7SzH03QbXEfTZzrru41RxxpjHqtV9TXgF4H3Jjkc+GOa58W5SW6jOUH65Nm2PeQwmv19e7ueY6rqP8Zcdlyfo3khui7JDfPNPMZ+HvZcmvt7E03DYLBL93SaLphraR7fLw0t+z5gdfsYfHKMY2lkFo1R8ybbmW8fDMqmXYXSZCR5G/Dwqpqr5SlpgrbbFrqmS5L9kjwhjYNousw+0XVd0kKyXZ4U1VTajaabZU+at5/voPmsgKRtxC4XSeoJu1wkqScMdEnqic760JcuXVqrVq3qavOSNJUuueSSG6pq2ahpnQX6qlWruPji2S4zlySNkmTWr4mwy0WSesJAl6SeMNAlqScMdEnqCQNdknpi3kBP8v4k30vy9VmmJ8mfJFmf5pfXnzj5MiVJ8xmnhX4azddmzuZwmq8a3ZfmB07fc//LkiRtrnkDvarOp/nu4NkcRfNblFVVXwJ2T7LFPw8mSdoyk/hg0V5s+tuIG9tx3xmeMclxNK14VqyY6xe3th+rTvhU1yX0ytVv/YWuS+gNj83J6sOxuU1PilbVqVW1pqrWLFs28pOrkqQtNIlAv5ZNfxB4eTtOkrQNTSLQ1wIvaK92ORi4tf0xXUnSNjRvH3qSM4BDgKVJNtL8sOoDAKrqZOBs4AhgPXAnzY8XS5K2sXkDvaqOnWd6Af97YhVJkraInxSVpJ4w0CWpJwx0SeoJA12SesJAl6SeMNAlqScMdEnqCQNdknrCQJeknjDQJaknDHRJ6gkDXZJ6wkCXpJ4w0CWpJwx0SeoJA12SesJAl6SeMNAlqScMdEnqCQNdknrCQJeknjDQJaknDHRJ6gkDXZJ6wkCXpJ4w0CWpJwx0SeoJA12SesJAl6SeMNAlqScMdEnqCQNdknpirEBPcliSK5OsT3LCiOkrkpyX5KtJLktyxORLlSTNZd5AT7IIOAk4HFgNHJtk9dBsrwXOqqoDgWOAP590oZKkuY3TQj8IWF9VV1XVXcCZwFFD8xTw4Pb2EuDbkytRkjSOxWPMsxewYWB4I/DkoXleD5yb5OXALsAzJlKdJGlskzopeixwWlUtB44APpTkPutOclySi5NcfP31109o05IkGC/QrwX2Hhhe3o4b9BLgLICqugDYGVg6vKKqOrWq1lTVmmXLlm1ZxZKkkcYJ9IuAfZPsk2RHmpOea4fm+TfgUIAk+9MEuk1wSdqG5g30qrobOB44B7iC5mqWdUlOTHJkO9urgZcm+RpwBvDCqqqtVbQk6b7GOSlKVZ0NnD007nUDty8HnjLZ0iRJm8NPikpSTxjokt
QTBrok9YSBLkk9YaBLUk8Y6JLUEwa6JPWEgS5JPWGgS1JPGOiS1BMGuiT1hIEuST1hoEtSTxjoktQTBrok9YSBLkk9YaBLUk8Y6JLUEwa6JPWEgS5JPWGgS1JPGOiS1BMGuiT1hIEuST1hoEtSTxjoktQTBrok9YSBLkk9YaBLUk8Y6JLUEwa6JPWEgS5JPTFWoCc5LMmVSdYnOWGWeZ6d5PIk65L8xWTLlCTNZ/F8MyRZBJwEPBPYCFyUZG1VXT4wz77A7wJPqaqbkzxsaxUsSRptnBb6QcD6qrqqqu4CzgSOGprnpcBJVXUzQFV9b7JlSpLmM06g7wVsGBje2I4b9BjgMUm+mORLSQ6bVIGSpPHM2+WyGevZFzgEWA6cn+SAqrplcKYkxwHHAaxYsWJCm5YkwXgt9GuBvQeGl7fjBm0E1lbVD6vqW8C/0AT8Jqrq1KpaU1Vrli1btqU1S5JGGCfQLwL2TbJPkh2BY4C1Q/N8kqZ1TpKlNF0wV02wTknSPOYN9Kq6GzgeOAe4AjirqtYlOTHJke1s5wA3JrkcOA/47aq6cWsVLUm6r7H60KvqbODsoXGvG7hdwKvaP0lSB/ykqCT1hIEuST1hoEtSTxjoktQTBrok9YSBLkk9YaBLUk8Y6JLUEwa6JPWEgS5JPWGgS1JPGOiS1BMGuiT1hIEuST1hoEtSTxjoktQTBrok9YSBLkk9YaBLUk8Y6JLUEwa6JPWEgS5JPWGgS1JPGOiS1BMGuiT1hIEuST1hoEtSTxjoktQTBrok9YSBLkk9YaBLUk8Y6JLUEwa6JPXEWIGe5LAkVyZZn+SEOeZ7VpJKsmZyJUqSxjFvoCdZBJwEHA6sBo5NsnrEfLsBrwC+POkiJUnzG6eFfhCwvqquqqq7gDOBo0bM90bgbcD3J1ifJGlM4wT6XsCGgeGN7bgfS/JEYO+q+tQEa5MkbYb7fVI0yQ7AO4FXjzHvcUkuTnLx9ddff383LUkaME6gXwvsPTC8vB03Yzfg8cA/JLkaOBhYO+rEaFWdWlVrqmrNsmXLtrxqSdJ9jBPoFwH7JtknyY7AMcDamYlVdWtVLa2qVVW1CvgScGRVXbxVKpYkjTRvoFfV3cDxwDnAFcBZVbUuyYlJjtzaBUqSxrN4nJmq6mzg7KFxr5tl3kPuf1mSpM3lJ0UlqScMdEnqCQNdknrCQJeknjDQJaknDHRJ6gkDXZJ6wkCXpJ4w0CWpJwx0SeoJA12SesJAl6SeMNAlqScMdEnqCQNdknrCQJeknjDQJaknDHRJ6gkDXZJ6wkCXpJ4w0CWpJwx0SeoJA12SesJAl6SeMNAlqScMdEnqCQNdknrCQJeknjDQJaknDHRJ6gkDXZJ6wkCXpJ4YK9CTHJbkyiTrk5wwYvqrklye5LIkn02ycvKlSpLmMm+gJ1kEnAQcDqwGjk2yemi2rwJrquoJwMeAt0+6UEnS3MZpoR8ErK+qq6rqLuBM4KjBGarqvKq6sx38ErB8smVKkuYzTqDvBWwYGN7YjpvNS4BP35+iJEmbb/EkV5bkecAa4GmzTD8OOA5gxYoVk9y0JC1447TQrwX2Hhhe3o7bRJJnAK8BjqyqH4xaUVWdWlVrqmrNsmXLtqReSdIsxgn0i4B9k+yTZEfgGGDt4AxJDgROoQnz702+TEnSfOYN9Kq6GzgeOAe4AjirqtYlOTHJke1sfwjsCvxlkkuTrJ1ldZKkrWSsPvSqOhs4e2jc6wZuP2PCdUmSNpOfFJWknjDQJaknDHRJ6gkDXZJ6wkCXpJ4w0CWpJwx0SeoJA12SesJAl6SeMNAlqScMdEnqCQNdknrCQJeknjDQJaknDHRJ6gkDXZJ6wkCXpJ4w0CWpJwx0SeoJA12SesJAl6SeMNAlqScMdEnqCQNdknrCQJeknjDQJaknDHRJ6gkDXZJ6wkCXpJ4w0CWpJwx0SeoJA12SesJAl6SeGCvQkxyW5Mok65OcMGL6Tkk+2k7/cpJVky5Ukj
S3eQM9ySLgJOBwYDVwbJLVQ7O9BLi5qh4NvAt426QLlSTNbZwW+kHA+qq6qqruAs4Ejhqa5yjgg+3tjwGHJsnkypQkzWfxGPPsBWwYGN4IPHm2earq7iS3Ag8FbhicKclxwHHt4O1JrtySojXSUob29/YovndbiDw2J2vlbBPGCfSJqapTgVO35TYXiiQXV9WaruuQhnlsbjvjdLlcC+w9MLy8HTdyniSLgSXAjZMoUJI0nnEC/SJg3yT7JNkROAZYOzTPWuDX2tu/CnyuqmpyZUqS5jNvl0vbJ348cA6wCHh/Va1LciJwcVWtBd4HfCjJeuAmmtDXtmVXlrZXHpvbSGxIS1I/+ElRSeoJA12SesJAn1JJ9hlnnKSFw0CfXh8fMe5j27wKaUCSD7X/X9F1LQvRNv1gke6/JPsBjwOWJPmVgUkPBnbupirpx346yZ7Ai5OcDmzyFSBVdVM3ZS0MBvr0eSzwi8DuwC8NjL8NeGknFUn3Ohn4LPBI4BI2DfRqx2sr8bLFKdR+A+b/raq3dF2LNEqS91TV/+y6joXGQJ9SSS6sqoO6rkMalOQhc023y2XrMtCnVJJ3AQ8APgrcMTO+qr7SWVFa8JJ8i6ZrBYb6z4GqKrtctiIDfUolOW/E6Kqqp2/zYiRtFwx0SROX5KmjxlfV+du6loXEQJ9SSX4CeAuwZ1Ud3v4s4M9U1fs6Lk0iyd8MDO5M88tnl/gOcusy0KdUkk8DHwBeU1U/2X4P/Ver6oCOS5PuI8newLur6lld19JnflJ0ei2tqrOAH0HzNcfAPd2WJM1qI7B/10X0nR8sml53JHko7RUFSQ4Gbu22JKmR5E+592qXHYADAa/A2soM9On1KppfinpUki8Cy4Cjuy1J+rGL2/8F3A38RVX9U4f1LAj2oU+pJDvRdLE8luZ63yuBHarqB50WpgUtyVHA8qo6qR2+kKaxUcDvVJVfILcV2Yc+vS6oqrural1Vfb2qfghc0HVRWvB+h01/c3hH4KeBQwC/CmArs8tlyiR5OLAX8MAkB3Lvp/EeDDyos8Kkxo5VtWFg+Avtx/1vSrJLV0UtFAb69Pl54IXAcuAd3BvotwG/11FN0ow9Bgeq6viBwWXbuJYFxz70KZXkWVU16kcupM4k+QjwD1X13qHxvwEcUlXHdlPZwmALfXotT/Jgmpb5e4EnAidU1bndlqUF7pXAJ5M8l3svU/xpYCfgv3dW1QJhC31KJfla+wnRnwdeBrwW+FBVPbHj0iSSPJ3ml7UA1lXV57qsZ6GwhT69ZvrOjwBOr6p1SYa/rlTqRBvghvg25mWL0+uSJOfSBPo5SXaj/RoASQuTXS5TKskOwE8BV1XVLe3XAOxVVZd1XJqkjthCn14FrAZ+sx3eheZrSiUtULbQp1SS99B0sTy9qvZPsgdwblU9qePSJHXEk6LT68lV9cQkXwWoqpuT7Nh1UZK6Y5fL9PphkkXc+/W5y/CkqLSgGejT60+ATwAPS/Jm4As0P0knaYGyD32KJdkPOJTmmvTPVtUVHZckqUMG+pRKcgCwXzt4RVV9vct6JHXPQJ8ySZYAfw3sDVxG0zo/APg34Kiq+vcOy5PUIQN9yiT5E+Auml9/+VE7bhHwB8ADq+rlXdYnqTsG+pRJcjnwhKq6e2j8YuCfq8pfVpcWKK9ymT53DYc5QDvO3xOVFjA/WDR9dh766bkZofnOaUkLlF0uUybJeXNNr6r/uq1qkbR9MdAlqSfsQ5eknjDQJaknDHRJ6gmvcpli7Xeg78vAD1tU1fndVSSpSwb6lEry68ArgOXApcDBwAXA07usS1J37HKZXq8AngRc016qeCBwS7clSeqSgT69vl9V3wdIslNVfQN4bMc1SeqQXS7Ta2OS3YFPAp9JcjNwTcc1SeqQHyzqgSRPA5YAf1dVd3Vdj6RuGOhTqP263HVVtd+8M0taMOxDn0JVdQ9wZZIVXdciafthH/r02gNYl+RC4I6ZkVV1ZHclSe
qSgT69fr/rAiRtX+xDl6SesA99SiU5OMlFSW5PcleSe5L4A9HSAmagT68/A44F/hV4IPDrwEmdViSpUwb6FKuq9cCiqrqnqj4AHNZ1TZK640nR6XVnkh2BS5O8HfgOvkBLC5oBML2eT/P4HU9z2eLewLM6rUhSp7zKRZJ6wi6XKZXkKcDrgZUMPI5V9ciuapLULVvoUyrJN4BXApcA98yMr6obOytKUqdsoU+vW6vq010XIWn7YQt9yiR5Ynvz2cAi4K+AH8xMr6qvdFGXpO4Z6FMmyXlzTK6q8jdFpQXKQJeknrAPfYol+QXgccDOM+Oq6sTuKpLUJT9YNKWSnAw8B3g5EOBomksYJS1QdrlMqSSXVdUTBv7vCny6qn6u69okdcMW+vT6j/b/nUn2BH4IPKLDeiR1zD706fW3SXYH/hD4ClDAe7stSVKX7HLpgSQ7ATtX1a1d1yKpO3a5TJkkT0ry8IHhFwBnAW9M8pDuKpPUNQN9+pwC3AWQ5KnAW4HTgVuBUzusS1LH7EOfPouq6qb29nOAU6vq48DHk1zaYV2SOmYLffosSjLzQnwo8LmBab5ASwuYATB9zgA+n+QGmksX/xEgyaNpul0kLVBe5TKFkhxMc835uVV1RzvuMcCuftuitHAZ6JLUE/ahS1JPGOiS1BMGuiT1hIEuST1hoEtST/wn7Yqv8vpp2t0AAAAASUVORK5CYII=\n","text/plain":["
"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"id":"AuB4bjK1TlCk"},"source":["ss"],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/examples/colab/component_examples/named_entity_recognition_(NER)/aspect_based_ner_sentiment_restaurants.ipynb b/examples/colab/component_examples/named_entity_recognition_(NER)/aspect_based_ner_sentiment_restaurants.ipynb index a3f92eb9..faa45c53 100644 --- a/examples/colab/component_examples/named_entity_recognition_(NER)/aspect_based_ner_sentiment_restaurants.ipynb +++ b/examples/colab/component_examples/named_entity_recognition_(NER)/aspect_based_ner_sentiment_restaurants.ipynb @@ -1,357 +1 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "aspect_based_ner_sentiment_restaurants.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "9ayP-N_Cqr9K" - }, - "source": [ - "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", - "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/component_examples/named_entity_recognition_(NER)/aspect_based_ner_sentiment_restaurants.ipynb)\n", - "\n", - "\n", - "\n", - "\n", - "Automatically detect positive, negative and neutral aspects about restaurants from user reviews. Instead of labelling the entire review as negative or positive, this model helps identify which exact phrases relate to sentiment identified in the review." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "NqnAGVadANyZ" - }, - "source": [ - "import os\n", - "! apt-get update -qq > /dev/null \n", - "# Install java\n", - "! 
apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n", - "os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n", - "os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n", - "! pip install nlu pyspark==2.4.7\n", - "import nlu\n" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 220 - }, - "id": "c-9dIJVco9Xf", - "outputId": "60774c61-4f27-49d4-d046-34792f08268b" - }, - "source": [ - "pipe = nlu.load('en.ner.aspect_sentiment')\n", - "data = 'We loved our Thai-style main which amazing with lots of flavours very impressive for vegetarian. But the service was below average and the chips were too terrible to finish.'\n", - "df = pipe.predict([data], output_level='chunk')\n", - "df" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ner_confidenceentities_confidenceword_embeddingsentities
origin_index
0[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...POS[[-0.05083499848842621, 0.2482600063085556, -0...Thai-style main
0[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...POS[[-0.05083499848842621, 0.2482600063085556, -0...flavours
0[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...POS[[-0.05083499848842621, 0.2482600063085556, -0...vegetarian
0[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...NEG[[-0.05083499848842621, 0.2482600063085556, -0...service
0[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...NEG[[-0.05083499848842621, 0.2482600063085556, -0...chips
\n", - "
" - ], - "text/plain": [ - " ner_confidence ... entities\n", - "origin_index ... \n", - "0 [1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997... ... Thai-style main\n", - "0 [1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997... ... flavours\n", - "0 [1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997... ... vegetarian\n", - "0 [1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997... ... service\n", - "0 [1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997... ... chips\n", - "\n", - "[5 rows x 4 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 11 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 105 - }, - "id": "WFtrCQSnp_Ie", - "outputId": "003874b1-e5d9-4296-cfac-7e3f974792e1" - }, - "source": [ - "df = pipe.predict([data], output_level='document')\n", - "df" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ner_confidencedocumententities_confidenceword_embeddingsentities
origin_index
0[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...We loved our Thai-style main which amazing wit...[POS, POS, POS, NEG, NEG][[-0.05083499848842621, 0.2482600063085556, -0...[Thai-style main, flavours, vegetarian, servic...
\n", - "
" - ], - "text/plain": [ - " ner_confidence ... entities\n", - "origin_index ... \n", - "0 [1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997... ... [Thai-style main, flavours, vegetarian, servic...\n", - "\n", - "[1 rows x 5 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 14 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "-gM60xyIxx5M" - }, - "source": [ - "" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 133 - }, - "id": "GCFVSTRKqIgi", - "outputId": "8ed02957-f9bd-4da3-8695-4e6e2831ce5d" - }, - "source": [ - "data = 'We loved our Thai-style main which amazing with lots of flavours very impressive for vegetarian. But the service was below average and the chips were too terrible to finish.'\n", - "df = pipe.predict([data], output_level='sentence')\n", - "df" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ner_confidencesentenceentities_confidenceword_embeddingsentities
origin_index
0[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...We loved our Thai-style main which amazing wit...[POS, POS, POS, NEG, NEG][[-0.05083499848842621, 0.2482600063085556, -0...[Thai-style main, flavours, vegetarian, servic...
0[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...But the service was below average and the chip...[POS, POS, POS, NEG, NEG][[-0.05083499848842621, 0.2482600063085556, -0...[Thai-style main, flavours, vegetarian, servic...
\n", - "
" - ], - "text/plain": [ - " ner_confidence ... entities\n", - "origin_index ... \n", - "0 [1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997... ... [Thai-style main, flavours, vegetarian, servic...\n", - "0 [1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997... ... [Thai-style main, flavours, vegetarian, servic...\n", - "\n", - "[2 rows x 5 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 15 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Yao4hlfyqQNg" - }, - "source": [ - "nlu.print_all_model_kinds_for_action('pos')" - ], - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"aspect_based_ner_sentiment_restaurants.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyMaOK1zlx/utJMFRRzUDN5Z"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"markdown","metadata":{"id":"9ayP-N_Cqr9K"},"source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/component_examples/named_entity_recognition_(NER)/aspect_based_ner_sentiment_restaurants.ipynb)\n","\n","\n","\n","\n","Automatically detect positive, negative and neutral aspects about restaurants from user reviews. 
Instead of labelling the entire review as negative or positive, this model helps identify which exact phrases relate to sentiment identified in the review."]},{"cell_type":"code","metadata":{"id":"NqnAGVadANyZ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614376517904,"user_tz":-60,"elapsed":67506,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"4c6518b8-bc24-42ee-e3e4-546d96f0f749"},"source":["import os\n","! apt-get update -qq > /dev/null \n","# Install java\n","! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null\n","os.environ[\"JAVA_HOME\"] = \"/usr/lib/jvm/java-8-openjdk-amd64\"\n","os.environ[\"PATH\"] = os.environ[\"JAVA_HOME\"] + \"/bin:\" + os.environ[\"PATH\"]\n","! pip install nlu pyspark==2.3.4 > /dev/null\n","\n","import nlu\n"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Collecting pyspark==2.4.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz (217.9MB)\n","\u001b[K |████████████████████████████████| 217.9MB 70kB/s \n","\u001b[?25hCollecting py4j==0.10.7\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)\n","\u001b[K |████████████████████████████████| 204kB 20.9MB/s \n","\u001b[?25hBuilding wheels for collected packages: pyspark\n"," Building wheel for pyspark (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pyspark: filename=pyspark-2.4.7-py2.py3-none-any.whl size=218279465 sha256=37f941c42432e737a6396f822d2bfc63b586e9a35ca673ed38a2c6141ffa743b\n"," Stored in directory: /root/.cache/pip/wheels/34/1f/2e/1e7460f80acf26b08dbb8c53d7ff9e07146f2a68dd5c732be5\n","Successfully built pyspark\n","Installing collected packages: py4j, pyspark\n","Successfully installed py4j-0.10.7 pyspark-2.4.7\n","Looking in indexes: https://test.pypi.org/simple/, https://pypi.org/simple\n","Collecting nlu_test==1.1.3rc2\n","\u001b[?25l Downloading https://test-files.pythonhosted.org/packages/5c/84/241410ba610c9281afc8e1cffaa352f5ca83fe6e2574f1cfcdf3334dc81f/nlu_test-1.1.3rc2-py3-none-any.whl (158kB)\n","\u001b[K |████████████████████████████████| 163kB 5.3MB/s \n","\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (1.19.5)\n","Collecting dataclasses\n"," Downloading https://files.pythonhosted.org/packages/26/2f/1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d/dataclasses-0.6-py3-none-any.whl\n","Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (1.1.5)\n","Collecting spark-nlp<2.8,>=2.7.1\n","\u001b[?25l Downloading https://files.pythonhosted.org/packages/8d/a5/a5130215b43f3bd0e98bd16c471d36dafeab8855ca17789d4927337fa7dc/spark_nlp-2.7.4-py2.py3-none-any.whl (139kB)\n","\u001b[K |████████████████████████████████| 143kB 8.5MB/s \n","\u001b[?25hRequirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.7/dist-packages (from nlu_test==1.1.3rc2) (3.0.0)\n","Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->nlu_test==1.1.3rc2) (2018.9)\n","Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->nlu_test==1.1.3rc2) (2.8.1)\n","Requirement already satisfied: six>=1.5 in 
/usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->nlu_test==1.1.3rc2) (1.15.0)\n","Installing collected packages: dataclasses, spark-nlp, nlu-test\n","Successfully installed dataclasses-0.6 nlu-test-1.1.3rc2 spark-nlp-2.7.4\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":394},"id":"c-9dIJVco9Xf","executionInfo":{"status":"ok","timestamp":1614376583281,"user_tz":-60,"elapsed":132870,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"a9cd0d1a-5f00-4b72-9c0e-d2b7d9e9e791"},"source":["pipe = nlu.load('en.ner.aspect_sentiment')\n","data = 'We loved our Thai-style main which amazing with lots of flavours very impressive for vegetarian. But the service was below average and the chips were too terrible to finish.'\n","df = pipe.predict([data], output_level='chunk')\n","df"],"execution_count":2,"outputs":[{"output_type":"stream","text":["ner_aspect_based_sentiment download started this may take some time.\n","Approximate size to download 21.3 MB\n","[OK!]\n","glove_6B_300 download started this may take some time.\n","Approximate size to download 426.2 MB\n","[OK!]\n","\n","\n","\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
word_embeddingsner_confidenceentitiesentities_class
origin_index
0[[-0.05083499848842621, 0.2482600063085556, -0...[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...Thai-style mainPOS
0[[-0.05083499848842621, 0.2482600063085556, -0...[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...flavoursPOS
0[[-0.05083499848842621, 0.2482600063085556, -0...[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...vegetarianPOS
0[[-0.05083499848842621, 0.2482600063085556, -0...[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...serviceNEG
0[[-0.05083499848842621, 0.2482600063085556, -0...[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...chipsNEG
\n","
"],"text/plain":[" word_embeddings ... entities_class\n","origin_index ... \n","0 [[-0.05083499848842621, 0.2482600063085556, -0... ... POS\n","0 [[-0.05083499848842621, 0.2482600063085556, -0... ... POS\n","0 [[-0.05083499848842621, 0.2482600063085556, -0... ... POS\n","0 [[-0.05083499848842621, 0.2482600063085556, -0... ... NEG\n","0 [[-0.05083499848842621, 0.2482600063085556, -0... ... NEG\n","\n","[5 rows x 4 columns]"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":174},"id":"WFtrCQSnp_Ie","executionInfo":{"status":"ok","timestamp":1614376588254,"user_tz":-60,"elapsed":137835,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"233fd26c-0c9e-45d8-fc1c-7c16aad41e86"},"source":["df = pipe.predict([data], output_level='document')\n","df"],"execution_count":3,"outputs":[{"output_type":"stream","text":["\n","\n","\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
word_embeddingsner_confidenceentities_confidencedocumententities
origin_index
0[[-0.05083499848842621, 0.2482600063085556, -0...[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...[POS, POS, POS, NEG, NEG]We loved our Thai-style main which amazing wit...[Thai-style main, flavours, vegetarian, servic...
\n","
"],"text/plain":[" word_embeddings ... entities\n","origin_index ... \n","0 [[-0.05083499848842621, 0.2482600063085556, -0... ... [Thai-style main, flavours, vegetarian, servic...\n","\n","[1 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":204},"id":"GCFVSTRKqIgi","executionInfo":{"status":"ok","timestamp":1614376592010,"user_tz":-60,"elapsed":141577,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"efb9945a-4f48-4dbd-f15e-d24b9c56ffe3"},"source":["data = 'We loved our Thai-style main which amazing with lots of flavours very impressive for vegetarian. But the service was below average and the chips were too terrible to finish.'\n","df = pipe.predict([data], output_level='sentence')\n","df"],"execution_count":4,"outputs":[{"output_type":"stream","text":["\n","\n","\n","\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
word_embeddingsner_confidencesentenceentities_confidenceentities
origin_index
0[[-0.05083499848842621, 0.2482600063085556, -0...[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...We loved our Thai-style main which amazing wit...[POS, POS, POS, NEG, NEG][Thai-style main, flavours, vegetarian, servic...
0[[-0.05083499848842621, 0.2482600063085556, -0...[1.0, 1.0, 1.0, 0.5135999917984009, 0.93879997...But the service was below average and the chip...[POS, POS, POS, NEG, NEG][Thai-style main, flavours, vegetarian, servic...
\n","
"],"text/plain":[" word_embeddings ... entities\n","origin_index ... \n","0 [[-0.05083499848842621, 0.2482600063085556, -0... ... [Thai-style main, flavours, vegetarian, servic...\n","0 [[-0.05083499848842621, 0.2482600063085556, -0... ... [Thai-style main, flavours, vegetarian, servic...\n","\n","[2 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"Yao4hlfyqQNg","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614376592012,"user_tz":-60,"elapsed":141571,"user":{"displayName":"Christian Kasim Loan","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GjqAD-ircKP-s5Eh6JSdkDggDczfqQbJGU_IRb4Hw=s64","userId":"14469489166467359317"}},"outputId":"eaa48aab-6fb2-480a-beab-36982127d3d0"},"source":["nlu.print_all_model_kinds_for_action('pos')"],"execution_count":5,"outputs":[{"output_type":"stream","text":["For language NLU provides the following Models : \n","nlu.load('nl.pos') returns Spark NLP model pos_ud_alpino\n","nlu.load('nl.pos.ud_alpino') returns Spark NLP model pos_ud_alpino\n","For language NLU provides the following Models : \n","nlu.load('en.pos') returns Spark NLP model pos_anc\n","nlu.load('en.pos.anc') returns Spark NLP model pos_anc\n","nlu.load('en.pos.ud_ewt') returns Spark NLP model pos_ud_ewt\n","For language NLU provides the following Models : \n","nlu.load('fr.pos') returns Spark NLP model pos_ud_gsd\n","nlu.load('fr.pos.ud_gsd') returns Spark NLP model pos_ud_gsd\n","For language NLU provides the following Models : \n","nlu.load('de.pos.ud_hdt') returns Spark NLP model pos_ud_hdt\n","nlu.load('de.pos') returns Spark NLP model pos_ud_hdt\n","For language NLU provides the following Models : \n","nlu.load('it.pos') returns Spark NLP model pos_ud_isdt\n","nlu.load('it.pos.ud_isdt') returns Spark NLP model pos_ud_isdt\n","For language NLU provides the following Models : \n","nlu.load('nb.pos.ud_bokmaal') returns Spark NLP model pos_ud_bokmaal\n","For 
language NLU provides the following Models : \n","nlu.load('nn.pos') returns Spark NLP model pos_ud_nynorsk\n","nlu.load('nn.pos.ud_nynorsk') returns Spark NLP model pos_ud_nynorsk\n","For language NLU provides the following Models : \n","nlu.load('pl.pos') returns Spark NLP model pos_ud_lfg\n","nlu.load('pl.pos.ud_lfg') returns Spark NLP model pos_ud_lfg\n","For language NLU provides the following Models : \n","nlu.load('pt.pos.ud_bosque') returns Spark NLP model pos_ud_bosque\n","nlu.load('pt.pos') returns Spark NLP model pos_ud_bosque\n","For language NLU provides the following Models : \n","nlu.load('ru.pos.ud_gsd') returns Spark NLP model pos_ud_gsd\n","nlu.load('ru.pos') returns Spark NLP model pos_ud_gsd\n","For language NLU provides the following Models : \n","nlu.load('es.pos') returns Spark NLP model pos_ud_gsd\n","nlu.load('es.pos.ud_gsd') returns Spark NLP model pos_ud_gsd\n","For language NLU provides the following Models : \n","nlu.load('ar.pos') returns Spark NLP model pos_ud_padt\n","For language NLU provides the following Models : \n","nlu.load('hy.pos') returns Spark NLP model pos_ud_armtdp\n","For language NLU provides the following Models : \n","nlu.load('eu.pos') returns Spark NLP model pos_ud_bdt\n","For language NLU provides the following Models : \n","nlu.load('bn.pos') returns Spark NLP model pos_msri\n","For language
NLU provides the following Models : \n","nlu.load('br.pos') returns Spark NLP model pos_ud_keb\n","For language NLU provides the following Models : \n","nlu.load('bg.pos') returns Spark NLP model pos_ud_btb\n","nlu.load('bg.pos.ud_btb') returns Spark NLP model pos_ud_btb\n","For language NLU provides the following Models : \n","nlu.load('ca.pos') returns Spark NLP model pos_ud_ancora\n","For language NLU provides the following Models : \n","nlu.load('cs.pos') returns Spark NLP model pos_ud_pdt\n","nlu.load('cs.pos.ud_pdt') returns Spark NLP model pos_ud_pdt\n","For language NLU provides the following Models : \n","nlu.load('fi.pos.ud_tdt') returns Spark NLP model pos_ud_tdt\n","nlu.load('fi.pos') returns Spark NLP model pos_ud_tdt\n","For language NLU provides the following Models : \n","nlu.load('gl.pos') returns Spark NLP model pos_ud_treegal\n","For language NLU provides the following Models : \n","nlu.load('el.pos') returns Spark NLP model pos_ud_gdt\n","nlu.load('el.pos.ud_gdt') returns Spark NLP model pos_ud_gdt\n","For language NLU provides the following Models : \n","nlu.load('he.pos') returns Spark NLP model pos_ud_htb\n","nlu.load('he.pos.ud_htb') returns Spark NLP model pos_ud_htb\n","For language NLU provides the following Models : \n","nlu.load('hi.pos') returns Spark NLP model pos_ud_hdtb\n","For language NLU provides the following Models : \n","nlu.load('hu.pos') returns Spark NLP model pos_ud_szeged\n","nlu.load('hu.pos.ud_szeged') returns Spark NLP model pos_ud_szeged\n","For language NLU provides the following Models : \n","nlu.load('id.pos') returns Spark NLP model pos_ud_gsd\n","For language NLU provides the following Models : \n","nlu.load('ga.pos') returns Spark NLP model pos_ud_idt\n","For language NLU provides the following Models : \n","nlu.load('da.pos') returns Spark NLP model pos_ud_ddt\n","For language NLU provides the following Models : \n","nlu.load('ja.pos') returns Spark NLP model pos_ud_gsd\n","nlu.load('ja.pos.ud_gsd') returns 
Spark NLP model pos_ud_gsd\n","For language NLU provides the following Models : \n","nlu.load('la.pos') returns Spark NLP model pos_ud_llct\n","For language NLU provides the following Models : \n","nlu.load('lv.pos') returns Spark NLP model pos_ud_lvtb\n","For language NLU provides the following Models : \n","nlu.load('mr.pos') returns Spark NLP model pos_ud_ufal\n","For language NLU provides the following Models : \n","nlu.load('fa.pos') returns Spark NLP model pos_ud_perdt\n","For language NLU provides the following Models : \n","nlu.load('ro.pos') returns Spark NLP model pos_ud_rrt\n","nlu.load('ro.pos.ud_rrt') returns Spark NLP model pos_ud_rrt\n","For language NLU provides the following Models : \n","nlu.load('sk.pos') returns Spark NLP model pos_ud_snk\n","nlu.load('sk.pos.ud_snk') returns Spark NLP model pos_ud_snk\n","For language NLU provides the following Models : \n","nlu.load('sl.pos') returns Spark NLP model pos_ud_ssj\n","For language NLU provides the following Models : \n","nlu.load('sv.pos') returns Spark NLP model pos_ud_tal\n","nlu.load('sv.pos.ud_tal') returns Spark NLP model pos_ud_tal\n","For language NLU provides the following Models : \n","nlu.load('th.pos') returns Spark NLP model pos_lst20\n","For language NLU provides the following Models : \n","nlu.load('tr.pos') returns Spark NLP model pos_ud_imst\n","nlu.load('tr.pos.ud_imst') returns Spark NLP model pos_ud_imst\n","For language NLU provides the following Models : \n","nlu.load('uk.pos') returns Spark NLP model pos_ud_iu\n","nlu.load('uk.pos.ud_iu') returns Spark NLP model pos_ud_iu\n","For language NLU provides the following Models : \n","nlu.load('yo.pos') returns Spark NLP model pos_ud_ytb\n","For language NLU provides the following Models : \n","nlu.load('zh.pos') returns Spark NLP model pos_ud_gsd\n","nlu.load('zh.pos.ud_gsd') returns Spark NLP model pos_ud_gsd\n","nlu.load('zh.pos.ctb9') returns Spark NLP model pos_ctb9\n","nlu.load('zh.pos.ud_gsd_trad') returns Spark NLP model 
pos_ud_gsd_trad\n","For language NLU provides the following Models : \n","nlu.load('et.pos') returns Spark NLP model pos_ud_edt\n","For language NLU provides the following Models : \n","nlu.load('ur.pos') returns Spark NLP model pos_ud_udtb\n","nlu.load('ur.pos.ud_udtb') returns Spark NLP model pos_ud_udtb\n","For language NLU provides the following Models : \n","nlu.load('ko.pos') returns Spark NLP model pos_ud_kaist\n","nlu.load('ko.pos.ud_kaist') returns Spark NLP model pos_ud_kaist\n","For language NLU provides the following Models : \n","nlu.load('bh.pos') returns Spark NLP model pos_ud_bhtb\n","For language NLU provides the following Models : \n","nlu.load('am.pos') returns Spark NLP model pos_ud_att\n"],"name":"stdout"}]}]} \ No newline at end of file