From 9abff28c7dbe6f61dc96f42dc254b9dc6f5b23dd Mon Sep 17 00:00:00 2001 From: sonurdogan Date: Sun, 21 Jan 2024 16:12:43 +0300 Subject: [PATCH] BGEEmbeddings Integration --- .../NLU_BGE_sentence_embeddings.ipynb | 1157 +++++++++++++++++ .../sentence_bge/BGESentenceEmbedding.py | 16 + .../embeddings/sentence_bge/__init__.py | 0 .../name_deductable_annotators_OS.py | 2 + .../col_substitution/substitution_map_OS.py | 3 + nlu/spellbook.py | 6 + nlu/universe/annotator_class_universe.py | 3 +- nlu/universe/component_universes.py | 22 + nlu/universe/feature_node_ids.py | 1 + nlu/universe/feature_node_universes.py | 1 + .../sentence_embeddings/sentence_bge_tests.py | 18 + 11 files changed, 1228 insertions(+), 1 deletion(-) create mode 100644 examples/colab/component_examples/sentence_embeddings/NLU_BGE_sentence_embeddings.ipynb create mode 100644 nlu/components/embeddings/sentence_bge/BGESentenceEmbedding.py create mode 100644 nlu/components/embeddings/sentence_bge/__init__.py create mode 100644 tests/nlu_core_tests/component_tests/embed_tests/sentence_embeddings/sentence_bge_tests.py diff --git a/examples/colab/component_examples/sentence_embeddings/NLU_BGE_sentence_embeddings.ipynb b/examples/colab/component_examples/sentence_embeddings/NLU_BGE_sentence_embeddings.ipynb new file mode 100644 index 00000000..b2893149 --- /dev/null +++ b/examples/colab/component_examples/sentence_embeddings/NLU_BGE_sentence_embeddings.ipynb @@ -0,0 +1,1157 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "rBXrqlGEYA8G" + }, + "source": [ + "![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/component_examples/sentence_embeddings/NLU_BGE_sentence_embeddings.ipynb)\n", + "\n", + "# BGE Sentence Embeddings with NLU\n", + "\n", + " BGE, or BAAI General Embeddings, is a model that can 
map any text to a low-dimensional dense\n", + " vector which can be used for tasks like retrieval, classification, clustering, or semantic search. It can also be used in vector databases for LLMs.\n", + "\n", + "## Sources :\n", + "- https://arxiv.org/pdf/2309.07597.pdf\n", + "- https://github.com/FlagOpen/FlagEmbedding\n", + "\n", + "## Paper abstract\n", + "\n", + "This paper introduces C-Pack, a package of resources that significantly advance the field of general\n", + " Chinese embeddings. C-Pack includes three critical resources.\n", + " 1) C-MTEB is a comprehensive benchmark for Chinese text embeddings covering 6 tasks and 35 datasets.\n", + " 2) C-MTP is a massive text embedding dataset curated from labeled and unlabeled Chinese corpora\n", + " for training embedding models.\n", + " 3) C-TEM is a family of embedding models covering multiple sizes.\n", + " Our models outperform all prior Chinese text embeddings on C-MTEB by up to +10% upon the\n", + " time of the release. We also integrate and optimize the entire suite of training methods for\n", + " C-TEM. Along with our resources on general Chinese embedding, we release our data and models for\n", + " English text embeddings. 
The English models achieve state-of-the-art performance on the MTEB\n", + " benchmark; meanwhile, our released English data is 2 times larger than the Chinese data.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pc-VxiUuks79" + }, + "source": [ + "**All the available models:**\n", + "\n", + "| Language | nlu.load() reference | Spark NLP Model reference |\n", + "|----------|---------------------------------|-----------------------------------------------------------------------------------------------|\n", + "| English | en.embed_sentence.bge_small \t | [bge_small](https://sparknlp.org/2024/01/01/bge_small_en.html) \t\t\t\t\t |\n", + "| English | en.embed_sentence.bge_base | [bge_base](https://sparknlp.org/2024/01/01/bge_base_en.html) \t |\n", + "| English | en.embed_sentence.bge_large | [bge_large](https://sparknlp.org/2024/01/01/bge_large_en.html) \t |\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "# 1. Install NLU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SAdkGaH7lyEi" + }, + "outputs": [], + "source": [ + "!pip install nlu pyspark==3.4.1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "N_CL8HZ8Ydry" + }, + "source": [ + "# 2. 
Load Model and embed sample sentence" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6gWpe1M5fIoB" + }, + "source": [ + "### en.embed_sentence.bge_small" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "j2ZZZvr1uGpx", + "outputId": "097aa80e-46f6-49f4-d2c5-fd970f729a55" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Warning::Spark Session already created, some configs may not take.\n", + "Warning::Spark Session already created, some configs may not take.\n", + "bge_small download started this may take some time.\n", + "Approximate size to download 76.1 MB\n", + "[OK!]\n", + "sentence_detector_dl download started this may take some time.\n", + "Approximate size to download 354.6 KB\n", + "[OK!]\n", + "Warning::Spark Session already created, some configs may not take.\n" + ] + } + ], + "source": [ + "import nlu\n", + "\n", + "res = nlu.load(\"en.embed_sentence.bge_small\").predict('query: how much protein should a female eat')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 89 + }, + "id": "QFJshD-4rdor", + "outputId": "9e5785bb-6418-4f30-d1e4-12ee456503ad" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " sentence \\\n", + "0 query: how much protein should a female eat \n", + "\n", + " sentence_embedding_bge_small \n", + "0 [-0.059140872210264206, -0.013027993030846119,... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sentencesentence_embedding_bge_small
0query: how much protein should a female eat[-0.059140872210264206, -0.013027993030846119,...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "res" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XuzOX2d3fUAI" + }, + "source": [ + "### en.embed_sentence.bge_base" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3Ian3US8sUuw", + "outputId": "1f0c9b99-315f-47c4-9117-7d75a82b1e53" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Warning::Spark Session already created, some configs may not take.\n", + "Warning::Spark Session already created, some configs may not take.\n", + "bge_base download started this may take some time.\n", + "Approximate size to download 246.7 MB\n", + "[OK!]\n", + "Warning::Spark Session already created, some configs may not take.\n" + ] + } + ], + "source": [ + "res = nlu.load('en.embed_sentence.bge_base').predict(\"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.\",\n", + " output_level='document') # output_level should defined as document to get the embedding of the document instead of each sentence separately." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 89 + }, + "id": "eSpFlZdQeUHJ", + "outputId": "01bf348f-9c44-4181-f00d-e521c62fd6b5" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " document \\\n", + "0 passage: As a general guideline, the CDC's ave... \n", + "\n", + " sentence_embedding_bge_base \n", + "0 [0.006804925389587879, -0.006068557035177946, ... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
documentsentence_embedding_bge_base
0passage: As a general guideline, the CDC's ave...[0.006804925389587879, -0.006068557035177946, ...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "res" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BAUFklCqLr3V" + }, + "source": [ + "# 3. NLU has many more sentence embedding models!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3aiiLjYilt1a" + }, + "source": [ + "Make sure to try them all out!\n", + "You can change 'embed_sentence.electra' in nlu.load('embed_sentence.electra') to bert, xlnet, albert or any other of the 20+ sentence embeddings offered by NLU" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9qUF7jPlme-R", + "outputId": "02df4660-d777-4766-fe0d-15d245e5668a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "For language NLU provides the following Models : \n", + "nlu.load('am.embed_sentence.xlm_roberta') returns Spark NLP model_anno_obj sent_xlm_roberta_base_finetuned_amharic\n", + "For language NLU provides the following Models : \n", + "nlu.load('de.embed_sentence.bert.base_cased') returns Spark NLP model_anno_obj sent_bert_base_cased\n", + "For language NLU provides the following Models : \n", + "nlu.load('el.embed_sentence.bert.base_uncased') returns Spark NLP model_anno_obj sent_bert_base_uncased\n", + "For language NLU provides the following Models : \n", + "nlu.load('en.embed_sentence') returns Spark NLP model_anno_obj tfhub_use\n", + "nlu.load('en.embed_sentence.albert') returns Spark NLP model_anno_obj albert_base_uncased\n", + "nlu.load('en.embed_sentence.bert') returns Spark NLP model_anno_obj sent_bert_base_uncased\n", + "nlu.load('en.embed_sentence.bert.base_uncased_legal') returns Spark NLP model_anno_obj sent_bert_base_uncased_legal\n", + "nlu.load('en.embed_sentence.bert.finetuned') returns Spark NLP model_anno_obj sbert_setfit_finetuned_financial_text_classification\n", + "nlu.load('en.embed_sentence.bert.pubmed') returns Spark NLP 
model_anno_obj sent_bert_pubmed\n", + "nlu.load('en.embed_sentence.bert.pubmed_squad2') returns Spark NLP model_anno_obj sent_bert_pubmed_squad2\n", + "nlu.load('en.embed_sentence.bert.wiki_books') returns Spark NLP model_anno_obj sent_bert_wiki_books\n", + "nlu.load('en.embed_sentence.bert.wiki_books_mnli') returns Spark NLP model_anno_obj sent_bert_wiki_books_mnli\n", + "nlu.load('en.embed_sentence.bert.wiki_books_qnli') returns Spark NLP model_anno_obj sent_bert_wiki_books_qnli\n", + "nlu.load('en.embed_sentence.bert.wiki_books_qqp') returns Spark NLP model_anno_obj sent_bert_wiki_books_qqp\n", + "nlu.load('en.embed_sentence.bert.wiki_books_squad2') returns Spark NLP model_anno_obj sent_bert_wiki_books_squad2\n", + "nlu.load('en.embed_sentence.bert.wiki_books_sst2') returns Spark NLP model_anno_obj sent_bert_wiki_books_sst2\n", + "nlu.load('en.embed_sentence.bert_base_cased') returns Spark NLP model_anno_obj sent_bert_base_cased\n", + "nlu.load('en.embed_sentence.bert_base_uncased') returns Spark NLP model_anno_obj sent_bert_base_uncased\n", + "nlu.load('en.embed_sentence.bert_large_cased') returns Spark NLP model_anno_obj sent_bert_large_cased\n", + "nlu.load('en.embed_sentence.bert_large_uncased') returns Spark NLP model_anno_obj sent_bert_large_uncased\n", + "nlu.load('en.embed_sentence.bert_use_cmlm_en_base') returns Spark NLP model_anno_obj sent_bert_use_cmlm_en_base\n", + "nlu.load('en.embed_sentence.bert_use_cmlm_en_large') returns Spark NLP model_anno_obj sent_bert_use_cmlm_en_large\n", + "nlu.load('en.embed_sentence.biobert.clinical_base_cased') returns Spark NLP model_anno_obj sent_biobert_clinical_base_cased\n", + "nlu.load('en.embed_sentence.biobert.discharge_base_cased') returns Spark NLP model_anno_obj sent_biobert_discharge_base_cased\n", + "nlu.load('en.embed_sentence.biobert.pmc_base_cased') returns Spark NLP model_anno_obj sent_biobert_pmc_base_cased\n", + "nlu.load('en.embed_sentence.biobert.pubmed_base_cased') returns Spark NLP model_anno_obj 
sent_biobert_pubmed_base_cased\n", + "nlu.load('en.embed_sentence.biobert.pubmed_large_cased') returns Spark NLP model_anno_obj sent_biobert_pubmed_large_cased\n", + "nlu.load('en.embed_sentence.biobert.pubmed_pmc_base_cased') returns Spark NLP model_anno_obj sent_biobert_pubmed_pmc_base_cased\n", + "nlu.load('en.embed_sentence.bge_base') returns Spark NLP model_anno_obj bge_base\n", + "nlu.load('en.embed_sentence.bge_small') returns Spark NLP model_anno_obj bge_small\n", + "nlu.load('en.embed_sentence.bge_large') returns Spark NLP model_anno_obj bge_large\n", + "nlu.load('en.embed_sentence.covidbert.large_uncased') returns Spark NLP model_anno_obj sent_covidbert_large_uncased\n", + "nlu.load('en.embed_sentence.distil_roberta.distilled_base') returns Spark NLP model_anno_obj sent_distilroberta_base\n", + "nlu.load('en.embed_sentence.doc2vec') returns Spark NLP model_anno_obj doc2vec_gigaword_300\n", + "nlu.load('en.embed_sentence.doc2vec.gigaword_300') returns Spark NLP model_anno_obj doc2vec_gigaword_300\n", + "nlu.load('en.embed_sentence.doc2vec.gigaword_wiki_300') returns Spark NLP model_anno_obj doc2vec_gigaword_wiki_300\n", + "nlu.load('en.embed_sentence.e5_small') returns Spark NLP model_anno_obj e5_small\n", + "nlu.load('en.embed_sentence.e5_small_opt') returns Spark NLP model_anno_obj e5_small_opt\n", + "nlu.load('en.embed_sentence.e5_small_v2_opt') returns Spark NLP model_anno_obj e5_small_v2_opt\n", + "nlu.load('en.embed_sentence.e5_base_v2') returns Spark NLP model_anno_obj e5_base_v2\n", + "nlu.load('en.embed_sentence.e5_base') returns Spark NLP model_anno_obj e5_base\n", + "nlu.load('en.embed_sentence.e5_base_v2_opt') returns Spark NLP model_anno_obj e5_base_v2_opt\n", + "nlu.load('en.embed_sentence.e5_base_quantized') returns Spark NLP model_anno_obj e5_base_quantized\n", + "nlu.load('en.embed_sentence.e5_base_opt') returns Spark NLP model_anno_obj e5_base_opt\n", + "nlu.load('en.embed_sentence.e5_base_v2_quantized') returns Spark NLP model_anno_obj 
e5_base_v2_quantized\n", + "nlu.load('en.embed_sentence.e5_small_v2_quantized') returns Spark NLP model_anno_obj e5_small_v2_quantized\n", + "nlu.load('en.embed_sentence.e5_large_v2') returns Spark NLP model_anno_obj e5_large_v2\n", + "nlu.load('en.embed_sentence.e5_small_v2') returns Spark NLP model_anno_obj e5_small_v2\n", + "nlu.load('en.embed_sentence.e5_small_quantized') returns Spark NLP model_anno_obj e5_small_quantized\n", + "nlu.load('en.embed_sentence.e5_large_v2_opt') returns Spark NLP model_anno_obj e5_large_v2_opt\n", + "nlu.load('en.embed_sentence.e5_large_v2_quantized') returns Spark NLP model_anno_obj e5_large_v2_quantized\n", + "nlu.load('en.embed_sentence.e5_large') returns Spark NLP model_anno_obj e5_large\n", + "nlu.load('en.embed_sentence.electra') returns Spark NLP model_anno_obj sent_electra_small_uncased\n", + "nlu.load('en.embed_sentence.electra_base_uncased') returns Spark NLP model_anno_obj sent_electra_base_uncased\n", + "nlu.load('en.embed_sentence.electra_large_uncased') returns Spark NLP model_anno_obj sent_electra_large_uncased\n", + "nlu.load('en.embed_sentence.electra_small_uncased') returns Spark NLP model_anno_obj sent_electra_small_uncased\n", + "nlu.load('en.embed_sentence.mpnet.579_stmodel_product_rem_v3a') returns Spark NLP model_anno_obj 579_stmodel_product_rem_v3a\n", + "nlu.load('en.embed_sentence.mpnet.abstract_sim_query') returns Spark NLP model_anno_obj abstract_sim_query\n", + "nlu.load('en.embed_sentence.mpnet.abstract_sim_sentence') returns Spark NLP model_anno_obj abstract_sim_sentence\n", + "nlu.load('en.embed_sentence.mpnet.action_policy_plans_classifier') returns Spark NLP model_anno_obj action_policy_plans_classifier\n", + "nlu.load('en.embed_sentence.mpnet.all_datasets_v3_mpnet_base') returns Spark NLP model_anno_obj all_datasets_v3_mpnet_base\n", + "nlu.load('en.embed_sentence.mpnet.all_datasets_v4_mpnet_base') returns Spark NLP model_anno_obj all_datasets_v4_mpnet_base\n", + 
"nlu.load('en.embed_sentence.mpnet.all_mpnet_base_questions_clustering_english') returns Spark NLP model_anno_obj all_mpnet_base_questions_clustering_english\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v1') returns Spark NLP model_anno_obj all_mpnet_base_v1\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2') returns Spark NLP model_anno_obj all_mpnet_base_v2\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_diptanuc') returns Spark NLP model_anno_obj all_mpnet_base_v2_diptanuc\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_embedding_all') returns Spark NLP model_anno_obj all_mpnet_base_v2_embedding_all\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_feature_extraction') returns Spark NLP model_anno_obj all_mpnet_base_v2_feature_extraction\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_feature_extraction_pipeline') returns Spark NLP model_anno_obj all_mpnet_base_v2_feature_extraction_pipeline\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_finetuned_v2') returns Spark NLP model_anno_obj all_mpnet_base_v2_finetuned_v2\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_for_sb_clustering') returns Spark NLP model_anno_obj all_mpnet_base_v2_for_sb_clustering\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_ftlegal_v3') returns Spark NLP model_anno_obj all_mpnet_base_v2_ftlegal_v3\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_obrizum') returns Spark NLP model_anno_obj all_mpnet_base_v2_obrizum\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_sentence_transformers') returns Spark NLP model_anno_obj all_mpnet_base_v2_sentence_transformers\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_table') returns Spark NLP model_anno_obj all_mpnet_base_v2_table\n", + "nlu.load('en.embed_sentence.mpnet.all_mpnet_base_v2_tasky_classification') returns Spark NLP model_anno_obj all_mpnet_base_v2_tasky_classification\n", + 
"nlu.load('en.embed_sentence.mpnet.attack_bert') returns Spark NLP model_anno_obj attack_bert\n", + "nlu.load('en.embed_sentence.mpnet.biolord_stamb2_v1') returns Spark NLP model_anno_obj biolord_stamb2_v1\n", + "nlu.load('en.embed_sentence.mpnet.burmese_awesome_setfit_model') returns Spark NLP model_anno_obj burmese_awesome_setfit_model\n", + "nlu.load('en.embed_sentence.mpnet.burmese_awesome_setfit_model_98') returns Spark NLP model_anno_obj burmese_awesome_setfit_model_98\n", + "nlu.load('en.embed_sentence.mpnet.contradiction_psb') returns Spark NLP model_anno_obj contradiction_psb\n", + "nlu.load('en.embed_sentence.mpnet.contradiction_psb_lds') returns Spark NLP model_anno_obj contradiction_psb_lds\n", + "nlu.load('en.embed_sentence.mpnet.covid_qa_mpnet') returns Spark NLP model_anno_obj covid_qa_mpnet\n", + "nlu.load('en.embed_sentence.mpnet.cpu_conditional_classifier') returns Spark NLP model_anno_obj cpu_conditional_classifier\n", + "nlu.load('en.embed_sentence.mpnet.cpu_economywide_classifier') returns Spark NLP model_anno_obj cpu_economywide_classifier\n", + "nlu.load('en.embed_sentence.mpnet.cpu_mitigation_classifier') returns Spark NLP model_anno_obj cpu_mitigation_classifier\n", + "nlu.load('en.embed_sentence.mpnet.cpu_netzero_classifier') returns Spark NLP model_anno_obj cpu_netzero_classifier\n", + "nlu.load('en.embed_sentence.mpnet.cpu_target_classifier') returns Spark NLP model_anno_obj cpu_target_classifier\n", + "nlu.load('en.embed_sentence.mpnet.cpu_transport_ghg_classifier') returns Spark NLP model_anno_obj cpu_transport_ghg_classifier\n", + "nlu.load('en.embed_sentence.mpnet.cross_all_mpnet_base_v2_finetuned_webnlg2020_metric_average') returns Spark NLP model_anno_obj cross_all_mpnet_base_v2_finetuned_webnlg2020_metric_average\n", + "nlu.load('en.embed_sentence.mpnet.domainadaptm2') returns Spark NLP model_anno_obj domainadaptm2\n", + "nlu.load('en.embed_sentence.mpnet.due_eshop_21') returns Spark NLP model_anno_obj due_eshop_21\n", + 
"nlu.load('en.embed_sentence.mpnet.due_eshop_21_multilabel') returns Spark NLP model_anno_obj due_eshop_21_multilabel\n", + "nlu.load('en.embed_sentence.mpnet.due_retail_25') returns Spark NLP model_anno_obj due_retail_25\n", + "nlu.load('en.embed_sentence.mpnet.ecolo_pas_ecolo_v0.1') returns Spark NLP model_anno_obj ecolo_pas_ecolo_v0.1\n", + "nlu.load('en.embed_sentence.mpnet.esci_jp_mpnet_crossencoder') returns Spark NLP model_anno_obj esci_jp_mpnet_crossencoder\n", + "nlu.load('en.embed_sentence.mpnet.eth_setfit_payment_model') returns Spark NLP model_anno_obj eth_setfit_payment_model\n", + "nlu.load('en.embed_sentence.mpnet.fail_detect') returns Spark NLP model_anno_obj fail_detect\n", + "nlu.load('en.embed_sentence.mpnet.few_shot_model') returns Spark NLP model_anno_obj few_shot_model\n", + "nlu.load('en.embed_sentence.mpnet.fewshotissueclassifier_nlbse23') returns Spark NLP model_anno_obj fewshotissueclassifier_nlbse23\n", + "nlu.load('en.embed_sentence.mpnet.github_issues_mpnet_southern_sotho_e10') returns Spark NLP model_anno_obj github_issues_mpnet_southern_sotho_e10\n", + "nlu.load('en.embed_sentence.mpnet.github_issues_preprocessed_mpnet_southern_sotho_e10') returns Spark NLP model_anno_obj github_issues_preprocessed_mpnet_southern_sotho_e10\n", + "nlu.load('en.embed_sentence.mpnet.ikitracs_conditional') returns Spark NLP model_anno_obj ikitracs_conditional\n", + "nlu.load('en.embed_sentence.mpnet.ikitracs_mitigation') returns Spark NLP model_anno_obj ikitracs_mitigation\n", + "nlu.load('en.embed_sentence.mpnet.initial_model') returns Spark NLP model_anno_obj initial_model\n", + "nlu.load('en.embed_sentence.mpnet.initial_model_v3') returns Spark NLP model_anno_obj initial_model_v3\n", + "nlu.load('en.embed_sentence.mpnet.invoiceornot') returns Spark NLP model_anno_obj invoiceornot\n", + "nlu.load('en.embed_sentence.mpnet.java_deprecation_classifier') returns Spark NLP model_anno_obj java_deprecation_classifier\n", + 
"nlu.load('en.embed_sentence.mpnet.java_expand_classifier') returns Spark NLP model_anno_obj java_expand_classifier\n", + "nlu.load('en.embed_sentence.mpnet.java_ownership_classifier') returns Spark NLP model_anno_obj java_ownership_classifier\n", + "nlu.load('en.embed_sentence.mpnet.java_pointer_classifier') returns Spark NLP model_anno_obj java_pointer_classifier\n", + "nlu.load('en.embed_sentence.mpnet.java_rational_classifier') returns Spark NLP model_anno_obj java_rational_classifier\n", + "nlu.load('en.embed_sentence.mpnet.java_summary_classifier') returns Spark NLP model_anno_obj java_summary_classifier\n", + "nlu.load('en.embed_sentence.mpnet.java_usage_classifier') returns Spark NLP model_anno_obj java_usage_classifier\n", + "nlu.load('en.embed_sentence.mpnet.keyphrase_mpnet_v1') returns Spark NLP model_anno_obj keyphrase_mpnet_v1\n", + "nlu.load('en.embed_sentence.mpnet.kw_classification_setfit_model') returns Spark NLP model_anno_obj kw_classification_setfit_model\n", + "nlu.load('en.embed_sentence.mpnet.kw_classification_setfithead_model') returns Spark NLP model_anno_obj kw_classification_setfithead_model\n", + "nlu.load('en.embed_sentence.mpnet.labels_per_job_title_fine_tune') returns Spark NLP model_anno_obj labels_per_job_title_fine_tune\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_adaptation_mitigation_classifier') returns Spark NLP model_anno_obj mpnet_adaptation_mitigation_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_base_articles_ner') returns Spark NLP model_anno_obj mpnet_base_articles_ner\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_base_snli_mnli') returns Spark NLP model_anno_obj mpnet_base_snli_mnli\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_579_stmodel_product_rem_v3a') returns Spark NLP model_anno_obj mpnet_embedding_579_STmodel_product_rem_v3a\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_attack_bert') returns Spark NLP model_anno_obj mpnet_embedding_ATTACK_BERT\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_biolord_stamb2_v1') returns Spark NLP model_anno_obj mpnet_embedding_BioLORD_STAMB2_v1\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_cpu_conditional_classifier') returns Spark NLP model_anno_obj mpnet_embedding_CPU_Conditional_Classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_cpu_economywide_classifier') returns Spark NLP model_anno_obj mpnet_embedding_CPU_Economywide_Classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_cpu_mitigation_classifier') returns Spark NLP model_anno_obj mpnet_embedding_CPU_Mitigation_Classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_cpu_netzero_classifier') returns Spark NLP model_anno_obj mpnet_embedding_CPU_Netzero_Classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_cpu_target_classifier') returns Spark NLP model_anno_obj mpnet_embedding_CPU_Target_Classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_cpu_transport_ghg_classifier') returns Spark NLP model_anno_obj mpnet_embedding_CPU_Transport_GHG_Classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_domainadaptm2') returns Spark NLP model_anno_obj mpnet_embedding_DomainAdaptM2\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_fewshotissueclassifier_nlbse23') returns Spark NLP model_anno_obj mpnet_embedding_FewShotIssueClassifier_NLBSE23\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_invoiceornot') returns Spark NLP model_anno_obj mpnet_embedding_InvoiceOrNot\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_pdfsegs') returns Spark NLP model_anno_obj mpnet_embedding_PDFSegs\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_patentsberta') returns Spark NLP model_anno_obj mpnet_embedding_PatentSBERTa\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_patentsberta_v2') returns Spark NLP model_anno_obj mpnet_embedding_PatentSBERTa_V2\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_sentiment140_fewshot') returns Spark NLP model_anno_obj mpnet_embedding_Sentiment140_fewshot\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_all_data') returns Spark NLP model_anno_obj mpnet_embedding_SetFit_all_data\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_few_shot_classifier') returns Spark NLP model_anno_obj mpnet_embedding_Setfit_few_shot_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_abstract_sim_query') returns Spark NLP model_anno_obj mpnet_embedding_abstract_sim_query\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_abstract_sim_sentence') returns Spark NLP model_anno_obj mpnet_embedding_abstract_sim_sentence\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_action_policy_plans_classifier') returns Spark NLP model_anno_obj mpnet_embedding_action_policy_plans_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_datasets_v3_mpnet_base') returns Spark NLP model_anno_obj mpnet_embedding_all_datasets_v3_mpnet_base\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_datasets_v4_mpnet_base') returns Spark NLP model_anno_obj mpnet_embedding_all_datasets_v4_mpnet_base\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_questions_clustering_english') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_questions_clustering_english\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v1') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v1\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_by_diptanuc') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_by_diptanuc\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_by_obrizum') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_by_obrizum\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_by_sentence_transformers') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_by_sentence_transformers\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_embedding_all') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_embedding_all\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_feature_extraction') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_feature_extraction\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_feature_extraction_pipeline') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_feature_extraction_pipeline\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_finetuned_v2') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_finetuned_v2\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_for_sb_clustering') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_for_sb_clustering\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_ftlegal_v3') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_ftlegal_v3\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_table') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_table\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_all_mpnet_base_v2_tasky_classification') returns Spark NLP model_anno_obj mpnet_embedding_all_mpnet_base_v2_tasky_classification\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_burmese_awesome_setfit_model') returns Spark NLP model_anno_obj mpnet_embedding_burmese_awesome_setfit_model\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_burmese_awesome_setfit_model_98') returns Spark NLP model_anno_obj mpnet_embedding_burmese_awesome_setfit_model_98\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_contradiction_psb') returns Spark NLP model_anno_obj mpnet_embedding_contradiction_psb\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_contradiction_psb_lds') returns Spark NLP model_anno_obj mpnet_embedding_contradiction_psb_lds\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_covid_qa_mpnet') returns Spark NLP model_anno_obj mpnet_embedding_covid_qa_mpnet\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_cross_all_mpnet_base_v2_finetuned_webnlg2020_metric_average') returns Spark NLP model_anno_obj mpnet_embedding_cross_all_mpnet_base_v2_finetuned_WebNLG2020_metric_average\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_due_eshop_21') returns Spark NLP model_anno_obj mpnet_embedding_due_eshop_21\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_due_eshop_21_multilabel') returns Spark NLP model_anno_obj mpnet_embedding_due_eshop_21_multilabel\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_due_retail_25') returns Spark NLP model_anno_obj mpnet_embedding_due_retail_25\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_ecolo_pas_ecolo_v0.1') returns Spark NLP model_anno_obj mpnet_embedding_ecolo_pas_ecolo_v0.1\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_esci_jp_mpnet_crossencoder') returns Spark NLP model_anno_obj mpnet_embedding_esci_jp_mpnet_crossencoder\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_eth_setfit_payment_model') returns Spark NLP model_anno_obj mpnet_embedding_eth_setfit_payment_model\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_fail_detect') returns Spark NLP model_anno_obj mpnet_embedding_fail_detect\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_few_shot_model') returns Spark NLP model_anno_obj mpnet_embedding_few_shot_model\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_finetunned_sbert') returns Spark NLP model_anno_obj mpnet_embedding_finetunned_sbert\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_github_issues_mpnet_southern_sotho_e10') returns Spark NLP model_anno_obj mpnet_embedding_github_issues_mpnet_southern_sotho_e10\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_github_issues_mpnet_st_e10') returns Spark NLP model_anno_obj mpnet_embedding_github_issues_mpnet_st_e10\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_github_issues_preprocessed_mpnet_southern_sotho_e10') returns Spark NLP model_anno_obj mpnet_embedding_github_issues_preprocessed_mpnet_southern_sotho_e10\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_github_issues_preprocessed_mpnet_st_e10') returns Spark NLP model_anno_obj mpnet_embedding_github_issues_preprocessed_mpnet_st_e10\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_ikitracs_conditional') returns Spark NLP model_anno_obj mpnet_embedding_ikitracs_conditional\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_ikitracs_mitigation') returns Spark NLP model_anno_obj mpnet_embedding_ikitracs_mitigation\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_initial_model') returns Spark NLP model_anno_obj mpnet_embedding_initial_model\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_initial_model_v3') returns Spark NLP model_anno_obj mpnet_embedding_initial_model_v3\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_java_deprecation_classifier') returns Spark NLP model_anno_obj mpnet_embedding_java_deprecation_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_java_expand_classifier') returns Spark NLP model_anno_obj mpnet_embedding_java_expand_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_java_ownership_classifier') returns Spark NLP model_anno_obj mpnet_embedding_java_ownership_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_java_pointer_classifier') returns Spark NLP model_anno_obj mpnet_embedding_java_pointer_classifier\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_java_rational_classifier') returns Spark NLP model_anno_obj mpnet_embedding_java_rational_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_java_summary_classifier') returns Spark NLP model_anno_obj mpnet_embedding_java_summary_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_java_usage_classifier') returns Spark NLP model_anno_obj mpnet_embedding_java_usage_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_keyphrase_mpnet_v1') returns Spark NLP model_anno_obj mpnet_embedding_keyphrase_mpnet_v1\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_kw_classification_setfit_model') returns Spark NLP model_anno_obj mpnet_embedding_kw_classification_setfit_model\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_kw_classification_setfithead_model') returns Spark NLP model_anno_obj mpnet_embedding_kw_classification_setfithead_model\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_labels_per_job_title_fine_tune') returns Spark NLP model_anno_obj mpnet_embedding_labels_per_job_title_fine_tune\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_mpnet_adaptation_mitigation_classifier') returns Spark NLP model_anno_obj mpnet_embedding_mpnet_adaptation_mitigation_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_mpnet_base') returns Spark NLP model_anno_obj mpnet_embedding_mpnet_base\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_mpnet_base_articles_ner') returns Spark NLP model_anno_obj mpnet_embedding_mpnet_base_articles_ner\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_mpnet_base_snli_mnli') returns Spark NLP model_anno_obj mpnet_embedding_mpnet_base_snli_mnli\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_mpnet_mnr_v2_fine_tuned') returns Spark NLP model_anno_obj mpnet_embedding_mpnet_mnr_v2_fine_tuned\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_mpnet_multilabel_sector_classifier') 
returns Spark NLP model_anno_obj mpnet_embedding_mpnet_multilabel_sector_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_mpnet_nli_sts') returns Spark NLP model_anno_obj mpnet_embedding_mpnet_nli_sts\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_mpnet_retriever_squad2') returns Spark NLP model_anno_obj mpnet_embedding_mpnet_retriever_squad2\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_mpnet_snli') returns Spark NLP model_anno_obj mpnet_embedding_mpnet_snli\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_mpnet_snli_negatives') returns Spark NLP model_anno_obj mpnet_embedding_mpnet_snli_negatives\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_v1_mpnet_asymmetric_a') returns Spark NLP model_anno_obj mpnet_embedding_multi_QA_v1_mpnet_asymmetric_A\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_v1_mpnet_asymmetric_q') returns Spark NLP model_anno_obj mpnet_embedding_multi_QA_v1_mpnet_asymmetric_Q\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_mpnet_base_cos_v1') returns Spark NLP model_anno_obj mpnet_embedding_multi_qa_mpnet_base_cos_v1\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_mpnet_base_cos_v1_by_navteca') returns Spark NLP model_anno_obj mpnet_embedding_multi_qa_mpnet_base_cos_v1_by_navteca\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_mpnet_base_cos_v1_by_sentence_transformers') returns Spark NLP model_anno_obj mpnet_embedding_multi_qa_mpnet_base_cos_v1_by_sentence_transformers\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_mpnet_base_dot_v1') returns Spark NLP model_anno_obj mpnet_embedding_multi_qa_mpnet_base_dot_v1\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_mpnet_base_dot_v1_by_model_embeddings') returns Spark NLP model_anno_obj mpnet_embedding_multi_qa_mpnet_base_dot_v1_by_model_embeddings\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_mpnet_base_dot_v1_by_sentence_transformers') returns Spark NLP model_anno_obj mpnet_embedding_multi_qa_mpnet_base_dot_v1_by_sentence_transformers\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_mpnet_base_dot_v1_eclass') returns Spark NLP model_anno_obj mpnet_embedding_multi_qa_mpnet_base_dot_v1_eclass\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_mpnet_base_dot_v1_legal_finetune') returns Spark NLP model_anno_obj mpnet_embedding_multi_qa_mpnet_base_dot_v1_legal_finetune\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_multi_qa_v1_mpnet_cls_dot') returns Spark NLP model_anno_obj mpnet_embedding_multi_qa_v1_mpnet_cls_dot\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_my_awesome_setfit_model_98') returns Spark NLP model_anno_obj mpnet_embedding_my_awesome_setfit_model_98\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_nli_mpnet_base_v2') returns Spark NLP model_anno_obj mpnet_embedding_nli_mpnet_base_v2\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_nli_mpnet_base_v2_by_sentence_transformers') returns Spark NLP model_anno_obj mpnet_embedding_nli_mpnet_base_v2_by_sentence_transformers\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_nooks_amd_detection_realtime') returns Spark NLP model_anno_obj mpnet_embedding_nooks_amd_detection_realtime\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_nooks_amd_detection_v2_full') returns Spark NLP model_anno_obj mpnet_embedding_nooks_amd_detection_v2_full\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_nps_psb_lds') returns Spark NLP model_anno_obj mpnet_embedding_nps_psb_lds\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_ouvrage_classif') returns Spark NLP model_anno_obj mpnet_embedding_ouvrage_classif\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_paraphrase_mpnet_base_v2') returns Spark NLP model_anno_obj mpnet_embedding_paraphrase_mpnet_base_v2\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_paraphrase_mpnet_base_v2_setfit_sst2') returns Spark NLP model_anno_obj mpnet_embedding_paraphrase_mpnet_base_v2_SetFit_sst2\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_paraphrase_mpnet_base_v2_by_sentence_transformers') returns Spark NLP model_anno_obj mpnet_embedding_paraphrase_mpnet_base_v2_by_sentence_transformers\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_paraphrase_mpnet_base_v2_finetuned_polifact') returns Spark NLP model_anno_obj mpnet_embedding_paraphrase_mpnet_base_v2_finetuned_polifact\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_paraphrase_mpnet_base_v2_fuzzy_matcher') returns Spark NLP model_anno_obj mpnet_embedding_paraphrase_mpnet_base_v2_fuzzy_matcher\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_pharo_collaborators_classifier') returns Spark NLP model_anno_obj mpnet_embedding_pharo_collaborators_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_pharo_example_classifier') returns Spark NLP model_anno_obj mpnet_embedding_pharo_example_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_pharo_keyimplementationpoints_classifier') returns Spark NLP model_anno_obj mpnet_embedding_pharo_keyimplementationpoints_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_pharo_responsibilities_classifier') returns Spark NLP model_anno_obj mpnet_embedding_pharo_responsibilities_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_python_developmentnotes_classifier') returns Spark NLP model_anno_obj mpnet_embedding_python_developmentnotes_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_python_expand_classifier') returns Spark NLP model_anno_obj mpnet_embedding_python_expand_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_python_parameters_classifier') returns Spark NLP model_anno_obj mpnet_embedding_python_parameters_classifier\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_python_summary_classifier') returns Spark NLP model_anno_obj mpnet_embedding_python_summary_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_python_usage_classifier') returns Spark NLP model_anno_obj mpnet_embedding_python_usage_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_reddit_single_context_mpnet_base') returns Spark NLP model_anno_obj mpnet_embedding_reddit_single_context_mpnet_base\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_retriever_coding_guru_adapted') returns Spark NLP model_anno_obj mpnet_embedding_retriever_coding_guru_adapted\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_review_intent_20230116') returns Spark NLP model_anno_obj mpnet_embedding_review_intent_20230116\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_review_multiclass_20230116') returns Spark NLP model_anno_obj mpnet_embedding_review_multiclass_20230116\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_sb_temfac') returns Spark NLP model_anno_obj mpnet_embedding_sb_temfac\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_sbert_paper') returns Spark NLP model_anno_obj mpnet_embedding_sbert_paper\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_sentence_transformers_bible_reference_final') returns Spark NLP model_anno_obj mpnet_embedding_sentence_transformers_bible_reference_final\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_ag_news_endpoint') returns Spark NLP model_anno_obj mpnet_embedding_setfit_ag_news_endpoint\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_ds_version_0_0_1') returns Spark NLP model_anno_obj mpnet_embedding_setfit_ds_version_0_0_1\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_ds_version_0_0_2') returns Spark NLP model_anno_obj mpnet_embedding_setfit_ds_version_0_0_2\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_ds_version_0_0_4') returns Spark NLP 
model_anno_obj mpnet_embedding_setfit_ds_version_0_0_4\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_ds_version_0_0_5') returns Spark NLP model_anno_obj mpnet_embedding_setfit_ds_version_0_0_5\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_ethos_multilabel_example') returns Spark NLP model_anno_obj mpnet_embedding_setfit_ethos_multilabel_example\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_ethos_multilabel_example_by_lewtun') returns Spark NLP model_anno_obj mpnet_embedding_setfit_ethos_multilabel_example_by_lewtun\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_ethos_multilabel_example_by_neilthematic') returns Spark NLP model_anno_obj mpnet_embedding_setfit_ethos_multilabel_example_by_neilthematic\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_finetuned_financial_text') returns Spark NLP model_anno_obj mpnet_embedding_setfit_finetuned_financial_text\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_ft_sentinent_eval') returns Spark NLP model_anno_obj mpnet_embedding_setfit_ft_sentinent_eval\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_model') returns Spark NLP model_anno_obj mpnet_embedding_setfit_model\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_model_feb11_misinformation_on_law') returns Spark NLP model_anno_obj mpnet_embedding_setfit_model_Feb11_Misinformation_on_Law\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_model_by_pradipta11') returns Spark NLP model_anno_obj mpnet_embedding_setfit_model_by_pradipta11\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_model_by_rajistics') returns Spark NLP model_anno_obj mpnet_embedding_setfit_model_by_rajistics\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_model_test_sensitve_v1') returns Spark NLP model_anno_obj mpnet_embedding_setfit_model_test_sensitve_v1\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_occupation') 
returns Spark NLP model_anno_obj mpnet_embedding_setfit_occupation\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_ostrom') returns Spark NLP model_anno_obj mpnet_embedding_setfit_ostrom\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p1') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p1\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p1_comm') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p1_comm\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p1_life') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p1_life\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p1_likes') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p1_likes\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_bhvr') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_bhvr\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_cons') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_cons\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_dur') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_dur\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_func') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_func\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_sev') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_sev\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_trig') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p3_trig\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p4_achiev') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p4_achiev\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p4_meas') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p4_meas\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p4_rel') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p4_rel\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p4_specific') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p4_specific\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_p4_time') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_p4_time\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_setfit_zero_shot_classification_pbsp_q8a_azure_gpt35') returns Spark NLP model_anno_obj mpnet_embedding_setfit_zero_shot_classification_pbsp_q8a_azure_gpt35\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_shona_mpnet_base_snli_mnli') returns Spark NLP model_anno_obj mpnet_embedding_shona_mpnet_base_snli_mnli\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_sml_ukr_message_classifier') returns Spark NLP model_anno_obj mpnet_embedding_sml_ukr_message_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_sml_ukr_word_classifier_medium') returns Spark NLP model_anno_obj mpnet_embedding_sml_ukr_word_classifier_medium\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_sn_mpnet_base_snli_mnli') returns Spark 
NLP model_anno_obj mpnet_embedding_sn_mpnet_base_snli_mnli\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_spiced') returns Spark NLP model_anno_obj mpnet_embedding_spiced\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_stackoverflow_mpnet_base') returns Spark NLP model_anno_obj mpnet_embedding_stackoverflow_mpnet_base\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_stsb_mpnet_base_v2') returns Spark NLP model_anno_obj mpnet_embedding_stsb_mpnet_base_v2\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_test_food') returns Spark NLP model_anno_obj mpnet_embedding_test_food\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_test_model_001') returns Spark NLP model_anno_obj mpnet_embedding_test_model_001\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetformaskedlm') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_MPNetForMaskedLM\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetforquestionanswering') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_MPNetForQuestionAnswering\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetforsequenceclassification') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_MPNetForSequenceClassification\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetfortokenclassification') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_MPNetForTokenClassification\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetmodel') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_MPNetModel\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnet_by_hf_internal_testing') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_mpnet_by_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetformaskedlm_by_hf_internal_testing') returns Spark NLP model_anno_obj 
mpnet_embedding_tiny_random_mpnetformaskedlm_by_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetformaskedlm_by_hf_tiny_model_private') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_mpnetformaskedlm_by_hf_tiny_model_private\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetforquestionanswering_by_hf_internal_testing') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_mpnetforquestionanswering_by_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetforquestionanswering_by_hf_tiny_model_private') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_mpnetforquestionanswering_by_hf_tiny_model_private\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetforsequenceclassification_by_hf_internal_testing') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_mpnetforsequenceclassification_by_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetforsequenceclassification_by_hf_tiny_model_private') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_mpnetforsequenceclassification_by_hf_tiny_model_private\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetfortokenclassification_by_hf_internal_testing') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_mpnetfortokenclassification_by_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetfortokenclassification_by_hf_tiny_model_private') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_mpnetfortokenclassification_by_hf_tiny_model_private\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetmodel_by_hf_internal_testing') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_mpnetmodel_by_hf_internal_testing\n", + 
"nlu.load('en.embed_sentence.mpnet.mpnet_embedding_tiny_random_mpnetmodel_by_hf_tiny_model_private') returns Spark NLP model_anno_obj mpnet_embedding_tiny_random_mpnetmodel_by_hf_tiny_model_private\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_embedding_vulnerable_groups') returns Spark NLP model_anno_obj mpnet_embedding_vulnerable_groups\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_mnr_v2_fine_tuned') returns Spark NLP model_anno_obj mpnet_mnr_v2_fine_tuned\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_multilabel_sector_classifier') returns Spark NLP model_anno_obj mpnet_multilabel_sector_classifier\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_nli_sts') returns Spark NLP model_anno_obj mpnet_nli_sts\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_retriever_squad2') returns Spark NLP model_anno_obj mpnet_retriever_squad2\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_snli') returns Spark NLP model_anno_obj mpnet_snli\n", + "nlu.load('en.embed_sentence.mpnet.mpnet_snli_negatives') returns Spark NLP model_anno_obj mpnet_snli_negatives\n", + "nlu.load('en.embed_sentence.mpnet.multi_qa_mpnet_base_cos_v1') returns Spark NLP model_anno_obj multi_qa_mpnet_base_cos_v1\n", + "nlu.load('en.embed_sentence.mpnet.multi_qa_mpnet_base_cos_v1_navteca') returns Spark NLP model_anno_obj multi_qa_mpnet_base_cos_v1_navteca\n", + "nlu.load('en.embed_sentence.mpnet.multi_qa_mpnet_base_cos_v1_sentence_transformers') returns Spark NLP model_anno_obj multi_qa_mpnet_base_cos_v1_sentence_transformers\n", + "nlu.load('en.embed_sentence.mpnet.multi_qa_mpnet_base_dot_v1') returns Spark NLP model_anno_obj multi_qa_mpnet_base_dot_v1\n", + "nlu.load('en.embed_sentence.mpnet.multi_qa_mpnet_base_dot_v1_eclass') returns Spark NLP model_anno_obj multi_qa_mpnet_base_dot_v1_eclass\n", + "nlu.load('en.embed_sentence.mpnet.multi_qa_mpnet_base_dot_v1_legal_finetune') returns Spark NLP model_anno_obj multi_qa_mpnet_base_dot_v1_legal_finetune\n", + 
"nlu.load('en.embed_sentence.mpnet.multi_qa_mpnet_base_dot_v1_model_embeddings') returns Spark NLP model_anno_obj multi_qa_mpnet_base_dot_v1_model_embeddings\n", + "nlu.load('en.embed_sentence.mpnet.multi_qa_mpnet_base_dot_v1_sentence_transformers') returns Spark NLP model_anno_obj multi_qa_mpnet_base_dot_v1_sentence_transformers\n", + "nlu.load('en.embed_sentence.mpnet.multi_qa_v1_mpnet_asymmetric_a') returns Spark NLP model_anno_obj multi_qa_v1_mpnet_asymmetric_a\n", + "nlu.load('en.embed_sentence.mpnet.multi_qa_v1_mpnet_asymmetric_q') returns Spark NLP model_anno_obj multi_qa_v1_mpnet_asymmetric_q\n", + "nlu.load('en.embed_sentence.mpnet.multi_qa_v1_mpnet_cls_dot') returns Spark NLP model_anno_obj multi_qa_v1_mpnet_cls_dot\n", + "nlu.load('en.embed_sentence.mpnet.nli_mpnet_base_v2') returns Spark NLP model_anno_obj nli_mpnet_base_v2\n", + "nlu.load('en.embed_sentence.mpnet.nli_mpnet_base_v2_sentence_transformers') returns Spark NLP model_anno_obj nli_mpnet_base_v2_sentence_transformers\n", + "nlu.load('en.embed_sentence.mpnet.nooks_amd_detection_realtime') returns Spark NLP model_anno_obj nooks_amd_detection_realtime\n", + "nlu.load('en.embed_sentence.mpnet.nooks_amd_detection_v2_full') returns Spark NLP model_anno_obj nooks_amd_detection_v2_full\n", + "nlu.load('en.embed_sentence.mpnet.nps_psb_lds') returns Spark NLP model_anno_obj nps_psb_lds\n", + "nlu.load('en.embed_sentence.mpnet.ouvrage_classif') returns Spark NLP model_anno_obj ouvrage_classif\n", + "nlu.load('en.embed_sentence.mpnet.paraphrase_mpnet_base_v2') returns Spark NLP model_anno_obj paraphrase_mpnet_base_v2\n", + "nlu.load('en.embed_sentence.mpnet.paraphrase_mpnet_base_v2_finetuned_polifact') returns Spark NLP model_anno_obj paraphrase_mpnet_base_v2_finetuned_polifact\n", + "nlu.load('en.embed_sentence.mpnet.paraphrase_mpnet_base_v2_fuzzy_matcher') returns Spark NLP model_anno_obj paraphrase_mpnet_base_v2_fuzzy_matcher\n", + 
"nlu.load('en.embed_sentence.mpnet.paraphrase_mpnet_base_v2_sentence_transformers') returns Spark NLP model_anno_obj paraphrase_mpnet_base_v2_sentence_transformers\n", + "nlu.load('en.embed_sentence.mpnet.paraphrase_mpnet_base_v2_setfit_sst2') returns Spark NLP model_anno_obj paraphrase_mpnet_base_v2_setfit_sst2\n", + "nlu.load('en.embed_sentence.mpnet.patentsberta') returns Spark NLP model_anno_obj patentsberta\n", + "nlu.load('en.embed_sentence.mpnet.patentsberta_v2') returns Spark NLP model_anno_obj patentsberta_v2\n", + "nlu.load('en.embed_sentence.mpnet.pdfsegs') returns Spark NLP model_anno_obj pdfsegs\n", + "nlu.load('en.embed_sentence.mpnet.pharo_collaborators_classifier') returns Spark NLP model_anno_obj pharo_collaborators_classifier\n", + "nlu.load('en.embed_sentence.mpnet.pharo_example_classifier') returns Spark NLP model_anno_obj pharo_example_classifier\n", + "nlu.load('en.embed_sentence.mpnet.pharo_keyimplementationpoints_classifier') returns Spark NLP model_anno_obj pharo_keyimplementationpoints_classifier\n", + "nlu.load('en.embed_sentence.mpnet.pharo_responsibilities_classifier') returns Spark NLP model_anno_obj pharo_responsibilities_classifier\n", + "nlu.load('en.embed_sentence.mpnet.python_developmentnotes_classifier') returns Spark NLP model_anno_obj python_developmentnotes_classifier\n", + "nlu.load('en.embed_sentence.mpnet.python_expand_classifier') returns Spark NLP model_anno_obj python_expand_classifier\n", + "nlu.load('en.embed_sentence.mpnet.python_parameters_classifier') returns Spark NLP model_anno_obj python_parameters_classifier\n", + "nlu.load('en.embed_sentence.mpnet.python_summary_classifier') returns Spark NLP model_anno_obj python_summary_classifier\n", + "nlu.load('en.embed_sentence.mpnet.python_usage_classifier') returns Spark NLP model_anno_obj python_usage_classifier\n", + "nlu.load('en.embed_sentence.mpnet.reddit_single_context_mpnet_base') returns Spark NLP model_anno_obj reddit_single_context_mpnet_base\n", + 
"nlu.load('en.embed_sentence.mpnet.retriever_coding_guru_adapted') returns Spark NLP model_anno_obj retriever_coding_guru_adapted\n", + "nlu.load('en.embed_sentence.mpnet.review_intent_20230116') returns Spark NLP model_anno_obj review_intent_20230116\n", + "nlu.load('en.embed_sentence.mpnet.review_multiclass_20230116') returns Spark NLP model_anno_obj review_multiclass_20230116\n", + "nlu.load('en.embed_sentence.mpnet.sb_temfac') returns Spark NLP model_anno_obj sb_temfac\n", + "nlu.load('en.embed_sentence.mpnet.sbert_paper') returns Spark NLP model_anno_obj sbert_paper\n", + "nlu.load('en.embed_sentence.mpnet.sentence_transformers_bible_reference_final') returns Spark NLP model_anno_obj sentence_transformers_bible_reference_final\n", + "nlu.load('en.embed_sentence.mpnet.sentiment140_fewshot') returns Spark NLP model_anno_obj sentiment140_fewshot\n", + "nlu.load('en.embed_sentence.mpnet.setfit_ag_news_endpoint') returns Spark NLP model_anno_obj setfit_ag_news_endpoint\n", + "nlu.load('en.embed_sentence.mpnet.setfit_all_data') returns Spark NLP model_anno_obj setfit_all_data\n", + "nlu.load('en.embed_sentence.mpnet.setfit_ds_version_0_0_1') returns Spark NLP model_anno_obj setfit_ds_version_0_0_1\n", + "nlu.load('en.embed_sentence.mpnet.setfit_ds_version_0_0_2') returns Spark NLP model_anno_obj setfit_ds_version_0_0_2\n", + "nlu.load('en.embed_sentence.mpnet.setfit_ds_version_0_0_4') returns Spark NLP model_anno_obj setfit_ds_version_0_0_4\n", + "nlu.load('en.embed_sentence.mpnet.setfit_ds_version_0_0_5') returns Spark NLP model_anno_obj setfit_ds_version_0_0_5\n", + "nlu.load('en.embed_sentence.mpnet.setfit_ethos_multilabel_example_lewtun') returns Spark NLP model_anno_obj setfit_ethos_multilabel_example_lewtun\n", + "nlu.load('en.embed_sentence.mpnet.setfit_ethos_multilabel_example_neilthematic') returns Spark NLP model_anno_obj setfit_ethos_multilabel_example_neilthematic\n", + "nlu.load('en.embed_sentence.mpnet.setfit_few_shot_classifier') returns Spark NLP 
model_anno_obj setfit_few_shot_classifier\n", + "nlu.load('en.embed_sentence.mpnet.setfit_finetuned_financial_text') returns Spark NLP model_anno_obj setfit_finetuned_financial_text\n", + "nlu.load('en.embed_sentence.mpnet.setfit_ft_sentinent_eval') returns Spark NLP model_anno_obj setfit_ft_sentinent_eval\n", + "nlu.load('en.embed_sentence.mpnet.setfit_model_feb11_misinformation_on_law') returns Spark NLP model_anno_obj setfit_model_feb11_misinformation_on_law\n", + "nlu.load('en.embed_sentence.mpnet.setfit_model_pradipta11') returns Spark NLP model_anno_obj setfit_model_pradipta11\n", + "nlu.load('en.embed_sentence.mpnet.setfit_model_rajistics') returns Spark NLP model_anno_obj setfit_model_rajistics\n", + "nlu.load('en.embed_sentence.mpnet.setfit_model_test_sensitve_v1') returns Spark NLP model_anno_obj setfit_model_test_sensitve_v1\n", + "nlu.load('en.embed_sentence.mpnet.setfit_occupation') returns Spark NLP model_anno_obj setfit_occupation\n", + "nlu.load('en.embed_sentence.mpnet.setfit_ostrom') returns Spark NLP model_anno_obj setfit_ostrom\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p1') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p1\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p1_comm') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p1_comm\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p1_life') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p1_life\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p1_likes') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p1_likes\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p3_bhvr') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p3_bhvr\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p3_cons') returns Spark NLP 
model_anno_obj setfit_zero_shot_classification_pbsp_p3_cons\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p3_dur') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p3_dur\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p3_func') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p3_func\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p3_sev') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p3_sev\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p3_trig') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p3_trig\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p4_achiev') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p4_achiev\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p4_meas') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p4_meas\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p4_rel') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p4_rel\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p4_specific') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p4_specific\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_p4_time') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_p4_time\n", + "nlu.load('en.embed_sentence.mpnet.setfit_zero_shot_classification_pbsp_q8a_azure_gpt35') returns Spark NLP model_anno_obj setfit_zero_shot_classification_pbsp_q8a_azure_gpt35\n", + "nlu.load('en.embed_sentence.mpnet.shona_mpnet_base_snli_mnli') returns Spark NLP model_anno_obj shona_mpnet_base_snli_mnli\n", + "nlu.load('en.embed_sentence.mpnet.sml_ukr_message_classifier') returns Spark NLP model_anno_obj 
sml_ukr_message_classifier\n", + "nlu.load('en.embed_sentence.mpnet.sml_ukr_word_classifier_medium') returns Spark NLP model_anno_obj sml_ukr_word_classifier_medium\n", + "nlu.load('en.embed_sentence.mpnet.spiced') returns Spark NLP model_anno_obj spiced\n", + "nlu.load('en.embed_sentence.mpnet.stackoverflow_mpnet_base') returns Spark NLP model_anno_obj stackoverflow_mpnet_base\n", + "nlu.load('en.embed_sentence.mpnet.stsb_mpnet_base_v2') returns Spark NLP model_anno_obj stsb_mpnet_base_v2\n", + "nlu.load('en.embed_sentence.mpnet.test_food') returns Spark NLP model_anno_obj test_food\n", + "nlu.load('en.embed_sentence.mpnet.test_model_001') returns Spark NLP model_anno_obj test_model_001\n", + "nlu.load('en.embed_sentence.mpnet.testing_setfit') returns Spark NLP model_anno_obj testing_setfit\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnet_hf_internal_testing') returns Spark NLP model_anno_obj tiny_random_mpnet_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetformaskedlm_hf_internal_testing') returns Spark NLP model_anno_obj tiny_random_mpnetformaskedlm_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetformaskedlm_hf_tiny_model_private') returns Spark NLP model_anno_obj tiny_random_mpnetformaskedlm_hf_tiny_model_private\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetformultiplechoice') returns Spark NLP model_anno_obj tiny_random_mpnetformultiplechoice\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetforquestionanswering_hf_internal_testing') returns Spark NLP model_anno_obj tiny_random_mpnetforquestionanswering_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetforquestionanswering_hf_tiny_model_private') returns Spark NLP model_anno_obj tiny_random_mpnetforquestionanswering_hf_tiny_model_private\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetforsequenceclassification_hf_internal_testing') returns Spark NLP model_anno_obj 
tiny_random_mpnetforsequenceclassification_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetforsequenceclassification_hf_tiny_model_private') returns Spark NLP model_anno_obj tiny_random_mpnetforsequenceclassification_hf_tiny_model_private\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetfortokenclassification_hf_internal_testing') returns Spark NLP model_anno_obj tiny_random_mpnetfortokenclassification_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetfortokenclassification_hf_tiny_model_private') returns Spark NLP model_anno_obj tiny_random_mpnetfortokenclassification_hf_tiny_model_private\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetmodel_hf_internal_testing') returns Spark NLP model_anno_obj tiny_random_mpnetmodel_hf_internal_testing\n", + "nlu.load('en.embed_sentence.mpnet.tiny_random_mpnetmodel_hf_tiny_model_private') returns Spark NLP model_anno_obj tiny_random_mpnetmodel_hf_tiny_model_private\n", + "nlu.load('en.embed_sentence.mpnet.vulnerable_groups') returns Spark NLP model_anno_obj vulnerable_groups\n", + "nlu.load('en.embed_sentence.roberta.base') returns Spark NLP model_anno_obj sent_roberta_base\n", + "nlu.load('en.embed_sentence.roberta.large') returns Spark NLP model_anno_obj sent_roberta_large\n", + "nlu.load('en.embed_sentence.small_bert_L10_128') returns Spark NLP model_anno_obj sent_small_bert_L10_128\n", + "nlu.load('en.embed_sentence.small_bert_L10_256') returns Spark NLP model_anno_obj sent_small_bert_L10_256\n", + "nlu.load('en.embed_sentence.small_bert_L10_512') returns Spark NLP model_anno_obj sent_small_bert_L10_512\n", + "nlu.load('en.embed_sentence.small_bert_L10_768') returns Spark NLP model_anno_obj sent_small_bert_L10_768\n", + "nlu.load('en.embed_sentence.small_bert_L12_128') returns Spark NLP model_anno_obj sent_small_bert_L12_128\n", + "nlu.load('en.embed_sentence.small_bert_L12_256') returns Spark NLP model_anno_obj sent_small_bert_L12_256\n", + 
"nlu.load('en.embed_sentence.small_bert_L12_512') returns Spark NLP model_anno_obj sent_small_bert_L12_512\n", + "nlu.load('en.embed_sentence.small_bert_L12_768') returns Spark NLP model_anno_obj sent_small_bert_L12_768\n", + "nlu.load('en.embed_sentence.small_bert_L2_128') returns Spark NLP model_anno_obj sent_small_bert_L2_128\n", + "nlu.load('en.embed_sentence.small_bert_L2_256') returns Spark NLP model_anno_obj sent_small_bert_L2_256\n", + "nlu.load('en.embed_sentence.small_bert_L2_512') returns Spark NLP model_anno_obj sent_small_bert_L2_512\n", + "nlu.load('en.embed_sentence.small_bert_L2_768') returns Spark NLP model_anno_obj sent_small_bert_L2_768\n", + "nlu.load('en.embed_sentence.small_bert_L4_128') returns Spark NLP model_anno_obj sent_small_bert_L4_128\n", + "nlu.load('en.embed_sentence.small_bert_L4_256') returns Spark NLP model_anno_obj sent_small_bert_L4_256\n", + "nlu.load('en.embed_sentence.small_bert_L4_512') returns Spark NLP model_anno_obj sent_small_bert_L4_512\n", + "nlu.load('en.embed_sentence.small_bert_L4_768') returns Spark NLP model_anno_obj sent_small_bert_L4_768\n", + "nlu.load('en.embed_sentence.small_bert_L6_128') returns Spark NLP model_anno_obj sent_small_bert_L6_128\n", + "nlu.load('en.embed_sentence.small_bert_L6_256') returns Spark NLP model_anno_obj sent_small_bert_L6_256\n", + "nlu.load('en.embed_sentence.small_bert_L6_512') returns Spark NLP model_anno_obj sent_small_bert_L6_512\n", + "nlu.load('en.embed_sentence.small_bert_L6_768') returns Spark NLP model_anno_obj sent_small_bert_L6_768\n", + "nlu.load('en.embed_sentence.small_bert_L8_128') returns Spark NLP model_anno_obj sent_small_bert_L8_128\n", + "nlu.load('en.embed_sentence.small_bert_L8_256') returns Spark NLP model_anno_obj sent_small_bert_L8_256\n", + "nlu.load('en.embed_sentence.small_bert_L8_512') returns Spark NLP model_anno_obj sent_small_bert_L8_512\n", + "nlu.load('en.embed_sentence.small_bert_L8_768') returns Spark NLP model_anno_obj sent_small_bert_L8_768\n", 
+ "nlu.load('en.embed_sentence.instructor_base') returns Spark NLP model_anno_obj instructor_base\n", + "nlu.load('en.embed_sentence.instructor_large') returns Spark NLP model_anno_obj instructor_large\n", + "nlu.load('en.embed_sentence.tfhub_use') returns Spark NLP model_anno_obj tfhub_use\n", + "nlu.load('en.embed_sentence.tfhub_use.lg') returns Spark NLP model_anno_obj tfhub_use_lg\n", + "nlu.load('en.embed_sentence.use') returns Spark NLP model_anno_obj tfhub_use\n", + "nlu.load('en.embed_sentence.use.lg') returns Spark NLP model_anno_obj tfhub_use_lg\n", + "For language NLU provides the following Models : \n", + "nlu.load('es.embed_sentence.bert.base_cased') returns Spark NLP model_anno_obj sent_bert_base_cased\n", + "nlu.load('es.embed_sentence.bert.base_uncased') returns Spark NLP model_anno_obj sent_bert_base_uncased\n", + "nlu.load('es.embed_sentence.mpnet.mpnet_embedding_negation_categories_classifier') returns Spark NLP model_anno_obj mpnet_embedding_negation_categories_classifier\n", + "nlu.load('es.embed_sentence.mpnet.mpnet_embedding_setfit_alpaca_es_unprocessable_sample_detection') returns Spark NLP model_anno_obj mpnet_embedding_setfit_alpaca_es_unprocessable_sample_detection\n", + "nlu.load('es.embed_sentence.mpnet.mpnet_embedding_setfit_alpaca_spanish_unprocessable_sample_detection') returns Spark NLP model_anno_obj mpnet_embedding_setfit_alpaca_spanish_unprocessable_sample_detection\n", + "nlu.load('es.embed_sentence.mpnet.negation_categories_classifier') returns Spark NLP model_anno_obj negation_categories_classifier\n", + "nlu.load('es.embed_sentence.mpnet.setfit_alpaca_spanish_unprocessable_sample_detection') returns Spark NLP model_anno_obj setfit_alpaca_spanish_unprocessable_sample_detection\n", + "For language NLU provides the following Models : \n", + "nlu.load('fi.embed_sentence.bert') returns Spark NLP model_anno_obj bert_base_finnish_uncased\n", + "nlu.load('fi.embed_sentence.bert.cased') returns Spark NLP model_anno_obj 
bert_base_finnish_cased\n", + "nlu.load('fi.embed_sentence.bert.uncased') returns Spark NLP model_anno_obj bert_base_finnish_uncased\n", + "For language NLU provides the following Models : \n", + "nlu.load('fr.embed_sentence.mpnet.biencoder_all_mpnet_base_v2_mmarcofr') returns Spark NLP model_anno_obj biencoder_all_mpnet_base_v2_mmarcofr\n", + "nlu.load('fr.embed_sentence.mpnet.biencoder_multi_qa_mpnet_base_cos_v1_mmarcofr') returns Spark NLP model_anno_obj biencoder_multi_qa_mpnet_base_cos_v1_mmarcofr\n", + "nlu.load('fr.embed_sentence.mpnet.mpnet_embedding_biencoder_all_mpnet_base_v2_mmarcofr') returns Spark NLP model_anno_obj mpnet_embedding_biencoder_all_mpnet_base_v2_mmarcoFR\n", + "nlu.load('fr.embed_sentence.mpnet.mpnet_embedding_biencoder_multi_qa_mpnet_base_cos_v1_mmarcofr') returns Spark NLP model_anno_obj mpnet_embedding_biencoder_multi_qa_mpnet_base_cos_v1_mmarcoFR\n", + "For language NLU provides the following Models : \n", + "nlu.load('ha.embed_sentence.xlm_roberta') returns Spark NLP model_anno_obj sent_xlm_roberta_base_finetuned_hausa\n", + "For language NLU provides the following Models : \n", + "nlu.load('ig.embed_sentence.xlm_roberta') returns Spark NLP model_anno_obj sent_xlm_roberta_base_finetuned_igbo\n", + "For language NLU provides the following Models : \n", + "nlu.load('lg.embed_sentence.xlm_roberta') returns Spark NLP model_anno_obj sent_xlm_roberta_base_finetuned_luganda\n", + "For language NLU provides the following Models : \n", + "nlu.load('nl.embed_sentence.bert.base_cased') returns Spark NLP model_anno_obj sent_bert_base_cased\n", + "For language NLU provides the following Models : \n", + "nlu.load('pcm.embed_sentence.xlm_roberta') returns Spark NLP model_anno_obj sent_xlm_roberta_base_finetuned_naija\n", + "For language NLU provides the following Models : \n", + "nlu.load('pt.embed_sentence.bert.base_legal') returns Spark NLP model_anno_obj sbert_legal_bertimbau_base_tsdae_sts\n", + 
"nlu.load('pt.embed_sentence.bert.cased_large_legal') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_sts_v0.1\n", + "nlu.load('pt.embed_sentence.bert.large_legal') returns Spark NLP model_anno_obj sbert_legal_bertimbau_large_gpl_sts\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_sts_v0.10.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_sts_v0.10\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_sts_v0.2.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_sts_v0.2\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_sts_v0.3.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_sts_v0.3\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_sts_v0.4.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_sts_v0.4\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_sts_v0.5.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_sts_v0.5\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_sts_v0.7.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_sts_v0.7\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_sts_v0.8.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_sts_v0.8\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_sts_v0.9.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_sts_v0.9\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_sts_v1.0.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_sts_v1.0\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_v0.11_gpl_nli_sts_v0.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_v0.11_gpl_nli_sts_v0\n", + 
"nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_v0.11_gpl_nli_sts_v1.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_v0.11_gpl_nli_sts_v1\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_v0.11_nli_sts_v0.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_v0.11_nli_sts_v0\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_v0.11_nli_sts_v1.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_v0.11_nli_sts_v1\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_v0.11_sts_v0.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_v0.11_sts_v0\n", + "nlu.load('pt.embed_sentence.bert.legal.cased_large_mlm_v0.11_sts_v1.by_stjiris') returns Spark NLP model_anno_obj sbert_bert_large_portuguese_cased_legal_mlm_v0.11_sts_v1\n", + "nlu.load('pt.embed_sentence.bert.v2_base_legal') returns Spark NLP model_anno_obj sbert_legal_bertimbau_sts_base_ma_v2\n", + "nlu.load('pt.embed_sentence.bert.v2_large_legal') returns Spark NLP model_anno_obj sbert_legal_bertimbau_large_tsdae_sts_v2\n", + "nlu.load('pt.embed_sentence.bertimbau.legal.assin.base.by_rufimelo') returns Spark NLP model_anno_obj sbert_legal_bertimbau_sts_base_ma\n", + "nlu.load('pt.embed_sentence.bertimbau.legal.assin2.base.by_rufimelo') returns Spark NLP model_anno_obj sbert_legal_bertimbau_sts_base\n", + "nlu.load('pt.embed_sentence.bertimbau.legal.large_sts_by_rufimelo') returns Spark NLP model_anno_obj sbert_legal_bertimbau_sts_large\n", + "nlu.load('pt.embed_sentence.bertimbau.legal.large_sts_ma.by_rufimelo') returns Spark NLP model_anno_obj sbert_legal_bertimbau_sts_large_ma\n", + "nlu.load('pt.embed_sentence.bertimbau.legal.large_sts_ma_v3.by_rufimelo') returns Spark NLP model_anno_obj sbert_legal_bertimbau_sts_large_ma_v3\n", + "nlu.load('pt.embed_sentence.bertimbau.legal.large_tsdae_sts.by_rufimelo') returns Spark NLP 
model_anno_obj sbert_legal_bertimbau_large_tsdae_sts\n", + "nlu.load('pt.embed_sentence.bertimbau.legal.large_tsdae_sts_v4.by_rufimelo') returns Spark NLP model_anno_obj sbert_legal_bertimbau_large_tsdae_sts_v4\n", + "nlu.load('pt.embed_sentence.bertimbau.legal.large_tsdae_v4_gpl_sts.by_rufimelo') returns Spark NLP model_anno_obj sbert_legal_bertimbau_large_tsdae_v4_gpl_sts\n", + "nlu.load('pt.embed_sentence.bertimbau.legal.v2_large_sts_v2.by_rufimelo') returns Spark NLP model_anno_obj sbert_legal_bertimbau_sts_large_v2\n", + "nlu.load('pt.embed_sentence.bertimbau.legal.v2_large_v2_sts.by_rufimelo') returns Spark NLP model_anno_obj sbert_legal_bertimbau_large_v2_sts\n", + "For language NLU provides the following Models : \n", + "nlu.load('rw.embed_sentence.xlm_roberta') returns Spark NLP model_anno_obj sent_xlm_roberta_base_finetuned_kinyarwanda\n", + "For language NLU provides the following Models : \n", + "nlu.load('sv.embed_sentence.bert.base_cased') returns Spark NLP model_anno_obj sent_bert_base_cased\n", + "For language NLU provides the following Models : \n", + "nlu.load('sw.embed_sentence.xlm_roberta') returns Spark NLP model_anno_obj sent_xlm_roberta_base_finetuned_swahili\n", + "For language NLU provides the following Models : \n", + "nlu.load('wo.embed_sentence.xlm_roberta') returns Spark NLP model_anno_obj sent_xlm_roberta_base_finetuned_wolof\n", + "For language NLU provides the following Models : \n", + "nlu.load('xx.embed_sentence') returns Spark NLP model_anno_obj sent_bert_multi_cased\n", + "nlu.load('xx.embed_sentence.bert') returns Spark NLP model_anno_obj sent_bert_multi_cased\n", + "nlu.load('xx.embed_sentence.bert.cased') returns Spark NLP model_anno_obj sent_bert_multi_cased\n", + "nlu.load('xx.embed_sentence.bert.muril') returns Spark NLP model_anno_obj sent_bert_muril\n", + "nlu.load('xx.embed_sentence.bert_use_cmlm_multi_base') returns Spark NLP model_anno_obj sent_bert_use_cmlm_multi_base\n", + 
"nlu.load('xx.embed_sentence.bert_use_cmlm_multi_base_br') returns Spark NLP model_anno_obj sent_bert_use_cmlm_multi_base_br\n", + "nlu.load('xx.embed_sentence.labse') returns Spark NLP model_anno_obj labse\n", + "nlu.load('xx.embed_sentence.xlm_roberta.base') returns Spark NLP model_anno_obj sent_xlm_roberta_base\n", + "For language NLU provides the following Models : \n", + "nlu.load('yo.embed_sentence.xlm_roberta') returns Spark NLP model_anno_obj sent_xlm_roberta_base_finetuned_yoruba\n", + "For language NLU provides the following Models : \n", + "nlu.load('zh.embed_sentence.bert') returns Spark NLP model_anno_obj sbert_chinese_qmc_finance_v1\n", + "nlu.load('zh.embed_sentence.bert.distilled') returns Spark NLP model_anno_obj sbert_chinese_qmc_finance_v1_distill\n" + ] + } + ], + "source": [ + "nlu.print_all_model_kinds_for_action('embed_sentence')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8qTXkzr9e9Mm" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/nlu/components/embeddings/sentence_bge/BGESentenceEmbedding.py b/nlu/components/embeddings/sentence_bge/BGESentenceEmbedding.py new file mode 100644 index 00000000..5538310f --- /dev/null +++ b/nlu/components/embeddings/sentence_bge/BGESentenceEmbedding.py @@ -0,0 +1,16 @@ +import sparknlp +from sparknlp.annotator import BGEEmbeddings + + +class BGE: + @staticmethod + def get_default_model(): + return BGEEmbeddings.pretrained() \ + .setInputCols(["document"]) \ + 
.setOutputCol("bge_embeddings") + sparknlp.start() + @staticmethod + def get_pretrained_model(name, language, bucket=None): + return BGEEmbeddings.pretrained(name,language,bucket) \ + .setInputCols(["document"]) \ + .setOutputCol("bge_embeddings") diff --git a/nlu/components/embeddings/sentence_bge/__init__.py b/nlu/components/embeddings/sentence_bge/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nlu/pipe/col_substitution/name_deduction/name_deductable_annotators_OS.py b/nlu/pipe/col_substitution/name_deduction/name_deductable_annotators_OS.py index 882e34f7..54fe5665 100644 --- a/nlu/pipe/col_substitution/name_deduction/name_deductable_annotators_OS.py +++ b/nlu/pipe/col_substitution/name_deduction/name_deductable_annotators_OS.py @@ -20,6 +20,7 @@ WordEmbeddingsModel , ElmoEmbeddings , E5Embeddings, + BGEEmbeddings, BertSentenceEmbeddings, RoBertaSentenceEmbeddings, UniversalSentenceEncoder, @@ -88,6 +89,7 @@ WordEmbeddingsModel , ElmoEmbeddings , E5Embeddings, + BGEEmbeddings, BertSentenceEmbeddings, RoBertaSentenceEmbeddings, InstructorEmbeddings, diff --git a/nlu/pipe/col_substitution/substitution_map_OS.py b/nlu/pipe/col_substitution/substitution_map_OS.py index 326ec49b..451cb72e 100644 --- a/nlu/pipe/col_substitution/substitution_map_OS.py +++ b/nlu/pipe/col_substitution/substitution_map_OS.py @@ -50,6 +50,9 @@ E5Embeddings: { 'default': substitute_word_embed_cols, }, + BGEEmbeddings: { + 'default': substitute_sent_embed_cols, + }, BertSentenceEmbeddings: { 'default': substitute_sent_embed_cols, }, diff --git a/nlu/spellbook.py b/nlu/spellbook.py index 6fb87f36..6cb5989b 100644 --- a/nlu/spellbook.py +++ b/nlu/spellbook.py @@ -4775,6 +4775,9 @@ class Spellbook: 'en.embed_sentence.biobert.pubmed_base_cased': 'sent_biobert_pubmed_base_cased', 'en.embed_sentence.biobert.pubmed_large_cased': 'sent_biobert_pubmed_large_cased', 'en.embed_sentence.biobert.pubmed_pmc_base_cased': 'sent_biobert_pubmed_pmc_base_cased', + 
'en.embed_sentence.bge_base': 'bge_base', + 'en.embed_sentence.bge_small': 'bge_small', + 'en.embed_sentence.bge_large': 'bge_large', 'en.embed_sentence.covidbert.large_uncased': 'sent_covidbert_large_uncased', 'en.embed_sentence.distil_roberta.distilled_base': 'sent_distilroberta_base', 'en.embed_sentence.doc2vec': 'doc2vec_gigaword_300', @@ -15344,6 +15347,9 @@ class Spellbook: 'bert_wiki_books_squad2': 'BertEmbeddings', 'bert_wiki_books_sst2': 'BertEmbeddings', 'beto_sentiment': 'BertForSequenceClassification', + 'bge_small': 'BGEEmbeddings', + 'bge_base': 'BGEEmbeddings', + 'bge_large': 'BGEEmbeddings', 'binary2image': 'BinaryToImage', 'biobert_clinical_base_cased': 'BertEmbeddings', 'biobert_discharge_base_cased': 'BertEmbeddings', diff --git a/nlu/universe/annotator_class_universe.py b/nlu/universe/annotator_class_universe.py index a6254eed..1224bdea 100644 --- a/nlu/universe/annotator_class_universe.py +++ b/nlu/universe/annotator_class_universe.py @@ -15,7 +15,8 @@ class AnnoClassRef: JSL_anno2_py_class: Dict[JslAnnoId, JslAnnoPyClass] = { A_N.E5_SENTENCE_EMBEDDINGS: 'E5Embeddings', - A_N.INSTRUCTOR_SENTENCE_EMBEDDINGS:'InstructorEmbeddings', + A_N.BGE_SENTENCE_EMBEDDINGS: 'BGEEmbeddings', + A_N.INSTRUCTOR_SENTENCE_EMBEDDINGS: 'InstructorEmbeddings', A_N.WHISPER_FOR_CTC: 'WhisperForCTC', A_N.HUBERT_FOR_CTC: 'HubertForCTC', diff --git a/nlu/universe/component_universes.py b/nlu/universe/component_universes.py index 5ff9447c..2b37a515 100644 --- a/nlu/universe/component_universes.py +++ b/nlu/universe/component_universes.py @@ -75,6 +75,7 @@ from nlu.components.embeddings.longformer.longformer import Longformer from nlu.components.embeddings.roberta.roberta import Roberta from nlu.components.embeddings.sentence_e5.E5SentenceEmbedding import E5 +from nlu.components.embeddings.sentence_bge.BGESentenceEmbedding import BGE from nlu.components.embeddings.sentence_bert.BertSentenceEmbedding import BertSentence from 
nlu.components.embeddings.sentence_roberta.RobertaSentenceEmbedding import RobertaSentence from nlu.components.embeddings.sentence_mpnet.MPNetSentenceEmbedding import MPNetSentence @@ -2420,6 +2421,27 @@ class ComponentUniverse: has_storage_ref=True, is_storage_ref_producer=True, ), + A.BGE_SENTENCE_EMBEDDINGS: partial(NluComponent, + name=A.BGE_SENTENCE_EMBEDDINGS, + type=T.DOCUMENT_EMBEDDING, + get_default_model=BGE.get_default_model, + get_pretrained_model=BGE.get_pretrained_model, + pdf_extractor_methods={'default': default_sentence_embedding_config, + 'default_full': default_full_config, }, + pdf_col_name_substitutor=substitute_sent_embed_cols, + output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING, + node=NLP_FEATURE_NODES.nodes[A.BGE_SENTENCE_EMBEDDINGS], + description='Sentence-level embeddings using BGE (BAAI General Embeddings), a text embedding model whose embeddings can be used for tasks such as classification, retrieval, clustering, and semantic search.', + provider=ComponentBackends.open_source, + license=Licenses.open_source, + computation_context=ComputeContexts.spark, + output_context=ComputeContexts.spark, + jsl_anno_class_id=A.BGE_SENTENCE_EMBEDDINGS, + jsl_anno_py_class=ACR.JSL_anno2_py_class[A.BGE_SENTENCE_EMBEDDINGS], + has_storage_ref=True, + is_storage_ref_producer=True, + ), + A.BERT_FOR_TOKEN_CLASSIFICATION: partial(NluComponent, name=A.BERT_FOR_TOKEN_CLASSIFICATION, type=T.TRANSFORMER_TOKEN_CLASSIFIER, diff --git a/nlu/universe/feature_node_ids.py b/nlu/universe/feature_node_ids.py index 39edaf9c..373d54fa 100644 --- a/nlu/universe/feature_node_ids.py +++ b/nlu/universe/feature_node_ids.py @@ -91,6 +91,7 @@ class NLP_NODE_IDS: CAMEMBERT_FOR_TOKEN_CLASSIFICATION = JslAnnoId('camenbert_for_token_classification') CAMEMBERT_FOR_SEQUENCE_CLASSIFICATION = JslAnnoId('camenbert_for_sequence_classification') E5_SENTENCE_EMBEDDINGS = JslAnnoId('e5_sentence_embeddings') + BGE_SENTENCE_EMBEDDINGS = JslAnnoId('bge_sentence_embeddings') 
BERT_SENTENCE_EMBEDDINGS = JslAnnoId('bert_sentence_embeddings') DISTIL_BERT_EMBEDDINGS = JslAnnoId('distil_bert_embeddings') DISTIL_BERT_FOR_TOKEN_CLASSIFICATION = JslAnnoId('distil_bert_for_token_classification') diff --git a/nlu/universe/feature_node_universes.py b/nlu/universe/feature_node_universes.py index 6035d5ba..9c8aab79 100644 --- a/nlu/universe/feature_node_universes.py +++ b/nlu/universe/feature_node_universes.py @@ -76,6 +76,7 @@ class NLP_FEATURE_NODES: # or Mode Node? A.INSTRUCTOR_SENTENCE_EMBEDDINGS: NlpFeatureNode(A.INSTRUCTOR_SENTENCE_EMBEDDINGS, [F.DOCUMENT], [F.SENTENCE_EMBEDDINGS]), A.E5_SENTENCE_EMBEDDINGS: NlpFeatureNode(A.E5_SENTENCE_EMBEDDINGS, [F.DOCUMENT],[F.SENTENCE_EMBEDDINGS]), + A.BGE_SENTENCE_EMBEDDINGS: NlpFeatureNode(A.BGE_SENTENCE_EMBEDDINGS, [F.DOCUMENT], [F.SENTENCE_EMBEDDINGS]), A.MPNET_SENTENCE_EMBEDDINGS: NlpFeatureNode(A.MPNET_SENTENCE_EMBEDDINGS, [F.DOCUMENT], [F.SENTENCE_EMBEDDINGS]), A.PARTIALLY_IMPLEMENTED: NlpFeatureNode(A.PARTIALLY_IMPLEMENTED, [F.UNKOWN], [F.UNKOWN]), diff --git a/tests/nlu_core_tests/component_tests/embed_tests/sentence_embeddings/sentence_bge_tests.py b/tests/nlu_core_tests/component_tests/embed_tests/sentence_embeddings/sentence_bge_tests.py new file mode 100644 index 00000000..025190e7 --- /dev/null +++ b/tests/nlu_core_tests/component_tests/embed_tests/sentence_embeddings/sentence_bge_tests.py @@ -0,0 +1,18 @@ +import unittest + +from nlu import * + + +class TestBGESentenceEmbeddings(unittest.TestCase): + def test_bge_embeds(self): + pipe = nlu.load("en.embed_sentence.bge_small", verbose=True) + res = pipe.predict( + "query: how much protein should a female eat", + output_level="document" + ) + for c in res: + print(res[c]) + + +if __name__ == "__main__": + unittest.main()