diff --git a/examples/colab/healthcare/medical_named_entity_recognition/NLU_explain_clinical_doc_vop_pipeline.ipynb b/examples/colab/healthcare/medical_named_entity_recognition/NLU_explain_clinical_doc_vop_pipeline.ipynb new file mode 100644 index 00000000..5533cdf0 --- /dev/null +++ b/examples/colab/healthcare/medical_named_entity_recognition/NLU_explain_clinical_doc_vop_pipeline.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"machine_shape":"hm","gpuType":"T4","authorship_tag":"ABX9TyPpHDLYEn+ftr/QOZRewxfO"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","source":["![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)\n","\n","[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/nlu/blob/master/examples/colab/healthcare/medical_named_entity_recognition/NLU_explain_clinical_doc_vop_pipeline.ipynb)\n","\n","# Explain Clinical Document - Voice Of Patient (VOP)\n","\n","This pipeline is designed to:\n","\n","- extract all healthcare-related entities\n","\n","- assign assertion status to the extracted entities\n","\n","- establish relations between the extracted entities\n","\n","from the documents transferred from the patient’s sentences. In this pipeline, six NER models, one assertion model, and one relation extraction model were used to achieve those tasks."],"metadata":{"id":"jQMCz8dLv5AR"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"gCqJ0slmBlb6"},"outputs":[],"source":["! pip install nlu pyspark==3.1.2"]},{"cell_type":"code","source":["! 
pip install johnsnowlabs"],"metadata":{"id":"HXBlOsnfCJpt"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["import json, os\n","from google.colab import files\n","\n","if 'spark_jsl.json' not in os.listdir():\n"," license_keys = files.upload()\n"," os.rename(list(license_keys.keys())[0], 'spark_jsl.json')\n","\n","with open('spark_jsl.json') as f:\n"," license_keys = json.load(f)\n","\n","# Defining license key-value pairs as local variables\n","locals().update(license_keys)\n","os.environ.update(license_keys)"],"metadata":{"id":"k4dnH0zuCZLI","executionInfo":{"status":"ok","timestamp":1712211006003,"user_tz":240,"elapsed":5,"user":{"displayName":"Samed Koçer","userId":"16161902236051002702"}}},"execution_count":1,"outputs":[]},{"cell_type":"code","source":["# Installing pyspark and spark-nlp\n","! pip install --upgrade -q pyspark==3.1.2 spark-nlp==$PUBLIC_VERSION\n","\n","# Installing NLU\n","! pip install --upgrade --q nlu --no-dependencies\n","\n","# Installing Spark NLP Healthcare\n","! pip install --upgrade -q spark-nlp-jsl==$JSL_VERSION --extra-index-url https://pypi.johnsnowlabs.com/$SECRET\n","\n","# Installing Spark NLP Display Library for visualization\n","! pip install -q spark-nlp-display"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Y_X8oTmdCbmo","executionInfo":{"status":"ok","timestamp":1712209917378,"user_tz":240,"elapsed":18403,"user":{"displayName":"Samed Koçer","userId":"16161902236051002702"}},"outputId":"dbd0ad1d-cbc9-4888-808b-ec9ba1a24773"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n","johnsnowlabs 5.3.2 requires pyspark==3.4.0, but you have pyspark 3.1.2 which is incompatible.\u001b[0m\u001b[31m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.6/507.6 kB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h"]}]},{"cell_type":"code","source":["import json\n","import os\n","\n","import sparknlp\n","import sparknlp_jsl\n","import nlu\n","\n","from sparknlp.base import *\n","from sparknlp.annotator import *\n","from sparknlp_jsl.annotator import *\n","\n","from pyspark.sql import SparkSession\n","from pyspark.sql import functions as F\n","from pyspark.ml import Pipeline,PipelineModel\n","\n","import pandas as pd\n","pd.set_option('display.max_colwidth', 200)\n","\n","import warnings\n","warnings.filterwarnings('ignore')\n","\n","params = {\"spark.driver.memory\":\"16G\",\n"," \"spark.kryoserializer.buffer.max\":\"2000M\",\n"," \"spark.driver.maxResultSize\":\"2000M\"}\n","\n","print(\"Spark NLP Version :\", sparknlp.version())\n","print(\"Spark NLP_JSL Version :\", sparknlp_jsl.version())\n","\n","spark = sparknlp_jsl.start(license_keys['SECRET'],params=params)\n","\n","spark"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":257},"id":"jVNmOh_hCc_z","executionInfo":{"status":"ok","timestamp":1712211019507,"user_tz":240,"elapsed":9459,"user":{"displayName":"Samed Koçer","userId":"16161902236051002702"}},"outputId":"be72aad1-264d-484d-c4aa-a8fd4a9d242a"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["Spark NLP Version : 5.3.1\n","Spark NLP_JSL Version : 5.3.1\n"]},{"output_type":"execute_result","data":{"text/plain":[""],"text/html":["\n","
\n","

SparkSession - in-memory

\n"," \n","
\n","

SparkContext

\n","\n","

Spark UI

\n","\n","
\n","
Version
\n","
v3.1.2
\n","
Master
\n","
local[*]
\n","
AppName
\n","
Spark NLP Licensed
\n","
\n","
\n"," \n","
\n"," "]},"metadata":{},"execution_count":2}]},{"cell_type":"code","source":["pipe = nlu.load(\"en.explain_doc.pipeline_vop\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"sodmB9vZEsQZ","executionInfo":{"status":"ok","timestamp":1712211195860,"user_tz":240,"elapsed":165222,"user":{"displayName":"Samed Koçer","userId":"16161902236051002702"}},"outputId":"ca93c286-0bdd-402a-ee6c-8200ddc7d080"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["Warning::Spark Session already created, some configs may not take.\n","Warning::Spark Session already created, some configs may not take.\n","explain_clinical_doc_radiology download started this may take some time.\n","Approx size to download 1.7 GB\n","[OK!]\n"]}]},{"cell_type":"code","source":["text = [\"\"\"I had been feeling really tired all the time and was losing weight without even trying. My doctor checked my sugar levels and they came out to be high. So, I have type 2 diabetes.\n","He put me on two medications - I take metformin 500 mg twice a day, and glipizide 5 mg before breakfast and dinner. I also have to watch what I eat and try to exercise more.\n","Now, I also have chronic acid reflux disease or GERD. 
Now I take daily omeprazole 20 mg to control the heartburn symptoms.\"\"\"]"],"metadata":{"id":"nizEiuR9Fi51","executionInfo":{"status":"ok","timestamp":1712211239090,"user_tz":240,"elapsed":5,"user":{"displayName":"Samed Koçer","userId":"16161902236051002702"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","source":["df = pipe.predict(text)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"KfRJj9jOGjUN","executionInfo":{"status":"ok","timestamp":1712211256451,"user_tz":240,"elapsed":13309,"user":{"displayName":"Samed Koçer","userId":"16161902236051002702"}},"outputId":"a14c06d1-1cbd-412a-919c-55255a3bc4e6"},"execution_count":5,"outputs":[{"output_type":"stream","name":"stdout","text":["\u001b[91m🚨 Your Spark-Healthcare is outdated, installed==5.3.1 but latest version==5.3.0\n","You can run \u001b[92m nlp.install() \u001b[39mto update Spark-Healthcare\n","Warning::Spark Session already created, some configs may not take.\n"]}]},{"cell_type":"code","source":["df"],"metadata":{"id":"FLNcNmoHGl_O","colab":{"base_uri":"https://localhost:8080/","height":841},"executionInfo":{"status":"ok","timestamp":1712211265299,"user_tz":240,"elapsed":10,"user":{"displayName":"Samed Koçer","userId":"16161902236051002702"}},"outputId":"a5c97890-1544-4073-a8a7-211fcb294490"},"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" assertion \\\n","0 [Suspected, Confirmed, Confirmed, Confirmed, Suspected, Confirmed] \n","\n"," document \\\n","0 I had been feeling really tired all the time and was losing weight without even trying. My doctor checked my sugar levels and they came out to be high. So, I have type 2 diabetes.\\nHe put me on tw... 
\n","\n"," entities_jsl_ner_chunk \\\n","0 [feeling really tired, losing weight, sugar levels, high, He, metformin, twice a day, glipizide, before breakfast and dinner, acid reflux disease, GERD, daily, omeprazole, heartburn symptoms] \n","\n"," entities_jsl_ner_chunk_class \\\n","0 [Symptom, Symptom, Test, Test_Result, Gender, Drug, Frequency, Drug, Frequency, Disease_Syndrome_Disorder, Disease_Syndrome_Disorder, Frequency, Drug, Symptom] \n","\n"," entities_jsl_ner_chunk_confidence \\\n","0 [0.42706665, 0.7063, 0.81455, 0.8967, 1.0, 0.978, 0.86443335, 0.999, 0.866575, 0.5604667, 0.9963, 0.9646, 0.9959, 0.70985] \n","\n"," entities_jsl_ner_chunk_origin_chunk \\\n","0 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] \n","\n"," entities_jsl_ner_chunk_origin_sentence entities_ner_chexpert_chunk \\\n","0 [0, 0, 1, 1, 3, 3, 3, 3, 3, 5, 5, 6, 6, 6] [] \n","\n"," entities_ner_oncology_chunk entities_ner_oncology_chunk_class ... \\\n","0 [He] [Gender] ... \n","\n"," entities_radiology_ner_chunk_class \\\n","0 [Symptom, Test, Disease_Syndrome_Disorder, Measurements, Units, Measurements, Units, Disease_Syndrome_Disorder, Disease_Syndrome_Disorder, Measurements, Units, Symptom] \n","\n"," entities_radiology_ner_chunk_confidence \\\n","0 [0.6786, 0.40135002, 0.4851, 0.9707, 0.9833, 0.9051, 0.9769, 0.643025, 0.9902, 0.8762, 0.9376, 0.7716] \n","\n"," entities_radiology_ner_chunk_origin_chunk \\\n","0 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] \n","\n"," entities_radiology_ner_chunk_origin_sentence \\\n","0 [0, 1, 2, 3, 3, 3, 3, 5, 5, 6, 6, 6] \n","\n"," matched_pos \\\n","0 [feeling really tired, losing weight, sugar levels, high, diabetes, He, two medications, metformin, 500, mg, twice a day, glipizide, 5, mg, before breakfast and dinner, chronic acid reflux disease... 
\n","\n"," pos \\\n","0 [MC, VHD, VBN, VVG, RR, VVNJ, DB, DD, NN, CC, VBD, VVGJ, NN, II, RR, VVGJ, NN, NN, NN, VVD, NN, NN, NNS, CC, PN, VVB, II, TO, VBI, JJ, NN, NN, NN, MC, VHB, NN, MC, NN, NN, NN, NN, NN, II, MC, NNS,... \n","\n"," relations \\\n","0 NaN \n","\n"," sentence_dl \\\n","0 [I had been feeling really tired all the time and was losing weight without even trying., My doctor checked my sugar levels and they came out to be high., So, I have type 2 diabetes., He put me on... \n","\n"," unlabeled_dependency \\\n","0 [feeling, feeling, feeling, ROOT, tired, feeling, time, time, tired, losing, losing, weight, tired, trying, trying, feeling, feeling, doctor, checked, ROOT, levels, levels, checked, came, came, ch... \n","\n"," word_embedding_embeddings \n","0 [[0.09337666630744934, 0.031265825033187866, 0.152923122048378, -0.24998794496059418, 0.49187055230140686, -0.44001245498657227, 0.14361239969730377, -0.3373923599720001, 0.1620967984199524, 0.066... \n","\n","[1 rows x 29 columns]"],"text/html":["\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
assertiondocumententities_jsl_ner_chunkentities_jsl_ner_chunk_classentities_jsl_ner_chunk_confidenceentities_jsl_ner_chunk_origin_chunkentities_jsl_ner_chunk_origin_sentenceentities_ner_chexpert_chunkentities_ner_oncology_chunkentities_ner_oncology_chunk_class...entities_radiology_ner_chunk_classentities_radiology_ner_chunk_confidenceentities_radiology_ner_chunk_origin_chunkentities_radiology_ner_chunk_origin_sentencematched_posposrelationssentence_dlunlabeled_dependencyword_embedding_embeddings
0[Suspected, Confirmed, Confirmed, Confirmed, Suspected, Confirmed]I had been feeling really tired all the time and was losing weight without even trying. My doctor checked my sugar levels and they came out to be high. So, I have type 2 diabetes.\\nHe put me on tw...[feeling really tired, losing weight, sugar levels, high, He, metformin, twice a day, glipizide, before breakfast and dinner, acid reflux disease, GERD, daily, omeprazole, heartburn symptoms][Symptom, Symptom, Test, Test_Result, Gender, Drug, Frequency, Drug, Frequency, Disease_Syndrome_Disorder, Disease_Syndrome_Disorder, Frequency, Drug, Symptom][0.42706665, 0.7063, 0.81455, 0.8967, 1.0, 0.978, 0.86443335, 0.999, 0.866575, 0.5604667, 0.9963, 0.9646, 0.9959, 0.70985][0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13][0, 0, 1, 1, 3, 3, 3, 3, 3, 5, 5, 6, 6, 6][][He][Gender]...[Symptom, Test, Disease_Syndrome_Disorder, Measurements, Units, Measurements, Units, Disease_Syndrome_Disorder, Disease_Syndrome_Disorder, Measurements, Units, Symptom][0.6786, 0.40135002, 0.4851, 0.9707, 0.9833, 0.9051, 0.9769, 0.643025, 0.9902, 0.8762, 0.9376, 0.7716][0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11][0, 1, 2, 3, 3, 3, 3, 5, 5, 6, 6, 6][feeling really tired, losing weight, sugar levels, high, diabetes, He, two medications, metformin, 500, mg, twice a day, glipizide, 5, mg, before breakfast and dinner, chronic acid reflux disease...[MC, VHD, VBN, VVG, RR, VVNJ, DB, DD, NN, CC, VBD, VVGJ, NN, II, RR, VVGJ, NN, NN, NN, VVD, NN, NN, NNS, CC, PN, VVB, II, TO, VBI, JJ, NN, NN, NN, MC, VHB, NN, MC, NN, NN, NN, NN, NN, II, MC, NNS,...NaN[I had been feeling really tired all the time and was losing weight without even trying., My doctor checked my sugar levels and they came out to be high., So, I have type 2 diabetes., He put me on...[feeling, feeling, feeling, ROOT, tired, feeling, time, time, tired, losing, losing, weight, tired, trying, trying, feeling, feeling, doctor, checked, ROOT, levels, levels, checked, came, came, 
ch...[[0.09337666630744934, 0.031265825033187866, 0.152923122048378, -0.24998794496059418, 0.49187055230140686, -0.44001245498657227, 0.14361239969730377, -0.3373923599720001, 0.1620967984199524, 0.066...
\n","

1 rows × 29 columns

\n","
\n","
\n","\n","
\n"," \n","\n"," \n","\n"," \n","
\n","\n","\n","
\n"," \n"," \n"," \n","
\n","\n","
\n","
\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"df"}},"metadata":{},"execution_count":6}]}]} \ No newline at end of file diff --git a/nlu/spellbook.py b/nlu/spellbook.py index 3b5f93aa..e611753e 100644 --- a/nlu/spellbook.py +++ b/nlu/spellbook.py @@ -10644,6 +10644,7 @@ class Spellbook: 'en.explain_doc.clinical_radiology.pipeline': 'explain_clinical_doc_radiology', 'en.explain_doc.era': 'explain_clinical_doc_era', 'en.explain_doc.clinical_granular': 'explain_clinical_doc_granular', + 'en.explain_doc.pipeline_vop': 'explain_clinical_doc_vop', 'en.icd10_icd9.mapping': 'icd10_icd9_mapping', 'en.icd10cm.umls.mapping': 'icd10cm_umls_mapping', 'en.icd10cm_resolver.pipeline': 'icd10cm_resolver_pipeline', @@ -11128,7 +11129,6 @@ class Spellbook: 'en.relation': 'redl_bodypart_direction_biobert', 'en.relation.ade': 'redl_ade_biobert', 'en.relation.ade_biobert': 're_ade_biobert', - 'en.relation.ade_clinical': 're_ade_clinical', 'en.relation.adverse_drug_events.clinical': 're_ade_clinical', 'en.relation.adverse_drug_events.clinical.biobert': 'redl_ade_biobert', 'en.relation.adverse_drug_events.conversational': 're_ade_conversational', @@ -11455,6 +11455,7 @@ class Spellbook: 'RelationExtractionModel_bc96a0f8b566': 'en.embed.glove.clinical', 'RelationExtractionModel_ce79d77d1bf1': 'en.embed.glove.clinical', 'RelationExtractionModel_d0af74510daa': 'en.embed.glove.clinical', + 'GenericREModel_c7cd3246ff12': 'en.embed.glove.clinical', 'bert_base_cased': 'en.embed.bert.base_cased', 'bert_embeddings_PHS_BERT': 'en.embed.bert_phs', 'biobert_clinical_base_cased': 'en.embed.biobert.clinical_base_cased',