From 843123a67efdf9ef597252548aa0e7557e1525d8 Mon Sep 17 00:00:00 2001 From: SKocer <samedkocer22@gmail.com> Date: Mon, 22 Apr 2024 01:46:06 -0400 Subject: [PATCH 1/2] TextMatcherInternalModel annotator, clinical_deidentification_generic_optimized and clinical_deidentification_subentity_optimized pretrained healthcare pipelines added to nlu --- nlu/spellbook.py | 2 ++ nlu/universe/annotator_class_universe.py | 1 + nlu/universe/component_universes.py | 17 +++++++++++++++++ nlu/universe/feature_node_ids.py | 2 ++ 4 files changed, 22 insertions(+) diff --git a/nlu/spellbook.py b/nlu/spellbook.py index 8637bd78..3478a859 100644 --- a/nlu/spellbook.py +++ b/nlu/spellbook.py @@ -10631,6 +10631,8 @@ class Spellbook: 'en.deid.clinical_wip': 'clinical_deidentification_wip', 'en.deid.glove_augmented.pipeline': 'clinical_deidentification_glove_augmented', 'en.deid.glove_pipeline': 'clinical_deidentification_glove', + 'en.deid.generic_optimized.pipeline': 'clinical_deidentification_generic_optimized', + 'en.deid.subentity_optimized.pipeline': 'clinical_deidentification_subentity_optimized', 'en.deid.med_ner_large.pipeline': 'ner_deid_sd_large_pipeline', 'en.deid.ner_augmented.pipeline': 'ner_deid_augmented_pipeline', 'en.deid.ner_biobert.pipeline': 'ner_deid_biobert_pipeline', diff --git a/nlu/universe/annotator_class_universe.py b/nlu/universe/annotator_class_universe.py index b2f0f22c..96c403e8 100644 --- a/nlu/universe/annotator_class_universe.py +++ b/nlu/universe/annotator_class_universe.py @@ -207,6 +207,7 @@ class AnnoClassRef: A_N.PARTIAL_EntityRulerApproach: 'EntityRulerApproach', A_N.PARTIAL_EntityRulerModel: 'EntityRulerModel', A_N.PARTIAL_TextMatcherModel: 'TextMatcherModel', + A_N.PARTIAL_TextMatcherInternalModel: 'TextMatcherInternalModel', A_N.PARTIAL_BigTextMatcher: 'BigTextMatcher', A_N.PARTIAL_BigTextMatcherModel: 'BigTextMatcherModel', A_N.PARTIAL_DateMatcher: 'DateMatcher', diff --git a/nlu/universe/component_universes.py b/nlu/universe/component_universes.py index 908f047f..cbceaba1 100644 --- a/nlu/universe/component_universes.py +++ b/nlu/universe/component_universes.py @@ -786,6 +786,23 @@ class ComponentUniverse: output_context=ComputeContexts.spark, ), + A.PARTIAL_TextMatcherInternalModel: partial(NluComponent, + name=A.PARTIAL_ChunkMergeApproach, + jsl_anno_class_id=A.PARTIAL_TextMatcherInternalModel, + jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_TextMatcherInternalModel], + node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED], + type=T.PARTIALLY_READY, + pdf_extractor_methods={'default': default_partial_implement_config, + 'default_full': default_full_config, }, + pdf_col_name_substitutor=partially_implemented_substitutor, + output_level=L.DOCUMENT, + description='Not fully integrated', + provider=ComponentBackends.open_source, + license=Licenses.open_source, + computation_context=ComputeContexts.spark, + output_context=ComputeContexts.spark, + ), + A.PARTIAL_BigTextMatcher: partial(NluComponent, name=A.PARTIAL_ChunkMergeApproach, jsl_anno_class_id=A.PARTIAL_BigTextMatcher, diff --git a/nlu/universe/feature_node_ids.py b/nlu/universe/feature_node_ids.py index 655f08ca..15e97aa9 100644 --- a/nlu/universe/feature_node_ids.py +++ b/nlu/universe/feature_node_ids.py @@ -212,8 +212,10 @@ class NLP_NODE_IDS: PARTIAL_EntityRulerApproach = JslAnnoId('PARTIAL_EntityRulerApproach') PARTIAL_EntityRulerModel = JslAnnoId('PARTIAL_EntityRulerModel') PARTIAL_TextMatcherModel = JslAnnoId('PARTIAL_TextMatcherModel') + PARTIAL_TextMatcherModel = JslAnnoId('PARTIAL_TextMatcherModel') PARTIAL_BigTextMatcher = JslAnnoId('PARTIAL_BigTextMatcher') PARTIAL_BigTextMatcherModel = JslAnnoId('PARTIAL_BigTextMatcherModel') + PARTIAL_TextMatcherInternalModel = JslAnnoId('PARTIAL_TextMatcherInternalModel') PARTIAL_DateMatcher = JslAnnoId('PARTIAL_DateMatcher') PARTIAL_MultiDateMatcher = JslAnnoId('PARTIAL_MultiDateMatcher') PARTIAL_RegexMatcher = JslAnnoId('PARTIAL_RegexMatcher') From 7f211d99b8a2eb40e42f64ceaa951280eba68f07 Mon Sep 17 00:00:00 2001 From: Christian Kasim Loan <christian.kasim.loan@gmail.com> Date: Tue, 23 Apr 2024 05:16:08 +0200 Subject: [PATCH 2/2] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7afb2353..23bb7625 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ See how easy it is to use any of the **thousands** of models in 1 line of code, This 1 line let's you visualize and play with **1000+ SOTA NLU & NLP models** in **200** languages ```shell -streamlit run https://raw.githubusercontent.com/JohnSnowLabs/nlu/master/examples/streamlit/01_dashboard.py +streamlit run https://raw.githubusercontent.com/JohnSnowLabs/nlu/master/examples/streamlit/01_dashboard.py ``` <img src="https://raw.githubusercontent.com/JohnSnowLabs/nlu/master/docs/assets/streamlit_docs_assets/gif/start.gif">