Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MedicalQuestionAnswering Integration & Added new pipelines #243

Merged
merged 1 commit into from
Jan 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
18 changes: 18 additions & 0 deletions nlu/components/classifiers/span_medical/span_medical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
class SpanMedical:
    """Factory helpers for the Spark NLP for Healthcare MedicalQuestionAnswering annotator.

    Both loaders lazily import ``sparknlp_jsl`` so the licensed healthcare
    library is only required when a medical QA component is actually built.
    The returned annotator reads the ``document_question`` and ``context``
    columns and writes its prediction to the ``answer`` column.
    """

    @staticmethod
    def get_default_model():
        """Return the default pretrained MedicalQuestionAnswering annotator."""
        from sparknlp_jsl.annotator import MedicalQuestionAnswering

        annotator = MedicalQuestionAnswering.pretrained()
        annotator = annotator.setInputCols(["document_question", "context"])
        return annotator.setOutputCol("answer")

    @staticmethod
    def get_pretrained_model(name, language, bucket=None):
        """Return a specific pretrained MedicalQuestionAnswering annotator.

        :param name: pretrained model name
        :param language: model language code
        :param bucket: optional remote bucket/location to fetch from
        """
        from sparknlp_jsl.annotator import MedicalQuestionAnswering

        annotator = MedicalQuestionAnswering.pretrained(name, language, bucket)
        annotator = annotator.setInputCols(["document_question", "context"])
        return annotator.setOutputCol("answer")
25 changes: 25 additions & 0 deletions nlu/pipe/col_substitution/col_substitution_HC.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,3 +395,28 @@ def substitute_generic_classifier_parser_cols(c, cols, is_unique=True, nlu_ident
logger.info(f'Dropping unmatched metadata_col={col} for c={c}')
# new_cols[col]= f"{new_base_name}_confidence"
return new_cols
def substitute_hc_span_classifier_cols(c, cols, nlu_identifier=True):
    """
    Map raw healthcare QA span-classifier output columns to user-facing names.

    Substring-matches each raw column name and renames it under the ``answer``
    base name (score columns become ``*_confidence``, span offsets become
    ``*_start``/``*_end``, etc.). Columns matching none of the patterns are
    dropped from the result.
    """
    # new_base_name = 'answer' if nlu_identifier == 'UNIQUE' else f'{nlu_identifier}_answer'
    base = 'answer'
    renamed = {}
    for column in cols:
        # Most specific patterns first so e.g. 'answer_results_score' is not
        # swallowed by the plain 'answer_results' match.
        if 'answer_results_score' in column:
            renamed[column] = f'{base}_confidence'
        elif 'span_start_score' in column:
            renamed[column] = f'{base}_start_confidence'
        elif 'span_end_score' in column:
            renamed[column] = f'{base}_end_confidence'
        elif 'start' in column and 'score' not in column:
            renamed[column] = f'{base}_start'
        elif 'end' in column and 'score' not in column:
            renamed[column] = f'{base}_end'
        elif 'sentence' in column:
            renamed[column] = f'{base}_sentence'
        elif 'answer_results' in column:
            renamed[column] = base
    return renamed
4 changes: 3 additions & 1 deletion nlu/spellbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -10598,7 +10598,7 @@ class Spellbook:
'de.deid.pipeline': 'german_deid_pipeline_spark24',
'de.med_ner.deid_generic.pipeline': 'ner_deid_generic_pipeline'},
'en': {

'en.answer_question.clinical_notes_onnx.pipeline': 'clinical_notes_qa_base_onnx_pipeline',
'en.classify.bert_sequence.binary_rct_biobert.pipeline': 'bert_sequence_classifier_binary_rct_biobert_pipeline',
'en.classify.bert_sequence.vop_hcp_consult.pipeline': 'bert_sequence_classifier_vop_hcp_consult_pipeline',
'en.classify.bert_sequence.vop_drug_side_effect.pipeline': 'bert_sequence_classifier_vop_drug_side_effect_pipeline',
Expand Down Expand Up @@ -10634,6 +10634,7 @@ class Spellbook:
'en.explain_doc.clinical_ade': 'explain_clinical_doc_ade',
'en.explain_doc.clinical_radiology.pipeline': 'explain_clinical_doc_radiology',
'en.explain_doc.era': 'explain_clinical_doc_era',
'en.explain_doc.clinical_granular': 'explain_clinical_doc_granular',
'en.icd10_icd9.mapping': 'icd10_icd9_mapping',
'en.icd10cm.umls.mapping': 'icd10cm_umls_mapping',
'en.icd10cm_resolver.pipeline': 'icd10cm_resolver_pipeline',
Expand Down Expand Up @@ -10765,6 +10766,7 @@ class Spellbook:
'en.spell.clinical.pipeline': 'spellcheck_clinical_pipeline',
'en.summarize.biomedical_pubmed.pipeline':'summarizer_biomedical_pubmed_pipeline',
'en.summarize.clinical_guidelines_large.pipeline': 'summarizer_clinical_guidelines_large_pipeline',
'en.summarize.clinical_laymen_onnx.pipeline': 'summarizer_clinical_laymen_onnx_pipeline',
'en.summarize.clinical_jsl_augmented.pipeline': 'summarizer_clinical_jsl_augmented_pipeline',
'en.summarize.clinical_questions.pipeline': 'summarizer_clinical_questions_pipeline',
'en.summarize.generic_jsl.pipeline': 'summarizer_generic_jsl_pipeline',
Expand Down
3 changes: 2 additions & 1 deletion nlu/universe/annotator_class_universe.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class AnnoClassRef:
JSL_anno2_py_class: Dict[JslAnnoId, JslAnnoPyClass] = {

A_N.E5_SENTENCE_EMBEDDINGS: 'E5Embeddings',
A_N.INSTRUCTOR_SENTENCE_EMBEDDINGS:'InstructorEmbeddings',
A_N.INSTRUCTOR_SENTENCE_EMBEDDINGS: 'InstructorEmbeddings',

A_N.WHISPER_FOR_CTC: 'WhisperForCTC',
A_N.HUBERT_FOR_CTC: 'HubertForCTC',
Expand Down Expand Up @@ -240,6 +240,7 @@ class AnnoClassRef:

}
JSL_anno_HC_ref_2_py_class: Dict[JslAnnoId, JslAnnoPyClass] = {
HC_A_N.MEDICAL_QUESTION_ANSWERING: 'MedicalQuestionAnswering',
HC_A_N.MEDICAL_TEXT_GENERATOR: 'MedicalTextGenerator',
HC_A_N.MEDICAL_SUMMARIZER:'MedicalSummarizer',
HC_A_N.ZERO_SHOT_NER: 'ZeroShotNerModel',
Expand Down
22 changes: 22 additions & 0 deletions nlu/universe/component_universes.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from nlu.components.classifiers.span_longformer.span_longformer import SpanLongFormerClassifier
from nlu.components.classifiers.span_roberta.span_roberta import SpanRobertaClassifier
from nlu.components.classifiers.span_xlm_roberta.span_xlm_roberta import SpanXlmRobertaClassifier
from nlu.components.classifiers.span_medical.span_medical import SpanMedical
from nlu.components.classifiers.token_albert.token_albert import TokenAlbert
from nlu.components.classifiers.token_bert.token_bert import TokenBert
from nlu.components.classifiers.token_bert_healthcare.token_bert_healthcare import TokenBertHealthcare
Expand Down Expand Up @@ -3278,6 +3279,27 @@ class ComponentUniverse:
computation_context=ComputeContexts.spark,
output_context=ComputeContexts.spark,
),
H_A.MEDICAL_QUESTION_ANSWERING: partial(NluComponent,
name=H_A.MEDICAL_QUESTION_ANSWERING,
jsl_anno_class_id= H_A.MEDICAL_QUESTION_ANSWERING,
jsl_anno_py_class= ACR.JSL_anno_HC_ref_2_py_class[
H_A.MEDICAL_QUESTION_ANSWERING],
node= NLP_HC_FEATURE_NODES.nodes[
H_A.MEDICAL_QUESTION_ANSWERING],
get_default_model= SpanMedical.get_default_model,
get_pretrained_model= SpanMedical.get_pretrained_model,
type= T.QUESTION_SPAN_CLASSIFIER,
pdf_extractor_methods={
'default': default_span_classifier_config,
'default_full': default_full_span_classifier_config, },
pdf_col_name_substitutor=substitute_hc_span_classifier_cols,
output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
description='TODO',
provider=ComponentBackends.hc,
license=Licenses.hc,
computation_context=ComputeContexts.spark,
output_context=ComputeContexts.spark,
),

A.MULTI_DOCUMENT_ASSEMBLER: partial(NluComponent,
name=A.MULTI_DOCUMENT_ASSEMBLER,
Expand Down
1 change: 1 addition & 0 deletions nlu/universe/feature_node_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ class NLP_HC_NODE_IDS: # or Mode Node?
ENTITY_CHUNK_EMBEDDING = JslAnnoId('entity_chunk_embedding')
MEDICAL_SUMMARIZER = JslAnnoId('med_summarizer')
MEDICAL_TEXT_GENERATOR = JslAnnoId('med_text_generator')
MEDICAL_QUESTION_ANSWERING = JslAnnoId('med_question_answering')

class OCR_NODE_IDS:
"""All available Feature nodes in OCR
Expand Down
2 changes: 2 additions & 0 deletions nlu/universe/feature_node_universes.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,8 @@ class NLP_HC_FEATURE_NODES():
H_F = NLP_HC_FEATURES
# HC Feature Nodes
nodes = {
A.MEDICAL_QUESTION_ANSWERING: NlpFeatureNode(A.MEDICAL_QUESTION_ANSWERING, [F.DOCUMENT_QUESTION, F.DOCUMENT_QUESTION_CONTEXT], [F.CLASSIFIED_SPAN]),

A.MEDICAL_TEXT_GENERATOR: NlpFeatureNode(A.MEDICAL_TEXT_GENERATOR, [F.DOCUMENT], [F.DOCUMENT_GENERATED]),

A.MEDICAL_SUMMARIZER: NlpFeatureNode(A.MEDICAL_SUMMARIZER, [F.DOCUMENT], [F.DOCUMENT_GENERATED]),
Expand Down
Loading