diff --git a/johnsnowlabs/auto_install/health_checks/report.py b/johnsnowlabs/auto_install/health_checks/report.py
index 69cb81a5d2..20f0bc7926 100644
--- a/johnsnowlabs/auto_install/health_checks/report.py
+++ b/johnsnowlabs/auto_install/health_checks/report.py
@@ -7,7 +7,8 @@
 from johnsnowlabs.auto_install.softwares import Software
 from johnsnowlabs.py_models.jsl_secrets import LicenseInfos
 from johnsnowlabs.utils.enums import ProductName
-from johnsnowlabs.utils.my_jsl_api import get_access_key_from_browser, get_user_licenses
+from johnsnowlabs.utils.my_jsl_api import (get_access_key_from_browser,
+                                           get_user_licenses)
 
 
 def check_health(check_install=True):
@@ -49,6 +50,7 @@ def check_health(check_install=True):
         if health_check:
             health_check[product] = product.health_check()
+    return install_status
 
 
 def list_remote_licenses():
     access_token = get_access_key_from_browser()
diff --git a/tests/installations/test_auto_install.py b/tests/installations/test_auto_install.py
new file mode 100644
index 0000000000..ad185ed6bf
--- /dev/null
+++ b/tests/installations/test_auto_install.py
@@ -0,0 +1,44 @@
+import os
+import shutil
+import unittest
+
+from johnsnowlabs import nlp, settings
+from johnsnowlabs.auto_install.softwares import (Software, SparkHcSoftware,
+                                                 SparkNlpSoftware,
+                                                 SparkOcrSoftware)
+from johnsnowlabs.utils.enums import ProductName
+from johnsnowlabs.utils.venv_utils import VenvWrapper
+
+
+class AutoInstallationTestCases(unittest.TestCase):
+    def setUp(self) -> None:
+        shutil.rmtree(settings.root_dir, ignore_errors=True)
+        import pip
+        for product in ProductName:
+            software = Software.for_name(product)
+            if software and software.pypi_name:
+                pip.main(["uninstall", "-y", software.pypi_name])
+
+    def test_only_spark_nlp_should_be_installed_if_secrets_are_empty(self):
+        nlp.install(browser_login=False)
+        installed_products = nlp.check_health()
+
+        self.assertTrue(installed_products[SparkNlpSoftware])
+        self.assertFalse(installed_products[SparkHcSoftware])
+        self.assertFalse(installed_products[SparkOcrSoftware])
+
+    def test_spark_hc_is_installed_if_licensed_provided(self):
+        nlp.install(med_license=os.environ.get("VALID_LICENSE"))
+        installed_products = nlp.check_health()
+
+        self.assertTrue(installed_products[SparkNlpSoftware])
+        self.assertTrue(installed_products[SparkHcSoftware])
+        self.assertFalse(installed_products[SparkOcrSoftware])
+
+
+    def test_spark_ocr_is_installed_if_visual_is_true(self):
+        nlp.install(med_license=os.environ.get("VALID_LICENSE"), visual=True)
+        installed_products = nlp.check_health()
+        self.assertTrue(installed_products[SparkNlpSoftware])
+        self.assertTrue(installed_products[SparkHcSoftware])
+        self.assertTrue(installed_products[SparkOcrSoftware])
diff --git a/tests/sessions/cross_libs.py b/tests/sessions/cross_libs.py
new file mode 100644
index 0000000000..6ea7301f57
--- /dev/null
+++ b/tests/sessions/cross_libs.py
@@ -0,0 +1,44 @@
+import os
+import sys
+import unittest
+
+from johnsnowlabs import nlp
+from johnsnowlabs.auto_install.softwares import (SparkHcSoftware,
+                                                 SparkNlpSoftware,
+                                                 SparkOcrSoftware)
+from tests.utils import clear_installed_jsl_installation, get_cross_lib_pipe
+
+os.environ["PYSPARK_PYTHON"] = sys.executable
+os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
+
+def setUpModule():
+    nlp.install(browser_login=False, spark_nlp=True, nlp=True, visual=True,
+                med_license=os.environ.get("VALID_LICENSE"), ocr_license=os.environ.get("VALID_LICENSE"),
+                aws_access_key="",
+                aws_key_id=""
+                )
+
+
+def tearDownModule():
+    clear_installed_jsl_installation()
+
+
+class InstallationTestCase(unittest.TestCase):
+    def test_all_libs_are_installed(self):
+        installed_products = nlp.check_health()
+        self.assertTrue(installed_products[SparkNlpSoftware])
+        self.assertTrue(installed_products[SparkHcSoftware])
+        self.assertTrue(installed_products[SparkOcrSoftware])
+
+class SparkSessionTestCase(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.spark = nlp.start(visual=True)
+
+    def test_simple_cross_library_session(self):
+        import pkg_resources
+        doc_example = pkg_resources.resource_filename(
+            "sparkocr", "resources/ocr/docs/doc2.docx"
+        )
+        df = self.spark.read.format("binaryFile").load(doc_example).cache()
+        get_cross_lib_pipe().fit(df).transform(df).show()
diff --git a/tests/sessions/healthcare_lib.py b/tests/sessions/healthcare_lib.py
new file mode 100644
index 0000000000..3ae6253efc
--- /dev/null
+++ b/tests/sessions/healthcare_lib.py
@@ -0,0 +1,61 @@
+import os
+import sys
+import unittest
+
+from johnsnowlabs import medical, nlp
+from johnsnowlabs.auto_install.softwares import (SparkHcSoftware,
+                                                 SparkNlpSoftware,
+                                                 SparkOcrSoftware)
+from tests.utils import (clear_installed_jsl_installation,
+                         get_finance_pipeline, get_legal_pipeline)
+
+os.environ["PYSPARK_PYTHON"] = sys.executable
+os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
+
+def setUpModule():
+    nlp.install(browser_login=False, spark_nlp=True, nlp=True, visual=False,
+                med_license=os.environ.get("VALID_LICENSE"),
+                aws_access_key="",
+                aws_key_id=""
+                )
+
+
+def tearDownModule():
+    clear_installed_jsl_installation()
+
+
+class InstallationTestCase(unittest.TestCase):
+    def test_spark_nlp_jsl_is_installed(self):
+        installed_products = nlp.check_health()
+        self.assertTrue(installed_products[SparkNlpSoftware])
+        self.assertTrue(installed_products[SparkHcSoftware])
+        self.assertFalse(installed_products[SparkOcrSoftware])
+
+class SparkSessionTestCase(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.spark = nlp.start()
+
+    def test_healthcare_session(self):
+        print("Test Healthcare session ...")
+        d = nlp.DocumentAssembler().setInputCol("text").setOutputCol("doc")
+        t = nlp.Tokenizer().setInputCols("doc").setOutputCol("tok")
+        c = (
+            medical.BertForTokenClassification()
+            .pretrained()
+            .setInputCols(["tok", "doc"])
+            .setOutputCol("class")
+        )
+        p = nlp.Pipeline(stages=[d, t, c])
+        p = nlp.to_nlu_pipe(p)
+        print(p.predict("Hello from John Snow Labs"))
+
+    def test_finance_session(self):
+        print("Testing Finance Session ...")
+        nlp.Pipeline(get_finance_pipeline()).fullAnnotate("unit")
+
+
+    def test_legal_session(self):
+        print("Testing Legal Session ...")
+        nlp.Pipeline(get_legal_pipeline()).fullAnnotate("Shwrm")
diff --git a/tests/sessions/spark_nlp_lib.py b/tests/sessions/spark_nlp_lib.py
new file mode 100644
index 0000000000..8439f9a309
--- /dev/null
+++ b/tests/sessions/spark_nlp_lib.py
@@ -0,0 +1,64 @@
+import os
+import sys
+import unittest
+
+from johnsnowlabs import nlp
+from johnsnowlabs.auto_install.softwares import (SparkHcSoftware,
+                                                 SparkNlpSoftware,
+                                                 SparkOcrSoftware)
+from tests.utils import clear_installed_jsl_installation
+
+os.environ["PYSPARK_PYTHON"] = sys.executable
+os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
+
+
+
+def setUpModule():
+    nlp.install(browser_login=False, spark_nlp=True, nlp=False, visual=False)
+
+
+def tearDownModule():
+    clear_installed_jsl_installation()
+
+
+class InstallationTestCase(unittest.TestCase):
+    def test_only_spark_nlp_should_be_installed_if_secrets_are_empty(self):
+
+        installed_products = nlp.check_health()
+
+        self.assertTrue(installed_products[SparkNlpSoftware])
+        self.assertFalse(installed_products[SparkHcSoftware])
+        self.assertFalse(installed_products[SparkOcrSoftware])
+
+class SparkSessionTestCase(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.spark = nlp.start()
+
+    def test_sparknlp_session(self):
+        print("Start test_spark_nlp_session")
+        d = nlp.DocumentAssembler().setInputCol("text").setOutputCol("doc")
+        t = nlp.Tokenizer().setInputCols("doc").setOutputCol("tok")
+        c = (
+            nlp.DeBertaForTokenClassification()
+            .setInputCols(["tok", "doc"])
+            .setOutputCol("class")
+        )
+        p = nlp.Pipeline(stages=[d, t])
+        p = nlp.to_nlu_pipe(p)
+        print(p.predict("Hello World"))
+
+    def test_sparknlp_gpu_session(self):
+        print("Start test_spark_nlp_gpu_session")
+        self.spark = nlp.start(hardware_target="gpu")
+        d = nlp.DocumentAssembler().setInputCol("text").setOutputCol("doc")
+        t = nlp.Tokenizer().setInputCols("doc").setOutputCol("tok")
+        c = (
+            nlp.DeBertaForTokenClassification()
+            .setInputCols(["tok", "doc"])
+            .setOutputCol("class")
+        )
+        p = nlp.Pipeline(stages=[d, t])
+        p = nlp.to_nlu_pipe(p)
+        print(p.predict("Hello from John Snow Labs"))
+
diff --git a/tests/sessions/visual_lib.py b/tests/sessions/visual_lib.py
new file mode 100644
index 0000000000..9f749dadc1
--- /dev/null
+++ b/tests/sessions/visual_lib.py
@@ -0,0 +1,98 @@
+import os
+import sys
+import unittest
+
+from johnsnowlabs import nlp, visual
+from johnsnowlabs.auto_install.softwares import (SparkHcSoftware,
+                                                 SparkNlpSoftware,
+                                                 SparkOcrSoftware)
+from tests.utils import (clear_installed_jsl_installation,
+                         get_finance_pipeline, get_legal_pipeline)
+
+os.environ["PYSPARK_PYTHON"] = sys.executable
+os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
+
+def setUpModule():
+    nlp.install(browser_login=False, spark_nlp=True, nlp=False, visual=True,
+                ocr_license=os.environ.get("VALID_LICENSE"),
+                aws_key_id="",
+                aws_access_key=""
+                )
+
+
+def tearDownModule():
+    clear_installed_jsl_installation()
+
+
+class InstallationTestCase(unittest.TestCase):
+    def test_spark_ocr_is_installed(self):
+        installed_products = nlp.check_health()
+        self.assertTrue(installed_products[SparkNlpSoftware])
+        self.assertFalse(installed_products[SparkHcSoftware])
+        self.assertTrue(installed_products[SparkOcrSoftware])
+
+class SparkSessionTestCase(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.spark = nlp.start(visual=True)
+
+    def test_ocr_session(self):
+        print("Test OCR session ...")
+        pdf_to_image = visual.PdfToImage()
+        pdf_to_image.setImageType(visual.ImageType.TYPE_3BYTE_BGR)
+
+        # Detect tables on the page using a pretrained model
+        # It can be fine-tuned to get more accurate results on more specific documents
+        table_detector = visual.ImageTableDetector.pretrained(
+            "general_model_table_detection_v2", "en", "clinical/ocr"
+        )
+        table_detector.setInputCol("image")
+        table_detector.setOutputCol("region")
+
+        # Draw the detected table regions on the page
+        draw_regions = visual.ImageDrawRegions()
+        draw_regions.setInputCol("image")
+        draw_regions.setInputRegionsCol("region")
+        draw_regions.setOutputCol("image_with_regions")
+        draw_regions.setRectColor(visual.Color.red)
+
+        # Extract table regions to separate images
+        splitter = visual.ImageSplitRegions()
+        splitter.setInputCol("image")
+        splitter.setInputRegionsCol("region")
+        splitter.setOutputCol("table_image")
+        splitter.setDropCols("image")
+
+        # Detect cells on the table image
+        cell_detector = visual.ImageTableCellDetector()
+        cell_detector.setInputCol("table_image")
+        cell_detector.setOutputCol("cells")
+        cell_detector.setAlgoType("morphops")
+
+        # Extract text from the detected cells
+        table_recognition = visual.ImageCellsToTextTable()
+        table_recognition.setInputCol("table_image")
+        table_recognition.setCellsCol("cells")
+        table_recognition.setMargin(3)
+        table_recognition.setStrip(True)
+        table_recognition.setOutputCol("table")
+
+        pipeline = nlp.PipelineModel(
+            stages=[
+                pdf_to_image,
+                table_detector,
+                draw_regions,
+                splitter,
+                cell_detector,
+                table_recognition,
+            ]
+        )
+
+        import pkg_resources
+
+        pdf_example = pkg_resources.resource_filename(
+            "sparkocr", "resources/ocr/pdfs/tabular-pdf/data.pdf"
+        )
+        pdf_example_df = self.spark.read.format("binaryFile").load(pdf_example).cache()
+        pipeline.transform(pdf_example_df).show()
+
diff --git a/tests/spark_session.py b/tests/spark_session.py
deleted file mode 100644
index 44463332e9..0000000000
--- a/tests/spark_session.py
+++ /dev/null
@@ -1,267 +0,0 @@
-import sys
-
-from johnsnowlabs import *
-import unittest
-import pkg_resources
-
-
-import os
-
-os.environ["PYSPARK_PYTHON"] = sys.executable
-os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
-
-
-# finance.ClassifierDLApproach()
-class ImportTestCase(unittest.TestCase):
-    def test_sparknlp_session(self):
-        nlp.start()
-        d = nlp.DocumentAssembler().setInputCol("text").setOutputCol("doc")
-        t = nlp.Tokenizer().setInputCols("doc").setOutputCol("tok")
-        c = (
-            nlp.DeBertaForTokenClassification()
-            .setInputCols(["tok", "doc"])
-            .setOutputCol("class")
-        )
-        p = nlp.Pipeline(stages=[d, t])
-        p = nlp.to_nlu_pipe(p)
-        print(p.predict("Hello World"))
-
-    def test_sparknlp_gpu_session(self):
-        nlp.start(hardware_target="gpu")
-        d = nlp.DocumentAssembler().setInputCol("text").setOutputCol("doc")
-        t = nlp.Tokenizer().setInputCols("doc").setOutputCol("tok")
-        c = (
-            nlp.DeBertaForTokenClassification()
-            .setInputCols(["tok", "doc"])
-            .setOutputCol("class")
-        )
-        p = nlp.Pipeline(stages=[d, t])
-        p = nlp.to_nlu_pipe(p)
-        print(p.predict("Hello form John SNow labs"))
-
-    def test_sparknlp_m1_session(self):
-        import os
-
-        nlp.start(hardware_target="m1")
-        d = nlp.DocumentAssembler().setInputCol("text").setOutputCol("doc")
-        t = nlp.Tokenizer().setInputCols("doc").setOutputCol("tok")
-        c = (
-            nlp.DeBertaForTokenClassification()
-            .pretrained()
-            .setInputCols(["tok", "doc"])
-            .setOutputCol("class")
-        )
-        nlp.UniversalSentenceEncoder.pretrained()
-        p = nlp.Pipeline(stages=[d, t])
-        p = nlp.to_nlu_pipe(p)
-        print(p.predict("Hello form John SNow labs"))
-
-    def test_healthcare_session(self):
-        nlp.start()
-        d = nlp.DocumentAssembler().setInputCol("text").setOutputCol("doc")
-        t = nlp.Tokenizer().setInputCols("doc").setOutputCol("tok")
-        c = (
-            medical.BertForTokenClassification()
-            .pretrained()
-            .setInputCols(["tok", "doc"])
-            .setOutputCol("class")
-        )
-        p = nlp.Pipeline(stages=[d, t, c])
-        p = nlp.to_nlu_pipe(p)
-        print(p.predict("Hello form John SNow labs"))
-
-    def test_ocr_session(self):
-        # Convert pdf to image
-        p = "/home/ckl/old_home/ckl/Documents/freelance/johnsnowlabs_lib/tmp/licenses/4_1_LATEST_OCR_HC_BCK.json"
-        spark = nlp.start(visual=True)
-
-        pdf_to_image = visual.PdfToImage()
-        pdf_to_image.setImageType(visual.ImageType.TYPE_3BYTE_BGR)
-
-        # Detect tables on the page using pretrained model
-        # It can be finetuned for have more accurate results for more specific documents
-        table_detector = visual.ImageTableDetector.pretrained(
-            "general_model_table_detection_v2", "en", "clinical/ocr"
-        )
-        table_detector.setInputCol("image")
-        table_detector.setOutputCol("region")
-
-        # Draw detected region's with table to the page
-        draw_regions = visual.ImageDrawRegions()
-        draw_regions.setInputCol("image")
-        draw_regions.setInputRegionsCol("region")
-        draw_regions.setOutputCol("image_with_regions")
-        draw_regions.setRectColor(visual.Color.red)
-
-        # Extract table regions to separate images
-        splitter = visual.ImageSplitRegions()
-        splitter.setInputCol("image")
-        splitter.setInputRegionsCol("region")
-        splitter.setOutputCol("table_image")
-        splitter.setDropCols("image")
-
-        # Detect cells on the table image
-        cell_detector = visual.ImageTableCellDetector()
-        cell_detector.setInputCol("table_image")
-        cell_detector.setOutputCol("cells")
-        cell_detector.setAlgoType("morphops")
-
-        # Extract text from the detected cells
-        table_recognition = visual.ImageCellsToTextTable()
-        table_recognition.setInputCol("table_image")
-        table_recognition.setCellsCol("cells")
-        table_recognition.setMargin(3)
-        table_recognition.setStrip(True)
-        table_recognition.setOutputCol("table")
-
-        pipeline = nlp.PipelineModel(
-            stages=[
-                pdf_to_image,
-                table_detector,
-                draw_regions,
-                splitter,
-                cell_detector,
-                table_recognition,
-            ]
-        )
-
-        import pkg_resources
-
-        pdf_example = pkg_resources.resource_filename(
-            "sparkocr", "resources/ocr/pdfs/tabular-pdf/data.pdf"
-        )
-        pdf_example_df = spark.read.format("binaryFile").load(pdf_example).cache()
-        pipeline.transform(pdf_example_df).show()
-
-    def test_legal_session(self):
-        nlp.start()
-
-        nlp.Pipeline(self.get_legal_pipe()).fullAnnotate("Shwrm")
-
-    def test_finance_session(self):
-        nlp.start()
-        nlp.Pipeline(self.get_finance_pipe()).fullAnnotate("unit")
-
-    @staticmethod
-    def get_finance_pipe() -> nlp.PipelineModel:
-        documentAssembler = (
-            nlp.DocumentAssembler().setInputCol("text").setOutputCol("ner_chunk")
-        )
-
-        embeddings = (
-            nlp.UniversalSentenceEncoder.pretrained("tfhub_use", "en")
-            .setInputCols("ner_chunk")
-            .setOutputCol("sentence_embeddings")
-        )
-
-        resolver = (
-            finance.SentenceEntityResolverModel.pretrained(
-                "finel_tickers2names", "en", "finance/models"
-            )
-            .setInputCols(["ner_chunk", "sentence_embeddings"])
-            .setOutputCol("name")
-            .setDistanceFunction("EUCLIDEAN")
-        )
-
-        return nlp.PipelineModel(stages=[documentAssembler, embeddings, resolver])
-
-    @staticmethod
-    def get_legal_pipe() -> nlp.PipelineModel:
-        z = legal.ZeroShotRelationExtractionModel.pretrained(
-            "finre_zero_shot", "en", "finance/models"
-        )
-        documentAssembler = (
-            nlp.DocumentAssembler().setInputCol("text").setOutputCol("ner_chunk")
-        )
-
-        embeddings = (
-            nlp.UniversalSentenceEncoder.pretrained("tfhub_use", "en")
-            .setInputCols("ner_chunk")
-            .setOutputCol("sentence_embeddings")
-        )
-
-        resolver = (
-            legal.SentenceEntityResolverModel.pretrained(
-                "legel_crunchbase_companynames", "en", "legal/models"
-            )
-            .setInputCols(["ner_chunk", "sentence_embeddings"])
-            .setOutputCol("name")
-            .setDistanceFunction("EUCLIDEAN")
-        )
-
-        return nlp.PipelineModel(stages=[documentAssembler, embeddings, resolver])
-
-    @staticmethod
-    def get_cross_lib_pipe() -> nlp.PipelineModel:
-        # Returns pipe with one anno per lib
-        # TODO add some fancy OCR DL models?
-        doc2text = visual.DocToText().setInputCol("content").setOutputCol("text")
-        d = nlp.DocumentAssembler().setInputCol("text").setOutputCol("doc")
-        t = nlp.Tokenizer().setInputCols("doc").setOutputCol("tok")
-        # One classifier per NLP lib
-
-        c1 = (
-            medical.BertForTokenClassifier()
-            .pretrained()
-            .setInputCols(["tok", "doc"])
-            .setOutputCol("medical")
-        )
-
-        c2 = (
-            nlp.DeBertaForTokenClassification()
-            .setInputCols(["tok", "doc"])
-            .setOutputCol("opene_source")
-        )
-
-        c3 = (
-            finance.BertForSequenceClassification.pretrained(
-                "finclf_augmented_esg", "en", "finance/models"
-            )
-            .setInputCols(["tok", "doc"])
-            .setOutputCol("finance")
-        )
-
-        c4 = (
-            legal.BertForSequenceClassification.pretrained(
-                "legclf_bert_judgements_agent", "en", "legal/models"
-            )
-            .setInputCols(["tok", "doc"])
-            .setOutputCol("legal")
-        )
-
-        return nlp.Pipeline(stages=[doc2text, d, t, c1, c2, c3, c4])
-
-    def test_simple_cross_lib(self):
-        spark = nlp.start()
-        doc_example = pkg_resources.resource_filename(
-            "sparkocr", "resources/ocr/docs/doc2.docx"
-        )
-        df = spark.read.format("binaryFile").load(doc_example).cache()
-        self.get_cross_lib_pipe().fit(df).transform(df).show()
-
-    def test_simple_cross_lib_gpu(self):
-        spark = nlp.start(hardware_target="gpu")
-        doc_example = pkg_resources.resource_filename(
-            "sparkocr", "resources/ocr/docs/doc2.docx"
-        )
-        df = spark.read.format("binaryFile").load(doc_example).cache()
-        self.get_cross_lib_pipe().fit(df).transform(df).show()
-
-    def test_cross_engine_session(self):
-        import itertools
-
-        # Test every combination of jars with CPU jars
-        for c in range(3):
-            p = itertools.combinations(["nlp-cpu", "ocr", "hc"], c)
-            for pp in p:
-                print(pp)
-
-        # Test every combination of jars with GPU jars
-        for c in range(3):
-            p = itertools.combinations(["nlp-gpu", "ocr", "hc"], c)
-            for pp in p:
-                print(pp)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 0000000000..1e78e80b32
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,98 @@
+import shutil
+
+import pip
+
+from johnsnowlabs import finance, legal, medical, nlp, settings, visual
+
+
+def clear_installed_jsl_installation():
+    shutil.rmtree(settings.root_dir, ignore_errors=True)
+    pip.main(["uninstall", "-y", "johnsnowlabs"])
+    pip.main(["uninstall", "-y", "nlu"])
+    pip.main(["uninstall", "-y", "spark-nlp"])
+    pip.main(["uninstall", "-y", "spark-nlp-jsl"])
+    pip.main(["uninstall", "-y", "spark-ocr"])
+
+def get_finance_pipeline():
+    documentAssembler = (
+        nlp.DocumentAssembler().setInputCol("text").setOutputCol("ner_chunk")
+    )
+
+    embeddings = (
+        nlp.UniversalSentenceEncoder.pretrained("tfhub_use", "en")
+        .setInputCols("ner_chunk")
+        .setOutputCol("sentence_embeddings")
+    )
+
+    resolver = (
+        finance.SentenceEntityResolverModel.pretrained(
+            "finel_tickers2names", "en", "finance/models"
+        )
+        .setInputCols(["ner_chunk", "sentence_embeddings"])
+        .setOutputCol("name")
+        .setDistanceFunction("EUCLIDEAN")
+    )
+
+    return nlp.PipelineModel(stages=[documentAssembler, embeddings, resolver])
+
+def get_legal_pipeline() -> nlp.PipelineModel:
+
+    documentAssembler = (
+        nlp.DocumentAssembler().setInputCol("text").setOutputCol("ner_chunk")
+    )
+
+    embeddings = (
+        nlp.UniversalSentenceEncoder.pretrained("tfhub_use", "en")
+        .setInputCols("ner_chunk")
+        .setOutputCol("sentence_embeddings")
+    )
+
+    resolver = (
+        legal.SentenceEntityResolverModel.pretrained(
+            "legel_crunchbase_companynames", "en", "legal/models"
+        )
"sentence_embeddings"]) + .setOutputCol("name") + .setDistanceFunction("EUCLIDEAN") + ) + + return nlp.PipelineModel(stages=[documentAssembler, embeddings, resolver]) + +def get_cross_lib_pipe() -> nlp.PipelineModel: + # Returns pipe with one anno per lib + # TODO add some fancy OCR DL models? + doc2text = visual.DocToText().setInputCol("content").setOutputCol("text") + d = nlp.DocumentAssembler().setInputCol("text").setOutputCol("doc") + t = nlp.Tokenizer().setInputCols("doc").setOutputCol("tok") + # One classifier per NLP lib + + c1 = ( + medical.BertForTokenClassifier() + .pretrained() + .setInputCols(["tok", "doc"]) + .setOutputCol("medical") + ) + + c2 = ( + nlp.DeBertaForTokenClassification() + .setInputCols(["tok", "doc"]) + .setOutputCol("opene_source") + ) + + c3 = ( + finance.BertForSequenceClassification.pretrained( + "finclf_augmented_esg", "en", "finance/models" + ) + .setInputCols(["tok", "doc"]) + .setOutputCol("finance") + ) + + c4 = ( + legal.BertForSequenceClassification.pretrained( + "legclf_bert_judgements_agent", "en", "legal/models" + ) + .setInputCols(["tok", "doc"]) + .setOutputCol("legal") + ) + + return nlp.Pipeline(stages=[doc2text, d, t, c1, c2, c3, c4])