diff --git a/johnsnowlabs/auto_install/install_flow.py b/johnsnowlabs/auto_install/install_flow.py
index 6fbf45d334..055da41f0e 100644
--- a/johnsnowlabs/auto_install/install_flow.py
+++ b/johnsnowlabs/auto_install/install_flow.py
@@ -47,12 +47,6 @@ def install(
     leg_license: Optional[str] = None,
     aws_access_key: Optional[str] = None,
     aws_key_id: Optional[str] = None,
-    # -- Databricks auth flows & Install Target --
-    databricks_cluster_id: Optional[str] = None,
-    databricks_token: Optional[str] = None,
-    databricks_host: Optional[str] = None,
-    databricks_password: Optional[str] = None,
-    databricks_email: Optional[str] = None,
     # -- Install Params --
     # Install Target
     python_exec_path: str = sys.executable,
@@ -77,38 +71,11 @@ def install(
     py_install_type: str = PyInstallTypes.wheel.value,
     only_refresh_credentials: bool = False,
     refresh_install: bool = False,
-    # -- Databricks Cluster Creation Params --
-    block_till_cluster_ready=True,
-    num_workers=1,
-    cluster_name=settings.db_cluster_name,
-    node_type_id=settings.db_node_type_id,
-    driver_node_type_id=settings.db_driver_node_type,
-    spark_env_vars=None,
-    autotermination_minutes=60,
-    spark_version=settings.db_spark_version,
-    spark_conf=None,
-    auto_scale=None,
-    aws_attributes=None,
-    ssh_public_keys=None,
-    custom_tags=None,
-    cluster_log_conf=None,
-    enable_elastic_disk=None,
-    cluster_source=None,
-    instance_pool_id=None,
-    headers=None,
-    clean_cluster=True,
-    write_db_credentials=True,
-    extra_pip_installs: Optional[List[str]] = None,
+    spark_version=settings.raw_version_pyspark,
 ):
     if refresh_install and os.path.exists(settings.root_dir):
         print("🧹 Cleaning up old JSL Home in ", settings.root_dir)
         shutil.rmtree(settings.root_dir)
-    if clean_cluster and databricks_host:
-        dbfs_rm(
-            get_db_client_for_token(databricks_host, databricks_token),
-            settings.dbfs_home_dir,
-            recursive=True,
-        )

     # Input Validation
     py_install_type = PyInstallTypes.from_str(py_install_type)
@@ -159,65 +126,8 @@ def install(
         spark_nlp=spark_nlp,
     )

-    # Databricks Install
-    if databricks_host and databricks_token and not offline:
-        print(
-            "nlp.install() for databricks will be deprecated next release, instead use nlp.install_to_databricks()"
-        )
-        suite = get_install_suite_from_jsl_home(
-            jvm_hardware_target=hardware_platform,
-            visual=visual,
-            nlp=nlp,
-            spark_nlp=spark_nlp,
-        )
-        if databricks_cluster_id:
-            install_jsl_suite_to_cluster(
-                db=get_db_client_for_token(databricks_host, databricks_token),
-                install_suite=suite,
-                cluster_id=databricks_cluster_id,
-                medical_nlp=nlp,
-                spark_nlp=spark_nlp,
-                visual=visual,
-            )
-            if extra_pip_installs:
-                install_list_of_pypi_ref_to_cluster(
-                    db=get_db_client_for_token(databricks_host, databricks_token),
-                    cluster_id=databricks_cluster_id,
-                    pip_installs=extra_pip_installs,
-                )
-
-        else:
-            return create_cluster(
-                medical_nlp=nlp,
-                spark_nlp=spark_nlp,
-                visual=visual,
-                databricks_host=databricks_host,
-                databricks_token=databricks_token,
-                install_suite=suite,
-                block_till_cluster_ready=block_till_cluster_ready,
-                num_workers=num_workers,
-                cluster_name=cluster_name,
-                node_type_id=node_type_id,
-                driver_node_type_id=driver_node_type_id,
-                spark_env_vars=spark_env_vars,
-                autotermination_minutes=autotermination_minutes,
-                spark_version=spark_version,
-                spark_conf=spark_conf,
-                auto_scale=auto_scale,
-                aws_attributes=aws_attributes,
-                ssh_public_keys=ssh_public_keys,
-                custom_tags=custom_tags,
-                cluster_log_conf=cluster_log_conf,
-                enable_elastic_disk=enable_elastic_disk,
-                cluster_source=cluster_source,
-                instance_pool_id=instance_pool_id,
-                headers=headers,
-                write_db_credentials=write_db_credentials,
-                extra_pip_installs=extra_pip_installs,
-            )
-
     # Local Py-Install
-    elif not slim_install:
+    if not slim_install:
         check_and_install_dependencies(
             product=product,
             secrets=secrets,
@@ -230,6 +140,7 @@ def install(
         visual=visual,
         spark_nlp=spark_nlp,
         enterpise_nlp=nlp,
+        spark_version=spark_version,
     )


diff --git a/johnsnowlabs/auto_install/install_software.py b/johnsnowlabs/auto_install/install_software.py
index d86b3278b1..600fc641b4 100644
--- a/johnsnowlabs/auto_install/install_software.py
+++ b/johnsnowlabs/auto_install/install_software.py
@@ -4,6 +4,8 @@
 from pathlib import Path
 from typing import Dict, Set

+from johnsnowlabs.py_models.lib_version import LibVersion
+
 from johnsnowlabs import settings
 from johnsnowlabs.abstract_base.software_product import AbstractSoftwareProduct
 from johnsnowlabs.auto_install.softwares import Software
@@ -26,6 +28,7 @@ def check_and_install_dependencies(
     visual: bool = False,
     spark_nlp: bool = True,
     enterpise_nlp: bool = True,
+    spark_version: str = None,
 ):
     """
     Iterates the dependency DAG in DFS order for input product and downloads installs all dependencies
@@ -68,6 +71,12 @@ def check_and_install_dependencies(
     :param install_licensed: install licensed products if license permits it if True, otherwise not
     sparknlp_to_latest: for some releases we might not want to go to the latest spark release
     """
+    if spark_version is not None:
+        # Ignore recommended settings
+        spark_version = LibVersion(spark_version)
+        LatestCompatibleProductVersion.pyspark.value.minor = spark_version.minor
+        LatestCompatibleProductVersion.pyspark.value.major = spark_version.major
+        LatestCompatibleProductVersion.pyspark.value.patch = spark_version.patch

     import site
     from importlib import reload
@@ -180,10 +189,11 @@ def check_and_install_dependencies(
                 download_folder=offline_py_dir,
                 include_dependencies=include_dependencies,
             )
+
     if not get_pip_lib_version("pyspark", py_exec=python_exec_path).equals(
         LatestCompatibleProductVersion.pyspark.value
     ):
-        # Re-install NLP incase some other library up/downgraded it while we installed it
+        # Re-install Pyspark in case some other library up/downgraded it while we installed it
         install_results[Software.spark_nlp] = Software.pyspark.install(
             re_install=True,
             version=LatestCompatibleProductVersion.pyspark.value,
@@ -222,7 +232,6 @@ def check_and_install_dependencies(
                 to_reload.append(PIL)
         for mod in to_reload:
             reload(mod)
-
     else:
         print(f"👌 Everything is already installed, no changes made")
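For reference, a minimal usage sketch of the reworked entry point after this change. This is an illustration, not part of the diff: "3.4.1" is only an example PySpark version string, and license secrets are assumed to be configured separately.

# Usage sketch (assumptions: illustrative PySpark version "3.4.1"; credentials already set up).
from johnsnowlabs import nlp

# Local install pinned to a specific PySpark line. Per the diff above, the value is
# parsed into a LibVersion and overrides LatestCompatibleProductVersion.pyspark before
# dependencies are resolved, so pyspark is (re)installed at that version if needed.
nlp.install(spark_version="3.4.1")

# The Databricks-specific parameters (databricks_host, databricks_token, cluster
# creation options, ...) are removed from install(); the removed code pointed users
# to nlp.install_to_databricks() for that flow instead.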