Skip to content

Commit

Permalink
Release/550 (#1567)
Browse files Browse the repository at this point in the history
* spark-nlp-jsl==5.5.0 requirements  (#1544)

* bump versions

* implemented spark-nlp-jsl==550 requirements

---------

Co-authored-by: C-K-Loan <[email protected]>

* bump pydantic support to > 2.X, various refactors and deprecation fixes (#1560)

* 550 BugFix (#1563)

* fix listener config if nlp=False

* update conditions

* fix deprecated @validator decorator (#1565)

* Pydantic2 migration fix env var model parsing (#1566)

* fix deprecated @validator decorator

* fix env var pydantic model creation bug

* bump versions

---------

Co-authored-by: Mehmet Butgul <[email protected]>
  • Loading branch information
C-K-Loan and mehmetbutgul authored Oct 23, 2024
1 parent a34ce3f commit 405a223
Show file tree
Hide file tree
Showing 12 changed files with 138 additions and 47 deletions.
21 changes: 21 additions & 0 deletions docs/en/jsl/jsl_release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,27 @@ sidebar:
See [Github Releases](https://github.com/JohnSnowLabs/johnsnowlabs/releases) for detailed information on Release History and Features



## 5.5.0
Release date: 10-23-2024

The John Snow Labs 5.5.0 Library was released with the following pre-installed and recommended dependencies:

{:.table-model-big}
| Library | Version |
|-----------------------------------------------------------------------------------------|------------|
| [Visual NLP](https://nlp.johnsnowlabs.com/docs/en/spark_ocr_versions/ocr_release_notes) | `5.4.1` |
| [Enterprise NLP](https://nlp.johnsnowlabs.com/docs/en/licensed_annotators) | `5.5.0` |
| [Finance NLP](https://nlp.johnsnowlabs.com/docs/en/financial_release_notes) | `1.X.X` |
| [Legal NLP](https://nlp.johnsnowlabs.com/docs/en/legal_release_notes) | `1.X.X` |
| [NLU](https://github.com/JohnSnowLabs/nlu/releases) | `5.4.1` |
| [Spark-NLP-Display](https://sparknlp.org/docs/en/display) | `5.0` |
| [Spark-NLP](https://github.com/JohnSnowLabs/spark-nlp/releases/) | `5.5.0` |
| [Pyspark](https://spark.apache.org/docs/latest/api/python/) | `3.4.0` |




## 5.4.5
Release date: 9-27-2024

Expand Down
27 changes: 18 additions & 9 deletions johnsnowlabs/auto_install/jsl_home.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,15 @@ def download_deps_and_create_info(
overwrite=False,
):
"""Download a list of deps to given lib_dir folder and creates info_file at info_file_path."""
info, old_info = {}, {}
info, old_info = {}, None
if os.path.exists(info_file_path):
# keep old infos, we assume they are up-to-date and compatible
old_info = InstallFolder.parse_file(info_file_path)
if os.path.join("java_installs","info.json") in info_file_path:
old_info = InstallFolder.java_folder_from_home()
elif os.path.join("py_installs","info.json") in info_file_path:
old_info = InstallFolder.py_folder_from_home()

for p in deps:

# print_prefix = Software.for_name(p.product_name).logo
print_prefix = ProductLogo.from_name(p.product_name.name).value
if p.dependency_type in JvmHardwareTarget:
Expand Down Expand Up @@ -88,12 +90,19 @@ def download_deps_and_create_info(
install_type=p.dependency_type.value,
product_version=p.dependency_version.as_str(),
)
info[p.file_name].compatible_spark_version = p.spark_version.value.as_str()
info[p.file_name].product_version = p.dependency_version.as_str()

if info:
info = InstallFolder(**{"infos": info})
if old_info:
info.infos.update(old_info.infos)
info.write(info_file_path, indent=4)
with open(info_file_path, "w") as f:
for k, v in info.infos.items():
v.product_version = str(v.product_version)
v.compatible_spark_version = str(v.compatible_spark_version)

f.write(info.model_dump_json())

def setup_jsl_home(
secrets: Optional[JslSecrets] = None,
Expand Down Expand Up @@ -183,9 +192,10 @@ def setup_jsl_home(
java_deps, settings.java_dir, settings.java_info_file, overwrite
)

RootInfo(version=settings.raw_version_jsl_lib, run_from=sys.executable).write(
settings.root_info_file, indent=4
)
root_info = RootInfo(version=settings.raw_version_jsl_lib, run_from=sys.executable)
root_info.version = root_info.version.as_str()
with open(settings.root_info_file, "w") as f:
f.write(root_info.model_dump_json())
print(f"🙆 JSL Home setup in {settings.root_dir}")

return
Expand Down Expand Up @@ -258,9 +268,8 @@ def get_install_suite_from_jsl_home(
if os.path.exists(settings.py_info_file):
py_folder = InstallFolder.py_folder_from_home()

info = RootInfo.parse_file(settings.root_info_file)
info = RootInfo.get_from_jsl_home()
# Read all dependencies from local ~/.johnsnowlabs folder

suite = InstallSuite(
nlp=LocalPy4JLib(
java_lib=java_folder.get_product_entry(ProductName.nlp, jvm_hardware_target)
Expand Down
2 changes: 2 additions & 0 deletions johnsnowlabs/finance.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@
LargeFewShotClassifierModel,
Mapper2Chunk,
DocumentFiltererByNER,
REChunkMerger,
ContextualEntityFilterer,
)

from sparknlp_jsl.modelTracer import ModelTracer
Expand Down
2 changes: 2 additions & 0 deletions johnsnowlabs/legal.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@
LargeFewShotClassifierModel,
Mapper2Chunk,
DocumentFiltererByNER,
REChunkMerger,
ContextualEntityFilterer,
)
from sparknlp_jsl.modelTracer import ModelTracer
from sparknlp_jsl.pipeline_tracer import PipelineTracer
Expand Down
3 changes: 3 additions & 0 deletions johnsnowlabs/medical.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@
LargeFewShotClassifierModel,
Mapper2Chunk,
DocumentFiltererByNER,
REChunkMerger,
ContextualEntityFilterer,
)
from sparknlp_jsl.structured_deidentification import StructuredDeidentification
from sparknlp_jsl.text_to_documents_columns import TextToDocumentsColumns
Expand Down Expand Up @@ -130,6 +132,7 @@
TextMatcherInternalModel as TextMatcherModel,
RegexMatcherInternal as RegexMatcher,
RegexMatcherInternalModel as RegexMatcherModel,
MedicalLLM as AutoGGUFModel,
)
from sparknlp_jsl.compatibility import Compatibility
from sparknlp_jsl.pretrained import InternalResourceDownloader
Expand Down
26 changes: 22 additions & 4 deletions johnsnowlabs/py_models/install_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from johnsnowlabs.py_models.lib_version import LibVersion
from johnsnowlabs.utils.enums import JvmHardwareTarget, ProductName, PyInstallTypes


import json
class InstallFileInfoBase(WritableBaseModel):
file_name: str
product: ProductName
Expand Down Expand Up @@ -71,7 +71,12 @@ def __init__(self, *args, **kwargs):

@staticmethod
def get_from_jsl_home():
return RootInfo.parse_file(settings.root_info_file)
import json
if os.path.exists(settings.root_info_file):
with open(settings.root_info_file, "r") as f:
json_data = json.loads(f.read())
return RootInfo(run_from=json_data["run_from"],
version=json_data["version"])


class InstallFolder(WritableBaseModel):
Expand All @@ -93,13 +98,26 @@ def get_product_entry(
@staticmethod
def java_folder_from_home():
if os.path.exists(settings.java_info_file):
return InstallFolder.parse_file(settings.java_info_file)
with open(settings.java_info_file, "r") as f:
json_data = json.loads(f.read())
infos = {}
for k,v in json_data['infos'].items():
if k.endswith(".jar"):
infos[k] = JvmInstallInfo(**v)
return InstallFolder(infos=infos)
return False

@staticmethod
def py_folder_from_home():
if os.path.exists(settings.py_info_file):
return InstallFolder.parse_file(settings.py_info_file)
with open(settings.py_info_file, "r") as f:
json_data = json.loads(f.read())
infos = {}
for k,v in json_data['infos'].items():
if k.endswith(".whl") or k.endswith(".tar.gz"):
infos[k] = PyInstallInfo(**v)

return InstallFolder(infos=infos)
return False


Expand Down
80 changes: 55 additions & 25 deletions johnsnowlabs/py_models/jsl_secrets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
from typing import Dict, List, Optional, Union

from pydantic import validator
from pydantic import field_validator

from johnsnowlabs import settings
from johnsnowlabs.abstract_base.pydantic_model import WritableBaseModel
Expand Down Expand Up @@ -62,17 +62,17 @@ class JslSecrets(WritableBaseModel):
methods for reading/storing found_secrets and managing .jslhome folder
"""

HC_SECRET: Secret = None
HC_LICENSE: Secret = None
HC_SECRET: Optional[str] = None
HC_LICENSE: Optional[str] = None
HC_VERSION: Optional[LibVersionIdentifier] = None
OCR_SECRET: Secret = None
OCR_LICENSE: Secret = None
OCR_SECRET: Optional[str] = None
OCR_LICENSE: Optional[str] = None
OCR_VERSION: Optional[LibVersionIdentifier] = None
AWS_ACCESS_KEY_ID: Secret = None
AWS_SECRET_ACCESS_KEY: Secret = None
AWS_ACCESS_KEY_ID: Optional[str] = None
AWS_SECRET_ACCESS_KEY: Optional[str] = None
NLP_VERSION: Optional[LibVersionIdentifier] = None
JSL_LEGAL_LICENSE: Secret = None
JSL_FINANCE_LICENSE: Secret = None
JSL_LEGAL_LICENSE: Optional[str] = None
JSL_FINANCE_LICENSE: Optional[str] = None

@staticmethod
def raise_invalid_version():
Expand All @@ -82,7 +82,7 @@ def raise_invalid_version():
)
raise ValueError("Invalid secrets")

@validator("HC_SECRET")
@field_validator("HC_SECRET")
def hc_version_check(cls, HC_SECRET):
global hc_validation_logged
try:
Expand Down Expand Up @@ -114,7 +114,7 @@ def is_ocr_secret_correct_version(ocr_secret: Optional[str]) -> bool:
def is_hc_secret_correct_version(hc_secret: Optional[str]) -> bool:
return hc_secret and hc_secret.split("-")[0] == settings.raw_version_medical

@validator("OCR_SECRET")
@field_validator("OCR_SECRET")
def ocr_version_check(cls, OCR_SECRET):
global ocr_validation_logged
try:
Expand All @@ -123,6 +123,8 @@ def ocr_version_check(cls, OCR_SECRET):
and not ocr_validation_logged
):
ocr_validation_logged = True
if not OCR_SECRET:
return OCR_SECRET
print(
f"🚨 Outdated OCR Secrets in license file. Version={(OCR_SECRET.split('-')[0] if OCR_SECRET else None)} but should be Version={settings.raw_version_ocr}"
)
Expand Down Expand Up @@ -424,6 +426,13 @@ def search_env_vars() -> Union["JslSecrets", bool]:
]
):
print("👌 License detected in Environment Variables")
if isinstance(hc_version,str):
hc_version = LibVersionIdentifier(hc_version)
if isinstance(ocr_version,str):
ocr_version = LibVersionIdentifier(ocr_version)
if isinstance(nlp_version,str):
nlp_version = LibVersionIdentifier(nlp_version)

return JslSecrets(
HC_SECRET=hc_secret,
HC_LICENSE=hc_license,
Expand Down Expand Up @@ -631,6 +640,13 @@ def from_json_dict(secrets, secrets_metadata: Optional = None) -> "JslSecrets":
secrets["JSL_FINANCE_LICENSE"] if "JSL_FINANCE_LICENSE" in secrets else None
)

if isinstance(hc_version,str):
hc_version = LibVersionIdentifier(hc_version)
if isinstance(ocr_version,str):
ocr_version = LibVersionIdentifier(ocr_version)
if isinstance(nlp_version,str):
nlp_version = LibVersionIdentifier(nlp_version)

return JslSecrets(
HC_SECRET=hc_secret,
HC_LICENSE=hc_license,
Expand Down Expand Up @@ -659,8 +675,9 @@ def from_jsl_home(
return False

try:

# Try/except in case we get validation errors from outdated files
license_infos = LicenseInfos.parse_file(settings.creds_info_file)
license_infos = LicenseInfos.from_home()
if log and not already_logged:
already_logged = True
print(
Expand Down Expand Up @@ -692,7 +709,7 @@ def update_outdated_lib_secrets(
for license in os.listdir(settings.license_dir):
if license == "info.json":
continue
secrets = JslSecrets.parse_file(os.path.join(settings.license_dir, license))
secrets = JslSecrets.from_json_file_path(os.path.join(settings.license_dir, license))
if (
secrets.HC_SECRET
and hc_secrets
Expand Down Expand Up @@ -768,7 +785,7 @@ def are_credentials_known(found_secrets: "JslSecrets") -> bool:
# Return True, if secrets are already stored in JSL-Home, otherwise False
Path(settings.py_dir).mkdir(parents=True, exist_ok=True)
if os.path.exists(settings.creds_info_file):
license_infos = LicenseInfos.parse_file(settings.creds_info_file)
license_infos = LicenseInfos.from_home()
else:
# If license dir did not exist yet, secrets are certainly new
return False
Expand All @@ -786,13 +803,13 @@ def are_lib_secrets_an_upgrade(found_secrets: "JslSecrets") -> bool:
# Return True, if lib are newer than existing ones, if yes upgrade locally stored secrets
Path(settings.py_dir).mkdir(parents=True, exist_ok=True)
if os.path.exists(settings.creds_info_file):
license_infos = LicenseInfos.parse_file(settings.creds_info_file)
license_infos = LicenseInfos.from_home()
else:
# If license dir did not exist yet, secrets are certainly new
return False

# if any stored secrets equal to found_secrets, then we already know them
# check OCR secrets

if found_secrets.HC_SECRET:
if any(
map(
Expand Down Expand Up @@ -837,7 +854,7 @@ def store_in_jsl_home_if_new(secrets: "JslSecrets") -> None:
file_name = file_name + "_".join(products) + f".json"

if os.path.exists(settings.creds_info_file):
license_infos = LicenseInfos.parse_file(settings.creds_info_file)
license_infos = LicenseInfos.from_home()
file_name = file_name.format(number=str(len(license_infos.infos)))
license_info = LicenseInfo(
jsl_secrets=secrets, products=products, id=str(len(license_infos.infos))
Expand All @@ -848,13 +865,16 @@ def store_in_jsl_home_if_new(secrets: "JslSecrets") -> None:
secrets.write(out_dir)
print(f"📋 Stored new John Snow Labs License in {out_dir}")
else:
file_name = file_name.format(number="0")
license_info = LicenseInfo(jsl_secrets=secrets, products=products, id="0")
LicenseInfos(infos={file_name: license_info}).write(
settings.creds_info_file
)
license_infos = LicenseInfos(infos={file_name: license_info})
with open(settings.creds_info_file, "w") as f:
f.write(license_infos.model_dump_json())

file_name = file_name.format(number="0")
out_dir = os.path.join(settings.license_dir, file_name)
secrets.write(out_dir)
with open(out_dir, "w") as f:
f.write(secrets.model_dump_json())
#secrets.write(out_dir)
print(f"📋 Stored John Snow Labs License in {out_dir}")
# We might load again JSL-Secrets from local
already_logged = True
Expand All @@ -877,6 +897,7 @@ class LicenseInfo(WritableBaseModel):
products: List[ProductName]



class LicenseInfos(WritableBaseModel):
"""Representation of a LicenseInfo in ~/.johnsnowlabs/licenses/info.json
Maps file_name to LicenseInfo
Expand All @@ -886,6 +907,15 @@ class LicenseInfos(WritableBaseModel):

@staticmethod
def from_home() -> Optional["LicenseInfos"]:
if os.path.exists(settings.creds_info_file):
return LicenseInfos.parse_file(settings.creds_info_file)
return None
if not os.path.exists(settings.creds_info_file):
return None
data = json.load(open(settings.creds_info_file))
infos = {}
for info in data['infos']:
secret = JslSecrets.from_json_dict(data['infos'][info]['jsl_secrets'])
i = LicenseInfo(id=info, jsl_secrets=secret,
products=data['infos'][info]['products'],
)
infos[info] = i
license_infos = LicenseInfos(infos=infos)
return license_infos
3 changes: 3 additions & 0 deletions johnsnowlabs/py_models/lib_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,6 @@ def as_str(self) -> str:
"""Return LibVersion object as canonical str representation"""
# We filter out all values != None so version checks match up
return ".".join(filter(lambda x: x, [self.major, self.minor, self.patch]))

def __str__(self):
return self.as_str()
Loading

0 comments on commit 405a223

Please sign in to comment.