From 9857154aa3580c2a1cc7208f33d919ebee12e9be Mon Sep 17 00:00:00 2001 From: Jon Craton Date: Thu, 29 Aug 2024 13:45:21 -0400 Subject: [PATCH] Avoid checking for model updates --- changelog.md | 6 ++++++ languagemodels/models.py | 22 +++++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/changelog.md b/changelog.md index aa8098f..9f7bb02 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,11 @@ # Changelog +## 0.21 + +### Changed + +- Skip checking for model updates + ## 0.20 - 2024-04-25 ### Changed diff --git a/languagemodels/models.py b/languagemodels/models.py index 4367dfd..4a2f66c 100644 --- a/languagemodels/models.py +++ b/languagemodels/models.py @@ -47,9 +47,25 @@ def initialize_tokenizer(model_type, model_name): def initialize_model(model_type, model_name): model_info = get_model_info(model_type) - path = snapshot_download( - model_info["path"], max_workers=1, allow_patterns=["*.bin", "*.txt", "*.json"] - ) + allowed = ["*.bin", "*.txt", "*.json"] + rev = model_info.get("revision", None) + + # snapshot_download checks for updates by default + # This can cause significant lag in offline use cases or high-latency networks + # To avoid this penalty, we try to use the local cache first. + # If the files are not available, then we attempt a download + try: + path = snapshot_download( + model_info["path"], + max_workers=1, + allow_patterns=allowed, + revision=rev, + local_files_only=True, + ) + except FileNotFoundError: + path = snapshot_download( + model_info["path"], max_workers=1, allow_patterns=allowed, revision=rev + ) if model_info["architecture"] == "encoder-only-transformer": return ctranslate2.Encoder(