From 05062e716e20adbaf9b4750fe302946c31d71fc8 Mon Sep 17 00:00:00 2001
From: Jon Craton
Date: Thu, 29 Aug 2024 15:01:06 -0400
Subject: [PATCH] Download entire model upfront

This change downloads all model files up front, even if we only need
the tokenizer right now. This simplifies offline caching and
eliminates a bug caused by missing model files.
---
 languagemodels/models.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/languagemodels/models.py b/languagemodels/models.py
index 4a2f66c..46a1f10 100644
--- a/languagemodels/models.py
+++ b/languagemodels/models.py
@@ -34,7 +34,9 @@ def get_model_info(model_type="instruct"):
 
 def initialize_tokenizer(model_type, model_name):
     model_info = get_model_info(model_type)
-    tok_config = hf_hub_download(model_info["path"], "tokenizer.json")
+    tok_config = hf_hub_download(
+        model_info["path"], "tokenizer.json", local_files_only=True
+    )
     tokenizer = Tokenizer.from_file(tok_config)
 
     if model_type == "embedding":
@@ -44,7 +46,7 @@ def initialize_tokenizer(model_type, model_name):
     return tokenizer
 
 
-def initialize_model(model_type, model_name):
+def initialize_model(model_type, model_name, tokenizer_only=False):
     model_info = get_model_info(model_type)
 
     allowed = ["*.bin", "*.txt", "*.json"]
@@ -67,6 +69,9 @@ def initialize_model(model_type, model_name):
         model_info["path"], max_workers=1, allow_patterns=allowed, revision=rev
     )
 
+    if tokenizer_only:
+        return None
+
     if model_info["architecture"] == "encoder-only-transformer":
         return ctranslate2.Encoder(
             path,
@@ -111,10 +116,8 @@ def get_model(model_type, tokenizer_only=False):
             pass
 
     if model_name not in modelcache:
+        model = initialize_model(model_type, model_name, tokenizer_only)
         tokenizer = initialize_tokenizer(model_type, model_name)
-        model = None
-        if not tokenizer_only:
-            model = initialize_model(model_type, model_name)
         modelcache[model_name] = (tokenizer, model)
     elif not tokenizer_only:
         # Make sure model is loaded if we've never loaded it
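
Note (not part of the commit): the reordering in get_model works because
snapshot_download and hf_hub_download share the same local Hugging Face
cache, so taking a full snapshot first guarantees that the later tokenizer
lookup with local_files_only=True resolves without network access. Below is
a minimal standalone sketch of that dependency; the repo id is hypothetical,
standing in for the real model paths kept in the library's registry:

    from huggingface_hub import snapshot_download, hf_hub_download
    from tokenizers import Tokenizer

    REPO = "example-org/example-ct2-model"  # hypothetical repo id

    # Download every matching file up front; this caches tokenizer.json
    # alongside the model weights.
    snapshot_download(
        REPO, max_workers=1, allow_patterns=["*.bin", "*.txt", "*.json"]
    )

    # The snapshot has already populated the cache, so this lookup is
    # served locally and never hits the network.
    tok_config = hf_hub_download(REPO, "tokenizer.json", local_files_only=True)
    tokenizer = Tokenizer.from_file(tok_config)

If the snapshot step is skipped, the hf_hub_download call raises
LocalEntryNotFoundError, which is the missing-file failure mode the patch
avoids by initializing the model before the tokenizer.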