diff --git a/changelog.md b/changelog.md
index 9f7bb02..af8a18e 100644
--- a/changelog.md
+++ b/changelog.md
@@ -5,6 +5,7 @@
 ### Changed
 
 - Skip checking for model updates
+- Download entire model upfront even if we only need the tokenizer initially
 
 ## 0.20 - 2024-04-25
 
diff --git a/languagemodels/models.py b/languagemodels/models.py
index 4a2f66c..ca58b1b 100644
--- a/languagemodels/models.py
+++ b/languagemodels/models.py
@@ -33,8 +33,11 @@ def get_model_info(model_type="instruct"):
 
 def initialize_tokenizer(model_type, model_name):
     model_info = get_model_info(model_type)
+    rev = model_info.get("revision", None)
 
-    tok_config = hf_hub_download(model_info["path"], "tokenizer.json")
+    tok_config = hf_hub_download(
+        model_info["path"], "tokenizer.json", revision=rev, local_files_only=True
+    )
     tokenizer = Tokenizer.from_file(tok_config)
 
     if model_type == "embedding":
@@ -44,7 +47,7 @@ def initialize_tokenizer(model_type, model_name):
     return tokenizer
 
 
-def initialize_model(model_type, model_name):
+def initialize_model(model_type, model_name, tokenizer_only=False):
     model_info = get_model_info(model_type)
 
     allowed = ["*.bin", "*.txt", "*.json"]
@@ -67,6 +70,9 @@ def initialize_model(model_type, model_name):
         model_info["path"], max_workers=1, allow_patterns=allowed, revision=rev
     )
 
+    if tokenizer_only:
+        return None
+
     if model_info["architecture"] == "encoder-only-transformer":
         return ctranslate2.Encoder(
             path,
@@ -111,10 +117,8 @@ def get_model(model_type, tokenizer_only=False):
             pass
 
     if model_name not in modelcache:
+        model = initialize_model(model_type, model_name, tokenizer_only)
         tokenizer = initialize_tokenizer(model_type, model_name)
-        model = None
-        if not tokenizer_only:
-            model = initialize_model(model_type, model_name)
         modelcache[model_name] = (tokenizer, model)
     elif not tokenizer_only:
         # Make sure model is loaded if we've never loaded it