Improve offline latency by skipping checks for model updates
jncraton committed Feb 24, 2024
1 parent cd77973 commit 7175c04
Showing 2 changed files with 20 additions and 3 deletions.
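For context on why this helps offline latency: by default, `hf_hub_download` and `snapshot_download` typically contact the Hub to resolve the latest revision even when the files are already cached, so every startup pays for an update check (or a timeout when offline). Passing `local_files_only=True` serves the cached copy without any network request. The commit applies that cache-first pattern with a network fallback; below is a minimal standalone sketch of the same pattern, where the `gpt2` repo and `config.json` filename are purely illustrative and not part of this commit.

```python
from functools import partial

from huggingface_hub import hf_hub_download
from huggingface_hub.utils import LocalEntryNotFoundError

# Illustrative repo/file only; the commit uses model_info["path"] instead.
get_file = partial(hf_hub_download, "gpt2", "config.json")

try:
    # Cache-only lookup: no HTTP request, so no update-check latency
    # and no failure when running offline.
    config_path = get_file(local_files_only=True)
except LocalEntryNotFoundError:
    # Nothing cached yet, so fall back to a normal Hub download.
    config_path = get_file()

print(config_path)
```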
4 changes: 4 additions & 0 deletions changelog.md
@@ -2,6 +2,10 @@

## 0.18.0

+### Added
+
+- Improve offline latency by skipping checks for model updates
+
### Fixed

- Correct issue causing `choices` to be scored improperly
19 changes: 16 additions & 3 deletions languagemodels/models.py
@@ -1,5 +1,7 @@
import re
+from functools import partial
from huggingface_hub import hf_hub_download, snapshot_download
+from huggingface_hub.utils import LocalEntryNotFoundError
from tokenizers import Tokenizer
import ctranslate2

@@ -34,7 +36,13 @@ def get_model_info(model_type="instruct"):
def initialize_tokenizer(model_type, model_name):
    model_info = get_model_info(model_type)

-    tok_config = hf_hub_download(model_info["path"], "tokenizer.json")
+    get_model = partial(hf_hub_download, model_info["path"], "tokenizer.json")
+
+    try:
+        tok_config = get_model(local_files_only=True)
+    except LocalEntryNotFoundError:
+        tok_config = get_model()
+
    tokenizer = Tokenizer.from_file(tok_config)

    if model_type == "embedding":
@@ -47,8 +55,13 @@ def initialize_tokenizer(model_type, model_name):
def initialize_model(model_type, model_name):
    model_info = get_model_info(model_type)

-    path = snapshot_download(model_info["path"], max_workers=1,
-                             allow_patterns=["*.bin", "*.txt", "*.json"])
+    get_path = partial(snapshot_download, model_info["path"], max_workers=1,
+                       allow_patterns=["*.bin", "*.txt", "*.json"])
+
+    try:
+        path = get_path(local_files_only=True)
+    except LocalEntryNotFoundError:
+        path = get_path()

    if model_info["architecture"] == "encoder-only-transformer":
        return ctranslate2.Encoder(path, "cpu", compute_type="int8", )
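The same cache-first fallback appears at both call sites above, so it could be factored into a small helper. The sketch below is only a hypothetical refactor, not something this commit introduces, and the `gpt2` identifiers are again illustrative.

```python
from huggingface_hub import hf_hub_download, snapshot_download
from huggingface_hub.utils import LocalEntryNotFoundError


def cached_or_download(download_fn, *args, **kwargs):
    """Try the local cache first and only hit the network on a cache miss."""
    try:
        return download_fn(*args, local_files_only=True, **kwargs)
    except LocalEntryNotFoundError:
        return download_fn(*args, **kwargs)


# Hypothetical usage mirroring the two call sites in models.py:
tok_config = cached_or_download(hf_hub_download, "gpt2", "tokenizer.json")
model_path = cached_or_download(
    snapshot_download, "gpt2", max_workers=1,
    allow_patterns=["*.bin", "*.txt", "*.json"],
)
```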
