Commit 35e95b6: change exception

strutive07 authored Dec 13, 2023
1 parent c3b1c12 commit 35e95b6
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions convert.py
@@ -310,7 +310,7 @@ def __init__(self, params: Params, fname_tokenizer: Path) -> None:
 
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(str(fname_tokenizer), trust_remote_code=True)
-        except Exception:
+        except ValueError:
             self.tokenizer = AutoTokenizer.from_pretrained(str(fname_tokenizer), use_fast=False, trust_remote_code=True)
 
         self.added_tokens_dict: OrderedDict[str, int] = OrderedDict()
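
Side note on this first hunk: `AutoTokenizer.from_pretrained` typically raises `ValueError` when the files on disk cannot back a fast tokenizer, and that is the only failure the fallback is meant to handle; the old bare `except Exception:` also swallowed unrelated errors such as missing files or malformed configs. A minimal sketch of the pattern in isolation (the directory path is hypothetical, not part of the commit):

    from transformers import AutoTokenizer

    model_dir = "models/my-model"  # hypothetical local model directory

    try:
        # First try the fast (Rust-backed) tokenizer; this typically raises
        # ValueError when the files present cannot build a fast tokenizer.
        tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
    except ValueError:
        # Retry with the slow (pure-Python) implementation; any other
        # exception now propagates instead of being silently caught.
        tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False, trust_remote_code=True)

Narrowing the clause means a genuinely broken setup now fails loudly at the first call instead of producing a confusing second error from the retry.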
@@ -400,25 +400,25 @@ def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
     def get_vocab_type(self) -> str:
         path_candidates = []
         vocab_file = "tokenizer.model"
+        path_candidates.append(vocab_file)
         path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
         if path_candidate is not None:
             return "llama"
 
-        path_candidates.append(path_candidate)
         vocab_file = "vocab.json"
+        path_candidates.append(vocab_file)
         path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
         if path_candidate is not None:
             return "gpt2"
 
-        path_candidates.append(path_candidate)
         vocab_file = "tokenizer.json"
+        path_candidates.append(vocab_file)
         path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
         if path_candidate:
             if not self.has_newline_token():
                 return "gpt2"
             return "llama"
 
-        path_candidates.append(path_candidate)
         raise FileNotFoundError(
             f"Could not find {path_candidates} in {self.fname_tokenizer} or its parent; "
             "if it's in another directory, pass the directory as --vocab-dir"
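
The second hunk fixes what gets reported when no vocab file is found. The old code appended `path_candidate` after each failed lookup, but `find_vocab_file_path` returns `None` on failure, so the final `FileNotFoundError` message would read `Could not find [None, None, None] ...`. Appending the filename before the lookup makes the message list the files that were actually searched. A simplified, self-contained sketch of the fixed flow, with a stand-in `find_vocab_file_path` and the `tokenizer.json` newline-token check elided:

    from pathlib import Path

    def find_vocab_file_path(base: Path, name: str) -> Path | None:
        # Stand-in for convert.py's helper: check the directory and its parent.
        for d in (base, base.parent):
            if (d / name).is_file():
                return d / name
        return None

    def get_vocab_type(fname_tokenizer: Path) -> str:
        path_candidates = []

        vocab_file = "tokenizer.model"
        path_candidates.append(vocab_file)  # record the filename before the lookup
        if find_vocab_file_path(fname_tokenizer, vocab_file) is not None:
            return "llama"

        vocab_file = "vocab.json"
        path_candidates.append(vocab_file)
        if find_vocab_file_path(fname_tokenizer, vocab_file) is not None:
            return "gpt2"

        vocab_file = "tokenizer.json"
        path_candidates.append(vocab_file)
        if find_vocab_file_path(fname_tokenizer, vocab_file) is not None:
            return "llama"  # convert.py additionally checks for a newline token to pick gpt2

        # The message now lists the searched filenames, not [None, None, None].
        raise FileNotFoundError(
            f"Could not find {path_candidates} in {fname_tokenizer} or its parent; "
            "if it's in another directory, pass the directory as --vocab-dir"
        )

Called on a directory containing none of the three files, this now raises `FileNotFoundError: Could not find ['tokenizer.model', 'vocab.json', 'tokenizer.json'] in ...`, which tells the user exactly what was looked for.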
