From 54c725743163cd1ed449e2e0a6134cd897fb49a9 Mon Sep 17 00:00:00 2001 From: Jon Craton Date: Mon, 18 Nov 2024 12:22:54 -0500 Subject: [PATCH] Skip adding special tokens to targets --- changelog.md | 1 + languagemodels/inference.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index cf83c96..4c7e85c 100644 --- a/changelog.md +++ b/changelog.md @@ -5,6 +5,7 @@ ### Fixed - Properly apply prompt format when providing `choices` +- Do not add special tokens before `choices` ## 0.22 - 2024-11-02 diff --git a/languagemodels/inference.py b/languagemodels/inference.py index 2824924..26b578b 100644 --- a/languagemodels/inference.py +++ b/languagemodels/inference.py @@ -284,7 +284,7 @@ def rank_instruct(inputs, targets): fmt = model_info.get("prompt_fmt", "{instruction}") inputs = [fmt.replace("{instruction}", inst) for inst in inputs] - targ_tok = [tokenizer.encode(t).tokens for t in targets] + targ_tok = [tokenizer.encode(t, add_special_tokens=False).tokens for t in targets] targ_tok *= len(inputs) in_tok = []