diff --git a/changelog.md b/changelog.md
index 8030266..4533abd 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## 0.18.0
+
+### Fixed
+
+- Correct issue causing `choices` to be scored improperly
+
 ## 0.17.0 - 2024-02-15
 
 ### Added
diff --git a/languagemodels/__init__.py b/languagemodels/__init__.py
index f141060..4061488 100644
--- a/languagemodels/__init__.py
+++ b/languagemodels/__init__.py
@@ -83,6 +83,14 @@ def do(prompt, choices=None):
 
     >>> do(["Say red", "Say blue"], choices=["red", "blue"])
     ['red', 'blue']
+
+    >>> do("Classify as positive or negative: LLMs are bad",
+    ... choices=["Positive", "Negative"])
+    'Negative'
+
+    >>> do("Classify as positive or negative: LLMs are great",
+    ... choices=["Positive", "Negative"])
+    'Positive'
     """
 
     prompts = [prompt] if isinstance(prompt, str) else prompt
diff --git a/languagemodels/inference.py b/languagemodels/inference.py
index 35d75a1..f3c19b6 100644
--- a/languagemodels/inference.py
+++ b/languagemodels/inference.py
@@ -196,12 +196,12 @@ def rank_instruct(inputs, targets):
     """
     tokenizer, model = get_model("instruct")
 
-    targ_tok = [tokenizer.encode(t, add_special_tokens=False).tokens for t in targets]
+    targ_tok = [tokenizer.encode(t).tokens for t in targets]
     targ_tok *= len(inputs)
 
     in_tok = []
     for input in inputs:
-        toks = [tokenizer.encode(input, add_special_tokens=False).tokens]
+        toks = [tokenizer.encode(input).tokens]
         in_tok += toks * len(targets)
 
     if "Generator" in str(type(model)):