diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py
index dcd4e17ff..357023e3c 100644
--- a/llama_cpp/_internals.py
+++ b/llama_cpp/_internals.py
@@ -511,7 +511,7 @@ def sample_token(self, candidates: "_LlamaTokenDataArray") -> int:
     def grammar_accept_token(self, grammar: LlamaGrammar, token: int):
         assert self.ctx is not None
         assert grammar.grammar is not None
-        llama_cpp.llama_grammar_accept_token(self.ctx, grammar.grammar, token)
+        llama_cpp.llama_grammar_accept_token(grammar.grammar, self.ctx, token)
 
     def reset_timings(self):
         assert self.ctx is not None