Warm up re2 regexes at tokenizer load to reduce first-run encoder latency

Differential Revision: D60867022 Pull Request resolved: pytorch#4584
kirklandsign · Aug 8, 2024 · e610947 · e610947
1 parent 8c813f9
commit e610947
Showing 1 changed file with 6 additions and 0 deletions.
diff --git a/extension/llm/tokenizer/tiktoken.cpp b/extension/llm/tokenizer/tiktoken.cpp
@@ -359,8 +359,14 @@ Error Tiktoken::load(const std::string& path) {
   _special_token_decoder = _build_decoder(_special_token_encoder);
 
   _regex = _create_regex(_pattern);
+  // Warm up re2, as it is slow on the first run. The return value is not
+  // needed, so it is explicitly discarded. Refer to:
+  // https://github.com/google/re2/blob/6dcd83d60f7944926bfd308cc13979fc53dd69ca/re2/fuzzing/re2_fuzzer.cc#L136-L141
+  (void)_regex->ReverseProgramSize();
 
   _special_token_regex = _build_special_token_regex(_special_token_encoder);
+  // Same as above, warm up re2
+  (void)_special_token_regex->ReverseProgramSize();
 
   // initialize vocab_size, bos_tok, eos_tok
   vocab_size_ = _encoder.size() + _special_token_encoder.size();