From 7173168a4911be890cb6b52d878e496bc89118fb Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Wed, 21 Aug 2024 09:32:58 +0200
Subject: [PATCH] llama : std::move llm_bigram_bpe from work_queue (#9062)

* llama : std::move llm_bigram_bpe from work_queue

This commit updates the retrieval of llm_bigram_bpe objects from
work_queue.top() by using std::move.

The motivation for this is to avoid the copying of the std::string
`text` member of the llm_bigram_bpe struct.

* squash! llama : std::move llm_bigram_bpe from work_queue

Introduced a MovablePriorityQueue class to allow moving elements
out of the priority queue for llm_bigram_bpe.

* squash! llama : std::move llm_bigram_bpe from work_queue

Rename MovablePriorityQueue to lama_priority_queue.

* squash! llama : std::move llm_bigram_bpe from work_queue

Rename lama_priority_queue -> llama_priority_queue.
---
 src/llama-vocab.cpp | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 17deefaa86038..323660ef54cb0 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -321,6 +321,21 @@ struct llm_tokenizer_spm {
 
 // TODO: there are a lot of common parts between spm and bpe tokenizers, should be refactored and reused
 
+template<typename T, typename Container = std::vector<T>, typename Compare = std::less<typename Container::value_type>>
+class llama_priority_queue : public std::priority_queue<T, Container, Compare> {
+public:
+    using std::priority_queue<T, Container, Compare>::priority_queue;
+
+    T pop_move() {
+        T item = std::move(this->c.front());
+        std::pop_heap(this->c.begin(), this->c.end(), this->comp);
+        this->c.pop_back();
+        return item;
+    }
+
+    void pop() = delete;
+};
+
 struct llm_bigram_bpe {
     struct comparator {
         bool operator()(const llm_bigram_bpe & l, const llm_bigram_bpe & r) const {
@@ -329,7 +344,7 @@ struct llm_bigram_bpe {
     };
 
     using queue_storage = std::vector<llm_bigram_bpe>;
-    using queue = std::priority_queue<llm_bigram_bpe, queue_storage, comparator>;
+    using queue = llama_priority_queue<llm_bigram_bpe, queue_storage, comparator>;
     llm_symbol::index left;
     llm_symbol::index right;
     std::string text;
@@ -520,8 +535,7 @@ struct llm_tokenizer_bpe {
 
             // build token(s)
             while (!work_queue.empty()) {
-                auto bigram = work_queue.top();
-                work_queue.pop();
+                auto bigram = work_queue.pop_move();
 
                 auto & left_symbol = symbols[bigram.left];
                 auto & right_symbol = symbols[bigram.right];