diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llm-npu-cli.cpp b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llm-npu-cli.cpp index 7009de37b6f..fbaef01f304 100644 --- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llm-npu-cli.cpp +++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llm-npu-cli.cpp @@ -110,7 +110,7 @@ std::string run_generate(void* void_model, int32_t* embd_inp_ptr, int32_t embd_i printf("\nPrefill %d tokens cost %d ms.\n", embd_inp_size, duration.count()); } - std::vector embd; // output ids; + std::vector embd; // output ids embd.push_back(token); int token_nums = 0;