Merge pull request #68 from EricLBuehler/develop
Update demo video
guoqingbao authored Jul 26, 2024
2 parents e922750 + c42266e commit eb41272
Showing 3 changed files with 5 additions and 3 deletions.
README.md: 6 changes (4 additions, 2 deletions)
@@ -33,8 +33,10 @@ Currently, candle-vllm supports chat serving for the following models.
 | #12 | Moondream-2 (Multimodal LLM) |TBD|TBD|
 
 
-## Demo Chat with candle-vllm (71 tokens/s, LLaMa2 7B, bf16, on A100)
-<img src="./res/candle-vllm-demo.gif" width="90%" height="90%" >
+## Demo Chat with candle-vllm (61-65 tokens/s, LLaMa3.1 8B, bf16, on A100)
+
+https://github.com/user-attachments/assets/290d72d8-d5e6-41a3-8bd8-1d9d732aee3b
+
 
 ## Usage
 See [this folder](examples/) for some examples.
res/candle-vllm-demo.gif: binary file removed (not shown)
src/openai/pipelines/pipeline.rs: 2 changes (1 addition, 1 deletion)
@@ -327,7 +327,7 @@ impl ModelLoader for DefaultLoader {
             stop_token_ids.push(eos_token);
         }
 
-        //custome stop tokens
+        //custom stop tokens
         if let Some(custom_stop) = &config.custom_stop_tokens {
            for stop in custom_stop {
                match tokenizer.get_token(&stop) {
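For readers skimming the hunk: the change is comment-only (a spelling fix), and the surrounding loop resolves each configured stop string to a token id before pushing it onto `stop_token_ids`. Below is a minimal standalone sketch of that pattern. It calls the `tokenizers` crate's `token_to_id` directly; candle-vllm's `tokenizer.get_token` is a project helper, and the `Config` struct here is illustrative rather than the repo's real type.

```rust
use tokenizers::Tokenizer;

// Illustrative stand-in for the pipeline config; the real field lives
// on candle-vllm's pipeline configuration, not this struct.
struct Config {
    custom_stop_tokens: Option<Vec<String>>,
}

// Resolve configured stop strings to token ids, skipping any string
// the tokenizer does not know as a single vocabulary entry.
fn collect_stop_token_ids(tokenizer: &Tokenizer, config: &Config) -> Vec<u32> {
    let mut stop_token_ids = Vec::new();

    // custom stop tokens
    if let Some(custom_stop) = &config.custom_stop_tokens {
        for stop in custom_stop {
            match tokenizer.token_to_id(stop) {
                Some(id) => stop_token_ids.push(id),
                None => {} // not a single token in the vocabulary; skipped
            }
        }
    }
    stop_token_ids
}
```

Since the fix touches only a comment, generation behavior is unchanged by this hunk.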
