From 1f222611c12486ee0e07e0f0d59ecdcb9ad72c50 Mon Sep 17 00:00:00 2001
From: Khai Mai
Date: Wed, 7 Aug 2024 14:20:35 +0700
Subject: [PATCH 1/2] add functionary-small-v3.2 to readme

---
 README.md | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 6f1eb49..c3c448a 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,9 @@ Documentation and more examples: [functionary.meetkai.com](https://functionary.m
 Changelog: (click to expand)
 
- + [2024/08/06] We release [meetkai/functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1) (based on [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)) with 128k context-length
+ + [2024/08/07] We release two 128k-context models based on [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct):
+ + [meetkai/functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1): using Meta's original prompt template as described in: [User-defined Custom tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1#user-defined-custom-tool-calling)
+ + [meetkai/functionary-small-v3.2](https://huggingface.co/meetkai/functionary-small-v3.2): using our own prompt template. This model is a little bit better compared with [meetkai/functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1)
 + [2024/06/14] We release [meetkai/functionary-medium-v3.0](https://huggingface.co/meetkai/functionary-medium-v3.0) (based on [meta-llama/Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct)) with better capability for function calling
 + [2024/05/17] We release [meetkai/functionary-small-v2.5](https://huggingface.co/meetkai/functionary-small-v2.5) with better capability for function calling and code interpreter compared with [functionary-small-v2.4](https://huggingface.co/meetkai/functionary-small-v2.4)
 + [2024/05/06] Streaming support for functionary v2 to v2.4 models is released in [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)!
@@ -38,7 +40,7 @@ Now you can start a blazing fast [vLLM](https://vllm.readthedocs.io/en/latest/ge
 **Small Model:**
 
 ```shell
-python3 server_vllm.py --model "meetkai/functionary-small-v3.1" --host 0.0.0.0 --max-model-len 8192
+python3 server_vllm.py --model "meetkai/functionary-small-v3.2" --host 0.0.0.0 --max-model-len 8192
 ```
@@ -47,7 +49,7 @@ python3 server_vllm.py --model "meetkai/functionary-small-v3.1" --host 0.0.0.0
 - If you use multiple GPUs (medium models require: 4xA6000 or 2xA100 80GB to run), need to use: `tensor-parallel-size`
 
 ```shell
-python3 server_vllm.py --model "meetkai/functionary-medium-v3.0" --max-model-len 8192 --tensor-parallel-size 2
+python3 server_vllm.py --model "meetkai/functionary-small-v3.2" --max-model-len 8192 --tensor-parallel-size 2
 ```
@@ -116,7 +118,7 @@ from openai import OpenAI
 client = OpenAI(base_url="http://localhost:8000/v1", api_key="functionary")
 
 client.chat.completions.create(
-    model="meetkai/functionary-small-v3.1",
+    model="meetkai/functionary-small-v3.2",
     messages=[{"role": "user",
         "content": "What is the weather for Istanbul?"}
     ],
@@ -152,7 +154,7 @@ client.chat.completions.create(
 import requests
 
 data = {
-    'model': 'meetkai/functionary-small-v3.1', # model name here is the value of argument "--model" in deploying: server_vllm.py or server.py
+    'model': 'meetkai/functionary-small-v3.2', # model name here is the value of argument "--model" in deploying: server_vllm.py or server.py
     'messages': [
         {
             "role": "user",
@@ -195,7 +197,8 @@ print(response.text)
 ## Models Available
 | Model | Description | VRAM FP16 |
 |:-------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------|:------|
-| [functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1) / [GGUF](https://huggingface.co/meetkai/functionary-small-v3.1-GGUF) | 128k context, code interpreter | 24GB |
+| [functionary-small-v3.2](https://huggingface.co/meetkai/functionary-small-v3.2) / [GGUF](https://huggingface.co/meetkai/functionary-small-v3.2-GGUF) | 128k context, code interpreter, using our own prompt template | 24GB |
+| [functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1) / [GGUF](https://huggingface.co/meetkai/functionary-small-v3.1-GGUF) | 128k context, code interpreter, using original Meta prompt template | 24GB |
 | [functionary-medium-v3.0](https://huggingface.co/meetkai/functionary-medium-v3.0) / [GGUF](https://huggingface.co/meetkai/functionary-medium-v3.0-GGUF) | 8k context, based on [meta-llama/Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) | 160GB |
 | [functionary-small-v2.5](https://huggingface.co/meetkai/functionary-small-v2.5) / [GGUF](https://huggingface.co/meetkai/functionary-small-v2.5-GGUF) | 8k context, code interpreter | 24GB |
 | [functionary-small-v2.4](https://huggingface.co/meetkai/functionary-small-v2.4) / [GGUF](https://huggingface.co/meetkai/functionary-small-v2.4-GGUF) | 8k context, code interpreter | 24GB |

From cfb6bc52b4a3eddb6fcadd62b89b582f54c860ff Mon Sep 17 00:00:00 2001
From: Khai Mai
Date: Wed, 7 Aug 2024 14:25:19 +0700
Subject: [PATCH 2/2] fix readme

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index c3c448a..b5aa976 100644
--- a/README.md
+++ b/README.md
@@ -16,8 +16,8 @@ Documentation and more examples: [functionary.meetkai.com](https://functionary.m
 Changelog: (click to expand)
 
 + [2024/08/07] We release two 128k-context models based on [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct):
- + [meetkai/functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1): using Meta's original prompt template as described in: [User-defined Custom tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1#user-defined-custom-tool-calling)
- + [meetkai/functionary-small-v3.2](https://huggingface.co/meetkai/functionary-small-v3.2): using our own prompt template. This model is a little bit better compared with [meetkai/functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1)
+ + [meetkai/functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1): **using Meta's original prompt template** as described in [User-defined Custom tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1#user-defined-custom-tool-calling)
+ + [meetkai/functionary-small-v3.2](https://huggingface.co/meetkai/functionary-small-v3.2): using **our own prompt template**. This model is **better** than [meetkai/functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1)
 + [2024/06/14] We release [meetkai/functionary-medium-v3.0](https://huggingface.co/meetkai/functionary-medium-v3.0) (based on [meta-llama/Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct)) with better capability for function calling
 + [2024/05/17] We release [meetkai/functionary-small-v2.5](https://huggingface.co/meetkai/functionary-small-v2.5) with better capability for function calling and code interpreter compared with [functionary-small-v2.4](https://huggingface.co/meetkai/functionary-small-v2.4)
 + [2024/05/06] Streaming support for functionary v2 to v2.4 models is released in [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)!
@@ -49,7 +49,7 @@ python3 server_vllm.py --model "meetkai/functionary-small-v3.2" --host 0.0.0.0
 - If you use multiple GPUs (medium models require: 4xA6000 or 2xA100 80GB to run), need to use: `tensor-parallel-size`
 
 ```shell
-python3 server_vllm.py --model "meetkai/functionary-small-v3.2" --max-model-len 8192 --tensor-parallel-size 2
+python3 server_vllm.py --model "meetkai/functionary-medium-v3.0" --max-model-len 8192 --tensor-parallel-size 2
 ```
@@ -197,8 +197,8 @@ print(response.text)
 ## Models Available
 | Model | Description | VRAM FP16 |
 |:-------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------|:------|
-| [functionary-small-v3.2](https://huggingface.co/meetkai/functionary-small-v3.2) / [GGUF](https://huggingface.co/meetkai/functionary-small-v3.2-GGUF) | 128k context, code interpreter, using our own prompt template | 24GB |
-| [functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1) / [GGUF](https://huggingface.co/meetkai/functionary-small-v3.1-GGUF) | 128k context, code interpreter, using original Meta prompt template | 24GB |
+| [functionary-small-v3.2](https://huggingface.co/meetkai/functionary-small-v3.2) / [GGUF](https://huggingface.co/meetkai/functionary-small-v3.2-GGUF) | 128k context, code interpreter, using **our own prompt template** | 24GB |
+| [functionary-small-v3.1](https://huggingface.co/meetkai/functionary-small-v3.1) / [GGUF](https://huggingface.co/meetkai/functionary-small-v3.1-GGUF) | 128k context, code interpreter, using **Meta's original prompt template** | 24GB |
 | [functionary-medium-v3.0](https://huggingface.co/meetkai/functionary-medium-v3.0) / [GGUF](https://huggingface.co/meetkai/functionary-medium-v3.0-GGUF) | 8k context, based on [meta-llama/Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) | 160GB |
 | [functionary-small-v2.5](https://huggingface.co/meetkai/functionary-small-v2.5) / [GGUF](https://huggingface.co/meetkai/functionary-small-v2.5-GGUF) | 8k context, code interpreter | 24GB |
 | [functionary-small-v2.4](https://huggingface.co/meetkai/functionary-small-v2.4) / [GGUF](https://huggingface.co/meetkai/functionary-small-v2.4-GGUF) | 8k context, code interpreter | 24GB |
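
For a quick end-to-end check of the renamed model, the snippet below is a minimal sketch, not part of the patch: it assumes the small-model vLLM server from the first hunk is running on localhost:8000, and the `get_current_weather` tool schema is purely illustrative (it is not defined anywhere in this patch).

```python
# Minimal sketch: call the functionary-small-v3.2 server started with server_vllm.py above.
# Assumes the server is reachable at http://localhost:8000/v1; the tool schema is illustrative.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="functionary")

response = client.chat.completions.create(
    model="meetkai/functionary-small-v3.2",  # must match the value passed to --model
    messages=[{"role": "user", "content": "What is the weather for Istanbul?"}],
    tools=[{
        "type": "function",
        "function": {
            "name": "get_current_weather",  # hypothetical tool used only for this check
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City name, e.g. Istanbul"}
                },
                "required": ["location"],
            },
        },
    }],
    tool_choice="auto",
)

# A working server should answer with a tool call whose arguments name the requested city.
print(response.choices[0].message.tool_calls)
```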