From 6de3ca6a7b7afe06bc03f03b2f26606748e89554 Mon Sep 17 00:00:00 2001 From: akiseakusa Date: Fri, 5 Apr 2024 22:07:16 +0530 Subject: [PATCH 1/6] Huggingface comparison added --- .github/workflows/causal_lm_cpp.yml | 41 +++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index e58d4e67ee..f93b23d3e8 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -11,6 +11,47 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: + cpp-beam_search_causal_lm-Mistral-7B: + runs-on: ubuntu-20.04-16-cores + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install OpenVINO + run: | + mkdir ./ov/ + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + sudo ./ov/install_dependencies/install_openvino_dependencies.sh + - name: Download, convert and build + run: | + source ./ov/setupvars.sh + python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id mistralai/Mistral-7B-v0.1 --output_dir ./Mistral-7B-v0.1/ --precision FP16 & + cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ + cmake --build ./build/ --config Release -j --parallel 8 + wait + - name: Compare + run: | + source ./ov/setupvars.sh + convert_tokenizer ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --output ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm 
./Mistral-7B-v0.1/pytorch/dldt/FP16/ 69 > ./pred.txt + python -c " + import transformers + with open('pred.txt', 'r') as file: + predictions = file.read() + tokenizer = transformers.LlamaTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1') + tokenized = tokenizer('69', return_tensors='pt') + for beam in transformers.LlamaForCausalLM.from_pretrained('mistralai/Mistral-7B-v0.1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): + ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' + idx = predictions.find(ref) + if -1 == idx: + raise RuntimeError(f'Missing "{ref=}" from predictions') + predictions = predictions[:idx] + predictions[idx + len(ref):] + " + echo 69 passed + cpp-greedy_causal_lm-ubuntu: runs-on: ubuntu-20.04-8-cores steps: From 5874836921b2d36381ef15b44168738d8789baf0 Mon Sep 17 00:00:00 2001 From: akiseakusa Date: Fri, 5 Apr 2024 22:21:31 +0530 Subject: [PATCH 2/6] huggingface compare added --- text_generation/causal_lm/cpp/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md index 0ad2ffe928..a616da085e 100644 --- a/text_generation/causal_lm/cpp/README.md +++ b/text_generation/causal_lm/cpp/README.md @@ -141,6 +141,6 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Contro 2. https://huggingface.co/microsoft/phi-1_5 9. [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1) 10. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) - +11. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature. 
From 1c762d7c4c652ed6e027dbae1c5c4465619607cb Mon Sep 17 00:00:00 2001 From: akiseakusa Date: Tue, 9 Apr 2024 23:43:36 +0530 Subject: [PATCH 3/6] red pjama instruct 3b added --- .github/workflows/causal_lm_cpp.yml | 51 ++++++++++++++++++++++--- text_generation/causal_lm/cpp/README.md | 2 +- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index f93b23d3e8..a75f028676 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -11,6 +11,47 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: + cpp-beam_search_causal_lm-red-pajama-3b-instruct: + runs-on: ubuntu-20.04-16-cores + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install OpenVINO + run: | + mkdir ./ov/ + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + sudo ./ov/install_dependencies/install_openvino_dependencies.sh + - name: Download, convert and build + run: | + source ./ov/setupvars.sh + python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir .RedPajama-INCITE-Instruct-3B-v1/ --precision FP16 &7B-v0.1/ --precision FP16 & + cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ + cmake --build ./build/ --config Release -j --parallel 8 + wait + - name: Compare + run: | + source ./ov/setupvars.sh + convert_tokenizer ./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ --output 
./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm .RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ 69 > ./pred.txt + python -c " + import transformers + with open('pred.txt', 'r') as file: + predictions = file.read() + tokenizer = transformers.LlamaTokenizer.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1') + tokenized = tokenizer('69', return_tensors='pt') + for beam in transformers.LlamaForCausalLM.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): + ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' + idx = predictions.find(ref) + if -1 == idx: + raise RuntimeError(f'Missing "{ref=}" from predictions') + predictions = predictions[:idx] + predictions[idx + len(ref):] + " + echo "69" passed + cpp-beam_search_causal_lm-Mistral-7B: runs-on: ubuntu-20.04-16-cores steps: @@ -50,7 +91,7 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo 69 passed + echo "69" passed cpp-greedy_causal_lm-ubuntu: runs-on: ubuntu-20.04-8-cores @@ -134,7 +175,7 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo Hi passed + echo "Hi" passed timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "return 0" > ./pred.txt python -c " @@ -166,7 +207,7 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo 你好! 你好嗎? passed + echo "你好! 你好嗎?" 
passed cpp-beam_search_causal_lm-windows: runs-on: windows-latest steps: @@ -348,7 +389,7 @@ jobs: predicted_speculative = f.readline() assert predicted_greedy == predicted_speculative " - echo speculative_decoding_lm passed + echo "speculative_decoding_lm" passed cpp-Phi-1_5: runs-on: ubuntu-20.04-16-cores steps: @@ -391,4 +432,4 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo Phi-1_5 passed + echo "Phi-1_5" passed diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md index a616da085e..625f9afaf3 100644 --- a/text_generation/causal_lm/cpp/README.md +++ b/text_generation/causal_lm/cpp/README.md @@ -142,5 +142,5 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Contro 9. [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1) 10. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) 11. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) - +12. [red-pajama-3b-instruct](https://huggingface.co/togethercomputer/RedPajama-INCITE-Instruct-3B-v1) This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature. 
From 77901b1fb44fa6e76b50c021275f74c7ee2746b8 Mon Sep 17 00:00:00 2001 From: Sadhvi <41192585+akiseakusa@users.noreply.github.com> Date: Sun, 28 Apr 2024 23:29:38 +0530 Subject: [PATCH 4/6] Update causal_lm_cpp.yml --- .github/workflows/causal_lm_cpp.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 9945a7c7cf..749ce357c1 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -51,7 +51,7 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo "69" passed + echo 69 passed cpp-beam_search_causal_lm-Mistral-7B: runs-on: ubuntu-20.04-16-cores @@ -475,4 +475,4 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo Phi-1_5 passed \ No newline at end of file + echo Phi-1_5 passed From 8e37fdc4dae3b171256b0b310061e2f408c6afd1 Mon Sep 17 00:00:00 2001 From: Sadhvi <41192585+akiseakusa@users.noreply.github.com> Date: Sun, 28 Apr 2024 23:37:12 +0530 Subject: [PATCH 5/6] Update causal_lm_cpp.yml --- .github/workflows/causal_lm_cpp.yml | 35 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 749ce357c1..30f79f3754 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -12,7 +12,7 @@ concurrency: cancel-in-progress: true jobs: - cpp-beam_search_causal_lm-red-pajama-3b-instruct: + cpp-beam_search_causal_lm-red-pajama-3b-instruct: runs-on: ubuntu-20.04-16-cores steps: - uses: actions/checkout@v4 @@ -29,29 +29,28 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt 
./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir .RedPajama-INCITE-Instruct-3B-v1/ --precision FP16 &7B-v0.1/ --precision FP16 & + python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir .RedPajama-INCITE-Instruct-3B-v1/ --precision FP16 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ - cmake --build ./build/ --config Release -j --parallel 8 - wait + cmake --build ./build/ --config Release -j8 - name: Compare run: | source ./ov/setupvars.sh convert_tokenizer ./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ --output ./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm .RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ 69 > ./pred.txt - python -c " - import transformers - with open('pred.txt', 'r') as file: - predictions = file.read() - tokenizer = transformers.LlamaTokenizer.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1') - tokenized = tokenizer('69', return_tensors='pt') - for beam in transformers.LlamaForCausalLM.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' - idx = predictions.find(ref) - if -1 == idx: - raise RuntimeError(f'Missing "{ref=}" from predictions') - predictions 
= predictions[:idx] + predictions[idx + len(ref):] - " - echo 69 passed + python -c " + import transformers + with open('pred.txt', 'r') as file: + predictions = file.read() + tokenizer = transformers.LlamaTokenizer.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1') + tokenized = tokenizer('69', return_tensors='pt') + for beam in transformers.LlamaForCausalLM.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_length=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9, do_sample=False): + ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' + idx = predictions.find(ref) + if -1 == idx: + raise RuntimeError(f'Missing "{ref=}" from predictions') + predictions = predictions[:idx] + predictions[idx + len(ref):] + " + echo 69 passed cpp-beam_search_causal_lm-Mistral-7B: runs-on: ubuntu-20.04-16-cores From d1cd3c9ea3d377ba7693f4724ca92e24b58897bb Mon Sep 17 00:00:00 2001 From: Sadhvi <41192585+akiseakusa@users.noreply.github.com> Date: Mon, 29 Apr 2024 15:01:34 +0530 Subject: [PATCH 6/6] Update causal_lm_cpp.yml --- .github/workflows/causal_lm_cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 30f79f3754..f2b775a594 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -29,7 +29,7 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir .RedPajama-INCITE-Instruct-3B-v1/ --precision FP16 + 
python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir ./RedPajama-INCITE-Instruct-3B-v1/ --precision FP16 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j8 - name: Compare