openvinotoolkit · akiseakusa · Apr 5, 2024 · Apr 5, 2024 · Apr 9, 2024 · Apr 28, 2024
diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
@@ -12,6 +12,87 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  cpp-beam_search_causal_lm-red-pajama-3b-instruct:  # beam_search_causal_lm sample checked against a Hugging Face reference
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.8
+      - name: Install OpenVINO  # unpacks the pinned nightly archive into ./ov/
+        run: |
+          mkdir ./ov/
+          curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
+          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
+      - name: Download, convert and build  # model conversion first, then the C++ samples
+        run: |
+          source ./ov/setupvars.sh
+          python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir ./RedPajama-INCITE-Instruct-3B-v1/ --precision FP16
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
+          cmake --build ./build/ --config Release -j8
+      - name: Compare  # fails if any reference beam is missing from the sample output
+        run: |
+          source ./ov/setupvars.sh
+          convert_tokenizer ./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ --output ./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
+          timeout 50s ./build/beam_search_causal_lm ./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ 69 > ./pred.txt
+          python -c "
+          import transformers
+          with open('pred.txt', 'r') as file:
+              predictions = file.read()
+          tokenizer = transformers.AutoTokenizer.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1')  # Auto classes pick the architecture from the checkpoint config
+          tokenized = tokenizer('69', return_tensors='pt')
+          for beam in transformers.AutoModelForCausalLM.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+              idx = predictions.find(ref)
+              if -1 == idx:
+                  raise RuntimeError(f'Missing "{ref=}" from predictions')
+              predictions = predictions[:idx] + predictions[idx + len(ref):]  # drop the matched beam so duplicates must each be present
+          "
+          echo 69 passed
+
+  cpp-beam_search_causal_lm-Mistral-7B:  # beam_search_causal_lm sample checked against a Hugging Face reference
+    runs-on: ubuntu-20.04-16-cores
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.8
+      - name: Install OpenVINO  # unpacks the pinned nightly archive into ./ov/
+        run: |
+          mkdir ./ov/
+          curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
+          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
+      - name: Download, convert and build  # conversion runs in the background while the samples compile
+        run: |
+          source ./ov/setupvars.sh
+          python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id mistralai/Mistral-7B-v0.1 --output_dir ./Mistral-7B-v0.1/ --precision FP16 &
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
+          cmake --build ./build/ --config Release --parallel 8
+          wait  # block until the backgrounded install and conversion have finished
+      - name: Compare  # fails if any reference beam is missing from the sample output
+        run: |
+          source ./ov/setupvars.sh
+          convert_tokenizer ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --output ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
+          timeout 50s ./build/beam_search_causal_lm ./Mistral-7B-v0.1/pytorch/dldt/FP16/ 69 > ./pred.txt
+          python -c "
+          import transformers
+          with open('pred.txt', 'r') as file:
+              predictions = file.read()
+          tokenizer = transformers.LlamaTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1')
+          tokenized = tokenizer('69', return_tensors='pt')
+          for beam in transformers.LlamaForCausalLM.from_pretrained('mistralai/Mistral-7B-v0.1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+              idx = predictions.find(ref)
+              if -1 == idx:
+                  raise RuntimeError(f'Missing "{ref=}" from predictions')
+              predictions = predictions[:idx] + predictions[idx + len(ref):]  # drop the matched beam so duplicates must each be present
+          "
+          echo "69" passed
+
   cpp-greedy_causal_lm-ubuntu:
     runs-on: ubuntu-20.04-8-cores
     steps:
@@ -143,8 +224,7 @@ jobs:
                   raise RuntimeError(f'Missing "{ref=}" from predictions')
               predictions = predictions[:idx] + predictions[idx + len(ref):]
           "
-          echo "你好！ 你好嗎？" passed
-
+          echo 你好！ 你好嗎？ passed
           timeout 1m ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Alan Turing was a" "return 0" "你好！ 你好嗎？" > ./pred.txt
           python -c "
           import transformers
@@ -165,7 +245,7 @@ jobs:
                     raise RuntimeError(f'Missing "{ref=}" from predictions')
                 predictions = predictions[:idx] + predictions[idx + len(ref):]
           "
-          echo "Multi prompt" passed
+          echo Multi prompt passed
   cpp-beam_search_causal_lm-windows:
     runs-on: windows-latest
     steps:
@@ -351,8 +431,7 @@ jobs:
               predicted_speculative = f.readline()
           assert predicted_greedy == predicted_speculative
           "
-          echo "Alan Turing was a" passed
-
+          echo speculative_decoding_lm passed
   cpp-Phi-1_5:
     runs-on: ubuntu-20.04-16-cores
     steps:
@@ -396,46 +475,3 @@ jobs:
               predictions = predictions[:idx] + predictions[idx + len(ref):]
           "
           echo Phi-1_5 passed
-
-  cpp-greedy_causal_lm-redpajama-3b-chat:
-    runs-on: ubuntu-20.04
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.8
-      - name: Install OpenVINO
-        run: |
-          mkdir ./ov/
-          curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.0/linux/l_openvino_toolkit_ubuntu20_2024.0.0.14509.34caeefd078_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
-          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
-      - name: Download, convert and build
-        run: |
-          source ./ov/setupvars.sh
-          python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id ikala/redpajama-3b-chat --output_dir ./redpajama-3b-chat/ --precision FP16 &
-          cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
-          cmake --build ./build/ --config Release -j
-          wait
-      - name: Run Generation
-        run: |
-          source ./ov/setupvars.sh
-          convert_tokenizer ./redpajama-3b-chat/pytorch/dldt/FP16/ --output ./redpajama-3b-chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
-          timeout 50s ./build/greedy_causal_lm ./redpajama-3b-chat/pytorch/dldt/FP16/ "Alan Turing was a" > ./pred_greedy.txt 
-      - name: Compare
-        run: |
-          python -c "
-          import transformers
-          with open('pred_greedy.txt', 'r') as file:
-              predictions = file.read()
-          tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
-          tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
-          for output in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, max_length=100, do_sample=False):
-              ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
-              idx = predictions.find(ref)
-              if -1 == idx:
-                  raise RuntimeError(f'Missing "{ref}" from predictions')
-              predictions = predictions[:idx] + predictions[idx + len(ref):]
-          "
-          echo "Alan Turing was a" passed
diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md
@@ -148,7 +148,6 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Contro
    2. https://huggingface.co/microsoft/phi-1_5
 9. [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1)
 10. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
-11. [redpajama-3b-chat](https://huggingface.co/ikala/redpajama-3b-chat)
-12. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
+11. [RedPajama-INCITE-Instruct-3B-v1](https://huggingface.co/togethercomputer/RedPajama-INCITE-Instruct-3B-v1)
 
 This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.