From 6de3ca6a7b7afe06bc03f03b2f26606748e89554 Mon Sep 17 00:00:00 2001 From: akiseakusa Date: Fri, 5 Apr 2024 22:07:16 +0530 Subject: [PATCH 1/6] Huggingface comparison added --- .github/workflows/causal_lm_cpp.yml | 41 +++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index e58d4e67ee..f93b23d3e8 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -11,6 +11,47 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: + cpp-beam_search_causal_lm-Mistral-7B: + runs-on: ubuntu-20.04-16-cores + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install OpenVINO + run: | + mkdir ./ov/ + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + sudo ./ov/install_dependencies/install_openvino_dependencies.sh + - name: Download, convert and build + run: | + source ./ov/setupvars.sh + python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id mistralai/Mistral-7B-v0.1 --output_dir ./Mistral-7B-v0.1/ --precision FP16 & + cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ + cmake --build ./build/ --config Release -j --parallel 8 + wait + - name: Compare + run: | + source ./ov/setupvars.sh + convert_tokenizer ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --output ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm 
./Mistral-7B-v0.1/pytorch/dldt/FP16/ 69 > ./pred.txt + python -c " + import transformers + with open('pred.txt', 'r') as file: + predictions = file.read() + tokenizer = transformers.LlamaTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1') + tokenized = tokenizer('69', return_tensors='pt') + for beam in transformers.LlamaForCausalLM.from_pretrained('mistralai/Mistral-7B-v0.1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): + ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' + idx = predictions.find(ref) + if -1 == idx: + raise RuntimeError(f'Missing "{ref=}" from predictions') + predictions = predictions[:idx] + predictions[idx + len(ref):] + " + echo 69 passed + cpp-greedy_causal_lm-ubuntu: runs-on: ubuntu-20.04-8-cores steps: From 5874836921b2d36381ef15b44168738d8789baf0 Mon Sep 17 00:00:00 2001 From: akiseakusa Date: Fri, 5 Apr 2024 22:21:31 +0530 Subject: [PATCH 2/6] huggingface compare added --- text_generation/causal_lm/cpp/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md index 0ad2ffe928..a616da085e 100644 --- a/text_generation/causal_lm/cpp/README.md +++ b/text_generation/causal_lm/cpp/README.md @@ -141,6 +141,6 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Contro 2. https://huggingface.co/microsoft/phi-1_5 9. [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1) 10. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) - +11. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature. 
From 1c762d7c4c652ed6e027dbae1c5c4465619607cb Mon Sep 17 00:00:00 2001 From: akiseakusa Date: Tue, 9 Apr 2024 23:43:36 +0530 Subject: [PATCH 3/6] red pjama instruct 3b added --- .github/workflows/causal_lm_cpp.yml | 51 ++++++++++++++++++++++--- text_generation/causal_lm/cpp/README.md | 2 +- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index f93b23d3e8..a75f028676 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -11,6 +11,47 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: + cpp-beam_search_causal_lm-red-pajama-3b-instruct: + runs-on: ubuntu-20.04-16-cores + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install OpenVINO + run: | + mkdir ./ov/ + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + sudo ./ov/install_dependencies/install_openvino_dependencies.sh + - name: Download, convert and build + run: | + source ./ov/setupvars.sh + python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir .RedPajama-INCITE-Instruct-3B-v1/ --precision FP16 &7B-v0.1/ --precision FP16 & + cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ + cmake --build ./build/ --config Release -j --parallel 8 + wait + - name: Compare + run: | + source ./ov/setupvars.sh + convert_tokenizer ./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ --output 
./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm .RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ 69 > ./pred.txt + python -c " + import transformers + with open('pred.txt', 'r') as file: + predictions = file.read() + tokenizer = transformers.LlamaTokenizer.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1') + tokenized = tokenizer('69', return_tensors='pt') + for beam in transformers.LlamaForCausalLM.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): + ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' + idx = predictions.find(ref) + if -1 == idx: + raise RuntimeError(f'Missing "{ref=}" from predictions') + predictions = predictions[:idx] + predictions[idx + len(ref):] + " + echo "69" passed + cpp-beam_search_causal_lm-Mistral-7B: runs-on: ubuntu-20.04-16-cores steps: @@ -50,7 +91,7 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo 69 passed + echo "69" passed cpp-greedy_causal_lm-ubuntu: runs-on: ubuntu-20.04-8-cores @@ -134,7 +175,7 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo Hi passed + echo "Hi" passed timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "return 0" > ./pred.txt python -c " @@ -166,7 +207,7 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo 你好! 你好嗎? passed + echo "你好! 你好嗎?" 
passed cpp-beam_search_causal_lm-windows: runs-on: windows-latest steps: @@ -348,7 +389,7 @@ jobs: predicted_speculative = f.readline() assert predicted_greedy == predicted_speculative " - echo speculative_decoding_lm passed + echo "speculative_decoding_lm" passed cpp-Phi-1_5: runs-on: ubuntu-20.04-16-cores steps: @@ -391,4 +432,4 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo Phi-1_5 passed + echo "Phi-1_5" passed diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md index a616da085e..625f9afaf3 100644 --- a/text_generation/causal_lm/cpp/README.md +++ b/text_generation/causal_lm/cpp/README.md @@ -142,5 +142,5 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Contro 9. [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1) 10. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) 11. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) - +12. [red-pajama-3b-instruct](https://huggingface.co/togethercomputer/RedPajama-INCITE-Instruct-3B-v1) This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature. 
From 77901b1fb44fa6e76b50c021275f74c7ee2746b8 Mon Sep 17 00:00:00 2001 From: Sadhvi <41192585+akiseakusa@users.noreply.github.com> Date: Sun, 28 Apr 2024 23:29:38 +0530 Subject: [PATCH 4/6] Update causal_lm_cpp.yml --- .github/workflows/causal_lm_cpp.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 9945a7c7cf..749ce357c1 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -51,7 +51,7 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo "69" passed + echo 69 passed cpp-beam_search_causal_lm-Mistral-7B: runs-on: ubuntu-20.04-16-cores @@ -475,4 +475,4 @@ jobs: raise RuntimeError(f'Missing "{ref=}" from predictions') predictions = predictions[:idx] + predictions[idx + len(ref):] " - echo Phi-1_5 passed \ No newline at end of file + echo Phi-1_5 passed From 8e37fdc4dae3b171256b0b310061e2f408c6afd1 Mon Sep 17 00:00:00 2001 From: Sadhvi <41192585+akiseakusa@users.noreply.github.com> Date: Sun, 28 Apr 2024 23:37:12 +0530 Subject: [PATCH 5/6] Update causal_lm_cpp.yml --- .github/workflows/causal_lm_cpp.yml | 35 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 749ce357c1..30f79f3754 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -12,7 +12,7 @@ concurrency: cancel-in-progress: true jobs: - cpp-beam_search_causal_lm-red-pajama-3b-instruct: + cpp-beam_search_causal_lm-red-pajama-3b-instruct: runs-on: ubuntu-20.04-16-cores steps: - uses: actions/checkout@v4 @@ -29,29 +29,28 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt 
./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir .RedPajama-INCITE-Instruct-3B-v1/ --precision FP16 &7B-v0.1/ --precision FP16 & + python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir .RedPajama-INCITE-Instruct-3B-v1/ --precision FP16 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ - cmake --build ./build/ --config Release -j --parallel 8 - wait + cmake --build ./build/ --config Release -j8 - name: Compare run: | source ./ov/setupvars.sh convert_tokenizer ./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ --output ./RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm .RedPajama-INCITE-Instruct-3B-v1/pytorch/dldt/FP16/ 69 > ./pred.txt - python -c " - import transformers - with open('pred.txt', 'r') as file: - predictions = file.read() - tokenizer = transformers.LlamaTokenizer.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1') - tokenized = tokenizer('69', return_tensors='pt') - for beam in transformers.LlamaForCausalLM.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): - ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' - idx = predictions.find(ref) - if -1 == idx: - raise RuntimeError(f'Missing "{ref=}" from predictions') - predictions 
= predictions[:idx] + predictions[idx + len(ref):] - " - echo 69 passed + python -c " + import transformers + with open('pred.txt', 'r') as file: + predictions = file.read() + tokenizer = transformers.LlamaTokenizer.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1') + tokenized = tokenizer('69', return_tensors='pt') + for beam in transformers.LlamaForCausalLM.from_pretrained('togethercomputer/RedPajama-INCITE-Instruct-3B-v1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_length=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9, do_sample=False): + ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' + idx = predictions.find(ref) + if -1 == idx: + raise RuntimeError(f'Missing "{ref=}" from predictions') + predictions = predictions[:idx] + predictions[idx + len(ref):] + " + echo 69 passed cpp-beam_search_causal_lm-Mistral-7B: runs-on: ubuntu-20.04-16-cores From d1cd3c9ea3d377ba7693f4724ca92e24b58897bb Mon Sep 17 00:00:00 2001 From: Sadhvi <41192585+akiseakusa@users.noreply.github.com> Date: Mon, 29 Apr 2024 15:01:34 +0530 Subject: [PATCH 6/6] Update causal_lm_cpp.yml --- .github/workflows/causal_lm_cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 30f79f3754..f2b775a594 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -29,7 +29,7 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir .RedPajama-INCITE-Instruct-3B-v1/ --precision FP16 + 
python3 -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python3 ./llm_bench/python/convert.py --model_id togethercomputer/RedPajama-INCITE-Instruct-3B-v1 --output_dir ./RedPajama-INCITE-Instruct-3B-v1/ --precision FP16 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j8 - name: Compare