Skip to content

Commit

Permalink
Upgrade to SynapseAI 1.13 (#563)
Browse files Browse the repository at this point in the history
  • Loading branch information
regisss authored Nov 30, 2023
1 parent 484dedf commit 75aff85
Show file tree
Hide file tree
Showing 40 changed files with 154 additions and 204 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/fast_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
runs-on: ubuntu-20.04
env:
AWS_REGION: us-east-1
EC2_AMI_ID: ami-014161e3e9dd7b42f
EC2_AMI_ID: ami-0a82d7d7ad5d25f56
EC2_INSTANCE_TYPE: dl1.24xlarge
EC2_SUBNET_ID: subnet-b7533b96
EC2_SECURITY_GROUP: sg-08af7938042271373
Expand Down Expand Up @@ -77,7 +77,7 @@ jobs:
ref: ${{ github.event.pull_request.merge_commit_sha }}
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -89,7 +89,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/fast_tests.sh
diffusers:
name: Run tests for optimum.habana.diffusers
Expand All @@ -113,7 +113,7 @@ jobs:
ref: ${{ github.event.pull_request.merge_commit_sha }}
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -125,7 +125,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/fast_tests_diffusers.sh
stop-runner:
name: Stop self-hosted EC2 runner
Expand Down
30 changes: 15 additions & 15 deletions .github/workflows/slow_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-20.04
env:
AWS_REGION: us-west-2
EC2_AMI_ID: ami-095f659c5aa683a3a
EC2_AMI_ID: ami-01b277257cd28a061
EC2_INSTANCE_TYPE: dl1.24xlarge
EC2_SUBNET_ID: subnet-452c913d
EC2_SECURITY_GROUP: sg-0894f4f70dd6bd778
Expand Down Expand Up @@ -55,7 +55,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -67,7 +67,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/example_diff_tests.sh
stable-diffusion:
name: Test Stable Diffusion
Expand All @@ -83,7 +83,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -95,7 +95,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/slow_tests_diffusers.sh
deepspeed:
name: Test DeepSpeed models
Expand All @@ -112,7 +112,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -124,7 +124,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/slow_tests_deepspeed.sh
multi-card:
name: Test multi-card models
Expand All @@ -141,7 +141,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -153,7 +153,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/slow_tests_8x.sh
single-card:
name: Test single-card models
Expand All @@ -171,7 +171,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -183,7 +183,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/slow_tests_1x.sh
albert-xxl-single-card:
name: Test single-card ALBERT XXL
Expand All @@ -204,7 +204,7 @@ jobs:
- name: Pull image
if: github.event.schedule == '0 21 * * 6'
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run test
if: github.event.schedule == '0 21 * * 6'
run: |
Expand All @@ -217,7 +217,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/albert_xxl_1x.sh
- name: Warning
if: github.event.schedule != '0 21 * * 6'
Expand All @@ -240,7 +240,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -252,7 +252,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
stop-runner:
name: Stop self-hosted EC2 runner
Expand Down
20 changes: 10 additions & 10 deletions .github/workflows/slow_tests_gaudi2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -30,7 +30,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/slow_tests_diffusers.sh
deepspeed:
name: Test DeepSpeed models
Expand All @@ -43,7 +43,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -56,7 +56,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/slow_tests_deepspeed.sh
multi-card:
name: Test multi-card models
Expand All @@ -69,7 +69,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -82,7 +82,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/slow_tests_8x.sh
single-card:
name: Test single-card models
Expand All @@ -96,7 +96,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -110,7 +110,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
/bin/bash tests/ci/slow_tests_1x.sh
text-generation:
name: Test text-generation example
Expand All @@ -125,7 +125,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
docker pull vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest
- name: Run tests
run: |
docker run \
Expand All @@ -138,5 +138,5 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest \
make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ slow_tests_8x: test_installs

# Run DeepSpeed non-regression tests
slow_tests_deepspeed: test_installs
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
python -m pytest tests/test_examples.py -v -s -k "deepspeed"

slow_tests_diffusers: test_installs
Expand All @@ -58,7 +58,7 @@ slow_tests_diffusers: test_installs

# Run text-generation non-regression tests
slow_tests_text_generation_example: test_installs
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
python -m pytest tests/test_text_generation_example.py tests/test_encoder_decoder_text_summarization.py -v -s --token $(TOKEN)

# Check if examples are up to date with the Transformers library
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is up

> To use DeepSpeed on HPUs, you also need to run the following command:
>```bash
>pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0
>pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
>```
Optimum Habana is a fast-moving project, and you may want to install it from source:
Expand Down
2 changes: 1 addition & 1 deletion docs/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest

ARG commit_sha
ARG clone_url
Expand Down
2 changes: 1 addition & 1 deletion docs/source/installation.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ python -m pip install --upgrade-strategy eager optimum[habana]
To use DeepSpeed on HPUs, you also need to run the following command:

```bash
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0
python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
```

4 changes: 2 additions & 2 deletions docs/source/usage_guides/deepspeed.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ You can find more information about DeepSpeed Gaudi integration [here](https://d
To use DeepSpeed on Gaudi, you need to install Optimum Habana and [Habana's DeepSpeed fork](https://github.com/HabanaAI/DeepSpeed) with:
```bash
pip install optimum[habana]
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
```


Expand Down Expand Up @@ -78,7 +78,7 @@ It is strongly advised to read [this section](https://huggingface.co/docs/transf

</Tip>

Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.12.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Habana.
Other examples of configurations for HPUs are provided by Habana [here](https://github.com/HabanaAI/Model-References/tree/1.13.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts).

The [Transformers documentation](https://huggingface.co/docs/transformers/main_classes/deepspeed#configuration) explains very well how to write a configuration from scratch.
A more complete description of all configuration possibilities is available [here](https://www.deepspeed.ai/docs/config-json/).
Expand Down
2 changes: 1 addition & 1 deletion examples/audio-classification/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ On 8 HPUs, this script should run in ~12 minutes and yield an accuracy of **80.4

> You need to install DeepSpeed with:
> ```bash
> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0
> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
> ```
DeepSpeed can be used with almost the same command as for a multi-card run:
Expand Down
2 changes: 1 addition & 1 deletion examples/gaudi_spawn.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def main():
if not is_deepspeed_available():
raise ImportError(
"--use_deepspeed requires deepspeed: `pip install"
" git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0`."
" git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0`."
)

# Patch sys.argv
Expand Down
4 changes: 2 additions & 2 deletions examples/multi-node-training/EFA/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest

# Installs pdsh and upgrade pip
RUN apt-get update && apt-get install -y pdsh && \
Expand All @@ -18,7 +18,7 @@ RUN sed -i 's/#Port 22/Port 3022/g' /etc/ssh/sshd_config && \

# Installs Optimum Habana and Habana's fork of DeepSpeed
RUN pip install optimum[habana] && \
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0

CMD ssh-keygen -t rsa -b 4096 -N '' -f ~/.ssh/id_rsa && \
chmod 600 ~/.ssh/id_rsa && \
Expand Down
4 changes: 2 additions & 2 deletions examples/multi-node-training/GaudiNIC/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:1.12.0-480
FROM vault.habana.ai/gaudi-docker/1.13.0/ubuntu20.04/habanalabs/pytorch-installer-2.1.0:latest

# Installs pdsh and upgrade pip
RUN apt-get update && apt-get install -y pdsh && \
Expand All @@ -12,7 +12,7 @@ RUN sed -i 's/#Port 22/Port 3022/g' /etc/ssh/sshd_config && \

# Installs Optimum Habana and Habana's fork of DeepSpeed
RUN pip install optimum[habana] && \
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0

CMD ssh-keygen -t rsa -b 4096 -N '' -f ~/.ssh/id_rsa && \
chmod 600 ~/.ssh/id_rsa && \
Expand Down
2 changes: 1 addition & 1 deletion examples/speech-recognition/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ On 8 HPUs, this script should run in *ca.* 49 minutes and yield a CTC loss of **

> You need to install DeepSpeed with:
> ```bash
> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0
> pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
> ```
DeepSpeed can be used with almost the same command as for a multi-card run:
Expand Down
2 changes: 1 addition & 1 deletion examples/text-generation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pip install -r requirements.txt

Then, if you plan to use [DeepSpeed-inference](https://docs.habana.ai/en/latest/PyTorch/DeepSpeed/Inference_Using_DeepSpeed.html) (e.g. to use BLOOM/BLOOMZ), you should install DeepSpeed as follows:
```bash
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.12.0
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.13.0
```


Expand Down
Loading

0 comments on commit 75aff85

Please sign in to comment.