Skip to content

Optimize memory when loading checkpoint #698

Optimize memory when loading checkpoint

Optimize memory when loading checkpoint #698

name: Run FA2-related unit tests
on:
workflow_dispatch:
push:
branches: [ main ]
# Only run tests if we modify the following files
paths:
- "src/**/*.py"
- "examples/**/*.py"
- "tests/**/*.py"
pull_request:
branches: [ '**' ]
paths:
- "src/**/*.py"
- "examples/**/*.py"
- "tests/**/*.py"
jobs:
tests:
runs-on:
group: aws-g5-4xlarge-plus
container:
image: runpod/pytorch:2.1.1-py3.10-cuda12.1.1-devel-ubuntu22.04
ports:
- 80
options: --gpus all --shm-size "8G"
steps:
- uses: actions/checkout@v3
- name: Python environment
run: |
which python
python --version
- name: Check Pytorch version
run: |
nvidia-smi
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
- name: Install nanotron
run: |
python -m pip install --upgrade pip
pip install packaging
pip install wheel
pip install "flash-attn>=2.5.0" --no-build-isolation
pip install -e .
pip install -e .[dev]
pip install -e .[test]
- name: Show installed libraries and their versions
run: pip freeze | tee installed.txt
- name: Run tests
# NOTE: -m fa2 will only run the unit tests that have the mark
# "fa2" (these are FA2-related tests)
run: pytest -m fa2 --color=yes --durations=0 --ignore tests/fp8 --ignore tests/nanoset --verbose tests/