Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into eval_harness
Browse files Browse the repository at this point in the history
  • Loading branch information
dlwh committed Nov 15, 2024
2 parents f9ccebb + f8ab21a commit 3040956
Show file tree
Hide file tree
Showing 168 changed files with 14,355 additions and 5,898 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

scratch
cache
new-cache
wandb
checkpoints

Expand Down Expand Up @@ -116,3 +117,4 @@ dmypy.json

# local execution commands
local_*.sh
.aider*
47 changes: 47 additions & 0 deletions .github/workflows/docker-base-image.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Builds the TPU base image and the incremental TPU image from docker/tpu/ and
# pushes both to ghcr.io, each tagged `latest` and with the build date (YYYYMMDD).
name: Build and Push Docker TPU Images

on:
  workflow_run:
    workflows: ["Run Tests"]
    types:
      - completed
    branches: [main]
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    # `workflow_run` with `types: [completed]` also fires when the upstream run
    # failed or was cancelled. Only publish images after a successful test run;
    # manual dispatches are always allowed. (Same guard as publish_dev.yaml.)
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      # NOTE(review): neither `docker buildx build` command below passes
      # --cache-from/--cache-to for /tmp/.buildx-cache, so this cache does not
      # appear to be read or written by the builds. Confirm whether it should
      # be wired in or removed.
      - name: Cache Docker layers
        uses: actions/cache@v3
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-

      # Exports DATE through $GITHUB_ENV; later steps read it as `env.DATE`.
      - name: Get current date
        id: date
        run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.DOCKER_PUSH_TOKEN }}

      - name: Build and Push Docker image
        run: |
          docker buildx build --file docker/tpu/Dockerfile.base --tag ghcr.io/${{ github.repository_owner }}/levanter-base:latest --tag ghcr.io/${{ github.repository_owner }}/levanter-base:${{ env.DATE }} --push .

      - name: Build and Push Incremental Docker image
        run: |
          docker buildx build --file docker/tpu/Dockerfile.incremental --tag ghcr.io/${{ github.repository_owner }}/levanter-tpu:latest --tag ghcr.io/${{ github.repository_owner }}/levanter-tpu:${{ env.DATE }} --push .
72 changes: 72 additions & 0 deletions .github/workflows/launch_small_fast.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Launches a short Llama "small fast" training run on a preemptible TPU via
# infra/launch.py, then always deletes the queued TPU resource afterwards.
name: Launch Llama 2 Small Fast

on:
  workflow_run:
    workflows: ["Build and Push Docker TPU Images"]
    types:
      - completed
    branches: [main, "experiment/*"]
  # pull_request:
  workflow_dispatch:

jobs:
  test:
    # The previous condition only inspected `github.event.pull_request`, which
    # is empty for the two enabled triggers (workflow_run / workflow_dispatch),
    # so the job was always skipped. Run on manual dispatch, after a successful
    # image build, or (if the pull_request trigger is re-enabled) for PRs that
    # originate from this repository rather than a fork.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') ||
      (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository)
    runs-on: ubuntu-latest
    env:
      TPU_ZONE: "us-central2-b"
      TPU_TYPE: "v4-32"

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Set up Google Cloud SDK
        uses: google-github-actions/setup-gcloud@v1
        with:
          project_id: ${{ secrets.GCP_PROJECT_ID }}

      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v1
        with:
          credentials_json: ${{ secrets.GCP_SA_KEY }}

      # REGION is TPU_ZONE with its trailing "-<suffix>" removed
      # (e.g. us-central2-b -> us-central2).
      - name: Configure Google Cloud
        run: |
          gcloud config set project ${{ secrets.GCP_PROJECT_ID }}
          REGION=${TPU_ZONE%-*}
          echo "$REGION"
          gcloud auth configure-docker $REGION-docker.pkg.dev

      - name: Install locally
        run: |
          python -m pip install --upgrade pip
          pip install -e .[test] "jax[cpu]==0.4.30"

      # Writes a `.config` file with the W&B / HF credentials and then runs the
      # training job in the foreground. Every line of the launch command except
      # the last must end in a backslash: the save_interval line previously
      # lacked one, so `--trainer.num_train_steps 10000` was dropped from the
      # command and executed as a separate (failing) shell command.
      - name: Launch Small Fast TPU Train LM job
        run: |
          export TPU_NAME=small-fast-${{ github.run_id }}
          export WANDB_API_KEY=${{ secrets.WANDB_API_KEY }}
          export RUN_ID=small_fast_${{ github.run_id }}
          export HF_TOKEN=${{ secrets.HF_TOKEN }}

          cat > .config <<EOF
          env:
              WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
              WANDB_ENTITY: stanford-mercury
              WANDB_PROJECT: levanter
              HF_TOKEN: ${{ secrets.HF_TOKEN }}
          EOF

          python infra/launch.py -e CI 1 --foreground --tpu_name ${TPU_NAME} --run_id $RUN_ID --zone ${TPU_ZONE} --tpu_type ${TPU_TYPE} --preemptible -- \
            python -m levanter.main.train_lm \
            --config_path config/llama_small_fast.yaml \
            --trainer.checkpointer.base_path gs://levanter-checkpoints/llama-itest/ \
            --trainer.checkpointer.save_interval 10m \
            --trainer.num_train_steps 10000

      # Runs even when earlier steps fail so the queued TPU is not left behind.
      - name: Cleanup
        if: ${{ always() }}
        run: |
          export TPU_NAME=small-fast-${{ github.run_id }}
          gcloud compute tpus queued-resources delete $TPU_NAME --zone ${TPU_ZONE} --quiet --force
67 changes: 67 additions & 0 deletions .github/workflows/publish_dev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Builds a `<version>.dev<commit-count>` package after tests pass on main (or on
# manual dispatch) and publishes it using trusted publishing (id-token).
name: Publish Dev Build

on:
  workflow_run:
    workflows:
      - "Run Tests"
    types: [completed]
    branches:
      - main
  workflow_dispatch:

jobs:
  build-package:
    runs-on: ubuntu-latest
    # Skip the build when the triggering test run did not succeed.
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'}}
    steps:
      # Full history is fetched because the build number below is the commit count.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: "3.x"

      # FULL_VERSION = __version__ from src/levanter/__init__.py + ".dev" +
      # number of commits reachable from HEAD; exported via $GITHUB_ENV.
      - name: Calculate Version and Build Number
        run: |
          PROJECT_VERSION=$(sed -n 's/__version__ = "\(.*\)"/\1/p' src/levanter/__init__.py)
          BUILD_NUMBER=$(git rev-list --count HEAD)
          FULL_VERSION="${PROJECT_VERSION}.dev${BUILD_NUMBER}"
          echo "FULL_VERSION=${FULL_VERSION}" >> $GITHUB_ENV
          echo "Calculated version with build number: $FULL_VERSION"

      - name: Update pyproject.toml version
        run: |
          # replace the version in pyproject.toml
          sed -i "s/version = \".*\"/version = \"$FULL_VERSION\"/g" pyproject.toml

      - name: Build package
        run: |
          python -m pip install --upgrade pip
          pip install build
          python -m build

      # Hands dist/ to the publish job through the "package" artifact.
      - name: Upload package
        uses: actions/upload-artifact@v4
        with:
          name: package
          path: dist/

  # cf https://test.pypi.org/manage/project/levanter/settings/publishing/
  # NOTE(review): no repository-url is passed to the publish action below, so it
  # uses the action's default index - confirm that is the intended target given
  # the TestPyPI link above.
  publish-dev:
    runs-on: ubuntu-latest
    needs: [build-package]
    permissions:
      id-token: write
    steps:
      - name: Retrieve release distributions
        uses: actions/download-artifact@v4
        with:
          name: package
          path: dist/

      - name: Publish release distributions to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1


2 changes: 1 addition & 1 deletion .github/workflows/run_entry_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install . "jax[cpu]==${{ matrix.jax-version }}" "jaxlib==${{ matrix.jax-version }}"
pip install .[test] "jax[cpu]==${{ matrix.jax-version }}" "jaxlib==${{ matrix.jax-version }}"
pip install soundfile librosa
- name: Run entry tests with pytest
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/run_ray_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install . "jax[cpu]==${{ matrix.jax-version }}" "jaxlib==${{ matrix.jax-version }}"
pip install .[test] "jax[cpu]==${{ matrix.jax-version }}" "jaxlib==${{ matrix.jax-version }}"
pip install soundfile librosa
- name: Run ray tests with pytest
run: |
XLA_FLAGS=--xla_force_host_platform_device_count=8 PYTHONPATH=$(pwd)/tests:$(pwd)/src:$(pwd):. pytest tests -m ray
PYTHONPATH=$(pwd)/tests:$(pwd)/src:$(pwd):. pytest tests -m ray
2 changes: 1 addition & 1 deletion .github/workflows/run_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install . "jax[cpu]==${{ matrix.jax-version }}" "jaxlib==${{ matrix.jax-version }}"
pip install .[test] "jax[cpu]==${{ matrix.jax-version }}" "jaxlib==${{ matrix.jax-version }}"
pip install -r ./tests/requirements.txt
- name: Test with pytest
run: |
Expand Down
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -150,5 +150,9 @@ ledger.json
/checkpoints
*.jaxpr

# local execution commands
local_*.sh

# aider
.aider*

.benchmarks
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ repos:
hooks:
- id: mypy
args: [--ignore-missing-imports]
additional_dependencies: [wandb, types-PyYAML]
additional_dependencies: [wandb==0.17.8, types-PyYAML]
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ Please see the [CUDA Getting Started](docs/Getting-Started-GPU.md) guide for mor

## Contributing

[![GitHub repo Good Issues for newbies](https://img.shields.io/github/issues/stanford-crfm/levanter/good%20first%20issue?style=flat&logo=github&logoColor=green&label=Good%20First%20issues)](https://github.com/stanford-crfm/levanter/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) [![GitHub Help Wanted issues](https://img.shields.io/github/issues/stanford-crfm/levanter/help%20wanted?style=flat&logo=github&logoColor=b545d1&label=%22Help%20Wanted%22%20issues)](https://github.com/stanford-crfm/levanter/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22) [![GitHub Help Wanted PRs](https://img.shields.io/github/issues-pr/stanford-crfm/levanter/help%20wanted?style=flat&logo=github&logoColor=b545d1&label=%22Help%20Wanted%22%20PRs)](https://github.com/stanford-crfm/levanter/pulls?q=is%3Aopen+is%3Apr+label%3A%22help+wanted%22) [![GitHub repo Issues](https://img.shields.io/github/issues/stanford-crfm/levanter?style=flat&logo=github&logoColor=red&label=Issues)](https://github.com/stanford-crfm/levanter/issues?q=is%3Aopen)

We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for more information.

## License
Expand Down
78 changes: 78 additions & 0 deletions config/data/dclm_gpt_neo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Dataset config with one training source ("dclm") and sixteen "paloma/*"
# validation-only sources.
# NOTE(review): indentation is not visible in this view. `batch_size` and
# `num_shard_groups` presumably nest under `cache_options`, and each
# `train_urls` / `validation_urls` list under the quoted source name above it -
# confirm against the checked-in file.
cache_dir: "gs://marin-us-central2/tokenized/gpt_neox/"
tokenizer: "EleutherAI/gpt-neox-20b"
cache_options:
batch_size: 256
num_shard_groups: 1024
stop_strategy: restart
shuffle: 100000
configs:
"dclm":
train_urls:
- gs://marin-us-central2/raw/dclm/v2024-07-09-baseline-dedup/**/*.zstd
# these are just for eval
"paloma/4chan":
validation_urls:
- gs://levanter-data/paloma/4chan_meta_sep/val/val*.jsonl.gz
"paloma/c4_100_domains":
validation_urls:
- gs://levanter-data/paloma/c4_100_domains/val/val*.jsonl.gz
"paloma/c4_en":
validation_urls:
- gs://levanter-data/paloma/c4_en/val/val*.jsonl.gz
"paloma/dolma-v1_5":
validation_urls:
- gs://levanter-data/paloma/dolma-v1_5/val/val*.jsonl.gz
# "programing" (single m) is spelled the same way in the key, the bucket path
# and the train_weights entry below; presumably these must stay in sync.
"paloma/dolma_100_programing_languages":
validation_urls:
- gs://levanter-data/paloma/dolma_100_programing_languages/val/val*.jsonl.gz
"paloma/dolma_100_subreddits":
validation_urls:
- gs://levanter-data/paloma/dolma_100_subreddits/val/val*.jsonl.gz
"paloma/falcon-refinedweb":
validation_urls:
- gs://levanter-data/paloma/falcon-refinedweb/val/val*.jsonl.gz
"paloma/gab":
validation_urls:
- gs://levanter-data/paloma/gab/val/val*.jsonl.gz
"paloma/m2d2_s2orc_unsplit":
validation_urls:
- gs://levanter-data/paloma/m2d2_s2orc_unsplit/val/val*.jsonl.gz
"paloma/m2d2_wikipedia_unsplit":
validation_urls:
- gs://levanter-data/paloma/m2d2_wikipedia_unsplit/val/val*.jsonl.gz
"paloma/manosphere_meta_sep":
validation_urls:
- gs://levanter-data/paloma/manosphere_meta_sep/val/val*.jsonl.gz
"paloma/mc4":
validation_urls:
- gs://levanter-data/paloma/mc4/val/val*.jsonl.gz
"paloma/ptb":
validation_urls:
- gs://levanter-data/paloma/ptb/val/val*.jsonl.gz
"paloma/redpajama":
validation_urls:
- gs://levanter-data/paloma/redpajama/val/val*.jsonl.gz
"paloma/twitterAAE_HELM_fixed":
validation_urls:
- gs://levanter-data/paloma/twitterAAE_HELM_fixed/val/val*.jsonl.gz
"paloma/wikitext_103":
validation_urls:
- gs://levanter-data/paloma/wikitext_103/val/val*.jsonl.gz
# Only "dclm" has a non-zero weight (1.0); every paloma source is weighted 0.0,
# matching the "just for eval" note above.
train_weights:
dclm: 1.0
paloma/4chan: 0.0
paloma/c4_100_domains: 0.0
paloma/c4_en: 0.0
paloma/dolma-v1_5: 0.0
paloma/dolma_100_programing_languages: 0.0
paloma/dolma_100_subreddits: 0.0
paloma/falcon-refinedweb: 0.0
paloma/gab: 0.0
paloma/m2d2_s2orc_unsplit: 0.0
paloma/m2d2_wikipedia_unsplit: 0.0
paloma/manosphere_meta_sep: 0.0
paloma/mc4: 0.0
paloma/ptb: 0.0
paloma/redpajama: 0.0
paloma/twitterAAE_HELM_fixed: 0.0
paloma/wikitext_103: 0.0
Loading

0 comments on commit 3040956

Please sign in to comment.