Skip to content

Commit

Permalink
Merge pull request #219 from ROCm/ci-upstream-sync-104_1
Browse files Browse the repository at this point in the history
CI: 02/03/25 upstream sync
  • Loading branch information
github-actions[bot] authored Feb 3, 2025
2 parents bc2b5d0 + ec2c305 commit 0962b96
Show file tree
Hide file tree
Showing 118 changed files with 4,869 additions and 1,429 deletions.
14 changes: 6 additions & 8 deletions .github/workflows/bazel_cpu_rbe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,19 @@ concurrency:
jobs:
run_tests:
if: github.event.repository.fork == false
strategy:
matrix:
runner: ["linux-x86-n2-16", "linux-arm64-c4a-16"]
enable-x_64: [1, 0]

runs-on: ${{ matrix.runner }}
container: ${{ (contains(matrix.runner, 'linux-x86') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest') ||
(contains(matrix.runner, 'linux-arm64') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build-arm64:latest') }}

env:
JAXCI_HERMETIC_PYTHON_VERSION: "3.12"
JAXCI_ENABLE_X64: ${{ matrix.enable-x_64 }}

# Begin Presubmit Naming Check - name modification requires internal check to be updated
strategy:
matrix:
runner: ["linux-x86-n2-16", "linux-arm64-c4a-16"]
enable-x_64: [1, 0]
name: "Bazel CPU tests (${{ matrix.runner }}, Python 3.12, x64=${{ matrix.enable-x_64 }})"

# End Presubmit Naming Check github-cpu-presubmits
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Wait For Connection
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/bazel_cuda_non_rbe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ on:
gcs_download_uri:
description: "GCS location URI from where the artifacts should be downloaded"
required: true
default: 'gs://general-ml-ci-transient/jax-github-actions/jax-fork/${{ github.workflow }}/${{ github.run_number }}/${{ github.run_attempt }}'
default: 'gs://general-ml-ci-transient/jax-github-actions/jax/${{ github.workflow }}/${{ github.run_number }}/${{ github.run_attempt }}'
type: string
halt-for-connection:
description: 'Should this workflow run wait for a remote connection?'
Expand Down
14 changes: 6 additions & 8 deletions .github/workflows/bazel_cuda_rbe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,18 @@ concurrency:
jobs:
run_tests:
if: github.event.repository.fork == false
strategy:
matrix:
runner: ["linux-x86-n2-16"]
enable-x_64: [1, 0]

runs-on: ${{ matrix.runner }}
container: 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest'

env:
JAXCI_HERMETIC_PYTHON_VERSION: "3.12"
JAXCI_ENABLE_X64: ${{ matrix.enable-x_64 }}

# Begin Presubmit Naming Check - name modification requires internal check to be updated
strategy:
matrix:
runner: ["linux-x86-n2-16"]
enable-x_64: [1, 0]
name: "Bazel single accelerator CUDA tests (${{ matrix.runner }}, Python 3.12, x64=${{ matrix.enable-x_64 }})"

# End Presubmit Naming Check github-cuda-presubmits
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Wait For Connection
Expand Down
7 changes: 3 additions & 4 deletions .github/workflows/cloud-tpu-ci-presubmit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,16 @@ concurrency:
jobs:
cloud-tpu-test:
if: github.event.repository.fork == false
# Begin Presubmit Naming Check - name modification requires internal check to be updated
strategy:
fail-fast: false # don't cancel all jobs on failure
matrix:
tpu: [
{type: "v5e-8", cores: "8", runner: "linux-x86-ct5lp-224-8tpu"}
]
python-version: ["3.10"]

name: "TPU test (jaxlib=head, ${{ matrix.tpu.type }})"

# End Presubmit Naming Check github-tpu-presubmits
env:
JAXCI_PYTHON: python${{ matrix.python-version }}
JAXCI_TPU_CORES: ${{ matrix.tpu.cores }}
Expand All @@ -52,7 +52,6 @@ jobs:
defaults:
run:
shell: bash -ex {0}

steps:
# https://opensource.google/documentation/reference/github/services#actions
# mandates using a specific commit for non-Google actions. We use
Expand Down Expand Up @@ -90,4 +89,4 @@ jobs:
with:
halt-dispatch-input: ${{ inputs.halt-for-connection }}
- name: Install jaxlib wheel and run tests
run: ./ci/run_pytest_tpu.sh
run: ./ci/run_pytest_tpu.sh
2 changes: 1 addition & 1 deletion .github/workflows/jax-array-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
with:
repository: data-apis/array-api-tests
# TODO(jakevdp) update this to a stable release/tag when available.
ref: 'f7a74a685d78d98203fa991fc19a5d1fda57c212' # Latest commit as of 2025-01-07
ref: 'd982a6245400295477f5da5afa1c4a2a5e641ea4' # Latest commit as of 2025-01-30
submodules: 'true'
path: 'array-api-tests'
- name: Set up Python ${{ matrix.python-version }}
Expand Down
86 changes: 86 additions & 0 deletions .github/workflows/requirements_lock_3_13_ft.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
diff --git a/build/requirements_lock_3_13_ft.txt b/build/requirements_lock_3_13_ft.txt
index dfefaf042..2700e140e 100644
--- a/build/requirements_lock_3_13_ft.txt
+++ b/build/requirements_lock_3_13_ft.txt
@@ -4,6 +4,12 @@
#
# pip-compile --allow-unsafe --generate-hashes --output-file=build/requirements_lock_3_13_ft.txt build/requirements.in
#
+
+--pre
+--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
+numpy
+
+
absl-py==2.1.0 \
--hash=sha256:526a04eadab8b4ee719ce68f204172ead1027549089702d99b9059f129ff1308 \
--hash=sha256:7820790efbb316739cde8b4e19357243fc3608a152024288513dd968d7d959ff
@@ -328,68 +334,6 @@ mpmath==1.3.0 \
--hash=sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f \
--hash=sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c
# via -r build/test-requirements.txt
-numpy==2.2.1 ; python_version >= "3.13" \
- --hash=sha256:059e6a747ae84fce488c3ee397cee7e5f905fd1bda5fb18c66bc41807ff119b2 \
- --hash=sha256:08ef779aed40dbc52729d6ffe7dd51df85796a702afbf68a4f4e41fafdc8bda5 \
- --hash=sha256:164a829b6aacf79ca47ba4814b130c4020b202522a93d7bff2202bfb33b61c60 \
- --hash=sha256:26c9c4382b19fcfbbed3238a14abf7ff223890ea1936b8890f058e7ba35e8d71 \
- --hash=sha256:27f5cdf9f493b35f7e41e8368e7d7b4bbafaf9660cba53fb21d2cd174ec09631 \
- --hash=sha256:31b89fa67a8042e96715c68e071a1200c4e172f93b0fbe01a14c0ff3ff820fc8 \
- --hash=sha256:32cb94448be47c500d2c7a95f93e2f21a01f1fd05dd2beea1ccd049bb6001cd2 \
- --hash=sha256:360137f8fb1b753c5cde3ac388597ad680eccbbbb3865ab65efea062c4a1fd16 \
- --hash=sha256:3683a8d166f2692664262fd4900f207791d005fb088d7fdb973cc8d663626faa \
- --hash=sha256:38efc1e56b73cc9b182fe55e56e63b044dd26a72128fd2fbd502f75555d92591 \
- --hash=sha256:3d03883435a19794e41f147612a77a8f56d4e52822337844fff3d4040a142964 \
- --hash=sha256:3ecc47cd7f6ea0336042be87d9e7da378e5c7e9b3c8ad0f7c966f714fc10d821 \
- --hash=sha256:40f9e544c1c56ba8f1cf7686a8c9b5bb249e665d40d626a23899ba6d5d9e1484 \
- --hash=sha256:4250888bcb96617e00bfa28ac24850a83c9f3a16db471eca2ee1f1714df0f957 \
- --hash=sha256:4511d9e6071452b944207c8ce46ad2f897307910b402ea5fa975da32e0102800 \
- --hash=sha256:45681fd7128c8ad1c379f0ca0776a8b0c6583d2f69889ddac01559dfe4390918 \
- --hash=sha256:48fd472630715e1c1c89bf1feab55c29098cb403cc184b4859f9c86d4fcb6a95 \
- --hash=sha256:4c86e2a209199ead7ee0af65e1d9992d1dce7e1f63c4b9a616500f93820658d0 \
- --hash=sha256:4dfda918a13cc4f81e9118dea249e192ab167a0bb1966272d5503e39234d694e \
- --hash=sha256:5062dc1a4e32a10dc2b8b13cedd58988261416e811c1dc4dbdea4f57eea61b0d \
- --hash=sha256:51faf345324db860b515d3f364eaa93d0e0551a88d6218a7d61286554d190d73 \
- --hash=sha256:526fc406ab991a340744aad7e25251dd47a6720a685fa3331e5c59fef5282a59 \
- --hash=sha256:53c09385ff0b72ba79d8715683c1168c12e0b6e84fb0372e97553d1ea91efe51 \
- --hash=sha256:55ba24ebe208344aa7a00e4482f65742969a039c2acfcb910bc6fcd776eb4355 \
- --hash=sha256:5b6c390bfaef8c45a260554888966618328d30e72173697e5cabe6b285fb2348 \
- --hash=sha256:5c5cc0cbabe9452038ed984d05ac87910f89370b9242371bd9079cb4af61811e \
- --hash=sha256:5edb4e4caf751c1518e6a26a83501fda79bff41cc59dac48d70e6d65d4ec4440 \
- --hash=sha256:61048b4a49b1c93fe13426e04e04fdf5a03f456616f6e98c7576144677598675 \
- --hash=sha256:676f4eebf6b2d430300f1f4f4c2461685f8269f94c89698d832cdf9277f30b84 \
- --hash=sha256:67d4cda6fa6ffa073b08c8372aa5fa767ceb10c9a0587c707505a6d426f4e046 \
- --hash=sha256:694f9e921a0c8f252980e85bce61ebbd07ed2b7d4fa72d0e4246f2f8aa6642ab \
- --hash=sha256:733585f9f4b62e9b3528dd1070ec4f52b8acf64215b60a845fa13ebd73cd0712 \
- --hash=sha256:7671dc19c7019103ca44e8d94917eba8534c76133523ca8406822efdd19c9308 \
- --hash=sha256:780077d95eafc2ccc3ced969db22377b3864e5b9a0ea5eb347cc93b3ea900315 \
- --hash=sha256:7ba9cc93a91d86365a5d270dee221fdc04fb68d7478e6bf6af650de78a8339e3 \
- --hash=sha256:89b16a18e7bba224ce5114db863e7029803c179979e1af6ad6a6b11f70545008 \
- --hash=sha256:9036d6365d13b6cbe8f27a0eaf73ddcc070cae584e5ff94bb45e3e9d729feab5 \
- --hash=sha256:93cf4e045bae74c90ca833cba583c14b62cb4ba2cba0abd2b141ab52548247e2 \
- --hash=sha256:9ad014faa93dbb52c80d8f4d3dcf855865c876c9660cb9bd7553843dd03a4b1e \
- --hash=sha256:9b1d07b53b78bf84a96898c1bc139ad7f10fda7423f5fd158fd0f47ec5e01ac7 \
- --hash=sha256:a7746f235c47abc72b102d3bce9977714c2444bdfaea7888d241b4c4bb6a78bf \
- --hash=sha256:aa3017c40d513ccac9621a2364f939d39e550c542eb2a894b4c8da92b38896ab \
- --hash=sha256:b34d87e8a3090ea626003f87f9392b3929a7bbf4104a05b6667348b6bd4bf1cd \
- --hash=sha256:b541032178a718c165a49638d28272b771053f628382d5e9d1c93df23ff58dbf \
- --hash=sha256:ba5511d8f31c033a5fcbda22dd5c813630af98c70b2661f2d2c654ae3cdfcfc8 \
- --hash=sha256:bc8a37ad5b22c08e2dbd27df2b3ef7e5c0864235805b1e718a235bcb200cf1cb \
- --hash=sha256:bff7d8ec20f5f42607599f9994770fa65d76edca264a87b5e4ea5629bce12268 \
- --hash=sha256:c1ad395cf254c4fbb5b2132fee391f361a6e8c1adbd28f2cd8e79308a615fe9d \
- --hash=sha256:f1d09e520217618e76396377c81fba6f290d5f926f50c35f3a5f72b01a0da780 \
- --hash=sha256:f3eac17d9ec51be534685ba877b6ab5edc3ab7ec95c8f163e5d7b39859524716 \
- --hash=sha256:f419290bc8968a46c4933158c91a0012b7a99bb2e465d5ef5293879742f8797e \
- --hash=sha256:f62aa6ee4eb43b024b0e5a01cf65a0bb078ef8c395e8713c6e8a12a697144528 \
- --hash=sha256:f74e6fdeb9a265624ec3a3918430205dff1df7e95a230779746a6af78bc615af \
- --hash=sha256:f9b57eaa3b0cd8db52049ed0330747b0364e899e8a606a624813452b8203d5f7 \
- --hash=sha256:fce4f615f8ca31b2e61aa0eb5865a21e14f5629515c9151850aa936c02a1ee51
- # via
- # -r build/requirements.in
- # contourpy
- # matplotlib
- # ml-dtypes
- # scipy
opt-einsum==3.4.0 \
--hash=sha256:69bb92469f86a1565195ece4ac0323943e83477171b91d24c35afe028a90d7cd \
--hash=sha256:96ca72f1b886d148241348783498194c577fa30a8faac108586b14f1ba4473ac
66 changes: 66 additions & 0 deletions .github/workflows/tsan-suppressions.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# false-positive caused because we haven't tsan-instrumented libgcc_s. Multiple threads
# are racing on a call to __register_frame_info(), but that function appears to be correctly locked internally.
race:llvm::RuntimeDyldELF::registerEHFrames

# https://github.com/python/cpython/issues/128050
race:partial_vectorcall_fallback

# https://github.com/python/cpython/issues/128100
race:ensure_nonmanaged_dict

# https://github.com/openxla/xla/issues/20686
race:dnnl_sgemm

# https://github.com/python/cpython/issues/128130
race_top:run_eval_code_obj

# Likely only happens when the process is crashing.
race:dump_traceback

# https://github.com/python/cpython/issues/128137
# Fixed in Python 3.14, but not backported to 3.13.
race:immortalize_interned

# https://github.com/python/cpython/issues/128144
# Fixed in Python 3.14, but not backported to 3.13.
race_top:PyMember_GetOne

# https://github.com/python/cpython/issues/128133
# Fixed in Python 3.14, but not backported to 3.13.
race:bytes_hash

# https://github.com/python/cpython/issues/128657
race:py_digest_by_name

# https://github.com/python/cpython/issues/128714
race:func_get_annotations
race:type_get_annotations

# https://github.com/python/cpython/issues/129533
race:PyGC_Disable
race:PyGC_Enable

# Races because the LAPACK and BLAS in our scipy isn't TSAN instrumented.
race:heevd_ffi
race:gesdd_ffi
race:dscal_k_
race:scal_k_
race:gemm_beta
race:gemm_oncopy


# Races that are probably fixed
# https://github.com/numpy/numpy/issues/28041
# race:get_initial_from_ufunc
#
# https://github.com/numpy/numpy/issues/28042
# race:PyArray_UpdateFlags
#
# https://github.com/python/cpython/issues/128717
# race:Py_SetRecursionLimit
#
# https://github.com/numpy/numpy/issues/28143
# race:PyArray_CheckFromAny_int

# https://github.com/python/cpython/issues/128759
# race:PyType_Modified
125 changes: 125 additions & 0 deletions .github/workflows/tsan.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
name: CI - Free-threading and Thread Sanitizer (nightly)

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

on:
schedule:
- cron: "0 12 * * *" # Daily at 12:00 UTC
workflow_dispatch: # allows triggering the workflow run manually
pull_request: # Automatically trigger on pull requests affecting this file
branches:
- main
paths:
- '**/workflows/tsan.yaml'

jobs:
tsan:
runs-on: linux-x86-n2-64
container:
image: index.docker.io/library/ubuntu@sha256:b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b # ratchet:ubuntu:24.04
strategy:
fail-fast: false
defaults:
run:
shell: bash -l {0}
steps:
# Install git before actions/checkout as otherwise it will download the code with the GitHub
# REST API and therefore any subsequent git commands will fail.
- name: Install clang 18
env:
DEBIAN_FRONTEND: noninteractive
run: |
apt update
apt install -y clang-18 libstdc++-14-dev build-essential libssl-dev \
zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git \
libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \
libffi-dev liblzma-dev
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
path: jax
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: python/cpython
path: cpython
ref: "3.13"

- name: Restore cached CPython with TSAN
id: cache-cpython-tsan-restore
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: |
./python-tsan.tgz
key: ${{ runner.os }}-cpython-tsan-${{ hashFiles('cpython/configure.ac') }}

- name: Build CPython with TSAN enabled
if: steps.cache-cpython-tsan-restore.outputs.cache-hit != 'true'
run: |
cd cpython
mkdir ${GITHUB_WORKSPACE}/cpython-tsan
CC=clang-18 CXX=clang++-18 ./configure --prefix ${GITHUB_WORKSPACE}/cpython-tsan --disable-gil --with-thread-sanitizer
make -j64
make install -j64
# Check whether free-threading mode is enabled
PYTHON_GIL=0 ${GITHUB_WORKSPACE}/cpython-tsan/bin/python3 -c "import sys; assert not sys._is_gil_enabled()"
# Create archive to be used with bazel as hermetic python:
cd ${GITHUB_WORKSPACE} && tar -czpf python-tsan.tgz cpython-tsan
- name: Save CPython with TSAN
id: cache-cpython-tsan-save
if: steps.cache-cpython-tsan-restore.outputs.cache-hit != 'true'
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: |
./python-tsan.tgz
key: ${{ runner.os }}-cpython-tsan-${{ hashFiles('cpython/configure.ac') }}

- name: Build Jax and run tests
timeout-minutes: 120
env:
JAX_NUM_GENERATED_CASES: 1
JAX_ENABLE_X64: true
JAX_SKIP_SLOW_TESTS: true
PY_COLORS: 1
run: |
cd jax
export PYTHON_SHA256=($(sha256sum ${GITHUB_WORKSPACE}/python-tsan.tgz))
echo "Python sha256: ${PYTHON_SHA256}"
python3 -VV
python3 build/build.py build --configure_only \
--python_version=3.13-ft \
--bazel_options=--repo_env=HERMETIC_PYTHON_URL="file://${GITHUB_WORKSPACE}/python-tsan.tgz" \
--bazel_options=--repo_env=HERMETIC_PYTHON_SHA256=${PYTHON_SHA256} \
--bazel_options=--repo_env=HERMETIC_PYTHON_PREFIX="cpython-tsan/" \
--bazel_options=--color=yes \
--bazel_options=--copt=-fsanitize=thread \
--bazel_options=--linkopt="-fsanitize=thread" \
--bazel_options=--copt=-g \
--clang_path=/usr/bin/clang-18
# Apply a patch to numpy in requirements lock 3.13 ft to use the nightly version
git apply .github/workflows/requirements_lock_3_13_ft.patch
echo "JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES"
echo "JAX_ENABLE_X64=$JAX_ENABLE_X64"
echo "JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS"
# Set symlink to the bazel executable
bazel_exec=($(ls bazel-*))
ln -s ${bazel_exec} bazel
./bazel test \
--test_env=JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES \
--test_env=JAX_ENABLE_X64=$JAX_ENABLE_X64 \
--test_env=JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS \
--test_env=PYTHON_GIL=0 \
--test_env=TSAN_OPTIONS=halt_on_error=1,suppressions=$PWD/.github/workflows/tsan-suppressions.txt \
--test_env=JAX_TEST_NUM_THREADS=8 \
--test_output=errors \
--local_test_jobs=32 \
--test_timeout=600 \
//tests:cpu_tests
Loading

0 comments on commit 0962b96

Please sign in to comment.