Skip to content

Commit

Permalink
Merge pull request #549 from kroma-network/perf/add-optimization-flags
Browse files Browse the repository at this point in the history
perf: add optimization flags
  • Loading branch information
chokobole authored Oct 18, 2024
2 parents ab62362 + fdbef39 commit 67bf0f9
Show file tree
Hide file tree
Showing 18 changed files with 87 additions and 112 deletions.
21 changes: 20 additions & 1 deletion .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ build --announce_rc
# TODO(chokobole): Remove when `cc_shared_library` is enabled by default
build --experimental_cc_shared_library

# Enable platform-specific configurations by default, depending on the host machine.
common --enable_platform_specific_config

build:macos_x86_64 --config=macos
build:macos_x86_64 --cpu=darwin_x86_64
build:macos_x86_64 --host_cpu=darwin_x86_64
Expand Down Expand Up @@ -52,9 +55,22 @@ build:rocm --repo_env TACHYON_NEED_ROCM=1
# Options extracted from configure script
build:numa --//:has_numa

# Fastbuild config
build:fastbuild -c fastbuild

# Debug config
build:dbg -c dbg

# Opt config
build:opt -c opt
build:opt --//:has_openmp

build:maxopt -c opt
build:maxopt --//:has_openmp
build:maxopt --config=native
build:maxopt --copt=-flto
build:maxopt --linkopt=-flto

# By default, build Tachyon in C++ 17 mode.
build:linux --cxxopt=-std=c++17
build:linux --host_cxxopt=-std=c++17
Expand All @@ -70,11 +86,14 @@ build:windows --host_cxxopt=/std:c++17
build:avx_linux --copt=-mavx
build:avx2_linux --copt=-mavx2
build:avx512_linux --copt=-mavx512f
build:native_arch_linux --copt=-march=native
build:avx_windows --copt=/arch=AVX
build:avx2_windows --copt=/arch=AVX2
build:avx512_windows --copt=/arch=AVX512

# Compile for the native architecture
# WARN(batzor): this will make the build non-portable
build:native --copt=-march=native

# Enable googletest build with absl.
# See https://github.com/google/googletest/blob/v1.13.0/BUILD.bazel#L67C1-L70
build --define absl=1
Expand Down
31 changes: 9 additions & 22 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,6 @@ jobs:
matrix:
os: [ubuntu-latest, macos-latest-xlarge]
build_flag: [fastbuild, opt]
openmp: ["openmp", "no-openmp"]
include:
- os: ubuntu-latest
bazel_config: linux
- os: macos-latest-xlarge
bazel_config: macos_arm64
- openmp: "openmp"
has_openmp: "--//:has_openmp"
exclude:
- build_flag: fastbuild
openmp: "openmp"

runs-on: ${{ matrix.os }}
steps:
Expand Down Expand Up @@ -50,7 +39,7 @@ jobs:
# Avoid downloading Bazel every time.
bazelisk-cache: true
# Share a single build cache between workflows.
disk-cache: ${{ matrix.os }}-${{ matrix.build_flag }}-${{ matrix.openmp }}
disk-cache: ${{ matrix.os }}-${{ matrix.build_flag }}-build
# Share repository cache between workflows.
repository-cache: false
# Cache external repositories
Expand All @@ -65,49 +54,47 @@ jobs:
run: python3 -m pip install numpy

- name: Install OpenMP on linux
if: matrix.os == 'ubuntu-latest' && matrix.openmp == 'openmp'
if: matrix.os == 'ubuntu-latest' && matrix.build_flag == 'opt'
run: sudo apt-get install -y libomp-dev

- name: Install OpenMP on macos
if: matrix.os == 'macos-latest-xlarge' && matrix.openmp == 'openmp'
if: matrix.os == 'macos-latest-xlarge' && matrix.build_flag == 'opt'
run: brew install libomp

- name: Add .bazelrc.user on linux
if: matrix.os == 'ubuntu-latest'
run: echo "build --config linux" > .bazelrc.user &&
echo "build --action_env=CARGO=$HOME/.cargo/bin/cargo" >> .bazelrc.user &&
run: echo "build --action_env=CARGO=$HOME/.cargo/bin/cargo" >> .bazelrc.user &&
echo "build --@rules_rust//rust/toolchain/channel=nightly" >> .bazelrc.user

- name: Add .bazelrc.user on macos
if: matrix.os == 'macos-latest-xlarge'
run: brew install coreutils &&
export PATH="/opt/homebrew/opt/coreutils/libexec/gnubin:$PATH" &&
echo "build --config macos_arm64" > .bazelrc.user &&
echo "build --action_env=CARGO=$HOME/.cargo/bin/cargo" >> .bazelrc.user &&
echo "build --@rules_rust//rust/toolchain/channel=nightly" >> .bazelrc.user

- name: Build
run: bazel build -c ${{ matrix.build_flag }} ${{ matrix.has_openmp }} //...
run: bazel build --config ${{ matrix.build_flag }} //...

- name: Test
# NOTE(chokobole): Test timeouts are overridden to 1.5x the default timeouts due to timeout failures on GitHub Actions.
# See https://github.com/kroma-network/tachyon/actions/runs/9581476338/job/26418352737.
run: bazel test -c ${{ matrix.build_flag }} ${{ matrix.has_openmp }} --test_output=errors --test_tag_filters -benchmark,-manual,-cuda //... --test_timeout=90,450,1350,5400
run: bazel test --config ${{ matrix.build_flag }} --test_output=errors --test_tag_filters -benchmark,-manual,-cuda //... --test_timeout=90,450,1350,5400

- name: Test Node Binding
run: |
cd tachyon/node/test
bazel test --config ${{ matrix.bazel_config }} -c ${{ matrix.build_flag}} --test_output=errors //...
bazel test -c ${{ matrix.build_flag}} --test_output=errors //...
- name: Test Py Binding
run: |
cd tachyon/py/test
bazel test --config ${{ matrix.bazel_config }} -c ${{ matrix.build_flag}} --test_output=errors //...
bazel test -c ${{ matrix.build_flag}} --test_output=errors //...
- name: Test Circom
run: |
cd vendors/circom
CARGO_BAZEL_REPIN=true bazel test --config ${{ matrix.bazel_config }} -c ${{ matrix.build_flag}} --test_output=errors //...
CARGO_BAZEL_REPIN=true bazel test --config ${{ matrix.build_flag}} --test_output=errors //...
lint:
runs-on: ubuntu-latest
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,13 @@ brew install gmp libomp
### Build

```shell
bazel build --config {os} //...
bazel build //...
```

### Test

```shell
bazel test --config {os} //...
bazel test //...
```

Check [How To Build](/docs/how_to_use/how_to_build.md) for more information.
1 change: 1 addition & 0 deletions benchmark/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ tachyon_cc_library(
name = "simple_reporter",
srcs = ["simple_reporter.cc"],
hdrs = ["simple_reporter.h"],
force_rtti = if_has_matplotlib(True, False),
local_defines = tachyon_matplotlib_defines(),
visibility = ["//benchmark:__subpackages__"],
deps = [
Expand Down
8 changes: 4 additions & 4 deletions benchmark/fft/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ CPU Caches:
### FFT

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/fft:fft_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --check_results
bazel run --config opt --//:has_matplotlib //benchmark/fft:fft_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --check_results
```

#### On Intel i9-13900K
Expand Down Expand Up @@ -56,7 +56,7 @@ bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/f
### IFFT

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/fft:fft_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --run_ifft --check_results
bazel run --config opt --//:has_matplotlib //benchmark/fft:fft_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --run_ifft --check_results
```

#### On Intel i9-13900K
Expand Down Expand Up @@ -94,7 +94,7 @@ bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/f
### FFT

```shell
bazel run -c opt --config cuda --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/fft:fft_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --check_results
bazel run --config opt --config cuda --//:has_matplotlib //benchmark/fft:fft_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --check_results
```

#### On RTX-4090
Expand All @@ -115,7 +115,7 @@ bazel run -c opt --config cuda --//:has_openmp --//:has_rtti --//:has_matplotlib
### IFFT

```shell
bazel run -c opt --config cuda --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/fft:fft_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --run_ifft --check_results
bazel run --config opt --config cuda --//:has_matplotlib //benchmark/fft:fft_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --run_ifft --check_results
```

#### On RTX-4090
Expand Down
4 changes: 2 additions & 2 deletions benchmark/fft_batch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ CPU Caches:
### FFTBatch

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 20 -k 21 -k 22 -k 23 -k 24 -k 25 -k 26 --vendor plonky3 -p baby_bear --check_results
bazel run --config opt --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 20 -k 21 -k 22 -k 23 -k 24 -k 25 -k 26 --vendor plonky3 -p baby_bear --check_results
```

WARNING: On Mac M3, tests beyond degree 24 are not feasible due to memory constraints.
Expand Down Expand Up @@ -54,7 +54,7 @@ WARNING: On Mac M3, tests beyond degree 24 are not feasible due to memory constr
### CosetLDEBatch

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 20 -k 21 -k 22 -k 23 -k 24 -k 25 --vendor plonky3 -p baby_bear --run_coset_lde --check_results
bazel run --config opt --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 20 -k 21 -k 22 -k 23 -k 24 -k 25 --vendor plonky3 -p baby_bear --run_coset_lde --check_results
```

WARNING: On Mac M3, tests beyond degree 24 are not feasible due to memory constraints.
Expand Down
2 changes: 1 addition & 1 deletion benchmark/fri/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ CPU Caches:
```

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/fri:fri_benchmark -- -k 18 -k 19 -k 20 -k 21 -k 22 --batch_size 100 --input_num 4 --round_num 4 --log_blowup 2 --vendor plonky3 --check_results
bazel run --config opt --//:has_matplotlib //benchmark/fri:fri_benchmark -- -k 18 -k 19 -k 20 -k 21 -k 22 --batch_size 100 --input_num 4 --round_num 4 --log_blowup 2 --vendor plonky3 --check_results
```

## On Intel i9-13900K
Expand Down
8 changes: 4 additions & 4 deletions benchmark/msm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ CPU Caches:
### Uniform points

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --check_results
bazel run --config opt --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --check_results
```

#### On Intel i9-13900K
Expand Down Expand Up @@ -56,7 +56,7 @@ bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/m
### Non-uniform points

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --test_set non_uniform --check_results
bazel run --config opt --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --test_set non_uniform --check_results
```

#### On Intel i9-13900K
Expand Down Expand Up @@ -94,7 +94,7 @@ bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/m
### Uniform points

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib --config cuda //benchmark/msm:msm_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --test_set non_uniform --check_results
bazel run --config opt --//:has_matplotlib --config cuda //benchmark/msm:msm_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --test_set non_uniform --check_results
```

#### On RTX-4090
Expand All @@ -115,7 +115,7 @@ bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib --config cuda
### Non-uniform points

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib --config cuda //benchmark/msm:msm_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --check_results
bazel run --config opt --//:has_matplotlib --config cuda //benchmark/msm:msm_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --check_results
```

#### On RTX-4090
Expand Down
2 changes: 1 addition & 1 deletion benchmark/poseidon/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ CPU Caches:
```

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/poseidon:poseidon_benchmark -- --check_results
bazel run --config opt --//:has_matplotlib //benchmark/poseidon:poseidon_benchmark -- --check_results
```

## On Intel i9-13900K
Expand Down
6 changes: 3 additions & 3 deletions benchmark/poseidon2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Note that Poseidon2 runs 10000x per test due to some time results being too smal
## BN254

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p bn254_fr --vendor horizen --vendor plonky3 --check_results
bazel run --config opt --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p bn254_fr --vendor horizen --vendor plonky3 --check_results
```

### On Intel i9-13900K
Expand Down Expand Up @@ -66,7 +66,7 @@ Note: Horizen and Plonky3 compute values with a different internal matrix, requi
### Horizen

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p baby_bear --vendor horizen --check_results
bazel run --config opt --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p baby_bear --vendor horizen --check_results
```

#### On Intel i9-13900K
Expand Down Expand Up @@ -108,7 +108,7 @@ bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/p
### Plonky3

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p baby_bear --vendor plonky3 --check_results
bazel run --config opt --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p baby_bear --vendor plonky3 --check_results
```

#### On Intel i9-13900K
Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile.halo2.jammy
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ LABEL maintainer="The Tachyon Authors <[email protected]>"
COPY . /usr/src/tachyon
WORKDIR /usr/src/tachyon

RUN bazel build -c opt --config linux --//:has_openmp --//:c_shared_object //scripts/packages/debian/runtime:debian && \
bazel build -c opt --config linux --//:has_openmp --//:c_shared_object //scripts/packages/debian/dev:debian
RUN bazel build --config opt --config linux --//:c_shared_object //scripts/packages/debian/runtime:debian && \
bazel build --config opt --config linux --//:c_shared_object //scripts/packages/debian/dev:debian

FROM ubuntu:jammy AS tachyon-halo2
LABEL maintainer="The Tachyon Authors <[email protected]>"
Expand Down
Loading

0 comments on commit 67bf0f9

Please sign in to comment.