Skip to content

Commit

Permalink
Deploy compile_time benchmark CI (#153)
Browse files Browse the repository at this point in the history
Summary:
As the title says. It will upload the result to the PyTorch CI AWS S3 bucket, making it ready to query in ClickHouse.

Pull Request resolved: #153

Test Plan: https://github.com/pytorch-labs/tritonbench/actions/runs/13205478992/job/36867552165?pr=153

Reviewed By: adamomainz

Differential Revision: D69317025

Pulled By: xuzhao9

fbshipit-source-id: 6a6b66d6b20c483cce66994275d5a48c01a07732
  • Loading branch information
xuzhao9 authored and facebook-github-bot committed Feb 7, 2025
1 parent a42aa62 commit 5255f6b
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 1 deletion.
113 changes: 113 additions & 0 deletions .ci/test_infra/oss_ci_benchmark_v3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""
Convert Tritonbench json to ClickHouse oss_ci_benchmark_v3 schema.
https://github.com/pytorch/test-infra/blob/main/clickhouse_db_schema/oss_ci_benchmark_v3/schema.sql
"""

import argparse
import json
import re
from pathlib import Path

from typing import Any, Dict, List, Tuple


def parse_dependencies(envs: Dict[str, str]) -> Dict[str, Dict[str, Any]]:
    """Build the per-dependency metadata mapping for the v3 schema.

    For each tracked dependency, read its branch, commit sha, and commit time
    from the flat ``envs`` mapping (keys like ``pytorch_branch``,
    ``pytorch_commit``, ``pytorch_commit_time``) and return a nested dict
    keyed by dependency name. Raises KeyError if an expected env key is
    missing.
    """
    repos = {
        "pytorch": "pytorch/pytorch",
        "triton": "triton-lang/triton",
        "tritonbench": "pytorch-labs/tritonbench",
    }
    return {
        name: {
            "repo": repo,
            "branch": envs[f"{name}_branch"],
            "sha": envs[f"{name}_commit"],
            "extra_info": {"commit_time": envs[f"{name}_commit_time"]},
        }
        for name, repo in repos.items()
    }


def parse_metric_id(metric_id: str) -> Tuple[str, str, str, str, str]:
    """Split a Tritonbench metric id into (op, mode, input, backend, metric).

    Two forms are recognized:
      * per-input:  ``tritonbench_<op>_<mode>[x_<input>-<backend>]_<metric>``
      * aggregated: ``tritonbench_<op>_<mode>[<backend>]-<metric>`` — the
        input slot in the returned tuple is ``None`` for this form.

    Raises:
        AttributeError: if ``metric_id`` matches neither form
            (``.groups()`` is called on a failed match).
    """
    # Fix: removed a leftover debug print(metric_id) — this script's role is
    # to write machine-readable output, so stray stdout is noise in CI logs.
    if "[x_" in metric_id:
        # per-input metric: input value is embedded as "[x_<input>-<backend>]"
        metric_id_regex = (
            r"tritonbench_([0-9a-z_]+)_([a-z_]+)\[x_(.*)-([0-9a-z_]+)\]_([a-z_]+)"
        )
        # renamed "input" -> "input_name" to avoid shadowing the builtin
        op, mode, input_name, backend, metric = re.match(
            metric_id_regex, metric_id
        ).groups()
        return (op, mode, input_name, backend, metric)
    # aggregated metric: no input component
    metric_id_regex = r"tritonbench_([0-9a-z_]+)_([a-z_]+)\[([0-9a-z_]+)\]-(.+)"
    op, mode, backend, metric = re.search(metric_id_regex, metric_id).groups()
    return (op, mode, None, backend, metric)


def generate_oss_ci_benchmark_v3_json(
    benchmark_result: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Convert a Tritonbench result dict into a list of v3-schema entries.

    One entry is emitted per measured metric; metric ids ending in
    ``-target`` are reference thresholds, not measurements, and are skipped.
    """
    out = []
    for metric_id, metric_value in benchmark_result["metrics"].items():
        # bypass if the metric is a target value
        if metric_id.endswith("-target"):
            continue
        deps = parse_dependencies(benchmark_result["env"])
        op, mode, _input, backend, metric_name = parse_metric_id(metric_id)
        out.append(
            {
                "dependencies": deps,
                "benchmark": {
                    "name": benchmark_result["name"],
                    "mode": mode,
                    "dtype": "unknown",
                    "extra_info": {},
                },
                # We use the model field for operator
                "model": {
                    "name": op,
                    "type": "tritonbench-oss",
                    "backend": backend,
                },
                "metric": {
                    "name": metric_name,
                    "benchmark_values": [metric_value],
                },
            }
        )
    return out


def v3_json_to_str(v3_json: List[Dict[str, Any]], to_lines: bool = True) -> str:
    """Serialize v3 entries to a string.

    With ``to_lines`` (the default), emit newline-delimited JSON — one entry
    per line, the format the ClickHouse uploader expects. Otherwise emit a
    single pretty-printed JSON array.
    """
    if not to_lines:
        return json.dumps(v3_json, indent=4)
    return "\n".join(json.dumps(entry) for entry in v3_json)


if __name__ == "__main__":
    # CLI entry point: read a Tritonbench result json, convert it to the
    # ClickHouse oss_ci_benchmark_v3 line-delimited format, and write it out.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--json",
        required=True,
        help="Upload benchmark result json file.",
    )
    parser.add_argument("--output", required=True, help="output json.")
    args = parser.parse_args()
    input_path = Path(args.json)
    assert (
        input_path.exists()
    ), f"Specified result json path {args.json} does not exist."
    benchmark_result = json.loads(input_path.read_text())
    v3_entries = generate_oss_ci_benchmark_v3_json(benchmark_result)
    output_path = Path(args.output)
    # Create the output directory if it does not exist yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(v3_json_to_str(v3_entries))
    print(f"[oss_ci_benchmark_v3] Successfully saved to {args.output}")
1 change: 1 addition & 0 deletions .ci/upload/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
boto3
2 changes: 2 additions & 0 deletions .ci/upload/scribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@
"github_run_attempt",
"github_run_id",
"github_run_number",
"github_workflow",
"github_workflow_ref",
"github_workflow_sha",
"job_name",
"runner_arch",
"runner_name",
"runner_type",
"runner_os",
"metric_id",
],
Expand Down
39 changes: 38 additions & 1 deletion .github/workflows/_linux-benchmark-h100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ on:
required: True
description: |
Tritonbench Scribe Graph Access Token
AWS_ACCESS_KEY_ID:
required: True
description: |
AWS S3 bucket access key
AWS_SECRET_ACCESS_KEY:
required: True
description: |
AWS S3 bucket secret access key
inputs:
benchmark_name:
required: True
Expand All @@ -24,11 +32,17 @@ jobs:
runs-on: [gcp-h100-runner]
timeout-minutes: 240
environment: docker-s3-upload
permissions:
id-token: write
contents: read
env:
SETUP_SCRIPT: "/workspace/setup_instance.sh"
CONDA_ENV: ${{ inputs.conda_env }}
RUNNER_TYPE: "gcp-h100-runner"
JOB_NAME: tritonbench-h100-${{ inputs.conda_env }}-${{ inputs.benchmark_name }}
TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
steps:
- name: Checkout Tritonbench
uses: actions/checkout@v3
Expand All @@ -39,6 +53,13 @@ jobs:
bash .ci/gpu/tune-gcp-h100.sh
sudo ldconfig
nvidia-smi
- name: Authenticate with AWS
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
# The max duration enforced by the server side
role-duration-seconds: 18000
aws-region: us-east-1
- name: Benchmarking
run: |
bash .ci/tritonbench/run-benchmark.sh ${{ inputs.benchmark_name }}
Expand All @@ -51,8 +72,24 @@ jobs:
- name: Upload result to Scribe
run: |
. "${SETUP_SCRIPT}"
latest_result_json=$(find ./benchmark-output/ -name "result.json" | sort -r | head -n 1)
latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1)
python ./.ci/upload/scribe.py --json ${latest_result_json}
- name: Rewrite Tritonbench json to ClickHouse style
run: |
. "${SETUP_SCRIPT}"
latest_result_json=$(find ./benchmark-output -name "result.json" | sort -r | head -n 1)
python ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
--output benchmark-output/results/result.json
- name: Setup uploader dependencies
run: |
sudo apt-get install -y python3-pip
- name: Upload result to ClickHouse
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: benchmark-output/results
dry-run: false
schema-version: v3
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Restore Nvidia GPU
if: always()
run: |
Expand Down
28 changes: 28 additions & 0 deletions .github/workflows/compile-time.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name: Triton Compile Time Benchmark
on:
  schedule:
    # Run nightly at 4 PM UTC after the nightly docker is updated
    - cron: '0 16 * * *'
  workflow_dispatch:
  pull_request:
    paths:
      - benchmarks/compile_time/**
      - .github/workflows/_linux-benchmark-h100.yml
      # Fix: must match this file's actual name (compile-time.yaml, not .yml),
      # otherwise PRs that edit this workflow will not trigger it.
      - .github/workflows/compile-time.yaml

jobs:
  h100-triton-main-compile-time-benchmark:
    uses: ./.github/workflows/_linux-benchmark-h100.yml
    with:
      conda_env: "triton-main"
      benchmark_name: "compile_time"
    secrets:
      TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

# Cancel superseded runs of the same PR/branch; dispatch runs are kept distinct.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true
File renamed without changes.
2 changes: 2 additions & 0 deletions tritonbench/utils/run_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,12 @@ def get_github_env() -> Dict[str, str]:
out["GITHUB_RUN_ATTEMPT"] = os.environ["GITHUB_RUN_ATTEMPT"]
out["GITHUB_RUN_ID"] = os.environ["GITHUB_RUN_ID"]
out["GITHUB_RUN_NUMBER"] = os.environ["GITHUB_RUN_NUMBER"]
out["GITHUB_WORKFLOW"] = os.environ["GITHUB_WORKFLOW"]
out["GITHUB_WORKFLOW_REF"] = os.environ["GITHUB_WORKFLOW_REF"]
out["GITHUB_WORKFLOW_SHA"] = os.environ["GITHUB_WORKFLOW_SHA"]
out["JOB_NAME"] = os.environ["JOB_NAME"]
out["RUNNER_ARCH"] = os.environ["RUNNER_ARCH"]
out["RUNNER_TYPE"] = os.environ["RUNNER_TYPE"]
out["RUNNER_NAME"] = os.environ["RUNNER_NAME"]
out["RUNNER_OS"] = os.environ["RUNNER_OS"]
return out
Expand Down

0 comments on commit 5255f6b

Please sign in to comment.