Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix HLO tests #19

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions xla/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,13 @@
("%PYTHON", os.getenv("PYTHON", sys.executable)),
])

# Expose an "IS_ROCM" lit feature when the suite is invoked with the param
# PTX=GCN, so RUN lines can branch on it with `%if IS_ROCM %{ ... %}` /
# `%if !IS_ROCM %{ ... %}`.
if lit_config.params.get('PTX') == 'GCN':
    config.available_features.add("IS_ROCM")


# Include additional substitutions that may be defined via params.
# Each param KEY=VALUE becomes the substitution "%{KEY}" -> VALUE; the "%%" in
# the format string is an escaped literal "%".
config.substitutions.extend(
    ("%%{%s}" % key, val)
    for key, val in lit_config.params.items()
)

2 changes: 1 addition & 1 deletion xla/service/gpu/tests/add_preds.hlo
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck %s

// CHECK: define void @fusion({{.*}}%[[ARG0:.*]], {{.*}}%[[ARG1:.*]],
// CHECK: define{{( amdgpu_kernel)?}} void @fusion({{.*}}%[[ARG0:.*]], {{.*}}%[[ARG1:.*]],
// CHECK: %[[A:.*]] = load {{.*}} ptr %[[ARG0]]
// CHECK: %[[B:.*]] = load {{.*}} ptr %[[ARG1]]
// CHECK: or {{.*}} %[[A]], %[[B]]
Expand Down
5 changes: 3 additions & 2 deletions xla/service/gpu/tests/dot_bf16.hlo
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// RUN: hlo-opt %s --platform=gpu --stage=hlo --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/v100.txtpb --split-input-file | FileCheck %s --check-prefixes=CHECK-SM70
// RUN: hlo-opt %s --platform=gpu --stage=hlo --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/a100_80.txtpb --split-input-file --xla_gpu_autotune_level=0 --xla_gpu_enable_triton_gemm=false | FileCheck %s --check-prefixes=CHECK-SM80
// RUN: %if !IS_ROCM %{ hlo-opt %s --platform=gpu --stage=hlo --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/v100.txtpb --split-input-file | FileCheck %s --check-prefixes=CHECK-SM70 %}
// RUN: %if !IS_ROCM %{ hlo-opt %s --platform=gpu --stage=hlo --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/a100_80.txtpb --split-input-file --xla_gpu_autotune_level=0 --xla_gpu_enable_triton_gemm=false | FileCheck %s --check-prefixes=CHECK-SM80 %}
// RUN: %if IS_ROCM %{ hlo-opt %s --platform=gpu --stage=hlo --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/mi200.txtpb --split-input-file --xla_gpu_autotune_level=0 --xla_gpu_enable_triton_gemm=false | FileCheck %s --check-prefixes=CHECK-SM80 %}


// CHECK-SM70: custom-call(f32
Expand Down
2 changes: 1 addition & 1 deletion xla/service/gpu/tests/fused_scatter.hlo
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py

// CHECK: define void @wrapped_scatter
// CHECK: define{{( amdgpu_kernel)?}} void @wrapped_scatter
// CHECK: %[[VAL_70:.*]] = alloca i32, align 4
// CHECK-PTX: %[[VAL_71:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x
// CHECK-GCN: %[[VAL_71:.*]] = call i32 @llvm.amdgcn.workgroup.id.x
Expand Down
20 changes: 10 additions & 10 deletions xla/service/gpu/tests/launch_dimensions.hlo
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// This tests that we do not increase the grid launch size when
// few_waves is enabled.

// CHECK-LABEL: define void @wrapped_b
// CHECK-LABEL: define{{( amdgpu_kernel)?}} void @wrapped_b
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-GCN-DAG: call i32 @llvm.amdgcn.workgroup.id.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
Expand All @@ -27,7 +27,7 @@ ENTRY main {

// This tests that we cap grid launch code when few_waves is enabled.

// CHECK-LABEL: define void @wrapped_b
// CHECK-LABEL: define{{( amdgpu_kernel)?}} void @wrapped_b
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-GCN-DAG: call i32 @llvm.amdgcn.workgroup.id.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
Expand All @@ -53,7 +53,7 @@ ENTRY main {
// This tests that we cap grid launch code when few_waves is enabled
// and scalar broadcast are present.

// CHECK-LABEL: define void @fusion_3
// CHECK-LABEL: define{{( amdgpu_kernel)?}} void @fusion_3
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
// CHECK-PTX-DAG: ![[ctaid_range]] = !{i32 0, i32 1008}
Expand Down Expand Up @@ -84,7 +84,7 @@ ENTRY main {
// This tests that we enable few_waves in a simple fusion. It is the baseline
// for the tests below.

// CHECK-LABEL: define void @fusion
// CHECK-LABEL: define{{( amdgpu_kernel)?}} void @fusion
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
// CHECK-PTX-DAG: ![[ctaid_range]] = !{i32 0, i32 1008}
Expand Down Expand Up @@ -113,7 +113,7 @@ ENTRY main {

// This tests that we keep few_waves enabled for large constants.

// CHECK-LABEL: define void @fusion
// CHECK-LABEL: define{{( amdgpu_kernel)?}} void @fusion
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
// CHECK-PTX-DAG: ![[ctaid_range]] = !{i32 0, i32 1008}
Expand Down Expand Up @@ -141,7 +141,7 @@ ENTRY main {

// This tests that we disable few_waves if a non-elementwise op is present.

// CHECK-LABEL: define void @fusion
// CHECK-LABEL: define{{( amdgpu_kernel)?}} void @fusion
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
// CHECK-PTX-DAG: ![[ctaid_range]] = !{i32 0, i32 195313}
Expand Down Expand Up @@ -175,7 +175,7 @@ ENTRY main {
// - the fusion is not row-vectorizable
// It serves as a baseline for the tests below.

// CHECK-LABEL: define void @fusion
// CHECK-LABEL: define{{( amdgpu_kernel)?}} void @fusion
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
// CHECK-PTX-DAG: ![[ctaid_range]] = !{i32 0, i32 7813}
Expand Down Expand Up @@ -219,7 +219,7 @@ ENTRY main {
// - the fusion IS row-vectorizable
// In this case, the block count is changed from 7813 to 2000.

// CHECK-LABEL: define void @fusion
// CHECK-LABEL: define{{( amdgpu_kernel)?}} void @fusion
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
// CHECK-PTX-DAG: ![[ctaid_range]] = !{i32 0, i32 2000}
Expand Down Expand Up @@ -260,7 +260,7 @@ ENTRY main {
// - the fusion is not row-vectorizable
// In this case, the block count is changed from 7813 to 1008.

// CHECK-LABEL: define void @fusion
// CHECK-LABEL: define{{( amdgpu_kernel)?}} void @fusion
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
// CHECK-PTX-DAG: ![[ctaid_range]] = !{i32 0, i32 1008}
Expand Down Expand Up @@ -300,7 +300,7 @@ ENTRY main {
// This tests the GELU kernel. The original kernel that
// motivated few_waves implementation.

// CHECK-LABEL: define void @fusion
// CHECK-LABEL: define{{( amdgpu_kernel)?}} void @fusion
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-GCN-DAG: call i32 @llvm.amdgcn.workgroup.id.x(), !range ![[ctaid_range:[0-9]+]]
// CHECK-PTX-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
Expand Down
Loading
Loading