Skip to content

Commit

Permalink
Replace num_stages=0 with num_stages=2
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexAUT committed Nov 1, 2024
1 parent 1d60b05 commit 609b51c
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
10 changes: 5 additions & 5 deletions python/perf-kernels/03-matrix-multiplication-all-types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@
  configs=[
  triton.Config(
  {'BLOCK_SIZE_M': 256, 'BLOCK_SIZE_N': 256, 'BLOCK_SIZE_K': 128, 'GROUP_SIZE_M': 4, 'waves_per_eu': 0},
- num_warps=8, num_stages=0),
+ num_warps=8, num_stages=2),
  triton.Config(
  {'BLOCK_SIZE_M': 128, 'BLOCK_SIZE_N': 256, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 4, 'waves_per_eu': 0},
- num_warps=8, num_stages=0),
+ num_warps=8, num_stages=2),
  triton.Config(
  {'BLOCK_SIZE_M': 128, 'BLOCK_SIZE_N': 256, 'BLOCK_SIZE_K': 16, 'GROUP_SIZE_M': 4, 'waves_per_eu': 2},
- num_warps=4, num_stages=0),
+ num_warps=4, num_stages=2),
  triton.Config(
  {'BLOCK_SIZE_M': 128, 'BLOCK_SIZE_N': 128, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 1, 'waves_per_eu': 2},
- num_warps=8, num_stages=0),
+ num_warps=8, num_stages=2),
  triton.Config(
  {'BLOCK_SIZE_M': 128, 'BLOCK_SIZE_N': 64, 'BLOCK_SIZE_K': 32, 'GROUP_SIZE_M': 32, 'waves_per_eu': 2},
- num_warps=4, num_stages=0),
+ num_warps=4, num_stages=2),
  ],
  key=['M', 'N', 'K'],
  use_cuda_graph=True,
Expand Down
2 changes: 1 addition & 1 deletion python/perf-kernels/streamk/tune_streamk.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def get_full_tuning_space():
  block_k_range = [16, 32, 64, 128, 256]
  num_warps_range = [1, 2, 4, 8]
  group_m_range = [1, 4, 8, 16, 32]
- # For now we see better perf with num_stages=0 for all gemm configs we care
+ # For now we see better perf with num_stages=2 for all gemm configs we care
  # But keep this explicit so that we do not forget we may need to set it to
  # other values in the future
  num_stage_range = [2]
Expand Down

0 comments on commit 609b51c

Please sign in to comment.