[Inductor] Support storing SPIR-V binary file output from Intel Triton.
etaf authored and pytorchmergebot committed Jul 22, 2024
1 parent 2820e1d commit 8da19fe
Showing 3 changed files with 8 additions and 12 deletions.
torch/_inductor/codecache.py (5 changes: 2 additions & 3 deletions)

@@ -386,7 +386,7 @@ def get_path(
 def get_hash(content: Union[str, bytes], extra: str = "", hash_type: str = "code"):
     if hash_type == "code":
         return code_hash(content, extra)
-    if hash_type in ["cubin", "hsaco"]:
+    if hash_type in ["cubin", "hsaco", "spv"]:
         return code_hash(repr(content))
     raise AssertionError(f"Unknown hash type {hash_type}")

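Adding "spv" here lets SPIR-V binaries produced by Triton's Intel XPU backend be hashed for the on-disk cache the same way CUDA cubin and ROCm hsaco blobs already are. A minimal sketch of the call, with a made-up payload (the first four bytes are the SPIR-V magic number):

```python
from torch._inductor.codecache import get_hash

# Illustrative stand-in for a real SPIR-V module emitted by Triton's XPU backend.
spirv_blob = b"\x03\x02\x23\x07" + b"\x00" * 16

# Before this change, hash_type="spv" hit the AssertionError above.
key = get_hash(spirv_blob, hash_type="spv")
```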
@@ -1582,8 +1582,7 @@ class CudaKernelParamCache:
     cache_clear = staticmethod(cache.clear)

     @classmethod
-    def set(cls, key: str, params: Dict[str, str], cubin: str) -> None:
-        bin_type = "cubin" if torch.version.hip is None else "hsaco"
+    def set(cls, key: str, params: Dict[str, str], cubin: str, bin_type: str) -> None:
         _, path = write(
             cubin,
             bin_type,
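The cache no longer guesses the format from torch.version.hip at write time; callers resolve bin_type themselves and pass it in, which is what lets save_gpu_kernel in triton_heuristics.py (below) hand over "spv" on XPU. A hedged sketch of the new call shape; the key, params, and payload are placeholders, not what Inductor actually emits:

```python
from torch._inductor.codecache import CudaKernelParamCache

key = "triton_poi_fused_add_0"    # placeholder cache key
params = {"mangled_name": key}    # placeholder kernel metadata
binary = b"\x03\x02\x23\x07"      # placeholder compiled-kernel bytes

# The caller, not the cache, now decides the binary format.
CudaKernelParamCache.set(key, params, binary, "spv")
```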
torch/_inductor/codegen/wrapper.py (2 changes: 1 addition & 1 deletion)

@@ -1491,7 +1491,7 @@ def generate_save_uncompiled_kernels(self):
             if not kernel.cuda_kernel_saved:
                 if len(kernel.launchers) == 0:
                     kernel.precompile()
-                kernel.save_cuda_kernel(
+                kernel.save_gpu_kernel(
                     grid=(0, 0, 0),  # use dummy grid
                     stream="stream",  # use dummy stream
                     launcher=kernel.launchers[0],
torch/_inductor/runtime/triton_heuristics.py (13 changes: 5 additions & 8 deletions)

@@ -723,7 +723,7 @@ def autotune_to_one_config(self, *args, **kwargs):
         if self.save_cache_hook:
             self.save_cache_hook(self.launchers[0].config, time_taken_ns)

-    def save_cuda_kernel(self, grid, stream, launcher):
+    def save_gpu_kernel(self, grid, stream, launcher):
         if callable(grid):
             grid_x, grid_y, grid_z = grid(launcher.config.kwargs)
         else:
@@ -753,12 +753,9 @@ def save_cuda_kernel(self, grid, stream, launcher):
         }
         from torch._inductor.codecache import CudaKernelParamCache

-        binary = (
-            launcher.bin.asm["cubin"]
-            if self.device_props.type != "hip"
-            else launcher.bin.asm["hsaco"]
-        )
-        CudaKernelParamCache.set(key, params, binary)
+        bin_type = {"hip": "hsaco", "xpu": "spv"}.get(self.device_props.type, "cubin")
+        binary = launcher.bin.asm[bin_type]
+        CudaKernelParamCache.set(key, params, binary, bin_type)

         self.cuda_kernel_saved = True

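The rename to save_gpu_kernel reflects that this path now serves CUDA, ROCm, and Intel XPU alike, and the hip-only ternary becomes a lookup table that defaults to "cubin", so a future backend is a one-entry change. A self-contained sketch of the dispatch, where asm stands in for launcher.bin.asm (a dict from format name to compiled artifact) with fake payloads:

```python
# Illustrative stand-in for launcher.bin.asm.
asm = {"cubin": b"nv", "hsaco": b"amd", "spv": b"intel"}

def pick_binary(device_type: str) -> tuple[str, bytes]:
    # "cubin" is the fallback for "cuda" and any unlisted device type.
    bin_type = {"hip": "hsaco", "xpu": "spv"}.get(device_type, "cubin")
    return bin_type, asm[bin_type]

assert pick_binary("xpu") == ("spv", b"intel")  # Intel GPUs resolve to SPIR-V
assert pick_binary("hip") == ("hsaco", b"amd")
assert pick_binary("cuda") == ("cubin", b"nv")
```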
@@ -831,7 +828,7 @@ def run(self, *args, grid, stream, **kwargs):

         (launcher,) = self.launchers
         if launcher.store_cubin:
-            self.save_cuda_kernel(grid, stream, launcher)
+            self.save_gpu_kernel(grid, stream, launcher)

         if launcher.config.pre_hook is not None:
             launcher.config.pre_hook(
