Skip to content

Commit

Permalink
Enable Usage of Load and Store Cache Modifiers for NVIDIA GPUs in STREAM (#30)
Browse files: Browse the repository at this point in the history
  • Loading branch information
fthaler authored Nov 13, 2024
1 parent 95a8faf commit e34cc8b
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
4 changes: 4 additions & 0 deletions stencil_benchmarks/benchmarks_collection/stream/cuda_hip.j2
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ void {{ name }}({{ varargs | join(', ') }}) {
{%- macro load(access) %}
{%- if streaming_loads -%}
__builtin_nontemporal_load(&{{ access }})
{%- elif load_cache_modifier -%}
__ld{{ load_cache_modifier }}(&{{ access }})
{%- else -%}
{{ access }}
{%- endif -%}
Expand All @@ -120,6 +122,8 @@ void {{ name }}({{ varargs | join(', ') }}) {
{%- macro store(access, value) %}
{%- if streaming_stores -%}
__builtin_nontemporal_store({{ value }}, &{{ access }})
{%- elif store_cache_modifier -%}
__st{{ store_cache_modifier }}(&{{ access }}, {{ value }})
{%- else -%}
{{ access }} = {{ value }}
{%- endif -%}
Expand Down
19 changes: 18 additions & 1 deletion stencil_benchmarks/benchmarks_collection/stream/cuda_hip.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import warnings
from pathlib import Path

from ...benchmark import Benchmark, ExecutionError, Parameter
from ...benchmark import Benchmark, ExecutionError, Parameter, ParameterError
from ...tools import compilation, cpphelpers, template


Expand All @@ -57,12 +57,27 @@ class Native(Benchmark):
index_type = Parameter("index data type", "std::size_t")
streaming_stores = Parameter("use streaming store instructions", False)
streaming_loads = Parameter("use streaming load instructions", False)
store_cache_modifier = Parameter(
"PTX cache modifier for stores", "", choices=["", "wb", "cg", "cs", "wt"]
)
load_cache_modifier = Parameter(
"PTX cache modifier for loads", "", choices=["", "cg", "ca", "cs", "lu", "cv"]
)
print_code = Parameter("print code", False)
verify = Parameter("verify results", True)

def setup(self):
super().setup()

if self.streaming_loads and self.load_cache_modifier:
raise ParameterError(
"streaming loads can not be combined with load cache modifier"
)
if self.streaming_stores and self.store_cache_modifier:
raise ParameterError(
"streaming stores can not be combined with store cache modifier"
)

elements_per_block = self.block_size * self.vector_size * self.unroll_factor
if self.array_size % elements_per_block:
warnings.warn("adapting array size to match block and vector sizes")
Expand Down Expand Up @@ -98,6 +113,8 @@ def template_args(self):
index_type=self.index_type,
streaming_loads=self.streaming_loads,
streaming_stores=self.streaming_stores,
store_cache_modifier=self.store_cache_modifier,
load_cache_modifier=self.load_cache_modifier,
verify=self.verify,
)

Expand Down

0 comments on commit e34cc8b

Please sign in to comment.