Skip to content

Commit

Permalink
Made it possible to disable loop blocking if there are no independen…
Browse files Browse the repository at this point in the history
…t nodes.
  • Loading branch information
philip-paul-mueller committed Dec 6, 2024
1 parent 8b6abc2 commit 715be40
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def gt_auto_optimize(
gpu_block_size: Optional[Sequence[int | str] | str] = None,
blocking_dim: Optional[gtx_common.Dimension] = None,
blocking_size: int = 10,
require_independent_nodes: Optional[bool] = None,
reuse_transients: bool = False,
gpu_launch_bounds: Optional[int | str] = None,
gpu_launch_factor: Optional[int] = None,
Expand Down Expand Up @@ -90,6 +91,8 @@ def gt_auto_optimize(
one for all.
blocking_dim: On which dimension blocking should be applied.
blocking_size: How many elements each block should process.
require_independent_nodes: If `True` only apply loop blocking if there
are independent nodes. If `None` the default of `LoopBlocking` is used.
reuse_transients: Run the `TransientReuse` transformation, might reduce memory footprint.
gpu_launch_bounds: Use this value as `__launch_bounds__` for _all_ GPU Maps.
gpu_launch_factor: Use the number of threads times this value as `__launch_bounds__`
Expand All @@ -101,7 +104,6 @@ def gt_auto_optimize(
validate: Perform validation during the steps.
validate_all: Perform extensive validation.
Note:
For identifying symbols that can be treated as compile time constants
`gt_find_constant_arguments()` function can be used.
Expand Down Expand Up @@ -227,6 +229,7 @@ def gt_auto_optimize(
gtx_transformations.LoopBlocking(
blocking_size=blocking_size,
blocking_parameter=blocking_dim,
require_independent_nodes=require_independent_nodes,
),
validate=validate,
validate_all=validate_all,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class LoopBlocking(dace_transformation.SingleStateTransformation):
Args:
blocking_size: The size of the block, denoted as `B` above.
blocking_parameter: On which parameter should we block.
require_independent_nodes: If `True` then the transformation will only
apply if there are independent nodes. Defaults to `False`.
Todo:
- Modify the inner map such that it always starts at zero.
- Allow more than one parameter on which we block.
Expand All @@ -59,6 +60,12 @@ class LoopBlocking(dace_transformation.SingleStateTransformation):
desc="Name of the iteration variable on which to block (must be an exact match);"
" 'I' in the above description.",
)
# Opt-in switch: when set to `True`, `partition_map_output()` reports that no
# partition exists if it did not find any independent node, so the
# transformation is not applied. The default (`False`) does not impose this
# requirement.
require_independent_nodes = dace_properties.Property(
dtype=bool,
default=False,
desc="If 'True' then blocking is only applied if there are independent nodes.",
)

# Set of nodes that are independent of the blocking parameter.
_independent_nodes: Optional[set[dace_nodes.AccessNode]]
_dependent_nodes: Optional[set[dace_nodes.AccessNode]]
Expand All @@ -69,6 +76,7 @@ def __init__(
self,
blocking_size: Optional[int] = None,
blocking_parameter: Optional[Union[gtx_common.Dimension, str]] = None,
require_independent_nodes: Optional[bool] = None,
) -> None:
super().__init__()
if isinstance(blocking_parameter, gtx_common.Dimension):
Expand All @@ -77,6 +85,8 @@ def __init__(
self.blocking_parameter = blocking_parameter
if blocking_size is not None:
self.blocking_size = blocking_size
if require_independent_nodes is not None:
self.require_independent_nodes = require_independent_nodes
self._independent_nodes = None
self._dependent_nodes = None

Expand Down Expand Up @@ -250,6 +260,9 @@ def partition_map_output(
member variables are updated. If the partition does not exist the function
will return `False` and the respective member variables will be `None`.
The function will honor `self.require_independent_nodes`. Thus if no independent
nodes were found the function behaves as if the partition does not exist.
Args:
state: The state on which we operate.
sdfg: The SDFG in which we operate on.
Expand Down Expand Up @@ -295,6 +308,10 @@ def partition_map_output(
if not found_new_independent_node:
break

if self.require_independent_nodes and len(self._independent_nodes) == 0:
self._independent_nodes = None
return False

# After the independent set is computed compute the set of dependent nodes
# as the set of all nodes adjacent to `outer_entry` that are not independent.
self._dependent_nodes = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -803,3 +803,52 @@ def test_loop_blocking_mixked_memlets_2():
assert isinstance(node, dace_nodes.MapEntry) or (node is mx)
else:
assert scope_dict[node] is inner_map_entry


def test_loop_blocking_no_independent_nodes():
    """Check that `require_independent_nodes` gates `LoopBlocking`.

    The SDFG holds a single mapped tasklet that reads `A[__i0, __i1]` and
    writes `B[__i0, __i1]`, and blocking is requested on `__i1`. There is
    nothing inside the map that is independent of the blocking parameter,
    thus the transformation must be rejected while the requirement is active
    and must apply exactly once when the requirement is dropped.
    """
    import dace

    sdfg = dace.SDFG(util.unique_name("mixed_memlet_sdfg"))
    state = sdfg.add_state(is_start_block=True)
    for array_name in ("A", "B"):
        sdfg.add_array(
            array_name,
            shape=(10, 10),
            dtype=dace.float64,
            transient=False,
        )

    source_memlet = dace.Memlet("A[__i0, __i1]")
    sink_memlet = dace.Memlet("B[__i0, __i1]")
    state.add_mapped_tasklet(
        "fully_dependent_computation",
        map_ranges={"__i0": "0:10", "__i1": "0:10"},
        inputs={"__in1": source_memlet},
        code="__out = __in1 + 10.0",
        outputs={"__out": sink_memlet},
        external_edges=True,
    )
    sdfg.validate()

    def apply_blocking(require_independent_nodes):
        # Runs `LoopBlocking` on `__i1` with block size 2 and returns how
        # often the transformation was applied.
        return sdfg.apply_transformations_repeated(
            gtx_transformations.LoopBlocking(
                blocking_size=2,
                blocking_parameter="__i1",
                require_independent_nodes=require_independent_nodes,
            ),
            validate=True,
            validate_all=True,
        )

    # Nothing is independent, so with the requirement enabled the
    # transformation does not apply.
    rejected_count = apply_blocking(True)
    assert rejected_count == 0

    # Without the requirement the very same map is blocked.
    applied_count = apply_blocking(False)
    assert applied_count == 1

0 comments on commit 715be40

Please sign in to comment.