From b0cd25b9263a3c615ee2f3325167944628fbfde5 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Mon, 4 Dec 2023 00:40:07 -0800 Subject: [PATCH] Fix two regressions in v0.15 (#1465) * Schedule tree: Fix support for empty memlets and array use in interstate edges * Move clearing local scope to tasklet processing due to shift in call stacks in v0.15 that may skip said clearing --- dace/codegen/targets/cpu.py | 1 + .../analysis/schedule_tree/sdfg_to_tree.py | 7 +++- tests/codegen/unparse_tasklet_test.py | 40 +++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 72ca554a4a..3944d05b09 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -1460,6 +1460,7 @@ def _generate_Tasklet(self, sdfg, dfg, state_id, node, function_stream, callsite callsite_stream.write('}', sdfg, state_id, node) callsite_stream.write(outer_stream_end.getvalue(), sdfg, state_id, node) + self._locals.clear_scope(self._ldepth + 1) self._dispatcher.defined_vars.exit_scope(node) def unparse_tasklet(self, sdfg, state_id, dfg, node, function_stream, inner_stream, locals, ldepth, diff --git a/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py b/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py index a519f24596..78b2280902 100644 --- a/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py +++ b/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py @@ -88,6 +88,8 @@ def dealias_sdfg(sdfg: SDFG): nsdfg.arrays[name] = child_arr for state in nsdfg.states(): for e in state.edges(): + if e.data.is_empty(): + continue if not state.is_leaf_memlet(e): continue @@ -129,7 +131,10 @@ def dealias_sdfg(sdfg: SDFG): syms.remove(memlet.data) for s in syms: if s in parent_edges: - repl_dict[s] = str(parent_edges[s].data) + if s in nsdfg.arrays: + repl_dict[s] = parent_edges[s].data.data + else: + repl_dict[s] = str(parent_edges[s].data) e.data.replace_dict(repl_dict) for name in child_names: edge = parent_edges[name] diff --git a/tests/codegen/unparse_tasklet_test.py b/tests/codegen/unparse_tasklet_test.py index 5281c109ba..2ed2bd494b 100644 --- a/tests/codegen/unparse_tasklet_test.py +++ b/tests/codegen/unparse_tasklet_test.py @@ -1,9 +1,11 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. import dace import numpy as np +import pytest def test_integer_power(): + @dace.program def powint(A: dace.float64[20], B: dace.float64[20]): for i in dace.map[0:20]: @@ -20,6 +22,7 @@ def powint(A: dace.float64[20], B: dace.float64[20]): def test_integer_power_constant(): + @dace.program def powint(A: dace.float64[20]): for i in dace.map[0:20]: @@ -35,6 +38,7 @@ def powint(A: dace.float64[20]): def test_equality(): + @dace.program def nested(a, b, c): pass @@ -61,8 +65,44 @@ def f32_pow_failure(array): assert ref.dtype == val.dtype +@pytest.mark.gpu +def test_tasklets_with_same_local_name(): + sdfg = dace.SDFG('tester') + sdfg.add_array('A', [4], dace.float32, dace.StorageType.GPU_Global) + state = sdfg.add_state() + me, mx = state.add_map('kernel', dict(i='0:1'), schedule=dace.ScheduleType.GPU_Device) + t1 = state.add_tasklet( + 'sgn', {'a'}, {'b'}, ''' +mylocal: dace.float32 +if a > 0: + mylocal = 1 +else: + mylocal = -1 +b = mylocal + ''') + t2 = state.add_tasklet( + 'sgn', {'a'}, {'b'}, ''' +mylocal: dace.float32 +if a > 0: + mylocal = 1 +else: + mylocal = -1 +b = mylocal + ''') + + a = state.add_read('A') + b = state.add_write('A') + state.add_memlet_path(a, me, t1, dst_conn='a', memlet=dace.Memlet('A[0]')) + state.add_memlet_path(a, me, t2, dst_conn='a', memlet=dace.Memlet('A[1]')) + state.add_memlet_path(t1, mx, b, src_conn='b', memlet=dace.Memlet('A[2]')) + state.add_memlet_path(t2, mx, b, src_conn='b', memlet=dace.Memlet('A[3]')) + + sdfg.compile() + + if __name__ == '__main__': test_integer_power() test_integer_power_constant() test_equality() test_pow_with_implicit_casting() + test_tasklets_with_same_local_name()