From e73cf438c80542559c7d0b94d426f1c86442d8b3 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Wed, 23 Oct 2024 15:47:39 +0200 Subject: [PATCH 01/23] updates to GTIR dace backend --- .../next/program_processors/runners/dace.py | 21 ++++++++++- .../runners/dace_fieldview/workflow.py | 37 ++++++++++++++----- tests/next_tests/definitions.py | 2 + .../ffront_tests/ffront_test_utils.py | 6 ++- 4 files changed, 54 insertions(+), 12 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace.py b/src/gt4py/next/program_processors/runners/dace.py index 2db8e98804..1d3ac61d5d 100644 --- a/src/gt4py/next/program_processors/runners/dace.py +++ b/src/gt4py/next/program_processors/runners/dace.py @@ -8,8 +8,9 @@ import factory +from gt4py._core import definitions as core_defs from gt4py.next import allocators as next_allocators, backend -from gt4py.next.ffront import foast_to_gtir, past_to_itir +from gt4py.next.ffront import foast_to_gtir, foast_to_past, past_to_itir from gt4py.next.program_processors.runners.dace_fieldview import workflow as dace_fieldview_workflow from gt4py.next.program_processors.runners.dace_iterator import workflow as dace_iterator_workflow from gt4py.next.program_processors.runners.gtfn import GTFNBackendFactory @@ -47,10 +48,26 @@ class Params: gtir_cpu = backend.Backend( name="dace.gtir.cpu", - executor=dace_fieldview_workflow.DaCeWorkflowFactory(), + executor=dace_fieldview_workflow.DaCeWorkflowFactory(device_type=core_defs.DeviceType.CPU), allocator=next_allocators.StandardCPUFieldBufferAllocator(), transforms=backend.Transforms( past_to_itir=past_to_itir.past_to_itir_factory(to_gtir=True), foast_to_itir=foast_to_gtir.adapted_foast_to_gtir_factory(cached=True), + field_view_op_to_prog=foast_to_past.operator_to_program_factory( + foast_to_itir_step=foast_to_gtir.adapted_foast_to_gtir_factory(cached=True) + ), + ), +) + +gtir_gpu = backend.Backend( + name="dace.gtir.gpu", + executor=dace_fieldview_workflow.DaCeWorkflowFactory(device_type=core_defs.DeviceType.CUDA), + allocator=next_allocators.StandardGPUFieldBufferAllocator(), + transforms=backend.Transforms( + past_to_itir=past_to_itir.past_to_itir_factory(to_gtir=True), + foast_to_itir=foast_to_gtir.adapted_foast_to_gtir_factory(cached=True), + field_view_op_to_prog=foast_to_past.operator_to_program_factory( + foast_to_itir_step=foast_to_gtir.adapted_foast_to_gtir_factory(cached=True) + ), ), ) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py index ffc33a9f25..ce9f5bcc74 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py @@ -14,16 +14,21 @@ import dace import factory +from dace.transformation.auto import auto_optimize as dace_autoopt from gt4py._core import definitions as core_defs -from gt4py.next import common, config +from gt4py.next import allocators as gtx_allocators, common, config from gt4py.next.iterator import ir as itir +from gt4py.next.iterator import ir as itir, transforms as itir_transforms +from gt4py.next.iterator.transforms import infer_domain from gt4py.next.otf import languages, recipes, stages, step_types, workflow from gt4py.next.otf.binding import interface from gt4py.next.otf.languages import LanguageSettings from gt4py.next.program_processors.runners.dace_common import workflow as dace_workflow -from gt4py.next.program_processors.runners.dace_fieldview import gtir_sdfg -from gt4py.next.type_system import type_translation as tt +from gt4py.next.program_processors.runners.dace_fieldview import ( + gtir_sdfg, + transformations as gtx_transformations, +) @dataclasses.dataclass(frozen=True) @@ -33,7 +38,8 @@ class DaCeTranslator( ], step_types.TranslationStep[languages.SDFG, languages.LanguageSettings], ): - device_type: core_defs.DeviceType = core_defs.DeviceType.CPU + device_type: core_defs.DeviceType + auto_optimize: bool def _language_settings(self) -> languages.LanguageSettings: return languages.LanguageSettings( @@ -45,11 +51,20 @@ def generate_sdfg( ir: itir.Program, offset_provider: common.OffsetProvider, column_axis: Optional[common.Dimension], + auto_opt: bool, + on_gpu: bool, ) -> dace.SDFG: - # TODO(edopao): Call IR transformations and domain inference, finally lower IR to SDFG - raise NotImplementedError + ir = itir_transforms.apply_fieldview_transforms(ir, offset_provider=offset_provider) + ir = infer_domain.infer_program(ir, offset_provider=offset_provider) - return gtir_sdfg.build_sdfg_from_gtir(program=ir, offset_provider=offset_provider) + sdfg = gtir_sdfg.build_sdfg_from_gtir(ir, offset_provider=offset_provider) + + if auto_opt: + return gtx_transformations.gt_auto_optimize(sdfg, gpu=on_gpu) + elif on_gpu: + dace_autoopt.apply_gpu_storage(sdfg) + + return sdfg def __call__( self, inp: stages.CompilableProgram @@ -62,11 +77,13 @@ def __call__( program, inp.args.offset_provider, inp.args.column_axis, + auto_opt=self.auto_optimize, + on_gpu=(self.device_type == gtx_allocators.CUPY_DEVICE), ) param_types = tuple( - interface.Parameter(param, tt.from_value(arg)) - for param, arg in zip(sdfg.arg_names, inp.args.args) + interface.Parameter(param, arg_type) + for param, arg_type in zip(sdfg.arg_names, inp.args.args) ) module: stages.ProgramSource[languages.SDFG, languages.LanguageSettings] = ( @@ -100,10 +117,12 @@ class Params: cmake_build_type: config.CMakeBuildType = factory.LazyFunction( lambda: config.CMAKE_BUILD_TYPE ) + auto_optimize: bool = False translation = factory.SubFactory( DaCeTranslationStepFactory, device_type=factory.SelfAttribute("..device_type"), + auto_optimize=factory.SelfAttribute("..auto_optimize"), ) bindings = _no_bindings compilation = factory.SubFactory( diff --git a/tests/next_tests/definitions.py b/tests/next_tests/definitions.py index 123384a098..f11e4b8876 100644 --- a/tests/next_tests/definitions.py +++ b/tests/next_tests/definitions.py @@ -71,6 +71,7 @@ class OptionalProgramBackendId(_PythonObjectIdMixin, str, enum.Enum): DACE_CPU = "gt4py.next.program_processors.runners.dace.itir_cpu" DACE_GPU = "gt4py.next.program_processors.runners.dace.itir_gpu" GTIR_DACE_CPU = "gt4py.next.program_processors.runners.dace.gtir_cpu" + GTIR_DACE_GPU = "gt4py.next.program_processors.runners.dace.gtir_gpu" class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum): @@ -175,6 +176,7 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum): OptionalProgramBackendId.DACE_CPU: DACE_SKIP_TEST_LIST, OptionalProgramBackendId.DACE_GPU: DACE_SKIP_TEST_LIST, OptionalProgramBackendId.GTIR_DACE_CPU: GTIR_DACE_SKIP_TEST_LIST, + OptionalProgramBackendId.GTIR_DACE_GPU: GTIR_DACE_SKIP_TEST_LIST, ProgramBackendId.GTFN_CPU: GTFN_SKIP_TEST_LIST + [(USES_SCAN_NESTED, XFAIL, UNSUPPORTED_MESSAGE)], ProgramBackendId.GTFN_CPU_IMPERATIVE: GTFN_SKIP_TEST_LIST diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py index a0e72ede8d..5f509f2bfe 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py @@ -62,12 +62,16 @@ def __gt_allocator__( next_tests.definitions.OptionalProgramBackendId.DACE_CPU, marks=pytest.mark.requires_dace, ), + pytest.param( + next_tests.definitions.OptionalProgramBackendId.DACE_GPU, + marks=(pytest.mark.requires_dace, pytest.mark.requires_gpu), + ), pytest.param( next_tests.definitions.OptionalProgramBackendId.GTIR_DACE_CPU, marks=pytest.mark.requires_dace, ), pytest.param( - next_tests.definitions.OptionalProgramBackendId.DACE_GPU, + next_tests.definitions.OptionalProgramBackendId.GTIR_DACE_GPU, marks=(pytest.mark.requires_dace, pytest.mark.requires_gpu), ), ], From a3c02e4e602c887ab2ef00c8c4858070b43679f6 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Wed, 23 Oct 2024 15:49:42 +0200 Subject: [PATCH 02/23] fix pre-commit --- .../next/program_processors/runners/dace_fieldview/workflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py index ce9f5bcc74..70f8542a4c 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py @@ -18,7 +18,6 @@ from gt4py._core import definitions as core_defs from gt4py.next import allocators as gtx_allocators, common, config -from gt4py.next.iterator import ir as itir from gt4py.next.iterator import ir as itir, transforms as itir_transforms from gt4py.next.iterator.transforms import infer_domain from gt4py.next.otf import languages, recipes, stages, step_types, workflow From 2ea04174e79c5b3594a2b8526c64cf77ae42561d Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 25 Oct 2024 12:05:41 +0200 Subject: [PATCH 03/23] remove extra call to domain inference --- .../next/program_processors/runners/dace_fieldview/workflow.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py index 70f8542a4c..10f1cbcd35 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py @@ -54,8 +54,6 @@ def generate_sdfg( on_gpu: bool, ) -> dace.SDFG: ir = itir_transforms.apply_fieldview_transforms(ir, offset_provider=offset_provider) - ir = infer_domain.infer_program(ir, offset_provider=offset_provider) - sdfg = gtir_sdfg.build_sdfg_from_gtir(ir, offset_provider=offset_provider) if auto_opt: From cc24240104d55ccd0f1de126466b072ba06e3a40 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 25 Oct 2024 12:07:51 +0200 Subject: [PATCH 04/23] fix pre-commit --- .../next/program_processors/runners/dace_fieldview/workflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py index 10f1cbcd35..725c2d87df 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py @@ -19,7 +19,6 @@ from gt4py._core import definitions as core_defs from gt4py.next import allocators as gtx_allocators, common, config from gt4py.next.iterator import ir as itir, transforms as itir_transforms -from gt4py.next.iterator.transforms import infer_domain from gt4py.next.otf import languages, recipes, stages, step_types, workflow from gt4py.next.otf.binding import interface from gt4py.next.otf.languages import LanguageSettings From df636abb59001f4dc8d736b151be48f5f606be6d Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 25 Oct 2024 13:12:45 +0200 Subject: [PATCH 05/23] Review comments --- .../program_processors/runners/dace_fieldview/workflow.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py index 725c2d87df..85ae95c432 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py @@ -14,7 +14,6 @@ import dace import factory -from dace.transformation.auto import auto_optimize as dace_autoopt from gt4py._core import definitions as core_defs from gt4py.next import allocators as gtx_allocators, common, config @@ -56,9 +55,9 @@ def generate_sdfg( sdfg = gtir_sdfg.build_sdfg_from_gtir(ir, offset_provider=offset_provider) if auto_opt: - return gtx_transformations.gt_auto_optimize(sdfg, gpu=on_gpu) + gtx_transformations.gt_auto_optimize(sdfg, gpu=on_gpu) elif on_gpu: - dace_autoopt.apply_gpu_storage(sdfg) + gtx_transformations.gt_gpu_transformation(sdfg, try_removing_trivial_maps=False) return sdfg From 5b4d2bf0873d1d1555c20fa600953f43b31beae8 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 12:37:56 +0100 Subject: [PATCH 06/23] enable tests --- tests/next_tests/definitions.py | 5 ++++- .../feature_tests/dace/test_orchestration.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/next_tests/definitions.py b/tests/next_tests/definitions.py index c8319d4010..e4de62f028 100644 --- a/tests/next_tests/definitions.py +++ b/tests/next_tests/definitions.py @@ -148,7 +148,10 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum): (USES_ZERO_DIMENSIONAL_FIELDS, XFAIL, UNSUPPORTED_MESSAGE), ] GTIR_DACE_SKIP_TEST_LIST = [ - (ALL, SKIP, UNSUPPORTED_MESSAGE), + (USES_DYNAMIC_OFFSETS, XFAIL, UNSUPPORTED_MESSAGE), + (USES_NEGATIVE_MODULO, XFAIL, UNSUPPORTED_MESSAGE), + (USES_SCAN, XFAIL, UNSUPPORTED_MESSAGE), + (USES_SPARSE_FIELDS_AS_OUTPUT, XFAIL, UNSUPPORTED_MESSAGE), ] EMBEDDED_SKIP_LIST = [ (USES_DYNAMIC_OFFSETS, XFAIL, UNSUPPORTED_MESSAGE), diff --git a/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py b/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py index 306f0034b5..1da34db3c0 100644 --- a/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py +++ b/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py @@ -32,7 +32,10 @@ try: import dace - from gt4py.next.program_processors.runners.dace import run_dace_cpu, run_dace_gpu + from gt4py.next.program_processors.runners.dace import ( + itir_cpu as run_dace_cpu, + itir_gpu as run_dace_gpu, + ) except ImportError: dace: Optional[ModuleType] = None # type:ignore[no-redef] run_dace_cpu: Optional[next_backend.Backend] = None From 1ad362109fc6db218c748b29936c3b77aa31a838 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 13:19:27 +0100 Subject: [PATCH 07/23] make dace.gtir backend non-cached --- .../next/program_processors/runners/dace.py | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace.py b/src/gt4py/next/program_processors/runners/dace.py index 1d3ac61d5d..9a45b6a29a 100644 --- a/src/gt4py/next/program_processors/runners/dace.py +++ b/src/gt4py/next/program_processors/runners/dace.py @@ -8,8 +8,7 @@ import factory -from gt4py._core import definitions as core_defs -from gt4py.next import allocators as next_allocators, backend +from gt4py.next import backend from gt4py.next.ffront import foast_to_gtir, foast_to_past, past_to_itir from gt4py.next.program_processors.runners.dace_fieldview import workflow as dace_fieldview_workflow from gt4py.next.program_processors.runners.dace_iterator import workflow as dace_iterator_workflow @@ -26,12 +25,12 @@ class Params: ), ) auto_optimize = factory.Trait( - otf_workflow__translation__auto_optimize=True, name_temps="_opt" + otf_workflow__translation__auto_optimize=True, name_postfix="_opt" ) use_field_canonical_representation: bool = False name = factory.LazyAttribute( - lambda o: f"run_dace_{o.name_device}{o.name_temps}{o.name_cached}{o.name_postfix}" + lambda o: f"run_dace_{o.name_device}{o.name_temps}{o.name_cached}{o.name_postfix}.itir" ) transforms = backend.DEFAULT_TRANSFORMS @@ -46,28 +45,28 @@ class Params: itir_cpu = run_dace_cpu itir_gpu = run_dace_gpu -gtir_cpu = backend.Backend( - name="dace.gtir.cpu", - executor=dace_fieldview_workflow.DaCeWorkflowFactory(device_type=core_defs.DeviceType.CPU), - allocator=next_allocators.StandardCPUFieldBufferAllocator(), - transforms=backend.Transforms( - past_to_itir=past_to_itir.past_to_itir_factory(to_gtir=True), - foast_to_itir=foast_to_gtir.adapted_foast_to_gtir_factory(cached=True), - field_view_op_to_prog=foast_to_past.operator_to_program_factory( - foast_to_itir_step=foast_to_gtir.adapted_foast_to_gtir_factory(cached=True) - ), - ), -) - -gtir_gpu = backend.Backend( - name="dace.gtir.gpu", - executor=dace_fieldview_workflow.DaCeWorkflowFactory(device_type=core_defs.DeviceType.CUDA), - allocator=next_allocators.StandardGPUFieldBufferAllocator(), - transforms=backend.Transforms( + +class DaCeFieldviewBackendFactory(GTFNBackendFactory): + class Params: + otf_workflow = factory.SubFactory( + dace_fieldview_workflow.DaCeWorkflowFactory, + device_type=factory.SelfAttribute("..device_type"), + auto_optimize=factory.SelfAttribute("..auto_optimize"), + ) + auto_optimize = factory.Trait(name_postfix="_opt") + + name = factory.LazyAttribute( + lambda o: f"run_dace_{o.name_device}{o.name_temps}{o.name_cached}{o.name_postfix}.gtir" + ) + + transforms = backend.Transforms( past_to_itir=past_to_itir.past_to_itir_factory(to_gtir=True), - foast_to_itir=foast_to_gtir.adapted_foast_to_gtir_factory(cached=True), + foast_to_itir=foast_to_gtir.adapted_foast_to_gtir_factory(), field_view_op_to_prog=foast_to_past.operator_to_program_factory( - foast_to_itir_step=foast_to_gtir.adapted_foast_to_gtir_factory(cached=True) + foast_to_itir_step=foast_to_gtir.adapted_foast_to_gtir_factory() ), - ), -) + ) + + +gtir_cpu = DaCeFieldviewBackendFactory(cached=True, auto_optimize=False) +gtir_gpu = DaCeFieldviewBackendFactory(gpu=True, cached=True, auto_optimize=False) From 489869cb366ff2e41e186a6fe25d3a0b40256555 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 13:29:33 +0100 Subject: [PATCH 08/23] fix pytest marker --- tests/next_tests/definitions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/next_tests/definitions.py b/tests/next_tests/definitions.py index 7c40eb789b..5471333bd5 100644 --- a/tests/next_tests/definitions.py +++ b/tests/next_tests/definitions.py @@ -146,11 +146,11 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum): (USES_TUPLE_ARGS, XFAIL, UNSUPPORTED_MESSAGE), (USES_TUPLE_RETURNS, XFAIL, UNSUPPORTED_MESSAGE), (USES_ZERO_DIMENSIONAL_FIELDS, XFAIL, UNSUPPORTED_MESSAGE), - (USES_INDEX_BUILTIN, XFAIL, UNSUPPORTED_MESSAGE), (STARTS_FROM_GTIR_PROGRAM, SKIP, UNSUPPORTED_MESSAGE), ] GTIR_DACE_SKIP_TEST_LIST = [ (USES_DYNAMIC_OFFSETS, XFAIL, UNSUPPORTED_MESSAGE), + (USES_INDEX_BUILTIN, XFAIL, UNSUPPORTED_MESSAGE), (USES_NEGATIVE_MODULO, XFAIL, UNSUPPORTED_MESSAGE), (USES_SCAN, XFAIL, UNSUPPORTED_MESSAGE), (USES_SPARSE_FIELDS_AS_OUTPUT, XFAIL, UNSUPPORTED_MESSAGE), From 27e5b908e8fe23c8813e53996b12910d91f0644b Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 13:47:14 +0100 Subject: [PATCH 09/23] fix pytest marker (1) --- .../feature_tests/iterator_tests/test_program.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_program.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_program.py index db1c2a42aa..f6fd0a48d0 100644 --- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_program.py +++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_program.py @@ -88,6 +88,7 @@ def index_program_shift(out, size): ) +@pytest.mark.starts_from_gtir_program @pytest.mark.uses_index_builtin def test_index_builtin_shift(program_processor): program_processor, validate = program_processor From 445b7824b9953a684a5530701f7a7f6d03854b15 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 15:21:28 +0100 Subject: [PATCH 10/23] fix parsing of tuple program args --- .../runners/dace_common/dace_backend.py | 22 ++++++++++++++----- .../runners/dace_common/workflow.py | 4 ++-- .../runners/dace_fieldview/gtir_sdfg.py | 19 ++++++++++++---- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py index 5d3cc7a358..98217bf600 100644 --- a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py +++ b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py @@ -88,10 +88,15 @@ def _get_shape_args( for name, value in args.items(): for sym, size in zip(arrays[name].shape, value.shape, strict=True): if isinstance(sym, dace.symbol): - assert sym.name not in shape_args - shape_args[sym.name] = size + if sym.name not in shape_args: + shape_args[sym.name] = size + elif shape_args[sym.name] != size: + # TODO(edopao): This case is only hit if all fields in a tuple have the same dims and sizes. + raise ValueError( + f"Expected array size {sym.name} for arg {name} to be {shape_args[sym.name]}, got {size}." + ) elif sym != size: - raise RuntimeError( + raise ValueError( f"Expected shape {arrays[name].shape} for arg {name}, got {value.shape}." ) return shape_args @@ -109,10 +114,15 @@ def _get_stride_args( f"Stride ({stride_size} bytes) for argument '{sym}' must be a multiple of item size ({value.itemsize} bytes)." ) if isinstance(sym, dace.symbol): - assert sym.name not in stride_args - stride_args[str(sym)] = stride + if sym.name not in stride_args: + stride_args[str(sym)] = stride + elif stride_args[sym.name] != stride: + # TODO(edopao): This case is only hit if all fields in a tuple have the same dims and sizes. + raise ValueError( + f"Expected array stride {sym.name} for arg {name} to be {stride_args[sym.name]}, got {stride}." + ) elif sym != stride: - raise RuntimeError( + raise ValueError( f"Expected stride {arrays[name].strides} for arg {name}, got {value.strides}." ) return stride_args diff --git a/src/gt4py/next/program_processors/runners/dace_common/workflow.py b/src/gt4py/next/program_processors/runners/dace_common/workflow.py index ae0a24605d..91e83dba9d 100644 --- a/src/gt4py/next/program_processors/runners/dace_common/workflow.py +++ b/src/gt4py/next/program_processors/runners/dace_common/workflow.py @@ -17,7 +17,7 @@ from dace.codegen.compiled_sdfg import _array_interface_ptr as get_array_interface_ptr from gt4py._core import definitions as core_defs -from gt4py.next import common, config +from gt4py.next import common, config, utils as gtx_utils from gt4py.next.otf import arguments, languages, stages, step_types, workflow from gt4py.next.otf.compilation import cache from gt4py.next.program_processors.runners.dace_common import dace_backend, utility as dace_utils @@ -116,7 +116,7 @@ def decorated_program( args = (*args, *arguments.iter_size_args(args)) if sdfg_program._lastargs: - kwargs = dict(zip(sdfg.arg_names, args, strict=True)) + kwargs = dict(zip(sdfg.arg_names, gtx_utils.flatten_nested_tuple(args), strict=True)) kwargs.update(dace_backend.get_sdfg_conn_args(sdfg, offset_provider, on_gpu)) use_fast_call = True diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/gtir_sdfg.py b/src/gt4py/next/program_processors/runners/dace_fieldview/gtir_sdfg.py index 48c666a363..0642ba003b 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/gtir_sdfg.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/gtir_sdfg.py @@ -217,6 +217,7 @@ def _add_storage( name: str, gt_type: ts.DataType, transient: bool = True, + tuple_name: Optional[str] = None, ) -> list[tuple[str, ts.DataType]]: """ Add storage in the SDFG for a given GT4Py data symbol. @@ -236,6 +237,7 @@ def _add_storage( name: Symbol Name to be allocated. gt_type: GT4Py symbol type. transient: True when the data symbol has to be allocated as internal storage. + tuple_name: Must be set for tuple fields in order to use the same array shape and strides symbols. Returns: List of tuples '(data_name, gt_type)' where 'data_name' is the name of @@ -250,7 +252,9 @@ def _add_storage( name, gt_type, flatten=True ): tuple_fields.extend( - self._add_storage(sdfg, symbolic_arguments, tname, tsymbol_type, transient) + self._add_storage( + sdfg, symbolic_arguments, tname, tsymbol_type, transient, tuple_name=name + ) ) return tuple_fields @@ -260,9 +264,16 @@ def _add_storage( return self._add_storage(sdfg, symbolic_arguments, name, gt_type.dtype, transient) # handle default case: field with one or more dimensions dc_dtype = dace_utils.as_dace_type(gt_type.dtype) - # use symbolic shape, which allows to invoke the program with fields of different size; - # and symbolic strides, which enables decoupling the memory layout from generated code. - sym_shape, sym_strides = self._make_array_shape_and_strides(name, gt_type.dims) + if tuple_name is None: + # Use symbolic shape, which allows to invoke the program with fields of different size; + # and symbolic strides, which enables decoupling the memory layout from generated code. + sym_shape, sym_strides = self._make_array_shape_and_strides(name, gt_type.dims) + else: + # All fields in a tuple must have the same dims and sizes, + # therefore we use the same shape and strides symbols based on 'tuple_name'. + sym_shape, sym_strides = self._make_array_shape_and_strides( + tuple_name, gt_type.dims + ) sdfg.add_array(name, sym_shape, dc_dtype, strides=sym_strides, transient=transient) return [(name, gt_type)] From 83b7c257b60e65549fd10a6cb5fcaab65985f4b1 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 15:33:15 +0100 Subject: [PATCH 11/23] cleanup gtir tests --- .../dace_tests/test_gtir_to_sdfg.py | 53 +++---------------- 1 file changed, 6 insertions(+), 47 deletions(-) diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_gtir_to_sdfg.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_gtir_to_sdfg.py index 9f5498b4a7..dea9f2879b 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_gtir_to_sdfg.py +++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_gtir_to_sdfg.py @@ -262,16 +262,8 @@ def test_gtir_tuple_args(): sdfg = dace_backend.build_sdfg_from_gtir(testee, CARTESIAN_OFFSETS) x_fields = (a, a, b) - x_symbols = dict( - __x_0_size_0=FSYMBOLS["__x_size_0"], - __x_0_stride_0=FSYMBOLS["__x_stride_0"], - __x_1_0_size_0=FSYMBOLS["__x_size_0"], - __x_1_0_stride_0=FSYMBOLS["__x_stride_0"], - __x_1_1_size_0=FSYMBOLS["__y_size_0"], - __x_1_1_stride_0=FSYMBOLS["__y_stride_0"], - ) - sdfg(*x_fields, c, **FSYMBOLS, **x_symbols) + sdfg(*x_fields, c, **FSYMBOLS) assert np.allclose(c, a * 2 + b) @@ -432,16 +424,8 @@ def test_gtir_tuple_return(): sdfg = dace_backend.build_sdfg_from_gtir(testee, CARTESIAN_OFFSETS) z_fields = (np.empty_like(a), np.empty_like(a), np.empty_like(a)) - z_symbols = dict( - __z_0_0_size_0=FSYMBOLS["__x_size_0"], - __z_0_0_stride_0=FSYMBOLS["__x_stride_0"], - __z_0_1_size_0=FSYMBOLS["__x_size_0"], - __z_0_1_stride_0=FSYMBOLS["__x_stride_0"], - __z_1_size_0=FSYMBOLS["__x_size_0"], - __z_1_stride_0=FSYMBOLS["__x_stride_0"], - ) - sdfg(a, b, *z_fields, **FSYMBOLS, **z_symbols) + sdfg(a, b, *z_fields, **FSYMBOLS) assert np.allclose(z_fields[0], a + b) assert np.allclose(z_fields[1], a) assert np.allclose(z_fields[2], b) @@ -694,18 +678,11 @@ def test_gtir_cond_with_tuple_return(): b = np.random.rand(N) c = np.random.rand(N) - z_symbols = dict( - __z_0_size_0=FSYMBOLS["__x_size_0"], - __z_0_stride_0=FSYMBOLS["__x_stride_0"], - __z_1_size_0=FSYMBOLS["__x_size_0"], - __z_1_stride_0=FSYMBOLS["__x_stride_0"], - ) - sdfg = dace_backend.build_sdfg_from_gtir(testee, CARTESIAN_OFFSETS) for s in [False, True]: z_fields = (np.empty_like(a), np.empty_like(a)) - sdfg(a, b, c, *z_fields, pred=np.bool_(s), **FSYMBOLS, **z_symbols) + sdfg(a, b, c, *z_fields, pred=np.bool_(s), **FSYMBOLS) assert np.allclose(z_fields[0], a if s else b) assert np.allclose(z_fields[1], b if s else a) @@ -1833,14 +1810,8 @@ def test_gtir_let_lambda_with_tuple1(): sdfg = dace_backend.build_sdfg_from_gtir(testee, CARTESIAN_OFFSETS) z_fields = (np.empty_like(a), np.empty_like(a)) - z_symbols = dict( - __z_0_size_0=FSYMBOLS["__x_size_0"], - __z_0_stride_0=FSYMBOLS["__x_stride_0"], - __z_1_size_0=FSYMBOLS["__x_size_0"], - __z_1_stride_0=FSYMBOLS["__x_stride_0"], - ) - sdfg(a, b, *z_fields, **FSYMBOLS, **z_symbols) + sdfg(a, b, *z_fields, **FSYMBOLS) assert np.allclose(z_fields[0], a) assert np.allclose(z_fields[1], b) @@ -1879,16 +1850,8 @@ def test_gtir_let_lambda_with_tuple2(): sdfg = dace_backend.build_sdfg_from_gtir(testee, CARTESIAN_OFFSETS) z_fields = (np.empty_like(a), np.empty_like(a), np.empty_like(a)) - z_symbols = dict( - __z_0_size_0=FSYMBOLS["__x_size_0"], - __z_0_stride_0=FSYMBOLS["__x_stride_0"], - __z_1_size_0=FSYMBOLS["__x_size_0"], - __z_1_stride_0=FSYMBOLS["__x_stride_0"], - __z_2_size_0=FSYMBOLS["__x_size_0"], - __z_2_stride_0=FSYMBOLS["__x_stride_0"], - ) - sdfg(a, b, *z_fields, **FSYMBOLS, **z_symbols) + sdfg(a, b, *z_fields, **FSYMBOLS) assert np.allclose(z_fields[0], a + b) assert np.allclose(z_fields[1], val) assert np.allclose(z_fields[2], b) @@ -1939,13 +1902,9 @@ def test_gtir_if_scalars(): d2 = np.random.randint(0, 1000) sdfg = dace_backend.build_sdfg_from_gtir(testee, {}) - x_symbols = dict( - __x_0_size_0=FSYMBOLS["__x_size_0"], - __x_0_stride_0=FSYMBOLS["__x_stride_0"], - ) for s in [False, True]: - sdfg(x_0=a, x_1_0=d1, x_1_1=d2, z=b, pred=np.bool_(s), **FSYMBOLS, **x_symbols) + sdfg(x_0=a, x_1_0=d1, x_1_1=d2, z=b, pred=np.bool_(s), **FSYMBOLS) assert np.allclose(b, (a + d1 if s else a + d2)) From ef82b1fee5f36ce64bfc9086c9424375d0c6d766 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 15:54:27 +0100 Subject: [PATCH 12/23] fix for zero-dimensional arrays on gpu --- .../runners/dace_common/dace_backend.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py index 98217bf600..063f794652 100644 --- a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py +++ b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py @@ -86,6 +86,9 @@ def _get_shape_args( ) -> dict[str, int]: shape_args: dict[str, int] = {} for name, value in args.items(): + if len(value.shape) == 0: + # zero-dimensional array + continue for sym, size in zip(arrays[name].shape, value.shape, strict=True): if isinstance(sym, dace.symbol): if sym.name not in shape_args: @@ -94,7 +97,7 @@ def _get_shape_args( # TODO(edopao): This case is only hit if all fields in a tuple have the same dims and sizes. raise ValueError( f"Expected array size {sym.name} for arg {name} to be {shape_args[sym.name]}, got {size}." - ) + ) elif sym != size: raise ValueError( f"Expected shape {arrays[name].shape} for arg {name}, got {value.shape}." @@ -107,6 +110,9 @@ def _get_stride_args( ) -> dict[str, int]: stride_args = {} for name, value in args.items(): + if len(value.shape) == 0: + # zero-dimensional array + continue for sym, stride_size in zip(arrays[name].strides, value.strides, strict=True): stride, remainder = divmod(stride_size, value.itemsize) if remainder != 0: From 0f3fd38f24a12c386985c88057f434ff47dce520 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 15:57:11 +0100 Subject: [PATCH 13/23] fix pre-commit --- .../next/program_processors/runners/dace_common/dace_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py index 063f794652..ca1119202d 100644 --- a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py +++ b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py @@ -97,7 +97,7 @@ def _get_shape_args( # TODO(edopao): This case is only hit if all fields in a tuple have the same dims and sizes. raise ValueError( f"Expected array size {sym.name} for arg {name} to be {shape_args[sym.name]}, got {size}." - ) + ) elif sym != size: raise ValueError( f"Expected shape {arrays[name].shape} for arg {name}, got {value.shape}." From a91ba4fb371325b66641fd50e1a36abd621e4cc7 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 16:05:21 +0100 Subject: [PATCH 14/23] fix for zero-dimensional arrays on gpu (1) --- .../runners/dace_common/dace_backend.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py index ca1119202d..79b9351c45 100644 --- a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py +++ b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py @@ -86,9 +86,6 @@ def _get_shape_args( ) -> dict[str, int]: shape_args: dict[str, int] = {} for name, value in args.items(): - if len(value.shape) == 0: - # zero-dimensional array - continue for sym, size in zip(arrays[name].shape, value.shape, strict=True): if isinstance(sym, dace.symbol): if sym.name not in shape_args: @@ -110,9 +107,6 @@ def _get_stride_args( ) -> dict[str, int]: stride_args = {} for name, value in args.items(): - if len(value.shape) == 0: - # zero-dimensional array - continue for sym, stride_size in zip(arrays[name].strides, value.strides, strict=True): stride, remainder = divmod(stride_size, value.itemsize) if remainder != 0: @@ -171,9 +165,12 @@ def get_sdfg_args( sdfg: The SDFG for which we want to get the arguments. """ offset_provider = kwargs["offset_provider"] + xp = cp if on_gpu else np dace_args = _get_args(sdfg, args, use_field_canonical_representation) - dace_field_args = {n: v for n, v in dace_args.items() if not np.isscalar(v)} + dace_field_args = { + n: v for n, v in dace_args.items() if (not xp.isscalar(v)) and (len(v.shape) != 0) + } dace_conn_args = get_sdfg_conn_args(sdfg, offset_provider, on_gpu) dace_shapes = _get_shape_args(sdfg.arrays, dace_field_args) dace_conn_shapes = _get_shape_args(sdfg.arrays, dace_conn_args) From 88e27a26df55131e0def78d4ff7af24de989dec2 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 16:33:32 +0100 Subject: [PATCH 15/23] fix for zero-dimensional arrays on gpu (2) --- .../program_processors/runners/dace_common/dace_backend.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py index 79b9351c45..8fe5aa2d72 100644 --- a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py +++ b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py @@ -32,7 +32,7 @@ def _convert_arg(arg: Any, sdfg_param: str, use_field_canonical_representation: # Note that 'ndarray.item()' always transforms the numpy scalar to a python scalar, # which may change its precision. To avoid this, we use here the empty tuple as index # for 'ndarray.__getitem__()'. - return arg.ndarray[()] + return arg.asnumpy()[()] # field domain offsets are not supported non_zero_offsets = [ (dim, dim_range) @@ -165,12 +165,9 @@ def get_sdfg_args( sdfg: The SDFG for which we want to get the arguments. """ offset_provider = kwargs["offset_provider"] - xp = cp if on_gpu else np dace_args = _get_args(sdfg, args, use_field_canonical_representation) - dace_field_args = { - n: v for n, v in dace_args.items() if (not xp.isscalar(v)) and (len(v.shape) != 0) - } + dace_field_args = {n: v for n, v in dace_args.items() if not np.isscalar(v)} dace_conn_args = get_sdfg_conn_args(sdfg, offset_provider, on_gpu) dace_shapes = _get_shape_args(sdfg.arrays, dace_field_args) dace_conn_shapes = _get_shape_args(sdfg.arrays, dace_conn_args) From 803612515fc85cf9da94b3a00ed14cc0203a05e8 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 16:41:21 +0100 Subject: [PATCH 16/23] try removing simplify pass for gpu errors --- .../runners/dace_fieldview/transformations/gpu_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/gpu_utils.py b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/gpu_utils.py index 16c9600a3a..ac4d37c29f 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/gpu_utils.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/gpu_utils.py @@ -24,6 +24,7 @@ def gt_gpu_transformation( sdfg: dace.SDFG, + run_simplify: bool = False, try_removing_trivial_maps: bool = True, use_gpu_storage: bool = True, gpu_block_size: Optional[Sequence[int | str] | str] = None, @@ -46,6 +47,7 @@ def gt_gpu_transformation( Args: sdfg: The SDFG that should be processed. + run_simplify: Run simplify pass after applying GPU transformations. try_removing_trivial_maps: Try to get rid of trivial maps by incorporating them. use_gpu_storage: Assume that the non global memory is already on the GPU. This will avoid the data copy from host to GPU memory. @@ -83,7 +85,9 @@ def gt_gpu_transformation( simplify=False, ) # The documentation recommends to run simplify afterwards - gtx_transformations.gt_simplify(sdfg) + # TODO(phimuell): Re-enable simplify after dace is upgraded to v1.0.0 + if run_simplify: + gtx_transformations.gt_simplify(sdfg) if try_removing_trivial_maps: # A Tasklet, outside of a Map, that writes into an array on GPU can not work From 66d3be719da135a2e7bb11444e755dadfc0b67aa Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 17:01:54 +0100 Subject: [PATCH 17/23] Run simplify before gpu transformations --- .../runners/dace_fieldview/transformations/gpu_utils.py | 6 +----- .../program_processors/runners/dace_fieldview/workflow.py | 1 + 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/gpu_utils.py b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/gpu_utils.py index ac4d37c29f..16c9600a3a 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/gpu_utils.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/gpu_utils.py @@ -24,7 +24,6 @@ def gt_gpu_transformation( sdfg: dace.SDFG, - run_simplify: bool = False, try_removing_trivial_maps: bool = True, use_gpu_storage: bool = True, gpu_block_size: Optional[Sequence[int | str] | str] = None, @@ -47,7 +46,6 @@ def gt_gpu_transformation( Args: sdfg: The SDFG that should be processed. - run_simplify: Run simplify pass after applying GPU transformations. try_removing_trivial_maps: Try to get rid of trivial maps by incorporating them. use_gpu_storage: Assume that the non global memory is already on the GPU. This will avoid the data copy from host to GPU memory. @@ -85,9 +83,7 @@ def gt_gpu_transformation( simplify=False, ) # The documentation recommends to run simplify afterwards - # TODO(phimuell): Re-enable simplify after dace is upgraded to v1.0.0 - if run_simplify: - gtx_transformations.gt_simplify(sdfg) + gtx_transformations.gt_simplify(sdfg) if try_removing_trivial_maps: # A Tasklet, outside of a Map, that writes into an array on GPU can not work diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py index 85ae95c432..0fe19ab1e0 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py @@ -57,6 +57,7 @@ def generate_sdfg( if auto_opt: gtx_transformations.gt_auto_optimize(sdfg, gpu=on_gpu) elif on_gpu: + gtx_transformations.gt_simplify(sdfg) gtx_transformations.gt_gpu_transformation(sdfg, try_removing_trivial_maps=False) return sdfg From ef1df144ec7e3bc3a19ae414a59a5a95832393a3 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 17:45:30 +0100 Subject: [PATCH 18/23] fix for array symbols --- .../next/program_processors/runners/dace_fieldview/gtir_sdfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/gtir_sdfg.py b/src/gt4py/next/program_processors/runners/dace_fieldview/gtir_sdfg.py index 0642ba003b..f19f78d9d2 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/gtir_sdfg.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/gtir_sdfg.py @@ -280,7 +280,7 @@ def _add_storage( elif isinstance(gt_type, ts.ScalarType): dc_dtype = dace_utils.as_dace_type(gt_type) - if name in symbolic_arguments: + if dace_utils.is_field_symbol(name) or name in symbolic_arguments: if name in sdfg.symbols: # Sometimes, when the field domain is implicitly derived from the # field domain, the gt4py lowering adds the field size as a scalar From 02a9391f7bdfdb2ecdea52debfe8a7543bd12ba4 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 18:16:14 +0100 Subject: [PATCH 19/23] ADd pytest marker for failing gpu tests --- pyproject.toml | 1 + tests/next_tests/definitions.py | 5 ++++- .../feature_tests/ffront_tests/test_execution.py | 2 ++ .../feature_tests/ffront_tests/test_where.py | 1 + .../multi_feature_tests/ffront_tests/test_laplacian.py | 3 +++ 5 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 64f08e671e..e43ef4a019 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -238,6 +238,7 @@ markers = [ 'requires_atlas: tests that require `atlas4py` bindings package', 'requires_dace: tests that require `dace` package', 'requires_gpu: tests that require a NVidia GPU (`cupy` and `cudatoolkit` are required)', + 'requires_gtir_dace_gpu_fix: tests that still do not work on GTIR DaCe backend', 'starts_from_gtir_program: tests that require backend to start lowering from GTIR program', 'uses_applied_shifts: tests that require backend support for applied-shifts', 'uses_constant_fields: tests that require backend support for constant fields', diff --git a/tests/next_tests/definitions.py b/tests/next_tests/definitions.py index 5471333bd5..2d41588141 100644 --- a/tests/next_tests/definitions.py +++ b/tests/next_tests/definitions.py @@ -87,6 +87,8 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum): # to avoid needing to mark all tests. ALL = "all" REQUIRES_ATLAS = "requires_atlas" +# TODO(edopao): Remove, once issues are fixed +REQUIRES_GTIR_DACE_GPU_FIX = "requires_gtir_dace_gpu_fix" # TODO(havogt): Remove, skipped during refactoring to GTIR STARTS_FROM_GTIR_PROGRAM = "starts_from_gtir_program" USES_APPLIED_SHIFTS = "uses_applied_shifts" @@ -181,7 +183,8 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum): OptionalProgramBackendId.DACE_CPU: DACE_SKIP_TEST_LIST, OptionalProgramBackendId.DACE_GPU: DACE_SKIP_TEST_LIST, OptionalProgramBackendId.GTIR_DACE_CPU: GTIR_DACE_SKIP_TEST_LIST, - OptionalProgramBackendId.GTIR_DACE_GPU: GTIR_DACE_SKIP_TEST_LIST, + OptionalProgramBackendId.GTIR_DACE_GPU: GTIR_DACE_SKIP_TEST_LIST + + [(REQUIRES_GTIR_DACE_GPU_FIX, XFAIL, UNSUPPORTED_MESSAGE)], ProgramBackendId.GTFN_CPU: GTFN_SKIP_TEST_LIST + [(USES_SCAN_NESTED, XFAIL, UNSUPPORTED_MESSAGE)], ProgramBackendId.GTFN_CPU_IMPERATIVE: GTFN_SKIP_TEST_LIST diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py index 7540d52fb3..88035caa4a 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py @@ -997,6 +997,7 @@ def program_domain( cases.verify(cartesian_case, program_domain, inp, out, lower_i, upper_i, inout=out, ref=ref) +@pytest.mark.requires_gtir_dace_gpu_fix def test_domain_input_bounds_1(cartesian_case): lower_i = 1 upper_i = 9 @@ -1042,6 +1043,7 @@ def program_domain( ) +@pytest.mark.requires_gtir_dace_gpu_fix def test_domain_tuple(cartesian_case): @gtx.field_operator def fieldop_domain_tuple( diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_where.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_where.py index 7d634cec90..861dbbb036 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_where.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_where.py @@ -18,6 +18,7 @@ ) +@pytest.mark.requires_gtir_dace_gpu_fix @pytest.mark.uses_cartesian_shift def test_where_k_offset(cartesian_case): @gtx.field_operator diff --git a/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_laplacian.py b/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_laplacian.py index 850a20ff7e..79545a2666 100644 --- a/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_laplacian.py +++ b/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_laplacian.py @@ -84,6 +84,7 @@ def skewedlap_ref(inp): return -4.0 * inp[1:-1, 1:-1] + inp[2:, 2:] + inp[2:, :-2] + inp[:-2, 2:] + inp[:-2, :-2] +@pytest.mark.requires_gtir_dace_gpu_fix def test_ffront_lap(cartesian_case): in_field = cases.allocate(cartesian_case, lap_program, "in_field")() in_field = square(in_field) @@ -99,6 +100,7 @@ def test_ffront_lap(cartesian_case): ) +@pytest.mark.requires_gtir_dace_gpu_fix def test_ffront_skewedlap(cartesian_case): in_field = cases.allocate(cartesian_case, skewedlap_program, "in_field")() in_field = square(in_field) @@ -114,6 +116,7 @@ def test_ffront_skewedlap(cartesian_case): ) +@pytest.mark.requires_gtir_dace_gpu_fix def test_ffront_laplap(cartesian_case): in_field = cases.allocate(cartesian_case, laplap_program, "in_field")() in_field = square(in_field) From 6fcd8da2b5dd78b4ab6b29ba2752d4228332aa7c Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 21:22:36 +0100 Subject: [PATCH 20/23] Revert "ADd pytest marker for failing gpu tests" This reverts commit 02a9391f7bdfdb2ecdea52debfe8a7543bd12ba4. --- pyproject.toml | 1 - tests/next_tests/definitions.py | 5 +---- .../feature_tests/ffront_tests/test_execution.py | 2 -- .../feature_tests/ffront_tests/test_where.py | 1 - .../multi_feature_tests/ffront_tests/test_laplacian.py | 3 --- 5 files changed, 1 insertion(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e43ef4a019..64f08e671e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -238,7 +238,6 @@ markers = [ 'requires_atlas: tests that require `atlas4py` bindings package', 'requires_dace: tests that require `dace` package', 'requires_gpu: tests that require a NVidia GPU (`cupy` and `cudatoolkit` are required)', - 'requires_gtir_dace_gpu_fix: tests that still do not work on GTIR DaCe backend', 'starts_from_gtir_program: tests that require backend to start lowering from GTIR program', 'uses_applied_shifts: tests that require backend support for applied-shifts', 'uses_constant_fields: tests that require backend support for constant fields', diff --git a/tests/next_tests/definitions.py b/tests/next_tests/definitions.py index 2d41588141..5471333bd5 100644 --- a/tests/next_tests/definitions.py +++ b/tests/next_tests/definitions.py @@ -87,8 +87,6 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum): # to avoid needing to mark all tests. ALL = "all" REQUIRES_ATLAS = "requires_atlas" -# TODO(edopao): Remove, once issues are fixed -REQUIRES_GTIR_DACE_GPU_FIX = "requires_gtir_dace_gpu_fix" # TODO(havogt): Remove, skipped during refactoring to GTIR STARTS_FROM_GTIR_PROGRAM = "starts_from_gtir_program" USES_APPLIED_SHIFTS = "uses_applied_shifts" @@ -183,8 +181,7 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum): OptionalProgramBackendId.DACE_CPU: DACE_SKIP_TEST_LIST, OptionalProgramBackendId.DACE_GPU: DACE_SKIP_TEST_LIST, OptionalProgramBackendId.GTIR_DACE_CPU: GTIR_DACE_SKIP_TEST_LIST, - OptionalProgramBackendId.GTIR_DACE_GPU: GTIR_DACE_SKIP_TEST_LIST - + [(REQUIRES_GTIR_DACE_GPU_FIX, XFAIL, UNSUPPORTED_MESSAGE)], + OptionalProgramBackendId.GTIR_DACE_GPU: GTIR_DACE_SKIP_TEST_LIST, ProgramBackendId.GTFN_CPU: GTFN_SKIP_TEST_LIST + [(USES_SCAN_NESTED, XFAIL, UNSUPPORTED_MESSAGE)], ProgramBackendId.GTFN_CPU_IMPERATIVE: GTFN_SKIP_TEST_LIST diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py index 88035caa4a..7540d52fb3 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_execution.py @@ -997,7 +997,6 @@ def program_domain( cases.verify(cartesian_case, program_domain, inp, out, lower_i, upper_i, inout=out, ref=ref) -@pytest.mark.requires_gtir_dace_gpu_fix def test_domain_input_bounds_1(cartesian_case): lower_i = 1 upper_i = 9 @@ -1043,7 +1042,6 @@ def program_domain( ) -@pytest.mark.requires_gtir_dace_gpu_fix def test_domain_tuple(cartesian_case): @gtx.field_operator def fieldop_domain_tuple( diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_where.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_where.py index 861dbbb036..7d634cec90 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_where.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_where.py @@ -18,7 +18,6 @@ ) -@pytest.mark.requires_gtir_dace_gpu_fix @pytest.mark.uses_cartesian_shift def test_where_k_offset(cartesian_case): @gtx.field_operator diff --git a/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_laplacian.py b/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_laplacian.py index 79545a2666..850a20ff7e 100644 --- a/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_laplacian.py +++ b/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_laplacian.py @@ -84,7 +84,6 @@ def skewedlap_ref(inp): return -4.0 * inp[1:-1, 1:-1] + inp[2:, 2:] + inp[2:, :-2] + inp[:-2, 2:] + inp[:-2, :-2] -@pytest.mark.requires_gtir_dace_gpu_fix def test_ffront_lap(cartesian_case): in_field = cases.allocate(cartesian_case, lap_program, "in_field")() in_field = square(in_field) @@ -100,7 +99,6 @@ def test_ffront_lap(cartesian_case): ) -@pytest.mark.requires_gtir_dace_gpu_fix def test_ffront_skewedlap(cartesian_case): in_field = cases.allocate(cartesian_case, skewedlap_program, "in_field")() in_field = square(in_field) @@ -116,7 +114,6 @@ def test_ffront_skewedlap(cartesian_case): ) -@pytest.mark.requires_gtir_dace_gpu_fix def test_ffront_laplap(cartesian_case): in_field = cases.allocate(cartesian_case, laplap_program, "in_field")() in_field = square(in_field) From 9ffc665dc1caf3f93d45f8c7446e7fc37b8c1b04 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Mon, 4 Nov 2024 21:33:42 +0100 Subject: [PATCH 21/23] Keep gpu tests disabled --- tests/next_tests/definitions.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/next_tests/definitions.py b/tests/next_tests/definitions.py index 5471333bd5..1bcc3554a7 100644 --- a/tests/next_tests/definitions.py +++ b/tests/next_tests/definitions.py @@ -181,7 +181,11 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum): OptionalProgramBackendId.DACE_CPU: DACE_SKIP_TEST_LIST, OptionalProgramBackendId.DACE_GPU: DACE_SKIP_TEST_LIST, OptionalProgramBackendId.GTIR_DACE_CPU: GTIR_DACE_SKIP_TEST_LIST, - OptionalProgramBackendId.GTIR_DACE_GPU: GTIR_DACE_SKIP_TEST_LIST, + OptionalProgramBackendId.GTIR_DACE_GPU: GTIR_DACE_SKIP_TEST_LIST + + [ + # TODO(edopao): Enable when GPU codegen issues related to symbolic domain are fixed. + (ALL, XFAIL, UNSUPPORTED_MESSAGE), + ], ProgramBackendId.GTFN_CPU: GTFN_SKIP_TEST_LIST + [(USES_SCAN_NESTED, XFAIL, UNSUPPORTED_MESSAGE)], ProgramBackendId.GTFN_CPU_IMPERATIVE: GTFN_SKIP_TEST_LIST From d4973f5f3e8e2c5792bdc753ae7f40439196abd6 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Tue, 5 Nov 2024 11:27:02 +0100 Subject: [PATCH 22/23] edit code comments --- .../runners/dace_common/dace_backend.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py index 8fe5aa2d72..bbf45a822c 100644 --- a/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py +++ b/src/gt4py/next/program_processors/runners/dace_common/dace_backend.py @@ -91,7 +91,11 @@ def _get_shape_args( if sym.name not in shape_args: shape_args[sym.name] = size elif shape_args[sym.name] != size: - # TODO(edopao): This case is only hit if all fields in a tuple have the same dims and sizes. + # The same shape symbol is used by all fields of a tuple, because the current assumption is that all fields + # in a tuple have the same dimensions and sizes. Therefore, this if-branch only exists to ensure that array + # size (i.e. the value assigned to the shape symbol) is the same for all fields in a tuple. + # TODO(edopao): change to `assert sym.name not in shape_args` to ensure that shape symbols are unique, + # once the assumption on tuples is removed. raise ValueError( f"Expected array size {sym.name} for arg {name} to be {shape_args[sym.name]}, got {size}." ) @@ -117,7 +121,9 @@ def _get_stride_args( if sym.name not in stride_args: stride_args[str(sym)] = stride elif stride_args[sym.name] != stride: - # TODO(edopao): This case is only hit if all fields in a tuple have the same dims and sizes. + # See above comment in `_get_shape_args`, same for stride symbols of fields in a tuple. + # TODO(edopao): change to `assert sym.name not in stride_args` to ensure that stride symbols are unique, + # once the assumption on tuples is removed. raise ValueError( f"Expected array stride {sym.name} for arg {name} to be {stride_args[sym.name]}, got {stride}." ) From 1afb1cb2eb0ce391d451b6623a09a4e1fda8efca Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Tue, 5 Nov 2024 11:57:59 +0100 Subject: [PATCH 23/23] remove extra call to gt_simplify --- .../next/program_processors/runners/dace_fieldview/workflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py index 0fe19ab1e0..85ae95c432 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py @@ -57,7 +57,6 @@ def generate_sdfg( if auto_opt: gtx_transformations.gt_auto_optimize(sdfg, gpu=on_gpu) elif on_gpu: - gtx_transformations.gt_simplify(sdfg) gtx_transformations.gt_gpu_transformation(sdfg, try_removing_trivial_maps=False) return sdfg