From 9ee470cbdd5e4ac931b1c39c005048bff9a17ef9 Mon Sep 17 00:00:00 2001 From: Tal Ben-Nun Date: Fri, 23 Feb 2024 04:36:08 -0800 Subject: [PATCH] Generalize StructArrays to ContainerArrays and refactor View class structure (#1504) This PR enables the use of an array data descriptor that contains a nested data descriptor (e.g., ContainerArray of Arrays). Its contents can then be viewed normally with View or StructureView. With this, concepts such as jagged arrays are natively supported in DaCe (see test for example). Also adds support for using ctypes pointers and arrays as arguments to SDFGs. This PR also refactors the notion of views to a View interface, and provides views to arrays, structures, and container arrays. It also adds a syntactic-sugar/helper API to define a view of an existing data descriptor. --------- Co-authored-by: Alexandros Nikolaos Ziogas --- dace/codegen/compiled_sdfg.py | 14 +- dace/codegen/dispatcher.py | 4 +- dace/codegen/targets/cpu.py | 42 ++- dace/codegen/targets/framecode.py | 2 +- dace/data.py | 317 +++++++++++++++--- dace/dtypes.py | 2 + .../analysis/schedule_tree/sdfg_to_tree.py | 17 +- dace/sdfg/sdfg.py | 78 +++-- dace/sdfg/utils.py | 2 +- .../dataflow/redundant_array.py | 25 +- .../passes/constant_propagation.py | 2 +- .../passes/reference_reduction.py | 5 +- ..._array_test.py => container_array_test.py} | 120 +++++-- .../transformations/redundant_slices_test.py | 5 +- 14 files changed, 494 insertions(+), 141 deletions(-) rename tests/sdfg/data/{struct_array_test.py => container_array_test.py} (67%) diff --git a/dace/codegen/compiled_sdfg.py b/dace/codegen/compiled_sdfg.py index c2ca3316d7..5c0ccb86e7 100644 --- a/dace/codegen/compiled_sdfg.py +++ b/dace/codegen/compiled_sdfg.py @@ -158,6 +158,8 @@ def _array_interface_ptr(array: Any, storage: dtypes.StorageType) -> int: """ if hasattr(array, 'data_ptr'): return array.data_ptr() + if isinstance(array, ctypes.Array): + return ctypes.addressof(array) if storage == dtypes.StorageType.GPU_Global: try: @@ -508,13 +510,15 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: if atype.optional is False: # If array cannot be None raise TypeError(f'Passing a None value to a non-optional array in argument "{a}"') # Otherwise, None values are passed as null pointers below + elif isinstance(arg, ctypes._Pointer): + pass else: raise TypeError(f'Passing an object (type {type(arg).__name__}) to an array in argument "{a}"') elif is_array and not is_dtArray: # GPU scalars and return values are pointers, so this is fine if atype.storage != dtypes.StorageType.GPU_Global and not a.startswith('__return'): raise TypeError(f'Passing an array to a scalar (type {atype.dtype.ctype}) in argument "{a}"') - elif (is_dtArray and is_ndarray and not isinstance(atype, dt.StructArray) + elif (is_dtArray and is_ndarray and not isinstance(atype, dt.ContainerArray) and atype.dtype.as_numpy_dtype() != arg.dtype): # Make exception for vector types if (isinstance(atype.dtype, dtypes.vector) and atype.dtype.vtype.as_numpy_dtype() == arg.dtype): @@ -565,14 +569,14 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: arg_ctypes = tuple(at.dtype.as_ctypes() for at in argtypes) constants = self.sdfg.constants - callparams = tuple((arg, actype, atype, aname) + callparams = tuple((actype(arg.get()) if isinstance(arg, symbolic.symbol) else arg, actype, atype, aname) for arg, actype, atype, aname in zip(arglist, arg_ctypes, argtypes, argnames) if not (symbolic.issymbolic(arg) and (hasattr(arg, 'name') and arg.name in 
constants))) symbols = self._free_symbols initargs = tuple( - actype(arg) if not isinstance(arg, ctypes._SimpleCData) else arg for arg, actype, atype, aname in callparams - if aname in symbols) + actype(arg) if not isinstance(arg, (ctypes._SimpleCData, ctypes._Pointer)) else arg + for arg, actype, atype, aname in callparams if aname in symbols) try: # Replace arrays with their base host/device pointers @@ -581,7 +585,7 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: if dtypes.is_array(arg): newargs[i] = ctypes.c_void_p(_array_interface_ptr( arg, atype.storage)) # `c_void_p` is subclass of `ctypes._SimpleCData`. - elif not isinstance(arg, (ctypes._SimpleCData)): + elif not isinstance(arg, (ctypes._SimpleCData, ctypes._Pointer)): newargs[i] = actype(arg) else: newargs[i] = arg diff --git a/dace/codegen/dispatcher.py b/dace/codegen/dispatcher.py index 359d3a5853..be032556a0 100644 --- a/dace/codegen/dispatcher.py +++ b/dace/codegen/dispatcher.py @@ -505,11 +505,11 @@ def get_copy_dispatcher(self, src_node, dst_node, edge, sdfg, state): dst_is_data = True # Skip copies to/from views where edge matches - if src_is_data and isinstance(src_node.desc(sdfg), (dt.StructureView, dt.View)): + if src_is_data and isinstance(src_node.desc(sdfg), dt.View): e = sdutil.get_view_edge(state, src_node) if e is edge: return None - if dst_is_data and isinstance(dst_node.desc(sdfg), (dt.StructureView, dt.View)): + if dst_is_data and isinstance(dst_node.desc(sdfg), dt.View): e = sdutil.get_view_edge(state, dst_node) if e is edge: return None diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index 4e3af294fe..5527f93215 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -33,12 +33,12 @@ class CPUCodeGen(TargetCodeGenerator): def _define_sdfg_arguments(self, sdfg, arglist): - # NOTE: Multi-nesting with StructArrays must be further investigated. + # NOTE: Multi-nesting with container arrays must be further investigated. 
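        # The helper below flattens the members of structure-typed arguments into
        # individual SDFG arguments named "<prefix>-><member>" (e.g., a Structure
        # argument "A" with a member "indptr" is exposed as "A->indptr"),
        # recursing into nested Structures and ContainerArrays of Structures.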
def _visit_structure(struct: data.Structure, args: dict, prefix: str = ''): for k, v in struct.members.items(): if isinstance(v, data.Structure): _visit_structure(v, args, f'{prefix}->{k}') - elif isinstance(v, data.StructArray): + elif isinstance(v, data.ContainerArray): _visit_structure(v.stype, args, f'{prefix}->{k}') elif isinstance(v, data.Data): args[f'{prefix}->{k}'] = v @@ -49,10 +49,11 @@ def _visit_structure(struct: data.Structure, args: dict, prefix: str = ''): if isinstance(arg_type, data.Structure): desc = sdfg.arrays[name] _visit_structure(arg_type, args, name) - elif isinstance(arg_type, data.StructArray): + elif isinstance(arg_type, data.ContainerArray): desc = sdfg.arrays[name] desc = desc.stype - _visit_structure(desc, args, name) + if isinstance(desc, data.Structure): + _visit_structure(desc, args, name) for name, arg_type in args.items(): if isinstance(arg_type, data.Scalar): @@ -221,6 +222,35 @@ def allocate_view(self, sdfg: SDFG, dfg: SDFGState, state_id: int, node: nodes.A dtypes.pointer(nodedesc.dtype), ancestor=0, is_write=is_write) + + # Test for views of container arrays and structs + if isinstance(sdfg.arrays[viewed_dnode.data], (data.Structure, data.ContainerArray, data.ContainerView)): + vdesc = sdfg.arrays[viewed_dnode.data] + ptrname = cpp.ptr(memlet.data, vdesc, sdfg, self._dispatcher.frame) + field_name = None + if is_write and mpath[-1].dst_conn: + field_name = mpath[-1].dst_conn + elif not is_write and mpath[0].src_conn: + field_name = mpath[0].src_conn + + # Plain view into a container array + if isinstance(vdesc, data.ContainerArray) and not isinstance(vdesc.stype, data.Structure): + offset = cpp.cpp_offset_expr(vdesc, memlet.subset) + value = f'{ptrname}[{offset}]' + else: + if field_name is not None: + if isinstance(vdesc, data.ContainerArray): + offset = cpp.cpp_offset_expr(vdesc, memlet.subset) + arrexpr = f'{ptrname}[{offset}]' + stype = vdesc.stype + else: + arrexpr = f'{ptrname}' + stype = vdesc + + value = f'{arrexpr}->{field_name}' + if isinstance(stype.members[field_name], data.Scalar): + value = '&' + value + if not declared: ctypedef = dtypes.pointer(nodedesc.dtype).ctype self._dispatcher.declared_arrays.add(aname, DefinedType.Pointer, ctypedef) @@ -358,7 +388,7 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d self.allocate_array(sdfg, dfg, state_id, nodes.AccessNode(f"{name}.{k}"), v, function_stream, declaration_stream, allocation_stream) return - if isinstance(nodedesc, (data.StructureView, data.View)): + if isinstance(nodedesc, data.View): return self.allocate_view(sdfg, dfg, state_id, node, function_stream, declaration_stream, allocation_stream) if isinstance(nodedesc, data.Reference): return self.allocate_reference(sdfg, dfg, state_id, node, function_stream, declaration_stream, @@ -523,7 +553,7 @@ def deallocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, dtypes.AllocationLifetime.External) self._dispatcher.declared_arrays.remove(alloc_name, is_global=is_global) - if isinstance(nodedesc, (data.Scalar, data.StructureView, data.View, data.Stream, data.Reference)): + if isinstance(nodedesc, (data.Scalar, data.View, data.Stream, data.Reference)): return elif (nodedesc.storage == dtypes.StorageType.CPU_Heap or (nodedesc.storage == dtypes.StorageType.Register and symbolic.issymbolic(arrsize, sdfg.constants))): diff --git a/dace/codegen/targets/framecode.py b/dace/codegen/targets/framecode.py index b453da7479..44c67f261f 100644 --- a/dace/codegen/targets/framecode.py +++ 
b/dace/codegen/targets/framecode.py @@ -757,7 +757,7 @@ def determine_allocation_lifetime(self, top_sdfg: SDFG): instances = access_instances[sdfg.sdfg_id][name] # A view gets "allocated" everywhere it appears - if isinstance(desc, (data.StructureView, data.View)): + if isinstance(desc, data.View): for s, n in instances: self.to_allocate[s].append((sdfg, s, n, False, True, False)) self.to_allocate[s].append((sdfg, s, n, False, False, True)) diff --git a/dace/data.py b/dace/data.py index 2eff4d31d1..04bdc93357 100644 --- a/dace/data.py +++ b/dace/data.py @@ -267,7 +267,7 @@ def used_symbols(self, all_symbols: bool) -> Set[symbolic.SymbolicType]: rather than a set of strings. """ result = set() - if self.transient or all_symbols: + if (self.transient and not isinstance(self, (View, Reference))) or all_symbols: for s in self.shape: if isinstance(s, sp.Basic): result |= set(s.free_symbols) @@ -486,11 +486,11 @@ def as_arg(self, with_types=True, for_call=False, name=None): def __getitem__(self, s): """ This is syntactic sugar that allows us to define an array type with the following syntax: ``Structure[N,M]`` - :return: A ``data.StructArray`` data descriptor. + :return: A ``data.ContainerArray`` data descriptor. """ if isinstance(s, list) or isinstance(s, tuple): - return StructArray(self, tuple(s)) - return StructArray(self, (s, )) + return ContainerArray(self, tuple(s)) + return ContainerArray(self, (s, )) # NOTE: Like Scalars? @property @@ -652,7 +652,7 @@ def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: """ Generates the fields needed for the index. - :returns: a Dict of fields that need to be present in the struct + :return: a Dict of fields that need to be present in the struct """ pass @@ -1202,32 +1202,6 @@ def from_json(json_obj, context=None): return tensor -@make_properties -class StructureView(Structure): - """ - Data descriptor that acts as a reference (or view) of another structure. - """ - - @staticmethod - def from_json(json_obj, context=None): - if json_obj['type'] != 'StructureView': - raise TypeError("Invalid data type") - - # Create dummy object - ret = StructureView({}) - serialize.set_properties_from_json(ret, json_obj, context=context) - - return ret - - def validate(self): - super().validate() - - # We ensure that allocation lifetime is always set to Scope, since the - # view is generated upon "allocation" - if self.lifetime != dtypes.AllocationLifetime.Scope: - raise ValueError('Only Scope allocation lifetime is supported for Views') - - @make_properties class Scalar(Data): """ Data descriptor of a scalar value. """ @@ -1280,6 +1254,10 @@ def offset(self): def start_offset(self): return 0 + @property + def alignment(self): + return 0 + @property def optional(self) -> bool: return False @@ -1597,7 +1575,7 @@ def used_symbols(self, all_symbols: bool) -> Set[symbolic.SymbolicType]: for o in self.offset: if isinstance(o, sp.Expr): result |= set(o.free_symbols) - if self.transient or all_symbols: + if (self.transient and not isinstance(self, (View, Reference))) or all_symbols: if isinstance(self.total_size, sp.Expr): result |= set(self.total_size.free_symbols) return result @@ -1809,13 +1787,13 @@ def free_symbols(self): @make_properties -class StructArray(Array): - """ Array of Structures. """ +class ContainerArray(Array): + """ An array that may contain other data containers (e.g., Structures, other arrays). 
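
    A minimal usage sketch (illustrative; ``M`` and ``N`` are symbols and ``sdfg``
    is an existing SDFG, as in the accompanying tests)::

        M, N = dace.symbol('M'), dace.symbol('N')
        # An M-element array whose elements are N-element float64 arrays
        sdfg.add_datadesc('A', dace.data.ContainerArray(dace.float64[N], [M]))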
""" stype = NestedDataClassProperty(allow_none=True, default=None) def __init__(self, - stype: Structure, + stype: Data, shape, transient=False, allow_conflicts=False, @@ -1827,17 +1805,20 @@ def __init__(self, lifetime=dtypes.AllocationLifetime.Scope, alignment=0, debuginfo=None, - total_size=-1, + total_size=None, start_offset=None, optional=None, pool=False): self.stype = stype if stype: - dtype = stype.dtype + if isinstance(stype, Structure): + dtype = stype.dtype + else: + dtype = dtypes.pointer(stype.dtype) else: - dtype = dtypes.int8 - super(StructArray, + dtype = dtypes.pointer(dtypes.typeclass(None)) # void* + super(ContainerArray, self).__init__(dtype, shape, transient, allow_conflicts, storage, location, strides, offset, may_alias, lifetime, alignment, debuginfo, total_size, start_offset, optional, pool) @@ -1859,11 +1840,10 @@ def from_json(cls, json_obj, context=None): return ret -@make_properties -class View(Array): +class View: """ - Data descriptor that acts as a reference (or view) of another array. Can - be used to reshape or reinterpret existing data without copying it. + Data descriptor that acts as a static reference (or view) of another data container. + Can be used to reshape or reinterpret existing data without copying it. To use a View, it needs to be referenced in an access node that is directly connected to another access node. The rules for deciding which access node @@ -1880,9 +1860,131 @@ class View(Array): * If both access nodes reside in the same scope, the input data is viewed. Other cases are ambiguous and will fail SDFG validation. + """ + + @staticmethod + def view(viewed_container: Data, debuginfo=None): + """ + Create a new View of the specified data container. + + :param viewed_container: The data container properties of this view + :param debuginfo: Specific source line information for this view, if + different from ``viewed_container``. + :return: A new subclass of View with the appropriate viewed container + properties, e.g., ``StructureView`` for a ``Structure``. 
+ """ + debuginfo = debuginfo or viewed_container.debuginfo + # Construct the right kind of view from the input data container + if isinstance(viewed_container, Structure): + result = StructureView(members=cp.deepcopy(viewed_container.members), + name=viewed_container.name, + storage=viewed_container.storage, + location=viewed_container.location, + lifetime=viewed_container.lifetime, + debuginfo=debuginfo) + elif isinstance(viewed_container, ContainerArray): + result = ContainerView(stype=cp.deepcopy(viewed_container.stype), + shape=viewed_container.shape, + allow_conflicts=viewed_container.allow_conflicts, + storage=viewed_container.storage, + location=viewed_container.location, + strides=viewed_container.strides, + offset=viewed_container.offset, + may_alias=viewed_container.may_alias, + lifetime=viewed_container.lifetime, + alignment=viewed_container.alignment, + debuginfo=debuginfo, + total_size=viewed_container.total_size, + start_offset=viewed_container.start_offset, + optional=viewed_container.optional, + pool=viewed_container.pool) + elif isinstance(viewed_container, (Array, Scalar)): + result = ArrayView(dtype=viewed_container.dtype, + shape=viewed_container.shape, + allow_conflicts=viewed_container.allow_conflicts, + storage=viewed_container.storage, + location=viewed_container.location, + strides=viewed_container.strides, + offset=viewed_container.offset, + may_alias=viewed_container.may_alias, + lifetime=viewed_container.lifetime, + alignment=viewed_container.alignment, + debuginfo=debuginfo, + total_size=viewed_container.total_size, + start_offset=viewed_container.start_offset, + optional=viewed_container.optional, + pool=viewed_container.pool) + else: + # In undefined cases, make a container array view of size 1 + result = ContainerView(cp.deepcopy(viewed_container), [1], debuginfo=debuginfo) + + # Views are always transient + result.transient = True + return result + + +class Reference: + """ + Data descriptor that acts as a dynamic reference of another data descriptor. It can be used just like a regular + data descriptor, except that it could be set to an arbitrary container (or subset thereof) at runtime. To set a + reference, connect another access node to it and use the "set" connector. + + In order to enable data-centric analysis and optimizations, avoid using References as much as possible. + """ + + @staticmethod + def view(viewed_container: Data, debuginfo=None): + """ + Create a new Reference of the specified data container. + + :param viewed_container: The data container properties of this reference. + :param debuginfo: Specific source line information for this reference, if + different from ``viewed_container``. + :return: A new subclass of View with the appropriate viewed container + properties, e.g., ``StructureReference`` for a ``Structure``. + """ + result = cp.deepcopy(viewed_container) + + # Assign the right kind of reference from the input data container + # NOTE: The class assignment below is OK since the Reference class is a subclass of the instance, + # and those should not have additional fields. 
+ if isinstance(viewed_container, ContainerArray): + result.__class__ = ContainerArrayReference + elif isinstance(viewed_container, Structure): + result.__class__ = StructureReference + elif isinstance(viewed_container, Array): + result.__class__ = ArrayReference + elif isinstance(viewed_container, Scalar): + result = ArrayReference(dtype=viewed_container.dtype, + shape=[1], + storage=viewed_container.storage, + lifetime=viewed_container.lifetime, + alignment=viewed_container.alignment, + debuginfo=viewed_container.debuginfo, + total_size=1, + start_offset=0, + optional=viewed_container.optional, + pool=False, + byval=False) + else: # In undefined cases, make a container array reference of size 1 + result = ContainerArrayReference(result, [1], debuginfo=debuginfo) + + if debuginfo is not None: + result.debuginfo = debuginfo + + # References are always transient + result.transient = True + return result + + +@make_properties +class ArrayView(Array, View): + """ + Data descriptor that acts as a static reference (or view) of another array. Can + be used to reshape or reinterpret existing data without copying it. In the Python frontend, ``numpy.reshape`` and ``numpy.ndarray.view`` both - generate Views. + generate ArrayViews. """ def validate(self): @@ -1900,11 +2002,82 @@ def as_array(self): @make_properties -class Reference(Array): +class StructureView(Structure, View): + """ + Data descriptor that acts as a view of another structure. + """ + + @staticmethod + def from_json(json_obj, context=None): + if json_obj['type'] != 'StructureView': + raise TypeError("Invalid data type") + + # Create dummy object + ret = StructureView({}) + serialize.set_properties_from_json(ret, json_obj, context=context) + + return ret + + def validate(self): + super().validate() + + # We ensure that allocation lifetime is always set to Scope, since the + # view is generated upon "allocation" + if self.lifetime != dtypes.AllocationLifetime.Scope: + raise ValueError('Only Scope allocation lifetime is supported for Views') + + def as_structure(self): + copy = cp.deepcopy(self) + copy.__class__ = Structure + return copy + + +@make_properties +class ContainerView(ContainerArray, View): + """ + Data descriptor that acts as a view of another container array. Can + be used to access nested container types without a copy. + """ + + def __init__(self, + stype: Data, + shape=None, + transient=True, + allow_conflicts=False, + storage=dtypes.StorageType.Default, + location=None, + strides=None, + offset=None, + may_alias=False, + lifetime=dtypes.AllocationLifetime.Scope, + alignment=0, + debuginfo=None, + total_size=None, + start_offset=None, + optional=None, + pool=False): + shape = [1] if shape is None else shape + super().__init__(stype, shape, transient, allow_conflicts, storage, location, strides, offset, may_alias, + lifetime, alignment, debuginfo, total_size, start_offset, optional, pool) + + def validate(self): + super().validate() + + # We ensure that allocation lifetime is always set to Scope, since the + # view is generated upon "allocation" + if self.lifetime != dtypes.AllocationLifetime.Scope: + raise ValueError('Only Scope allocation lifetime is supported for ContainerViews') + + def as_array(self): + copy = cp.deepcopy(self) + copy.__class__ = ContainerArray + return copy + + +@make_properties +class ArrayReference(Array, Reference): """ - Data descriptor that acts as a dynamic reference of another array. 
It can be used just like a regular array, - except that it could be set to an arbitrary array or sub-array at runtime. To set a reference, connect another - access node to it and use the "set" connector. + Data descriptor that acts as a dynamic reference of another array. See ``Reference`` for more information. In order to enable data-centric analysis and optimizations, avoid using References as much as possible. """ @@ -1923,6 +2096,54 @@ def as_array(self): return copy +@make_properties +class StructureReference(Structure, Reference): + """ + Data descriptor that acts as a dynamic reference of another Structure. See ``Reference`` for more information. + + In order to enable data-centric analysis and optimizations, avoid using References as much as possible. + """ + + def validate(self): + super().validate() + + # We ensure that allocation lifetime is always set to Scope, since the + # view is generated upon "allocation" + if self.lifetime != dtypes.AllocationLifetime.Scope: + raise ValueError('Only Scope allocation lifetime is supported for References') + + if 'set' in self.members: + raise NameError('A structure that is referenced may not contain a member called "set" (reserved keyword).') + + def as_structure(self): + copy = cp.deepcopy(self) + copy.__class__ = Structure + return copy + + +@make_properties +class ContainerArrayReference(ContainerArray, Reference): + """ + Data descriptor that acts as a dynamic reference of another data container array. See ``Reference`` for more + information. + + In order to enable data-centric analysis and optimizations, avoid using References as much as possible. + """ + + def validate(self): + super().validate() + + # We ensure that allocation lifetime is always set to Scope, since the + # view is generated upon "allocation" + if self.lifetime != dtypes.AllocationLifetime.Scope: + raise ValueError('Only Scope allocation lifetime is supported for References') + + def as_array(self): + copy = cp.deepcopy(self) + copy.__class__ = ContainerArray + return copy + + def make_array_from_descriptor(descriptor: Array, original_array: Optional[ArrayLike] = None, symbols: Optional[Dict[str, Any]] = None) -> ArrayLike: diff --git a/dace/dtypes.py b/dace/dtypes.py index a890668595..f3f27368a5 100644 --- a/dace/dtypes.py +++ b/dace/dtypes.py @@ -1550,6 +1550,8 @@ def is_array(obj: Any) -> bool: # In PyTorch, accessing this attribute throws a runtime error for # variables that require grad, or KeyError when a boolean array is used return True + if isinstance(obj, ctypes.Array): + return True if hasattr(obj, '__array_interface__'): return len(obj.__array_interface__['shape']) > 0 # NumPy scalars contain an empty shape tuple if hasattr(obj, 'data_ptr'): diff --git a/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py b/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py index 51871e6512..93c98e40c2 100644 --- a/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py +++ b/dace/sdfg/analysis/schedule_tree/sdfg_to_tree.py @@ -71,16 +71,11 @@ def dealias_sdfg(sdfg: SDFG): for parent_name in to_unsqueeze: parent_arr = parent_sdfg.arrays[parent_name] if isinstance(parent_arr, data.View): - parent_arr = data.Array(parent_arr.dtype, parent_arr.shape, parent_arr.transient, - parent_arr.allow_conflicts, parent_arr.storage, parent_arr.location, - parent_arr.strides, parent_arr.offset, parent_arr.may_alias, - parent_arr.lifetime, parent_arr.alignment, parent_arr.debuginfo, - parent_arr.total_size, parent_arr.start_offset, parent_arr.optional, - parent_arr.pool) + parent_arr = 
parent_arr.as_array() elif isinstance(parent_arr, data.StructureView): - parent_arr = data.Structure(parent_arr.members, parent_arr.name, parent_arr.transient, - parent_arr.storage, parent_arr.location, parent_arr.lifetime, - parent_arr.debuginfo) + parent_arr = parent_arr.as_structure() + elif isinstance(parent_arr, data.ContainerView): + parent_arr = copy.deepcopy(parent_arr.stype) child_names = inv_replacements[parent_name] for name in child_names: child_arr = copy.deepcopy(parent_arr) @@ -402,7 +397,7 @@ def prepare_schedule_tree_edges(state: SDFGState) -> Dict[gr.MultiConnectorEdge[ # 1. Check for views if isinstance(e.src, dace.nodes.AccessNode): desc = e.src.desc(sdfg) - if isinstance(desc, (dace.data.View, dace.data.StructureView)): + if isinstance(desc, dace.data.View): vedge = sdutil.get_view_edge(state, e.src) if e is vedge: viewed_node = sdutil.get_view_node(state, e.src) @@ -412,7 +407,7 @@ def prepare_schedule_tree_edges(state: SDFGState) -> Dict[gr.MultiConnectorEdge[ continue if isinstance(e.dst, dace.nodes.AccessNode): desc = e.dst.desc(sdfg) - if isinstance(desc, (dace.data.View, dace.data.StructureView)): + if isinstance(desc, dace.data.View): vedge = sdutil.get_view_edge(state, e.dst) if e is vedge: viewed_node = sdutil.get_view_node(state, e.dst) diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 74661daeda..d562c34bcf 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -617,9 +617,7 @@ def from_json(cls, json_obj, context_info=None): else: constants_prop = None - ret = SDFG(name=attrs['name'], - constants=constants_prop, - parent=context_info['sdfg']) + ret = SDFG(name=attrs['name'], constants=constants_prop, parent=context_info['sdfg']) dace.serialize.set_properties_from_json(ret, json_obj, @@ -1706,7 +1704,7 @@ def add_view(self, total_size=None, find_new_name=False, alignment=0, - may_alias=False) -> Tuple[str, dt.View]: + may_alias=False) -> Tuple[str, dt.ArrayView]: """ Adds a view to the SDFG data descriptor store. 
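
        Example (illustrative; ``N`` is a previously defined symbol)::

            name, view_desc = sdfg.add_view('v', [N], dace.float64)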
""" # convert strings to int if possible @@ -1721,18 +1719,18 @@ def add_view(self, if isinstance(dtype, type) and dtype in dtypes._CONSTANT_TYPES[:-1]: dtype = dtypes.typeclass(dtype) - desc = dt.View(dtype, - shape, - storage=storage, - allow_conflicts=allow_conflicts, - transient=True, - strides=strides, - offset=offset, - lifetime=dtypes.AllocationLifetime.Scope, - alignment=alignment, - debuginfo=debuginfo, - total_size=total_size, - may_alias=may_alias) + desc = dt.ArrayView(dtype, + shape, + storage=storage, + allow_conflicts=allow_conflicts, + transient=True, + strides=strides, + offset=offset, + lifetime=dtypes.AllocationLifetime.Scope, + alignment=alignment, + debuginfo=debuginfo, + total_size=total_size, + may_alias=may_alias) return self.add_datadesc(name, desc, find_new_name=find_new_name), desc @@ -1763,18 +1761,18 @@ def add_reference(self, if isinstance(dtype, type) and dtype in dtypes._CONSTANT_TYPES[:-1]: dtype = dtypes.typeclass(dtype) - desc = dt.Reference(dtype, - shape, - storage=storage, - allow_conflicts=allow_conflicts, - transient=True, - strides=strides, - offset=offset, - lifetime=dtypes.AllocationLifetime.Scope, - alignment=alignment, - debuginfo=debuginfo, - total_size=total_size, - may_alias=may_alias) + desc = dt.ArrayReference(dtype, + shape, + storage=storage, + allow_conflicts=allow_conflicts, + transient=True, + strides=strides, + offset=offset, + lifetime=dtypes.AllocationLifetime.Scope, + alignment=alignment, + debuginfo=debuginfo, + total_size=total_size, + may_alias=may_alias) return self.add_datadesc(name, desc, find_new_name=find_new_name), desc @@ -1961,6 +1959,30 @@ def _add_symbols(desc: dt.Data): return name + def add_datadesc_view(self, name: str, datadesc: dt.Data, find_new_name=False) -> str: + """ Adds a view of a given data descriptor to the SDFG array store. + + :param name: Name to use. + :param datadesc: Data descriptor to view. + :param find_new_name: If True and data descriptor with this name + exists, finds a new name to add. + :return: Name of the new data descriptor + """ + vdesc = dt.View.view(datadesc) + return self.add_datadesc(name, vdesc, find_new_name) + + def add_datadesc_reference(self, name: str, datadesc: dt.Data, find_new_name=False) -> str: + """ Adds a reference of a given data descriptor to the SDFG array store. + + :param name: Name to use. + :param datadesc: Data descriptor to view. + :param find_new_name: If True and data descriptor with this name + exists, finds a new name to add. + :return: Name of the new data descriptor + """ + vdesc = dt.Reference.view(datadesc) + return self.add_datadesc(name, vdesc, find_new_name) + def add_pgrid(self, shape: ShapeType = None, parent_grid: str = None, diff --git a/dace/sdfg/utils.py b/dace/sdfg/utils.py index 1405901802..c02e4a9805 100644 --- a/dace/sdfg/utils.py +++ b/dace/sdfg/utils.py @@ -1426,7 +1426,7 @@ def is_nonfree_sym_dependent(node: nd.AccessNode, desc: dt.Data, state: SDFGStat :param state: the state that contains the node :param fsymbols: the free symbols to check against """ - if isinstance(desc, (dt.StructureView, dt.View)): + if isinstance(desc, (dt.View)): # Views can be non-free symbol dependent due to the adjacent edges. 
e = get_view_edge(state, node) if e.data: diff --git a/dace/transformation/dataflow/redundant_array.py b/dace/transformation/dataflow/redundant_array.py index 039995ce11..680936dc70 100644 --- a/dace/transformation/dataflow/redundant_array.py +++ b/dace/transformation/dataflow/redundant_array.py @@ -452,10 +452,10 @@ def _make_view(self, sdfg: SDFG, graph: SDFGState, in_array: nodes.AccessNode, o view_strides = in_desc.strides if (b_dims_to_pop and len(b_dims_to_pop) == len(out_desc.shape) - len(in_desc.shape)): view_strides = [s for i, s in enumerate(out_desc.strides) if i not in b_dims_to_pop] - sdfg.arrays[in_array.data] = data.View(in_desc.dtype, in_desc.shape, True, in_desc.allow_conflicts, - out_desc.storage, out_desc.location, view_strides, in_desc.offset, - out_desc.may_alias, dtypes.AllocationLifetime.Scope, in_desc.alignment, - in_desc.debuginfo, in_desc.total_size) + sdfg.arrays[in_array.data] = data.ArrayView(in_desc.dtype, in_desc.shape, True, in_desc.allow_conflicts, + out_desc.storage, out_desc.location, view_strides, in_desc.offset, + out_desc.may_alias, dtypes.AllocationLifetime.Scope, + in_desc.alignment, in_desc.debuginfo, in_desc.total_size) in_array.add_out_connector('views', force=True) e1._src_conn = 'views' @@ -926,10 +926,11 @@ def apply(self, graph: SDFGState, sdfg: SDFG): view_strides = out_desc.strides if (a_dims_to_pop and len(a_dims_to_pop) == len(in_desc.shape) - len(out_desc.shape)): view_strides = [s for i, s in enumerate(in_desc.strides) if i not in a_dims_to_pop] - sdfg.arrays[out_array.data] = data.View(out_desc.dtype, out_desc.shape, True, out_desc.allow_conflicts, - in_desc.storage, in_desc.location, view_strides, out_desc.offset, - in_desc.may_alias, dtypes.AllocationLifetime.Scope, - out_desc.alignment, out_desc.debuginfo, out_desc.total_size) + sdfg.arrays[out_array.data] = data.ArrayView(out_desc.dtype, out_desc.shape, True, out_desc.allow_conflicts, + in_desc.storage, in_desc.location, view_strides, + out_desc.offset, in_desc.may_alias, + dtypes.AllocationLifetime.Scope, out_desc.alignment, + out_desc.debuginfo, out_desc.total_size) out_array.add_in_connector('views', force=True) e1._dst_conn = 'views' return out_array @@ -1572,7 +1573,7 @@ def apply(self, state: SDFGState, sdfg: SDFG): elif subset is not None: # Fill in the subset from the original memlet e.data.subset = copy.deepcopy(subset) - + else: # The memlet points to the other side, use ``other_subset`` if e.data.other_subset is not None: e.data.other_subset = self._offset_subset(mapping, subset, e.data.other_subset) @@ -1583,7 +1584,6 @@ def apply(self, state: SDFGState, sdfg: SDFG): # NOTE: It's only necessary to modify one subset of the memlet, as the space of the other differs from # the view space. - # Remove edge directly adjacent to view and reconnect state.remove_edge(edge) if is_src: @@ -1619,7 +1619,6 @@ class RemoveIntermediateWrite(pm.SingleStateTransformation): write = pm.PatternNode(nodes.AccessNode) map_exit = pm.PatternNode(nodes.MapExit) - @classmethod def expressions(cls): return [sdutil.node_path_graph(cls.write, cls.map_exit)] @@ -1630,7 +1629,7 @@ def can_be_applied(self, state: SDFGState, _: int, sdfg: SDFG, permissive=False) edges = state.edges_between(self.write, self.map_exit) if any(not e.data.is_empty() for e in edges): return False - + # The input edges must either depend on all the Map parameters or have WCR. 
for edge in state.in_edges(self.write): if edge.data.wcr: @@ -1645,7 +1644,7 @@ def apply(self, state: SDFGState, sdfg: SDFG): entry_node = state.entry_node(self.map_exit) scope_dict = state.scope_dict() - + outer_write = state.add_access(self.write.data) for edge in state.in_edges(self.write): state.add_memlet_path(edge.src, self.map_exit, outer_write, memlet=edge.data, src_conn=edge.src_conn) diff --git a/dace/transformation/passes/constant_propagation.py b/dace/transformation/passes/constant_propagation.py index 7b7ad9aa20..c244584d3a 100644 --- a/dace/transformation/passes/constant_propagation.py +++ b/dace/transformation/passes/constant_propagation.py @@ -171,7 +171,7 @@ def _add_nested_datanames(name: str, desc: data.Structure): for k, v in desc.members.items(): if isinstance(v, data.Structure): _add_nested_datanames(f'{name}.{k}', v) - elif isinstance(v, data.StructArray): + elif isinstance(v, data.ContainerArray): # TODO: How are we handling this? pass arrays.add(f'{name}.{k}') diff --git a/dace/transformation/passes/reference_reduction.py b/dace/transformation/passes/reference_reduction.py index 2af76852ba..698c54d133 100644 --- a/dace/transformation/passes/reference_reduction.py +++ b/dace/transformation/passes/reference_reduction.py @@ -241,8 +241,5 @@ def _create_view(self, refsource: Memlet, state: SDFGState, node: nodes.AccessNo state.add_edge(node, None, view, 'views', copy.deepcopy(refsource)) def change_ref_descriptors_to_views(self, sdfg: SDFG, names: Set[str]): - # A slightly hacky way to replace a reference class with a view. - # Since both classes have the same superclass, and all the fields - # are the same, this is safe to perform. for name in names: - sdfg.arrays[name].__class__ = data.View + sdfg.arrays[name] = data.View.view(sdfg.arrays[name]) diff --git a/tests/sdfg/data/struct_array_test.py b/tests/sdfg/data/container_array_test.py similarity index 67% rename from tests/sdfg/data/struct_array_test.py rename to tests/sdfg/data/container_array_test.py index 8e0f2f4739..7685361d0f 100644 --- a/tests/sdfg/data/struct_array_test.py +++ b/tests/sdfg/data/container_array_test.py @@ -11,17 +11,13 @@ def test_read_struct_array(): L, M, N, nnz = (dace.symbol(s) for s in ('L', 'M', 'N', 'nnz')) csr_obj = dace.data.Structure(dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), name='CSRMatrix') - csr_obj_view = dace.data.StructureView( - [('indptr', dace.int32[M + 1]), ('indices', dace.int32[nnz]), ('data', dace.float32[nnz])], - name='CSRMatrix', - transient=True) sdfg = dace.SDFG('array_of_csr_to_dense') sdfg.add_datadesc('A', csr_obj[L]) sdfg.add_array('B', [L, M, N], dace.float32) - sdfg.add_datadesc('vcsr', csr_obj_view) + sdfg.add_datadesc_view('vcsr', csr_obj) sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) @@ -41,7 +37,11 @@ def test_read_struct_array(): state.add_memlet_path(A, bme, vcsr, dst_conn='views', memlet=dace.Memlet(data='A', subset='b')) state.add_edge(vcsr, None, indptr, 'views', memlet=dace.Memlet.from_array('vcsr.indptr', csr_obj.members['indptr'])) - state.add_edge(vcsr, None, indices, 'views', memlet=dace.Memlet.from_array('vcsr.indices', csr_obj.members['indices'])) + state.add_edge(vcsr, + None, + indices, + 'views', + memlet=dace.Memlet.from_array('vcsr.indices', csr_obj.members['indices'])) 
state.add_edge(vcsr, None, data, 'views', memlet=dace.Memlet.from_array('vcsr.data', csr_obj.members['data'])) ime, imx = state.add_map('i', dict(i='0:M')) @@ -54,13 +54,19 @@ def test_read_struct_array(): state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') - state.add_memlet_path(t, jmx, imx, bmx, B, memlet=dace.Memlet(data='B', subset='b, 0:M, 0:N', volume=1), src_conn='__out') + state.add_memlet_path(t, + jmx, + imx, + bmx, + B, + memlet=dace.Memlet(data='B', subset='b, 0:M, 0:N', volume=1), + src_conn='__out') func = sdfg.compile() rng = np.random.default_rng(42) - A = np.ndarray((10,), dtype=sparse.csr_matrix) - dace_A = np.ndarray((10,), dtype=ctypes.c_void_p) + A = np.ndarray((10, ), dtype=sparse.csr_matrix) + dace_A = np.ndarray((10, ), dtype=ctypes.c_void_p) B = np.zeros((10, 20, 20), dtype=np.float32) ctypes_A = [] @@ -83,20 +89,16 @@ def test_read_struct_array(): def test_write_struct_array(): L, M, N, nnz = (dace.symbol(s) for s in ('L', 'M', 'N', 'nnz')) - csr_obj = dace.data.Structure( - [('indptr', dace.int32[M + 1]), ('indices', dace.int32[nnz]), ('data', dace.float32[nnz])], - name='CSRMatrix') - csr_obj_view = dace.data.StructureView( - dict(indptr=dace.int32[M + 1], indices=dace.int32[nnz], data=dace.float32[nnz]), - name='CSRMatrix', - transient=True) + csr_obj = dace.data.Structure([('indptr', dace.int32[M + 1]), ('indices', dace.int32[nnz]), + ('data', dace.float32[nnz])], + name='CSRMatrix') sdfg = dace.SDFG('array_dense_to_csr') sdfg.add_array('A', [L, M, N], dace.float32) sdfg.add_datadesc('B', csr_obj[L]) - sdfg.add_datadesc('vcsr', csr_obj_view) + sdfg.add_datadesc_view('vcsr', csr_obj) sdfg.add_view('vindptr', csr_obj.members['indptr'].shape, csr_obj.members['indptr'].dtype) sdfg.add_view('vindices', csr_obj.members['indices'].shape, csr_obj.members['indices'].dtype) sdfg.add_view('vdata', csr_obj.members['data'].shape, csr_obj.members['data'].dtype) @@ -155,8 +157,8 @@ def test_write_struct_array(): func = sdfg.compile() rng = np.random.default_rng(42) - B = np.ndarray((10,), dtype=sparse.csr_matrix) - dace_B = np.ndarray((10,), dtype=ctypes.c_void_p) + B = np.ndarray((10, ), dtype=sparse.csr_matrix) + dace_B = np.ndarray((10, ), dtype=ctypes.c_void_p) A = np.empty((10, 20, 20), dtype=np.float32) ctypes_B = [] @@ -178,6 +180,86 @@ def test_write_struct_array(): assert np.allclose(A[b], B[b].toarray()) +def test_jagged_container_array(): + N = dace.symbol('N') + M = dace.symbol('M') + sdfg = dace.SDFG('tester') + sdfg.add_datadesc('A', dace.data.ContainerArray(dace.float64[N], [M])) + sdfg.add_view('v', [N], dace.float64) + sdfg.add_array('B', [1], dace.float64) + + # Make a state where the container array is first viewed with index i (i.e., dereferencing double** to double*) + # and then the view is accessed with index j + state = sdfg.add_state() + me, mx = state.add_map('outer', dict(i='0:M')) + ime, imx = state.add_map('inner', dict(j='0:i')) + t = state.add_tasklet('add', {'inp'}, {'out'}, 'out = inp') + r = state.add_read('A') + v = state.add_access('v') + w = state.add_write('B') + state.add_memlet_path(r, me, v, memlet=dace.Memlet('A[i]'), dst_conn='views') + state.add_memlet_path(v, ime, t, memlet=dace.Memlet('v[j]'), dst_conn='inp') + state.add_memlet_path(t, imx, mx, w, 
memlet=dace.Memlet('B[0]', wcr='lambda a,b: a+b'), src_conn='out') + + m = 20 + # Create a ctypes array of arrays + jagged_array = (ctypes.POINTER(ctypes.c_double) * m)(*[(ctypes.c_double * i)(*np.random.rand(i)) + for i in range(1, m + 1)]) + ref = 0 + for i in range(m): + for j in range(i): + ref += jagged_array[i][j] + + B = np.zeros([1]) + sdfg(A=jagged_array, B=B, M=m) + assert np.allclose(ref, B[0]) + + +def test_two_levels(): + N = dace.symbol('N') + M = dace.symbol('M') + K = dace.symbol('K') + sdfg = dace.SDFG('tester') + desc = dace.data.ContainerArray(dace.data.ContainerArray(dace.float64[N], [M]), [K]) + sdfg.add_datadesc('A', desc) + sdfg.add_datadesc_view('v', desc.stype) + sdfg.add_view('vv', [N], dace.float64) + sdfg.add_array('B', [1], dace.float64) + + # Make a state where the container is viewed twice in a row + state = sdfg.add_state() + r = state.add_read('A') + v = state.add_access('v') + v.add_in_connector('views') + vv = state.add_access('vv') + vv.add_in_connector('views') + w = state.add_write('B') + state.add_edge(r, None, v, 'views', dace.Memlet('A[1]')) + state.add_edge(v, None, vv, 'views', dace.Memlet('v[2]')) + state.add_edge(vv, None, w, None, dace.Memlet('vv[3]')) + + # Create a ctypes array of arrays + jagged_array = (ctypes.POINTER(ctypes.POINTER(ctypes.c_double)) * 5)( + *[ + # + (ctypes.POINTER(ctypes.c_double) * 5)( + *[ + # + (ctypes.c_double * 5)(*np.random.rand(5)) for _ in range(5) + # + ]) for _ in range(5) + # + ]) + + ref = jagged_array[1][2][3] + + B = np.zeros([1]) + sdfg(A=jagged_array, B=B) + assert np.allclose(ref, B[0]) + + if __name__ == '__main__': test_read_struct_array() test_write_struct_array() + test_jagged_container_array() + test_two_levels() diff --git a/tests/transformations/redundant_slices_test.py b/tests/transformations/redundant_slices_test.py index 16a027266d..b8de6f0ba7 100644 --- a/tests/transformations/redundant_slices_test.py +++ b/tests/transformations/redundant_slices_test.py @@ -94,7 +94,7 @@ def test_write_slice2(): @pytest.mark.parametrize('with_subset', (False, True)) def test_view_slice_detect_simple(with_subset): adesc = dace.float64[1, 1] - vdesc = dace.data.View(dace.float64, [1]) + vdesc = dace.data.View.view(dace.float64[1]) if with_subset: subset = dace.Memlet('A[0, 0]').subset @@ -115,7 +115,8 @@ def test_view_slice_detect_complex(with_subset): adesc = dace.float64[2, 2, 1, 1, N] adesc.strides = [5 * M * N * K, M * N * K, M * N, 1, N] - vdesc = dace.data.View(dace.float64, [2, 1, 2, 1, N, 1], strides=[5 * M * N * K, M * N * K, M * N * K, M * N, N, N]) + vdesc = dace.data.View.view( + dace.data.Array(dace.float64, [2, 1, 2, 1, N, 1], strides=[5 * M * N * K, M * N * K, M * N * K, M * N, N, N])) if with_subset: subset = dace.Memlet('A[0:2, 3:5, i, j, 0:M]').subset