diff --git a/kernel_maps.jl b/kernel_maps.jl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/BoundaryConditions/fill_halo_regions.jl b/src/BoundaryConditions/fill_halo_regions.jl index 7ac3e79ea5..ad2b6d1558 100644 --- a/src/BoundaryConditions/fill_halo_regions.jl +++ b/src/BoundaryConditions/fill_halo_regions.jl @@ -46,7 +46,9 @@ const MaybeTupledData = Union{OffsetArray, NTuple{<:Any, OffsetArray}} "Fill halo regions in ``x``, ``y``, and ``z`` for a given field's data." function fill_halo_regions!(c::MaybeTupledData, boundary_conditions, indices, loc, grid, args...; - fill_boundary_normal_velocities = true, kwargs...) + fill_boundary_normal_velocities = true, + kwargs...) + arch = architecture(grid) if fill_boundary_normal_velocities diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl index 82c71b05dd..e61ef7dfa5 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl @@ -96,7 +96,6 @@ function compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_ compute_hydrostatic_free_surface_Gc!, c_tendency, grid, - active_cells_map, args; active_cells_map) end @@ -161,12 +160,12 @@ function compute_hydrostatic_momentum_tendencies!(model, velocities, kernel_para launch!(arch, grid, kernel_parameters, compute_hydrostatic_free_surface_Gu!, model.timestepper.Gⁿ.u, grid, - active_cells_map, u_kernel_args; + u_kernel_args; active_cells_map) launch!(arch, grid, kernel_parameters, compute_hydrostatic_free_surface_Gv!, model.timestepper.Gⁿ.v, grid, - active_cells_map, v_kernel_args; + v_kernel_args; active_cells_map) compute_free_surface_tendency!(grid, model, :xy) @@ -200,45 +199,27 @@ end ##### """ Calculate the right-hand-side of the u-velocity equation. """ -@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, ::Nothing, args) +@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, args) i, j, k = @index(Global, NTuple) @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, active_cells_map, args) - idx = @index(Global, Linear) - i, j, k = active_linear_index_to_tuple(idx, active_cells_map) - @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...) -end - """ Calculate the right-hand-side of the v-velocity equation. """ -@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, ::Nothing, args) +@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, args) i, j, k = @index(Global, NTuple) @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, active_cells_map, args) - idx = @index(Global, Linear) - i, j, k = active_linear_index_to_tuple(idx, active_cells_map) - @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...) -end - ##### ##### Tendency calculators for tracers ##### """ Calculate the right-hand-side of the tracer advection-diffusion equation. """ -@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, ::Nothing, args) +@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, args) i, j, k = @index(Global, NTuple) @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) end -@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, active_cells_map, args) - idx = @index(Global, Linear) - i, j, k = active_linear_index_to_tuple(idx, active_cells_map) - @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...) -end - ##### ##### Tendency calculators for an explicit free surface ##### diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl index c76ac3b525..9cbb8e271f 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl @@ -1,7 +1,6 @@ using Oceananigans.Fields: location using Oceananigans.TimeSteppers: ab2_step_field! using Oceananigans.TurbulenceClosures: implicit_step! -using Oceananigans.ImmersedBoundaries: retrieve_interior_active_cells_map, retrieve_surface_active_cells_map import Oceananigans.TimeSteppers: ab2_step! diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 0042755123..786f99e129 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -133,7 +133,7 @@ end # Barotropic Model Kernels # u_Δz = u * Δz -@kernel function _barotropic_mode_kernel!(U, V, grid, ::Nothing, u, v) +@kernel function _barotropic_mode_kernel!(U, V, grid, u, v) i, j = @index(Global, NTuple) k_top = grid.Nz+1 @@ -146,26 +146,9 @@ end end end -# Barotropic Model Kernels -# u_Δz = u * Δz -@kernel function _barotropic_mode_kernel!(U, V, grid, active_cells_map, u, v) - idx = @index(Global, Linear) - i, j = active_linear_index_to_tuple(idx, active_cells_map) - k_top = grid.Nz+1 - - @inbounds U[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1] - @inbounds V[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1] - - for k in 2:grid.Nz - @inbounds U[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k] - @inbounds V[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k] - end -end - @inline function compute_barotropic_mode!(U, V, grid, u, v) active_cells_map = retrieve_surface_active_cells_map(grid) - - launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, active_cells_map, u, v; active_cells_map) + launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, u, v; active_cells_map) return nothing end diff --git a/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl index a84d03e13f..089370fdd9 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl @@ -4,7 +4,6 @@ using Oceananigans.TimeSteppers: store_field_tendencies! using Oceananigans: prognostic_fields using Oceananigans.Grids: AbstractGrid -using Oceananigans.ImmersedBoundaries: retrieve_interior_active_cells_map using Oceananigans.Utils: launch! diff --git a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl index ffa7865bb2..67bcbbbb98 100644 --- a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl +++ b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl @@ -103,15 +103,15 @@ function compute_interior_tendency_contributions!(model, kernel_parameters; acti exclude_periphery = true launch!(arch, grid, kernel_parameters, compute_Gu!, - tendencies.u, grid, active_cells_map, u_kernel_args; + tendencies.u, grid, u_kernel_args; active_cells_map, exclude_periphery) launch!(arch, grid, kernel_parameters, compute_Gv!, - tendencies.v, grid, active_cells_map, v_kernel_args; + tendencies.v, grid, v_kernel_args; active_cells_map, exclude_periphery) launch!(arch, grid, kernel_parameters, compute_Gw!, - tendencies.w, grid, active_cells_map, w_kernel_args; + tendencies.w, grid, w_kernel_args; active_cells_map, exclude_periphery) start_tracer_kernel_args = (advection, closure) @@ -131,7 +131,7 @@ function compute_interior_tendency_contributions!(model, kernel_parameters; acti forcing, clock) launch!(arch, grid, kernel_parameters, compute_Gc!, - c_tendency, grid, active_cells_map, args; + c_tendency, grid, args; active_cells_map) end @@ -143,57 +143,34 @@ end ##### """ Calculate the right-hand-side of the u-velocity equation. """ -@kernel function compute_Gu!(Gu, grid, ::Nothing, args) +@kernel function compute_Gu!(Gu, grid, args) i, j, k = @index(Global, NTuple) @inbounds Gu[i, j, k] = u_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_Gu!(Gu, grid, interior_map, args) - idx = @index(Global, Linear) - i, j, k = active_linear_index_to_tuple(idx, interior_map) - @inbounds Gu[i, j, k] = u_velocity_tendency(i, j, k, grid, args...) -end - """ Calculate the right-hand-side of the v-velocity equation. """ -@kernel function compute_Gv!(Gv, grid, ::Nothing, args) +@kernel function compute_Gv!(Gv, grid, args) i, j, k = @index(Global, NTuple) @inbounds Gv[i, j, k] = v_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_Gv!(Gv, grid, interior_map, args) - idx = @index(Global, Linear) - i, j, k = active_linear_index_to_tuple(idx, interior_map) - @inbounds Gv[i, j, k] = v_velocity_tendency(i, j, k, grid, args...) -end - """ Calculate the right-hand-side of the w-velocity equation. """ -@kernel function compute_Gw!(Gw, grid, ::Nothing, args) +@kernel function compute_Gw!(Gw, grid, args) i, j, k = @index(Global, NTuple) @inbounds Gw[i, j, k] = w_velocity_tendency(i, j, k, grid, args...) end -@kernel function compute_Gw!(Gw, grid, interior_map, args) - idx = @index(Global, Linear) - i, j, k = active_linear_index_to_tuple(idx, interior_map) - @inbounds Gw[i, j, k] = w_velocity_tendency(i, j, k, grid, args...) -end ##### ##### Tracer(s) ##### """ Calculate the right-hand-side of the tracer advection-diffusion equation. """ -@kernel function compute_Gc!(Gc, grid, ::Nothing, args) +@kernel function compute_Gc!(Gc, grid, args) i, j, k = @index(Global, NTuple) @inbounds Gc[i, j, k] = tracer_tendency(i, j, k, grid, args...) end -@kernel function compute_Gc!(Gc, grid, interior_map, args) - idx = @index(Global, Linear) - i, j, k = active_linear_index_to_tuple(idx, interior_map) - @inbounds Gc[i, j, k] = tracer_tendency(i, j, k, grid, args...) -end - ##### ##### Boundary contributions to tendencies due to user-prescribed fluxes ##### diff --git a/src/Solvers/batched_tridiagonal_solver.jl b/src/Solvers/batched_tridiagonal_solver.jl index 2886847938..4f756ca3ac 100644 --- a/src/Solvers/batched_tridiagonal_solver.jl +++ b/src/Solvers/batched_tridiagonal_solver.jl @@ -1,5 +1,6 @@ using Oceananigans.Architectures: on_architecture using Oceananigans.Grids: XDirection, YDirection, ZDirection +using Oceananigans.ImmersedBoundaries: retrieve_surface_active_cells_map import Oceananigans.Architectures: architecture @@ -99,13 +100,16 @@ Press William, H., Teukolsky Saul, A., Vetterling William, T., & Flannery Brian, """ function solve!(ϕ, solver::BatchedTridiagonalSolver, rhs, args...) - launch_config = if solver.tridiagonal_direction isa XDirection - :yz - elseif solver.tridiagonal_direction isa YDirection - :xz - elseif solver.tridiagonal_direction isa ZDirection - :xy - end + if solver.tridiagonal_direction isa XDirection + launch_config = :yz + active_cells_map = nothing + elseif solver.tridiagonal_direction isa YDirection + launch_config = :xz + active_cells_map = nothing + elseif solver.tridiagonal_direction isa ZDirection + launch_config = :xy + active_cells_map = retrieve_surface_active_cells_map(solver.grid) + end launch!(architecture(solver), solver.grid, launch_config, solve_batched_tridiagonal_system_kernel!, ϕ, @@ -117,7 +121,8 @@ function solve!(ϕ, solver::BatchedTridiagonalSolver, rhs, args...) solver.grid, solver.parameters, Tuple(args), - solver.tridiagonal_direction) + solver.tridiagonal_direction; + active_cells_map) return nothing end diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl index 097ce8fe00..02473f0d5b 100644 --- a/src/Utils/kernel_launching.jl +++ b/src/Utils/kernel_launching.jl @@ -6,10 +6,13 @@ using Oceananigans: location using Oceananigans.Architectures using Oceananigans.Grids using Oceananigans.Grids: AbstractGrid +using Adapt using Base: @pure +using KernelAbstractions: Kernel import Oceananigans import KernelAbstractions: get, expand +import Base struct KernelParameters{S, O} end @@ -80,6 +83,12 @@ end contiguousrange(range::NTuple{N, Int}, offset::NTuple{N, Int}) where N = Tuple(1+o:r+o for (r, o) in zip(range, offset)) flatten_reduced_dimensions(worksize, dims) = Tuple(d ∈ dims ? 1 : worksize[d] for d = 1:3) +# Internal utility to launch a function mapped on an index_map +struct MappedFunction{F, M} <: Function + func::F + index_map::M +end + # Support for 1D heuristic_workgroup(Wx) = min(Wx, 256) @@ -227,6 +236,7 @@ the architecture `arch`. location = nothing, active_cells_map = nothing) + if !isnothing(active_cells_map) # everything else is irrelevant workgroup = min(length(active_cells_map), 256) worksize = length(active_cells_map) @@ -238,9 +248,17 @@ the architecture `arch`. dev = Architectures.device(arch) loop = kernel!(dev, workgroup, worksize) + + # Map out the function to use active_cells_map as an index map + if !isnothing(active_cells_map) + func = MappedFunction(loop.f, active_cells_map) + loop = Kernel{get_kernel_parameters(loop)..., typeof(func)}(dev, func) + end + return loop, worksize end +@inline get_kernel_parameters(::Kernel{Dev, B, W}) where {Dev, B, W} = Dev, B, W """ launch!(arch, grid, workspec, kernel!, kernel_args...; kw...) @@ -312,11 +330,16 @@ end ##### # TODO: when offsets are implemented in KA so that we can call `kernel(dev, group, size, offsets)`, remove all of this - -using KernelAbstractions: Kernel using KernelAbstractions.NDIteration: _Size, StaticSize using KernelAbstractions.NDIteration: NDRange +using KernelAbstractions.NDIteration +using KernelAbstractions: ndrange, workgroupsize +import KernelAbstractions: partition + +using KernelAbstractions: CompilerMetadata +import KernelAbstractions: __ndrange, __groupsize + struct OffsetStaticSize{S} <: _Size function OffsetStaticSize{S}() where S new{S::Tuple{Vararg}}() @@ -366,13 +389,6 @@ const OffsetNDRange{N} = NDRange{N, <:StaticSize, <:StaticSize, <:Any, <:KernelO return CartesianIndex(nI) end -using KernelAbstractions.NDIteration -using KernelAbstractions: ndrange, workgroupsize -import KernelAbstractions: partition - -using KernelAbstractions: CompilerMetadata -import KernelAbstractions: __ndrange, __groupsize - @inline __ndrange(::CompilerMetadata{NDRange}) where {NDRange<:OffsetStaticSize} = CartesianIndices(get(NDRange)) @inline __groupsize(cm::CompilerMetadata{NDRange}) where {NDRange<:OffsetStaticSize} = size(__ndrange(cm)) @@ -410,3 +426,89 @@ function partition(kernel::OffsetKernel, inrange, ingroupsize) return iterspace, dynamic end +##### +##### Utilities for Mapped kernels +##### + +struct IndexMap end + +const MappedNDRange{N} = NDRange{N, <:StaticSize, <:StaticSize, <:IndexMap, <:AbstractArray} where N + +# NDRange has been modified to include an index_map in place of workitems. +# Remember, dynamic offset kernels are not possible with this extension!! +# Also, mapped kernels work only with a 1D kernel and a 1D map, it is not possible to launch a ND kernel. +# TODO: maybe don't do this +@inline function expand(ndrange::MappedNDRange, groupidx::CartesianIndex{N}, idx::CartesianIndex{N}) where N + nI = ntuple(Val(N)) do I + Base.@_inline_meta + offsets = workitems(ndrange) + stride = size(offsets, I) + gidx = groupidx.I[I] + ndrange.workitems[(gidx - 1) * stride + idx.I[I]] + end + return CartesianIndex(nI...) +end + +const MappedKernel{D} = Kernel{D, <:Any, <:Any, <:MappedFunction} where D + +# Override the getproperty to make sure we launch the correct function in the kernel +@inline Base.getproperty(k::MappedKernel, prop::Symbol) = get_mapped_kernel_property(k, Val(prop)) + +@inline get_mapped_kernel_property(k, ::Val{prop}) where prop = getfield(k, prop) +@inline get_mapped_kernel_property(k, ::Val{:index_map}) = getfield(getfield(k, :f), :index_map) +@inline get_mapped_kernel_property(k, ::Val{:f}) = getfield(getfield(k, :f), :func) + +Adapt.adapt_structure(to, cm::CompilerMetadata{N, C}) where {N, C} = + CompilerMetadata{N, C}(Adapt.adapt(to, cm.groupindex), + Adapt.adapt(to, cm.ndrange), + Adapt.adapt(to, cm.iterspace)) + +Adapt.adapt_structure(to, ndrange::NDRange{N, B, W}) where {N, B, W} = + NDRange{N, B, W}(Adapt.adapt(to, ndrange.blocks), Adapt.adapt(to, ndrange.workitems)) + +# Extending the partition function to include offsets in NDRange: note that in this case the +# offsets take the place of the DynamicWorkitems which we assume is not needed in static kernels +function partition(kernel::MappedKernel, inrange, ingroupsize) + static_workgroupsize = workgroupsize(kernel) + + # Calculate the static NDRange and WorkgroupSize + index_map = kernel.index_map + range = length(index_map) + groupsize = get(static_workgroupsize) + + blocks, groupsize, dynamic = NDIteration.partition(range, groupsize) + + static_blocks = StaticSize{blocks} + static_workgroupsize = StaticSize{groupsize} # we might have padded workgroupsize + + iterspace = NDRange{length(range), static_blocks, static_workgroupsize}(IndexMap(), index_map) + + return iterspace, dynamic +end + +using KernelAbstractions: CompilerMetadata, NoDynamicCheck +using CUDA: CUDABackend + +import KernelAbstractions: mkcontext, __dynamic_checkbounds, __validindex + +# Very dangerous override of mkcontext which will not work if we are not +# carefull with making sure that indices are correct when launching a `MappedKernel` +# TODO: Definitely change this with options below +function mkcontext(kernel::Kernel{CUDABackend}, _ndrange, iterspace::MappedNDRange) + return CompilerMetadata{ndrange(kernel), NoDynamicCheck}(_ndrange, iterspace) +end + +# Alternative to the above to fix: +# const MappedCompilerMetadata = CompilerMetadata{<:Any, <:Any, <:Any, <:Any, <:MappedNDRange} + +# @inline __ndrange(cm::MappedCompilerMetadata) = cm.iterspace + +# @inline function __validindex(ctx::MappedCompilerMetadata, idx::CartesianIndex) +# # Turns this into a noop for code where we can turn of checkbounds of +# if __dynamic_checkbounds(ctx) +# I = @inbounds expand(__iterspace(ctx), __groupindex(ctx), idx) +# return I in __ndrange(ctx) +# else +# return true +# end +# end \ No newline at end of file diff --git a/test.jl b/test.jl new file mode 100644 index 0000000000..70239fa292 --- /dev/null +++ b/test.jl @@ -0,0 +1,18 @@ +using Revise +using Oceananigans +using Oceananigans.Utils + +using KernelAbstractions: @index, @kernel + +arch = CPU() + +@kernel function _test_indices(a) + i, j, k = @index(Global, NTuple) + a[i, j, k] = i + j + k +end + +grid = RectilinearGrid(arch, size = (3, 3, 3), extent = (1, 1, 1)) +array = zeros(arch, 3, 3, 3) +imap = on_architecture(arch, [(i, j, k) for i in 1:3, j in 1:3, k in 1:2]) + +launch!(arch, grid_cpu, :xyz, _test_indices, array; active_cells_map = imap) \ No newline at end of file