From 34d19c346d6dede722761c0b3db4d7a99743bbf9 Mon Sep 17 00:00:00 2001
From: nefrathenrici
Date: Fri, 3 Jan 2025 09:01:50 -0800
Subject: [PATCH] wip

---
 ext/ClimaCommsMPIExt.jl | 28 ++++++++++++++++++++++++++++
 src/singleton.jl        |  6 ++++++
 2 files changed, 34 insertions(+)

diff --git a/ext/ClimaCommsMPIExt.jl b/ext/ClimaCommsMPIExt.jl
index d04732b9..ccd5ce08 100644
--- a/ext/ClimaCommsMPIExt.jl
+++ b/ext/ClimaCommsMPIExt.jl
@@ -271,4 +271,32 @@ function ClimaComms.finish(
     MPI.Waitall(ghost.send_reqs)
 end
 
+function Base.summary(io::IO, ctx::ClimaComms.MPICommsContext)
+    if !MPI.Initialized()
+        ClimaComms.iamroot(ctx) && @warn "MPI is not initialized."
+        return nothing
+    end
+
+    if ClimaComms.iamroot(ctx)
+        println(io, "Context: $(typeof(ctx).name.name)")
+        println(io, "Device: $(typeof(ctx.device))")
+        println(io, "Total Processes: $(ClimaComms.nprocs(ctx))")
+    end
+    ClimaComms.barrier(ctx)
+    rank = MPI.Comm_rank(ctx.mpicomm)
+    node_name = MPI.Get_processor_name()
+
+    if ctx.device isa ClimaComms.CUDADevice
+        # Match GPU rank assignment - should this live in a function to avoid code duplication?
+        local_comm = MPI.Comm_split_type(ctx.mpicomm, MPI.COMM_TYPE_SHARED, rank)
+        local_rank = MPI.Comm_rank(local_comm)
+        local_size = MPI.Comm_size(local_comm)
+        print(io, "Rank: $rank, Local Rank: $local_rank, Node: $node_name")
+
+        MPI.free(local_comm)
+    else
+        print(io, "Rank: $rank, Node: $node_name")
+    end
+end
+
 end
diff --git a/src/singleton.jl b/src/singleton.jl
index 82e3f1d7..ed73c877 100644
--- a/src/singleton.jl
+++ b/src/singleton.jl
@@ -49,3 +49,9 @@ graph_context(ctx::SingletonCommsContext, args...) = SingletonGraphContext(ctx)
 start(gctx::SingletonGraphContext) = nothing
 progress(gctx::SingletonGraphContext) = nothing
 finish(gctx::SingletonGraphContext) = nothing
+
+function Base.summary(io::IO, ctx::SingletonCommsContext)
+    println(io)
+    println(io, "Context: $(typeof(ctx).name.name)")
+    println(io, "Device: $(typeof(ctx.device))")
+end
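
Usage sketch (reviewer note, not part of the patch): a minimal example of how the new Base.summary methods might be exercised. ClimaComms.context() and ClimaComms.init() are existing ClimaComms entry points assumed here; the exact output depends on the local device and whether the process was launched under MPI.

    using ClimaComms

    ctx = ClimaComms.context()   # SingletonCommsContext or MPICommsContext, depending on the environment
    ClimaComms.init(ctx)         # initializes MPI when running under an MPI launcher
    summary(stdout, ctx)         # prints the context type, device, and (under MPI) per-rank/node info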