Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

@mtlprintf #418

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
9 changes: 9 additions & 0 deletions docs/src/api/kernel.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,13 @@ MtlThreadGroupArray
MemoryFlags
threadgroup_barrier
simdgroup_barrier
```

## Printing

```@docs
@mtlprintf
@mtlprint
@mtlprintln
@mtlshow
```
28 changes: 28 additions & 0 deletions docs/src/usage/kernel.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,34 @@ Additional notes:
- Kernels must always return nothing
- Kernels are asynchronous. To synchronize, use the `Metal.@sync` macro.

## Printing

When debugging, it's not uncommon to want to print some values. This is achieved with `@mtlprintf`:

```julia
# Add `x` to `y` element-wise on the GPU, printing each thread's index as it runs.
function gpu_add2_print!(y, x)
    index = thread_position_in_grid_1d()
    @mtlprintf("thread %d", index)
    # Use the thread position computed above as the element index.
    @inbounds y[index] += x[index]
    return nothing
end

A = Metal.ones(Float32, 8);
B = Metal.rand(Float32, 8);

@metal threads=length(A) gpu_add2_print!(A, B)
```

`@mtlprintf` is supported on macOS 15 and later. `@mtlprintf` supports most of the format specifiers that `printf`
supports in C, with the following exceptions:
- `%n` and `%s` conversion specifiers are not supported
- Default argument promotion applies to arguments of half type, which are promoted to the `double` type
- The format string must be a string literal

Metal places output from `@mtlprintf` into a log buffer. The system only removes the messages from the log buffer when the command buffer completes. When the log buffer becomes full, the system drops all subsequent messages.

See also: `@mtlprint`, `@mtlprintln` and `@mtlshow`
christiangnrd marked this conversation as resolved.
Show resolved Hide resolved

## Other Helpful Links

[Metal Shading Language Specification](https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf)
1 change: 1 addition & 0 deletions lib/mtl/MTL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ include("events.jl")
include("fences.jl")
include("heap.jl")
include("buffer.jl")
include("log_state.jl")
include("command_queue.jl")
include("command_buf.jl")
include("compute_pipeline.jl")
Expand Down
22 changes: 22 additions & 0 deletions lib/mtl/command_queue.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@

export MTLCommandQueueDescriptor

# @objcwrapper immutable=false MTLCommandQueueDescriptor <: NSObject

"""
    MTLCommandQueueDescriptor()

Allocate and initialize a fresh `MTLCommandQueueDescriptor`.

The wrapper object has `release` registered as its finalizer before the
Objective-C `init` message is sent.
"""
function MTLCommandQueueDescriptor()
    ptr = @objc [MTLCommandQueueDescriptor alloc]::id{MTLCommandQueueDescriptor}
    # `finalizer` returns the object it was given, so wrap and register in one step.
    desc = finalizer(release, MTLCommandQueueDescriptor(ptr))
    @objc [desc::id{MTLCommandQueueDescriptor} init]::id{MTLCommandQueueDescriptor}
    return desc
end

"""
    MTLCommandQueue(dev::MTLDevice, descriptor::MTLCommandQueueDescriptor)

Create a command queue on `dev` configured by `descriptor`, using the device's
`newCommandQueueWithDescriptor:` message. The returned wrapper has `release`
registered as its finalizer.
"""
function MTLCommandQueue(dev::MTLDevice, descriptor::MTLCommandQueueDescriptor)
    ptr = @objc [dev::id{MTLDevice} newCommandQueueWithDescriptor:descriptor::id{MTLCommandQueueDescriptor}]::id{MTLCommandQueue}
    # `finalizer` hands back the object it registered, so it can be returned directly.
    return finalizer(release, MTLCommandQueue(ptr))
end


export MTLCommandQueue

# @objcwrapper immutable=false MTLCommandQueue <: NSObject
Expand All @@ -8,3 +29,4 @@ function MTLCommandQueue(dev::MTLDevice)
finalizer(release, obj)
return obj
end

4 changes: 2 additions & 2 deletions lib/mtl/libmtl.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1395,7 +1395,7 @@ end
@autoproperty dispatchType::MTLDispatchType
end

@objcwrapper immutable = true availability = macos(v"15.0.0") MTLCommandQueueDescriptor <: NSObject
@objcwrapper immutable = false availability = macos(v"15.0.0") MTLCommandQueueDescriptor <: NSObject

@objcproperties MTLCommandQueueDescriptor begin
@autoproperty maxCommandBufferCount::UInt64 setter = setMaxCommandBufferCount
Expand Down Expand Up @@ -2675,7 +2675,7 @@ end
MTLLogLevelFault = 5
end

@objcwrapper immutable = true availability = macos(v"15.0.0") MTLLogStateDescriptor <: NSObject
@objcwrapper immutable = false availability = macos(v"15.0.0") MTLLogStateDescriptor <: NSObject

@objcproperties MTLLogStateDescriptor begin
@autoproperty level::MTLLogLevel setter = setLevel
Expand Down
26 changes: 26 additions & 0 deletions lib/mtl/log_state.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
export MTLLogLevel

export MTLLogStateDescriptor

# @objcwrapper immutable = false MTLLogStateDescriptor <: NSObject

"""
    MTLLogStateDescriptor()

Allocate and initialize a fresh `MTLLogStateDescriptor`.

The wrapper object has `release` registered as its finalizer before the
Objective-C `init` message is sent.
"""
function MTLLogStateDescriptor()
    ptr = @objc [MTLLogStateDescriptor alloc]::id{MTLLogStateDescriptor}
    # `finalizer` returns the object it was given, so wrap and register in one step.
    desc = finalizer(release, MTLLogStateDescriptor(ptr))
    @objc [desc::id{MTLLogStateDescriptor} init]::id{MTLLogStateDescriptor}
    return desc
end


export MTLLogState

# @objcwrapper immutable = true MTLLogState <: NSObject

"""
    MTLLogState(dev::MTLDevice, descriptor::MTLLogStateDescriptor)

Create a log state on `dev` from `descriptor` via the device's
`newLogStateWithDescriptor:error:` message.

Throws an `NSError` if the call reports an error through its `error:` out-parameter.
"""
function MTLLogState(dev::MTLDevice, descriptor::MTLLogStateDescriptor)
    err_ref = Ref{id{NSError}}(nil)
    ptr = @objc [dev::id{MTLDevice} newLogStateWithDescriptor:descriptor::id{MTLLogStateDescriptor}
                                    error:err_ref::Ptr{id{NSError}}]::id{MTLLogState}
    # A non-nil error pointer means the creation failed; surface it to the caller.
    if !(err_ref[] == nil)
        throw(NSError(err_ref[]))
    end
    return MTLLogState(ptr)
end
6 changes: 6 additions & 0 deletions res/wrap/libmtl.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ immutable=false
[api.MTLCommandQueue]
immutable=false

[api.MTLCommandQueueDescriptor]
immutable=false

[api.MTLCompileOptions]
immutable=false
[api.MTLCompileOptions.proptype]
Expand Down Expand Up @@ -85,6 +88,9 @@ immutable=false
[api.MTLLibrary]
immutable=false

[api.MTLLogStateDescriptor]
immutable=false

[api.MTLSharedEvent]
immutable=false

Expand Down
1 change: 1 addition & 0 deletions src/Metal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ include("device/intrinsics/synchronization.jl")
include("device/intrinsics/memory.jl")
include("device/intrinsics/simd.jl")
include("device/intrinsics/atomics.jl")
include("device/intrinsics/output.jl")
include("device/quirks.jl")

# array essentials
Expand Down
8 changes: 4 additions & 4 deletions src/compiler/compilation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ function compile(@nospecialize(job::CompilerJob))

@signpost_interval log=log_compiler() "Generate LLVM IR" begin
# TODO: on 1.9, this actually creates a context. cache those.
ir, entry = JuliaContext() do ctx
ir, entry, loggingEnabled = JuliaContext() do ctx
mod, meta = GPUCompiler.compile(:llvm, job)
string(mod), LLVM.name(meta.entry)
string(mod), LLVM.name(meta.entry), haskey(functions(mod), "air.os_log")
end
end

Expand Down Expand Up @@ -172,7 +172,7 @@ function compile(@nospecialize(job::CompilerJob))
end
end

return (; ir, air, metallib, entry)
return (; ir, air, metallib, entry, loggingEnabled)
end

# link into an executable kernel
Expand Down Expand Up @@ -210,5 +210,5 @@ end
end
end

pipeline_state
pipeline_state, compiled.loggingEnabled
end
35 changes: 32 additions & 3 deletions src/compiler/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ mtlconvert(arg, cce=nothing) = adapt(Adaptor(cce), arg)
"""
    HostKernel{F,TT}

Host-side handle to a compiled kernel.

# Fields
- `f::F`: the function instance the kernel was compiled from.
- `pipeline::MTLComputePipelineState`: the compute pipeline state used to launch it.
- `loggingEnabled::Bool`: whether the kernel uses logging; when `true`, the launch
  path creates its command buffer with a log state attached.
"""
struct HostKernel{F,TT}
    f::F
    pipeline::MTLComputePipelineState
    loggingEnabled::Bool
end

const mtlfunction_lock = ReentrantLock()
Expand All @@ -186,15 +187,15 @@ function mtlfunction(f::F, tt::TT=Tuple{}; name=nothing, kwargs...) where {F,TT}
cache = compiler_cache(dev)
source = methodinstance(F, tt)
config = compiler_config(dev; name, kwargs...)::MetalCompilerConfig
pipeline = GPUCompiler.cached_compilation(cache, source, config, compile, link)
pipeline, loggingEnabled = GPUCompiler.cached_compilation(cache, source, config, compile, link)

# create a callable object that captures the function instance. we don't need to think
# about world age here, as GPUCompiler already does and will return a different object
h = hash(pipeline, hash(f, hash(tt)))
kernel = get(_kernel_instances, h, nothing)
if kernel === nothing
# create the kernel state object
kernel = HostKernel{F,tt}(f, pipeline)
kernel = HostKernel{F, tt}(f, pipeline, loggingEnabled)
_kernel_instances[h] = kernel
end
return kernel::HostKernel{F,tt}
Expand Down Expand Up @@ -275,7 +276,35 @@ end
(threads.width * threads.height * threads.depth) > kernel.pipeline.maxTotalThreadsPerThreadgroup &&
throw(ArgumentError("Number of threads in group ($(threads.width * threads.height * threads.depth)) should not exceed $(kernel.pipeline.maxTotalThreadsPerThreadgroup)"))

cmdbuf = MTLCommandBuffer(queue)
cmdbuf = if kernel.loggingEnabled
# TODO: make this a dynamic error, i.e., from the kernel (JuliaGPU/Metal.jl#433)
@static if !is_macos(v"15.0.0")
error("Logging is only supported on macOS 15 or higher")
end
Comment on lines +281 to +283
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The macOS 13/14 output test fails because compilation fails before reaching this check. I tested it before rebasing and the availability PR doesn't seem to affect it.


if MTLCaptureManager().isCapturing
error("Logging is not supported while GPU frame capturing")
end

log_state_descriptor = MTLLogStateDescriptor()
log_state_descriptor.level = MTL.MTLLogLevelDebug
log_state = MTLLogState(queue.device, log_state_descriptor)

# Handler registered on the log state: prints the text of each log message.
# Only `message` is used; the other arguments are accepted to match the block
# signature and are ignored.
function log_handler(subSystem, category, logLevel, message)
    text = String(NSString(message))
    Core.print(text)
    return nothing
end

block = @objcblock(log_handler, Nothing, (id{NSString}, id{NSString}, NSInteger, id{NSString}))
@objc [log_state::id{MTLLogState} addLogHandler:block::id{NSBlock}]::Nothing

cmdbuf_descriptor = MTLCommandBufferDescriptor()
cmdbuf_descriptor.logState = log_state
MTLCommandBuffer(queue, cmdbuf_descriptor)
else
MTLCommandBuffer(queue)
end

cmdbuf.label = "MTLCommandBuffer($(nameof(kernel.f)))"
cce = MTLComputeCommandEncoder(cmdbuf)
argument_buffers = try
Expand Down
Loading
Loading