From feddc5c799eb8567b3556516fd068f33efbcee9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gunnar=20Farneb=C3=A4ck?=
Date: Thu, 19 Oct 2023 11:36:50 +0200
Subject: [PATCH 1/3] Add a high level release function.

---
 README.md              |  4 ++++
 src/highlevel.jl       | 25 ++++++++++++++++++++++++-
 test/test_highlevel.jl |  8 ++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6031538..7379086 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,10 @@ julia> import CUDA, cuDNN
 julia> ORT.load_inference(path, execution_provider=:cuda)
 ```
 
+Memory allocated by a model is eventually released automatically once
+the model object goes out of scope and is deleted by the garbage
+collector. It can also be released immediately with `release(model)`.
+
 The low level API mirrors the official [C-API](https://github.com/microsoft/onnxruntime/blob/v1.8.1/include/onnxruntime/core/session/onnxruntime_c_api.h#L347). The above example looks like this:
 ```julia
 using ONNXRunTime.CAPI
diff --git a/src/highlevel.jl b/src/highlevel.jl
index 5c6ccd4..58b7a9e 100644
--- a/src/highlevel.jl
+++ b/src/highlevel.jl
@@ -12,7 +12,7 @@ end
 using .CAPI
 using .CAPI: juliatype, EXECUTION_PROVIDERS
 
-export InferenceSession, load_inference
+export InferenceSession, load_inference, release
 
 """
     $TYPEDEF
@@ -174,8 +174,31 @@ function (o::InferenceSession)(
             throw(ArgumentError(msg))
         end
     end
+    isalive(o) || error("Session has been released and can no longer be called.")
     inp_names, input_tensors = prepare_inputs(o, inputs)
     run_options = nothing
     output_tensors = Run(o.api, o.session, run_options, inp_names, input_tensors, output_names)
     make_output(o, inputs, output_names, output_tensors)
 end
+
+"""
+    release(o::InferenceSession)::Nothing
+
+Release memory allocated to an [`InferenceSession`](@ref). This also
+happens automatically when the object has gone out of scope and the
+garbage collector deletes it.
+
+However, there is no guarantee of when that happens, so it can be
+useful to release the memory manually. This is especially true when
+the model has allocated GPU memory, which does not put pressure on
+the garbage collector to run promptly.
+
+Using the inference session after releasing it is an error.
+"""
+function release(o::InferenceSession)
+    CAPI.release(o.api, o.session)
+    CAPI.release(o.api, o.meminfo)
+    CAPI.release(o.api, o.allocator)
+end
+
+isalive(o::InferenceSession) = all(CAPI.isalive, (o.session, o.meminfo, o.allocator))
diff --git a/test/test_highlevel.jl b/test/test_highlevel.jl
index 55a9535..330c801 100644
--- a/test/test_highlevel.jl
+++ b/test/test_highlevel.jl
@@ -151,6 +151,14 @@ using ONNXRunTime: juliatype
         @test out.x_plus_1 ≈ x .+ 1
         @test out.y_plus_2 ≈ y .+ 2
     end
+    @testset "Release session" begin
+        path = ORT.testdatapath("increment2x3.onnx")
+        model = ORT.load_inference(path, execution_provider=:cpu)
+        input = randn(Float32, 2, 3)
+        y = model((;input))
+        release(model)
+        @test_throws ErrorException y = model((;input))
+    end
 end
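A minimal usage sketch of the `release` API introduced by this patch, assembled from the README addition and the new test; `testdatapath` and the `increment2x3.onnx` model are helpers from the package's test data:

```julia
import ONNXRunTime as ORT
using ONNXRunTime: release

path = ORT.testdatapath("increment2x3.onnx")
model = ORT.load_inference(path, execution_provider=:cpu)
input = randn(Float32, 2, 3)
output = model((; input))  # inference works while the session is alive
release(model)             # immediately frees session, meminfo and allocator
model((; input))           # now throws an ErrorException
```
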
From 098582aa425730c20a3e1ee166628a5441043626 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gunnar=20Farneb=C3=A4ck?=
Date: Thu, 19 Oct 2023 11:45:40 +0200
Subject: [PATCH 2/3] Add a missing line to the cuda extension test script.

---
 test/test_cuda_extension.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_cuda_extension.jl b/test/test_cuda_extension.jl
index fe2a1f5..6545fc2 100644
--- a/test/test_cuda_extension.jl
+++ b/test/test_cuda_extension.jl
@@ -77,6 +77,7 @@ end
         using ONNXRunTime
         load_inference("$(onnx_path)", execution_provider = :cpu)
         """
+    @test success(run(`julia +1.9 --project=$(env) -e "$(test_script)"`))
     # CUDA not loaded. Well, cuDNN pulls in CUDA so this passes anyway.
     test_script = """
         using ONNXRunTime

From 9260fe20982d6e95f86a6f827319bf241e369773 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gunnar=20Farneb=C3=A4ck?=
Date: Thu, 19 Oct 2023 12:28:43 +0200
Subject: [PATCH 3/3] Minor tweaks and bump version.

---
 Project.toml           | 2 +-
 src/highlevel.jl       | 2 +-
 test/test_highlevel.jl | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 42e7791..e9a8bda 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "ONNXRunTime"
 uuid = "e034b28e-924e-41b2-b98f-d2bbeb830c6a"
 authors = ["Jan Weidner and contributors"]
-version = "0.4.0"
+version = "0.4.1"
 
 [deps]
 ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
diff --git a/src/highlevel.jl b/src/highlevel.jl
index 58b7a9e..3c13bf2 100644
--- a/src/highlevel.jl
+++ b/src/highlevel.jl
@@ -146,6 +146,7 @@ function (o::InferenceSession)(
     inputs,
     output_names=nothing
 )
+    isalive(o) || error("Session has been released and can no longer be called.")
     if output_names === nothing
         output_names = @__MODULE__().output_names(o)
     end
@@ -174,7 +175,6 @@ function (o::InferenceSession)(
             throw(ArgumentError(msg))
         end
     end
-    isalive(o) || error("Session has been released and can no longer be called.")
     inp_names, input_tensors = prepare_inputs(o, inputs)
     run_options = nothing
     output_tensors = Run(o.api, o.session, run_options, inp_names, input_tensors, output_names)
diff --git a/test/test_highlevel.jl b/test/test_highlevel.jl
index 330c801..d9565d1 100644
--- a/test/test_highlevel.jl
+++ b/test/test_highlevel.jl
@@ -158,6 +158,7 @@ using ONNXRunTime: juliatype
         y = model((;input))
         release(model)
         @test_throws ErrorException y = model((;input))
+        @test_throws "Session has been released and can no longer be called." y = model((;input))
     end
 end
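
Since using a session after `release` is an error and garbage-collection timing is not guaranteed, a scoped helper can make manual release robust against exceptions. A sketch of one possible pattern, assuming only the API from these patches; `with_inference` is a hypothetical name and not part of the package:

```julia
import ONNXRunTime as ORT
using ONNXRunTime: release

# Hypothetical helper (not provided by ONNXRunTime): load a session, pass it
# to `f`, and release it deterministically even if `f` throws.
function with_inference(f, path; kwargs...)
    model = ORT.load_inference(path; kwargs...)
    try
        return f(model)
    finally
        release(model)
    end
end

output = with_inference(ORT.testdatapath("increment2x3.onnx");
                        execution_provider=:cpu) do model
    model((; input = randn(Float32, 2, 3)))
end
```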