From feddc5c799eb8567b3556516fd068f33efbcee9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gunnar=20Farneb=C3=A4ck?=
Date: Thu, 19 Oct 2023 11:36:50 +0200
Subject: [PATCH 1/3] Add a high level release function.

---
 README.md              |  4 ++++
 src/highlevel.jl       | 25 ++++++++++++++++++++++++-
 test/test_highlevel.jl |  8 ++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6031538..7379086 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,10 @@ julia> import CUDA, cuDNN
 julia> ORT.load_inference(path, execution_provider=:cuda)
 ```
 
+Memory allocated by a model is eventually released automatically once
+the model object goes out of scope and is deleted by the garbage
+collector. It can also be released immediately with `release(model)`.
+
 The low level API mirrors the official [C-API](https://github.com/microsoft/onnxruntime/blob/v1.8.1/include/onnxruntime/core/session/onnxruntime_c_api.h#L347). The above example looks like this:
 ```julia
 using ONNXRunTime.CAPI
diff --git a/src/highlevel.jl b/src/highlevel.jl
index 5c6ccd4..58b7a9e 100644
--- a/src/highlevel.jl
+++ b/src/highlevel.jl
@@ -12,7 +12,7 @@ end
 using .CAPI
 using .CAPI: juliatype, EXECUTION_PROVIDERS
 
-export InferenceSession, load_inference
+export InferenceSession, load_inference, release
 
 """
     $TYPEDEF
@@ -174,8 +174,31 @@ function (o::InferenceSession)(
             throw(ArgumentError(msg))
         end
     end
+    isalive(o) || error("Session has been released and can no longer be called.")
     inp_names, input_tensors = prepare_inputs(o, inputs)
     run_options = nothing
     output_tensors = Run(o.api, o.session, run_options, inp_names, input_tensors, output_names)
     make_output(o, inputs, output_names, output_tensors)
 end
+
+"""
+    release(o::InferenceSession)::Nothing
+
+Release memory allocated to an [`InferenceSession`](@ref). This also
+happens automatically when the object has gone out of scope and the
+garbage collector deletes it.
+
+However, there is no guarantee of when that happens, so it can be
+useful to release the memory manually. This is especially true when
+the model has allocated GPU memory, which does not put pressure on
+the garbage collector to run promptly.
+
+Using the inference session after releasing it is an error.
+"""
+function release(o::InferenceSession)
+    CAPI.release(o.api, o.session)
+    CAPI.release(o.api, o.meminfo)
+    CAPI.release(o.api, o.allocator)
+end
+
+isalive(o::InferenceSession) = all(CAPI.isalive, (o.session, o.meminfo, o.allocator))
diff --git a/test/test_highlevel.jl b/test/test_highlevel.jl
index 55a9535..330c801 100644
--- a/test/test_highlevel.jl
+++ b/test/test_highlevel.jl
@@ -151,6 +151,14 @@ using ONNXRunTime: juliatype
         @test out.x_plus_1 ≈ x .+ 1
         @test out.y_plus_2 ≈ y .+ 2
     end
+    @testset "Release session" begin
+        path = ORT.testdatapath("increment2x3.onnx")
+        model = ORT.load_inference(path, execution_provider=:cpu)
+        input = randn(Float32, 2, 3)
+        y = model((;input))
+        release(model)
+        @test_throws ErrorException y = model((;input))
+    end
 end
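A minimal usage sketch of the `release` API introduced by this patch, assembled from the README addition and the new test; `testdatapath` and the `increment2x3.onnx` model are helpers from the package's test data:

```julia
import ONNXRunTime as ORT
using ONNXRunTime: release

path = ORT.testdatapath("increment2x3.onnx")
model = ORT.load_inference(path, execution_provider=:cpu)
input = randn(Float32, 2, 3)
output = model((; input))  # inference works while the session is alive
release(model)             # immediately frees session, meminfo and allocator
model((; input))           # now throws an ErrorException
```
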
From 098582aa425730c20a3e1ee166628a5441043626 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gunnar=20Farneb=C3=A4ck?=
Date: Thu, 19 Oct 2023 11:45:40 +0200
Subject: [PATCH 2/3] Add a missing line to the cuda extension test script.

---
 test/test_cuda_extension.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_cuda_extension.jl b/test/test_cuda_extension.jl
index fe2a1f5..6545fc2 100644
--- a/test/test_cuda_extension.jl
+++ b/test/test_cuda_extension.jl
@@ -77,6 +77,7 @@ end
         using ONNXRunTime
         load_inference("$(onnx_path)", execution_provider = :cpu)
         """
+    @test success(run(`julia +1.9 --project=$(env) -e "$(test_script)"`))
     # CUDA not loaded. Well, cuDNN pulls in CUDA so this passes anyway.
     test_script = """
         using ONNXRunTime

From 9260fe20982d6e95f86a6f827319bf241e369773 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gunnar=20Farneb=C3=A4ck?=
Date: Thu, 19 Oct 2023 12:28:43 +0200
Subject: [PATCH 3/3] Minor tweaks and bump version.

---
 Project.toml           | 2 +-
 src/highlevel.jl       | 2 +-
 test/test_highlevel.jl | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 42e7791..e9a8bda 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "ONNXRunTime"
 uuid = "e034b28e-924e-41b2-b98f-d2bbeb830c6a"
 authors = ["Jan Weidner and contributors"]
-version = "0.4.0"
+version = "0.4.1"
 
 [deps]
 ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
diff --git a/src/highlevel.jl b/src/highlevel.jl
index 58b7a9e..3c13bf2 100644
--- a/src/highlevel.jl
+++ b/src/highlevel.jl
@@ -146,6 +146,7 @@ function (o::InferenceSession)(
     inputs,
     output_names=nothing
 )
+    isalive(o) || error("Session has been released and can no longer be called.")
     if output_names === nothing
         output_names = @__MODULE__().output_names(o)
     end
@@ -174,7 +175,6 @@ function (o::InferenceSession)(
             throw(ArgumentError(msg))
         end
     end
-    isalive(o) || error("Session has been released and can no longer be called.")
     inp_names, input_tensors = prepare_inputs(o, inputs)
     run_options = nothing
     output_tensors = Run(o.api, o.session, run_options, inp_names, input_tensors, output_names)
diff --git a/test/test_highlevel.jl b/test/test_highlevel.jl
index 330c801..d9565d1 100644
--- a/test/test_highlevel.jl
+++ b/test/test_highlevel.jl
@@ -158,6 +158,7 @@ using ONNXRunTime: juliatype
         y = model((;input))
         release(model)
         @test_throws ErrorException y = model((;input))
+        @test_throws "Session has been released and can no longer be called." y = model((;input))
     end
 end
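
Since using a session after `release` is an error and garbage-collection timing is not guaranteed, a scoped helper can make manual release robust against exceptions. A sketch of one possible pattern, assuming only the API from these patches; `with_inference` is a hypothetical name and not part of the package:

```julia
import ONNXRunTime as ORT
using ONNXRunTime: release

# Hypothetical helper (not provided by ONNXRunTime): load a session, pass it
# to `f`, and release it deterministically even if `f` throws.
function with_inference(f, path; kwargs...)
    model = ORT.load_inference(path; kwargs...)
    try
        return f(model)
    finally
        release(model)
    end
end

output = with_inference(ORT.testdatapath("increment2x3.onnx");
                        execution_provider=:cpu) do model
    model((; input = randn(Float32, 2, 3)))
end
```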