From d0b1a6c3cbd604a6601125d68fbfafaa64976de0 Mon Sep 17 00:00:00 2001 From: Kibaek Kim Date: Tue, 8 Aug 2023 22:55:01 -0500 Subject: [PATCH 1/3] fixed warnings for race condition --- src/CUDA/dcauchy.jl | 2 ++ src/CUDA/dprsrch.jl | 1 + src/CUDA/dspcg.jl | 1 + 3 files changed, 4 insertions(+) diff --git a/src/CUDA/dcauchy.jl b/src/CUDA/dcauchy.jl index cbff20b..2a5a14e 100644 --- a/src/CUDA/dcauchy.jl +++ b/src/CUDA/dcauchy.jl @@ -33,6 +33,7 @@ q = p5*ddot(n,s,1,wa,1) + gts interp = (q >= mu0*gts) end + CUDA.sync_threads() # Either interpolate or extrapolate to find a successful step. @@ -81,6 +82,7 @@ search = false end end + CUDA.sync_threads() # Recover the last successful step. diff --git a/src/CUDA/dprsrch.jl b/src/CUDA/dprsrch.jl index 0f645a9..89187f6 100644 --- a/src/CUDA/dprsrch.jl +++ b/src/CUDA/dprsrch.jl @@ -46,6 +46,7 @@ alpha = interpf*alpha end end + CUDA.sync_threads() # Force at least one more constraint to be added to the active # set if alpha < brptmin and the full step is not successful. diff --git a/src/CUDA/dspcg.jl b/src/CUDA/dspcg.jl index 4895d72..605d48b 100644 --- a/src/CUDA/dspcg.jl +++ b/src/CUDA/dspcg.jl @@ -51,6 +51,7 @@ end end end + CUDA.sync_threads() nfree = CUDA.shfl_sync(0xffffffff, nfree, 1) # Exit if there are no free constraints. From 1537a8b6e97e0cbd0303e77a8f28055df42b4ee6 Mon Sep 17 00:00:00 2001 From: Kibaek Kim Date: Thu, 10 Aug 2023 14:57:15 -0500 Subject: [PATCH 2/3] revert --- src/CUDA/dcauchy.jl | 2 -- src/CUDA/dprsrch.jl | 1 - 2 files changed, 3 deletions(-) diff --git a/src/CUDA/dcauchy.jl b/src/CUDA/dcauchy.jl index 2a5a14e..cbff20b 100644 --- a/src/CUDA/dcauchy.jl +++ b/src/CUDA/dcauchy.jl @@ -33,7 +33,6 @@ q = p5*ddot(n,s,1,wa,1) + gts interp = (q >= mu0*gts) end - CUDA.sync_threads() # Either interpolate or extrapolate to find a successful step. @@ -82,7 +81,6 @@ search = false end end - CUDA.sync_threads() # Recover the last successful step. diff --git a/src/CUDA/dprsrch.jl b/src/CUDA/dprsrch.jl index 89187f6..0f645a9 100644 --- a/src/CUDA/dprsrch.jl +++ b/src/CUDA/dprsrch.jl @@ -46,7 +46,6 @@ alpha = interpf*alpha end end - CUDA.sync_threads() # Force at least one more constraint to be added to the active # set if alpha < brptmin and the full step is not successful. From 03c23328e32ae659faa74461cceb112ffd36b352 Mon Sep 17 00:00:00 2001 From: Kibaek Kim Date: Thu, 10 Aug 2023 14:57:54 -0500 Subject: [PATCH 3/3] maybe the root cause of race warnings --- src/CUDA/ddot.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CUDA/ddot.jl b/src/CUDA/ddot.jl index 0a7740b..638ca24 100644 --- a/src/CUDA/ddot.jl +++ b/src/CUDA/ddot.jl @@ -9,5 +9,6 @@ @inbounds for i=1:n v += dx[i]*dy[i] end + CUDA.sync_threads() return v end