From 3ebc6bc7f73c1e4af5e93d3540785975addb0742 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Tue, 18 Apr 2023 09:53:04 -0500 Subject: [PATCH] Migrate to KA 0.9 * Ported to KA 0.9 API * Project.toml for tests --- Project.toml | 21 ++------- examples/Project.toml | 2 - examples/admm/acopf_admm_gpu.jl | 60 ++++++++++++++------------ examples/admm/environment.jl | 2 +- examples/admm/generator_kernel.jl | 9 ++-- examples/opf.jl | 4 +- test/KA.jl | 71 ++++++++++++++++++++----------- test/Project.toml | 12 ++++++ test/admmtest.jl | 2 - test/runtests.jl | 6 +-- 10 files changed, 102 insertions(+), 87 deletions(-) create mode 100644 test/Project.toml diff --git a/Project.toml b/Project.toml index dcdf024..51b4a36 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ExaTron" uuid = "28b18bf8-76f9-41ea-81fa-0f922810b349" authors = ["Youngdae Kim ", "François Pacaud ", "Kibaek Kim ", "Michel Schanen "] -version = "2.1.3" +version = "3.0.0" [deps] DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" @@ -13,22 +13,7 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df" TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" [compat] -KernelAbstractions = "0.8" +KernelAbstractions = "0.9" Requires = "1" TOML = "1" -julia = "1.7" - -[extras] -AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57" -KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" -PowerModels = "c36e90e8-916a-50a6-bd94-075b64ef4655" -ROCKernels = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["AMDGPU", "CUDA", "CUDAKernels", "KernelAbstractions", "PowerModels", "Random", "ROCKernels", "SparseArrays", "StatsBase", "Test"] +julia = "1.8" diff --git a/examples/Project.toml b/examples/Project.toml index 7bb4b3a..05c0163 100644 --- a/examples/Project.toml +++ b/examples/Project.toml @@ -1,12 +1,10 @@ [deps] AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57" ExaTronKernels = "dec458bd-9b7b-4867-ad20-20d9a112d608" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" PowerModels = "c36e90e8-916a-50a6-bd94-075b64ef4655" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" -ROCKernels = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9" Requires = "ae029012-a4dd-5104-9daa-d747884805df" diff --git a/examples/admm/acopf_admm_gpu.jl b/examples/admm/acopf_admm_gpu.jl index d157e80..8be12ed 100644 --- a/examples/admm/acopf_admm_gpu.jl +++ b/examples/admm/acopf_admm_gpu.jl @@ -31,7 +31,7 @@ function get_generator_data(data::OPFData, ::KA.CPU) return pgmin,pgmax,qgmin,qgmax,c2,c1,c0 end -function get_generator_data(data::OPFData, ::CUDADevice) +function get_generator_data(data::OPFData, ::CUDABackend) ngen = length(data.generators) pgmin = CuArray{Float64}(undef, ngen) @@ -109,7 +109,7 @@ function get_bus_data(data::OPFData, ::KA.CPU) return FrStart,FrIdx,ToStart,ToIdx,GenStart,GenIdx,Pd,Qd end -function get_bus_data(data::OPFData, ::CUDADevice) +function get_bus_data(data::OPFData, ::CUDABackend) nbus = length(data.buses) FrIdx = [l for b=1:nbus for l in data.FromLines[b]] @@ -194,7 +194,7 @@ function get_branch_data(data::OPFData, device::KA.CPU) ybus.YttR, ybus.YttI, ybus.YtfR, ybus.YtfI, frBound, toBound end -function get_branch_data(data::OPFData, device::CUDADevice) +function get_branch_data(data::OPFData, device::CUDABackend) buses = data.buses lines = data.lines BusIdx = data.BusIdx @@ -492,40 +492,44 @@ function admm_solve!(env::AdmmEnv, sol::SolutionOneLevel; iterlim=800, scale=1e- break end else - wait(copy_data_kernel(env.device, 64, mod.nvar)(mod.nvar, sol.u_prev, sol.u_curr, dependencies=Event(env.device))) - wait(copy_data_kernel(env.device, 64, mod.nvar)(mod.nvar, sol.v_prev, sol.v_curr, dependencies=Event(env.device))) - wait(copy_data_kernel(env.device, 64, mod.nvar)(mod.nvar, sol.l_prev, sol.l_curr, dependencies=Event(env.device))) + copy_data_kernel(env.device, 64, mod.nvar)(mod.nvar, sol.u_prev, sol.u_curr) + copy_data_kernel(env.device, 64, mod.nvar)(mod.nvar, sol.v_prev, sol.v_curr) + copy_data_kernel(env.device, 64, mod.nvar)(mod.nvar, sol.l_prev, sol.l_curr) + KA.synchronize(env.device) generator_kernel(mod.gen_mod, data.baseMVA, sol.u_curr, sol.v_curr, sol.l_curr, sol.rho, env.device) - wait(polar_kernel(env.device, 32, mod.nline*32)(mod.n, mod.nline, mod.line_start, scale, - sol.u_curr, sol.v_curr, sol.l_curr, sol.rho, - shift_lines, env.membuf, mod.YffR, mod.YffI, mod.YftR, mod.YftI, - mod.YttR, mod.YttI, mod.YtfR, mod.YtfI, mod.FrBound, mod.ToBound, - dependencies=Event(env.device) - ) + polar_kernel(env.device, 32, mod.nline*32)( + mod.n, mod.nline, mod.line_start, scale, + sol.u_curr, sol.v_curr, sol.l_curr, sol.rho, + shift_lines, env.membuf, mod.YffR, mod.YffI, mod.YftR, mod.YftI, + mod.YttR, mod.YttI, mod.YtfR, mod.YtfI, mod.FrBound, mod.ToBound ) - wait(bus_kernel(env.device, 32, mod.nbus)(data.baseMVA, mod.nbus, mod.gen_mod.gen_start, mod.line_start, - mod.FrStart, mod.FrIdx, mod.ToStart, mod.ToIdx, mod.GenStart, - mod.GenIdx, mod.Pd, mod.Qd, sol.u_curr, sol.v_curr, sol.l_curr, - sol.rho, mod.YshR, mod.YshI, - dependencies=Event(env.device) - ) + KA.synchronize(env.device) + + bus_kernel(env.device, 32, mod.nbus)( + data.baseMVA, mod.nbus, mod.gen_mod.gen_start, mod.line_start, + mod.FrStart, mod.FrIdx, mod.ToStart, mod.ToIdx, mod.GenStart, + mod.GenIdx, mod.Pd, mod.Qd, sol.u_curr, sol.v_curr, sol.l_curr, + sol.rho, mod.YshR, mod.YshI ) + KA.synchronize(env.device) - wait(update_multiplier_kernel(env.device, 32, mod.nvar)(mod.nvar, sol.l_curr, - sol.u_curr, sol.v_curr, sol.rho, - dependencies=Event(env.device) - ) + update_multiplier_kernel(env.device, 32, mod.nvar)( + mod.nvar, sol.l_curr, + sol.u_curr, sol.v_curr, sol.rho ) - wait(primal_residual_kernel(env.device, 32, mod.nvar)(mod.nvar, sol.rp, sol.u_curr, sol.v_curr, - dependencies=Event(env.device) - ) + KA.synchronize(env.device) + + primal_residual_kernel(env.device, 32, mod.nvar)( + mod.nvar, sol.rp, sol.u_curr, sol.v_curr ) - wait(dual_residual_kernel(env.device, 32, mod.nvar)(mod.nvar, sol.rd, sol.v_prev, sol.v_curr, sol.rho, - dependencies=Event(env.device) - ) + KA.synchronize(env.device) + + dual_residual_kernel(env.device, 32, mod.nvar)( + mod.nvar, sol.rd, sol.v_prev, sol.v_curr, sol.rho ) + KA.synchronize(env.device) gpu_primres = norm(sol.rp) gpu_dualres = norm(sol.rd) diff --git a/examples/admm/environment.jl b/examples/admm/environment.jl index 323ea7f..81afc00 100644 --- a/examples/admm/environment.jl +++ b/examples/admm/environment.jl @@ -278,7 +278,7 @@ AdmmEnv(opfdata::OPFData, device::KA.CPU, rho_pq, rho_va; options...) = AdmmEnv{ opfdata, rho_pq, rho_va; device=device, options... ) -AdmmEnv(opfdata::OPFData, device::CUDADevice, rho_pq, rho_va; options...) = AdmmEnv{Float64, CuArray{Float64, 1}, CuArray{Int, 1}, CuArray{Float64, 2}}( +AdmmEnv(opfdata::OPFData, device::CUDABackend, rho_pq, rho_va; options...) = AdmmEnv{Float64, CuArray{Float64, 1}, CuArray{Int, 1}, CuArray{Float64, 2}}( opfdata, rho_pq, rho_va; device=device, options... ) diff --git a/examples/admm/generator_kernel.jl b/examples/admm/generator_kernel.jl index ddb2178..8b257fe 100644 --- a/examples/admm/generator_kernel.jl +++ b/examples/admm/generator_kernel.jl @@ -52,11 +52,12 @@ function generator_kernel( ) nblk = div(gen_mod.ngen, 32, RoundUp) nblk - wait(generator_kernel(device, 32, gen_mod.ngen)(baseMVA, gen_mod.ngen, gen_mod.gen_start, - u, v, l, rho, gen_mod.pgmin, gen_mod.pgmax, gen_mod.qgmin, gen_mod.qgmax, gen_mod.c2, gen_mod.c1, - dependencies=Event(device) - ) + generator_kernel(device, 32, gen_mod.ngen)( + baseMVA, gen_mod.ngen, gen_mod.gen_start, + u, v, l, rho, gen_mod.pgmin, gen_mod.pgmax, gen_mod.qgmin, gen_mod.qgmax, gen_mod.c2, gen_mod.c1 ) + KA.synchronize(device) + return 0.0 end diff --git a/examples/opf.jl b/examples/opf.jl index 41b3ca1..0ef9723 100644 --- a/examples/opf.jl +++ b/examples/opf.jl @@ -1,8 +1,6 @@ using KernelAbstractions using AMDGPU -using ROCKernels using CUDA -using CUDAKernels # One day for Intel GPUs # using oneAPI @@ -32,7 +30,7 @@ rho_va = parse(Float64, ARGS[2]) max_iter = parse(Int, ARGS[3]) # Indicate which GPU device to use device = CPU() -# device = CUDADevice() +# device = CUDABackend() # device = ROCDevice() # verbose = 0: No output # verbose = 1: Final result metrics diff --git a/test/KA.jl b/test/KA.jl index c38f635..e909677 100644 --- a/test/KA.jl +++ b/test/KA.jl @@ -6,6 +6,8 @@ using LinearAlgebra using Random using Test +const KA = KernelAbstractions + """ Test ExaTron's internal routines written for GPU. @@ -47,14 +49,12 @@ n = 4 nblk = 4 if has_cuda_gpu() - using CUDAKernels - device = CUDADevice() + device = CUDABackend() AT = CuArray elseif has_rocm_gpu() - using ROCKernels # Set for crusher login node to avoid other users AMDGPU.default_device!(AMDGPU.devices()[2]) - device = AMDGPU.ROCDevice() + device = AMDGPU.ROCBackend() AT = ROCArray else device = CPU() @@ -94,7 +94,8 @@ end d_in = AT{Float64,2}(undef, (n,n)) d_out = AT{Float64,2}(undef, (n,n)) copyto!(d_in, tron_A.vals) - wait(dicf_test(device, n)(Val{n}(), d_in, d_out, ndrange=(n,nblk), dependencies=Event(device))) + dicf_test(device, n)(Val{n}(), d_in, d_out, ndrange=(n,nblk)) + KA.synchronize(device) h_L = d_out |> Array tron_L = ExaTron.TronDenseMatrix{Array{Float64,2}}(n) @@ -149,7 +150,8 @@ end d_out = AT{Float64,2}(undef, (n,n)) alpha = 1.0 copyto!(dA, tron_A.vals) - wait(dicfs_test(device, n)(Val{n}(),alpha,dA,d_out,ndrange=(n, nblk),dependencies=Event(device))) + dicfs_test(device, n)(Val{n}(),alpha,dA,d_out,ndrange=(n, nblk)) + KA.synchronize(device) h_L = d_out |> Array iwa = zeros(Int, 3*n) wa1 = zeros(n) @@ -164,7 +166,8 @@ end tron_A.vals[j,j] = -tron_A.vals[j,j] end copyto!(dA, tron_A.vals) - wait(dicfs_test(device, n)(Val{n}(),alpha,dA,d_out,ndrange=(n,nblk),dependencies=Event(device))) + dicfs_test(device, n)(Val{n}(),alpha,dA,d_out,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(h_L, d_out) ExaTron.dicfs(n, n^2, tron_A, tron_L, 5, alpha, iwa, wa1, wa2) @@ -236,7 +239,8 @@ end copyto!(du, xu) copyto!(dg, g) copyto!(dA, A.vals) - wait(dcauchy_test(device, n)(Val{n}(),dx,dl,du,dA,dg,delta,alpha,d_out1,d_out2,ndrange=(n,nblk),dependencies=Event(device))) + dcauchy_test(device, n)(Val{n}(),dx,dl,du,dA,dg,delta,alpha,d_out1,d_out2,ndrange=(n,nblk)) + KA.synchronize(device) h_s = zeros(n) h_alpha = zeros(n) copyto!(h_s, d_out1) @@ -311,7 +315,8 @@ end d_out = AT{Float64}(undef, n) copyto!(d_in, A) copyto!(d_g, g) - wait(dtrpcg_test(device, n)(Val{n}(),delta,tol,stol,d_in,d_g,d_out_L,d_out,ndrange=(n,nblk),dependencies=Event(device))) + dtrpcg_test(device, n)(Val{n}(),delta,tol,stol,d_in,d_g,d_out_L,d_out,ndrange=(n,nblk)) + KA.synchronize(device) h_w = zeros(n) h_L = zeros(n,n) copyto!(h_L, d_out_L) @@ -396,7 +401,8 @@ end copyto!(dg, g) copyto!(dw, w) copyto!(dA, A.vals) - wait(dprsrch_test(device, n)(Val{n}(),dx,dl,du,dg,dw,dA,d_out1,d_out2,ndrange=(n,nblk),dependencies=Event(device))) + dprsrch_test(device, n)(Val{n}(),dx,dl,du,dg,dw,dA,d_out1,d_out2,ndrange=(n,nblk)) + KA.synchronize(device) h_x = zeros(n) h_w = zeros(n) copyto!(h_x, d_out1) @@ -437,7 +443,8 @@ end d_in = AT{Float64}(undef, 2*n) d_out = AT{Float64}(undef, n) copyto!(d_in, h_in) - wait(daxpy_test(device,n)(Val{n}(),da,d_in,d_out,ndrange=(n,nblk),dependencies=Event(device))) + daxpy_test(device,n)(Val{n}(),da,d_in,d_out,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(h_out, d_out) @test norm(h_out .- (h_in[n+1:2*n] .+ da.*h_in[1:n])) <= 1e-12 @@ -479,7 +486,8 @@ end d_out = AT{Float64}(undef, n) copyto!(d_z, z) copyto!(d_in, h_in) - wait(dssyax_test(device,n)(Val{n}(),d_z,d_in,d_out,ndrange=(n,nblk),dependencies=Event(device))) + dssyax_test(device,n)(Val{n}(),d_z,d_in,d_out,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(h_out, d_out) @test norm(h_out .- h_in*z) <= 1e-12 @@ -534,7 +542,8 @@ end copyto!(dx, x) copyto!(dl, xl) copyto!(du, xu) - wait(dmid_test(device,n)(Val{n}(),dx,dl,du,d_out,ndrange=(n,nblk),dependencies=Event(device))) + dmid_test(device,n)(Val{n}(),dx,dl,du,d_out,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(x_out, d_out) ExaTron.dmid(n, x, xl, xu) @@ -605,7 +614,8 @@ end copyto!(dl, xl) copyto!(du, xu) copyto!(dw, w) - wait(dgpstep_test(device,n)(Val{n}(),dx,dl,du,alpha,dw,d_out,ndrange=(n,nblk),dependencies=Event(device))) + dgpstep_test(device,n)(Val{n}(),dx,dl,du,alpha,dw,d_out,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(s_out, d_out) ExaTron.dgpstep(n, x, xl, xu, alpha, w, s) @@ -667,7 +677,8 @@ end copyto!(dl, xl) copyto!(du, xu) copyto!(dw, w) - wait(dbreakpt_test(device,n)(Val{n}(),dx,dl,du,dw,d_nbrpt,d_brptmin,d_brptmax,ndrange=(n,nblk),dependencies=Event(device))) + dbreakpt_test(device,n)(Val{n}(),dx,dl,du,dw,d_nbrpt,d_brptmin,d_brptmax,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(h_nbrpt, d_nbrpt) copyto!(h_brptmin, d_brptmin) copyto!(h_brptmax, d_brptmax) @@ -706,7 +717,8 @@ end d_in = AT{Float64}(undef, n) d_out = AT{Float64,2}(undef, (n,n)) copyto!(d_in, h_in) - wait(dnrm2_test(device,n)(Val{n}(),d_in,d_out,ndrange=(n,nblk),dependencies=Event(device))) + dnrm2_test(device,n)(Val{n}(),d_in,d_out,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(h_out, d_out) xnorm = norm(h_in, 2) @@ -746,7 +758,8 @@ end d_out = AT{Float64}(undef, n) h_wa = zeros(n) copyto!(d_A, A) - wait(nrm2_test(device,n)(Val{n}(),d_A,d_out,ndrange=(n,nblk),dependencies=Event(device))) + nrm2_test(device,n)(Val{n}(),d_A,d_out,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(h_wa, d_out) @test norm(wa .- h_wa) <= 1e-10 @@ -781,7 +794,8 @@ end d_in = AT{Float64}(undef, n) d_out = AT{Float64}(undef, n) copyto!(d_in, h_in) - wait(dcopy_test(device,n)(Val{n}(),d_in,d_out,ndrange=(n,nblk),dependencies=Event(device))) + dcopy_test(device,n)(Val{n}(),d_in,d_out,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(h_out, d_out) @test !(false in (h_in .== h_out)) @@ -818,7 +832,8 @@ end d_in = AT{Float64}(undef, n) d_out = AT{Float64,2}(undef, (n,n)) copyto!(d_in, h_in) - wait(ddot_test(device, n)(Val{n}(),d_in,d_out,ndrange=(n,nblk),dependencies=Event(device))) + ddot_test(device, n)(Val{n}(),d_in,d_out,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(h_out, d_out) @test norm(dot(h_in,h_in) .- h_out, 2) <= 1e-10 @@ -854,7 +869,8 @@ end d_in = AT{Float64}(undef, n) d_out = AT{Float64}(undef, n) copyto!(d_in, h_in) - wait(dscal_test(device,n)(Val{n}(),da,d_in,d_out,ndrange=(n,nblk),dependencies=Event(device))) + dscal_test(device,n)(Val{n}(),da,d_in,d_out,ndrange=(n,nblk)) + KA.synchronize(device) copyto!(h_out, d_out) @test norm(h_out .- (da.*h_in)) <= 1e-12 @@ -897,7 +913,8 @@ end d_out = AT{Float64,2}(undef, (n,n)) copyto!(d_x, x) copyto!(d_p, p) - wait(dtrqsol_test(device,n)(Val{n}(),d_x,d_p,d_out,delta,ndrange=(n,nblk),dependencies=Event(device))) + dtrqsol_test(device,n)(Val{n}(),d_x,d_p,d_out,delta,ndrange=(n,nblk)) + KA.synchronize(device) d_out = d_out |> Array @test norm(sigma .- d_out) <= 1e-10 @@ -994,7 +1011,8 @@ end copyto!(dg, g) copyto!(ds, s) - wait(dspcg_test(device, n)(Val{n}(),delta,rtol,cg_itermax,dx,dxl,dxu,dA,dg,ds,d_out,ndrange=(n,1),dependencies=Event(device))) + dspcg_test(device, n)(Val{n}(),delta,rtol,cg_itermax,dx,dxl,dxu,dA,dg,ds,d_out,ndrange=(n,1)) + KA.synchronize(device) h_x = zeros(n) copyto!(h_x, d_out) @@ -1046,7 +1064,8 @@ end copyto!(dxu, xu) copyto!(dg, g) - wait(dgpnorm_test(device, n)(Val{n}(), dx, dxl, dxu, dg, d_out, ndrange=(n,n*nblk), dependencies=Event(device))) + dgpnorm_test(device, n)(Val{n}(), dx, dxl, dxu, dg, d_out, ndrange=(n,n*nblk)) + KA.synchronize(device) h_v = zeros(n) copyto!(h_v, d_out) @@ -1151,7 +1170,8 @@ end copyto!(dA, tron_A.vals) copyto!(dg, g) - wait(dtron_test(device,n)(Val{n}(),f,frtol,fatol,fmin,cgtol,cg_itermax,delta,task,disave,ddsave,dx,dxl,dxu,dA,dg,d_out,ndrange=(n,n*nblk),dependencies=Event(device))) + dtron_test(device,n)(Val{n}(),f,frtol,fatol,fmin,cgtol,cg_itermax,delta,task,disave,ddsave,dx,dxl,dxu,dA,dg,d_out,ndrange=(n,n*nblk)) + KA.synchronize(device) h_x = zeros(n) copyto!(h_x, d_out) @@ -1395,7 +1415,8 @@ end copyto!(tron.x, x) status = ExaTron.solveProblem(tron) - wait(driver_kernel_test(device,n)(Val{n}(),max_feval,max_minor,dx,dxl,dxu,dA,dc,d_out,ndrange=(n,nblk),dependencies=Event(device))) + driver_kernel_test(device,n)(Val{n}(),max_feval,max_minor,dx,dxl,dxu,dA,dc,d_out,ndrange=(n,nblk)) + KA.synchronize(device) h_x = zeros(n) copyto!(h_x, d_out) diff --git a/test/Project.toml b/test/Project.toml new file mode 100644 index 0000000..9f72b27 --- /dev/null +++ b/test/Project.toml @@ -0,0 +1,12 @@ +[deps] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +PowerModels = "c36e90e8-916a-50a6-bd94-075b64ef4655" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" diff --git a/test/admmtest.jl b/test/admmtest.jl index a1d80a0..93136c9 100644 --- a/test/admmtest.jl +++ b/test/admmtest.jl @@ -1,8 +1,6 @@ using KernelAbstractions using AMDGPU -using ROCKernels using CUDA -using CUDAKernels using LinearAlgebra using Printf using PowerModels diff --git a/test/runtests.jl b/test/runtests.jl index f458571..8c903ee 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -17,12 +17,10 @@ using Test if CUDA.has_cuda_gpu() || AMDGPU.has_rocm_gpu() include("KA.jl") if CUDA.has_cuda_gpu() - using CUDAKernels - push!(devices, CUDADevice()) + push!(devices, CUDABackend()) end if AMDGPU.has_rocm_gpu() - using ROCKernels - push!(devices, ROCDevice()) + push!(devices, ROCBackend()) end end @testset "Testing one-level ADMM using $device" for device in devices