From 60127f79644250ccfb2db5cfaca65afcc8e32ad5 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Tue, 27 Aug 2024 12:53:53 -0400 Subject: [PATCH] fix: remove deprecated deps --- .buildkite/pipeline.yml | 2 +- .github/workflows/CI.yml | 2 - docs/Project.toml | 3 +- docs/src/examples/mnist_conv_neural_ode.md | 39 +++---- docs/src/examples/mnist_neural_ode.md | 112 +++++++++------------ test/neural_de_tests.jl | 1 - 6 files changed, 66 insertions(+), 93 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index dd8ce199a..b7fd951b7 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,5 +1,5 @@ steps: - - label: "Julia 1" + - label: "Julia 1 (CUDA)" plugins: - JuliaCI/julia#v1: version: "1.10" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index eb9553e34..5c60daf78 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -46,8 +46,6 @@ jobs: coverage: false env: GROUP: ${{ matrix.group }} - RETESTITEMS_NWORKERS: 0 - RETESTITEMS_TESTITEM_TIMEOUT: 3600 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v4 with: diff --git a/docs/Project.toml b/docs/Project.toml index b4a9d80a3..25450fcef 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -14,10 +14,10 @@ IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" -MLDataUtils = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d" MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" @@ -52,7 +52,6 @@ IterTools = "1" LinearAlgebra = "1" Lux = "0.5.5" LuxCUDA = "0.3" -MLDataUtils = 
"0.5" MLDatasets = "0.7" MLUtils = "0.4" NNlib = "0.9" diff --git a/docs/src/examples/mnist_conv_neural_ode.md b/docs/src/examples/mnist_conv_neural_ode.md index 023658593..740c12a37 100644 --- a/docs/src/examples/mnist_conv_neural_ode.md +++ b/docs/src/examples/mnist_conv_neural_ode.md @@ -8,10 +8,9 @@ using Fully Connected Layers. ```@example mnist_cnn using DiffEqFlux, Statistics, ComponentArrays, CUDA, Zygote, MLDatasets, OrdinaryDiffEq, - Printf, Test, LuxCUDA, Random + Printf, Test, LuxCUDA, Random, MLUtils, OneHotArrays using Optimization, OptimizationOptimisers using MLDatasets: MNIST -using MLDataUtils: LabelEnc, convertlabel, stratifiedobs, batchview const cdev = cpu_device() const gdev = gpu_device() @@ -22,26 +21,21 @@ ENV["DATADEPS_ALWAYS_ACCEPT"] = true logitcrossentropy(ŷ, y) = mean(-sum(y .* logsoftmax(ŷ; dims = 1); dims = 1)) function loadmnist(batchsize = bs) - # Use MLDataUtils LabelEnc for natural onehot conversion - function onehot(labels_raw) - convertlabel(LabelEnc.OneOfK, labels_raw, LabelEnc.NativeLabels(collect(0:9))) - end # Load MNIST - mnist = MNIST(; split = :train) - imgs, labels_raw = mnist.features, mnist.targets + dataset = MNIST(; split = :train) + imgs = dataset.features + labels_raw = dataset.targets + # Process images into (H,W,C,BS) batches - x_train = Float32.(reshape(imgs, size(imgs, 1), size(imgs, 2), 1, size(imgs, 3))) |> - gdev - x_train = batchview(x_train, batchsize) - # Onehot and batch the labels - y_train = onehot(labels_raw) |> gdev - y_train = batchview(y_train, batchsize) - return x_train, y_train + x_data = Float32.(reshape(imgs, size(imgs, 1), size(imgs, 2), 1, size(imgs, 3))) + y_data = onehotbatch(labels_raw, 0:9) + + return DataLoader((x_data, y_data); batchsize, shuffle = true) end # Main const bs = 32 -x_train, y_train = loadmnist(bs) +dataloader = loadmnist(bs) down = Chain(Conv((3, 3), 1 => 64, relu; stride = 1), GroupNorm(64, 64), Conv((4, 4), 64 => 64, relu; stride = 2, pad = 1), @@ -56,9 +50,7 @@ fc = 
Chain(GroupNorm(64, 64), x -> relu.(x), MeanPool((6, 6)), nn_ode = NeuralODE(dudt, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, reltol = 1e-3, abstol = 1e-3, save_start = false) -function DiffEqArray_to_Array(x) - xarr = gdev(x.u[1]) -end +DiffEqArray_to_Array(x) = x.u[end] # Build our over-all model topology m = Chain(down, # (28, 28, 1, BS) -> (6, 6, 64, BS) @@ -70,8 +62,9 @@ ps = ComponentArray(ps) |> gdev st = st |> gdev # To understand the intermediate NN-ODE layer, we can examine it's dimensionality -img = x_train[1][:, :, :, 1:1] |> gdev -lab = y_train[1][:, 1:1] |> gdev +x_train1, y_train1 = first(dataloader) +img = x_train1[:, :, :, 1:1] |> gdev +lab = y_train1[:, 1:1] |> gdev x_m, _ = m(img, ps, st) @@ -91,7 +84,7 @@ function accuracy(model, data, ps, st; n_batches = 10) end # burn in accuracy -accuracy(m, zip(x_train, y_train), ps, st) +accuracy(m, ((x_train1, y_train1),), ps, st) function loss_function(ps, x, y) pred, st_ = m(x, ps, st) @@ -99,7 +92,7 @@ function loss_function(ps, x, y) end #burn in loss -loss_function(ps, x_train[1], y_train[1]) +loss_function(ps, x_train1, y_train1) opt = OptimizationOptimisers.Adam(0.05) iter = 0 diff --git a/docs/src/examples/mnist_neural_ode.md b/docs/src/examples/mnist_neural_ode.md index b120db59f..77e879579 100644 --- a/docs/src/examples/mnist_neural_ode.md +++ b/docs/src/examples/mnist_neural_ode.md @@ -1,15 +1,15 @@ # [GPU-based MNIST Neural ODE Classifier](@id mnist) -Training a classifier for **MNIST** using a neural ordinary differential equation **NeuralODE** -on **GPUs** with **minibatching**. +Training a classifier for **MNIST** using a neural ordinary differential equation +**NeuralODE** on **GPUs** with **minibatching**. 
(Step-by-step description below) ```@example mnist -using DiffEqFlux, CUDA, Zygote, MLDataUtils, NNlib, OrdinaryDiffEq, Test, Lux, Statistics, - ComponentArrays, Random, Optimization, OptimizationOptimisers, LuxCUDA +using DiffEqFlux, CUDA, Zygote, NNlib, OrdinaryDiffEq, Test, Lux, Statistics, + ComponentArrays, Random, Optimization, OptimizationOptimisers, LuxCUDA, + MLUtils, OneHotArrays using MLDatasets: MNIST -using MLDataUtils: LabelEnc, convertlabel, stratifiedobs CUDA.allowscalar(false) ENV["DATADEPS_ALWAYS_ACCEPT"] = true @@ -20,26 +20,21 @@ const gdev = gpu_device() logitcrossentropy(ŷ, y) = mean(-sum(y .* logsoftmax(ŷ; dims = 1); dims = 1)) function loadmnist(batchsize = bs) - # Use MLDataUtils LabelEnc for natural onehot conversion - function onehot(labels_raw) - convertlabel(LabelEnc.OneOfK, labels_raw, LabelEnc.NativeLabels(collect(0:9))) - end # Load MNIST - mnist = MNIST(; split = :train) - imgs, labels_raw = mnist.features, mnist.targets + dataset = MNIST(; split = :train) + imgs = dataset.features + labels_raw = dataset.targets + # Process images into (H,W,C,BS) batches - x_train = Float32.(reshape(imgs, size(imgs, 1), size(imgs, 2), 1, size(imgs, 3))) |> - gdev - x_train = batchview(x_train, batchsize) - # Onehot and batch the labels - y_train = onehot(labels_raw) |> gdev - y_train = batchview(y_train, batchsize) - return x_train, y_train + x_data = Float32.(reshape(imgs, size(imgs, 1), size(imgs, 2), 1, size(imgs, 3))) + y_data = onehotbatch(labels_raw, 0:9) + + return DataLoader((x_data, y_data); batchsize, shuffle = true) end # Main -const bs = 128 -x_train, y_train = loadmnist(bs) +const bs = 32 +dataloader = loadmnist(bs) down = Lux.Chain(Lux.FlattenLayer(), Lux.Dense(784, 20, tanh)) nn = Lux.Chain(Lux.Dense(20, 10, tanh), Lux.Dense(10, 10, tanh), Lux.Dense(10, 20, tanh)) @@ -48,30 +43,29 @@ fc = Lux.Dense(20, 10) nn_ode = NeuralODE(nn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, reltol = 1e-3, abstol = 1e-3, save_start = false) 
-function DiffEqArray_to_Array(x) - xarr = gdev(x.u[1]) - return xarr -end +DiffEqArray_to_Array(x) = x.u[end] -#Build our over-all model topology +# Build our over-all model topology m = Lux.Chain(; down, nn_ode, convert = Lux.WrappedFunction(DiffEqArray_to_Array), fc) ps, st = Lux.setup(Xoshiro(0), m) ps = ComponentArray(ps) |> gdev st = st |> gdev -#We can also build the model topology without a NN-ODE +# We can also build the model topology without a NN-ODE m_no_ode = Lux.Chain(; down, nn, fc) ps_no_ode, st_no_ode = Lux.setup(Xoshiro(0), m_no_ode) ps_no_ode = ComponentArray(ps_no_ode) |> gdev st_no_ode = st_no_ode |> gdev -#To understand the intermediate NN-ODE layer, we can examine it's dimensionality -x_d = first(down(x_train[1], ps.down, st.down)) +x_train1, y_train1 = first(dataloader) + +# To understand the intermediate NN-ODE layer, we can examine its dimensionality +x_d = first(down(x_train1, ps.down, st.down)) # We can see that we can compute the forward pass through the NN topology featuring an NNODE layer. -x_m = first(m(x_train[1], ps, st)) -#Or without the NN-ODE layer. -x_m = first(m_no_ode(x_train[1], ps_no_ode, st_no_ode)) +x_m = first(m(x_train1, ps, st)) +# Or without the NN-ODE layer.
+x_m = first(m_no_ode(x_train1, ps_no_ode, st_no_ode)) classify(x) = argmax.(eachcol(x)) @@ -87,16 +81,15 @@ function accuracy(model, data, ps, st; n_batches = 100) end return total_correct / total end -#burn in accuracy -accuracy(m, zip(x_train, y_train), ps, st) + +accuracy(m, ((x_train1, y_train1),), ps, st) # burn in accuracy function loss_function(ps, x, y) pred, st_ = m(x, ps, st) return logitcrossentropy(pred, y), pred end -#burn in loss -loss_function(ps, x_train[1], y_train[1]) +loss_function(ps, x_train1, y_train1) # burn in loss opt = OptimizationOptimisers.Adam(0.05) iter = 0 @@ -107,8 +100,8 @@ opt_prob = OptimizationProblem(opt_func, ps) function callback(ps, l, pred) global iter += 1 - #Monitor that the weights do infact update - #Every 10 training iterations show accuracy + # Monitor that the weights do in fact update + # Every 10 training iterations show accuracy if (iter % 10 == 0) @info "[MNIST GPU] Accuracy: $(accuracy(m, zip(x_train, y_train), ps, st))" end @@ -125,10 +118,10 @@ res = Optimization.solve(opt_prob, opt, zip(x_train, y_train); callback) ### Load Packages ```@example mnist -using DiffEqFlux, CUDA, Zygote, MLDataUtils, NNlib, OrdinaryDiffEq, Test, Lux, Statistics, - ComponentArrays, Random, Optimization, OptimizationOptimisers, LuxCUDA +using DiffEqFlux, CUDA, Zygote, NNlib, OrdinaryDiffEq, Test, Lux, Statistics, + ComponentArrays, Random, Optimization, OptimizationOptimisers, LuxCUDA, + MLUtils, OneHotArrays using MLDatasets: MNIST -using MLDataUtils: LabelEnc, convertlabel, stratifiedobs ``` ### GPU @@ -163,21 +156,16 @@ meaning that every minibatch will contain 128 images with a single color channel logitcrossentropy(ŷ, y) = mean(-sum(y .* logsoftmax(ŷ; dims = 1); dims = 1)) function loadmnist(batchsize = bs) - # Use MLDataUtils LabelEnc for natural onehot conversion - function onehot(labels_raw) - convertlabel(LabelEnc.OneOfK, labels_raw, LabelEnc.NativeLabels(collect(0:9))) - end # Load MNIST - mnist = MNIST(; split = :train) -
imgs, labels_raw = mnist.features, mnist.targets + dataset = MNIST(; split = :train) + imgs = dataset.features + labels_raw = dataset.targets + # Process images into (H,W,C,BS) batches - x_train = Float32.(reshape(imgs, size(imgs, 1), size(imgs, 2), 1, size(imgs, 3))) |> - gdev - x_train = batchview(x_train, batchsize) - # Onehot and batch the labels - y_train = onehot(labels_raw) |> gdev - y_train = batchview(y_train, batchsize) - return x_train, y_train + x_data = Float32.(reshape(imgs, size(imgs, 1), size(imgs, 2), 1, size(imgs, 3))) + y_data = onehotbatch(labels_raw, 0:9) + + return DataLoader((x_data, y_data); batchsize, shuffle = true) end ``` @@ -185,8 +173,8 @@ and then loaded from main: ```@example mnist # Main -const bs = 128 -x_train, y_train = loadmnist(bs) +const bs = 32 +dataloader = loadmnist(bs) ``` ### Layers @@ -222,10 +210,7 @@ a Matrix (CuArray), and reduces the matrix from 3 to 2 dimensions for use in the nn_ode = NeuralODE(nn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, reltol = 1e-3, abstol = 1e-3, save_start = false) -function DiffEqArray_to_Array(x) - xarr = gdev(x.u[1]) - return xarr -end +DiffEqArray_to_Array(x) = x.u[end] ``` For CPU: If this function does not automatically fall back to CPU when no GPU is present, we can @@ -269,7 +254,7 @@ function accuracy(model, data, ps, st; n_batches = 100) end return total_correct / total end -#burn in accuracy + accuracy(m, zip(x_train, y_train), ps, st) ``` @@ -290,8 +275,7 @@ function loss_function(ps, x, y) return logitcrossentropy(pred, y), pred end -#burn in loss -loss_function(ps, x_train[1], y_train[1]) +loss_function(ps, x_train1, y_train1) ``` #### Optimizer @@ -316,8 +300,8 @@ opt_prob = OptimizationProblem(opt_func, ps) function callback(ps, l, pred) global iter += 1 - #Monitor that the weights do infact update - #Every 10 training iterations show accuracy + # Monitor that the weights do in fact update + # Every 10 training iterations show accuracy if (iter % 10 == 0) @info "[MNIST
GPU] Accuracy: $(accuracy(m, zip(x_train, y_train), ps, st))" end diff --git a/test/neural_de_tests.jl b/test/neural_de_tests.jl index 8bbd35a23..b32b0d498 100644 --- a/test/neural_de_tests.jl +++ b/test/neural_de_tests.jl @@ -277,7 +277,6 @@ end pd = ComponentArray(pd) |> gdev st = st |> gdev broken = hasfield(typeof(kwargs), :sensealg) && - ndims(u0) == 2 && kwargs.sensealg isa TrackerAdjoint @test begin grads = Zygote.gradient(sum ∘ last ∘ first ∘ node, u0, pd, st)