Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/base loader #20

Merged
merged 12 commits into from
Oct 7, 2020
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@ LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
Suggests:
testthat
testthat,
magick
Imports:
torch,
fs,
magick,
rlang,
rappdirs,
utils
utils,
jpeg,
png,
abind
Remotes:
mlverse/torch
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ S3method(transform_to_tensor,default)
S3method(transform_to_tensor,matrix)
S3method(transform_vflip,default)
S3method(transform_vflip,torch_tensor)
export(base_loader)
export(image_folder_dataset)
export(kmnist_dataset)
export(magick_loader)
Expand Down
4 changes: 1 addition & 3 deletions R/dataset-mnist.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,7 @@ mnist_dataset <- dataset(
},
.getitem = function(index) {
img <- self$data[index, ,]
target <- torch::torch_tensor(self$targets[index],
dtype = torch::torch_long())
target$squeeze_(1)
target <- self$targets[index]

if (!is.null(self$transform))
img <- self$transform(img)
Expand Down
35 changes: 30 additions & 5 deletions R/folder-dataset.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

IMG_EXTENSIONS <- c('jpg', 'jpeg', 'png', 'ppm', 'bmp', 'pgm', 'tif', 'tiff', 'webp')
IMG_EXTENSIONS <- c('jpg', 'jpeg', 'png') # 'ppm', 'bmp', 'pgm', 'tif', 'tiff', 'webp'

has_file_allowed_extension <- function(filename, extensions) {
tolower(fs::path_ext(filename)) %in% tolower(extensions )
Expand Down Expand Up @@ -114,13 +114,38 @@ folder_dataset <- torch::dataset(
#'
#' @export
magick_loader <- function(path) {
magick::image_read(path)
}

if (!requireNamespace("magick"))
runtime_error("The `magick` package must be installed to load images.")

magick::image_read(path)
#' Base loader
#'
#' Loads an image using the `jpeg` or `png` packages, depending on the
#' file extension (matched case-insensitively).
#'
#' Single-channel images are expanded to three identical channels, so the
#' result for those is a height x width x 3 array. Arrays that already have
#' more than one channel are returned as read.
#'
#' @param path path to the image to load from
#'
#' @return A numeric array with dimensions height x width x channels.
#'
#' @export
base_loader <- function(path) {

  ext <- tolower(fs::path_ext(path))

  # Pick the reader from the extension; anything else is an error.
  if (ext %in% c("jpg", "jpeg")) {
    img <- jpeg::readJPEG(path)
  } else if (ext %in% c("png")) {
    img <- png::readPNG(path)
  } else {
    runtime_error(sprintf("unknown extension '%s' in path '%s'", ext, path))
  }

  # Both readers return height x width x channels, collapsing to a plain
  # matrix when there is a single channel. The channel axis is therefore the
  # third one: checking/binding on the first axis would stack a 1-pixel-tall
  # image vertically instead of replicating its channel.
  n_dims <- length(dim(img))
  if (n_dims == 2) {
    img <- abind::abind(img, img, img, along = 3)
  } else if (n_dims == 3 && dim(img)[3] == 1) {
    img <- abind::abind(img, img, img, along = 3)
  }

  img
}


#' Create an image folder dataset
#'
#' A generic data loader for images stored in folders.
Expand Down Expand Up @@ -162,7 +187,7 @@ image_folder_dataset <- dataset(
loader=NULL, is_valid_file=NULL) {

if (is.null(loader))
loader <- magick_loader
loader <- base_loader

if (!is.null(is_valid_file))
extensions <- NULL
Expand Down
4 changes: 2 additions & 2 deletions R/transforms-defaults.R
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,9 @@ get_random_resized_crop_params <- function(img, scale, ratio) {
} else {
w <- width
h <- height
i <- (height - h) %/% 2
j <- (width - w) %/% 2
}
i <- (height - h) %/% 2
j <- (width - w) %/% 2

c(i, j, h, w)
}
Expand Down
2 changes: 1 addition & 1 deletion R/transforms-tensor.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ transform_normalize.torch_tensor <- function(img, mean, std, inplace = FALSE) {
mean <- torch::torch_tensor(mean, dtype=dtype, device=img$device)
std <- torch::torch_tensor(std, dtype=dtype, device=img$device)

if (torch::as_array((std == 0)$any())) {
if ((std == 0)$any()$item()) {
value_error("std evaluated to zero after conversion to {dtype}, leading to division by zero.")
}

Expand Down
17 changes: 17 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,23 @@ template:
development:
mode: auto

navbar:
structure:
left: [home, examples, reference, news]
right: [github]
components:
examples:
text: Examples
menu:
- text: mnist-mlp
href: articles/examples/mnist-mlp.html
- text: mnist-cnn
href: articles/examples/mnist-cnn.html
- text: mnist-dcgan
href: articles/examples/mnist-dcgan.html
- text: tinyimagenet-alexnet
href: articles/examples/tinyimagenet-alexnet.html

reference:
- title: Transforms
desc: Image transformation functions
Expand Down
15 changes: 15 additions & 0 deletions man/base_loader.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/testthat/assets/class/cat/cat.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test file to see if it's ignored by the dataset
52 changes: 25 additions & 27 deletions tests/testthat/test-dataset-mnist.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,54 +2,52 @@ context("dataset-mnist")

test_that("tests for the mnist dataset", {

# skipped on mac because I use mac and I don't want to download mnist every time
#skip_on_os("mac")

dir <- tempfile(fileext = "/")

expect_error(
ds <- mnist_dataset(dir)
)

ds <- mnist_dataset(dir, download = TRUE, transform = function(x) {
torch::torch_tensor(x)
})
ds <- mnist_dataset(dir, download = TRUE)

i <- ds[1]

expect_tensor(i[[1]])
expect_tensor(i[[2]]$to(dtype = torch_int()))

expect_tensor_shape(torch::torch_tensor(ds$data), c(60000, 28, 28))
expect_tensor_shape(torch::torch_tensor(ds$targets)$to(dtype = torch_int()), c(60000))

expect_equal(dim(i[[1]]), c(28, 28))
expect_equal(i[[2]], 6)
expect_equal(length(ds), 60000)

ds <- mnist_dataset(dir, transform = transform_to_tensor)
dl <- torch::dataloader(ds, batch_size = 32)
expect_length(dl, 1875)
iter <- dataloader_make_iter(dl)
i <- dataloader_next(iter)
expect_tensor_shape(i[[1]], c(32, 1, 28, 28))
expect_tensor_shape(i[[2]], 32)
expect_true((torch_max(i[[1]]) <= 1)$item())

})


test_that("tests for the kmnist dataset", {

# skipped on mac because I use mac and I don't want to download mnist every time
skip_on_os("mac")

dir <- tempfile(fileext = "/")

expect_error(
ds <- kmnist_dataset(dir)
)

ds <- kmnist_dataset(dir, download = TRUE, transform = function(x) {
torch::torch_tensor(x)
})
i <- ds[1]

expect_tensor(i[[1]])
expect_tensor(i[[2]])
expect_equal(length(i[[2]]$shape), 0)

expect_tensor_shape(torch::torch_tensor(ds$data), c(60000, 28, 28))
expect_tensor_shape(torch::torch_tensor(ds$targets)$to(dtype = torch_int()), c(60000))
ds <- kmnist_dataset(dir, download = TRUE)

i <- ds[1]
expect_equal(dim(i[[1]]), c(28, 28))
expect_equal(i[[2]], 9)
expect_equal(length(ds), 60000)

ds <- kmnist_dataset(dir, transform = transform_to_tensor)
dl <- torch::dataloader(ds, batch_size = 32)
expect_length(dl, 1875)
iter <- dataloader_make_iter(dl)
i <- dataloader_next(iter)
expect_tensor_shape(i[[1]], c(32, 1, 28, 28))
expect_tensor_shape(i[[2]], 32)
expect_true((torch_max(i[[1]]) <= 1)$item())
})
2 changes: 2 additions & 0 deletions tests/testthat/test-folder-dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@ test_that("image_folder dataset", {
expect_tensor_shape(batch[[2]], 2)
}

expect_length(ds, 12)

})
76 changes: 54 additions & 22 deletions vignettes/examples/mnist-cnn.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,29 @@
dir <- "~/Downloads/mnist"

ds <- mnist_dataset(
dir,
download = TRUE,
transform = function(x) {
x <- x$to(dtype = torch_float())/256
x[newaxis,..]
}
# Packages ----------------------------------------------------------------
library(torch)
library(torchvision)


# Datasets and loaders ----------------------------------------------------

dir <- "~/Downloads/mnist" #caching directory

train_ds <- mnist_dataset(
dir,
download = TRUE,
transform = transform_to_tensor
)
dl <- dataloader(ds, batch_size = 32, shuffle = TRUE)

test_ds <- mnist_dataset(
dir,
train = FALSE,
transform = transform_to_tensor
)

train_dl <- dataloader(train_ds, batch_size = 32, shuffle = TRUE)
test_dl <- dataloader(test_ds, batch_size = 32)


# Building the network ----------------------------------------------------

net <- nn_module(
"Net",
Expand Down Expand Up @@ -38,28 +53,45 @@ net <- nn_module(
)

model <- net()

# Move model to cuda if it's available
device <- if(cuda_is_available()) "cuda" else "cpu"
model$to(device = device)

# Training loop -----------------------------------------------------------

optimizer <- optim_sgd(model$parameters, lr = 0.01)

epochs <- 10

for (epoch in 1:10) {

pb <- progress::progress_bar$new(
total = length(dl),
total = length(train_dl),
format = "[:bar] :eta Loss: :loss"
)
l <- c()

for (b in enumerate(dl)) {

train_losses <- c()
test_losses <- c()

for (b in enumerate(train_dl)) {
optimizer$zero_grad()
output <- model(b[[1]])
loss <- nnf_nll_loss(output, b[[2]])
output <- model(b[[1]]$to(device = device))
loss <- nnf_nll_loss(output, b[[2]]$to(device = device))
loss$backward()
optimizer$step()
l <- c(l, loss$item())
pb$tick(tokens = list(loss = mean(l)))
train_losses <- c(train_losses, loss$item())
pb$tick(tokens = list(loss = mean(train_losses)))
}

cat(sprintf("Loss at epoch %d: %3f\n", epoch, mean(l)))

for (b in enumerate(test_dl)) {
model$eval()
output <- model(b[[1]]$to(device = device))
loss <- nnf_nll_loss(output, b[[2]]$to(device = device))
test_losses <- c(test_losses, loss$item())
model$train()
}

cat(sprintf("Loss at epoch %d [Train: %3f] [Test: %3f]\n",
epoch, mean(train_losses), mean(test_losses)))
}

Loading