Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

imagenet_r dataset added #285

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion continuum/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from continuum.datasets.cifar100 import CIFAR100
from continuum.datasets.core50 import Core50, Core50v2_79, Core50v2_196, Core50v2_391
from continuum.datasets.fellowship import CIFARFellowship, Fellowship, MNISTFellowship
from continuum.datasets.imagenet import ImageNet100, ImageNet1000, TinyImageNet200
from continuum.datasets.imagenet import ImageNet100, ImageNet1000, TinyImageNet200, ImageNetR
from continuum.datasets.synbols import Synbols
from continuum.datasets.nlp import MultiNLI
from continuum.datasets.pytorch import (
Expand Down
76 changes: 74 additions & 2 deletions continuum/datasets/imagenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

import numpy as np
from torchvision import transforms

import yaml
from continuum.datasets import ImageFolderDataset, _ContinuumDataset
from continuum.download import download, unzip
from continuum.download import download, unzip, untar
from continuum.tasks import TaskType


Expand Down Expand Up @@ -125,6 +125,78 @@ def _parse_subset(
return subset # type: ignore


class ImageNetR(_ContinuumDataset):
    """ImageNet-R dataset.

    - 200 classes
    - images are resized to 224x224 by the default transformations
    - the train/test membership of each image is read from the split files
      published in the CODA-Prompt repository

    NOTE(review): the previous docstring stated "500 images per class";
    that figure could not be confirmed from this file -- please verify.
    """

    url = "https://people.eecs.berkeley.edu/~hendrycks/imagenet-r.tar"
    num_classes = 200

    def _download(self):
        """Fetch the image archive and the auxiliary split/mapping files.

        Every artefact is guarded by its own existence check, so calling this
        again only downloads what is still missing.
        """
        path = os.path.join(self.data_path, "imagenet-r")
        if not os.path.exists(path):
            if not os.path.exists(f"{path}.tar"):
                download(self.url, self.data_path)
            untar(f"{path}.tar")

        # Train/test splits come from the CODA-Prompt repository; the
        # class-id -> class-name mapping comes from a pinned gist revision.
        extra_files = (
            (
                'imagenet-r_train.yaml',
                'https://raw.githubusercontent.com/GT-RIPL/CODA-Prompt/main/dataloaders/splits/imagenet-r_train.yaml',
            ),
            (
                'imagenet-r_test.yaml',
                'https://raw.githubusercontent.com/GT-RIPL/CODA-Prompt/main/dataloaders/splits/imagenet-r_test.yaml',
            ),
            (
                'class_mapping.txt',
                'https://gist.githubusercontent.com/ranarag/6620c8fa7da24e1f56f7cdba88d6343a/raw/1d22f7b4902efa22b77c88879579057ac88feb4d/class_mapping.txt',
            ),
        )
        for filename, file_url in extra_files:
            if not os.path.exists(os.path.join(path, filename)):
                download(file_url, path)

    @property
    def data_type(self) -> TaskType:
        """Samples are returned as image paths, not as loaded arrays."""
        return TaskType.IMAGE_PATH

    def get_data(self) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]:
        """Build the image-path and label arrays for the selected split.

        Also fills ``self.classes`` so that ``self.classes[label]`` is the
        class name taken from ``class_mapping.txt``.

        :return: ``(x, y, None)`` where ``x`` holds image paths and ``y`` the
                 integer targets listed in the split file; no task ids are
                 provided.
        :raises AssertionError: if some label in ``range(num_classes)`` never
                                appears in the split file.
        """
        path = os.path.join(self.data_path, "imagenet-r")

        class_mapping = {}
        with open(os.path.join(path, 'class_mapping.txt'), 'r') as fid:
            for line in fid.read().split('\n'):
                # A trailing newline (or any blank line) would otherwise
                # raise IndexError when reading the second field.
                if not line.strip():
                    continue
                fields = line.split(' ')
                class_mapping[fields[0]] = fields[1]

        if self.train:
            split_file = 'imagenet-r_train.yaml'
        else:
            split_file = 'imagenet-r_test.yaml'
        with open(os.path.join(path, split_file), 'r') as fid:
            # NOTE(review): yaml.Loader can instantiate arbitrary Python
            # objects and this file is downloaded from the network. If the
            # split files only contain plain lists/strings/ints, switching to
            # yaml.safe_load would be safer -- confirm before changing.
            data_config = yaml.load(fid, Loader=yaml.Loader)

        x = []
        y = []
        self.classes = [" "] * self.num_classes
        for idx, fname in enumerate(data_config['data']):
            # Drop the first two path components of the listed file name and
            # re-root the remainder under the local "imagenet-r" folder.
            data_fname = '/'.join(fname.split('/')[2:])
            label = int(data_config['targets'][idx])
            if self.classes[label] == " ":
                # The first remaining path component is the class folder id.
                self.classes[label] = class_mapping[data_fname.split('/')[0]]
            x.append(os.path.join(path, data_fname))
            y.append(label)

        x = np.array(x)
        y = np.array(y)
        for i in range(self.num_classes):
            assert self.classes[i] != " ", "Class not found"
        return x, y, None

    @property
    def transformations(self):
        """Default transformations if nothing is provided to the scenario."""
        return [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ]




class TinyImageNet200(_ContinuumDataset):
"""Smaller version of ImageNet.

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ h5py>=3.1.0
requests>=2.24.0
ImageHash>=4.2.1
datasets>=1.6.0
PyYAML
51 changes: 49 additions & 2 deletions tests/test_imagenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from continuum.datasets import ImageNet100
from continuum.datasets import ImageNet100, ImageNetR
from continuum.scenarios import ClassIncremental


Expand All @@ -11,29 +11,58 @@
False: 5000 # test
}


# Expected number of ImageNet-R samples, keyed by the ``train`` flag
# (True -> train split, False -> test split).
nb_images_per_subset_imagenet_r = {True: 24000, False: 6000}
@pytest.fixture
def ImageNet100Test(tmpdir):
    """Build the ImageNet100 test split inside a new folder under ``tmpdir``."""
    root = os.path.join(tmpdir, "imagenet100test")
    os.makedirs(root)
    dataset = ImageNet100(root, data_subset=None, download=True, train=False)
    return dataset


@pytest.fixture
def ImageNet_RTrain(tmpdir):
    """Build the ImageNet-R train split inside a new folder under ``tmpdir``."""
    folder = os.path.join(tmpdir, "imagenet_rtrain")
    os.makedirs(folder)
    # The dataset class is exported from continuum.datasets as ``ImageNetR``.
    return ImageNetR(folder, download=True, train=True)


@pytest.fixture
def ImageNet_RTest(tmpdir):
    """Build the ImageNet-R test split inside a new folder under ``tmpdir``."""
    folder = os.path.join(tmpdir, "imagenet_rtest")
    os.makedirs(folder)
    # The dataset class is exported from continuum.datasets as ``ImageNetR``.
    return ImageNetR(folder, download=True, train=False)


@pytest.fixture
def ImageNet100Train(tmpdir):
    """Build the ImageNet100 train split inside a new folder under ``tmpdir``."""
    root = os.path.join(tmpdir, "imagenet100train")
    os.makedirs(root)
    dataset = ImageNet100(root, data_subset=None, download=True, train=True)
    return dataset


@pytest.mark.parametrize("train", [True, False])
def test_parsing_imagenet_r(ImageNet_RTrain, ImageNet_RTest, train):
    """Check that every ImageNet-R path mentions the requested split name."""
    dataset = ImageNet_RTrain if train else ImageNet_RTest
    x, y, t = dataset.get_data()

    # Pick the keyword first: written inline, ``"train" if train else "test"
    # in path`` parses as ``"train" if train else ("test" in path)``, which is
    # always truthy for the train case and therefore checks nothing.
    expected = "train" if train else "test"
    assert all(expected in path for path in x)

@pytest.mark.parametrize("train", [True, False])
def test_parsing_imagenet100(ImageNet100Train, ImageNet100Test, train):
    """Check that every ImageNet100 path mentions the requested split name."""
    dataset = ImageNet100Train if train else ImageNet100Test
    x, y, t = dataset.get_data()

    # Pick the keyword first: written inline, ``"train" if train else "test"
    # in path`` parses as ``"train" if train else ("test" in path)``, which is
    # always truthy for the train case and therefore checks nothing.
    expected = "train" if train else "test"
    assert all(expected in path for path in x)

@pytest.mark.parametrize("train", [True, False])
def test_nb_imagenet_r(ImageNet_RTrain, ImageNet_RTest, train):
    """Check the number of ImageNet-R samples returned for each split."""
    if train:
        dataset = ImageNet_RTrain
    else:
        dataset = ImageNet_RTest
    samples, _labels, _tasks = dataset.get_data()

    expected_count = nb_images_per_subset_imagenet_r[train]
    assert len(samples) == expected_count

@pytest.mark.parametrize("train", [True, False])
def test_nb_imagenet100(ImageNet100Train, ImageNet100Test, train):
Expand All @@ -43,6 +72,24 @@ def test_nb_imagenet100(ImageNet100Train, ImageNet100Test, train):
assert len(x) == nb_images_per_subset[train]


# @pytest.mark.parametrize("train,div", [
# (True, 1), (True, 2),
# (False, 1), (True, 2)
# ])
# def test_customsubset_imagenet_r(ImageNet_RTrain, ImageNet_RTest, train, div):
# dataset = ImageNet_RTrain if train else ImageNet_RTest
# x, y, t = dataset.get_data()

# new_x = x[:len(x) // div]
# new_y = y[:len(y) // div]

# subset = ImageNet_R(dataset.data_path, data_subset=(new_x, new_y), download=False, train=train)
# x2, y2, t2 = subset.get_data()

# assert len(x) // div == len(x2)



@pytest.mark.parametrize("train,div", [
(True, 1), (True, 2),
(False, 1), (True, 2)
Expand Down