From e06e9e67940a8fce21fb8f49fa5d0dc5b0783df9 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Wed, 22 Nov 2017 18:44:03 +0100 Subject: [PATCH 01/18] utils.transform: Add initial transform utilities. --- keras_retinanet/utils/transform.py | 125 +++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 keras_retinanet/utils/transform.py diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py new file mode 100644 index 000000000..5b7c23f36 --- /dev/null +++ b/keras_retinanet/utils/transform.py @@ -0,0 +1,125 @@ +import numpy as np + +DEFAULT_PRNG = np.random + + +def _random_vector(min, max, prng = DEFAULT_PRNG): + """ Construct a random column vector between min and max. + # Arguments + min: the minimum value for each component + max: the maximum value for each component + """ + width = np.array(max) - np.array(min) + return prng.uniform(0, 1, (2, 1)) * width + min + + +def rotation(angle): + """ Construct a homogeneous 2D rotation matrix. + # Arguments + angle: the angle in radians + # Returns + the rotation matrix as 3 by 3 numpy array + """ + return np.array([ + [np.cos(angle), -np.sin(angle), 0], + [np.sin(angle), np.cos(angle), 0], + [0, 0, 1] + ]) + + +def random_rotation(min, max, prng = DEFAULT_PRNG): + """ Construct a random rotation between -max and max. + # Arguments + max: the maximum absolute angle in radians + # Returns + a homogeneous 3 by 3 rotation matrix + """ + return rotation(prng.uniform(min, max)) + + +def translation(translation): + """ Construct a homogeneous 2D translation matrix. + # Arguments + translation: the translation as column vector + # Returns + the translation matrix as 3 by 3 numpy array + """ + return np.array([ + [1, 0, translation[0, 0]], + [0, 1, translation[1, 0]], + [0, 0, 1] + ]) + + +def random_translation(min, max, prng = DEFAULT_PRNG): + """ Construct a random 2D translation between min and max. + # Arguments + min: the minumum translation for each dimension + max: the maximum translation for each dimension + # Returns + a homogeneous 3 by 3 translation matrix + """ + assert min.shape == (2, 1) + assert max.shape == (2, 1) + return translation(_random_vector(min, max, prng)) + + +def shear(amount): + """ Construct a homogeneous 2D shear matrix. + # Arguments + amount: the shear amount + # Returns + the shear matrix as 3 by 3 numpy array + """ + return np.array([ + [1, -np.sin(amount), 0], + [0, np.cos(amount), 0], + [0, 0, 1] + ]) + + +def random_shear(min, max, prng = DEFAULT_PRNG): + """ Construct a random 2D shear matrix with shear angle between -max and max. + # Arguments + amount: the max shear amount + # Returns + a homogeneous 3 by 3 shear matrix + """ + return shear(prng.uniform(min, max)) + + +def scaling(factor): + """ Construct a homogeneous 2D scaling matrix. + # Arguments + factor: a 2D vector for X and Y scaling + # Returns + the zoom matrix as 3 by 3 numpy array + """ + return np.array([ + [factor[0, 0], 0, 0], + [0, factor[1, 0], 0], + [0, 0, 1] + ]) + + +def random_scaling(min, max, prng = DEFAULT_PRNG): + """ Construct a random 2D scale matrix between -max and max. + # Arguments + factor: a 2D vector for maximum X and Y scaling + # Returns + a homogeneous 3 by 3 scaling matrix + """ + assert min.shape == (2, 1) + assert max.shape == (2, 1) + return scaling(_random_vector(min, max, prng)) + + +def transform_around(transform, center): + """ Get a transform applying the given transform with a different origin. + # Arguments: + transform: the transformation matrix + center: the origin of the transformation + # Return: + translate(center) * transform * translate(-center) + """ + return np.dot(np.dot(translation(center), transform), translation(-center)) From 5666e064357e674ead47611a28e6dc24da1c550c Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Wed, 13 Dec 2017 21:30:04 +0100 Subject: [PATCH 02/18] utils.transform: Add random_transform and friends. --- keras_retinanet/utils/transform.py | 101 +++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index 5b7c23f36..fa09a9b17 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -123,3 +123,104 @@ def transform_around(transform, center): translate(center) * transform * translate(-center) """ return np.dot(np.dot(translation(center), transform), translation(-center)) + + +def random_transform( + min_rotation, + max_rotation, + min_translation, + max_translation, + min_shear, + max_shear, + min_scaling, + max_scaling, + flip_x_chance, + flip_y_chance, + prng = DEFAULT_PRNG +): + """ Create a random transformation. + + The transformation consists of the following operations in this order (from left to right): + * rotation + * translation + * shear + * scaling + * flip x (if applied) + * flip y (if applied) + + # Arguments + min_rotation: The minimum rotation for the transform as scalar. + max_rotation: The maximum rotation for the transform as scalar. + min_translation: The minimum translation for the transform as 2D column vector. + max_translation: The maximum translation for the transform as 2D column vector. + min_shear: The minimum shear for the transform as scalar. + max_shear: The maximum shear for the transform as scalar. + min_scaling: The minimum scaling for the transform as 2D column vector. + max_scaling: The maximum scaling for the transform as 2D column vector. + flip_x_chance: The chance (0 to 1) that a transform will contain a flip along X direction. + flip_y_chance: The chance (0 to 1) that a transform will contain a flip along Y direction. + prng: The pseudo-random number generator to use. + """ + result = random_rotation(min_rotation, max_rotation, prng) + result = np.dot(result, random_translation(min_translation, max_translation, prng)) + result = np.dot(result, random_shear(min_shear, max_shear, prng)) + result = np.dot(result, random_scaling(min_scaling, max_scaling, prng)) + flip_x = prng.uniform(0, 1) < flip_x_chance + flip_y = prng.uniform(0, 1) < flip_y_chance + result = np.dot(result, scaling(vec2(-1 if flip_x else 1, -1 if flip_y else 1))) + return result + + +def random_transform_generator(*args, **kwargs): + """ Create a random transform generator with the same arugments as `random_transform`. """ + while True: + yield random_transform(*args, **kwargs) + + +def _translate_image_data_generator_params(image_data_generator): + """ Translate the properties of a Keras ImageDataGenerator to keyword arguments for random_transform() """ + + +def random_transform_from_image_data_generator(image_data_generator, prng = DEFAULT_PRNG): + """ Create a random transform using the same parameters as a Keras ImageDataGenerator. + + Note that the image dimensions are unknown at this points, + so the transform origin should be modified to the image center before using it. + Additionally, the translation is relative to the image size. + You can use `transform_image` to fix these details for you. + + # Arguments + image_data_generator: The Keras ImageDataGenerator to mimick. + prng: The speudo-random number generator to use. + """ + rotation = image_data_generator.rotation_range + translation = np.array([[image_data_generator.width_shift_range], [image_data_generator.height_shift_range]]) + shear = image_data_generator.shear_range + min_zoom = image_data_generator.zoom_range[0] + max_zoom = image_data_generator.zoom_range[1] + flip_x = 0.5 if image_data_generator.horizontal_flip else 0 + flip_y = 0.5 if image_data_generator.vertical_flip else 0 + + return random_transform( + min_rotation = -rotation, + max_rotation = +rotation, + min_translation = -translation, + max_translation = +translation, + min_shear = -shear, + max_shear = +shear, + min_scaling = vec2(min_zoom, min_zoom), + max_scaling = vec2(max_zoom, max_zoom), + flip_x_chance = flip_x, + flip_y_chance = flip_y, + prng = prng + ) + + +def random_transform_generator_from_image_data_generator(image_data_generator, prng = DEFAULT_PRNG): + """ Create a random transform generator that mimicks a Keras ImageDataGenerator. + # Arguments + image_data_generator: The Keras ImageDataGenerator to mimick. + prng: The speudo-random number generator to use. + """ + while True: + yield random_transform_from_image_data_generator(image_data_generator, prng) From 200685b64242d2108bc4e147a4a71494db16d8c8 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Wed, 13 Dec 2017 23:42:15 +0100 Subject: [PATCH 03/18] utils.transform: Add colvec() helper. --- keras_retinanet/utils/transform.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index fa09a9b17..061b6a2de 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -3,6 +3,11 @@ DEFAULT_PRNG = np.random +def colvec(*args): + """ Create a numpy array representing a column vector. """ + return np.array([args]).T + + def _random_vector(min, max, prng = DEFAULT_PRNG): """ Construct a random column vector between min and max. # Arguments @@ -208,8 +213,8 @@ def random_transform_from_image_data_generator(image_data_generator, prng = DEFA max_translation = +translation, min_shear = -shear, max_shear = +shear, - min_scaling = vec2(min_zoom, min_zoom), - max_scaling = vec2(max_zoom, max_zoom), + min_scaling = colvec(min_zoom, min_zoom), + max_scaling = colvec(max_zoom, max_zoom), flip_x_chance = flip_x, flip_y_chance = flip_y, prng = prng From 79150d4316a16046330286d5f5fbcd4754a892d6 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Wed, 13 Dec 2017 23:42:44 +0100 Subject: [PATCH 04/18] utils.transform: Add transform_aabb function. --- keras_retinanet/utils/transform.py | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index 061b6a2de..65d8cf7db 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -8,6 +8,39 @@ def colvec(*args): return np.array([args]).T +def transform_aabb(transform, x1, y1, x2, y2): + """ Apply a transformation to an axis aligned bounding box. + + The result is a new AABB in the same coordinate system + containing all tranformed points of the original AABB. + + # Arguments + transform: The transormation to apply. + x1: The minimum X value of the AABB. + y1: The minimum y value of the AABB. + x2: The maximum X value of the AABB. + y2: The maximum y value of the AABB. + # Returns + The new AABB as tuple (x1, y1, x2, y2) + """ + # Point 2 is not within the AABB itself. + x2 -= 1 + y2 -= 1 + # Transform all 4 corners of the AABB. + points = transform.dot([ + [x1, x2, x1, x2], + [y1, y2, y2, y1], + [1, 1, 1, 1 ], + ]) + + # Extract the min and max corners again. + min_corner = points.min(axis=1) + max_corner = points.max(axis=1) + + # Make point 2 exclusive again. + return min_corner[0], min_corner[1], max_corner[0] + 1, max_corner[1] + 1 + + def _random_vector(min, max, prng = DEFAULT_PRNG): """ Construct a random column vector between min and max. # Arguments From b311108c7ca399ef25c3917c681dc0c195046f4e Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Wed, 13 Dec 2017 23:43:12 +0100 Subject: [PATCH 05/18] utils.transform: Rename transform_around to change_transform_origin. --- keras_retinanet/utils/transform.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index 65d8cf7db..e46f69487 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -152,11 +152,11 @@ def random_scaling(min, max, prng = DEFAULT_PRNG): return scaling(_random_vector(min, max, prng)) -def transform_around(transform, center): - """ Get a transform applying the given transform with a different origin. +def change_transform_origin(transform, center): + """ Create a new transform with the origin at a different location. # Arguments: transform: the transformation matrix - center: the origin of the transformation + center: the new origin of the transformation # Return: translate(center) * transform * translate(-center) """ From 9063cbb4fcfee064a5cdeaeb2f2ab23ebe910c51 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Wed, 13 Dec 2017 23:47:03 +0100 Subject: [PATCH 06/18] Use utils.transform for image augmentation. --- keras_retinanet/preprocessing/generator.py | 16 +++++-- keras_retinanet/utils/image.py | 55 ++++++++++------------ 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/keras_retinanet/preprocessing/generator.py b/keras_retinanet/preprocessing/generator.py index d94d86b59..a618469db 100644 --- a/keras_retinanet/preprocessing/generator.py +++ b/keras_retinanet/preprocessing/generator.py @@ -22,8 +22,11 @@ import keras -from ..utils.image import preprocess_image, resize_image, random_transform from ..utils.anchors import anchor_targets_bbox +from ..utils.image import apply_transform, preprocess_image, resize_image + +from ..utils.transform import DEFAULT_PRNG +from ..utils.transform import random_transform_generator_from_image_data_generator class Generator(object): @@ -35,6 +38,7 @@ def __init__( shuffle_groups=True, image_min_side=600, image_max_side=1024, + prng=DEFAULT_PRNG, seed=None ): self.image_data_generator = image_data_generator @@ -44,9 +48,11 @@ def __init__( self.image_min_side = image_min_side self.image_max_side = image_max_side + self.transform_generator = random_transform_generator_from_image_data_generator(image_data_generator) + if seed is None: seed = np.uint32((time.time() % 1)) * 1000 - np.random.seed(seed) + prng.seed(seed) self.group_index = 0 self.lock = threading.Lock() @@ -118,7 +124,11 @@ def preprocess_group(self, image_group, annotations_group): image = self.preprocess_image(image) # randomly transform both image and annotations - image, annotations = random_transform(image, annotations, self.image_data_generator) + transform = next(self.transform_generator) + channel_axis = self.image_data_generator.channel_axis - 1 # no batch axis + fill_mode = self.image_data_generator.fill_mode + cval = self.image_data_generator.cval + image, annotations = apply_transform(transform, image, annotations, channel_axis=channel_axis, fill_mode=fill_mode, cval=cval) # resize image image, image_scale = self.resize_image(image) diff --git a/keras_retinanet/utils/image.py b/keras_retinanet/utils/image.py index c523e8c82..beb5c36a8 100644 --- a/keras_retinanet/utils/image.py +++ b/keras_retinanet/utils/image.py @@ -21,6 +21,8 @@ import cv2 import PIL +from .transform import change_transform_origin, transform_aabb, colvec + def read_image_bgr(path): image = np.asarray(PIL.Image.open(path).convert('RGB')) @@ -48,44 +50,37 @@ def preprocess_image(x): return x -def random_transform( - image, - boxes, - image_data_generator, - seed=None -): - if seed is None: - seed = np.random.randint(10000) +def adjust_transform_for_image(image, transform): + """ Adjust a transformation for a specific image. + + The translation of the matrix will be scaled with the size of the image. + The linear part of the transformation will adjusted so that the origin of the transformation will be at the center of the image. + """ + height, width, channels = image.shape - image = image_data_generator.random_transform(image, seed=seed) + # Move the origin of transformation. + result = change_transform_origin(transform, colvec(width, height) * 0.5) - # set fill mode so that masks are not enlarged - fill_mode = image_data_generator.fill_mode - image_data_generator.fill_mode = 'constant' + # Scale the translation with the image size. + result[0:2, 2] *= [width, height] - for index in range(boxes.shape[0]): - # generate box mask and randomly transform it - mask = np.zeros_like(image, dtype=np.uint8) - b = boxes[index, :4].astype(int) + return result - assert(b[0] < b[2] and b[1] < b[3]), 'Annotations contain invalid box: {}'.format(b) - assert(b[2] <= image.shape[1] and b[3] <= image.shape[0]), 'Annotation ({}) is outside of image shape ({}).'.format(b, image.shape) - mask[b[1]:b[3], b[0]:b[2], :] = 255 - mask = image_data_generator.random_transform(mask, seed=seed)[..., 0] - mask = mask.copy() # to force contiguous arrays +def apply_transform(transform, image, annotations, channel_axis, fill_mode, cval): + # Update transform for image size. + transform = adjust_transform_for_image(image, transform) - # find bounding box again in augmented image - [i, j] = np.where(mask == 255) - boxes[index, 0] = float(min(j)) - boxes[index, 1] = float(min(i)) - boxes[index, 2] = float(max(j)) + 1 # set box to an open interval [min, max) - boxes[index, 3] = float(max(i)) + 1 # set box to an open interval [min, max) + # Transform the image itself. + image = keras.preprocessing.image.apply_transform(image, transform, channel_axis=channel_axis, fill_mode=fill_mode, cval=cval) - # restore fill_mode - image_data_generator.fill_mode = fill_mode + # Transform the bounding boxes in the annotations. + annotations = annotations.copy() + for index in range(annotations.shape[0]): + box = annotations[index, :4] + annotations[index, :4] = transform_aabb(transform, box[0], box[1], box[2], box[3]) - return image, boxes + return image, annotations def resize_image(img, min_side=600, max_side=1024): From 588f9c98832d4b7c30d72fdc2dc987e7e3913a2a Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Thu, 14 Dec 2017 22:54:21 +0100 Subject: [PATCH 07/18] utils.transform: Improve documentation. --- keras_retinanet/utils/transform.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index e46f69487..607ce90cd 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -11,8 +11,8 @@ def colvec(*args): def transform_aabb(transform, x1, y1, x2, y2): """ Apply a transformation to an axis aligned bounding box. - The result is a new AABB in the same coordinate system - containing all tranformed points of the original AABB. + The result is a new AABB in the same coordinate system as the original AABB. + The new AABB contains all corner points of the original AABB after applying the given transformation. # Arguments transform: The transormation to apply. @@ -23,7 +23,7 @@ def transform_aabb(transform, x1, y1, x2, y2): # Returns The new AABB as tuple (x1, y1, x2, y2) """ - # Point 2 is not within the AABB itself. + # Point x2,y2 is not within the AABB itself. x2 -= 1 y2 -= 1 # Transform all 4 corners of the AABB. @@ -68,7 +68,9 @@ def rotation(angle): def random_rotation(min, max, prng = DEFAULT_PRNG): """ Construct a random rotation between -max and max. # Arguments - max: the maximum absolute angle in radians + min: a scalar for the minumum absolute angle in radians + max: a scalar for the maximum absolute angle in radians + prng: the pseudo-random number generator to use. # Returns a homogeneous 3 by 3 rotation matrix """ @@ -92,8 +94,9 @@ def translation(translation): def random_translation(min, max, prng = DEFAULT_PRNG): """ Construct a random 2D translation between min and max. # Arguments - min: the minumum translation for each dimension - max: the maximum translation for each dimension + min: a 2D column vector with the minumum translation for each dimension + max: a 2D column vector with the maximum translation for each dimension + prng: the pseudo-random number generator to use. # Returns a homogeneous 3 by 3 translation matrix """ @@ -119,7 +122,9 @@ def shear(amount): def random_shear(min, max, prng = DEFAULT_PRNG): """ Construct a random 2D shear matrix with shear angle between -max and max. # Arguments - amount: the max shear amount + min: the minumum shear factor. + max: the maximum shear factor. + prng: the pseudo-random number generator to use. # Returns a homogeneous 3 by 3 shear matrix """ @@ -129,7 +134,7 @@ def random_shear(min, max, prng = DEFAULT_PRNG): def scaling(factor): """ Construct a homogeneous 2D scaling matrix. # Arguments - factor: a 2D vector for X and Y scaling + factor: a 2D column vector for X and Y scaling # Returns the zoom matrix as 3 by 3 numpy array """ @@ -143,7 +148,9 @@ def scaling(factor): def random_scaling(min, max, prng = DEFAULT_PRNG): """ Construct a random 2D scale matrix between -max and max. # Arguments - factor: a 2D vector for maximum X and Y scaling + min: a 2D column vector containing the minimum scaling factor for X and Y. + min: a 2D column vector containing The maximum scaling factor for X and Y. + prng: the pseudo-random number generator to use. # Returns a homogeneous 3 by 3 scaling matrix """ From 6acf9678523e58bad34af474af3604a3cfbbcf05 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Thu, 14 Dec 2017 22:54:48 +0100 Subject: [PATCH 08/18] utils.transform: Split out random_flip. --- keras_retinanet/utils/transform.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index 607ce90cd..d1d8706fe 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -159,6 +159,21 @@ def random_scaling(min, max, prng = DEFAULT_PRNG): return scaling(_random_vector(min, max, prng)) +def random_flip(flip_x_chance, flip_y_chance, prng = DEFAULT_PRNG): + """ Construct a transformation randomly containing X/Y flips (or not). + # Arguments + flip_x_chance: The chance that the result will contain a flip along the X axis. + flip_y_chance: The chance that the result will contain a flip along the Y axis. + prng: The pseudo-random number generator to use. + # Returns + a homogeneous 3 by 3 transformation matrix + """ + flip_x = prng.uniform(0, 1) < flip_x_chance + flip_y = prng.uniform(0, 1) < flip_y_chance + # 1 - 2 * bool gives 1 for False and -1 for True. + return scaling(colvec(1 - 2 * flip_x, 1 - 2 * flip_y)) + + def change_transform_origin(transform, center): """ Create a new transform with the origin at a different location. # Arguments: From e28fb7d5456fd65af54373c5dafd329a1b1c0599 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Thu, 14 Dec 2017 22:55:07 +0100 Subject: [PATCH 09/18] utils.transform: Use np.linalg.multi_dot. --- keras_retinanet/utils/transform.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index d1d8706fe..3f014a071 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -221,14 +221,13 @@ def random_transform( flip_y_chance: The chance (0 to 1) that a transform will contain a flip along Y direction. prng: The pseudo-random number generator to use. """ - result = random_rotation(min_rotation, max_rotation, prng) - result = np.dot(result, random_translation(min_translation, max_translation, prng)) - result = np.dot(result, random_shear(min_shear, max_shear, prng)) - result = np.dot(result, random_scaling(min_scaling, max_scaling, prng)) - flip_x = prng.uniform(0, 1) < flip_x_chance - flip_y = prng.uniform(0, 1) < flip_y_chance - result = np.dot(result, scaling(vec2(-1 if flip_x else 1, -1 if flip_y else 1))) - return result + return np.linalg.multi_dot([ + random_rotation(min_rotation, max_rotation, prng), + random_translation(min_translation, max_translation, prng), + random_shear(min_shear, max_shear, prng), + random_scaling(min_scaling, max_scaling, prng), + random_flip(flip_x_chance, flip_x_chance) + ]) def random_transform_generator(*args, **kwargs): From 7e05b02fabfc387ad48ec23a66c423fccf618a44 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Thu, 14 Dec 2017 23:21:21 +0100 Subject: [PATCH 10/18] utils.transform: Add defaults to generate identity in random_transform. --- keras_retinanet/utils/transform.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index 3f014a071..0a474306d 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -186,16 +186,16 @@ def change_transform_origin(transform, center): def random_transform( - min_rotation, - max_rotation, - min_translation, - max_translation, - min_shear, - max_shear, - min_scaling, - max_scaling, - flip_x_chance, - flip_y_chance, + min_rotation=0, + max_rotation=0, + min_translation=colvec(0, 0), + max_translation=colvec(0, 0), + min_shear=0, + max_shear=0, + min_scaling=colvec(1, 1), + max_scaling=colvec(1, 1), + flip_x_chance=0, + flip_y_chance=0, prng = DEFAULT_PRNG ): """ Create a random transformation. From b097c96ce2db1b57b772290b67af61d1c942c13e Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Fri, 15 Dec 2017 00:18:22 +0100 Subject: [PATCH 11/18] Remove use of keras.preprocessing.image.ImageDataGenerator. --- keras_retinanet/bin/evaluate_coco.py | 4 -- keras_retinanet/bin/train.py | 17 ++--- keras_retinanet/preprocessing/coco.py | 4 +- .../preprocessing/csv_generator.py | 3 +- keras_retinanet/preprocessing/generator.py | 64 +++++++++++-------- keras_retinanet/preprocessing/pascal_voc.py | 3 +- keras_retinanet/utils/image.py | 52 +++++++++++---- keras_retinanet/utils/transform.py | 45 ------------- tests/preprocessing/test_generator.py | 3 +- 9 files changed, 85 insertions(+), 110 deletions(-) diff --git a/keras_retinanet/bin/evaluate_coco.py b/keras_retinanet/bin/evaluate_coco.py index cb0203184..fcc88efa0 100755 --- a/keras_retinanet/bin/evaluate_coco.py +++ b/keras_retinanet/bin/evaluate_coco.py @@ -72,14 +72,10 @@ def main(args=None): print('Loading model, this may take a second...') model = keras.models.load_model(args.model, custom_objects=custom_objects) - # create image data generator object - test_image_data_generator = keras.preprocessing.image.ImageDataGenerator() - # create a generator for testing data test_generator = CocoGenerator( args.coco_path, args.set, - test_image_data_generator, ) evaluate_coco(test_generator, model, args.score_threshold) diff --git a/keras_retinanet/bin/train.py b/keras_retinanet/bin/train.py index 06d87c494..0fdcdff36 100755 --- a/keras_retinanet/bin/train.py +++ b/keras_retinanet/bin/train.py @@ -38,6 +38,7 @@ from ..preprocessing.pascal_voc import PascalVocGenerator from ..preprocessing.csv_generator import CSVGenerator from ..models.resnet import resnet50_retinanet +from ..utils.transform import random_transform_generator from ..utils.keras_version import check_keras_version @@ -109,11 +110,8 @@ def create_callbacks(model, training_model, prediction_model, validation_generat def create_generators(args): - # create image data generator objects - train_image_data_generator = keras.preprocessing.image.ImageDataGenerator( - horizontal_flip=True, - ) - val_image_data_generator = keras.preprocessing.image.ImageDataGenerator() + # create random transform generator for augmenting training data + transform_generator = random_transform_generator(flip_x_chance=0.5) if args.dataset_type == 'coco': # import here to prevent unnecessary dependency on cocoapi @@ -122,35 +120,33 @@ def create_generators(args): train_generator = CocoGenerator( args.coco_path, 'train2017', - train_image_data_generator, + transform_generator=transform_generator, batch_size=args.batch_size ) validation_generator = CocoGenerator( args.coco_path, 'val2017', - val_image_data_generator, batch_size=args.batch_size ) elif args.dataset_type == 'pascal': train_generator = PascalVocGenerator( args.pascal_path, 'trainval', - train_image_data_generator, + transform_generator=transform_generator, batch_size=args.batch_size ) validation_generator = PascalVocGenerator( args.pascal_path, 'test', - val_image_data_generator, batch_size=args.batch_size ) elif args.dataset_type == 'csv': train_generator = CSVGenerator( args.annotations, args.classes, - train_image_data_generator, + transform_generator=transform_generator, batch_size=args.batch_size ) @@ -158,7 +154,6 @@ def create_generators(args): validation_generator = CSVGenerator( args.val_annotations, args.classes, - val_image_data_generator, batch_size=args.batch_size ) else: diff --git a/keras_retinanet/preprocessing/coco.py b/keras_retinanet/preprocessing/coco.py index 94e5195fb..fc0637100 100644 --- a/keras_retinanet/preprocessing/coco.py +++ b/keras_retinanet/preprocessing/coco.py @@ -24,7 +24,7 @@ class CocoGenerator(Generator): - def __init__(self, data_dir, set_name, image_data_generator, *args, **kwargs): + def __init__(self, data_dir, set_name, **kwargs): self.data_dir = data_dir self.set_name = set_name self.coco = COCO(os.path.join(data_dir, 'annotations', 'instances_' + set_name + '.json')) @@ -32,7 +32,7 @@ def __init__(self, data_dir, set_name, image_data_generator, *args, **kwargs): self.load_classes() - super(CocoGenerator, self).__init__(image_data_generator, **kwargs) + super(CocoGenerator, self).__init__(**kwargs) def load_classes(self): # load class names (name -> label) diff --git a/keras_retinanet/preprocessing/csv_generator.py b/keras_retinanet/preprocessing/csv_generator.py index abb02c414..0e2c22df9 100644 --- a/keras_retinanet/preprocessing/csv_generator.py +++ b/keras_retinanet/preprocessing/csv_generator.py @@ -108,7 +108,6 @@ def __init__( self, csv_data_file, csv_class_file, - image_data_generator, base_dir=None, **kwargs ): @@ -139,7 +138,7 @@ def __init__( raise_from(ValueError('invalid CSV annotations file: {}: {}'.format(csv_data_file, e)), None) self.image_names = list(self.image_data.keys()) - super(CSVGenerator, self).__init__(image_data_generator, **kwargs) + super(CSVGenerator, self).__init__(**kwargs) def size(self): return len(self.image_names) diff --git a/keras_retinanet/preprocessing/generator.py b/keras_retinanet/preprocessing/generator.py index a618469db..17a3de75e 100644 --- a/keras_retinanet/preprocessing/generator.py +++ b/keras_retinanet/preprocessing/generator.py @@ -23,36 +23,34 @@ import keras from ..utils.anchors import anchor_targets_bbox -from ..utils.image import apply_transform, preprocess_image, resize_image - -from ..utils.transform import DEFAULT_PRNG -from ..utils.transform import random_transform_generator_from_image_data_generator +from ..utils.image import ( + TransformParameters, + adjust_transform_for_image, + apply_transform, + preprocess_image, + resize_image, +) +from ..utils.transform import transform_aabb class Generator(object): def __init__( self, - image_data_generator, + transform_generator = None, batch_size=1, group_method='ratio', # one of 'none', 'random', 'ratio' shuffle_groups=True, image_min_side=600, image_max_side=1024, - prng=DEFAULT_PRNG, - seed=None + transform_parameters=None, ): - self.image_data_generator = image_data_generator + self.transform_generator = transform_generator self.batch_size = int(batch_size) self.group_method = group_method self.shuffle_groups = shuffle_groups self.image_min_side = image_min_side self.image_max_side = image_max_side - - self.transform_generator = random_transform_generator_from_image_data_generator(image_data_generator) - - if seed is None: - seed = np.uint32((time.time() % 1)) * 1000 - prng.seed(seed) + self.transform_parameters = transform_parameters or TransformParameters() self.group_index = 0 self.lock = threading.Lock() @@ -118,23 +116,33 @@ def resize_image(self, image): def preprocess_image(self, image): return preprocess_image(image) - def preprocess_group(self, image_group, annotations_group): - for index, (image, annotations) in enumerate(zip(image_group, annotations_group)): - # preprocess the image (subtract imagenet mean) - image = self.preprocess_image(image) + def preprocess_group_entry(self, image, annotations): + # preprocess the image + image = self.preprocess_image(image) - # randomly transform both image and annotations - transform = next(self.transform_generator) - channel_axis = self.image_data_generator.channel_axis - 1 # no batch axis - fill_mode = self.image_data_generator.fill_mode - cval = self.image_data_generator.cval - image, annotations = apply_transform(transform, image, annotations, channel_axis=channel_axis, fill_mode=fill_mode, cval=cval) + # randomly transform both image and annotations + if self.transform_generator is not None: + transform = adjust_transform_for_image(next(self.transform_generator), image) + apply_transform(transform, image, self.transform_parameters) - # resize image - image, image_scale = self.resize_image(image) + # Transform the bounding boxes in the annotations. + annotations = annotations.copy() + for index in range(annotations.shape[0]): + box = annotations[index, :4] + annotations[index, :4] = transform_aabb(transform, box[0], box[1], box[2], box[3]) - # apply resizing to annotations too - annotations[:, :4] *= image_scale + # resize image + image, image_scale = self.resize_image(image) + + # apply resizing to annotations too + annotations[:, :4] *= image_scale + + return image, annotations + + def preprocess_group(self, image_group, annotations_group): + for index, (image, annotations) in enumerate(zip(image_group, annotations_group)): + # preprocess a single group entry + image, annotations = self.preprocess_group_entry(image, annotations) # copy processed data back to group image_group[index] = image diff --git a/keras_retinanet/preprocessing/pascal_voc.py b/keras_retinanet/preprocessing/pascal_voc.py index 8db5f8f14..e273aacb9 100644 --- a/keras_retinanet/preprocessing/pascal_voc.py +++ b/keras_retinanet/preprocessing/pascal_voc.py @@ -71,7 +71,6 @@ def __init__( self, data_dir, set_name, - image_data_generator, classes=voc_classes, image_extension='.jpg', skip_truncated=False, @@ -90,7 +89,7 @@ def __init__( for key, value in self.classes.items(): self.labels[value] = key - super(PascalVocGenerator, self).__init__(image_data_generator, **kwargs) + super(PascalVocGenerator, self).__init__(**kwargs) def size(self): return len(self.image_names) diff --git a/keras_retinanet/utils/image.py b/keras_retinanet/utils/image.py index beb5c36a8..d73fd5267 100644 --- a/keras_retinanet/utils/image.py +++ b/keras_retinanet/utils/image.py @@ -50,7 +50,7 @@ def preprocess_image(x): return x -def adjust_transform_for_image(image, transform): +def adjust_transform_for_image(transform, image): """ Adjust a transformation for a specific image. The translation of the matrix will be scaled with the size of the image. @@ -67,20 +67,44 @@ def adjust_transform_for_image(image, transform): return result -def apply_transform(transform, image, annotations, channel_axis, fill_mode, cval): - # Update transform for image size. - transform = adjust_transform_for_image(image, transform) +class TransformParameters: + """ Struct holding parameters determining how to transform images. - # Transform the image itself. - image = keras.preprocessing.image.apply_transform(image, transform, channel_axis=channel_axis, fill_mode=fill_mode, cval=cval) - - # Transform the bounding boxes in the annotations. - annotations = annotations.copy() - for index in range(annotations.shape[0]): - box = annotations[index, :4] - annotations[index, :4] = transform_aabb(transform, box[0], box[1], box[2], box[3]) - - return image, annotations + # Arguments + fill_mode: Same as for keras.preprocessing.image.apply_transform + cval: Same as for keras.preprocessing.image.apply_transform + data_format: Same as for keras.preprocessing.image.apply_transform + """ + def __init__( + self, + fill_mode = 'nearest', + cval = 0, + data_format = None, + ): + self.fill_mode = fill_mode + self.cval = cval + + if data_format is None: + data_format = keras.backend.image_data_format() + self.data_format = data_format + + if data_format == 'channels_first': + self.channel_axis = 0 + elif data_format == 'channels_last': + self.channel_axis = 2 + else: + raise ValueError("invalid data_format, expected 'channels_first' or 'channels_last', got '{}'".format(data_format)) + + +def apply_transform(transform, image, params): + """ Wrapper around keras.preprocessing.image.apply_transform using TransformParameters. """ + return keras.preprocessing.image.apply_transform( + image, + transform, + channel_axis=params.channel_axis, + fill_mode=params.fill_mode, + cval=params.cval + ) def resize_image(img, min_side=600, max_side=1024): diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index 0a474306d..f5a531fe4 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -236,50 +236,5 @@ def random_transform_generator(*args, **kwargs): yield random_transform(*args, **kwargs) -def _translate_image_data_generator_params(image_data_generator): - """ Translate the properties of a Keras ImageDataGenerator to keyword arguments for random_transform() """ - -def random_transform_from_image_data_generator(image_data_generator, prng = DEFAULT_PRNG): - """ Create a random transform using the same parameters as a Keras ImageDataGenerator. - - Note that the image dimensions are unknown at this points, - so the transform origin should be modified to the image center before using it. - Additionally, the translation is relative to the image size. - You can use `transform_image` to fix these details for you. - - # Arguments - image_data_generator: The Keras ImageDataGenerator to mimick. - prng: The speudo-random number generator to use. - """ - rotation = image_data_generator.rotation_range - translation = np.array([[image_data_generator.width_shift_range], [image_data_generator.height_shift_range]]) - shear = image_data_generator.shear_range - min_zoom = image_data_generator.zoom_range[0] - max_zoom = image_data_generator.zoom_range[1] - flip_x = 0.5 if image_data_generator.horizontal_flip else 0 - flip_y = 0.5 if image_data_generator.vertical_flip else 0 - - return random_transform( - min_rotation = -rotation, - max_rotation = +rotation, - min_translation = -translation, - max_translation = +translation, - min_shear = -shear, - max_shear = +shear, - min_scaling = colvec(min_zoom, min_zoom), - max_scaling = colvec(max_zoom, max_zoom), - flip_x_chance = flip_x, - flip_y_chance = flip_y, - prng = prng - ) - - -def random_transform_generator_from_image_data_generator(image_data_generator, prng = DEFAULT_PRNG): - """ Create a random transform generator that mimicks a Keras ImageDataGenerator. - # Arguments - image_data_generator: The Keras ImageDataGenerator to mimick. - prng: The speudo-random number generator to use. - """ while True: - yield random_transform_from_image_data_generator(image_data_generator, prng) diff --git a/tests/preprocessing/test_generator.py b/tests/preprocessing/test_generator.py index d28e0212b..2740c4300 100644 --- a/tests/preprocessing/test_generator.py +++ b/tests/preprocessing/test_generator.py @@ -15,7 +15,6 @@ """ import keras.backend -from keras.preprocessing.image import ImageDataGenerator from keras_retinanet.preprocessing.generator import Generator import numpy as np @@ -27,7 +26,7 @@ def __init__(self, annotations_group, num_classes=0, image=None): self.annotations_group = annotations_group self.num_classes_ = num_classes self.image = image - super(SimpleGenerator, self).__init__(ImageDataGenerator(), group_method='none', shuffle_groups=False) + super(SimpleGenerator, self).__init__(group_method='none', shuffle_groups=False) def num_classes(self): return self.num_classes_ From 201eac80833f1931e9e24719008044fa66ed268f Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Fri, 15 Dec 2017 00:25:29 +0100 Subject: [PATCH 12/18] utils.transform: Make random_transform_generator use a dedicated PRNG. --- keras_retinanet/utils/transform.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index f5a531fe4..8b0ba40cb 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -230,11 +230,15 @@ def random_transform( ]) -def random_transform_generator(*args, **kwargs): - """ Create a random transform generator with the same arugments as `random_transform`. """ - while True: - yield random_transform(*args, **kwargs) +def random_transform_generator(prng = None, **kwargs): + """ Create a random transform generator with the same arugments as `random_transform`. + Uses a dedicated, newly created, properly seeded PRNG by default instead of the global DEFAULT_PRNG. + """ + if prng is None: + # RandomState automatically seeds using the best available method. + prng = np.random.RandomState() while True: + yield random_transform(prng=prng, **kwargs) From 3d8a1d59464f583f5d3f5d590349abdd855caf43 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Sat, 16 Dec 2017 23:15:53 +0100 Subject: [PATCH 13/18] utils.transform: Remove wrong bounds correction from transform_aabb. --- keras_retinanet/utils/transform.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index 8b0ba40cb..3eccf152b 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -23,9 +23,6 @@ def transform_aabb(transform, x1, y1, x2, y2): # Returns The new AABB as tuple (x1, y1, x2, y2) """ - # Point x2,y2 is not within the AABB itself. - x2 -= 1 - y2 -= 1 # Transform all 4 corners of the AABB. points = transform.dot([ [x1, x2, x1, x2], @@ -38,7 +35,7 @@ def transform_aabb(transform, x1, y1, x2, y2): max_corner = points.max(axis=1) # Make point 2 exclusive again. - return min_corner[0], min_corner[1], max_corner[0] + 1, max_corner[1] + 1 + return min_corner[0], min_corner[1], max_corner[0], max_corner[1] def _random_vector(min, max, prng = DEFAULT_PRNG): From c9798dfc7ff87c23d7ce5d2bf9267c1262801568 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Sat, 16 Dec 2017 23:23:45 +0100 Subject: [PATCH 14/18] utils.transform: Add tests. --- tests/utils/__init__.py | 0 tests/utils/test_transform.py | 145 ++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 tests/utils/__init__.py create mode 100644 tests/utils/test_transform.py diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/utils/test_transform.py b/tests/utils/test_transform.py new file mode 100644 index 000000000..ddebf7f2d --- /dev/null +++ b/tests/utils/test_transform.py @@ -0,0 +1,145 @@ +import numpy as np +from numpy.testing import assert_almost_equal +from math import pi + +from keras_retinanet.utils.transform import ( + colvec, + transform_aabb, + rotation, random_rotation, + translation, random_translation, + scaling, random_scaling, + shear, random_shear, + random_flip, + random_transform, + random_transform_generator, +) + + +def test_colvec(): + assert np.array_equal(colvec(0), np.array([[0]])) + assert np.array_equal(colvec(1, 2, 3), np.array([[1], [2], [3]])) + assert np.array_equal(colvec(-1, -2), np.array([[-1], [-2]])) + + +def test_rotation(): + assert_almost_equal(colvec( 1, 0, 1), rotation(0.0 * pi).dot(colvec(1, 0, 1))) + assert_almost_equal(colvec( 0, 1, 1), rotation(0.5 * pi).dot(colvec(1, 0, 1))) + assert_almost_equal(colvec(-1, 0, 1), rotation(1.0 * pi).dot(colvec(1, 0, 1))) + assert_almost_equal(colvec( 0, -1, 1), rotation(1.5 * pi).dot(colvec(1, 0, 1))) + assert_almost_equal(colvec( 1, 0, 1), rotation(2.0 * pi).dot(colvec(1, 0, 1))) + + assert_almost_equal(colvec( 0, 1, 1), rotation(0.0 * pi).dot(colvec(0, 1, 1))) + assert_almost_equal(colvec(-1, 0, 1), rotation(0.5 * pi).dot(colvec(0, 1, 1))) + assert_almost_equal(colvec( 0, -1, 1), rotation(1.0 * pi).dot(colvec(0, 1, 1))) + assert_almost_equal(colvec( 1, 0, 1), rotation(1.5 * pi).dot(colvec(0, 1, 1))) + assert_almost_equal(colvec( 0, 1, 1), rotation(2.0 * pi).dot(colvec(0, 1, 1))) + + +def test_random_rotation(): + prng = np.random.RandomState(0) + for i in range(100): + assert_almost_equal(1, np.linalg.det(random_rotation(-i, i, prng))) + + +def test_translation(): + assert_almost_equal(colvec( 1, 2, 1), translation(colvec( 0, 0)).dot(colvec(1, 2, 1))) + assert_almost_equal(colvec( 4, 6, 1), translation(colvec( 3, 4)).dot(colvec(1, 2, 1))) + assert_almost_equal(colvec(-2, -2, 1), translation(colvec(-3, -4)).dot(colvec(1, 2, 1))) + + +def assert_is_translation(transform, min, max): + assert transform.shape == (3, 3) + assert np.array_equal(transform[:, 0:2], np.eye(3, 2)) + assert transform[2, 2] == 1 + assert np.greater_equal(transform[0:2, 2:3], min).all() + assert np.less( transform[0:2, 2:3], max).all() + + +def test_random_translation(): + prng = np.random.RandomState(0) + min = colvec(-10, -20) + max = colvec(20, 10) + for i in range(100): + assert_is_translation(random_translation(min, max, prng), min, max) + + +def test_shear(): + assert_almost_equal(colvec( 1, 2, 1), shear(0.0 * pi).dot(colvec(1, 2, 1))) + assert_almost_equal(colvec(-1, 0, 1), shear(0.5 * pi).dot(colvec(1, 2, 1))) + assert_almost_equal(colvec( 1, -2, 1), shear(1.0 * pi).dot(colvec(1, 2, 1))) + assert_almost_equal(colvec( 3, 0, 1), shear(1.5 * pi).dot(colvec(1, 2, 1))) + assert_almost_equal(colvec( 1, 2, 1), shear(2.0 * pi).dot(colvec(1, 2, 1))) + + +def assert_is_shear(transform): + assert transform.shape == (3, 3) + assert np.array_equal(transform[:, 0], [1, 0, 0]) + assert np.array_equal(transform[:, 2], [0, 0, 1]) + assert transform[2, 1] == 0 + # sin^2 + cos^2 == 1 + assert_almost_equal(1, transform[0, 1] ** 2 + transform[1, 1] ** 2) + + +def test_random_shear(): + prng = np.random.RandomState(0) + for i in range(100): + assert_is_shear(random_shear(-pi, pi, prng)) + + +def test_scaling(): + assert_almost_equal(colvec(1.0, 2, 1), scaling(colvec(1.0, 1.0)).dot(colvec(1, 2, 1))) + assert_almost_equal(colvec(0.0, 2, 1), scaling(colvec(0.0, 1.0)).dot(colvec(1, 2, 1))) + assert_almost_equal(colvec(1.0, 0, 1), scaling(colvec(1.0, 0.0)).dot(colvec(1, 2, 1))) + assert_almost_equal(colvec(0.5, 4, 1), scaling(colvec(0.5, 2.0)).dot(colvec(1, 2, 1))) + + +def assert_is_scaling(transform, min, max): + assert transform.shape == (3, 3) + assert np.array_equal(transform[2, :], [0, 0, 1]) + assert np.array_equal(transform[:, 2], [0, 0, 1]) + assert transform[1, 0] == 0 + assert transform[0, 1] == 0 + assert np.greater_equal(np.diagonal(transform)[:2], min).all() + assert np.less( np.diagonal(transform)[:2], max).all() + + +def test_random_scaling(): + prng = np.random.RandomState(0) + min = colvec(0.1, 0.2) + max = colvec(20, 10) + for i in range(100): + assert_is_scaling(random_scaling(min, max, prng), min[:, 0], max[:, 0]) + + +def assert_is_flip(transform): + assert transform.shape == (3, 3) + assert np.array_equal(transform[2, :], [0, 0, 1]) + assert np.array_equal(transform[:, 2], [0, 0, 1]) + assert transform[1, 0] == 0 + assert transform[0, 1] == 0 + assert abs(transform[0, 0]) == 1 + assert abs(transform[1, 1]) == 1 + + +def test_random_flip(): + prng = np.random.RandomState(0) + min = colvec(0.1, 0.2) + max = colvec(20, 10) + for i in range(100): + assert_is_flip(random_flip(0.5, 0.5, prng)) + + +def test_random_transform(): + prng = np.random.RandomState(0) + for i in range(100): + transform = random_transform(prng=prng) + assert np.array_equal(transform, np.identity(3)) + + for i, transform in zip(range(100), random_transform_generator(prng=np.random.RandomState())): + assert np.array_equal(transform, np.identity(3)) + + +def test_transform_aabb(): + assert (1, 2, 3, 4) == transform_aabb(np.identity(3), 1, 2, 3, 4) + assert_almost_equal(colvec(-3, -4, -1, -2), colvec(*transform_aabb(rotation(pi), 1, 2, 3, 4))) + assert_almost_equal(colvec( 2, 4, 4, 6), colvec(*transform_aabb(translation(colvec(1, 2)), 1, 2, 3, 4))) From ac169917c6f0625481e8b8c215f5065b37ab604b Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Sun, 17 Dec 2017 00:00:29 +0100 Subject: [PATCH 15/18] utils.transform: Use 1 dimensional vectors instead of column vectors. --- keras_retinanet/preprocessing/generator.py | 3 +- keras_retinanet/utils/image.py | 2 +- keras_retinanet/utils/transform.py | 50 +++++++++++----------- tests/utils/test_transform.py | 22 +++++----- 4 files changed, 37 insertions(+), 40 deletions(-) diff --git a/keras_retinanet/preprocessing/generator.py b/keras_retinanet/preprocessing/generator.py index 17a3de75e..3618aaba6 100644 --- a/keras_retinanet/preprocessing/generator.py +++ b/keras_retinanet/preprocessing/generator.py @@ -128,8 +128,7 @@ def preprocess_group_entry(self, image, annotations): # Transform the bounding boxes in the annotations. annotations = annotations.copy() for index in range(annotations.shape[0]): - box = annotations[index, :4] - annotations[index, :4] = transform_aabb(transform, box[0], box[1], box[2], box[3]) + annotations[index, :4] = transform_aabb(transform, annotations[index, :4]) # resize image image, image_scale = self.resize_image(image) diff --git a/keras_retinanet/utils/image.py b/keras_retinanet/utils/image.py index d73fd5267..89e9c7a36 100644 --- a/keras_retinanet/utils/image.py +++ b/keras_retinanet/utils/image.py @@ -59,7 +59,7 @@ def adjust_transform_for_image(transform, image): height, width, channels = image.shape # Move the origin of transformation. - result = change_transform_origin(transform, colvec(width, height) * 0.5) + result = change_transform_origin(transform, (0.5 * width, 0.5 * height)) # Scale the translation with the image size. result[0:2, 2] *= [width, height] diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index 3eccf152b..57cab3480 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -8,7 +8,7 @@ def colvec(*args): return np.array([args]).T -def transform_aabb(transform, x1, y1, x2, y2): +def transform_aabb(transform, aabb): """ Apply a transformation to an axis aligned bounding box. The result is a new AABB in the same coordinate system as the original AABB. @@ -23,6 +23,7 @@ def transform_aabb(transform, x1, y1, x2, y2): # Returns The new AABB as tuple (x1, y1, x2, y2) """ + x1, y1, x2, y2 = aabb # Transform all 4 corners of the AABB. points = transform.dot([ [x1, x2, x1, x2], @@ -34,18 +35,20 @@ def transform_aabb(transform, x1, y1, x2, y2): min_corner = points.min(axis=1) max_corner = points.max(axis=1) - # Make point 2 exclusive again. - return min_corner[0], min_corner[1], max_corner[0], max_corner[1] + return [min_corner[0], min_corner[1], max_corner[0], max_corner[1]] def _random_vector(min, max, prng = DEFAULT_PRNG): - """ Construct a random column vector between min and max. + """ Construct a random vector between min and max. # Arguments min: the minimum value for each component max: the maximum value for each component """ - width = np.array(max) - np.array(min) - return prng.uniform(0, 1, (2, 1)) * width + min + min = np.array(min) + max = np.array(max) + assert min.shape == max.shape + assert len(min.shape) == 1 + return prng.uniform(min, max) def rotation(angle): @@ -77,13 +80,13 @@ def random_rotation(min, max, prng = DEFAULT_PRNG): def translation(translation): """ Construct a homogeneous 2D translation matrix. # Arguments - translation: the translation as column vector + translation: the translation 2D vector # Returns the translation matrix as 3 by 3 numpy array """ return np.array([ - [1, 0, translation[0, 0]], - [0, 1, translation[1, 0]], + [1, 0, translation[0]], + [0, 1, translation[1]], [0, 0, 1] ]) @@ -91,14 +94,12 @@ def translation(translation): def random_translation(min, max, prng = DEFAULT_PRNG): """ Construct a random 2D translation between min and max. # Arguments - min: a 2D column vector with the minumum translation for each dimension - max: a 2D column vector with the maximum translation for each dimension + min: a 2D vector with the minumum translation for each dimension + max: a 2D vector with the maximum translation for each dimension prng: the pseudo-random number generator to use. # Returns a homogeneous 3 by 3 translation matrix """ - assert min.shape == (2, 1) - assert max.shape == (2, 1) return translation(_random_vector(min, max, prng)) @@ -131,13 +132,13 @@ def random_shear(min, max, prng = DEFAULT_PRNG): def scaling(factor): """ Construct a homogeneous 2D scaling matrix. # Arguments - factor: a 2D column vector for X and Y scaling + factor: a 2D vector for X and Y scaling # Returns the zoom matrix as 3 by 3 numpy array """ return np.array([ - [factor[0, 0], 0, 0], - [0, factor[1, 0], 0], + [factor[0], 0, 0], + [0, factor[1], 0], [0, 0, 1] ]) @@ -145,14 +146,12 @@ def scaling(factor): def random_scaling(min, max, prng = DEFAULT_PRNG): """ Construct a random 2D scale matrix between -max and max. # Arguments - min: a 2D column vector containing the minimum scaling factor for X and Y. - min: a 2D column vector containing The maximum scaling factor for X and Y. + min: a 2D vector containing the minimum scaling factor for X and Y. + min: a 2D vector containing The maximum scaling factor for X and Y. prng: the pseudo-random number generator to use. # Returns a homogeneous 3 by 3 scaling matrix """ - assert min.shape == (2, 1) - assert max.shape == (2, 1) return scaling(_random_vector(min, max, prng)) @@ -168,7 +167,7 @@ def random_flip(flip_x_chance, flip_y_chance, prng = DEFAULT_PRNG): flip_x = prng.uniform(0, 1) < flip_x_chance flip_y = prng.uniform(0, 1) < flip_y_chance # 1 - 2 * bool gives 1 for False and -1 for True. - return scaling(colvec(1 - 2 * flip_x, 1 - 2 * flip_y)) + return scaling((1 - 2 * flip_x, 1 - 2 * flip_y)) def change_transform_origin(transform, center): @@ -179,18 +178,19 @@ def change_transform_origin(transform, center): # Return: translate(center) * transform * translate(-center) """ + center = np.array(center) return np.dot(np.dot(translation(center), transform), translation(-center)) def random_transform( min_rotation=0, max_rotation=0, - min_translation=colvec(0, 0), - max_translation=colvec(0, 0), + min_translation=(0, 0), + max_translation=(0, 0), min_shear=0, max_shear=0, - min_scaling=colvec(1, 1), - max_scaling=colvec(1, 1), + min_scaling=(1, 1), + max_scaling=(1, 1), flip_x_chance=0, flip_y_chance=0, prng = DEFAULT_PRNG diff --git a/tests/utils/test_transform.py b/tests/utils/test_transform.py index ddebf7f2d..6c7604700 100644 --- a/tests/utils/test_transform.py +++ b/tests/utils/test_transform.py @@ -51,14 +51,14 @@ def assert_is_translation(transform, min, max): assert transform.shape == (3, 3) assert np.array_equal(transform[:, 0:2], np.eye(3, 2)) assert transform[2, 2] == 1 - assert np.greater_equal(transform[0:2, 2:3], min).all() - assert np.less( transform[0:2, 2:3], max).all() + assert np.greater_equal(transform[0:2, 2], min).all() + assert np.less( transform[0:2, 2], max).all() def test_random_translation(): prng = np.random.RandomState(0) - min = colvec(-10, -20) - max = colvec(20, 10) + min = (-10, -20) + max = (20, 10) for i in range(100): assert_is_translation(random_translation(min, max, prng), min, max) @@ -105,10 +105,10 @@ def assert_is_scaling(transform, min, max): def test_random_scaling(): prng = np.random.RandomState(0) - min = colvec(0.1, 0.2) - max = colvec(20, 10) + min = (0.1, 0.2) + max = (20, 10) for i in range(100): - assert_is_scaling(random_scaling(min, max, prng), min[:, 0], max[:, 0]) + assert_is_scaling(random_scaling(min, max, prng), min, max) def assert_is_flip(transform): @@ -123,8 +123,6 @@ def assert_is_flip(transform): def test_random_flip(): prng = np.random.RandomState(0) - min = colvec(0.1, 0.2) - max = colvec(20, 10) for i in range(100): assert_is_flip(random_flip(0.5, 0.5, prng)) @@ -140,6 +138,6 @@ def test_random_transform(): def test_transform_aabb(): - assert (1, 2, 3, 4) == transform_aabb(np.identity(3), 1, 2, 3, 4) - assert_almost_equal(colvec(-3, -4, -1, -2), colvec(*transform_aabb(rotation(pi), 1, 2, 3, 4))) - assert_almost_equal(colvec( 2, 4, 4, 6), colvec(*transform_aabb(translation(colvec(1, 2)), 1, 2, 3, 4))) + assert np.array_equal([1, 2, 3, 4], transform_aabb(np.identity(3), [1, 2, 3, 4])) + assert_almost_equal([-3, -4, -1, -2], transform_aabb(rotation(pi), [1, 2, 3, 4])) + assert_almost_equal([ 2, 4, 4, 6], transform_aabb(translation([1, 2]), [1, 2, 3, 4])) From e072b38f5e5ac1d654d4470b5430402dc666ed14 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Sun, 17 Dec 2017 00:24:44 +0100 Subject: [PATCH 16/18] utils.transform: Add test for change_transform_origin. --- tests/utils/test_transform.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/utils/test_transform.py b/tests/utils/test_transform.py index 6c7604700..f8b94e788 100644 --- a/tests/utils/test_transform.py +++ b/tests/utils/test_transform.py @@ -12,6 +12,7 @@ random_flip, random_transform, random_transform_generator, + change_transform_origin, ) @@ -141,3 +142,11 @@ def test_transform_aabb(): assert np.array_equal([1, 2, 3, 4], transform_aabb(np.identity(3), [1, 2, 3, 4])) assert_almost_equal([-3, -4, -1, -2], transform_aabb(rotation(pi), [1, 2, 3, 4])) assert_almost_equal([ 2, 4, 4, 6], transform_aabb(translation([1, 2]), [1, 2, 3, 4])) + + +def test_change_transform_origin(): + prng = np.random.RandomState(0) + assert np.array_equal(change_transform_origin(translation([3, 4]), [1, 2]), translation([3, 4])) + assert_almost_equal(colvec(1, 2, 1), change_transform_origin(rotation(pi), [1, 2]).dot(colvec(1, 2, 1))) + assert_almost_equal(colvec(0, 0, 1), change_transform_origin(rotation(pi), [1, 2]).dot(colvec(2, 4, 1))) + assert_almost_equal(colvec(0, 0, 1), change_transform_origin(scaling([0.5, 0.5]), [-2, -4]).dot(colvec(2, 4, 1))) From b13e08ff41d850571907b17a1aa61df774b22c0c Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Thu, 4 Jan 2018 19:36:12 +0100 Subject: [PATCH 17/18] utils.transform: Remove spaces around '=' of default arguments. --- keras_retinanet/utils/transform.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/keras_retinanet/utils/transform.py b/keras_retinanet/utils/transform.py index 57cab3480..96473600d 100644 --- a/keras_retinanet/utils/transform.py +++ b/keras_retinanet/utils/transform.py @@ -38,7 +38,7 @@ def transform_aabb(transform, aabb): return [min_corner[0], min_corner[1], max_corner[0], max_corner[1]] -def _random_vector(min, max, prng = DEFAULT_PRNG): +def _random_vector(min, max, prng=DEFAULT_PRNG): """ Construct a random vector between min and max. # Arguments min: the minimum value for each component @@ -65,7 +65,7 @@ def rotation(angle): ]) -def random_rotation(min, max, prng = DEFAULT_PRNG): +def random_rotation(min, max, prng=DEFAULT_PRNG): """ Construct a random rotation between -max and max. # Arguments min: a scalar for the minumum absolute angle in radians @@ -91,7 +91,7 @@ def translation(translation): ]) -def random_translation(min, max, prng = DEFAULT_PRNG): +def random_translation(min, max, prng=DEFAULT_PRNG): """ Construct a random 2D translation between min and max. # Arguments min: a 2D vector with the minumum translation for each dimension @@ -117,7 +117,7 @@ def shear(amount): ]) -def random_shear(min, max, prng = DEFAULT_PRNG): +def random_shear(min, max, prng=DEFAULT_PRNG): """ Construct a random 2D shear matrix with shear angle between -max and max. # Arguments min: the minumum shear factor. @@ -143,7 +143,7 @@ def scaling(factor): ]) -def random_scaling(min, max, prng = DEFAULT_PRNG): +def random_scaling(min, max, prng=DEFAULT_PRNG): """ Construct a random 2D scale matrix between -max and max. # Arguments min: a 2D vector containing the minimum scaling factor for X and Y. @@ -155,7 +155,7 @@ def random_scaling(min, max, prng = DEFAULT_PRNG): return scaling(_random_vector(min, max, prng)) -def random_flip(flip_x_chance, flip_y_chance, prng = DEFAULT_PRNG): +def random_flip(flip_x_chance, flip_y_chance, prng=DEFAULT_PRNG): """ Construct a transformation randomly containing X/Y flips (or not). # Arguments flip_x_chance: The chance that the result will contain a flip along the X axis. @@ -193,7 +193,7 @@ def random_transform( max_scaling=(1, 1), flip_x_chance=0, flip_y_chance=0, - prng = DEFAULT_PRNG + prng=DEFAULT_PRNG ): """ Create a random transformation. @@ -227,7 +227,7 @@ def random_transform( ]) -def random_transform_generator(prng = None, **kwargs): +def random_transform_generator(prng=None, **kwargs): """ Create a random transform generator with the same arugments as `random_transform`. Uses a dedicated, newly created, properly seeded PRNG by default instead of the global DEFAULT_PRNG. From ca6f5b6c16138db4292a2061435bfeaf741bab22 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Sat, 6 Jan 2018 00:00:00 +0100 Subject: [PATCH 18/18] preprocessing.generator: Check truthiness instead of `is None`. --- keras_retinanet/preprocessing/generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_retinanet/preprocessing/generator.py b/keras_retinanet/preprocessing/generator.py index 3618aaba6..8a7a95604 100644 --- a/keras_retinanet/preprocessing/generator.py +++ b/keras_retinanet/preprocessing/generator.py @@ -121,7 +121,7 @@ def preprocess_group_entry(self, image, annotations): image = self.preprocess_image(image) # randomly transform both image and annotations - if self.transform_generator is not None: + if self.transform_generator: transform = adjust_transform_for_image(next(self.transform_generator), image) apply_transform(transform, image, self.transform_parameters)