From 49f6bb14d693e0801c79780f0430b044319e290f Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Fri, 13 Sep 2024 15:37:30 +0200 Subject: [PATCH 01/20] Add `DifferentialBinarization` model --- keras_nlp/src/models/diffbin/__init__.py | 13 + keras_nlp/src/models/diffbin/diffbin.py | 243 +++++++++++++++++++ keras_nlp/src/models/diffbin/diffbin_test.py | 59 +++++ keras_nlp/src/models/diffbin/losses.py | 139 +++++++++++ 4 files changed, 454 insertions(+) create mode 100644 keras_nlp/src/models/diffbin/__init__.py create mode 100644 keras_nlp/src/models/diffbin/diffbin.py create mode 100644 keras_nlp/src/models/diffbin/diffbin_test.py create mode 100644 keras_nlp/src/models/diffbin/losses.py diff --git a/keras_nlp/src/models/diffbin/__init__.py b/keras_nlp/src/models/diffbin/__init__.py new file mode 100644 index 0000000000..3364a6bd16 --- /dev/null +++ b/keras_nlp/src/models/diffbin/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/keras_nlp/src/models/diffbin/diffbin.py b/keras_nlp/src/models/diffbin/diffbin.py new file mode 100644 index 0000000000..9b25c5d0dd --- /dev/null +++ b/keras_nlp/src/models/diffbin/diffbin.py @@ -0,0 +1,243 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import keras +from keras import layers + +from keras_nlp.src.api_export import keras_nlp_export +from keras_nlp.src.models.diffbin.losses import DBLoss +from keras_nlp.src.models.task import Task + + +@keras_nlp_export("keras_nlp.models.DifferentialBinarization") +class DifferentialBinarization(Task): + """ + A Keras model implementing the Differential Binarization + architecture for scene text detection, described in + [Real-time Scene Text Detection with Differentiable Binarization]( + https://arxiv.org/abs/1911.08947). + + Args: + backbone: A `keras_nlp.models.ResNetBackbone` instance. + fpn_channels: int. The number of channels to output by the Feature + Pyramid Network. Defaults to 256. + fpn_kernel_list: list of ints. The number of filters for probability + and threshold maps. Defaults to [3, 2, 2]. 
+ + Examples: + ```python + input_data = np.ones(shape=(8, 224, 224, 3)) + + backbone = keras_nlp.models.ResNetBackbone.from_preset("resnet50_vd") + detector = keras_nlp.models.DifferentialBinarization( + backbone=backbone + ) + + detector(input_data) + ``` + """ + + def __init__( + self, + backbone, + fpn_channels=256, + head_kernel_list=[3, 2, 2], + step_function_k=50.0, + preprocessor=None, # adding this dummy arg for saved model test + **kwargs, + ): + backbone = backbone + + inputs = backbone.input + x = backbone.pyramid_outputs + x = diffbin_fpn_model(x, out_channels=fpn_channels) + probability_maps = diffbin_head( + x, + in_channels=fpn_channels, + kernel_list=head_kernel_list, + name="head_prob", + ) + threshold_maps = diffbin_head( + x, + in_channels=fpn_channels, + kernel_list=head_kernel_list, + name="head_thresh", + ) + binary_maps = step_function( + probability_maps, threshold_maps, k=step_function_k + ) + outputs = layers.Concatenate(axis=-1)( + [probability_maps, threshold_maps, binary_maps] + ) + + super().__init__(inputs=inputs, outputs=outputs, **kwargs) + + self.backbone = backbone + self.fpn_channels = fpn_channels + self.head_kernel_list = head_kernel_list + self.step_function_k = step_function_k + + def compile( + self, + optimizer="auto", + loss="auto", + **kwargs, + ): + """Configures the `DifferentialBinarization` task for training. + + `DifferentialBinarization` extends the default compilation signature of + `keras.Model.compile` with defaults for `optimizer` and `loss`. To + override these defaults, pass any value to these arguments during + compilation. + + Args: + optimizer: `"auto"`, an optimizer name, or a `keras.Optimizer` + instance. Defaults to `"auto"`, which uses the default optimizer + for `DifferentialBinarization`. See `keras.Model.compile` and + `keras.optimizers` for more info on possible `optimizer` values. + loss: `"auto"`, a loss name, or a `keras.losses.Loss` instance. 
+ Defaults to `"auto"`, in which case the default loss + computation of `DifferentialBinarization` will be applied. See + `keras.Model.compile` and `keras.losses` for more info on + possible `loss` values. + **kwargs: See `keras.Model.compile` for a full list of arguments + supported by the compile method. + """ + if optimizer == "auto": + optimizer = keras.optimizers.SGD( + learning_rate=0.007, weight_decay=0.0001, momentum=0.9 + ) + if loss == "auto": + loss = DBLoss() + super().compile( + optimizer=optimizer, + loss=loss, + **kwargs, + ) + + def get_config(self): + # Backbone serialized in `super` + config = super().get_config() + config.update( + { + "fpn_channels": self.fpn_channels, + "head_kernel_list": self.head_kernel_list, + "step_function_k": self.step_function_k, + } + ) + return config + + +def diffbin_fpn_model(inputs, out_channels): + in2 = layers.Conv2D( + out_channels, kernel_size=1, use_bias=False, name="neck_in2" + )(inputs["P2"]) + in3 = layers.Conv2D( + out_channels, kernel_size=1, use_bias=False, name="neck_in3" + )(inputs["P3"]) + in4 = layers.Conv2D( + out_channels, kernel_size=1, use_bias=False, name="neck_in4" + )(inputs["P4"]) + in5 = layers.Conv2D( + out_channels, kernel_size=1, use_bias=False, name="neck_in5" + )(inputs["P5"]) + out4 = layers.Add(name="add1")([layers.UpSampling2D()(in5), in4]) + out3 = layers.Add(name="add2")([layers.UpSampling2D()(out4), in3]) + out2 = layers.Add(name="add3")([layers.UpSampling2D()(out3), in2]) + p5 = layers.Conv2D( + out_channels // 4, + kernel_size=3, + padding="same", + use_bias=False, + name="neck_p5", + )(in5) + p4 = layers.Conv2D( + out_channels // 4, + kernel_size=3, + padding="same", + use_bias=False, + name="neck_p4", + )(out4) + p3 = layers.Conv2D( + out_channels // 4, + kernel_size=3, + padding="same", + use_bias=False, + name="neck_p3", + )(out3) + p2 = layers.Conv2D( + out_channels // 4, + kernel_size=3, + padding="same", + use_bias=False, + name="neck_p2", + )(out2) + p5 = 
layers.UpSampling2D((8, 8))(p5) + p4 = layers.UpSampling2D((4, 4))(p4) + p3 = layers.UpSampling2D((2, 2))(p3) + + fused = layers.Concatenate(axis=-1)([p5, p4, p3, p2]) + return fused + + +def step_function(x, y, k): + return 1.0 / (1.0 + keras.ops.exp(-k * (x - y))) + + +def diffbin_head(inputs, in_channels, kernel_list, name): + x = layers.Conv2D( + in_channels // 4, + kernel_size=kernel_list[0], + padding="same", + use_bias=False, + name=f"{name}_conv0_weights", + )(inputs) + x = layers.BatchNormalization( + beta_initializer=keras.initializers.Constant(1e-4), + gamma_initializer=keras.initializers.Constant(1.0), + name=f"{name}_conv0_bn", + )(x) + x = layers.ReLU(name=f"{name}_conv0_relu")(x) + x = layers.Conv2DTranspose( + in_channels // 4, + kernel_size=kernel_list[1], + strides=2, + padding="valid", + bias_initializer=keras.initializers.RandomUniform( + minval=-1.0 / math.sqrt(in_channels // 4 * 1.0), + maxval=1.0 / math.sqrt(in_channels // 4 * 1.0), + ), + name=f"{name}_conv1_weights", + )(x) + x = layers.BatchNormalization( + beta_initializer=keras.initializers.Constant(1e-4), + gamma_initializer=keras.initializers.Constant(1.0), + name=f"{name}_conv1_bn", + )(x) + x = layers.ReLU(name=f"{name}_conv1_relu")(x) + x = layers.Conv2DTranspose( + 1, + kernel_size=kernel_list[2], + strides=2, + padding="valid", + activation="sigmoid", + bias_initializer=keras.initializers.RandomUniform( + minval=-1.0 / math.sqrt(in_channels // 4 * 1.0), + maxval=1.0 / math.sqrt(in_channels // 4 * 1.0), + ), + name=f"{name}_conv2_weights", + )(x) + return x diff --git a/keras_nlp/src/models/diffbin/diffbin_test.py b/keras_nlp/src/models/diffbin/diffbin_test.py new file mode 100644 index 0000000000..4f5063ef1f --- /dev/null +++ b/keras_nlp/src/models/diffbin/diffbin_test.py @@ -0,0 +1,59 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from keras import ops + +from keras_nlp.src.models.diffbin.diffbin import DifferentialBinarization +from keras_nlp.src.models.resnet.resnet_backbone import ResNetBackbone +from keras_nlp.src.tests.test_case import TestCase + + +class DifferentialBinarizationTest(TestCase): + def setUp(self): + self.images = ops.ones((2, 224, 224, 3)) + self.labels = ops.zeros((2, 224, 224, 4)) + self.backbone = ResNetBackbone( + input_conv_filters=[64], + input_conv_kernel_sizes=[7], + stackwise_num_filters=[64, 128, 256, 512], + stackwise_num_blocks=[3, 4, 6, 3], + stackwise_num_strides=[1, 2, 2, 2], + block_type="bottleneck_block", + image_shape=(224, 224, 3), + include_rescaling=False, + ) + self.init_kwargs = { + "backbone": self.backbone, + } + self.train_data = (self.images, self.labels) + + def test_basics(self): + pytest.skip( + reason="TODO: enable after preprocessor flow is figured out" + ) + self.run_task_test( + cls=DifferentialBinarization, + init_kwargs=self.init_kwargs, + train_data=self.train_data, + expected_output_shape=(2, 224, 224, 3), + ) + + @pytest.mark.large + def test_saved_model(self): + self.run_model_saving_test( + cls=DifferentialBinarization, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) diff --git a/keras_nlp/src/models/diffbin/losses.py b/keras_nlp/src/models/diffbin/losses.py new file mode 100644 index 0000000000..df349afb55 --- /dev/null +++ b/keras_nlp/src/models/diffbin/losses.py @@ -0,0 +1,139 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import keras +from keras import ops + + +class DiceLoss: + def __init__(self, eps=1e-6, **kwargs): + self.eps = eps + + def __call__(self, y_true, y_pred, mask, weights=None): + if weights is not None: + mask = weights * mask + intersection = ops.sum((y_pred * y_true * mask)) + union = ops.sum((y_pred * mask)) + ops.sum(y_true * mask) + self.eps + loss = 1 - 2.0 * intersection / union + return loss + + +class MaskL1Loss: + def __init__(self, **kwargs): + pass + + def __call__(self, y_true, y_pred, mask): + mask_sum = ops.sum(mask) + loss = ops.where( + mask_sum == 0.0, + 0.0, + ops.sum(ops.absolute(y_pred - y_true) * mask) / mask_sum, + ) + return loss + + +class BalanceCrossEntropyLoss: + def __init__(self, negative_ratio=3.0, eps=1e-6, **kwargs): + self.negative_ratio = negative_ratio + self.eps = eps + + def __call__(self, y_true, y_pred, mask, return_origin=False): + positive = ops.cast((y_true > 0.5) & ops.cast(mask, "bool"), "uint8") + negative = ops.cast((y_true < 0.5) & ops.cast(mask, "bool"), "uint8") + positive_count = ops.sum(ops.cast(positive, "int32")) + negative_count = ops.sum(ops.cast(negative, "int32")) + negative_count_max = ops.cast( + ops.cast(positive_count, "float32") * self.negative_ratio, "int32" + ) + + negative_count = ops.where( + negative_count > negative_count_max, + negative_count_max, + negative_count, + ) + # Keras' losses reduce some axis. 
Since we don't want that here, we add + # a dummy dimension to y_true and y_pred + loss = keras.losses.BinaryCrossentropy( + from_logits=False, + label_smoothing=0.0, + axis=-1, + reduction=None, + )(y_true=y_true[..., None], y_pred=y_pred[..., None]) + + positive_loss = loss * ops.cast(positive, "float32") + negative_loss = loss * ops.cast(negative, "float32") + + # hard negative mining, as suggested in the paper: + # compute the threshold for hard negatives, and zero-out + # negative losses below the threshold. using this approach, + # we achieve efficient computation on GPUs + + # compute negative_count relative to the element count of y_pred + negative_count_rel = ops.cast(negative_count, "float32") / ops.prod( + ops.shape(negative_count) + ) + # compute the threshold value for negative losses and zero neg. loss + # values below this threshold + negative_loss_thresh = ops.quantile( + negative_loss, 1.0 - negative_count_rel + ) + negative_loss = negative_loss * ops.cast( + negative_loss > negative_loss_thresh, "float32" + ) + + balance_loss = (ops.sum(positive_loss) + ops.sum(negative_loss)) / ( + ops.cast(positive_count + negative_count, "float32") + self.eps + ) + + if return_origin: + return balance_loss, loss + return balance_loss + + +class DBLoss(keras.losses.Loss): + def __init__(self, eps=1e-6, l1_scale=10.0, bce_scale=5.0, **kwargs): + super().__init__(*kwargs) + self.dice_loss = DiceLoss(eps=eps) + self.l1_loss = MaskL1Loss() + self.bce_loss = BalanceCrossEntropyLoss() + + self.l1_scale = l1_scale + self.bce_scale = bce_scale + + def call(self, y_true, y_pred): + p_map_pred, t_map_pred, b_map_pred = ops.unstack(y_pred, 3, axis=-1) + shrink_map, shrink_mask, thresh_map, thresh_mask = ops.unstack( + y_true, 4, axis=-1 + ) + + # we here implement L1BalanceCELoss from PyTorch's + # Differential Binarization implementation + Ls = self.bce_loss( + y_true=shrink_map, + y_pred=p_map_pred, + mask=shrink_mask, + return_origin=False, + ) + Lt = self.l1_loss( + 
y_true=thresh_map, + y_pred=t_map_pred, + mask=thresh_mask, + ) + dice_loss = self.dice_loss( + y_true=shrink_map, + y_pred=b_map_pred, + mask=shrink_mask, + ) + loss = dice_loss + self.l1_scale * Lt + Ls * self.bce_scale + return loss From 5b4e011ff1f2c9a5aff0b39f3c2d9baf1d23c4a4 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 22 Oct 2024 20:14:08 +0200 Subject: [PATCH 02/20] Added tests for `DifferentialBinarization` losses --- keras_nlp/src/models/diffbin/losses_test.py | 85 +++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 keras_nlp/src/models/diffbin/losses_test.py diff --git a/keras_nlp/src/models/diffbin/losses_test.py b/keras_nlp/src/models/diffbin/losses_test.py new file mode 100644 index 0000000000..21bbec9b8c --- /dev/null +++ b/keras_nlp/src/models/diffbin/losses_test.py @@ -0,0 +1,85 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np + +from keras_nlp.src.models.diffbin.losses import DBLoss +from keras_nlp.src.models.diffbin.losses import DiceLoss +from keras_nlp.src.models.diffbin.losses import MaskL1Loss +from keras_nlp.src.tests.test_case import TestCase + + +class DiceLossTest(TestCase): + def setUp(self): + self.loss_obj = DiceLoss() + + def test_loss(self): + y_true = np.array([1.0, 1.0, 0.0, 0.0]) + y_pred = np.array([0.1, 0.2, 0.3, 0.4]) + mask = np.array([0.0, 1.0, 1.0, 0.0]) + weights = np.array([4.0, 5.0, 6.0, 7.0]) + loss = self.loss_obj(y_true, y_pred, mask, weights) + self.assertAlmostEqual(loss.numpy(), 0.74358, delta=1e-4) + + def test_correct(self): + y_true = np.array([1.0, 1.0, 0.0, 0.0]) + y_pred = y_true + mask = np.array([0.0, 1.0, 1.0, 0.0]) + loss = self.loss_obj(y_true, y_pred, mask) + self.assertAlmostEqual(loss.numpy(), 0.0, delta=1e-4) + + +class MaskL1LossTest(TestCase): + def setUp(self): + self.loss_obj = MaskL1Loss() + + def test_masked(self): + y_true = np.array([1.0, 2.0, 3.0, 4.0]) + y_pred = np.array([0.1, 0.2, 0.3, 0.4]) + mask = np.array([0.0, 1.0, 0.0, 1.0]) + loss = self.loss_obj(y_true, y_pred, mask) + self.assertAlmostEqual(loss.numpy(), 2.7, delta=1e-4) + + +class DBLossTest(TestCase): + def setUp(self): + self.loss_obj = DBLoss() + + def test_loss(self): + shrink_map = thresh_map = np.array( + [[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0]] + ) + p_map_pred = b_map_pred = t_map_pred = np.array( + [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]] + ) + shrink_mask = thresh_mask = np.ones_like(p_map_pred) + y_true = np.stack( + (shrink_map, shrink_mask, thresh_map, thresh_mask), axis=-1 + ) + y_pred = np.stack((p_map_pred, t_map_pred, b_map_pred), axis=-1) + loss = self.loss_obj(y_true, y_pred) + self.assertAlmostEqual(loss.numpy(), 14.1123, delta=1e-4) + + def test_correct(self): + shrink_map = thresh_map = np.array( + [[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0]] + ) + p_map_pred, b_map_pred, t_map_pred = shrink_map, 
shrink_map, thresh_map + shrink_mask = thresh_mask = np.ones_like(p_map_pred) + y_true = np.stack( + (shrink_map, shrink_mask, thresh_map, thresh_mask), axis=-1 + ) + y_pred = np.stack((p_map_pred, t_map_pred, b_map_pred), axis=-1) + loss = self.loss_obj(y_true, y_pred) + self.assertAlmostEqual(loss.numpy(), 0.0, delta=1e-4) From 12ab81cc346603a4fade88d0d6e0e46b100e1b89 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 22 Oct 2024 20:23:05 +0200 Subject: [PATCH 03/20] Moved `DifferentialBinarization` to keras_hub --- .../models/differential_binarization}/__init__.py | 0 .../models/differential_binarization}/diffbin.py | 14 +++++++------- .../differential_binarization}/diffbin_test.py | 8 +++++--- .../models/differential_binarization}/losses.py | 0 .../differential_binarization}/losses_test.py | 8 ++++---- 5 files changed, 16 insertions(+), 14 deletions(-) rename {keras_nlp/src/models/diffbin => keras_hub/src/models/differential_binarization}/__init__.py (100%) rename {keras_nlp/src/models/diffbin => keras_hub/src/models/differential_binarization}/diffbin.py (94%) rename {keras_nlp/src/models/diffbin => keras_hub/src/models/differential_binarization}/diffbin_test.py (89%) rename {keras_nlp/src/models/diffbin => keras_hub/src/models/differential_binarization}/losses.py (100%) rename {keras_nlp/src/models/diffbin => keras_hub/src/models/differential_binarization}/losses_test.py (91%) diff --git a/keras_nlp/src/models/diffbin/__init__.py b/keras_hub/src/models/differential_binarization/__init__.py similarity index 100% rename from keras_nlp/src/models/diffbin/__init__.py rename to keras_hub/src/models/differential_binarization/__init__.py diff --git a/keras_nlp/src/models/diffbin/diffbin.py b/keras_hub/src/models/differential_binarization/diffbin.py similarity index 94% rename from keras_nlp/src/models/diffbin/diffbin.py rename to keras_hub/src/models/differential_binarization/diffbin.py index 9b25c5d0dd..c86d0cc7a8 100644 --- 
a/keras_nlp/src/models/diffbin/diffbin.py +++ b/keras_hub/src/models/differential_binarization/diffbin.py @@ -16,13 +16,13 @@ import keras from keras import layers +from keras_hub.src.api_export import keras_nlp_export +from keras_hub.src.models.task import Task -from keras_nlp.src.api_export import keras_nlp_export -from keras_nlp.src.models.diffbin.losses import DBLoss -from keras_nlp.src.models.task import Task +from keras_hub.src.models.differential_binarization.losses import DBLoss -@keras_nlp_export("keras_nlp.models.DifferentialBinarization") +@keras_nlp_export("keras_hub.models.DifferentialBinarization") class DifferentialBinarization(Task): """ A Keras model implementing the Differential Binarization @@ -31,7 +31,7 @@ class DifferentialBinarization(Task): https://arxiv.org/abs/1911.08947). Args: - backbone: A `keras_nlp.models.ResNetBackbone` instance. + backbone: A `keras_hub.models.ResNetBackbone` instance. fpn_channels: int. The number of channels to output by the Feature Pyramid Network. Defaults to 256. fpn_kernel_list: list of ints. 
The number of filters for probability @@ -41,8 +41,8 @@ class DifferentialBinarization(Task): ```python input_data = np.ones(shape=(8, 224, 224, 3)) - backbone = keras_nlp.models.ResNetBackbone.from_preset("resnet50_vd") - detector = keras_nlp.models.DifferentialBinarization( + backbone = keras_hub.models.ResNetBackbone.from_preset("resnet50_vd") + detector = keras_hub.models.DifferentialBinarization( backbone=backbone ) diff --git a/keras_nlp/src/models/diffbin/diffbin_test.py b/keras_hub/src/models/differential_binarization/diffbin_test.py similarity index 89% rename from keras_nlp/src/models/diffbin/diffbin_test.py rename to keras_hub/src/models/differential_binarization/diffbin_test.py index 4f5063ef1f..0041639b6e 100644 --- a/keras_nlp/src/models/diffbin/diffbin_test.py +++ b/keras_hub/src/models/differential_binarization/diffbin_test.py @@ -14,10 +14,12 @@ import pytest from keras import ops +from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone +from keras_hub.src.tests.test_case import TestCase -from keras_nlp.src.models.diffbin.diffbin import DifferentialBinarization -from keras_nlp.src.models.resnet.resnet_backbone import ResNetBackbone -from keras_nlp.src.tests.test_case import TestCase +from keras_hub.src.models.differential_binarization.diffbin import ( + DifferentialBinarization, +) class DifferentialBinarizationTest(TestCase): diff --git a/keras_nlp/src/models/diffbin/losses.py b/keras_hub/src/models/differential_binarization/losses.py similarity index 100% rename from keras_nlp/src/models/diffbin/losses.py rename to keras_hub/src/models/differential_binarization/losses.py diff --git a/keras_nlp/src/models/diffbin/losses_test.py b/keras_hub/src/models/differential_binarization/losses_test.py similarity index 91% rename from keras_nlp/src/models/diffbin/losses_test.py rename to keras_hub/src/models/differential_binarization/losses_test.py index 21bbec9b8c..058f8b4f81 100644 --- a/keras_nlp/src/models/diffbin/losses_test.py +++ 
b/keras_hub/src/models/differential_binarization/losses_test.py @@ -13,11 +13,11 @@ # limitations under the License. import numpy as np +from keras_hub.src.tests.test_case import TestCase -from keras_nlp.src.models.diffbin.losses import DBLoss -from keras_nlp.src.models.diffbin.losses import DiceLoss -from keras_nlp.src.models.diffbin.losses import MaskL1Loss -from keras_nlp.src.tests.test_case import TestCase +from keras_hub.src.models.differential_binarization.losses import DBLoss +from keras_hub.src.models.differential_binarization.losses import DiceLoss +from keras_hub.src.models.differential_binarization.losses import MaskL1Loss class DiceLossTest(TestCase): From e68512cf5c71c3ff3f1764f5a12cc0bea0a45c3b Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 22 Oct 2024 20:30:11 +0200 Subject: [PATCH 04/20] Renamed to `differential_binarization.py` --- .../{diffbin.py => differential_binarization.py} | 0 .../{diffbin_test.py => differential_binarization_test.py} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename keras_hub/src/models/differential_binarization/{diffbin.py => differential_binarization.py} (100%) rename keras_hub/src/models/differential_binarization/{diffbin_test.py => differential_binarization_test.py} (95%) diff --git a/keras_hub/src/models/differential_binarization/diffbin.py b/keras_hub/src/models/differential_binarization/differential_binarization.py similarity index 100% rename from keras_hub/src/models/differential_binarization/diffbin.py rename to keras_hub/src/models/differential_binarization/differential_binarization.py diff --git a/keras_hub/src/models/differential_binarization/diffbin_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_test.py similarity index 95% rename from keras_hub/src/models/differential_binarization/diffbin_test.py rename to keras_hub/src/models/differential_binarization/differential_binarization_test.py index 0041639b6e..810238251e 100644 --- 
a/keras_hub/src/models/differential_binarization/diffbin_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_test.py @@ -17,7 +17,7 @@ from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone from keras_hub.src.tests.test_case import TestCase -from keras_hub.src.models.differential_binarization.diffbin import ( +from keras_hub.src.models.differential_binarization.differential_binarization import ( DifferentialBinarization, ) From 0c3235ccfde4996b96c6cf05f83d98a6807243c2 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 22 Oct 2024 21:26:24 +0200 Subject: [PATCH 05/20] Refactorings for `DifferentialBinarization` --- .../differential_binarization.py | 78 ++----------- .../differential_binarization_backbone.py | 105 ++++++++++++++++++ 2 files changed, 115 insertions(+), 68 deletions(-) create mode 100644 keras_hub/src/models/differential_binarization/differential_binarization_backbone.py diff --git a/keras_hub/src/models/differential_binarization/differential_binarization.py b/keras_hub/src/models/differential_binarization/differential_binarization.py index c86d0cc7a8..f02ddef8b9 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization.py @@ -16,13 +16,13 @@ import keras from keras import layers -from keras_hub.src.api_export import keras_nlp_export +from keras_hub.src.api_export import keras_hub_export from keras_hub.src.models.task import Task from keras_hub.src.models.differential_binarization.losses import DBLoss -@keras_nlp_export("keras_hub.models.DifferentialBinarization") +@keras_hub_export("keras_hub.models.DifferentialBinarization") class DifferentialBinarization(Task): """ A Keras model implementing the Differential Binarization @@ -31,17 +31,17 @@ class DifferentialBinarization(Task): https://arxiv.org/abs/1911.08947). Args: - backbone: A `keras_hub.models.ResNetBackbone` instance. 
- fpn_channels: int. The number of channels to output by the Feature - Pyramid Network. Defaults to 256. - fpn_kernel_list: list of ints. The number of filters for probability + backbone: A `keras_hub.models.DifferentialBinarizationBackbone` + instance. + head_kernel_list: list of ints. The number of filters for probability and threshold maps. Defaults to [3, 2, 2]. Examples: ```python input_data = np.ones(shape=(8, 224, 224, 3)) - backbone = keras_hub.models.ResNetBackbone.from_preset("resnet50_vd") + image_encoder = keras_hub.models.ResNetBackbone.from_preset("resnet50_vd") + backbone = keras_hub.models.DifferentialBinarizationBackbone(image_encoder) detector = keras_hub.models.DifferentialBinarization( backbone=backbone ) @@ -53,26 +53,23 @@ class DifferentialBinarization(Task): def __init__( self, backbone, - fpn_channels=256, head_kernel_list=[3, 2, 2], step_function_k=50.0, preprocessor=None, # adding this dummy arg for saved model test **kwargs, ): - backbone = backbone inputs = backbone.input - x = backbone.pyramid_outputs - x = diffbin_fpn_model(x, out_channels=fpn_channels) + x = backbone(inputs) probability_maps = diffbin_head( x, - in_channels=fpn_channels, + in_channels=backbone.fpn_channels, kernel_list=head_kernel_list, name="head_prob", ) threshold_maps = diffbin_head( x, - in_channels=fpn_channels, + in_channels=backbone.fpn_channels, kernel_list=head_kernel_list, name="head_thresh", ) @@ -86,7 +83,6 @@ def __init__( super().__init__(inputs=inputs, outputs=outputs, **kwargs) self.backbone = backbone - self.fpn_channels = fpn_channels self.head_kernel_list = head_kernel_list self.step_function_k = step_function_k @@ -133,66 +129,12 @@ def get_config(self): config = super().get_config() config.update( { - "fpn_channels": self.fpn_channels, "head_kernel_list": self.head_kernel_list, "step_function_k": self.step_function_k, } ) return config - -def diffbin_fpn_model(inputs, out_channels): - in2 = layers.Conv2D( - out_channels, kernel_size=1, 
use_bias=False, name="neck_in2" - )(inputs["P2"]) - in3 = layers.Conv2D( - out_channels, kernel_size=1, use_bias=False, name="neck_in3" - )(inputs["P3"]) - in4 = layers.Conv2D( - out_channels, kernel_size=1, use_bias=False, name="neck_in4" - )(inputs["P4"]) - in5 = layers.Conv2D( - out_channels, kernel_size=1, use_bias=False, name="neck_in5" - )(inputs["P5"]) - out4 = layers.Add(name="add1")([layers.UpSampling2D()(in5), in4]) - out3 = layers.Add(name="add2")([layers.UpSampling2D()(out4), in3]) - out2 = layers.Add(name="add3")([layers.UpSampling2D()(out3), in2]) - p5 = layers.Conv2D( - out_channels // 4, - kernel_size=3, - padding="same", - use_bias=False, - name="neck_p5", - )(in5) - p4 = layers.Conv2D( - out_channels // 4, - kernel_size=3, - padding="same", - use_bias=False, - name="neck_p4", - )(out4) - p3 = layers.Conv2D( - out_channels // 4, - kernel_size=3, - padding="same", - use_bias=False, - name="neck_p3", - )(out3) - p2 = layers.Conv2D( - out_channels // 4, - kernel_size=3, - padding="same", - use_bias=False, - name="neck_p2", - )(out2) - p5 = layers.UpSampling2D((8, 8))(p5) - p4 = layers.UpSampling2D((4, 4))(p4) - p3 = layers.UpSampling2D((2, 2))(p3) - - fused = layers.Concatenate(axis=-1)([p5, p4, p3, p2]) - return fused - - def step_function(x, y, k): return 1.0 / (1.0 + keras.ops.exp(-k * (x - y))) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py new file mode 100644 index 0000000000..fa0bf59c06 --- /dev/null +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py @@ -0,0 +1,105 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras import layers +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.backbone import Backbone + + +@keras_hub_export("keras_hub.models.DifferentialBinarizationBackbone") +class DifferentialBinarization(Backbone): + """ + A Keras model implementing the Differential Binarization + architecture for scene text detection, described in + [Real-time Scene Text Detection with Differentiable Binarization]( + https://arxiv.org/abs/1911.08947). + + Args: + image_encoder: A `keras_hub.models.ResNetBackbone` instance. + + """ + + def __init__( + self, + image_encoder, + fpn_channels=256, + **kwargs, + ): + inputs = image_encoder.input + x = image_encoder.pyramid_outputs + x = diffbin_fpn_model(x, out_channels=fpn_channels) + + super().__init__(inputs=inputs, outputs=x, **kwargs) + + self.image_encoder = image_encoder + self.fpn_channels = fpn_channels + + def get_config(self): + config = super().get_config() + config["fpn_channels"] = self.fpn_channels + config["image_encoder"] = self.image_encoder + return config + + +def diffbin_fpn_model(inputs, out_channels): + in2 = layers.Conv2D( + out_channels, kernel_size=1, use_bias=False, name="neck_in2" + )(inputs["P2"]) + in3 = layers.Conv2D( + out_channels, kernel_size=1, use_bias=False, name="neck_in3" + )(inputs["P3"]) + in4 = layers.Conv2D( + out_channels, kernel_size=1, use_bias=False, name="neck_in4" + )(inputs["P4"]) + in5 = layers.Conv2D( + out_channels, kernel_size=1, use_bias=False, name="neck_in5" + )(inputs["P5"]) + out4 = 
layers.Add(name="add1")([layers.UpSampling2D()(in5), in4]) + out3 = layers.Add(name="add2")([layers.UpSampling2D()(out4), in3]) + out2 = layers.Add(name="add3")([layers.UpSampling2D()(out3), in2]) + p5 = layers.Conv2D( + out_channels // 4, + kernel_size=3, + padding="same", + use_bias=False, + name="neck_p5", + )(in5) + p4 = layers.Conv2D( + out_channels // 4, + kernel_size=3, + padding="same", + use_bias=False, + name="neck_p4", + )(out4) + p3 = layers.Conv2D( + out_channels // 4, + kernel_size=3, + padding="same", + use_bias=False, + name="neck_p3", + )(out3) + p2 = layers.Conv2D( + out_channels // 4, + kernel_size=3, + padding="same", + use_bias=False, + name="neck_p2", + )(out2) + p5 = layers.UpSampling2D((8, 8))(p5) + p4 = layers.UpSampling2D((4, 4))(p4) + p3 = layers.UpSampling2D((2, 2))(p3) + + fused = layers.Concatenate(axis=-1)([p5, p4, p3, p2]) + return fused + From 6797231e2a2619e088bfc94ef34fd1c14caa7871 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 22 Oct 2024 21:53:12 +0200 Subject: [PATCH 06/20] More refactorings --- keras_hub/api/models/__init__.py | 6 ++++++ .../differential_binarization.py | 15 ++++++++++++--- .../differential_binarization_backbone.py | 10 +++++++--- .../differential_binarization_test.py | 4 ++-- .../differential_binarization/losses_test.py | 2 +- 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index e0e8773a35..2af2110aa1 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -101,6 +101,12 @@ from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import ( DenseNetImageClassifierPreprocessor, ) +from keras_hub.src.models.differential_binarization.differential_binarization import ( + DifferentialBinarization, +) +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( + DifferentialBinarizationBackbone, +) from 
keras_hub.src.models.distil_bert.distil_bert_backbone import ( DistilBertBackbone, ) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization.py b/keras_hub/src/models/differential_binarization/differential_binarization.py index f02ddef8b9..9fd68e260b 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization.py @@ -16,10 +16,10 @@ import keras from keras import layers -from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.models.task import Task +from keras_hub.src.api_export import keras_hub_export from keras_hub.src.models.differential_binarization.losses import DBLoss +from keras_hub.src.models.task import Task @keras_hub_export("keras_hub.models.DifferentialBinarization") @@ -35,12 +35,19 @@ class DifferentialBinarization(Task): instance. head_kernel_list: list of ints. The number of filters for probability and threshold maps. Defaults to [3, 2, 2]. + step_function_k: float. `k` parameter used within the differential + binarization step function. + preprocessor: `None`, a `keras_hub.models.Preprocessor` instance, + a `keras.Layer` instance, or a callable. If `None` no preprocessing + will be applied to the inputs. 
Examples: ```python input_data = np.ones(shape=(8, 224, 224, 3)) - image_encoder = keras_hub.models.ResNetBackbone.from_preset("resnet50_vd") + image_encoder = keras_hub.models.ResNetBackbone.from_preset( + "resnet_vd_50_imagenet" + ) backbone = keras_hub.models.DifferentialBinarizationBackbone(image_encoder) detector = keras_hub.models.DifferentialBinarization( backbone=backbone @@ -85,6 +92,7 @@ def __init__( self.backbone = backbone self.head_kernel_list = head_kernel_list self.step_function_k = step_function_k + self.preprocessor = preprocessor def compile( self, @@ -135,6 +143,7 @@ def get_config(self): ) return config + def step_function(x, y, k): return 1.0 / (1.0 + keras.ops.exp(-k * (x - y))) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py index fa0bf59c06..883a9dda34 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py @@ -13,21 +13,26 @@ # limitations under the License. from keras import layers + from keras_hub.src.api_export import keras_hub_export from keras_hub.src.models.backbone import Backbone @keras_hub_export("keras_hub.models.DifferentialBinarizationBackbone") -class DifferentialBinarization(Backbone): +class DifferentialBinarizationBackbone(Backbone): """ A Keras model implementing the Differential Binarization architecture for scene text detection, described in [Real-time Scene Text Detection with Differentiable Binarization]( https://arxiv.org/abs/1911.08947). + This class contains the backbone architecture containing the feature + pyramid network. + Args: image_encoder: A `keras_hub.models.ResNetBackbone` instance. - + fpn_channels: int. The number of channels to output by the feature + pyramid network. Defaults to 256. 
""" def __init__( @@ -102,4 +107,3 @@ def diffbin_fpn_model(inputs, out_channels): fused = layers.Concatenate(axis=-1)([p5, p4, p3, p2]) return fused - diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_test.py index 810238251e..ee3fe3a54b 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_test.py @@ -14,12 +14,12 @@ import pytest from keras import ops -from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone -from keras_hub.src.tests.test_case import TestCase from keras_hub.src.models.differential_binarization.differential_binarization import ( DifferentialBinarization, ) +from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone +from keras_hub.src.tests.test_case import TestCase class DifferentialBinarizationTest(TestCase): diff --git a/keras_hub/src/models/differential_binarization/losses_test.py b/keras_hub/src/models/differential_binarization/losses_test.py index 058f8b4f81..44d9d9ec2f 100644 --- a/keras_hub/src/models/differential_binarization/losses_test.py +++ b/keras_hub/src/models/differential_binarization/losses_test.py @@ -13,11 +13,11 @@ # limitations under the License. 
import numpy as np -from keras_hub.src.tests.test_case import TestCase from keras_hub.src.models.differential_binarization.losses import DBLoss from keras_hub.src.models.differential_binarization.losses import DiceLoss from keras_hub.src.models.differential_binarization.losses import MaskL1Loss +from keras_hub.src.tests.test_case import TestCase class DiceLossTest(TestCase): From 4845b6a754b49cd54a3c8fc4bf4c373422359cf3 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 22 Oct 2024 22:12:17 +0200 Subject: [PATCH 07/20] Fix tests --- .../differential_binarization_backbone.py | 10 +++++++++- .../differential_binarization_test.py | 17 ++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py index 883a9dda34..b0eab44190 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import keras from keras import layers from keras_hub.src.api_export import keras_hub_export @@ -53,9 +54,16 @@ def __init__( def get_config(self): config = super().get_config() config["fpn_channels"] = self.fpn_channels - config["image_encoder"] = self.image_encoder + config["image_encoder"] = keras.layers.serialize(self.image_encoder) return config + @classmethod + def from_config(cls, config): + config["image_encoder"] = keras.layers.deserialize( + config["image_encoder"] + ) + return cls(**config) + def diffbin_fpn_model(inputs, out_channels): in2 = layers.Conv2D( diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_test.py index ee3fe3a54b..c32e26bcfa 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_test.py @@ -18,7 +18,13 @@ from keras_hub.src.models.differential_binarization.differential_binarization import ( DifferentialBinarization, ) +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( + DifferentialBinarizationBackbone, +) from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone +from keras_hub.src.models.resnet.resnet_image_classifier_preprocessor import ( + ResNetImageClassifierPreprocessor, +) from keras_hub.src.tests.test_case import TestCase @@ -26,7 +32,7 @@ class DifferentialBinarizationTest(TestCase): def setUp(self): self.images = ops.ones((2, 224, 224, 3)) self.labels = ops.zeros((2, 224, 224, 4)) - self.backbone = ResNetBackbone( + image_encoder = ResNetBackbone( input_conv_filters=[64], input_conv_kernel_sizes=[7], stackwise_num_filters=[64, 128, 256, 512], @@ -34,17 +40,18 @@ def setUp(self): stackwise_num_strides=[1, 2, 2, 2], block_type="bottleneck_block", image_shape=(224, 224, 3), - include_rescaling=False, ) + self.backbone = 
DifferentialBinarizationBackbone( + image_encoder=image_encoder + ) + self.preprocessor = ResNetImageClassifierPreprocessor() self.init_kwargs = { "backbone": self.backbone, + "preprocessor": self.preprocessor, } self.train_data = (self.images, self.labels) def test_basics(self): - pytest.skip( - reason="TODO: enable after preprocessor flow is figured out" - ) self.run_task_test( cls=DifferentialBinarization, init_kwargs=self.init_kwargs, From 83edf9ad22ca84e26420e3a65c7a6bd7a4508439 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 29 Oct 2024 20:02:40 +0100 Subject: [PATCH 08/20] Add preprocessor and image converter --- .../differential_binarization.py | 11 +++++--- ...fferential_binarization_image_converter.py | 8 ++++++ .../differential_binarization_preprocessor.py | 26 +++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 keras_hub/src/models/differential_binarization/differential_binarization_image_converter.py create mode 100644 keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py diff --git a/keras_hub/src/models/differential_binarization/differential_binarization.py b/keras_hub/src/models/differential_binarization/differential_binarization.py index 9fd68e260b..4c91693915 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization.py @@ -18,12 +18,14 @@ from keras import layers from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.image_segmenter import ImageSegmenter from keras_hub.src.models.differential_binarization.losses import DBLoss -from keras_hub.src.models.task import Task +from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import DifferentialBinarizationPreprocessor +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import DifferentialBinarizationBackbone 
@keras_hub_export("keras_hub.models.DifferentialBinarization") -class DifferentialBinarization(Task): +class DifferentialBinarization(ImageSegmenter): """ A Keras model implementing the Differential Binarization architecture for scene text detection, described in @@ -57,12 +59,15 @@ class DifferentialBinarization(Task): ``` """ + backbone_cls = DifferentialBinarizationBackbone + preprocessor_cls = DifferentialBinarizationPreprocessor + def __init__( self, backbone, head_kernel_list=[3, 2, 2], step_function_k=50.0, - preprocessor=None, # adding this dummy arg for saved model test + preprocessor=None, **kwargs, ): diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_image_converter.py b/keras_hub/src/models/differential_binarization/differential_binarization_image_converter.py new file mode 100644 index 0000000000..53560dd385 --- /dev/null +++ b/keras_hub/src/models/differential_binarization/differential_binarization_image_converter.py @@ -0,0 +1,8 @@ +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.layers.preprocessing.image_converter import ImageConverter +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import DifferentialBinarizationBackbone + + +@keras_hub_export("keras_hub.layers.DifferentialBinarizationImageConverter") +class DifferentialBinarizationImageConverter(ImageConverter): + backbone_cls = DifferentialBinarizationBackbone diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py b/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py new file mode 100644 index 0000000000..57528ad801 --- /dev/null +++ b/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py @@ -0,0 +1,26 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.image_segmenter_preprocessor import ( + ImageSegmenterPreprocessor, +) +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import DifferentialBinarizationBackbone +from keras_hub.src.models.differential_binarization.differential_binarization_image_converter import DifferentialBinarizationImageConverter + + +@keras_hub_export("keras_hub.models.DifferentialBinarizationPreprocessor") +class DifferentialBinarizationPreprocessor(ImageSegmenterPreprocessor): + backbone_cls = DifferentialBinarizationBackbone + image_converter_cls = DifferentialBinarizationImageConverter From f15b7b90822d44ce174e4de4ce9f0a37146bb3ed Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 29 Oct 2024 21:19:21 +0100 Subject: [PATCH 09/20] Add presets --- .../differential_binarization_presets.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 keras_hub/src/models/differential_binarization/differential_binarization_presets.py diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_presets.py b/keras_hub/src/models/differential_binarization/differential_binarization_presets.py new file mode 100644 index 0000000000..c6b243ec34 --- /dev/null +++ b/keras_hub/src/models/differential_binarization/differential_binarization_presets.py @@ -0,0 +1,15 @@ +"""Differential Binarization preset configurations.""" + +backbone_presets = { + "diffbin_r50vd_icdar2015": { + "metadata": { + "description": ("Differential 
Binarization using 50-layer" + "ResNetVD trained on the ICDAR2015 dataset."), + "params": 25482722, + "official_name": "DifferentialBinarization", + "path": "differential_binarization", + "model_card": "https://arxiv.org/abs/1911.08947", + }, + "kaggle_handle": "", # TODO + } +} \ No newline at end of file From 392dbffcd39ab170260b0591d8a63edd35380cb4 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 29 Oct 2024 21:38:05 +0100 Subject: [PATCH 10/20] Run formatting script --- .../differential_binarization.py | 10 +++++++--- .../differential_binarization_image_converter.py | 4 +++- .../differential_binarization_preprocessor.py | 8 ++++++-- .../differential_binarization_presets.py | 10 ++++++---- .../differential_binarization_test.py | 8 ++++---- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization.py b/keras_hub/src/models/differential_binarization/differential_binarization.py index 4c91693915..4a0536e5a1 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization.py @@ -18,10 +18,14 @@ from keras import layers from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.models.image_segmenter import ImageSegmenter +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( + DifferentialBinarizationBackbone, +) +from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import ( + DifferentialBinarizationPreprocessor, +) from keras_hub.src.models.differential_binarization.losses import DBLoss -from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import DifferentialBinarizationPreprocessor -from keras_hub.src.models.differential_binarization.differential_binarization_backbone import DifferentialBinarizationBackbone +from 
keras_hub.src.models.image_segmenter import ImageSegmenter @keras_hub_export("keras_hub.models.DifferentialBinarization") diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_image_converter.py b/keras_hub/src/models/differential_binarization/differential_binarization_image_converter.py index 53560dd385..b0ff8adbc1 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_image_converter.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_image_converter.py @@ -1,6 +1,8 @@ from keras_hub.src.api_export import keras_hub_export from keras_hub.src.layers.preprocessing.image_converter import ImageConverter -from keras_hub.src.models.differential_binarization.differential_binarization_backbone import DifferentialBinarizationBackbone +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( + DifferentialBinarizationBackbone, +) @keras_hub_export("keras_hub.layers.DifferentialBinarizationImageConverter") diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py b/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py index 57528ad801..3607420bb5 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py @@ -13,11 +13,15 @@ # limitations under the License. 
from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( + DifferentialBinarizationBackbone, +) +from keras_hub.src.models.differential_binarization.differential_binarization_image_converter import ( + DifferentialBinarizationImageConverter, +) from keras_hub.src.models.image_segmenter_preprocessor import ( ImageSegmenterPreprocessor, ) -from keras_hub.src.models.differential_binarization.differential_binarization_backbone import DifferentialBinarizationBackbone -from keras_hub.src.models.differential_binarization.differential_binarization_image_converter import DifferentialBinarizationImageConverter @keras_hub_export("keras_hub.models.DifferentialBinarizationPreprocessor") diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_presets.py b/keras_hub/src/models/differential_binarization/differential_binarization_presets.py index c6b243ec34..4548f4cc6a 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_presets.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_presets.py @@ -3,13 +3,15 @@ backbone_presets = { "diffbin_r50vd_icdar2015": { "metadata": { - "description": ("Differential Binarization using 50-layer" - "ResNetVD trained on the ICDAR2015 dataset."), + "description": ( + "Differential Binarization using 50-layer" + "ResNetVD trained on the ICDAR2015 dataset." 
+ ), "params": 25482722, "official_name": "DifferentialBinarization", "path": "differential_binarization", "model_card": "https://arxiv.org/abs/1911.08947", }, - "kaggle_handle": "", # TODO + "kaggle_handle": "", # TODO } -} \ No newline at end of file +} diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_test.py index c32e26bcfa..c5cfeeba88 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_test.py @@ -21,10 +21,10 @@ from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( DifferentialBinarizationBackbone, ) -from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone -from keras_hub.src.models.resnet.resnet_image_classifier_preprocessor import ( - ResNetImageClassifierPreprocessor, +from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import ( + DifferentialBinarizationPreprocessor, ) +from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone from keras_hub.src.tests.test_case import TestCase @@ -44,7 +44,7 @@ def setUp(self): self.backbone = DifferentialBinarizationBackbone( image_encoder=image_encoder ) - self.preprocessor = ResNetImageClassifierPreprocessor() + self.preprocessor = DifferentialBinarizationPreprocessor() self.init_kwargs = { "backbone": self.backbone, "preprocessor": self.preprocessor, From db70eb53b60f1a2d7d0b13e1cd3f7984f180c8cc Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 29 Oct 2024 22:05:19 +0100 Subject: [PATCH 11/20] Impl additional tests --- .../differential_binarization/__init__.py | 6 ++ ...differential_binarization_backbone_test.py | 55 +++++++++++++++++++ .../differential_binarization_test.py | 16 ++++++ 3 files changed, 77 insertions(+) create mode 100644 
keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py diff --git a/keras_hub/src/models/differential_binarization/__init__.py b/keras_hub/src/models/differential_binarization/__init__.py index 3364a6bd16..200fcc8a3d 100644 --- a/keras_hub/src/models/differential_binarization/__init__.py +++ b/keras_hub/src/models/differential_binarization/__init__.py @@ -11,3 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import DifferentialBinarizationBackbone +from keras_hub.src.models.differential_binarization.differential_binarization_presets import backbone_presets +from keras_hub.src.utils.preset_utils import register_presets + +register_presets(backbone_presets, DifferentialBinarizationBackbone) \ No newline at end of file diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py new file mode 100644 index 0000000000..1178c00e8a --- /dev/null +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py @@ -0,0 +1,55 @@ +# Copyright 2024 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +from keras import ops + +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( + DifferentialBinarizationBackbone, +) +from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import DifferentialBinarizationPreprocessor +from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone +from keras_hub.src.tests.test_case import TestCase + + +class DifferentialBinarizationTest(TestCase): + def setUp(self): + self.batch_size = 2 + self.image_size = 16 + self.images = ops.ones((2, 224, 224, 3)) + self.image_encoder = ResNetBackbone( + input_conv_filters=[64], + input_conv_kernel_sizes=[7], + stackwise_num_filters=[64, 128, 256, 512], + stackwise_num_blocks=[3, 4, 6, 3], + stackwise_num_strides=[1, 2, 2, 2], + block_type="bottleneck_block", + image_shape=(224, 224, 3), + ) + self.preprocessor = DifferentialBinarizationPreprocessor() + self.init_kwargs = { + "image_encoder": self.image_encoder, + "backbone": self.backbone, + "preprocessor": self.preprocessor, + } + + def test_backbone_basics(self): + self.run_backbone_test( + cls=DifferentialBinarizationBackbone, + init_kwargs=self.init_kwargs, + input_data = self.images, + expected_output_shape=(2, 56, 56, 256,), + run_mixed_precision_check=False, + run_quantization_check=False, + ) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_test.py index c5cfeeba88..0c4124f8c7 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_test.py @@ -66,3 +66,19 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.images, ) + + def test_end_to_end_model_predict(self): + model = DifferentialBinarization(**self.init_kwargs) + outputs = model.predict(self.images) + 
self.assertAllEqual(outputs.shape, (2, 224, 224, 3)) + + @pytest.mark.skip(reason="disabled until preset's been uploaded to Kaggle") + @pytest.mark.extra_large + def test_all_presets(self): + for preset in DifferentialBinarization.presets: + self.run_preset_test( + cls=DifferentialBinarization, + preset=preset, + input_data=self.images, + expected_output_shape=(2, 224, 224, 3) + ) \ No newline at end of file From 18fcbfb2023a28efe1cb222f49164a2ca3ddd0f6 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 29 Oct 2024 22:07:04 +0100 Subject: [PATCH 12/20] Fixed formatting --- .../models/differential_binarization/__init__.py | 10 +++++++--- .../differential_binarization_backbone_test.py | 14 ++++++++++---- .../differential_binarization_test.py | 4 ++-- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/keras_hub/src/models/differential_binarization/__init__.py b/keras_hub/src/models/differential_binarization/__init__.py index 200fcc8a3d..94a90f3c6d 100644 --- a/keras_hub/src/models/differential_binarization/__init__.py +++ b/keras_hub/src/models/differential_binarization/__init__.py @@ -12,8 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from keras_hub.src.models.differential_binarization.differential_binarization_backbone import DifferentialBinarizationBackbone -from keras_hub.src.models.differential_binarization.differential_binarization_presets import backbone_presets +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( + DifferentialBinarizationBackbone, +) +from keras_hub.src.models.differential_binarization.differential_binarization_presets import ( + backbone_presets, +) from keras_hub.src.utils.preset_utils import register_presets -register_presets(backbone_presets, DifferentialBinarizationBackbone) \ No newline at end of file +register_presets(backbone_presets, DifferentialBinarizationBackbone) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py index 1178c00e8a..ff65adcd48 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import pytest from keras import ops from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( DifferentialBinarizationBackbone, ) -from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import DifferentialBinarizationPreprocessor +from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import ( + DifferentialBinarizationPreprocessor, +) from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone from keras_hub.src.tests.test_case import TestCase @@ -48,8 +49,13 @@ def test_backbone_basics(self): self.run_backbone_test( cls=DifferentialBinarizationBackbone, init_kwargs=self.init_kwargs, - input_data = self.images, - expected_output_shape=(2, 56, 56, 256,), + input_data=self.images, + expected_output_shape=( + 2, + 56, + 56, + 256, + ), run_mixed_precision_check=False, run_quantization_check=False, ) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_test.py index 0c4124f8c7..39b32ecd12 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_test.py @@ -80,5 +80,5 @@ def test_all_presets(self): cls=DifferentialBinarization, preset=preset, input_data=self.images, - expected_output_shape=(2, 224, 224, 3) - ) \ No newline at end of file + expected_output_shape=(2, 224, 224, 3), + ) From 898235db76e6eebc088a1f0c5664b0971d6dcd25 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 29 Oct 2024 22:09:53 +0100 Subject: [PATCH 13/20] Removed copyright statements --- .../models/differential_binarization/__init__.py | 14 -------------- .../differential_binarization.py | 14 -------------- .../differential_binarization_backbone.py | 14 -------------- .../differential_binarization_backbone_test.py | 14 -------------- 
.../differential_binarization_preprocessor.py | 14 -------------- .../differential_binarization_test.py | 14 -------------- .../models/differential_binarization/losses.py | 15 --------------- .../differential_binarization/losses_test.py | 14 -------------- 8 files changed, 113 deletions(-) diff --git a/keras_hub/src/models/differential_binarization/__init__.py b/keras_hub/src/models/differential_binarization/__init__.py index 94a90f3c6d..1bcdbaed54 100644 --- a/keras_hub/src/models/differential_binarization/__init__.py +++ b/keras_hub/src/models/differential_binarization/__init__.py @@ -1,17 +1,3 @@ -# Copyright 2024 The KerasNLP Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( DifferentialBinarizationBackbone, ) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization.py b/keras_hub/src/models/differential_binarization/differential_binarization.py index 4a0536e5a1..50a18d8d37 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization.py @@ -1,17 +1,3 @@ -# Copyright 2024 The KerasNLP Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - import math import keras diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py index b0eab44190..1512924d04 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py @@ -1,17 +1,3 @@ -# Copyright 2024 The KerasNLP Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- import keras from keras import layers diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py index ff65adcd48..fdbb9d8687 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py @@ -1,17 +1,3 @@ -# Copyright 2024 The KerasNLP Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - from keras import ops from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py b/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py index 3607420bb5..55ae05cf09 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_preprocessor.py @@ -1,17 +1,3 @@ -# Copyright 2024 The KerasNLP Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - from keras_hub.src.api_export import keras_hub_export from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( DifferentialBinarizationBackbone, diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_test.py index 39b32ecd12..c1a3c15049 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_test.py @@ -1,17 +1,3 @@ -# Copyright 2024 The KerasNLP Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- import pytest from keras import ops diff --git a/keras_hub/src/models/differential_binarization/losses.py b/keras_hub/src/models/differential_binarization/losses.py index df349afb55..77dc9702f2 100644 --- a/keras_hub/src/models/differential_binarization/losses.py +++ b/keras_hub/src/models/differential_binarization/losses.py @@ -1,21 +1,6 @@ -# Copyright 2024 The KerasNLP Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - import keras from keras import ops - class DiceLoss: def __init__(self, eps=1e-6, **kwargs): self.eps = eps diff --git a/keras_hub/src/models/differential_binarization/losses_test.py b/keras_hub/src/models/differential_binarization/losses_test.py index 44d9d9ec2f..56b518d2b6 100644 --- a/keras_hub/src/models/differential_binarization/losses_test.py +++ b/keras_hub/src/models/differential_binarization/losses_test.py @@ -1,17 +1,3 @@ -# Copyright 2024 The KerasNLP Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- import numpy as np from keras_hub.src.models.differential_binarization.losses import DBLoss From eaec8689c450e0653a68df703abf5871055da6cf Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Tue, 29 Oct 2024 22:17:16 +0100 Subject: [PATCH 14/20] Fix tests, run `api_gen.sh` --- keras_hub/api/layers/__init__.py | 3 +++ keras_hub/api/models/__init__.py | 3 +++ .../differential_binarization_backbone_test.py | 2 -- keras_hub/src/models/differential_binarization/losses.py | 1 + 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py index 78a26075d1..fd842d8738 100644 --- a/keras_hub/api/layers/__init__.py +++ b/keras_hub/api/layers/__init__.py @@ -40,6 +40,9 @@ from keras_hub.src.models.densenet.densenet_image_converter import ( DenseNetImageConverter, ) +from keras_hub.src.models.differential_binarization.differential_binarization_image_converter import ( + DifferentialBinarizationImageConverter, +) from keras_hub.src.models.mit.mit_image_converter import MiTImageConverter from keras_hub.src.models.pali_gemma.pali_gemma_image_converter import ( PaliGemmaImageConverter, diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index 2af2110aa1..be3f38669c 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -107,6 +107,9 @@ from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( DifferentialBinarizationBackbone, ) +from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import ( + DifferentialBinarizationPreprocessor, +) from keras_hub.src.models.distil_bert.distil_bert_backbone import ( DistilBertBackbone, ) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py index fdbb9d8687..9eb6ffcf3c 100644 --- 
a/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py @@ -27,8 +27,6 @@ def setUp(self): self.preprocessor = DifferentialBinarizationPreprocessor() self.init_kwargs = { "image_encoder": self.image_encoder, - "backbone": self.backbone, - "preprocessor": self.preprocessor, } def test_backbone_basics(self): diff --git a/keras_hub/src/models/differential_binarization/losses.py b/keras_hub/src/models/differential_binarization/losses.py index 77dc9702f2..31f8509455 100644 --- a/keras_hub/src/models/differential_binarization/losses.py +++ b/keras_hub/src/models/differential_binarization/losses.py @@ -1,6 +1,7 @@ import keras from keras import ops + class DiceLoss: def __init__(self, eps=1e-6, **kwargs): self.eps = eps From 9fb6e6593f95b44deb8792b34796d7b7e5a99346 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Mon, 11 Nov 2024 20:38:21 +0100 Subject: [PATCH 15/20] Addressed comments --- keras_hub/api/models/__init__.py | 6 +- .../differential_binarization.py | 189 ------------------ .../differential_binarization_backbone.py | 122 +++++++++-- ...differential_binarization_backbone_test.py | 23 +-- .../differential_binarization_ocr.py | 113 +++++++++++ ... 
=> differential_binarization_ocr_test.py} | 34 ++-- 6 files changed, 252 insertions(+), 235 deletions(-) delete mode 100644 keras_hub/src/models/differential_binarization/differential_binarization.py create mode 100644 keras_hub/src/models/differential_binarization/differential_binarization_ocr.py rename keras_hub/src/models/differential_binarization/{differential_binarization_test.py => differential_binarization_ocr_test.py} (70%) diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index be3f38669c..df06b3e2c2 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -101,12 +101,12 @@ from keras_hub.src.models.densenet.densenet_image_classifier_preprocessor import ( DenseNetImageClassifierPreprocessor, ) -from keras_hub.src.models.differential_binarization.differential_binarization import ( - DifferentialBinarization, -) from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( DifferentialBinarizationBackbone, ) +from keras_hub.src.models.differential_binarization.differential_binarization_ocr import ( + DifferentialBinarizationOCR, +) from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import ( DifferentialBinarizationPreprocessor, ) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization.py b/keras_hub/src/models/differential_binarization/differential_binarization.py deleted file mode 100644 index 50a18d8d37..0000000000 --- a/keras_hub/src/models/differential_binarization/differential_binarization.py +++ /dev/null @@ -1,189 +0,0 @@ -import math - -import keras -from keras import layers - -from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( - DifferentialBinarizationBackbone, -) -from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import ( - 
DifferentialBinarizationPreprocessor, -) -from keras_hub.src.models.differential_binarization.losses import DBLoss -from keras_hub.src.models.image_segmenter import ImageSegmenter - - -@keras_hub_export("keras_hub.models.DifferentialBinarization") -class DifferentialBinarization(ImageSegmenter): - """ - A Keras model implementing the Differential Binarization - architecture for scene text detection, described in - [Real-time Scene Text Detection with Differentiable Binarization]( - https://arxiv.org/abs/1911.08947). - - Args: - backbone: A `keras_hub.models.DifferentialBinarizationBackbone` - instance. - head_kernel_list: list of ints. The number of filters for probability - and threshold maps. Defaults to [3, 2, 2]. - step_function_k: float. `k` parameter used within the differential - binarization step function. - preprocessor: `None`, a `keras_hub.models.Preprocessor` instance, - a `keras.Layer` instance, or a callable. If `None` no preprocessing - will be applied to the inputs. - - Examples: - ```python - input_data = np.ones(shape=(8, 224, 224, 3)) - - image_encoder = keras_hub.models.ResNetBackbone.from_preset( - "resnet_vd_50_imagenet" - ) - backbone = keras_hub.models.DifferentialBinarizationBackbone(image_encoder) - detector = keras_hub.models.DifferentialBinarization( - backbone=backbone - ) - - detector(input_data) - ``` - """ - - backbone_cls = DifferentialBinarizationBackbone - preprocessor_cls = DifferentialBinarizationPreprocessor - - def __init__( - self, - backbone, - head_kernel_list=[3, 2, 2], - step_function_k=50.0, - preprocessor=None, - **kwargs, - ): - - inputs = backbone.input - x = backbone(inputs) - probability_maps = diffbin_head( - x, - in_channels=backbone.fpn_channels, - kernel_list=head_kernel_list, - name="head_prob", - ) - threshold_maps = diffbin_head( - x, - in_channels=backbone.fpn_channels, - kernel_list=head_kernel_list, - name="head_thresh", - ) - binary_maps = step_function( - probability_maps, threshold_maps, 
k=step_function_k - ) - outputs = layers.Concatenate(axis=-1)( - [probability_maps, threshold_maps, binary_maps] - ) - - super().__init__(inputs=inputs, outputs=outputs, **kwargs) - - self.backbone = backbone - self.head_kernel_list = head_kernel_list - self.step_function_k = step_function_k - self.preprocessor = preprocessor - - def compile( - self, - optimizer="auto", - loss="auto", - **kwargs, - ): - """Configures the `DifferentialBinarization` task for training. - - `DifferentialBinarization` extends the default compilation signature of - `keras.Model.compile` with defaults for `optimizer` and `loss`. To - override these defaults, pass any value to these arguments during - compilation. - - Args: - optimizer: `"auto"`, an optimizer name, or a `keras.Optimizer` - instance. Defaults to `"auto"`, which uses the default optimizer - for `DifferentialBinarization`. See `keras.Model.compile` and - `keras.optimizers` for more info on possible `optimizer` values. - loss: `"auto"`, a loss name, or a `keras.losses.Loss` instance. - Defaults to `"auto"`, in which case the default loss - computation of `DifferentialBinarization` will be applied. See - `keras.Model.compile` and `keras.losses` for more info on - possible `loss` values. - **kwargs: See `keras.Model.compile` for a full list of arguments - supported by the compile method. 
- """ - if optimizer == "auto": - optimizer = keras.optimizers.SGD( - learning_rate=0.007, weight_decay=0.0001, momentum=0.9 - ) - if loss == "auto": - loss = DBLoss() - super().compile( - optimizer=optimizer, - loss=loss, - **kwargs, - ) - - def get_config(self): - # Backbone serialized in `super` - config = super().get_config() - config.update( - { - "head_kernel_list": self.head_kernel_list, - "step_function_k": self.step_function_k, - } - ) - return config - - -def step_function(x, y, k): - return 1.0 / (1.0 + keras.ops.exp(-k * (x - y))) - - -def diffbin_head(inputs, in_channels, kernel_list, name): - x = layers.Conv2D( - in_channels // 4, - kernel_size=kernel_list[0], - padding="same", - use_bias=False, - name=f"{name}_conv0_weights", - )(inputs) - x = layers.BatchNormalization( - beta_initializer=keras.initializers.Constant(1e-4), - gamma_initializer=keras.initializers.Constant(1.0), - name=f"{name}_conv0_bn", - )(x) - x = layers.ReLU(name=f"{name}_conv0_relu")(x) - x = layers.Conv2DTranspose( - in_channels // 4, - kernel_size=kernel_list[1], - strides=2, - padding="valid", - bias_initializer=keras.initializers.RandomUniform( - minval=-1.0 / math.sqrt(in_channels // 4 * 1.0), - maxval=1.0 / math.sqrt(in_channels // 4 * 1.0), - ), - name=f"{name}_conv1_weights", - )(x) - x = layers.BatchNormalization( - beta_initializer=keras.initializers.Constant(1e-4), - gamma_initializer=keras.initializers.Constant(1.0), - name=f"{name}_conv1_bn", - )(x) - x = layers.ReLU(name=f"{name}_conv1_relu")(x) - x = layers.Conv2DTranspose( - 1, - kernel_size=kernel_list[2], - strides=2, - padding="valid", - activation="sigmoid", - bias_initializer=keras.initializers.RandomUniform( - minval=-1.0 / math.sqrt(in_channels // 4 * 1.0), - maxval=1.0 / math.sqrt(in_channels // 4 * 1.0), - ), - name=f"{name}_conv2_weights", - )(x) - return x diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py 
b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py index 1512924d04..d91f4d473f 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py @@ -26,20 +26,44 @@ def __init__( self, image_encoder, fpn_channels=256, + head_kernel_list=[3, 2, 2], + dtype=None, **kwargs, ): + # === Functional Model === inputs = image_encoder.input x = image_encoder.pyramid_outputs - x = diffbin_fpn_model(x, out_channels=fpn_channels) + x = diffbin_fpn_model(x, out_channels=fpn_channels, dtype=dtype) - super().__init__(inputs=inputs, outputs=x, **kwargs) + probability_maps = diffbin_head( + x, + in_channels=fpn_channels, + kernel_list=head_kernel_list, + name="head_prob", + ) + threshold_maps = diffbin_head( + x, + in_channels=fpn_channels, + kernel_list=head_kernel_list, + name="head_thresh", + ) + + outputs = { + "probability_maps": probability_maps, + "threshold_maps": threshold_maps, + } + super().__init__(inputs=inputs, outputs=outputs, dtype=dtype, **kwargs) + + # === Config === self.image_encoder = image_encoder self.fpn_channels = fpn_channels + self.head_kernel_list = head_kernel_list def get_config(self): config = super().get_config() config["fpn_channels"] = self.fpn_channels + config["head_kernel_list"] = self.head_kernel_list config["image_encoder"] = keras.layers.serialize(self.image_encoder) return config @@ -51,28 +75,49 @@ def from_config(cls, config): return cls(**config) -def diffbin_fpn_model(inputs, out_channels): +def diffbin_fpn_model(inputs, out_channels, dtype=None): in2 = layers.Conv2D( - out_channels, kernel_size=1, use_bias=False, name="neck_in2" + out_channels, + kernel_size=1, + use_bias=False, + name="neck_in2", + dtype=dtype, )(inputs["P2"]) in3 = layers.Conv2D( - out_channels, kernel_size=1, use_bias=False, name="neck_in3" + out_channels, + kernel_size=1, + use_bias=False, + name="neck_in3", + 
dtype=dtype, )(inputs["P3"]) in4 = layers.Conv2D( - out_channels, kernel_size=1, use_bias=False, name="neck_in4" + out_channels, + kernel_size=1, + use_bias=False, + name="neck_in4", + dtype=dtype, )(inputs["P4"]) in5 = layers.Conv2D( - out_channels, kernel_size=1, use_bias=False, name="neck_in5" + out_channels, + kernel_size=1, + use_bias=False, + name="neck_in5", + dtype=dtype, )(inputs["P5"]) - out4 = layers.Add(name="add1")([layers.UpSampling2D()(in5), in4]) - out3 = layers.Add(name="add2")([layers.UpSampling2D()(out4), in3]) - out2 = layers.Add(name="add3")([layers.UpSampling2D()(out3), in2]) + out4 = layers.Add(name="add1")([layers.UpSampling2D(dtype=dtype)(in5), in4]) + out3 = layers.Add(name="add2")( + [layers.UpSampling2D(dtype=dtype)(out4), in3] + ) + out2 = layers.Add(name="add3")( + [layers.UpSampling2D(dtype=dtype)(out3), in2] + ) p5 = layers.Conv2D( out_channels // 4, kernel_size=3, padding="same", use_bias=False, name="neck_p5", + dtype=dtype, )(in5) p4 = layers.Conv2D( out_channels // 4, @@ -80,6 +125,7 @@ def diffbin_fpn_model(inputs, out_channels): padding="same", use_bias=False, name="neck_p4", + dtype=dtype, )(out4) p3 = layers.Conv2D( out_channels // 4, @@ -87,6 +133,7 @@ def diffbin_fpn_model(inputs, out_channels): padding="same", use_bias=False, name="neck_p3", + dtype=dtype, )(out3) p2 = layers.Conv2D( out_channels // 4, @@ -94,10 +141,57 @@ def diffbin_fpn_model(inputs, out_channels): padding="same", use_bias=False, name="neck_p2", + dtype=dtype, )(out2) - p5 = layers.UpSampling2D((8, 8))(p5) - p4 = layers.UpSampling2D((4, 4))(p4) - p3 = layers.UpSampling2D((2, 2))(p3) + p5 = layers.UpSampling2D((8, 8), dtype=dtype)(p5) + p4 = layers.UpSampling2D((4, 4), dtype=dtype)(p4) + p3 = layers.UpSampling2D((2, 2), dtype=dtype)(p3) - fused = layers.Concatenate(axis=-1)([p5, p4, p3, p2]) + fused = layers.Concatenate(axis=-1, dtype=dtype)([p5, p4, p3, p2]) return fused + + +def diffbin_head(inputs, in_channels, kernel_list, name): + x = layers.Conv2D( 
+ in_channels // 4, + kernel_size=kernel_list[0], + padding="same", + use_bias=False, + name=f"{name}_conv0_weights", + )(inputs) + x = layers.BatchNormalization( + beta_initializer=keras.initializers.Constant(1e-4), + gamma_initializer=keras.initializers.Constant(1.0), + name=f"{name}_conv0_bn", + )(x) + x = layers.ReLU(name=f"{name}_conv0_relu")(x) + x = layers.Conv2DTranspose( + in_channels // 4, + kernel_size=kernel_list[1], + strides=2, + padding="valid", + bias_initializer=keras.initializers.RandomUniform( + minval=-1.0 / (in_channels // 4 * 1.0) ** 0.5, + maxval=1.0 / (in_channels // 4 * 1.0) ** 0.5, + ), + name=f"{name}_conv1_weights", + )(x) + x = layers.BatchNormalization( + beta_initializer=keras.initializers.Constant(1e-4), + gamma_initializer=keras.initializers.Constant(1.0), + name=f"{name}_conv1_bn", + )(x) + x = layers.ReLU(name=f"{name}_conv1_relu")(x) + x = layers.Conv2DTranspose( + 1, + kernel_size=kernel_list[2], + strides=2, + padding="valid", + activation="sigmoid", + bias_initializer=keras.initializers.RandomUniform( + minval=-1.0 / (in_channels // 4 * 1.0) ** 0.5, + maxval=1.0 / (in_channels // 4 * 1.0) ** 0.5, + ), + name=f"{name}_conv2_weights", + )(x) + return x diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py index 9eb6ffcf3c..86da0aa6ce 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone_test.py @@ -12,34 +12,33 @@ class DifferentialBinarizationTest(TestCase): def setUp(self): - self.batch_size = 2 - self.image_size = 16 - self.images = ops.ones((2, 224, 224, 3)) + self.images = ops.ones((2, 32, 32, 3)) self.image_encoder = ResNetBackbone( - input_conv_filters=[64], + input_conv_filters=[4], input_conv_kernel_sizes=[7], - stackwise_num_filters=[64, 128, 
256, 512], + stackwise_num_filters=[64, 4, 4, 4], stackwise_num_blocks=[3, 4, 6, 3], stackwise_num_strides=[1, 2, 2, 2], block_type="bottleneck_block", - image_shape=(224, 224, 3), + image_shape=(32, 32, 3), ) self.preprocessor = DifferentialBinarizationPreprocessor() self.init_kwargs = { "image_encoder": self.image_encoder, + "fpn_channels": 16, + "head_kernel_list": [3, 2, 2], } def test_backbone_basics(self): + expected_output_shape = { + "probability_maps": (2, 32, 32, 1), + "threshold_maps": (2, 32, 32, 1), + } self.run_backbone_test( cls=DifferentialBinarizationBackbone, init_kwargs=self.init_kwargs, input_data=self.images, - expected_output_shape=( - 2, - 56, - 56, - 256, - ), + expected_output_shape=expected_output_shape, run_mixed_precision_check=False, run_quantization_check=False, ) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py b/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py new file mode 100644 index 0000000000..7239633d36 --- /dev/null +++ b/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py @@ -0,0 +1,113 @@ +import keras +from keras import layers + +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( + DifferentialBinarizationBackbone, +) +from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import ( + DifferentialBinarizationPreprocessor, +) +from keras_hub.src.models.differential_binarization.losses import DBLoss +from keras_hub.src.models.image_segmenter import ImageSegmenter + + +@keras_hub_export("keras_hub.models.DifferentialBinarizationOCR") +class DifferentialBinarizationOCR(ImageSegmenter): + """ + A Keras model implementing the Differential Binarization + architecture for scene text detection, described in + [Real-time Scene Text Detection with Differentiable Binarization]( + 
https://arxiv.org/abs/1911.08947). + + Args: + backbone: A `keras_hub.models.DifferentialBinarizationBackbone` + instance. + preprocessor: `None`, a `keras_hub.models.Preprocessor` instance, + a `keras.Layer` instance, or a callable. If `None` no preprocessing + will be applied to the inputs. + + Examples: + ```python + input_data = np.ones(shape=(8, 224, 224, 3)) + + image_encoder = keras_hub.models.ResNetBackbone.from_preset( + "resnet_vd_50_imagenet" + ) + backbone = keras_hub.models.DifferentialBinarizationBackbone(image_encoder) + detector = keras_hub.models.DifferentialBinarizationOCR( + backbone=backbone + ) + + detector(input_data) + ``` + """ + + backbone_cls = DifferentialBinarizationBackbone + preprocessor_cls = DifferentialBinarizationPreprocessor + + def __init__( + self, + backbone, + preprocessor=None, + **kwargs, + ): + + # === Functional Model === + inputs = backbone.input + x = backbone(inputs) + probability_maps = x["probability_maps"] + threshold_maps = x["threshold_maps"] + binary_maps = step_function(probability_maps, threshold_maps) + outputs = layers.Concatenate(axis=-1)( + [probability_maps, threshold_maps, binary_maps] + ) + + super().__init__(inputs=inputs, outputs=outputs, **kwargs) + + # === Config === + self.backbone = backbone + self.preprocessor = preprocessor + + def compile( + self, + optimizer="auto", + loss="auto", + **kwargs, + ): + """Configures the `DifferentialBinarizationOCR` task for training. + + `DifferentialBinarizationOCR` extends the default compilation signature + of `keras.Model.compile` with defaults for `optimizer` and `loss`. To + override these defaults, pass any value to these arguments during + compilation. + + Args: + optimizer: `"auto"`, an optimizer name, or a `keras.Optimizer` + instance. Defaults to `"auto"`, which uses the default + optimizer for `DifferentialBinarizationOCR`. See + `keras.Model.compile` and `keras.optimizers` for more info on + possible `optimizer` values. 
+ loss: `"auto"`, a loss name, or a `keras.losses.Loss` instance. + Defaults to `"auto"`, in which case the default loss + computation of `DifferentialBinarizationOCR` will be applied. + See `keras.Model.compile` and `keras.losses` for more info on + possible `loss` values. + **kwargs: See `keras.Model.compile` for a full list of arguments + supported by the compile method. + """ + if optimizer == "auto": + optimizer = keras.optimizers.SGD( + learning_rate=0.007, weight_decay=0.0001, momentum=0.9 + ) + if loss == "auto": + loss = DBLoss() + super().compile( + optimizer=optimizer, + loss=loss, + **kwargs, + ) + + +def step_function(x, y, k=50.0): + return 1.0 / (1.0 + keras.ops.exp(-k * (x - y))) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_ocr_test.py similarity index 70% rename from keras_hub/src/models/differential_binarization/differential_binarization_test.py rename to keras_hub/src/models/differential_binarization/differential_binarization_ocr_test.py index c1a3c15049..4d0e10fc67 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_ocr_test.py @@ -1,12 +1,12 @@ import pytest from keras import ops -from keras_hub.src.models.differential_binarization.differential_binarization import ( - DifferentialBinarization, -) from keras_hub.src.models.differential_binarization.differential_binarization_backbone import ( DifferentialBinarizationBackbone, ) +from keras_hub.src.models.differential_binarization.differential_binarization_ocr import ( + DifferentialBinarizationOCR, +) from keras_hub.src.models.differential_binarization.differential_binarization_preprocessor import ( DifferentialBinarizationPreprocessor, ) @@ -14,18 +14,18 @@ from keras_hub.src.tests.test_case import TestCase -class DifferentialBinarizationTest(TestCase): 
+class DifferentialBinarizationOCRTest(TestCase): def setUp(self): - self.images = ops.ones((2, 224, 224, 3)) - self.labels = ops.zeros((2, 224, 224, 4)) + self.images = ops.ones((2, 32, 32, 3)) + self.labels = ops.zeros((2, 32, 32, 4)) image_encoder = ResNetBackbone( - input_conv_filters=[64], + input_conv_filters=[4], input_conv_kernel_sizes=[7], - stackwise_num_filters=[64, 128, 256, 512], + stackwise_num_filters=[64, 4, 4, 4], stackwise_num_blocks=[3, 4, 6, 3], stackwise_num_strides=[1, 2, 2, 2], block_type="bottleneck_block", - image_shape=(224, 224, 3), + image_shape=(32, 32, 3), ) self.backbone = DifferentialBinarizationBackbone( image_encoder=image_encoder @@ -39,32 +39,32 @@ def setUp(self): def test_basics(self): self.run_task_test( - cls=DifferentialBinarization, + cls=DifferentialBinarizationOCR, init_kwargs=self.init_kwargs, train_data=self.train_data, - expected_output_shape=(2, 224, 224, 3), + expected_output_shape=(2, 32, 32, 3), ) @pytest.mark.large def test_saved_model(self): self.run_model_saving_test( - cls=DifferentialBinarization, + cls=DifferentialBinarizationOCR, init_kwargs=self.init_kwargs, input_data=self.images, ) def test_end_to_end_model_predict(self): - model = DifferentialBinarization(**self.init_kwargs) + model = DifferentialBinarizationOCR(**self.init_kwargs) outputs = model.predict(self.images) - self.assertAllEqual(outputs.shape, (2, 224, 224, 3)) + self.assertAllEqual(outputs.shape, (2, 32, 32, 3)) @pytest.mark.skip(reason="disabled until preset's been uploaded to Kaggle") @pytest.mark.extra_large def test_all_presets(self): - for preset in DifferentialBinarization.presets: + for preset in DifferentialBinarizationOCR.presets: self.run_preset_test( - cls=DifferentialBinarization, + cls=DifferentialBinarizationOCR, preset=preset, input_data=self.images, - expected_output_shape=(2, 224, 224, 3), + expected_output_shape=(2, 32, 32, 3), ) From e4a334d526afef8b3f94e6139de6bd9b2c7aa0f4 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam 
Date: Wed, 13 Nov 2024 17:01:44 +0100 Subject: [PATCH 16/20] Fixed torch and jax tests --- .../differential_binarization_ocr_test.py | 4 +++- .../src/models/differential_binarization/losses.py | 7 ++++--- .../models/differential_binarization/losses_test.py | 10 +++++----- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_ocr_test.py b/keras_hub/src/models/differential_binarization/differential_binarization_ocr_test.py index 4d0e10fc67..175a26a495 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_ocr_test.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_ocr_test.py @@ -17,7 +17,9 @@ class DifferentialBinarizationOCRTest(TestCase): def setUp(self): self.images = ops.ones((2, 32, 32, 3)) - self.labels = ops.zeros((2, 32, 32, 4)) + self.labels = ops.concatenate( + (ops.zeros((2, 16, 32, 4)), ops.ones((2, 16, 32, 4))), axis=1 + ) image_encoder = ResNetBackbone( input_conv_filters=[4], input_conv_kernel_sizes=[7], diff --git a/keras_hub/src/models/differential_binarization/losses.py b/keras_hub/src/models/differential_binarization/losses.py index 31f8509455..7e9a28c1f1 100644 --- a/keras_hub/src/models/differential_binarization/losses.py +++ b/keras_hub/src/models/differential_binarization/losses.py @@ -60,14 +60,15 @@ def __call__(self, y_true, y_pred, mask, return_origin=False): positive_loss = loss * ops.cast(positive, "float32") negative_loss = loss * ops.cast(negative, "float32") - # hard negative mining, as suggested in the paper: - # compute the threshold for hard negatives, and zero-out + # hard negative mining, as suggested in + # [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/abs/1911.08947): + # Compute the threshold for hard negatives, and zero-out # negative losses below the threshold. 
using this approach, # we achieve efficient computation on GPUs # compute negative_count relative to the element count of y_pred negative_count_rel = ops.cast(negative_count, "float32") / ops.prod( - ops.shape(negative_count) + ops.cast(ops.shape(y_pred), "float32") ) # compute the threshold value for negative losses and zero neg. loss # values below this threshold diff --git a/keras_hub/src/models/differential_binarization/losses_test.py b/keras_hub/src/models/differential_binarization/losses_test.py index 56b518d2b6..aae390095c 100644 --- a/keras_hub/src/models/differential_binarization/losses_test.py +++ b/keras_hub/src/models/differential_binarization/losses_test.py @@ -16,14 +16,14 @@ def test_loss(self): mask = np.array([0.0, 1.0, 1.0, 0.0]) weights = np.array([4.0, 5.0, 6.0, 7.0]) loss = self.loss_obj(y_true, y_pred, mask, weights) - self.assertAlmostEqual(loss.numpy(), 0.74358, delta=1e-4) + self.assertAlmostEqual(loss, 0.74358, delta=1e-4) def test_correct(self): y_true = np.array([1.0, 1.0, 0.0, 0.0]) y_pred = y_true mask = np.array([0.0, 1.0, 1.0, 0.0]) loss = self.loss_obj(y_true, y_pred, mask) - self.assertAlmostEqual(loss.numpy(), 0.0, delta=1e-4) + self.assertAlmostEqual(loss, 0.0, delta=1e-4) class MaskL1LossTest(TestCase): @@ -35,7 +35,7 @@ def test_masked(self): y_pred = np.array([0.1, 0.2, 0.3, 0.4]) mask = np.array([0.0, 1.0, 0.0, 1.0]) loss = self.loss_obj(y_true, y_pred, mask) - self.assertAlmostEqual(loss.numpy(), 2.7, delta=1e-4) + self.assertAlmostEqual(loss, 2.7, delta=1e-4) class DBLossTest(TestCase): @@ -55,7 +55,7 @@ def test_loss(self): ) y_pred = np.stack((p_map_pred, t_map_pred, b_map_pred), axis=-1) loss = self.loss_obj(y_true, y_pred) - self.assertAlmostEqual(loss.numpy(), 14.1123, delta=1e-4) + self.assertAlmostEqual(loss, 14.1123, delta=1e-4) def test_correct(self): shrink_map = thresh_map = np.array( @@ -68,4 +68,4 @@ def test_correct(self): ) y_pred = np.stack((p_map_pred, t_map_pred, b_map_pred), axis=-1) loss = 
self.loss_obj(y_true, y_pred) - self.assertAlmostEqual(loss.numpy(), 0.0, delta=1e-4) + self.assertAlmostEqual(loss, 0.0, delta=1e-4) From 49d6f6dcb0b32b25d13b684a3ccc5758d5a1253d Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Thu, 14 Nov 2024 20:15:05 +0100 Subject: [PATCH 17/20] Improved code readability --- .../differential_binarization_backbone.py | 86 ++++++++++++------- 1 file changed, 54 insertions(+), 32 deletions(-) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py index d91f4d473f..ac9e39ac84 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py @@ -14,12 +14,16 @@ class DifferentialBinarizationBackbone(Backbone): https://arxiv.org/abs/1911.08947). This class contains the backbone architecture containing the feature - pyramid network. + pyramid network and model heads. Args: image_encoder: A `keras_hub.models.ResNetBackbone` instance. fpn_channels: int. The number of channels to output by the feature pyramid network. Defaults to 256. + head_kernel_list: list of ints. The kernel sizes of probability map and + threshold map heads. Defaults to [3, 2, 2]. + dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype + to use for the model's computations and weights. 
""" def __init__( @@ -76,79 +80,97 @@ def from_config(cls, config): def diffbin_fpn_model(inputs, out_channels, dtype=None): - in2 = layers.Conv2D( + # lateral layers composing the FPN's bottom-up pathway using + # pointwise convolutions of ResNet's pyramid outputs + lateral_p2 = layers.Conv2D( out_channels, kernel_size=1, use_bias=False, - name="neck_in2", + name="neck_lateral_p2", dtype=dtype, )(inputs["P2"]) - in3 = layers.Conv2D( + lateral_p3 = layers.Conv2D( out_channels, kernel_size=1, use_bias=False, - name="neck_in3", + name="neck_lateral_p3", dtype=dtype, )(inputs["P3"]) - in4 = layers.Conv2D( + lateral_p4 = layers.Conv2D( out_channels, kernel_size=1, use_bias=False, - name="neck_in4", + name="neck_lateral_p4", dtype=dtype, )(inputs["P4"]) - in5 = layers.Conv2D( + lateral_p5 = layers.Conv2D( out_channels, kernel_size=1, use_bias=False, - name="neck_in5", + name="neck_lateral_p5", dtype=dtype, )(inputs["P5"]) - out4 = layers.Add(name="add1")([layers.UpSampling2D(dtype=dtype)(in5), in4]) - out3 = layers.Add(name="add2")( - [layers.UpSampling2D(dtype=dtype)(out4), in3] + # top-down fusion pathway consisting of upsampling layers with + # skip connections + topdown_p5 = lateral_p5 + topdown_p4 = layers.Add(name="neck_topdown_p4")( + [ + layers.UpSampling2D(dtype=dtype)(topdown_p5), + lateral_p4, + ] ) - out2 = layers.Add(name="add3")( - [layers.UpSampling2D(dtype=dtype)(out3), in2] + topdown_p3 = layers.Add(name="neck_topdown_p3")( + [ + layers.UpSampling2D(dtype=dtype)(topdown_p4), + lateral_p3, + ] ) - p5 = layers.Conv2D( + topdown_p2 = layers.Add(name="neck_topdown_p2")( + [ + layers.UpSampling2D(dtype=dtype)(topdown_p3), + lateral_p2, + ] + ) + # construct merged feature maps for each pyramid level + featuremap_p5 = layers.Conv2D( out_channels // 4, kernel_size=3, padding="same", use_bias=False, - name="neck_p5", + name="neck_featuremap_p5", dtype=dtype, - )(in5) - p4 = layers.Conv2D( + )(topdown_p5) + featuremap_p4 = layers.Conv2D( out_channels // 4, 
kernel_size=3, padding="same", use_bias=False, - name="neck_p4", + name="neck_featuremap_p4", dtype=dtype, - )(out4) - p3 = layers.Conv2D( + )(topdown_p4) + featuremap_p3 = layers.Conv2D( out_channels // 4, kernel_size=3, padding="same", use_bias=False, - name="neck_p3", + name="neck_featuremap_p3", dtype=dtype, - )(out3) - p2 = layers.Conv2D( + )(topdown_p3) + featuremap_p2 = layers.Conv2D( out_channels // 4, kernel_size=3, padding="same", use_bias=False, - name="neck_p2", + name="neck_featuremap_p2", dtype=dtype, - )(out2) - p5 = layers.UpSampling2D((8, 8), dtype=dtype)(p5) - p4 = layers.UpSampling2D((4, 4), dtype=dtype)(p4) - p3 = layers.UpSampling2D((2, 2), dtype=dtype)(p3) - - fused = layers.Concatenate(axis=-1, dtype=dtype)([p5, p4, p3, p2]) - return fused + )(topdown_p2) + featuremap_p5 = layers.UpSampling2D((8, 8), dtype=dtype)(featuremap_p5) + featuremap_p4 = layers.UpSampling2D((4, 4), dtype=dtype)(featuremap_p4) + featuremap_p3 = layers.UpSampling2D((2, 2), dtype=dtype)(featuremap_p3) + featuremap = layers.Concatenate(axis=-1, dtype=dtype)( + [featuremap_p5, featuremap_p4, featuremap_p3, featuremap_p2] + ) + return featuremap def diffbin_head(inputs, in_channels, kernel_list, name): From d96b8991037ae007b1305aa0e68939347fab5031 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Fri, 22 Nov 2024 20:58:52 +0100 Subject: [PATCH 18/20] Improved/added docstrings --- .../differential_binarization_backbone.py | 11 ++-- .../differential_binarization_ocr.py | 14 +++-- .../differential_binarization/losses.py | 56 ++++++++++++++++--- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py index ac9e39ac84..941d309951 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py +++ 
b/keras_hub/src/models/differential_binarization/differential_binarization_backbone.py @@ -7,14 +7,15 @@ @keras_hub_export("keras_hub.models.DifferentialBinarizationBackbone") class DifferentialBinarizationBackbone(Backbone): - """ - A Keras model implementing the Differential Binarization - architecture for scene text detection, described in + """Differential Binarization architecture for scene text detection. + + This class implements the Differential Binarization architecture for + detecting text in natural images, described in [Real-time Scene Text Detection with Differentiable Binarization]( https://arxiv.org/abs/1911.08947). - This class contains the backbone architecture containing the feature - pyramid network and model heads. + The backbone architecture in this class contains the feature pyramid + network and model heads. Args: image_encoder: A `keras_hub.models.ResNetBackbone` instance. diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py b/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py index 7239633d36..16a812650f 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py @@ -14,11 +14,15 @@ @keras_hub_export("keras_hub.models.DifferentialBinarizationOCR") class DifferentialBinarizationOCR(ImageSegmenter): - """ - A Keras model implementing the Differential Binarization - architecture for scene text detection, described in - [Real-time Scene Text Detection with Differentiable Binarization]( - https://arxiv.org/abs/1911.08947). + """Differential Binarization scene text detection task. + + `DifferentialBinarizationOCR` tasks wrap a + `keras_hub.models.DifferentialBinarizationBackbone` and a + `keras_hub.models.Preprocessor` to create a model that can be used for + detecting text in natural images. 
+ + The probability map output generated by `predict()` can be translated into + polygon representation using `model.postprocess_to_polygons()`. Args: backbone: A `keras_hub.models.DifferentialBinarizationBackbone` diff --git a/keras_hub/src/models/differential_binarization/losses.py b/keras_hub/src/models/differential_binarization/losses.py index 7e9a28c1f1..dec9d9b7ca 100644 --- a/keras_hub/src/models/differential_binarization/losses.py +++ b/keras_hub/src/models/differential_binarization/losses.py @@ -3,7 +3,19 @@ class DiceLoss: - def __init__(self, eps=1e-6, **kwargs): + """Computes the Dice loss for image segmentation tasks. + + Dice loss evaluates the overlap between predicted and ground truth masks + and is particularly effective in handling class imbalance. + + This class does not subclass `keras.losses.Loss`, as it expects an + additional `mask` argument for loss computation. + + Args: + eps: float. A small constant to avoid zero division. Defaults to 1e-6. + """ + + def __init__(self, eps=1e-6): self.eps = eps def __call__(self, y_true, y_pred, mask, weights=None): @@ -16,8 +28,11 @@ def __call__(self, y_true, y_pred, mask, weights=None): class MaskL1Loss: - def __init__(self, **kwargs): - pass + """Computes the L1 loss of masked predictions. + + This class does not subclass `keras.losses.Loss`, as it expects an + additional `mask` argument for loss computation. + """ def __call__(self, y_true, y_pred, mask): mask_sum = ops.sum(mask) @@ -30,7 +45,25 @@ def __call__(self, y_true, y_pred, mask): class BalanceCrossEntropyLoss: - def __init__(self, negative_ratio=3.0, eps=1e-6, **kwargs): + """Compute binary cross entropy, balancing negatives with positives. + + This class uses hard negative mining, as described in + [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/abs/1911.08947) + for balancing negatives with positives. 
Hence, for loss computation we only + consider a certain fraction of top negatives, relative to the number of + positives. + + This class does not subclass `keras.losses.Loss`, as it expects an + additional `mask` argument for loss computation. + + Args: + negative_ratio: float. The upper bound for the number of negatives we + consider for loss computation, relative to the number of positives. + Defaults to 3.0. + eps: float. A small constant to avoid zero division. Defaults to 1e-6. + """ + + def __init__(self, negative_ratio=3.0, eps=1e-6): self.negative_ratio = negative_ratio self.eps = eps @@ -60,10 +93,9 @@ def __call__(self, y_true, y_pred, mask, return_origin=False): positive_loss = loss * ops.cast(positive, "float32") negative_loss = loss * ops.cast(negative, "float32") - # hard negative mining, as suggested in - # [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/abs/1911.08947): + # Hard negative mining # Compute the threshold for hard negatives, and zero-out - # negative losses below the threshold. using this approach, + # negative losses below the threshold. Using this approach, # we achieve efficient computation on GPUs # compute negative_count relative to the element count of y_pred @@ -89,6 +121,16 @@ def __call__(self, y_true, y_pred, mask, return_origin=False): class DBLoss(keras.losses.Loss): + """Computes the loss for the Differential Binarization model. + + Args: + eps: float. A small constant to avoid zero division. Defaults to 1e-6. + l1_scale: float. The scaling factor for the threshold map output's L1 + loss contribution to the total loss. Defaults to 10.0. + bce_scale: float. The scaling factor for the probability map's balance + cross entropy loss contribution to the total loss. Defaults to 5.0. 
+ """ + def __init__(self, eps=1e-6, l1_scale=10.0, bce_scale=5.0, **kwargs): super().__init__(*kwargs) self.dice_loss = DiceLoss(eps=eps) From 2f27981dc73e3004a6317cdb36162f1474c34736 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Mon, 25 Nov 2024 16:21:53 +0100 Subject: [PATCH 19/20] Added `ImageTextDetector` task --- .../differential_binarization_ocr.py | 15 +- .../differential_binarization/losses.py | 4 +- keras_hub/src/models/image_text_detector.py | 277 ++++++++++++++++++ .../src/models/image_text_detector_test.py | 31 ++ 4 files changed, 322 insertions(+), 5 deletions(-) create mode 100644 keras_hub/src/models/image_text_detector.py create mode 100644 keras_hub/src/models/image_text_detector_test.py diff --git a/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py b/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py index 16a812650f..7c7245b312 100644 --- a/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py +++ b/keras_hub/src/models/differential_binarization/differential_binarization_ocr.py @@ -9,11 +9,11 @@ DifferentialBinarizationPreprocessor, ) from keras_hub.src.models.differential_binarization.losses import DBLoss -from keras_hub.src.models.image_segmenter import ImageSegmenter +from keras_hub.src.models.image_text_detector import ImageTextDetector @keras_hub_export("keras_hub.models.DifferentialBinarizationOCR") -class DifferentialBinarizationOCR(ImageSegmenter): +class DifferentialBinarizationOCR(ImageTextDetector): """Differential Binarization scene text detection task. `DifferentialBinarizationOCR` tasks wrap a @@ -43,7 +43,15 @@ class DifferentialBinarizationOCR(ImageSegmenter): backbone=backbone ) - detector(input_data) + map_output = detector(input_data) + ``` + + `map_output` now holds a 8x224x224x3 tensor, where the last dimension + corresponds to the model's probability map, threshold map and binary map + outputs. 
Use `postprocess_to_polygons()` to obtain a polygon + representation: + ```python + detector.postprocess_to_polygons(map_output[...,0]) ``` """ @@ -101,6 +109,7 @@ def compile( supported by the compile method. """ if optimizer == "auto": + # parameters from https://arxiv.org/abs/1911.08947 optimizer = keras.optimizers.SGD( learning_rate=0.007, weight_decay=0.0001, momentum=0.9 ) diff --git a/keras_hub/src/models/differential_binarization/losses.py b/keras_hub/src/models/differential_binarization/losses.py index dec9d9b7ca..3e7d3f75e4 100644 --- a/keras_hub/src/models/differential_binarization/losses.py +++ b/keras_hub/src/models/differential_binarization/losses.py @@ -4,7 +4,7 @@ class DiceLoss: """Computes the Dice loss for image segmentation tasks. - + Dice loss evaluates the overlap between predicted and ground truth masks and is particularly effective in handling class imbalance. @@ -122,7 +122,7 @@ def __call__(self, y_true, y_pred, mask, return_origin=False): class DBLoss(keras.losses.Loss): """Computes the loss for the Differential Binarization model. - + Args: eps: float. A small constant to avoid zero division. Defaults to 1e-6. l1_scale: float. The scaling factor for the threshold map output's L1 diff --git a/keras_hub/src/models/image_text_detector.py b/keras_hub/src/models/image_text_detector.py new file mode 100644 index 0000000000..ec8f80e99a --- /dev/null +++ b/keras_hub/src/models/image_text_detector.py @@ -0,0 +1,277 @@ +import keras +import numpy as np + +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.task import Task + + +@keras_hub_export("keras_hub.models.ImageTextDetector") +class ImageTextDetector(Task): + """Base class for all text detection tasks. + + `ImageTextDetector` tasks wrap a `keras_hub.models.Task` and + a `keras_hub.models.Preprocessor` to create a model that can be used for + image segmentation. 
+ + All `ImageTextDetector` tasks include a `from_preset()` constructor which + can be used to load a pre-trained config and weights. + + Args: + detection_thresh: The value for thresholding predicted mask outputs. + Defaults to 0.3. + min_area: Minimum area for a polygon to be considered valid. Defaults + to 10.0. + unclip_ratio: Expansion ratio for the detected polygons. + Defaults to 2.0. + """ + + def __init__( + self, detection_thresh=0.3, min_area=10.0, unclip_ratio=2.0, **kwargs + ): + self.detection_thresh = detection_thresh + self.min_area = min_area + self.unclip_ratio = unclip_ratio + super().__init__(**kwargs) + + def get_config(self): + config = super().get_config() + config.update( + { + "detection_thresh": self.detection_thresh, + "min_area": self.min_area, + "unclip_ratio": self.unclip_ratio, + } + ) + return config + + def postprocess_to_polygons(self, masks, contour_finder="simple"): + """Converts the mask output of a text detector to polygon coordinates. + + Args: + masks: Segmentation masks (3D batch of masks). + contour_finder: Determines the method for contour finding. Possible + values are "simple", which detects connected regions by walking + the image, and "opencv", which uses OpenCV's contour finder if + available. Defaults to "simple". + + Returns: + List-of-list-of-lists. A list of polygons for each batch element, + where each polygon is represented as a list of (x, y) points. 
+ """ + + if not isinstance(masks, np.ndarray): + masks = keras.ops.convert_to_numpy(masks) + masks = masks > self.detection_thresh + polygons = [] + for mask in masks: + mask_polygons = mask_to_polygons( + mask, min_area=self.min_area, contour_finder=contour_finder + ) + mask_polygons = [ + unclip_polygon(polygon, self.unclip_ratio) + for polygon in mask_polygons + ] + polygons.append(mask_polygons) + return polygons + + +def compute_polygon_area(polygon): + """Calculates the area of a polygon.""" + x, y = zip(*polygon) + return 0.5 * abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) + + +def simplify_polygon(polygon, approximation_tol=2.0): + """Simplifies a polygon using Ramer-Douglas-Peucker.""" + + if len(polygon) < 3: + return polygon + + def perpendicular_distance(point): + if np.all(line_start == line_end): + return np.linalg.norm(point - line_start) + line_vec = line_end - line_start + point_vec = point - line_start + return np.linalg.norm( + np.cross(line_vec, point_vec) / np.linalg.norm(line_vec) + ) + + # find the point with the maximum distance from the line segment + line_start, line_end = polygon[0], polygon[-1] + distances = np.array([perpendicular_distance(pt) for pt in polygon[1:-1]]) + max_idx = np.argmax(distances) + max_dist = distances[max_idx] + if max_dist > approximation_tol: + # simplify recursively + left = simplify_polygon(polygon[: max_idx + 2], approximation_tol) + right = simplify_polygon(polygon[max_idx + 1 :], approximation_tol) + return np.vstack((left[:-1], right)) + else: + return np.array([line_start, line_end]) + + +def compute_edge_normals(polygon): + """Computes outward normals for each edge of a polygon.""" + normals = [] + n = len(polygon) + for i in range(n): + # get edge vector + x1, y1 = polygon[i] + x2, y2 = polygon[(i + 1) % n] + edge = np.array([x2 - x1, y2 - y1]) + # rotate by 90 degrees (clockwise) to get the normal vector + normal = np.array([edge[1], -edge[0]]) + normal = normal / np.linalg.norm(normal) # 
normalize + normals.append(normal) + return normals + + +def convex_hull(points): + """Andrew's monotone chain algorithm for computing the convex hull of a set of 2D points.""" + points = points[np.lexsort((points[:, 1], points[:, 0]))] + + def cross_product(o, a, b): + return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0]) + + lower = [] + for p in points: + while len(lower) >= 2 and cross_product(lower[-2], lower[-1], p) <= 0: + lower.pop() + lower.append(p) + upper = [] + for p in reversed(points): + while len(upper) >= 2 and cross_product(upper[-2], upper[-1], p) <= 0: + upper.pop() + upper.append(p) + return np.array(lower[:-1] + upper[:-1]) + + +def walk_contour(x, y, visited, mask): + """Depth-first search to extract a contour.""" + contour = [] + stack = [(x, y)] + while stack: + cx, cy = stack.pop() + if not visited[cx, cy] and mask[cx, cy]: + visited[cx, cy] = True + # we typically store the y-coordinate in the first dimension + contour.append((cy, cx)) + # Add neighbors (8-connectivity) + neighbors = [ + (cx - 1, cy), + (cx + 1, cy), + (cx, cy - 1), + (cx, cy + 1), + (cx - 1, cy - 1), + (cx + 1, cy + 1), + (cx - 1, cy + 1), + (cx + 1, cy - 1), + ] + for nx, ny in neighbors: + if not visited[nx, ny]: + if 0 <= nx < mask.shape[0] and 0 <= ny < mask.shape[1]: + stack.append((nx, ny)) + return contour + + +def find_contours_simple(mask): + """Simple DFS-based contour finding.""" + visited = np.zeros_like(mask, dtype=bool) + contours = [] + for i in range(mask.shape[0]): + for j in range(mask.shape[1]): + if not mask[i, j] or visited[i, j]: + continue + contour = walk_contour(i, j, visited, mask) + if contour: + contour = convex_hull(np.array(contour)) + contours.append(contour) + return contours + + +def mask_to_polygons( + mask, min_area=10.0, approximation_tol=2.0, contour_finder="simple" +): + """Converts a binary segmentation mask to polygon representations. 
+ + Args: + mask: Binary segmentation mask (2D numpy array where 1 indicates + text regions and 0 is background). + min_area: Minimum area for a polygon to be considered valid. Defaults + to 10.0. + approximation_tol: Approximation tolerance for simplifying polygons + (higher for less detail). Defaults to 2.0. + contour_finder: Determines the method for contour finding. Possible + values are "simple", which detects connected regions by walking + the image, and "opencv", which uses OpenCV's contour finder if + available. Defaults to "simple". + + Returns: + A list of polygons, where each polygon is represented as a list of + (x, y) points. + """ + + if contour_finder == "opencv": + import cv2 + + contours, _ = cv2.findContours( + mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE + ) + contours = [ + contour.squeeze(axis=1) + for contour in contours + if contour.shape[0] > 2 + ] # Squeeze to 2D + elif contour_finder == "simple": + contours = find_contours_simple(mask) + else: + raise ValueError( + f"Invalid argument for contour_finder: {contour_finder}." + ) + + polygons = [] + for contour in contours: + if len(contour) < 3: + continue + simplified = simplify_polygon(contour, approximation_tol) + # Lower-bound the area of detected polygons to filter out noise + area = compute_polygon_area(simplified) + if area >= min_area: + polygons.append(simplified.tolist()) + return polygons + + +def unclip_polygon(polygon, unclip_ratio): + """Enlarges a polygon by a specified ratio. + + Args: + polygon: List of (x, y) coordinates of the polygon vertices. + unclip_ratio: Expansion ratio. + + Returns: + List of expanded polygon coordinates. 
+ """ + + # compute the expansion distance based on the polygon's area and perimeter + area = compute_polygon_area(polygon) + perimeter = sum( + np.linalg.norm( + np.array(polygon[i]) - np.array(polygon[(i + 1) % len(polygon)]) + ) + for i in range(len(polygon)) + ) + distance = area * unclip_ratio / perimeter + # enlarge the polygon by moving vertices outwards based on + # (outwards-pointing) normals of edge vectors + normals = compute_edge_normals(polygon) + expanded_polygon = [] + for i, (x, y) in enumerate(polygon): + # average the normals of the two adjacent edges + prev_normal = normals[i - 1] + curr_normal = normals[i] + avg_normal = (prev_normal + curr_normal) / 2 + avg_normal = avg_normal / np.linalg.norm(avg_normal) # normalize + # offset the vertex along the averaged normal + offset_x, offset_y = avg_normal * distance + expanded_polygon.append((x + offset_x, y + offset_y)) + return expanded_polygon diff --git a/keras_hub/src/models/image_text_detector_test.py b/keras_hub/src/models/image_text_detector_test.py new file mode 100644 index 0000000000..1a97d3072d --- /dev/null +++ b/keras_hub/src/models/image_text_detector_test.py @@ -0,0 +1,31 @@ +import numpy as np + +from keras_hub.src.models.image_text_detector import mask_to_polygons +from keras_hub.src.models.image_text_detector import unclip_polygon +from keras_hub.src.tests.test_case import TestCase + + +class PolygonFunctionsTest(TestCase): + def test_mask_to_polygons(self): + mask = np.zeros((50, 50), dtype=np.uint8) + # detect two square regions + mask[5:15, 10:20] = 1 + mask[35:45, 30:40] = 1 + detected_polygons = mask_to_polygons(mask, contour_finder="simple") + self.assertEqual( + detected_polygons, + [ + [[10, 5], [19, 5], [19, 14], [10, 14]], + [[30, 35], [39, 35], [39, 44], [30, 44]], + ], + ) + + def test_unclip(self): + polygon = [(10, 10), (20, 10), (20, 20), (10, 20)] + unclip_ratio = 1.5 + unclipped_box = unclip_polygon(polygon, unclip_ratio) + self.assertAllClose( + unclipped_box, + 
[(7.348, 7.348), (22.65, 7.348), (22.65, 22.65), (7.348, 22.65)], + rtol=1e-3, + ) From 66afeb97d4d8398eaf5997f4a50aea8492752a18 Mon Sep 17 00:00:00 2001 From: Gowtham Paimagam Date: Mon, 25 Nov 2024 16:28:08 +0100 Subject: [PATCH 20/20] Run `api_gen.sh` --- keras_hub/api/models/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index 0f881b63ae..02cadca61a 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -198,6 +198,7 @@ from keras_hub.src.models.image_segmenter_preprocessor import ( ImageSegmenterPreprocessor, ) +from keras_hub.src.models.image_text_detector import ImageTextDetector from keras_hub.src.models.image_to_image import ImageToImage from keras_hub.src.models.inpaint import Inpaint from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone