From 79110fbf200cea4d12e7efaa2f925de3f6c5c8ea Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 4 Nov 2024 16:33:06 -0800 Subject: [PATCH 01/17] WIP initially adding edge presets --- .../efficientnet/efficientnet_backbone.py | 12 ++ .../src/models/efficientnet/fusedmbconv.py | 10 +- .../src/utils/timm/convert_efficientnet.py | 161 +++++++++++++++--- .../convert_efficientnet_checkpoints.py | 13 +- 4 files changed, 160 insertions(+), 36 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 4016bb01e4..1deef08cf7 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -100,6 +100,7 @@ def __init__( stackwise_squeeze_and_excite_ratios, stackwise_strides, stackwise_block_types, + stackwise_force_input_filters=[0]*7, dropout=0.2, depth_divisor=8, min_depth=8, @@ -163,6 +164,7 @@ def __init__( num_repeats = stackwise_num_repeats[i] input_filters = stackwise_input_filters[i] output_filters = stackwise_output_filters[i] + force_input_filters = stackwise_force_input_filters[i] # Update block input and output filters based on depth multiplier. input_filters = round_filters( @@ -200,6 +202,16 @@ def __init__( self._pyramid_outputs[f"P{curr_pyramid_level}"] = x curr_pyramid_level += 1 + if force_input_filters > 0: + input_filters = round_filters( + filters=force_input_filters, + width_coefficient=width_coefficient, + min_depth=min_depth, + depth_divisor=depth_divisor, + use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, + cap_round_filter_decrease=cap_round_filter_decrease, + ) + # 97 is the start of the lowercase alphabet. letter_identifier = chr(j + 97) stackwise_block_type = stackwise_block_types[i] diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 96f55a22b8..fc17414bd6 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -107,12 +107,6 @@ def __init__( self.activation, name=self.name + "expand_activation" ) - self.bn2 = keras.layers.BatchNormalization( - axis=BN_AXIS, - momentum=self.batch_norm_momentum, - name=self.name + "bn", - ) - self.se_conv1 = keras.layers.Conv2D( self.filters_se, 1, @@ -144,7 +138,7 @@ def __init__( name=self.name + "project_conv", ) - self.bn3 = keras.layers.BatchNormalization( + self.bn2 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, name=self.name + "project_bn", @@ -192,7 +186,7 @@ def call(self, inputs): # Output phase: x = self.output_conv(x) - x = self.bn3(x) + x = self.bn2(x) if self.expand_ratio == 1: x = self.act(x) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 609c26d355..ad47e70c1f 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -18,6 +18,25 @@ "width_coefficient": 1.0, "depth_coefficient": 1.1, }, + "el": { + "width_coefficient": 1.2, + "depth_coefficient": 1.4, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + 
["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + }, + "em": { + + }, + "es": { + + }, } @@ -68,21 +87,21 @@ def convert_weights(backbone, loader, timm_config): timm_architecture = timm_config["architecture"] variant = "_".join(timm_architecture.split("_")[1:]) - def port_conv2d(keras_layer_name, hf_weight_prefix, port_bias=True): + def port_conv2d(keras_layer, hf_weight_prefix, port_bias=True): loader.port_weight( - backbone.get_layer(keras_layer_name).kernel, + keras_layer.kernel, hf_weight_key=f"{hf_weight_prefix}.weight", hook_fn=lambda x, _: np.transpose(x, (2, 3, 1, 0)), ) if port_bias: loader.port_weight( - backbone.get_layer(keras_layer_name).bias, + keras_layer.bias, hf_weight_key=f"{hf_weight_prefix}.bias", ) def port_depthwise_conv2d( - keras_layer_name, + keras_layer, hf_weight_prefix, port_bias=True, depth_multiplier=1, @@ -99,39 +118,39 @@ def convert_pt_conv2d_kernel(pt_kernel): ) loader.port_weight( - backbone.get_layer(keras_layer_name).kernel, + keras_layer.kernel, hf_weight_key=f"{hf_weight_prefix}.weight", hook_fn=lambda x, _: convert_pt_conv2d_kernel(x), ) if port_bias: loader.port_weight( - backbone.get_layer(keras_layer_name).bias, + keras_layer.bias, hf_weight_key=f"{hf_weight_prefix}.bias", ) - def port_batch_normalization(keras_layer_name, hf_weight_prefix): + def port_batch_normalization(keras_layer, hf_weight_prefix): loader.port_weight( - backbone.get_layer(keras_layer_name).gamma, + keras_layer.gamma, hf_weight_key=f"{hf_weight_prefix}.weight", ) loader.port_weight( - backbone.get_layer(keras_layer_name).beta, + keras_layer.beta, hf_weight_key=f"{hf_weight_prefix}.bias", ) loader.port_weight( - backbone.get_layer(keras_layer_name).moving_mean, + keras_layer.moving_mean, hf_weight_key=f"{hf_weight_prefix}.running_mean", ) loader.port_weight( - backbone.get_layer(keras_layer_name).moving_variance, + keras_layer.moving_variance, hf_weight_key=f"{hf_weight_prefix}.running_var", ) # do we need num batches tracked? # Stem - port_conv2d("stem_conv", "conv_stem", port_bias=False) - port_batch_normalization("stem_bn", "bn1") + port_conv2d(backbone.get_layer("stem_conv"), "conv_stem", port_bias=False) + port_batch_normalization(backbone.get_layer("stem_bn"), "bn1") # Stages num_stacks = len(backbone.stackwise_kernel_sizes) @@ -149,67 +168,157 @@ def port_batch_normalization(keras_layer_name, hf_weight_prefix): conv_pw_count = 0 bn_count = 1 - conv_pw_name_map = ["conv_pw", "conv_pwl"] # 97 is the start of the lowercase alphabet. letter_identifier = chr(block_idx + 97) + keras_block_prefix = f"block{stack_index+1}{letter_identifier}_" + hf_block_prefix = f"blocks.{stack_index}.{block_idx}." + if block_type == "v1": - keras_block_prefix = f"block{stack_index+1}{letter_identifier}_" - hf_block_prefix = f"blocks.{stack_index}.{block_idx}." 
+ conv_pw_name_map = ["conv_pw", "conv_pwl"] + # Initial Expansion Conv + if expansion_ratio != 1: + port_conv2d( + backbone.get_layer(keras_block_prefix + "expand_conv"), + hf_block_prefix + conv_pw_name_map[conv_pw_count], + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + backbone.get_layer(keras_block_prefix + "expand_bn"), + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 + + # Depthwise Conv + port_depthwise_conv2d( + backbone.get_layer(keras_block_prefix + "dwconv"), + hf_block_prefix + "conv_dw", + port_bias=False, + ) + port_batch_normalization( + backbone.get_layer(keras_block_prefix + "dwconv_bn"), + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 + + # Squeeze and Excite + port_conv2d( + backbone.get_layer(keras_block_prefix + "se_reduce"), + hf_block_prefix + "se.conv_reduce", + ) + port_conv2d( + backbone.get_layer(keras_block_prefix + "se_expand"), + hf_block_prefix + "se.conv_expand", + ) + + # Output/Projection + port_conv2d( + backbone.get_layer(keras_block_prefix + "project"), + hf_block_prefix + conv_pw_name_map[conv_pw_count], + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + backbone.get_layer(keras_block_prefix + "project_bn"), + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 + elif block_type == "fused": + fused_block_layer = backbone.get_layer(keras_block_prefix) + + # Initial Expansion Conv + if expansion_ratio != 1: + port_conv2d( + fused_block_layer.conv1, + hf_block_prefix + "conv_exp", + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + fused_block_layer.bn1, + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 + + # Squeeze and Excite + port_conv2d( + fused_block_layer.se_conv1, + hf_block_prefix + "se.conv_reduce", + ) + port_conv2d( + fused_block_layer.se_conv2, + hf_block_prefix + "se.conv_expand", + ) + + # Output/Projection + port_conv2d( + fused_block_layer.output_conv, + hf_block_prefix + "conv_pwl", + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + fused_block_layer.bn3, + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 + elif block_type == "unfused": + unfused_block_layer = backbone.get_layer(keras_block_prefix) # Initial Expansion Conv if expansion_ratio != 1: port_conv2d( - keras_block_prefix + "expand_conv", + unfused_block_layer.get_layer(keras_block_prefix + "expand_conv"), hf_block_prefix + conv_pw_name_map[conv_pw_count], port_bias=False, ) conv_pw_count += 1 port_batch_normalization( - keras_block_prefix + "expand_bn", + unfused_block_layer.get_layer(keras_block_prefix + "expand_bn"), hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 # Depthwise Conv port_depthwise_conv2d( - keras_block_prefix + "dwconv", + unfused_block_layer.get_layer(keras_block_prefix + "dwconv"), hf_block_prefix + "conv_dw", port_bias=False, ) port_batch_normalization( - keras_block_prefix + "dwconv_bn", + unfused_block_layer.get_layer(keras_block_prefix + "dwconv_bn"), hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 # Squeeze and Excite port_conv2d( - keras_block_prefix + "se_reduce", + unfused_block_layer.get_layer(keras_block_prefix + "se_reduce"), hf_block_prefix + "se.conv_reduce", ) port_conv2d( - keras_block_prefix + "se_expand", + unfused_block_layer.get_layer(keras_block_prefix + "se_expand"), hf_block_prefix + "se.conv_expand", ) # Output/Projection port_conv2d( - keras_block_prefix + "project", + unfused_block_layer.get_layer(keras_block_prefix + "project"), hf_block_prefix + conv_pw_name_map[conv_pw_count], port_bias=False, ) 
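                # timm numbers a block's convs and batch norms sequentially
                # (conv_pw/conv_pwl, bn1..bn3), so both counters advance after
                # each ported conv/bn pair; skipping the expansion conv when
                # expansion_ratio == 1 keeps the indices aligned with timm.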
conv_pw_count += 1 port_batch_normalization( - keras_block_prefix + "project_bn", + unfused_block_layer.get_layer(keras_block_prefix + "project_bn"), hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 # Head/Top - port_conv2d("top_conv", "conv_head", port_bias=False) - port_batch_normalization("top_bn", "bn2") + port_conv2d(backbone.get_layer("top_conv"), "conv_head", port_bias=False) + port_batch_normalization(backbone.get_layer("top_bn"), "bn2") def convert_head(task, loader, timm_config): diff --git a/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py b/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py index 5790d6130c..75810a19a9 100644 --- a/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py +++ b/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py @@ -2,9 +2,15 @@ Convert efficientnet checkpoints. python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ - --preset efficientnet_b0_ra_imagenet --upload_uri kaggle://kerashub/efficientnet/keras/efficientnet_b0_ra_imagenet + --preset efficientnet_b0_ra_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_b0_ra_imagenet python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ - --preset efficientnet_b1_ft_imagenet --upload_uri kaggle://kerashub/efficientnet/keras/efficientnet_b1_ft_imagenet + --preset efficientnet_b1_ft_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet_el_ra_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_el_ra_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet_em_ra2_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_em_ra2_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet_es_ra_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_es_ra_imagenet """ import os @@ -23,6 +29,9 @@ PRESET_MAP = { "efficientnet_b0_ra_imagenet": "timm/efficientnet_b0.ra_in1k", "efficientnet_b1_ft_imagenet": "timm/efficientnet_b1.ft_in1k", + "efficientnet_el_ra_imagenet": "timm/efficientnet_el.ra_in1k", + "efficientnet_em_ra2_imagenet": "timm/efficientnet_em.ra2_in1k", + "efficientnet_es_ra_imagenet": "timm/efficientnet_es.ra_in1k", } FLAGS = flags.FLAGS From 9cb5c1318f9e0f014d1c76ca7c6c3795a951dce3 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Tue, 5 Nov 2024 16:44:53 -0800 Subject: [PATCH 02/17] WIP el variant working --- .../efficientnet/efficientnet_backbone.py | 6 ++ .../src/models/efficientnet/fusedmbconv.py | 20 ++++- keras_hub/src/models/efficientnet/mbconv.py | 11 ++- .../src/utils/timm/convert_efficientnet.py | 81 ++++++++++--------- 4 files changed, 80 insertions(+), 38 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 1deef08cf7..a6f7639b5c 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -101,6 +101,7 @@ def __init__( stackwise_strides, stackwise_block_types, stackwise_force_input_filters=[0]*7, + stackwise_nores_option=[False]*7, dropout=0.2, depth_divisor=8, min_depth=8, @@ -165,6 +166,7 @@ def __init__( input_filters = stackwise_input_filters[i] output_filters = stackwise_output_filters[i] force_input_filters = stackwise_force_input_filters[i] + 
nores = stackwise_nores_option[i] # Update block input and output filters based on depth multiplier. input_filters = round_filters( @@ -244,6 +246,8 @@ def __init__( activation=activation, dropout=dropout * block_id / blocks, batch_norm_momentum=batch_norm_momentum, + batch_norm_epsilon=batch_norm_epsilon, + nores=nores, name=block_name, ) x = block(x) @@ -303,6 +307,7 @@ def __init__( self.stackwise_strides = stackwise_strides self.stackwise_block_types = stackwise_block_types + self.stackwise_force_input_filters=stackwise_force_input_filters, self.include_stem_padding = include_stem_padding self.use_depth_divisor_as_min_depth = use_depth_divisor_as_min_depth self.cap_round_filter_decrease = cap_round_filter_decrease @@ -330,6 +335,7 @@ def get_config(self): "stackwise_squeeze_and_excite_ratios": self.stackwise_squeeze_and_excite_ratios, "stackwise_strides": self.stackwise_strides, "stackwise_block_types": self.stackwise_block_types, + "stackwise_force_input_filters": self.stackwise_force_input_filters, "include_stem_padding": self.include_stem_padding, "use_depth_divisor_as_min_depth": self.use_depth_divisor_as_min_depth, "cap_round_filter_decrease": self.cap_round_filter_decrease, diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index fc17414bd6..9e06e5bee6 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -70,8 +70,10 @@ def __init__( data_format="channels_last", se_ratio=0.0, batch_norm_momentum=0.9, + batch_norm_epsilon=1e-3, activation="swish", dropout=0.2, + nores=False, **kwargs ): super().__init__(**kwargs) @@ -83,8 +85,10 @@ def __init__( self.data_format = data_format self.se_ratio = se_ratio self.batch_norm_momentum = batch_norm_momentum + self.batch_norm_epsilon = batch_norm_epsilon self.activation = activation self.dropout = dropout + self.nores = nores self.filters = self.input_filters * self.expand_ratio self.filters_se = max(1, int(input_filters * se_ratio)) @@ -101,6 +105,7 @@ def __init__( self.bn1 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, name=self.name + "expand_bn", ) self.act = keras.layers.Activation( @@ -141,6 +146,7 @@ def __init__( self.bn2 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, name=self.name + "project_bn", ) @@ -190,8 +196,18 @@ def call(self, inputs): if self.expand_ratio == 1: x = self.act(x) + # For EdgeTPU Version the stem output does not match the parameterized + # input filters, thus this check needs to be dynamic and not based + # on initial parameterization. This hack is ported from timm. 
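+        # (The probe below is kept commented out for reference; in this port
+        # the mismatch appears to be expressed statically via the
+        # stackwise_force_input_filters and nores options instead.)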
+ # if self.data_format == "channels_last": + # input_filters = inputs.shape[-1] + # x_filters = x.shape[-1] + # else: + # input_filters = inputs.shape[1] + # x_filters = x.shape[1] + # Residual: - if self.strides == 1 and self.input_filters == self.output_filters: + if self.strides == 1 and self.input_filters == self.output_filters and not self.nores: if self.dropout: x = self.dropout_layer(x) x = keras.layers.Add(name=self.name + "add")([x, inputs]) @@ -207,8 +223,10 @@ def get_config(self): "data_format": self.data_format, "se_ratio": self.se_ratio, "batch_norm_momentum": self.batch_norm_momentum, + "batch_norm_epsilon": self.batch_norm_epsilon, "activation": self.activation, "dropout": self.dropout, + "nores": self.nores, } base_config = super().get_config() diff --git a/keras_hub/src/models/efficientnet/mbconv.py b/keras_hub/src/models/efficientnet/mbconv.py index 392e62c04f..80178bbba6 100644 --- a/keras_hub/src/models/efficientnet/mbconv.py +++ b/keras_hub/src/models/efficientnet/mbconv.py @@ -23,8 +23,10 @@ def __init__( data_format="channels_last", se_ratio=0.0, batch_norm_momentum=0.9, + batch_norm_epsilon=1e-3, activation="swish", dropout=0.2, + nores=False, **kwargs ): """Implementation of the MBConv block @@ -83,8 +85,10 @@ def __init__( self.data_format = data_format self.se_ratio = se_ratio self.batch_norm_momentum = batch_norm_momentum + self.batch_norm_epsilon = batch_norm_epsilon self.activation = activation self.dropout = dropout + self.nores = nores self.filters = self.input_filters * self.expand_ratio self.filters_se = max(1, int(input_filters * se_ratio)) @@ -101,6 +105,7 @@ def __init__( self.bn1 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, name=self.name + "expand_bn", ) self.act = keras.layers.Activation( @@ -119,6 +124,7 @@ def __init__( self.bn2 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, name=self.name + "bn", ) @@ -156,6 +162,7 @@ def __init__( self.bn3 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, name=self.name + "project_bn", ) @@ -207,7 +214,7 @@ def call(self, inputs): x = self.output_conv(x) x = self.bn3(x) - if self.strides == 1 and self.input_filters == self.output_filters: + if self.strides == 1 and self.input_filters == self.output_filters and not self.nores: if self.dropout: x = self.dropout_layer(x) x = keras.layers.Add(name=self.name + "add")([x, inputs]) @@ -223,8 +230,10 @@ def get_config(self): "data_format": self.data_format, "se_ratio": self.se_ratio, "batch_norm_momentum": self.batch_norm_momentum, + "batch_norm_epsilon": self.batch_norm_epsilon, "activation": self.activation, "dropout": self.dropout, + "nores": self.nores, } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index ad47e70c1f..c8293135f4 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -30,6 +30,8 @@ "stackwise_squeeze_and_excite_ratios": [0] * 6, "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", }, "em": { @@ -86,6 +88,7 @@ def convert_backbone_config(timm_config): def convert_weights(backbone, 
loader, timm_config): timm_architecture = timm_config["architecture"] variant = "_".join(timm_architecture.split("_")[1:]) + # backbone.build(input_shape=timm_config["pretrained_cfg"]["input_size"]) def port_conv2d(keras_layer, hf_weight_prefix, port_bias=True): loader.port_weight( @@ -163,6 +166,9 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): repeats = int( math.ceil(VARIANT_MAP[variant]["depth_coefficient"] * repeats) ) + se_ratio = VARIANT_MAP[variant]["stackwise_squeeze_and_excite_ratios"][ + stack_index + ] for block_idx in range(repeats): @@ -203,15 +209,16 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): ) bn_count += 1 - # Squeeze and Excite - port_conv2d( - backbone.get_layer(keras_block_prefix + "se_reduce"), - hf_block_prefix + "se.conv_reduce", - ) - port_conv2d( - backbone.get_layer(keras_block_prefix + "se_expand"), - hf_block_prefix + "se.conv_expand", - ) + if 0 < se_ratio <= 1: + # Squeeze and Excite + port_conv2d( + backbone.get_layer(keras_block_prefix + "se_reduce"), + hf_block_prefix + "se.conv_reduce", + ) + port_conv2d( + backbone.get_layer(keras_block_prefix + "se_expand"), + hf_block_prefix + "se.conv_expand", + ) # Output/Projection port_conv2d( @@ -242,15 +249,16 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): ) bn_count += 1 - # Squeeze and Excite - port_conv2d( - fused_block_layer.se_conv1, - hf_block_prefix + "se.conv_reduce", - ) - port_conv2d( - fused_block_layer.se_conv2, - hf_block_prefix + "se.conv_expand", - ) + if 0 < se_ratio <= 1: + # Squeeze and Excite + port_conv2d( + fused_block_layer.se_conv1, + hf_block_prefix + "se.conv_reduce", + ) + port_conv2d( + fused_block_layer.se_conv2, + hf_block_prefix + "se.conv_expand", + ) # Output/Projection port_conv2d( @@ -260,7 +268,7 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): ) conv_pw_count += 1 port_batch_normalization( - fused_block_layer.bn3, + fused_block_layer.bn2, hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 @@ -270,48 +278,49 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): # Initial Expansion Conv if expansion_ratio != 1: port_conv2d( - unfused_block_layer.get_layer(keras_block_prefix + "expand_conv"), - hf_block_prefix + conv_pw_name_map[conv_pw_count], + unfused_block_layer.conv1, + hf_block_prefix + "conv_pw", port_bias=False, ) conv_pw_count += 1 port_batch_normalization( - unfused_block_layer.get_layer(keras_block_prefix + "expand_bn"), + unfused_block_layer.bn1, hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 # Depthwise Conv port_depthwise_conv2d( - unfused_block_layer.get_layer(keras_block_prefix + "dwconv"), + unfused_block_layer.depthwise, hf_block_prefix + "conv_dw", port_bias=False, ) port_batch_normalization( - unfused_block_layer.get_layer(keras_block_prefix + "dwconv_bn"), + unfused_block_layer.bn2, hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 - # Squeeze and Excite - port_conv2d( - unfused_block_layer.get_layer(keras_block_prefix + "se_reduce"), - hf_block_prefix + "se.conv_reduce", - ) - port_conv2d( - unfused_block_layer.get_layer(keras_block_prefix + "se_expand"), - hf_block_prefix + "se.conv_expand", - ) + if 0 < se_ratio <= 1: + # Squeeze and Excite + port_conv2d( + unfused_block_layer.se_conv1, + hf_block_prefix + "se.conv_reduce", + ) + port_conv2d( + unfused_block_layer.se_conv2, + hf_block_prefix + "se.conv_expand", + ) # Output/Projection port_conv2d( - unfused_block_layer.get_layer(keras_block_prefix + "project"), - hf_block_prefix + conv_pw_name_map[conv_pw_count], + 
unfused_block_layer.output_conv, + hf_block_prefix + "conv_pwl", port_bias=False, ) conv_pw_count += 1 port_batch_normalization( - unfused_block_layer.get_layer(keras_block_prefix + "project_bn"), + unfused_block_layer.bn3, hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 From 64b6af8471152aaef86021bc9cc26c36b4813dd5 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 14:14:51 -0800 Subject: [PATCH 03/17] added all hf timm edge presets --- .../src/utils/timm/convert_efficientnet.py | 39 +++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index c8293135f4..68fed5fb5d 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -13,10 +13,12 @@ "b0": { "width_coefficient": 1.0, "depth_coefficient": 1.0, + "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b1": { "width_coefficient": 1.0, "depth_coefficient": 1.1, + "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "el": { "width_coefficient": 1.2, @@ -34,10 +36,34 @@ "activation": "relu", }, "em": { - + "width_coefficient": 1.0, + "depth_coefficient": 1.1, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", }, "es": { - + "width_coefficient": 1.0, + "depth_coefficient": 1.0, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", }, } @@ -52,15 +78,6 @@ def convert_backbone_config(timm_config): "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], - "stackwise_squeeze_and_excite_ratios": [ - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - ], "stackwise_block_types": ["v1"] * 7, "min_depth": None, "include_stem_padding": True, From ccb508b9df074c60167d9f2c6d54aed2f547391a Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 14:20:02 -0800 Subject: [PATCH 04/17] removing irrelevant note --- keras_hub/src/models/efficientnet/fusedmbconv.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 9e06e5bee6..7bb706ca9c 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -196,16 +196,6 @@ def call(self, inputs): if self.expand_ratio == 1: x = self.act(x) - # For EdgeTPU Version the stem output does not match the parameterized - # input filters, thus this check needs to be dynamic and 
not based - # on initial parameterization. This hack is ported from timm. - # if self.data_format == "channels_last": - # input_filters = inputs.shape[-1] - # x_filters = x.shape[-1] - # else: - # input_filters = inputs.shape[1] - # x_filters = x.shape[1] - # Residual: if self.strides == 1 and self.input_filters == self.output_filters and not self.nores: if self.dropout: From cebc921b5810c9ea854f95f56758f24a1bc1b5ef Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 14:25:13 -0800 Subject: [PATCH 05/17] format pass --- keras_hub/src/models/efficientnet/efficientnet_backbone.py | 6 +++--- keras_hub/src/models/efficientnet/fusedmbconv.py | 6 +++++- keras_hub/src/models/efficientnet/mbconv.py | 6 +++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index a6f7639b5c..9f3825ea1b 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -100,8 +100,8 @@ def __init__( stackwise_squeeze_and_excite_ratios, stackwise_strides, stackwise_block_types, - stackwise_force_input_filters=[0]*7, - stackwise_nores_option=[False]*7, + stackwise_force_input_filters=[0] * 7, + stackwise_nores_option=[False] * 7, dropout=0.2, depth_divisor=8, min_depth=8, @@ -307,7 +307,7 @@ def __init__( self.stackwise_strides = stackwise_strides self.stackwise_block_types = stackwise_block_types - self.stackwise_force_input_filters=stackwise_force_input_filters, + self.stackwise_force_input_filters = (stackwise_force_input_filters,) self.include_stem_padding = include_stem_padding self.use_depth_divisor_as_min_depth = use_depth_divisor_as_min_depth self.cap_round_filter_decrease = cap_round_filter_decrease diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 7bb706ca9c..8d2cc2fdef 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -197,7 +197,11 @@ def call(self, inputs): x = self.act(x) # Residual: - if self.strides == 1 and self.input_filters == self.output_filters and not self.nores: + if ( + self.strides == 1 + and self.input_filters == self.output_filters + and not self.nores + ): if self.dropout: x = self.dropout_layer(x) x = keras.layers.Add(name=self.name + "add")([x, inputs]) diff --git a/keras_hub/src/models/efficientnet/mbconv.py b/keras_hub/src/models/efficientnet/mbconv.py index 80178bbba6..e9acbfeb9a 100644 --- a/keras_hub/src/models/efficientnet/mbconv.py +++ b/keras_hub/src/models/efficientnet/mbconv.py @@ -214,7 +214,11 @@ def call(self, inputs): x = self.output_conv(x) x = self.bn3(x) - if self.strides == 1 and self.input_filters == self.output_filters and not self.nores: + if ( + self.strides == 1 + and self.input_filters == self.output_filters + and not self.nores + ): if self.dropout: x = self.dropout_layer(x) x = keras.layers.Add(name=self.name + "add")([x, inputs]) From 11a86a20cc706d801f9dafd53488ece5bb892208 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 14:28:28 -0800 Subject: [PATCH 06/17] remove irrelevant old commented code --- keras_hub/src/utils/timm/convert_efficientnet.py | 1 - 1 file changed, 1 deletion(-) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 68fed5fb5d..5c58c7c04b 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ 
b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -105,7 +105,6 @@ def convert_backbone_config(timm_config): def convert_weights(backbone, loader, timm_config): timm_architecture = timm_config["architecture"] variant = "_".join(timm_architecture.split("_")[1:]) - # backbone.build(input_shape=timm_config["pretrained_cfg"]["input_size"]) def port_conv2d(keras_layer, hf_weight_prefix, port_bias=True): loader.port_weight( From 5533922b55847b6557118e51c4fdf361b38791f1 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 15:16:53 -0800 Subject: [PATCH 07/17] fix unit test regression --- .../models/efficientnet/efficientnet_backbone.py | 2 +- .../efficientnet/efficientnet_backbone_test.py | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 9f3825ea1b..f2a3b70912 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -307,7 +307,7 @@ def __init__( self.stackwise_strides = stackwise_strides self.stackwise_block_types = stackwise_block_types - self.stackwise_force_input_filters = (stackwise_force_input_filters,) + self.stackwise_force_input_filters = stackwise_force_input_filters self.include_stem_padding = include_stem_padding self.use_depth_divisor_as_min_depth = use_depth_divisor_as_min_depth self.cap_round_filter_decrease = cap_round_filter_decrease diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py b/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py index f31004b5dc..c11e636540 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py @@ -26,6 +26,8 @@ def setUp(self): ], "stackwise_strides": [1, 2, 2, 2, 1, 2], "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [0] * 6, + "stackwise_nores_option": [False] * 6, "width_coefficient": 1.0, "depth_coefficient": 1.0, } @@ -60,15 +62,9 @@ def test_valid_call_original_v1(self): "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], - "stackwise_squeeze_and_excite_ratios": [ - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - ], + "stackwise_squeeze_and_excite_ratios": [0.25] * 7, + "stackwise_force_input_filters": [0] * 7, + "stackwise_nores_option": [False] * 7, "width_coefficient": 1.0, "depth_coefficient": 1.0, "stackwise_block_types": ["v1"] * 7, From a935e6240b9d053bac00bf8aceca4dc5a3b353bc Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 16:12:34 -0800 Subject: [PATCH 08/17] add presets to preset file --- .../efficientnet/efficientnet_presets.py | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_presets.py b/keras_hub/src/models/efficientnet/efficientnet_presets.py index 39c9514816..2a3c279350 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_presets.py +++ b/keras_hub/src/models/efficientnet/efficientnet_presets.py @@ -17,7 +17,7 @@ "efficientnet_b1_ft_imagenet": { "metadata": { "description": ( - "EfficientNet B1 model fine-trained on the ImageNet 1k dataset." + "EfficientNet B1 model fine-tuned on the ImageNet 1k dataset." 
), "params": 7794184, "official_name": "EfficientNet", @@ -26,4 +26,43 @@ }, "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet", }, + "efficientnet_el_ra_imagenet": { + "metadata": { + "description": ( + "EfficientNet-EdgeTPU Large model trained on the ImageNet 1k " + "dataset with RandAugment recipe." + ), + "params": 10589712, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/1905.11946", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet", + }, + "efficientnet_em_ra2_imagenet": { + "metadata": { + "description": ( + "EfficientNet-EdgeTPU Medium model trained on the ImageNet 1k " + "dataset with RandAugment2 recipe." + ), + "params": 6899496, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/1905.11946", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet", + }, + "efficientnet_es_ra_imagenet": { + "metadata": { + "description": ( + "EfficientNet-EdgeTPU Small model trained on the ImageNet 1k " + "dataset with RandAugment recipe." + ), + "params": 5438392, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/1905.11946", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet", + }, } From b89638fb1296976fbf2a3841ffbca3677e43a572 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 16:42:27 -0800 Subject: [PATCH 09/17] WIP starting changes needed for additional presets --- .../src/utils/timm/convert_efficientnet.py | 45 +++++++++++++++++++ .../convert_efficientnet_checkpoints.py | 9 ++++ 2 files changed, 54 insertions(+) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 5c58c7c04b..f256ca4e93 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -65,6 +65,51 @@ "stackwise_nores_option": [True] + [False] * 5, "activation": "relu", }, + "rw_m": { + "width_coefficient": 1.2, + "depth_coefficient": (1.2,) * 4 + (1.6,) * 2, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", + }, + "rw_s": { + "width_coefficient": 1.0, + "depth_coefficient": 1.0, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", + }, + "rw_t": { + "width_coefficient": 0.8, + "depth_coefficient": 0.9, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 
144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", + }, } diff --git a/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py b/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py index 75810a19a9..366a05a16a 100644 --- a/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py +++ b/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py @@ -11,6 +11,12 @@ --preset efficientnet_em_ra2_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_em_ra2_imagenet python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ --preset efficientnet_es_ra_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_es_ra_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet2_rw_m_agc_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_el_ra_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet2_rw_s_ra2_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_em_ra2_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet2_rw_t_ra2_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_es_ra_imagenet """ import os @@ -32,6 +38,9 @@ "efficientnet_el_ra_imagenet": "timm/efficientnet_el.ra_in1k", "efficientnet_em_ra2_imagenet": "timm/efficientnet_em.ra2_in1k", "efficientnet_es_ra_imagenet": "timm/efficientnet_es.ra_in1k", + "efficientnet2_rw_m_agc_imagenet": "timm/efficientnetv2_rw_m.agc_in1k", + "efficientnet2_rw_s_ra2_imagenet": "timm/efficientnetv2_rw_s.ra2_in1k", + "efficientnet2_rw_t_ra2_imagenet": "timm/efficientnetv2_rw_t.ra2_in1k", } FLAGS = flags.FLAGS From 9e1a850a47cac50842820374d18731be4436abd7 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Fri, 8 Nov 2024 16:50:03 -0800 Subject: [PATCH 10/17] WIP 2 variants working --- .../src/models/efficientnet/convbnact.py | 139 ++++++++++++++++++ .../src/models/efficientnet/convbnact_test.py | 32 ++++ .../efficientnet/efficientnet_backbone.py | 36 ++++- .../src/models/efficientnet/fusedmbconv.py | 32 ++-- keras_hub/src/models/efficientnet/mbconv.py | 11 +- .../src/utils/timm/convert_efficientnet.py | 107 ++++++++------ 6 files changed, 297 insertions(+), 60 deletions(-) create mode 100644 keras_hub/src/models/efficientnet/convbnact.py create mode 100644 keras_hub/src/models/efficientnet/convbnact_test.py diff --git a/keras_hub/src/models/efficientnet/convbnact.py b/keras_hub/src/models/efficientnet/convbnact.py new file mode 100644 index 0000000000..01ae845834 --- /dev/null +++ b/keras_hub/src/models/efficientnet/convbnact.py @@ -0,0 +1,139 @@ +import keras + +BN_AXIS = 3 + +CONV_KERNEL_INITIALIZER = { + "class_name": "VarianceScaling", + "config": { + "scale": 2.0, + "mode": "fan_out", + "distribution": "truncated_normal", + }, +} + + +class ConvBNActBlock(keras.layers.Layer): + """ + Args: + input_filters: int, the number of input filters + output_filters: int, the number of output filters + expand_ratio: default 1, the ratio by which input_filters are multiplied + to expand the structure in the middle expansion phase + kernel_size: default 3, the 
kernel_size to apply to the expansion phase + convolutions + strides: default 1, the strides to apply to the expansion phase + convolutions + se_ratio: default 0.0, The filters used in the Squeeze-Excitation phase, + and are chosen as the maximum between 1 and input_filters*se_ratio + batch_norm_momentum: default 0.9, the BatchNormalization momentum + activation: default "swish", the activation function used between + convolution operations + dropout: float, the optional dropout rate to apply before the output + convolution, defaults to 0.2 + + Returns: + A tensor representing a feature map, passed through the FusedMBConv + block + + Note: + Not intended to be used outside of the EfficientNet architecture. + """ + + def __init__( + self, + input_filters, + output_filters, + expand_ratio=1, + kernel_size=3, + strides=1, + data_format="channels_last", + batch_norm_momentum=0.9, + batch_norm_epsilon=1e-3, + activation="swish", + dropout=0.2, + nores=False, + **kwargs + ): + super().__init__(**kwargs) + self.input_filters = input_filters + self.output_filters = output_filters + self.expand_ratio = expand_ratio + self.kernel_size = kernel_size + self.strides = strides + self.data_format = data_format + self.batch_norm_momentum = batch_norm_momentum + self.batch_norm_epsilon = batch_norm_epsilon + self.activation = activation + self.dropout = dropout + self.nores = nores + self.filters = self.input_filters * self.expand_ratio + + padding_pixels = kernel_size // 2 + self.conv1_pad = keras.layers.ZeroPadding2D( + padding=(padding_pixels, padding_pixels), + name=self.name + "conv_pad", + ) + self.conv1 = keras.layers.Conv2D( + filters=self.filters, + kernel_size=kernel_size, + strides=strides, + kernel_initializer=CONV_KERNEL_INITIALIZER, + padding="valid", + data_format=data_format, + use_bias=False, + name=self.name + "conv", + ) + self.bn1 = keras.layers.BatchNormalization( + axis=BN_AXIS, + momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, + name=self.name + "bn", + ) + self.act = keras.layers.Activation( + self.activation, name=self.name + "activation" + ) + + if self.dropout: + self.dropout_layer = keras.layers.Dropout( + self.dropout, + noise_shape=(None, 1, 1, 1), + name=self.name + "drop", + ) + + def build(self, input_shape): + if self.name is None: + self.name = keras.backend.get_uid("block0") + + def call(self, inputs): + x = self.conv1_pad(inputs) + x = self.conv1(x) + x = self.bn1(x) + x = self.act(x) + + # Residual: + if ( + self.strides == 1 + and self.input_filters == self.output_filters + and not self.nores + ): + if self.dropout: + x = self.dropout_layer(x) + x = keras.layers.Add(name=self.name + "add")([x, inputs]) + return x + + def get_config(self): + config = { + "input_filters": self.input_filters, + "output_filters": self.output_filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "data_format": self.data_format, + "batch_norm_momentum": self.batch_norm_momentum, + "batch_norm_epsilon": self.batch_norm_epsilon, + "activation": self.activation, + "dropout": self.dropout, + "nores": self.nores, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_hub/src/models/efficientnet/convbnact_test.py b/keras_hub/src/models/efficientnet/convbnact_test.py new file mode 100644 index 0000000000..54f8957b04 --- /dev/null +++ b/keras_hub/src/models/efficientnet/convbnact_test.py @@ -0,0 +1,32 @@ +import keras + +from keras_hub.src.models.efficientnet.convbnact import 
ConvBNActBlock +from keras_hub.src.tests.test_case import TestCase + + +class ConvBNActBlockTest(TestCase): + def test_same_input_output_shapes(self): + inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") + layer = ConvBNActBlock(input_filters=32, output_filters=32) + + output = layer(inputs) + self.assertEquals(output.shape, (1, 64, 64, 32)) + self.assertLen(output, 1) + + def test_different_input_output_shapes(self): + inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") + layer = ConvBNActBlock(input_filters=32, output_filters=48) + + output = layer(inputs) + self.assertEquals(output.shape, (1, 64, 64, 48)) + self.assertLen(output, 1) + + def test_squeeze_excitation_ratio(self): + inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") + layer = ConvBNActBlock( + input_filters=32, output_filters=48, se_ratio=0.25 + ) + + output = layer(inputs) + self.assertEquals(output.shape, (1, 64, 64, 48)) + self.assertLen(output, 1) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index f2a3b70912..29459e65ac 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -3,6 +3,7 @@ import keras from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.efficientnet.convbnact import ConvBNActBlock from keras_hub.src.models.efficientnet.fusedmbconv import FusedMBConvBlock from keras_hub.src.models.efficientnet.mbconv import MBConvBlock from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone @@ -115,6 +116,7 @@ def __init__( batch_norm_momentum=0.9, batch_norm_epsilon=1e-5, projection_activation=None, + num_features=1280, **kwargs, ): image_input = keras.layers.Input(shape=input_shape) @@ -161,12 +163,20 @@ def __init__( self._pyramid_outputs = {} curr_pyramid_level = 1 - for i in range(len(stackwise_kernel_sizes)): + num_stacks = len(stackwise_kernel_sizes) + + if isinstance(depth_coefficient, tuple): + assert len(depth_coefficient) == num_stacks + else: + depth_coefficient = (depth_coefficient,) * num_stacks + + for i in range(num_stacks): num_repeats = stackwise_num_repeats[i] input_filters = stackwise_input_filters[i] output_filters = stackwise_output_filters[i] force_input_filters = stackwise_force_input_filters[i] nores = stackwise_nores_option[i] + stack_depth_coefficient = depth_coefficient[i] # Update block input and output filters based on depth multiplier. 
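            # Assuming the standard EfficientNet rounding, round_filters
            # scales by width_coefficient and snaps the result to a multiple
            # of depth_divisor, e.g. filters=32, width_coefficient=1.2,
            # depth_divisor=8:
            #   32 * 1.2 = 38.4 -> int(38.4 + 8 / 2) // 8 * 8 = 40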
input_filters = round_filters( @@ -188,7 +198,7 @@ def __init__( repeats = round_repeats( repeats=num_repeats, - depth_coefficient=depth_coefficient, + depth_coefficient=stack_depth_coefficient, ) strides = stackwise_strides[i] squeeze_and_excite_ratio = stackwise_squeeze_and_excite_ratios[i] @@ -234,7 +244,7 @@ def __init__( batch_norm_epsilon=batch_norm_epsilon, name=block_name, ) - else: + elif stackwise_block_type in ("fused", "unfused"): block = get_conv_constructor(stackwise_block_type)( input_filters=input_filters, output_filters=output_filters, @@ -251,11 +261,27 @@ def __init__( name=block_name, ) x = block(x) + else: # cba block + block = ConvBNActBlock( + input_filters=input_filters, + output_filters=output_filters, + expand_ratio=stackwise_expansion_ratios[i], + kernel_size=stackwise_kernel_sizes[i], + strides=strides, + data_format=data_format, + activation=activation, + dropout=dropout * block_id / blocks, + batch_norm_momentum=batch_norm_momentum, + batch_norm_epsilon=batch_norm_epsilon, + nores=nores, + name=block_name, + ) + x = block(x) block_id += 1 # Build top top_filters = round_filters( - filters=1280, + filters=num_features, width_coefficient=width_coefficient, min_depth=min_depth, depth_divisor=depth_divisor, @@ -577,6 +603,8 @@ def get_conv_constructor(conv_type): return MBConvBlock elif conv_type == "fused": return FusedMBConvBlock + elif conv_type == "cba": + return ConvBNActBlock else: raise ValueError( "Expected `conv_type` to be " diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 8d2cc2fdef..17e1351c2b 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -74,6 +74,7 @@ def __init__( activation="swish", dropout=0.2, nores=False, + projection_kernel_size=1, **kwargs ): super().__init__(**kwargs) @@ -89,15 +90,21 @@ def __init__( self.activation = activation self.dropout = dropout self.nores = nores + self.projection_kernel_size = projection_kernel_size self.filters = self.input_filters * self.expand_ratio self.filters_se = max(1, int(input_filters * se_ratio)) + padding_pixels = kernel_size // 2 + self.conv1_pad = keras.layers.ZeroPadding2D( + padding=(padding_pixels, padding_pixels), + name=self.name + "expand_conv_pad", + ) self.conv1 = keras.layers.Conv2D( filters=self.filters, kernel_size=kernel_size, strides=strides, kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", + padding="valid", data_format=data_format, use_bias=False, name=self.name + "expand_conv", @@ -132,12 +139,17 @@ def __init__( name=self.name + "se_expand", ) + padding_pixels = projection_kernel_size // 2 + self.output_conv_pad = keras.layers.ZeroPadding2D( + padding=(padding_pixels, padding_pixels), + name=self.name + "project_conv_pad", + ) self.output_conv = keras.layers.Conv2D( filters=self.output_filters, - kernel_size=1 if expand_ratio != 1 else kernel_size, + kernel_size=projection_kernel_size, strides=1, kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", + padding="valid", data_format=data_format, use_bias=False, name=self.name + "project_conv", @@ -163,12 +175,10 @@ def build(self, input_shape): def call(self, inputs): # Expansion phase - if self.expand_ratio != 1: - x = self.conv1(inputs) - x = self.bn1(x) - x = self.act(x) - else: - x = inputs + x = self.conv1_pad(inputs) + x = self.conv1(x) + x = self.bn1(x) + x = self.act(x) # Squeeze and excite if 0 < self.se_ratio <= 1: @@ -191,10 +201,9 @@ def call(self, inputs): x = 
keras.layers.multiply([x, se], name=self.name + "se_excite") # Output phase: + x = self.output_conv_pad(x) x = self.output_conv(x) x = self.bn2(x) - if self.expand_ratio == 1: - x = self.act(x) # Residual: if ( @@ -221,6 +230,7 @@ def get_config(self): "activation": self.activation, "dropout": self.dropout, "nores": self.nores, + "projection_kernel_size": self.projection_kernel_size, } base_config = super().get_config() diff --git a/keras_hub/src/models/efficientnet/mbconv.py b/keras_hub/src/models/efficientnet/mbconv.py index e9acbfeb9a..9584f7391c 100644 --- a/keras_hub/src/models/efficientnet/mbconv.py +++ b/keras_hub/src/models/efficientnet/mbconv.py @@ -148,12 +148,18 @@ def __init__( name=self.name + "se_expand", ) + projection_kernel_size = 1 if expand_ratio != 1 else kernel_size + padding_pixels = projection_kernel_size // 2 + self.output_conv_pad = keras.layers.ZeroPadding2D( + padding=(padding_pixels, padding_pixels), + name=self.name + "project_conv_pad", + ) self.output_conv = keras.layers.Conv2D( filters=self.output_filters, - kernel_size=1 if expand_ratio != 1 else kernel_size, + kernel_size=projection_kernel_size, strides=1, kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", + padding="valid", data_format=data_format, use_bias=False, name=self.name + "project_conv", @@ -211,6 +217,7 @@ def call(self, inputs): x = keras.layers.multiply([x, se], name=self.name + "se_excite") # Output phase + x = self.output_conv_pad(x) x = self.output_conv(x) x = self.bn3(x) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index f256ca4e93..322c578e87 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -68,47 +68,49 @@ "rw_m": { "width_coefficient": 1.2, "depth_coefficient": (1.2,) * 4 + (1.6,) * 2, - "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], - "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], - "stackwise_input_filters": [32, 24, 32, 48, 96, 144], - "stackwise_output_filters": [24, 32, 48, 96, 144, 192], - "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], + "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], + "stackwise_input_filters": [24, 24, 48, 64, 128, 160], + "stackwise_output_filters": [24, 48, 64, 128, 160, 272], + "stackwise_expansion_ratios": [1, 4, 4, 4, 6, 6], "stackwise_strides": [1, 2, 2, 2, 1, 2], - "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_squeeze_and_excite_ratios": [0, 0, 0, 0.25, 0.25, 0.25], "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, - "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], - "stackwise_nores_option": [True] + [False] * 5, - "activation": "relu", + "stackwise_force_input_filters": [0, 0, 0, 0, 0, 0], + "stackwise_nores_option": [False] * 6, + "activation": "silu", + "num_features": 1792, }, "rw_s": { "width_coefficient": 1.0, "depth_coefficient": 1.0, - "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], - "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], - "stackwise_input_filters": [32, 24, 32, 48, 96, 144], - "stackwise_output_filters": [24, 32, 48, 96, 144, 192], - "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], + "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], + "stackwise_input_filters": [24, 24, 48, 64, 128, 160], + "stackwise_output_filters": [24, 48, 64, 128, 160, 272], + "stackwise_expansion_ratios": [1, 4, 4, 4, 6, 6], "stackwise_strides": [1, 2, 2, 2, 1, 2], - 
"stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_squeeze_and_excite_ratios": [0, 0, 0, 0.25, 0.25, 0.25], "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, - "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], - "stackwise_nores_option": [True] + [False] * 5, - "activation": "relu", + "stackwise_force_input_filters": [0, 0, 0, 0, 0, 0], + "stackwise_nores_option": [False] * 6, + "activation": "silu", + "num_features": 1792, }, "rw_t": { "width_coefficient": 0.8, "depth_coefficient": 0.9, - "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], - "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], - "stackwise_input_filters": [32, 24, 32, 48, 96, 144], - "stackwise_output_filters": [24, 32, 48, 96, 144, 192], - "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], + "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], + "stackwise_input_filters": [24, 24, 48, 64, 128, 160], + "stackwise_output_filters": [24, 48, 64, 128, 160, 256], + "stackwise_expansion_ratios": [1, 4, 4, 4, 6, 6], "stackwise_strides": [1, 2, 2, 2, 1, 2], - "stackwise_squeeze_and_excite_ratios": [0] * 6, - "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, - "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], - "stackwise_nores_option": [True] + [False] * 5, - "activation": "relu", + "stackwise_squeeze_and_excite_ratios": [0, 0, 0, 0.25, 0.25, 0.25], + "stackwise_block_types": ["cba"] + ["fused"] * 2 + ["unfused"] * 3, + "stackwise_force_input_filters": [0, 0, 0, 0, 0, 0], + "stackwise_nores_option": [False] * 6, + "activation": "silu", }, } @@ -218,15 +220,21 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): # Stages num_stacks = len(backbone.stackwise_kernel_sizes) + + depth_coefficient = VARIANT_MAP[variant]["depth_coefficient"] + if isinstance(depth_coefficient, tuple): + assert len(depth_coefficient) == num_stacks + else: + depth_coefficient = (depth_coefficient,) * num_stacks + for stack_index in range(num_stacks): block_type = backbone.stackwise_block_types[stack_index] expansion_ratio = backbone.stackwise_expansion_ratios[stack_index] repeats = backbone.stackwise_num_repeats[stack_index] + stack_depth_coefficient = depth_coefficient[stack_index] - repeats = int( - math.ceil(VARIANT_MAP[variant]["depth_coefficient"] * repeats) - ) + repeats = int(math.ceil(stack_depth_coefficient * repeats)) se_ratio = VARIANT_MAP[variant]["stackwise_squeeze_and_excite_ratios"][ stack_index ] @@ -297,18 +305,17 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): fused_block_layer = backbone.get_layer(keras_block_prefix) # Initial Expansion Conv - if expansion_ratio != 1: - port_conv2d( - fused_block_layer.conv1, - hf_block_prefix + "conv_exp", - port_bias=False, - ) - conv_pw_count += 1 - port_batch_normalization( - fused_block_layer.bn1, - hf_block_prefix + f"bn{bn_count}", - ) - bn_count += 1 + port_conv2d( + fused_block_layer.conv1, + hf_block_prefix + "conv_exp", + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + fused_block_layer.bn1, + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 if 0 < se_ratio <= 1: # Squeeze and Excite @@ -385,6 +392,20 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 + elif block_type == "cba": + cba_block_layer = backbone.get_layer(keras_block_prefix) + # Initial Expansion Conv + port_conv2d( + cba_block_layer.conv1, + hf_block_prefix + "conv_pw", + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + 
cba_block_layer.bn1, + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 # Head/Top port_conv2d(backbone.get_layer("top_conv"), "conv_head", port_bias=False) From e2adf4732ff06ed88608d3482bcae0aa62e65d80 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 11 Nov 2024 14:19:23 -0800 Subject: [PATCH 11/17] add ConvBNAct Block and tiny variant --- keras_hub/src/models/efficientnet/convbnact.py | 7 ++----- keras_hub/src/models/efficientnet/convbnact_test.py | 10 ---------- .../src/models/efficientnet/efficientnet_backbone.py | 3 +-- keras_hub/src/utils/timm/convert_efficientnet.py | 2 +- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/keras_hub/src/models/efficientnet/convbnact.py b/keras_hub/src/models/efficientnet/convbnact.py index 01ae845834..5d8e379708 100644 --- a/keras_hub/src/models/efficientnet/convbnact.py +++ b/keras_hub/src/models/efficientnet/convbnact.py @@ -43,7 +43,6 @@ def __init__( self, input_filters, output_filters, - expand_ratio=1, kernel_size=3, strides=1, data_format="channels_last", @@ -57,7 +56,6 @@ def __init__( super().__init__(**kwargs) self.input_filters = input_filters self.output_filters = output_filters - self.expand_ratio = expand_ratio self.kernel_size = kernel_size self.strides = strides self.data_format = data_format @@ -66,7 +64,6 @@ def __init__( self.activation = activation self.dropout = dropout self.nores = nores - self.filters = self.input_filters * self.expand_ratio padding_pixels = kernel_size // 2 self.conv1_pad = keras.layers.ZeroPadding2D( @@ -74,7 +71,7 @@ def __init__( name=self.name + "conv_pad", ) self.conv1 = keras.layers.Conv2D( - filters=self.filters, + filters=self.output_filters, kernel_size=kernel_size, strides=strides, kernel_initializer=CONV_KERNEL_INITIALIZER, @@ -120,7 +117,7 @@ def call(self, inputs): x = self.dropout_layer(x) x = keras.layers.Add(name=self.name + "add")([x, inputs]) return x - + def get_config(self): config = { "input_filters": self.input_filters, diff --git a/keras_hub/src/models/efficientnet/convbnact_test.py b/keras_hub/src/models/efficientnet/convbnact_test.py index 54f8957b04..720ed57365 100644 --- a/keras_hub/src/models/efficientnet/convbnact_test.py +++ b/keras_hub/src/models/efficientnet/convbnact_test.py @@ -20,13 +20,3 @@ def test_different_input_output_shapes(self): output = layer(inputs) self.assertEquals(output.shape, (1, 64, 64, 48)) self.assertLen(output, 1) - - def test_squeeze_excitation_ratio(self): - inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") - layer = ConvBNActBlock( - input_filters=32, output_filters=48, se_ratio=0.25 - ) - - output = layer(inputs) - self.assertEquals(output.shape, (1, 64, 64, 48)) - self.assertLen(output, 1) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 29459e65ac..880e41912f 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -261,11 +261,10 @@ def __init__( name=block_name, ) x = block(x) - else: # cba block + else: # cba block block = ConvBNActBlock( input_filters=input_filters, output_filters=output_filters, - expand_ratio=stackwise_expansion_ratios[i], kernel_size=stackwise_kernel_sizes[i], strides=strides, data_format=data_format, diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 322c578e87..58214ea806 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ 
b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -397,7 +397,7 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): # Initial Expansion Conv port_conv2d( cba_block_layer.conv1, - hf_block_prefix + "conv_pw", + hf_block_prefix + "conv", port_bias=False, ) conv_pw_count += 1 From 753caaa256e8ed205bf451600e77f36bdb585de1 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 11 Nov 2024 14:43:50 -0800 Subject: [PATCH 12/17] adds preset data --- .../efficientnet/efficientnet_presets.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/keras_hub/src/models/efficientnet/efficientnet_presets.py b/keras_hub/src/models/efficientnet/efficientnet_presets.py index 2a3c279350..a40aec949a 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_presets.py +++ b/keras_hub/src/models/efficientnet/efficientnet_presets.py @@ -65,4 +65,43 @@ }, "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet", }, + "efficientnet2_rw_m_agc_imagenet": { + "metadata": { + "description": ( + "EfficientNet-v2 Medium model trained on the ImageNet 1k " + "dataset with adaptive gradient clipping." + ), + "params": 53236442, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/2104.00298", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet2_rw_m_agc_imagenet", + }, + "efficientnet2_rw_s_ra2_imagenet": { + "metadata": { + "description": ( + "EfficientNet-v2 Small model trained on the ImageNet 1k " + "dataset with RandAugment2 recipe." + ), + "params": 23941296, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/2104.00298", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet2_rw_s_ra2_imagenet", + }, + "efficientnet2_rw_t_ra2_imagenet": { + "metadata": { + "description": ( + "EfficientNet-v2 Tiny model trained on the ImageNet 1k " + "dataset with RandAugment2 recipe." 
+ ), + "params": 13649388, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/2104.00298", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet2_rw_t_ra2_imagenet", + }, } From 041de38219c458a7231acfbf4bcba3aab1f9f421 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 11 Nov 2024 14:59:57 -0800 Subject: [PATCH 13/17] updated correct config --- .../src/models/efficientnet/efficientnet_backbone.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 880e41912f..b86a5431c8 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -169,6 +169,7 @@ def __init__( assert len(depth_coefficient) == num_stacks else: depth_coefficient = (depth_coefficient,) * num_stacks + dc_originally_scalar = True for i in range(num_stacks): num_repeats = stackwise_num_repeats[i] @@ -315,7 +316,12 @@ def __init__( # === Config === self.width_coefficient = width_coefficient - self.depth_coefficient = depth_coefficient + + if dc_originally_scalar: + self.depth_coefficient = depth_coefficient[0] + else: + self.depth_coefficient = depth_coefficient + self.dropout = dropout self.depth_divisor = depth_divisor self.min_depth = min_depth @@ -607,6 +613,6 @@ def get_conv_constructor(conv_type): else: raise ValueError( "Expected `conv_type` to be " - "one of 'unfused', 'fused', but got " + "one of 'unfused', 'fused', 'cba', but got " f"`conv_type={conv_type}`" ) From 9eacf214f852356f9117c609cc2e40372629c868 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 18 Nov 2024 12:19:06 -0800 Subject: [PATCH 14/17] resolve merge conflicts --- .../efficientnet/efficientnet_presets.py | 39 ------------------- .../src/utils/timm/convert_efficientnet.py | 20 ++++------ 2 files changed, 8 insertions(+), 51 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_presets.py b/keras_hub/src/models/efficientnet/efficientnet_presets.py index 5a538bf406..f47c72bf80 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_presets.py +++ b/keras_hub/src/models/efficientnet/efficientnet_presets.py @@ -124,45 +124,6 @@ }, "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b5_sw_ft_imagenet/1", }, - "efficientnet_el_ra_imagenet": { - "metadata": { - "description": ( - "EfficientNet-EdgeTPU Large model trained on the ImageNet 1k " - "dataset with RandAugment recipe." - ), - "params": 10589712, - "official_name": "EfficientNet", - "path": "efficientnet", - "model_card": "https://arxiv.org/abs/1905.11946", - }, - "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_el_ra_imagenet/1", - }, - "efficientnet_em_ra2_imagenet": { - "metadata": { - "description": ( - "EfficientNet-EdgeTPU Medium model trained on the ImageNet 1k " - "dataset with RandAugment2 recipe." - ), - "params": 6899496, - "official_name": "EfficientNet", - "path": "efficientnet", - "model_card": "https://arxiv.org/abs/1905.11946", - }, - "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_em_ra2_imagenet/1", - }, - "efficientnet_es_ra_imagenet": { - "metadata": { - "description": ( - "EfficientNet-EdgeTPU Small model trained on the ImageNet 1k " - "dataset with RandAugment recipe." 
- ), - "params": 5438392, - "official_name": "EfficientNet", - "path": "efficientnet", - "model_card": "https://arxiv.org/abs/1905.11946", - }, - "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_es_ra_imagenet/1", - }, "efficientnet_el_ra_imagenet": { "metadata": { "description": ( diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 8a719a7f73..b4ece53b42 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -325,18 +325,14 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): port_conv2d( fused_block_layer.conv1, hf_block_prefix + "conv_exp", - if expansion_ratio != 1: - port_conv2d( - fused_block_layer.conv1, - hf_block_prefix + "conv_exp", - port_bias=False, - ) - conv_pw_count += 1 - port_batch_normalization( - fused_block_layer.bn1, - hf_block_prefix + f"bn{bn_count}", - ) - bn_count += 1 + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + fused_block_layer.bn1, + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 if 0 < se_ratio <= 1: # Squeeze and Excite From a84bddb06df2319b83d08ffea70803429f617c21 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 21 Nov 2024 13:37:44 -0800 Subject: [PATCH 15/17] review updates --- .../efficientnet/{convbnact.py => cba.py} | 43 +++--- .../{convbnact_test.py => cba_test.py} | 8 +- .../efficientnet/efficientnet_backbone.py | 130 +++++++++--------- .../src/models/efficientnet/fusedmbconv.py | 6 + .../src/utils/timm/convert_efficientnet.py | 12 +- 5 files changed, 100 insertions(+), 99 deletions(-) rename keras_hub/src/models/efficientnet/{convbnact.py => cba.py} (77%) rename keras_hub/src/models/efficientnet/{convbnact_test.py => cba_test.py} (69%) diff --git a/keras_hub/src/models/efficientnet/convbnact.py b/keras_hub/src/models/efficientnet/cba.py similarity index 77% rename from keras_hub/src/models/efficientnet/convbnact.py rename to keras_hub/src/models/efficientnet/cba.py index 5d8e379708..54fd8d87e9 100644 --- a/keras_hub/src/models/efficientnet/convbnact.py +++ b/keras_hub/src/models/efficientnet/cba.py @@ -12,27 +12,28 @@ } -class ConvBNActBlock(keras.layers.Layer): +class CBABlock(keras.layers.Layer): """ Args: input_filters: int, the number of input filters output_filters: int, the number of output filters - expand_ratio: default 1, the ratio by which input_filters are multiplied - to expand the structure in the middle expansion phase kernel_size: default 3, the kernel_size to apply to the expansion phase convolutions strides: default 1, the strides to apply to the expansion phase convolutions - se_ratio: default 0.0, The filters used in the Squeeze-Excitation phase, - and are chosen as the maximum between 1 and input_filters*se_ratio + data_format: str, channels_last (default) or channels_first, expects + tensors to be of shape (N, H, W, C) or (N, C, H, W) respectively batch_norm_momentum: default 0.9, the BatchNormalization momentum + batch_norm_epsilon: default 1e-3, the BatchNormalization epsilon activation: default "swish", the activation function used between convolution operations dropout: float, the optional dropout rate to apply before the output convolution, defaults to 0.2 + nores: bool, default False, forces no residual connection if True, + otherwise allows it if False. 
Returns: - A tensor representing a feature map, passed through the FusedMBConv + A tensor representing a feature map, passed through the ConvBNAct block Note: @@ -119,18 +120,20 @@ def call(self, inputs): return x def get_config(self): - config = { - "input_filters": self.input_filters, - "output_filters": self.output_filters, - "kernel_size": self.kernel_size, - "strides": self.strides, - "data_format": self.data_format, - "batch_norm_momentum": self.batch_norm_momentum, - "batch_norm_epsilon": self.batch_norm_epsilon, - "activation": self.activation, - "dropout": self.dropout, - "nores": self.nores, - } + config = super().get_config() + config.update( + { + "input_filters": self.input_filters, + "output_filters": self.output_filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "data_format": self.data_format, + "batch_norm_momentum": self.batch_norm_momentum, + "batch_norm_epsilon": self.batch_norm_epsilon, + "activation": self.activation, + "dropout": self.dropout, + "nores": self.nores, + } + ) - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + return config diff --git a/keras_hub/src/models/efficientnet/convbnact_test.py b/keras_hub/src/models/efficientnet/cba_test.py similarity index 69% rename from keras_hub/src/models/efficientnet/convbnact_test.py rename to keras_hub/src/models/efficientnet/cba_test.py index 720ed57365..ec028b1239 100644 --- a/keras_hub/src/models/efficientnet/convbnact_test.py +++ b/keras_hub/src/models/efficientnet/cba_test.py @@ -1,13 +1,13 @@ import keras -from keras_hub.src.models.efficientnet.convbnact import ConvBNActBlock +from keras_hub.src.models.efficientnet.cba import CBABlock from keras_hub.src.tests.test_case import TestCase -class ConvBNActBlockTest(TestCase): +class CBABlockTest(TestCase): def test_same_input_output_shapes(self): inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") - layer = ConvBNActBlock(input_filters=32, output_filters=32) + layer = CBABlock(input_filters=32, output_filters=32) output = layer(inputs) self.assertEquals(output.shape, (1, 64, 64, 32)) @@ -15,7 +15,7 @@ def test_same_input_output_shapes(self): def test_different_input_output_shapes(self): inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") - layer = ConvBNActBlock(input_filters=32, output_filters=48) + layer = CBABlock(input_filters=32, output_filters=48) output = layer(inputs) self.assertEquals(output.shape, (1, 64, 64, 48)) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 3f17cad590..1dd520e04e 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -3,7 +3,7 @@ import keras from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.models.efficientnet.convbnact import ConvBNActBlock +from keras_hub.src.models.efficientnet.cba import CBABlock from keras_hub.src.models.efficientnet.fusedmbconv import FusedMBConvBlock from keras_hub.src.models.efficientnet.mbconv import MBConvBlock from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone @@ -27,15 +27,12 @@ class EfficientNetBackbone(FeaturePyramidBackbone): (https://arxiv.org/abs/2104.00298) (ICML 2021) Args: - width_coefficient: float, scaling coefficient for network width. - depth_coefficient: float, scaling coefficient for network depth. - dropout: float, dropout rate at skip connections. 
The default
-            value is set to 0.2.
-        depth_divisor: integer, a unit of network width. The default value is
-            set to 8.
-        activation: activation function to use between each convolutional layer.
-        input_shape: optional shape tuple, it should have exactly 3 input
-            channels.
+        stackwise_width_coefficient: list[float] or float, scaling coefficient
+            for network width. If single float, it is assumed that this value
+            applies to all stacks.
+        stackwise_depth_coefficient: list[float] or float, scaling coefficient
+            for network depth. If single float, it is assumed that this value
+            applies to all stacks.
         stackwise_kernel_sizes: list of ints, the kernel sizes used for each
             conv block.
         stackwise_num_repeats: list of ints, number of times to repeat each
@@ -62,8 +59,17 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
         stackwise_nores_option: list of bools, toggles if residual connection
             is not used. If False (default), the stack will use residual
             connections, otherwise not.
+        dropout: float, dropout rate at skip connections. The default
+            value is set to 0.2.
+        depth_divisor: integer, a unit of network width. The default value is
+            set to 8.
         min_depth: integer, minimum number of filters. Can be None and ignored
             if use_depth_divisor_as_min_depth is set to True.
+        activation: activation function to use between each convolutional layer.
+        input_shape: optional shape tuple, it should have exactly 3 input
+            channels.
+
+
         include_initial_padding: bool, whether to include initial zero padding
             (as per v1).
         use_depth_divisor_as_min_depth: bool, whether to use depth_divisor as
@@ -100,8 +106,8 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
     def __init__(
         self,
         *,
-        width_coefficient,
-        depth_coefficient,
+        stackwise_width_coefficient=None,
+        stackwise_depth_coefficient=None,
         stackwise_kernel_sizes,
         stackwise_num_repeats,
         stackwise_input_filters,
@@ -128,6 +134,16 @@ def __init__(
         num_features=1280,
         **kwargs,
     ):
+        num_stacks = len(stackwise_kernel_sizes)
+        if "depth_coefficient" in kwargs:
+            stackwise_depth_coefficient = [
+                kwargs.pop("depth_coefficient")
+            ] * num_stacks
+        if "width_coefficient" in kwargs:
+            stackwise_width_coefficient = [
+                kwargs.pop("width_coefficient")
+            ] * num_stacks
+
         image_input = keras.layers.Input(shape=input_shape)
         x = image_input  # Intermediate result.
@@ -140,7 +156,7 @@ def __init__(
         # Build stem
         stem_filters = round_filters(
             filters=stackwise_input_filters[0],
-            width_coefficient=width_coefficient,
+            width_coefficient=stackwise_width_coefficient[0],
             min_depth=min_depth,
             depth_divisor=depth_divisor,
             use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth,
@@ -172,26 +188,19 @@ def __init__(
         self._pyramid_outputs = {}
         curr_pyramid_level = 1
 
-        num_stacks = len(stackwise_kernel_sizes)
-
-        if isinstance(depth_coefficient, tuple):
-            assert len(depth_coefficient) == num_stacks
-        else:
-            depth_coefficient = (depth_coefficient,) * num_stacks
-            dc_originally_scalar = True
-
         for i in range(num_stacks):
             num_repeats = stackwise_num_repeats[i]
             input_filters = stackwise_input_filters[i]
             output_filters = stackwise_output_filters[i]
             force_input_filters = stackwise_force_input_filters[i]
             nores = stackwise_nores_option[i]
-            stack_depth_coefficient = depth_coefficient[i]
+            stack_width_coefficient = stackwise_width_coefficient[i]
+            stack_depth_coefficient = stackwise_depth_coefficient[i]
 
             # Update block input and output filters based on depth multiplier.
input_filters = round_filters( filters=input_filters, - width_coefficient=width_coefficient, + width_coefficient=stack_width_coefficient, min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -199,7 +208,7 @@ def __init__( ) output_filters = round_filters( filters=output_filters, - width_coefficient=width_coefficient, + width_coefficient=stack_width_coefficient, min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -227,7 +236,7 @@ def __init__( if force_input_filters > 0: input_filters = round_filters( filters=force_input_filters, - width_coefficient=width_coefficient, + width_coefficient=stack_width_coefficient, min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -254,44 +263,36 @@ def __init__( batch_norm_epsilon=batch_norm_epsilon, name=block_name, ) - elif stackwise_block_type in ("fused", "unfused"): - block = get_conv_constructor(stackwise_block_type)( - input_filters=input_filters, - output_filters=output_filters, - expand_ratio=stackwise_expansion_ratios[i], - kernel_size=stackwise_kernel_sizes[i], - strides=strides, - data_format=data_format, - se_ratio=squeeze_and_excite_ratio, - activation=activation, - dropout=dropout * block_id / blocks, - batch_norm_momentum=batch_norm_momentum, - batch_norm_epsilon=batch_norm_epsilon, - nores=nores, - name=block_name, - ) - x = block(x) - else: # cba block - block = ConvBNActBlock( - input_filters=input_filters, - output_filters=output_filters, - kernel_size=stackwise_kernel_sizes[i], - strides=strides, - data_format=data_format, - activation=activation, - dropout=dropout * block_id / blocks, - batch_norm_momentum=batch_norm_momentum, - batch_norm_epsilon=batch_norm_epsilon, - nores=nores, - name=block_name, - ) + else: + constructor = get_conv_constructor(stackwise_block_type) + block_kwargs = { + "input_filters": input_filters, + "output_filters": output_filters, + "kernel_size": stackwise_kernel_sizes[i], + "strides": strides, + "data_format": data_format, + "activation": activation, + "dropout": dropout * block_id / blocks, + "batch_norm_momentum": batch_norm_momentum, + "batch_norm_epsilon": batch_norm_epsilon, + "nores": nores, + "name": block_name, + } + + if stackwise_block_type in ("fused", "unfused"): + block_kwargs["expand_ratio"] = ( + stackwise_expansion_ratios[i] + ) + block_kwargs["se_ratio"] = squeeze_and_excite_ratio + + block = constructor(**block_kwargs) x = block(x) block_id += 1 # Build top top_filters = round_filters( filters=num_features, - width_coefficient=width_coefficient, + width_coefficient=stackwise_width_coefficient[-1], min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -324,13 +325,8 @@ def __init__( super().__init__(inputs=image_input, outputs=x, **kwargs) # === Config === - self.width_coefficient = width_coefficient - - if dc_originally_scalar: - self.depth_coefficient = depth_coefficient[0] - else: - self.depth_coefficient = depth_coefficient - + self.stackwise_width_coefficient = stackwise_width_coefficient + self.stackwise_depth_coefficient = stackwise_depth_coefficient self.dropout = dropout self.depth_divisor = depth_divisor self.min_depth = min_depth @@ -360,8 +356,8 @@ def get_config(self): config = super().get_config() config.update( { - "width_coefficient": self.width_coefficient, - "depth_coefficient": self.depth_coefficient, + 
"stackwise_width_coefficient": self.stackwise_width_coefficient, + "stackwise_depth_coefficient": self.stackwise_depth_coefficient, "dropout": self.dropout, "depth_divisor": self.depth_divisor, "min_depth": self.min_depth, @@ -618,7 +614,7 @@ def get_conv_constructor(conv_type): elif conv_type == "fused": return FusedMBConvBlock elif conv_type == "cba": - return ConvBNActBlock + return CBABlock else: raise ValueError( "Expected `conv_type` to be " diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 2d01c4663a..d0b7f79e70 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -44,6 +44,8 @@ class FusedMBConvBlock(keras.layers.Layer): convolutions strides: default 1, the strides to apply to the expansion phase convolutions + data_format: str, channels_last (default) or channels_first, expects + tensors to be of shape (N, H, W, C) or (N, C, H, W) respectively se_ratio: default 0.0, The filters used in the Squeeze-Excitation phase, and are chosen as the maximum between 1 and input_filters*se_ratio batch_norm_momentum: default 0.9, the BatchNormalization momentum @@ -54,6 +56,10 @@ class FusedMBConvBlock(keras.layers.Layer): convolution operations dropout: float, the optional dropout rate to apply before the output convolution, defaults to 0.2 + nores: bool, default False, forces no residual connection if True, + otherwise allows it if False. + projection_kernel_size: default 1, the kernel_size to apply to the + output projection phase convolution Returns: A tensor representing a feature map, passed through the FusedMBConv diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index c85a9efb2d..8d4600c47b 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -93,7 +93,7 @@ }, "rw_m": { "width_coefficient": 1.2, - "depth_coefficient": (1.2,) * 4 + (1.6,) * 2, + "stackwise_depth_coefficient": [1.2] * 4 + [1.6] * 2, "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], "stackwise_input_filters": [24, 24, 48, 64, 128, 160], @@ -247,18 +247,14 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): # Stages num_stacks = len(backbone.stackwise_kernel_sizes) - depth_coefficient = VARIANT_MAP[variant]["depth_coefficient"] - if isinstance(depth_coefficient, tuple): - assert len(depth_coefficient) == num_stacks - else: - depth_coefficient = (depth_coefficient,) * num_stacks - for stack_index in range(num_stacks): block_type = backbone.stackwise_block_types[stack_index] expansion_ratio = backbone.stackwise_expansion_ratios[stack_index] repeats = backbone.stackwise_num_repeats[stack_index] - stack_depth_coefficient = depth_coefficient[stack_index] + stack_depth_coefficient = backbone.stackwise_depth_coefficient[ + stack_index + ] repeats = int(math.ceil(stack_depth_coefficient * repeats)) From e1a597db14bcd39f0161c26a19ab092056d8acd0 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 25 Nov 2024 14:46:00 -0800 Subject: [PATCH 16/17] add projection_activation argument to fused blocks to fix timm discrepancy --- .../models/efficientnet/efficientnet_backbone.py | 5 +++++ keras_hub/src/models/efficientnet/fusedmbconv.py | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py 
b/keras_hub/src/models/efficientnet/efficientnet_backbone.py
index 1dd520e04e..755e2e021e 100644
--- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py
+++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py
@@ -285,6 +285,11 @@ def __init__(
                     )
                     block_kwargs["se_ratio"] = squeeze_and_excite_ratio
 
+                    if stackwise_block_type == "fused":
+                        block_kwargs["projection_activation"] = (
+                            projection_activation
+                        )
+
                     block = constructor(**block_kwargs)
                     x = block(x)
                 block_id += 1
diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py
index d0b7f79e70..4a022d5fe2 100644
--- a/keras_hub/src/models/efficientnet/fusedmbconv.py
+++ b/keras_hub/src/models/efficientnet/fusedmbconv.py
@@ -54,6 +54,8 @@ class FusedMBConvBlock(keras.layers.Layer):
             by 0 errors.
         activation: default "swish", the activation function used between
             convolution operations
+        projection_activation: default None, the activation function to use
+            after the output projection convolution
         dropout: float, the optional dropout rate to apply before the output
             convolution, defaults to 0.2
         nores: bool, default False, forces no residual connection if True,
@@ -81,6 +83,7 @@ def __init__(
         batch_norm_momentum=0.9,
         batch_norm_epsilon=1e-3,
         activation="swish",
+        projection_activation=None,
         dropout=0.2,
         nores=False,
         projection_kernel_size=1,
@@ -97,6 +100,7 @@ def __init__(
         self.batch_norm_momentum = batch_norm_momentum
         self.batch_norm_epsilon = batch_norm_epsilon
         self.activation = activation
+        self.projection_activation = projection_activation
         self.dropout = dropout
         self.nores = nores
         self.projection_kernel_size = projection_kernel_size
@@ -171,6 +175,11 @@ def __init__(
             name=self.name + "project_bn",
         )
 
+        if self.projection_activation:
+            self.projection_act = keras.layers.Activation(
+                self.projection_activation, name=self.name + "projection_act"
+            )
+
         if self.dropout:
             self.dropout_layer = keras.layers.Dropout(
                 self.dropout,
@@ -213,8 +222,8 @@ def call(self, inputs):
         x = self.output_conv_pad(x)
         x = self.output_conv(x)
         x = self.bn2(x)
-        if self.expand_ratio == 1:
-            x = self.act(x)
+        if self.expand_ratio == 1 and self.projection_activation:
+            x = self.projection_act(x)
 
         # Residual:
         if (
@@ -239,6 +248,7 @@ def get_config(self):
             "batch_norm_momentum": self.batch_norm_momentum,
             "batch_norm_epsilon": self.batch_norm_epsilon,
             "activation": self.activation,
+            "projection_activation": self.projection_activation,
             "dropout": self.dropout,
             "nores": self.nores,
             "projection_kernel_size": self.projection_kernel_size,

From 5ed664ff96424592ec3fd6f7ea79c7aa0bb53e98 Mon Sep 17 00:00:00 2001
From: Piseth Ky
Date: Tue, 3 Dec 2024 15:13:46 -0800
Subject: [PATCH 17/17] additional review updates

---
 keras_hub/src/models/efficientnet/cba.py     | 22 ++++----
 .../efficientnet/efficientnet_backbone.py    | 28 +++++-----
 .../src/models/efficientnet/fusedmbconv.py   | 28 +++++-----
 keras_hub/src/models/efficientnet/mbconv.py  | 30 ++++++-----
 .../src/utils/timm/convert_efficientnet.py   | 54 +++++++++----------
 5 files changed, 84 insertions(+), 78 deletions(-)

diff --git a/keras_hub/src/models/efficientnet/cba.py b/keras_hub/src/models/efficientnet/cba.py
index 54fd8d87e9..4e145282aa 100644
--- a/keras_hub/src/models/efficientnet/cba.py
+++ b/keras_hub/src/models/efficientnet/cba.py
@@ -2,15 +2,6 @@
 
 BN_AXIS = 3
 
-CONV_KERNEL_INITIALIZER = {
-    "class_name": "VarianceScaling",
-    "config": {
-        "scale": 2.0,
-        "mode": "fan_out",
-        "distribution": "truncated_normal",
-    },
-}
-
 
 class CBABlock(keras.layers.Layer):
     """
@@ 
-75,7 +66,7 @@ def __init__( filters=self.output_filters, kernel_size=kernel_size, strides=strides, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), padding="valid", data_format=data_format, use_bias=False, @@ -98,6 +89,17 @@ def __init__( name=self.name + "drop", ) + def _conv_kernel_initializer( + self, + scale=2.0, + mode="fan_out", + distribution="truncated_normal", + seed=None, + ): + return keras.initializers.VarianceScaling( + scale=scale, mode=mode, distribution=distribution, seed=seed + ) + def build(self, input_shape): if self.name is None: self.name = keras.backend.get_uid("block0") diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 755e2e021e..c71979ad0d 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -27,10 +27,10 @@ class EfficientNetBackbone(FeaturePyramidBackbone): (https://arxiv.org/abs/2104.00298) (ICML 2021) Args: - stackwise_width_coefficient: list[float] or float, scaling coefficient + stackwise_width_coefficients: list[float], scaling coefficient for network width. If single float, it is assumed that this value applies to all stacks. - stackwise_depth_coefficient: list[float] or float, scaling coefficient + stackwise_depth_coefficients: list[float], scaling coefficient for network depth. If single float, it is assumed that this value applies to all stacks. stackwise_kernel_sizes: list of ints, the kernel sizes used for each @@ -106,8 +106,8 @@ class EfficientNetBackbone(FeaturePyramidBackbone): def __init__( self, *, - stackwise_width_coefficient=None, - stackwise_depth_coefficient=None, + stackwise_width_coefficients=None, + stackwise_depth_coefficients=None, stackwise_kernel_sizes, stackwise_num_repeats, stackwise_input_filters, @@ -136,11 +136,11 @@ def __init__( ): num_stacks = len(stackwise_kernel_sizes) if "depth_coefficient" in kwargs: - stackwise_depth_coefficient = [ + stackwise_depth_coefficients = [ kwargs.pop("depth_coefficient") ] * num_stacks if "width_coefficient" in kwargs: - stackwise_width_coefficient = [ + stackwise_width_coefficients = [ kwargs.pop("width_coefficient") ] * num_stacks @@ -156,7 +156,7 @@ def __init__( # Build stem stem_filters = round_filters( filters=stackwise_input_filters[0], - width_coefficient=stackwise_width_coefficient[0], + width_coefficient=stackwise_width_coefficients[0], min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -194,8 +194,8 @@ def __init__( output_filters = stackwise_output_filters[i] force_input_filters = stackwise_force_input_filters[i] nores = stackwise_nores_option[i] - stack_width_coefficient = stackwise_width_coefficient[i] - stack_depth_coefficient = stackwise_depth_coefficient[i] + stack_width_coefficient = stackwise_width_coefficients[i] + stack_depth_coefficient = stackwise_depth_coefficients[i] # Update block input and output filters based on depth multiplier. 
input_filters = round_filters( @@ -297,7 +297,7 @@ def __init__( # Build top top_filters = round_filters( filters=num_features, - width_coefficient=stackwise_width_coefficient[-1], + width_coefficient=stackwise_width_coefficients[-1], min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -330,8 +330,8 @@ def __init__( super().__init__(inputs=image_input, outputs=x, **kwargs) # === Config === - self.stackwise_width_coefficient = stackwise_width_coefficient - self.stackwise_depth_coefficient = stackwise_depth_coefficient + self.stackwise_width_coefficients = stackwise_width_coefficients + self.stackwise_depth_coefficients = stackwise_depth_coefficients self.dropout = dropout self.depth_divisor = depth_divisor self.min_depth = min_depth @@ -361,8 +361,8 @@ def get_config(self): config = super().get_config() config.update( { - "stackwise_width_coefficient": self.stackwise_width_coefficient, - "stackwise_depth_coefficient": self.stackwise_depth_coefficient, + "stackwise_width_coefficients": self.stackwise_width_coefficients, + "stackwise_depth_coefficients": self.stackwise_depth_coefficients, "dropout": self.dropout, "depth_divisor": self.depth_divisor, "min_depth": self.min_depth, diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 4a022d5fe2..01934b7622 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -2,15 +2,6 @@ BN_AXIS = 3 -CONV_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 2.0, - "mode": "fan_out", - "distribution": "truncated_normal", - }, -} - class FusedMBConvBlock(keras.layers.Layer): """Implementation of the FusedMBConv block @@ -116,7 +107,7 @@ def __init__( filters=self.filters, kernel_size=kernel_size, strides=strides, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), padding="valid", data_format=data_format, use_bias=False, @@ -138,7 +129,7 @@ def __init__( padding="same", data_format=data_format, activation=self.activation, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), name=self.name + "se_reduce", ) @@ -148,7 +139,7 @@ def __init__( padding="same", data_format=data_format, activation="sigmoid", - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), name=self.name + "se_expand", ) @@ -161,7 +152,7 @@ def __init__( filters=self.output_filters, kernel_size=projection_kernel_size, strides=1, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), padding="valid", data_format=data_format, use_bias=False, @@ -187,6 +178,17 @@ def __init__( name=self.name + "drop", ) + def _conv_kernel_initializer( + self, + scale=2.0, + mode="fan_out", + distribution="truncated_normal", + seed=None, + ): + return keras.initializers.VarianceScaling( + scale=scale, mode=mode, distribution=distribution, seed=seed + ) + def build(self, input_shape): if self.name is None: self.name = keras.backend.get_uid("block0") diff --git a/keras_hub/src/models/efficientnet/mbconv.py b/keras_hub/src/models/efficientnet/mbconv.py index 1cd9263c04..20afab4e85 100644 --- a/keras_hub/src/models/efficientnet/mbconv.py +++ b/keras_hub/src/models/efficientnet/mbconv.py @@ -2,15 +2,6 @@ BN_AXIS = 3 -CONV_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 2.0, - "mode": 
"fan_out", - "distribution": "truncated_normal", - }, -} - class MBConvBlock(keras.layers.Layer): def __init__( @@ -99,7 +90,7 @@ def __init__( filters=self.filters, kernel_size=1, strides=1, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), padding="same", data_format=data_format, use_bias=False, @@ -117,7 +108,7 @@ def __init__( self.depthwise = keras.layers.DepthwiseConv2D( kernel_size=self.kernel_size, strides=self.strides, - depthwise_initializer=CONV_KERNEL_INITIALIZER, + depthwise_initializer=self._conv_kernel_initializer(), padding="same", data_format=data_format, use_bias=False, @@ -137,7 +128,7 @@ def __init__( padding="same", data_format=data_format, activation=self.activation, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), name=self.name + "se_reduce", ) @@ -147,7 +138,7 @@ def __init__( padding="same", data_format=data_format, activation="sigmoid", - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), name=self.name + "se_expand", ) @@ -161,7 +152,7 @@ def __init__( filters=self.output_filters, kernel_size=projection_kernel_size, strides=1, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), padding="valid", data_format=data_format, use_bias=False, @@ -182,6 +173,17 @@ def __init__( name=self.name + "drop", ) + def _conv_kernel_initializer( + self, + scale=2.0, + mode="fan_out", + distribution="truncated_normal", + seed=None, + ): + return keras.initializers.VarianceScaling( + scale=scale, mode=mode, distribution=distribution, seed=seed + ) + def build(self, input_shape): if self.name is None: self.name = keras.backend.get_uid("block0") diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 8d4600c47b..fcedb2ecd1 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -11,44 +11,44 @@ VARIANT_MAP = { "b0": { - "width_coefficient": 1.0, - "depth_coefficient": 1.0, + "stackwise_width_coefficients": [1.0] * 7, + "stackwise_depth_coefficients": [1.0] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b1": { - "width_coefficient": 1.0, - "depth_coefficient": 1.1, + "stackwise_width_coefficients": [1.0] * 7, + "stackwise_depth_coefficients": [1.1] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b2": { - "width_coefficient": 1.1, - "depth_coefficient": 1.2, + "stackwise_width_coefficients": [1.1] * 7, + "stackwise_depth_coefficients": [1.2] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b3": { - "width_coefficient": 1.2, - "depth_coefficient": 1.4, + "stackwise_width_coefficients": [1.2] * 7, + "stackwise_depth_coefficients": [1.4] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b4": { - "width_coefficient": 1.4, - "depth_coefficient": 1.8, + "stackwise_width_coefficients": [1.4] * 7, + "stackwise_depth_coefficients": [1.8] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b5": { - "width_coefficient": 1.6, - "depth_coefficient": 2.2, + "stackwise_width_coefficients": [1.6] * 7, + "stackwise_depth_coefficients": [2.2] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "lite0": { - "width_coefficient": 1.0, - "depth_coefficient": 1.0, + "stackwise_width_coefficients": [1.0] * 7, + "stackwise_depth_coefficients": [1.0] * 7, "stackwise_squeeze_and_excite_ratios": [0] * 7, "activation": "relu6", }, "el": { 
- "width_coefficient": 1.2, - "depth_coefficient": 1.4, + "stackwise_width_coefficients": [1.2] * 6, + "stackwise_depth_coefficients": [1.4] * 6, "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], "stackwise_input_filters": [32, 24, 32, 48, 96, 144], @@ -62,8 +62,8 @@ "activation": "relu", }, "em": { - "width_coefficient": 1.0, - "depth_coefficient": 1.1, + "stackwise_width_coefficients": [1.0] * 6, + "stackwise_depth_coefficients": [1.1] * 6, "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], "stackwise_input_filters": [32, 24, 32, 48, 96, 144], @@ -77,8 +77,8 @@ "activation": "relu", }, "es": { - "width_coefficient": 1.0, - "depth_coefficient": 1.0, + "stackwise_width_coefficients": [1.0] * 6, + "stackwise_depth_coefficients": [1.0] * 6, "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], "stackwise_input_filters": [32, 24, 32, 48, 96, 144], @@ -92,8 +92,8 @@ "activation": "relu", }, "rw_m": { - "width_coefficient": 1.2, - "stackwise_depth_coefficient": [1.2] * 4 + [1.6] * 2, + "stackwise_width_coefficients": [1.2] * 6, + "stackwise_depth_coefficients": [1.2] * 4 + [1.6] * 2, "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], "stackwise_input_filters": [24, 24, 48, 64, 128, 160], @@ -108,8 +108,8 @@ "num_features": 1792, }, "rw_s": { - "width_coefficient": 1.0, - "depth_coefficient": 1.0, + "stackwise_width_coefficients": [1.0] * 6, + "stackwise_depth_coefficients": [1.0] * 6, "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], "stackwise_input_filters": [24, 24, 48, 64, 128, 160], @@ -124,8 +124,8 @@ "num_features": 1792, }, "rw_t": { - "width_coefficient": 0.8, - "depth_coefficient": 0.9, + "stackwise_width_coefficients": [0.8] * 6, + "stackwise_depth_coefficients": [0.9] * 6, "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], "stackwise_input_filters": [24, 24, 48, 64, 128, 160], @@ -252,7 +252,7 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): block_type = backbone.stackwise_block_types[stack_index] expansion_ratio = backbone.stackwise_expansion_ratios[stack_index] repeats = backbone.stackwise_num_repeats[stack_index] - stack_depth_coefficient = backbone.stackwise_depth_coefficient[ + stack_depth_coefficient = backbone.stackwise_depth_coefficients[ stack_index ]