From 79110fbf200cea4d12e7efaa2f925de3f6c5c8ea Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 4 Nov 2024 16:33:06 -0800 Subject: [PATCH 01/17] WIP initially adding edge presets --- .../efficientnet/efficientnet_backbone.py | 12 ++ .../src/models/efficientnet/fusedmbconv.py | 10 +- .../src/utils/timm/convert_efficientnet.py | 161 +++++++++++++++--- .../convert_efficientnet_checkpoints.py | 13 +- 4 files changed, 160 insertions(+), 36 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 4016bb01e4..1deef08cf7 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -100,6 +100,7 @@ def __init__( stackwise_squeeze_and_excite_ratios, stackwise_strides, stackwise_block_types, + stackwise_force_input_filters=[0]*7, dropout=0.2, depth_divisor=8, min_depth=8, @@ -163,6 +164,7 @@ def __init__( num_repeats = stackwise_num_repeats[i] input_filters = stackwise_input_filters[i] output_filters = stackwise_output_filters[i] + force_input_filters = stackwise_force_input_filters[i] # Update block input and output filters based on depth multiplier. input_filters = round_filters( @@ -200,6 +202,16 @@ def __init__( self._pyramid_outputs[f"P{curr_pyramid_level}"] = x curr_pyramid_level += 1 + if force_input_filters > 0: + input_filters = round_filters( + filters=force_input_filters, + width_coefficient=width_coefficient, + min_depth=min_depth, + depth_divisor=depth_divisor, + use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, + cap_round_filter_decrease=cap_round_filter_decrease, + ) + # 97 is the start of the lowercase alphabet. letter_identifier = chr(j + 97) stackwise_block_type = stackwise_block_types[i] diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 96f55a22b8..fc17414bd6 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -107,12 +107,6 @@ def __init__( self.activation, name=self.name + "expand_activation" ) - self.bn2 = keras.layers.BatchNormalization( - axis=BN_AXIS, - momentum=self.batch_norm_momentum, - name=self.name + "bn", - ) - self.se_conv1 = keras.layers.Conv2D( self.filters_se, 1, @@ -144,7 +138,7 @@ def __init__( name=self.name + "project_conv", ) - self.bn3 = keras.layers.BatchNormalization( + self.bn2 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, name=self.name + "project_bn", @@ -192,7 +186,7 @@ def call(self, inputs): # Output phase: x = self.output_conv(x) - x = self.bn3(x) + x = self.bn2(x) if self.expand_ratio == 1: x = self.act(x) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 609c26d355..ad47e70c1f 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -18,6 +18,25 @@ "width_coefficient": 1.0, "depth_coefficient": 1.1, }, + "el": { + "width_coefficient": 1.2, + "depth_coefficient": 1.4, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + 
["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + }, + "em": { + + }, + "es": { + + }, } @@ -68,21 +87,21 @@ def convert_weights(backbone, loader, timm_config): timm_architecture = timm_config["architecture"] variant = "_".join(timm_architecture.split("_")[1:]) - def port_conv2d(keras_layer_name, hf_weight_prefix, port_bias=True): + def port_conv2d(keras_layer, hf_weight_prefix, port_bias=True): loader.port_weight( - backbone.get_layer(keras_layer_name).kernel, + keras_layer.kernel, hf_weight_key=f"{hf_weight_prefix}.weight", hook_fn=lambda x, _: np.transpose(x, (2, 3, 1, 0)), ) if port_bias: loader.port_weight( - backbone.get_layer(keras_layer_name).bias, + keras_layer.bias, hf_weight_key=f"{hf_weight_prefix}.bias", ) def port_depthwise_conv2d( - keras_layer_name, + keras_layer, hf_weight_prefix, port_bias=True, depth_multiplier=1, @@ -99,39 +118,39 @@ def convert_pt_conv2d_kernel(pt_kernel): ) loader.port_weight( - backbone.get_layer(keras_layer_name).kernel, + keras_layer.kernel, hf_weight_key=f"{hf_weight_prefix}.weight", hook_fn=lambda x, _: convert_pt_conv2d_kernel(x), ) if port_bias: loader.port_weight( - backbone.get_layer(keras_layer_name).bias, + keras_layer.bias, hf_weight_key=f"{hf_weight_prefix}.bias", ) - def port_batch_normalization(keras_layer_name, hf_weight_prefix): + def port_batch_normalization(keras_layer, hf_weight_prefix): loader.port_weight( - backbone.get_layer(keras_layer_name).gamma, + keras_layer.gamma, hf_weight_key=f"{hf_weight_prefix}.weight", ) loader.port_weight( - backbone.get_layer(keras_layer_name).beta, + keras_layer.beta, hf_weight_key=f"{hf_weight_prefix}.bias", ) loader.port_weight( - backbone.get_layer(keras_layer_name).moving_mean, + keras_layer.moving_mean, hf_weight_key=f"{hf_weight_prefix}.running_mean", ) loader.port_weight( - backbone.get_layer(keras_layer_name).moving_variance, + keras_layer.moving_variance, hf_weight_key=f"{hf_weight_prefix}.running_var", ) # do we need num batches tracked? # Stem - port_conv2d("stem_conv", "conv_stem", port_bias=False) - port_batch_normalization("stem_bn", "bn1") + port_conv2d(backbone.get_layer("stem_conv"), "conv_stem", port_bias=False) + port_batch_normalization(backbone.get_layer("stem_bn"), "bn1") # Stages num_stacks = len(backbone.stackwise_kernel_sizes) @@ -149,67 +168,157 @@ def port_batch_normalization(keras_layer_name, hf_weight_prefix): conv_pw_count = 0 bn_count = 1 - conv_pw_name_map = ["conv_pw", "conv_pwl"] # 97 is the start of the lowercase alphabet. letter_identifier = chr(block_idx + 97) + keras_block_prefix = f"block{stack_index+1}{letter_identifier}_" + hf_block_prefix = f"blocks.{stack_index}.{block_idx}." + if block_type == "v1": - keras_block_prefix = f"block{stack_index+1}{letter_identifier}_" - hf_block_prefix = f"blocks.{stack_index}.{block_idx}." 
+ conv_pw_name_map = ["conv_pw", "conv_pwl"] + # Initial Expansion Conv + if expansion_ratio != 1: + port_conv2d( + backbone.get_layer(keras_block_prefix + "expand_conv"), + hf_block_prefix + conv_pw_name_map[conv_pw_count], + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + backbone.get_layer(keras_block_prefix + "expand_bn"), + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 + + # Depthwise Conv + port_depthwise_conv2d( + backbone.get_layer(keras_block_prefix + "dwconv"), + hf_block_prefix + "conv_dw", + port_bias=False, + ) + port_batch_normalization( + backbone.get_layer(keras_block_prefix + "dwconv_bn"), + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 + + # Squeeze and Excite + port_conv2d( + backbone.get_layer(keras_block_prefix + "se_reduce"), + hf_block_prefix + "se.conv_reduce", + ) + port_conv2d( + backbone.get_layer(keras_block_prefix + "se_expand"), + hf_block_prefix + "se.conv_expand", + ) + + # Output/Projection + port_conv2d( + backbone.get_layer(keras_block_prefix + "project"), + hf_block_prefix + conv_pw_name_map[conv_pw_count], + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + backbone.get_layer(keras_block_prefix + "project_bn"), + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 + elif block_type == "fused": + fused_block_layer = backbone.get_layer(keras_block_prefix) + + # Initial Expansion Conv + if expansion_ratio != 1: + port_conv2d( + fused_block_layer.conv1, + hf_block_prefix + "conv_exp", + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + fused_block_layer.bn1, + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 + + # Squeeze and Excite + port_conv2d( + fused_block_layer.se_conv1, + hf_block_prefix + "se.conv_reduce", + ) + port_conv2d( + fused_block_layer.se_conv2, + hf_block_prefix + "se.conv_expand", + ) + + # Output/Projection + port_conv2d( + fused_block_layer.output_conv, + hf_block_prefix + "conv_pwl", + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + fused_block_layer.bn3, + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 + elif block_type == "unfused": + unfused_block_layer = backbone.get_layer(keras_block_prefix) # Initial Expansion Conv if expansion_ratio != 1: port_conv2d( - keras_block_prefix + "expand_conv", + unfused_block_layer.get_layer(keras_block_prefix + "expand_conv"), hf_block_prefix + conv_pw_name_map[conv_pw_count], port_bias=False, ) conv_pw_count += 1 port_batch_normalization( - keras_block_prefix + "expand_bn", + unfused_block_layer.get_layer(keras_block_prefix + "expand_bn"), hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 # Depthwise Conv port_depthwise_conv2d( - keras_block_prefix + "dwconv", + unfused_block_layer.get_layer(keras_block_prefix + "dwconv"), hf_block_prefix + "conv_dw", port_bias=False, ) port_batch_normalization( - keras_block_prefix + "dwconv_bn", + unfused_block_layer.get_layer(keras_block_prefix + "dwconv_bn"), hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 # Squeeze and Excite port_conv2d( - keras_block_prefix + "se_reduce", + unfused_block_layer.get_layer(keras_block_prefix + "se_reduce"), hf_block_prefix + "se.conv_reduce", ) port_conv2d( - keras_block_prefix + "se_expand", + unfused_block_layer.get_layer(keras_block_prefix + "se_expand"), hf_block_prefix + "se.conv_expand", ) # Output/Projection port_conv2d( - keras_block_prefix + "project", + unfused_block_layer.get_layer(keras_block_prefix + "project"), hf_block_prefix + conv_pw_name_map[conv_pw_count], port_bias=False, ) 
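                # timm numbers a block's convs and batch norms sequentially
                # (conv_pw/conv_pwl, bn1..bn3), so both counters advance after
                # each ported conv/bn pair; skipping the expansion conv when
                # expansion_ratio == 1 keeps the indices aligned with timm.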
conv_pw_count += 1 port_batch_normalization( - keras_block_prefix + "project_bn", + unfused_block_layer.get_layer(keras_block_prefix + "project_bn"), hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 # Head/Top - port_conv2d("top_conv", "conv_head", port_bias=False) - port_batch_normalization("top_bn", "bn2") + port_conv2d(backbone.get_layer("top_conv"), "conv_head", port_bias=False) + port_batch_normalization(backbone.get_layer("top_bn"), "bn2") def convert_head(task, loader, timm_config): diff --git a/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py b/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py index 5790d6130c..75810a19a9 100644 --- a/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py +++ b/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py @@ -2,9 +2,15 @@ Convert efficientnet checkpoints. python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ - --preset efficientnet_b0_ra_imagenet --upload_uri kaggle://kerashub/efficientnet/keras/efficientnet_b0_ra_imagenet + --preset efficientnet_b0_ra_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_b0_ra_imagenet python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ - --preset efficientnet_b1_ft_imagenet --upload_uri kaggle://kerashub/efficientnet/keras/efficientnet_b1_ft_imagenet + --preset efficientnet_b1_ft_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet_el_ra_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_el_ra_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet_em_ra2_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_em_ra2_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet_es_ra_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_es_ra_imagenet """ import os @@ -23,6 +29,9 @@ PRESET_MAP = { "efficientnet_b0_ra_imagenet": "timm/efficientnet_b0.ra_in1k", "efficientnet_b1_ft_imagenet": "timm/efficientnet_b1.ft_in1k", + "efficientnet_el_ra_imagenet": "timm/efficientnet_el.ra_in1k", + "efficientnet_em_ra2_imagenet": "timm/efficientnet_em.ra2_in1k", + "efficientnet_es_ra_imagenet": "timm/efficientnet_es.ra_in1k", } FLAGS = flags.FLAGS From 9cb5c1318f9e0f014d1c76ca7c6c3795a951dce3 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Tue, 5 Nov 2024 16:44:53 -0800 Subject: [PATCH 02/17] WIP el variant working --- .../efficientnet/efficientnet_backbone.py | 6 ++ .../src/models/efficientnet/fusedmbconv.py | 20 ++++- keras_hub/src/models/efficientnet/mbconv.py | 11 ++- .../src/utils/timm/convert_efficientnet.py | 81 ++++++++++--------- 4 files changed, 80 insertions(+), 38 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 1deef08cf7..a6f7639b5c 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -101,6 +101,7 @@ def __init__( stackwise_strides, stackwise_block_types, stackwise_force_input_filters=[0]*7, + stackwise_nores_option=[False]*7, dropout=0.2, depth_divisor=8, min_depth=8, @@ -165,6 +166,7 @@ def __init__( input_filters = stackwise_input_filters[i] output_filters = stackwise_output_filters[i] force_input_filters = stackwise_force_input_filters[i] + 
nores = stackwise_nores_option[i] # Update block input and output filters based on depth multiplier. input_filters = round_filters( @@ -244,6 +246,8 @@ def __init__( activation=activation, dropout=dropout * block_id / blocks, batch_norm_momentum=batch_norm_momentum, + batch_norm_epsilon=batch_norm_epsilon, + nores=nores, name=block_name, ) x = block(x) @@ -303,6 +307,7 @@ def __init__( self.stackwise_strides = stackwise_strides self.stackwise_block_types = stackwise_block_types + self.stackwise_force_input_filters=stackwise_force_input_filters, self.include_stem_padding = include_stem_padding self.use_depth_divisor_as_min_depth = use_depth_divisor_as_min_depth self.cap_round_filter_decrease = cap_round_filter_decrease @@ -330,6 +335,7 @@ def get_config(self): "stackwise_squeeze_and_excite_ratios": self.stackwise_squeeze_and_excite_ratios, "stackwise_strides": self.stackwise_strides, "stackwise_block_types": self.stackwise_block_types, + "stackwise_force_input_filters": self.stackwise_force_input_filters, "include_stem_padding": self.include_stem_padding, "use_depth_divisor_as_min_depth": self.use_depth_divisor_as_min_depth, "cap_round_filter_decrease": self.cap_round_filter_decrease, diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index fc17414bd6..9e06e5bee6 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -70,8 +70,10 @@ def __init__( data_format="channels_last", se_ratio=0.0, batch_norm_momentum=0.9, + batch_norm_epsilon=1e-3, activation="swish", dropout=0.2, + nores=False, **kwargs ): super().__init__(**kwargs) @@ -83,8 +85,10 @@ def __init__( self.data_format = data_format self.se_ratio = se_ratio self.batch_norm_momentum = batch_norm_momentum + self.batch_norm_epsilon = batch_norm_epsilon self.activation = activation self.dropout = dropout + self.nores = nores self.filters = self.input_filters * self.expand_ratio self.filters_se = max(1, int(input_filters * se_ratio)) @@ -101,6 +105,7 @@ def __init__( self.bn1 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, name=self.name + "expand_bn", ) self.act = keras.layers.Activation( @@ -141,6 +146,7 @@ def __init__( self.bn2 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, name=self.name + "project_bn", ) @@ -190,8 +196,18 @@ def call(self, inputs): if self.expand_ratio == 1: x = self.act(x) + # For EdgeTPU Version the stem output does not match the parameterized + # input filters, thus this check needs to be dynamic and not based + # on initial parameterization. This hack is ported from timm. 
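+        # (The probe below is kept commented out for reference; in this port
+        # the mismatch appears to be expressed statically via the
+        # stackwise_force_input_filters and nores options instead.)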
+ # if self.data_format == "channels_last": + # input_filters = inputs.shape[-1] + # x_filters = x.shape[-1] + # else: + # input_filters = inputs.shape[1] + # x_filters = x.shape[1] + # Residual: - if self.strides == 1 and self.input_filters == self.output_filters: + if self.strides == 1 and self.input_filters == self.output_filters and not self.nores: if self.dropout: x = self.dropout_layer(x) x = keras.layers.Add(name=self.name + "add")([x, inputs]) @@ -207,8 +223,10 @@ def get_config(self): "data_format": self.data_format, "se_ratio": self.se_ratio, "batch_norm_momentum": self.batch_norm_momentum, + "batch_norm_epsilon": self.batch_norm_epsilon, "activation": self.activation, "dropout": self.dropout, + "nores": self.nores, } base_config = super().get_config() diff --git a/keras_hub/src/models/efficientnet/mbconv.py b/keras_hub/src/models/efficientnet/mbconv.py index 392e62c04f..80178bbba6 100644 --- a/keras_hub/src/models/efficientnet/mbconv.py +++ b/keras_hub/src/models/efficientnet/mbconv.py @@ -23,8 +23,10 @@ def __init__( data_format="channels_last", se_ratio=0.0, batch_norm_momentum=0.9, + batch_norm_epsilon=1e-3, activation="swish", dropout=0.2, + nores=False, **kwargs ): """Implementation of the MBConv block @@ -83,8 +85,10 @@ def __init__( self.data_format = data_format self.se_ratio = se_ratio self.batch_norm_momentum = batch_norm_momentum + self.batch_norm_epsilon = batch_norm_epsilon self.activation = activation self.dropout = dropout + self.nores = nores self.filters = self.input_filters * self.expand_ratio self.filters_se = max(1, int(input_filters * se_ratio)) @@ -101,6 +105,7 @@ def __init__( self.bn1 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, name=self.name + "expand_bn", ) self.act = keras.layers.Activation( @@ -119,6 +124,7 @@ def __init__( self.bn2 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, name=self.name + "bn", ) @@ -156,6 +162,7 @@ def __init__( self.bn3 = keras.layers.BatchNormalization( axis=BN_AXIS, momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, name=self.name + "project_bn", ) @@ -207,7 +214,7 @@ def call(self, inputs): x = self.output_conv(x) x = self.bn3(x) - if self.strides == 1 and self.input_filters == self.output_filters: + if self.strides == 1 and self.input_filters == self.output_filters and not self.nores: if self.dropout: x = self.dropout_layer(x) x = keras.layers.Add(name=self.name + "add")([x, inputs]) @@ -223,8 +230,10 @@ def get_config(self): "data_format": self.data_format, "se_ratio": self.se_ratio, "batch_norm_momentum": self.batch_norm_momentum, + "batch_norm_epsilon": self.batch_norm_epsilon, "activation": self.activation, "dropout": self.dropout, + "nores": self.nores, } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index ad47e70c1f..c8293135f4 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -30,6 +30,8 @@ "stackwise_squeeze_and_excite_ratios": [0] * 6, "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", }, "em": { @@ -86,6 +88,7 @@ def convert_backbone_config(timm_config): def convert_weights(backbone, 
loader, timm_config): timm_architecture = timm_config["architecture"] variant = "_".join(timm_architecture.split("_")[1:]) + # backbone.build(input_shape=timm_config["pretrained_cfg"]["input_size"]) def port_conv2d(keras_layer, hf_weight_prefix, port_bias=True): loader.port_weight( @@ -163,6 +166,9 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): repeats = int( math.ceil(VARIANT_MAP[variant]["depth_coefficient"] * repeats) ) + se_ratio = VARIANT_MAP[variant]["stackwise_squeeze_and_excite_ratios"][ + stack_index + ] for block_idx in range(repeats): @@ -203,15 +209,16 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): ) bn_count += 1 - # Squeeze and Excite - port_conv2d( - backbone.get_layer(keras_block_prefix + "se_reduce"), - hf_block_prefix + "se.conv_reduce", - ) - port_conv2d( - backbone.get_layer(keras_block_prefix + "se_expand"), - hf_block_prefix + "se.conv_expand", - ) + if 0 < se_ratio <= 1: + # Squeeze and Excite + port_conv2d( + backbone.get_layer(keras_block_prefix + "se_reduce"), + hf_block_prefix + "se.conv_reduce", + ) + port_conv2d( + backbone.get_layer(keras_block_prefix + "se_expand"), + hf_block_prefix + "se.conv_expand", + ) # Output/Projection port_conv2d( @@ -242,15 +249,16 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): ) bn_count += 1 - # Squeeze and Excite - port_conv2d( - fused_block_layer.se_conv1, - hf_block_prefix + "se.conv_reduce", - ) - port_conv2d( - fused_block_layer.se_conv2, - hf_block_prefix + "se.conv_expand", - ) + if 0 < se_ratio <= 1: + # Squeeze and Excite + port_conv2d( + fused_block_layer.se_conv1, + hf_block_prefix + "se.conv_reduce", + ) + port_conv2d( + fused_block_layer.se_conv2, + hf_block_prefix + "se.conv_expand", + ) # Output/Projection port_conv2d( @@ -260,7 +268,7 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): ) conv_pw_count += 1 port_batch_normalization( - fused_block_layer.bn3, + fused_block_layer.bn2, hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 @@ -270,48 +278,49 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): # Initial Expansion Conv if expansion_ratio != 1: port_conv2d( - unfused_block_layer.get_layer(keras_block_prefix + "expand_conv"), - hf_block_prefix + conv_pw_name_map[conv_pw_count], + unfused_block_layer.conv1, + hf_block_prefix + "conv_pw", port_bias=False, ) conv_pw_count += 1 port_batch_normalization( - unfused_block_layer.get_layer(keras_block_prefix + "expand_bn"), + unfused_block_layer.bn1, hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 # Depthwise Conv port_depthwise_conv2d( - unfused_block_layer.get_layer(keras_block_prefix + "dwconv"), + unfused_block_layer.depthwise, hf_block_prefix + "conv_dw", port_bias=False, ) port_batch_normalization( - unfused_block_layer.get_layer(keras_block_prefix + "dwconv_bn"), + unfused_block_layer.bn2, hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 - # Squeeze and Excite - port_conv2d( - unfused_block_layer.get_layer(keras_block_prefix + "se_reduce"), - hf_block_prefix + "se.conv_reduce", - ) - port_conv2d( - unfused_block_layer.get_layer(keras_block_prefix + "se_expand"), - hf_block_prefix + "se.conv_expand", - ) + if 0 < se_ratio <= 1: + # Squeeze and Excite + port_conv2d( + unfused_block_layer.se_conv1, + hf_block_prefix + "se.conv_reduce", + ) + port_conv2d( + unfused_block_layer.se_conv2, + hf_block_prefix + "se.conv_expand", + ) # Output/Projection port_conv2d( - unfused_block_layer.get_layer(keras_block_prefix + "project"), - hf_block_prefix + conv_pw_name_map[conv_pw_count], + 
unfused_block_layer.output_conv, + hf_block_prefix + "conv_pwl", port_bias=False, ) conv_pw_count += 1 port_batch_normalization( - unfused_block_layer.get_layer(keras_block_prefix + "project_bn"), + unfused_block_layer.bn3, hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 From 64b6af8471152aaef86021bc9cc26c36b4813dd5 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 14:14:51 -0800 Subject: [PATCH 03/17] added all hf timm edge presets --- .../src/utils/timm/convert_efficientnet.py | 39 +++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index c8293135f4..68fed5fb5d 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -13,10 +13,12 @@ "b0": { "width_coefficient": 1.0, "depth_coefficient": 1.0, + "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b1": { "width_coefficient": 1.0, "depth_coefficient": 1.1, + "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "el": { "width_coefficient": 1.2, @@ -34,10 +36,34 @@ "activation": "relu", }, "em": { - + "width_coefficient": 1.0, + "depth_coefficient": 1.1, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", }, "es": { - + "width_coefficient": 1.0, + "depth_coefficient": 1.0, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", }, } @@ -52,15 +78,6 @@ def convert_backbone_config(timm_config): "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], - "stackwise_squeeze_and_excite_ratios": [ - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - ], "stackwise_block_types": ["v1"] * 7, "min_depth": None, "include_stem_padding": True, From ccb508b9df074c60167d9f2c6d54aed2f547391a Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 14:20:02 -0800 Subject: [PATCH 04/17] removing irrelevant note --- keras_hub/src/models/efficientnet/fusedmbconv.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 9e06e5bee6..7bb706ca9c 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -196,16 +196,6 @@ def call(self, inputs): if self.expand_ratio == 1: x = self.act(x) - # For EdgeTPU Version the stem output does not match the parameterized - # input filters, thus this check needs to be dynamic and 
not based - # on initial parameterization. This hack is ported from timm. - # if self.data_format == "channels_last": - # input_filters = inputs.shape[-1] - # x_filters = x.shape[-1] - # else: - # input_filters = inputs.shape[1] - # x_filters = x.shape[1] - # Residual: if self.strides == 1 and self.input_filters == self.output_filters and not self.nores: if self.dropout: From cebc921b5810c9ea854f95f56758f24a1bc1b5ef Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 14:25:13 -0800 Subject: [PATCH 05/17] format pass --- keras_hub/src/models/efficientnet/efficientnet_backbone.py | 6 +++--- keras_hub/src/models/efficientnet/fusedmbconv.py | 6 +++++- keras_hub/src/models/efficientnet/mbconv.py | 6 +++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index a6f7639b5c..9f3825ea1b 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -100,8 +100,8 @@ def __init__( stackwise_squeeze_and_excite_ratios, stackwise_strides, stackwise_block_types, - stackwise_force_input_filters=[0]*7, - stackwise_nores_option=[False]*7, + stackwise_force_input_filters=[0] * 7, + stackwise_nores_option=[False] * 7, dropout=0.2, depth_divisor=8, min_depth=8, @@ -307,7 +307,7 @@ def __init__( self.stackwise_strides = stackwise_strides self.stackwise_block_types = stackwise_block_types - self.stackwise_force_input_filters=stackwise_force_input_filters, + self.stackwise_force_input_filters = (stackwise_force_input_filters,) self.include_stem_padding = include_stem_padding self.use_depth_divisor_as_min_depth = use_depth_divisor_as_min_depth self.cap_round_filter_decrease = cap_round_filter_decrease diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 7bb706ca9c..8d2cc2fdef 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -197,7 +197,11 @@ def call(self, inputs): x = self.act(x) # Residual: - if self.strides == 1 and self.input_filters == self.output_filters and not self.nores: + if ( + self.strides == 1 + and self.input_filters == self.output_filters + and not self.nores + ): if self.dropout: x = self.dropout_layer(x) x = keras.layers.Add(name=self.name + "add")([x, inputs]) diff --git a/keras_hub/src/models/efficientnet/mbconv.py b/keras_hub/src/models/efficientnet/mbconv.py index 80178bbba6..e9acbfeb9a 100644 --- a/keras_hub/src/models/efficientnet/mbconv.py +++ b/keras_hub/src/models/efficientnet/mbconv.py @@ -214,7 +214,11 @@ def call(self, inputs): x = self.output_conv(x) x = self.bn3(x) - if self.strides == 1 and self.input_filters == self.output_filters and not self.nores: + if ( + self.strides == 1 + and self.input_filters == self.output_filters + and not self.nores + ): if self.dropout: x = self.dropout_layer(x) x = keras.layers.Add(name=self.name + "add")([x, inputs]) From 11a86a20cc706d801f9dafd53488ece5bb892208 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 14:28:28 -0800 Subject: [PATCH 06/17] remove irrelevant old commented code --- keras_hub/src/utils/timm/convert_efficientnet.py | 1 - 1 file changed, 1 deletion(-) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 68fed5fb5d..5c58c7c04b 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ 
b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -105,7 +105,6 @@ def convert_backbone_config(timm_config): def convert_weights(backbone, loader, timm_config): timm_architecture = timm_config["architecture"] variant = "_".join(timm_architecture.split("_")[1:]) - # backbone.build(input_shape=timm_config["pretrained_cfg"]["input_size"]) def port_conv2d(keras_layer, hf_weight_prefix, port_bias=True): loader.port_weight( From 5533922b55847b6557118e51c4fdf361b38791f1 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 15:16:53 -0800 Subject: [PATCH 07/17] fix unit test regression --- .../models/efficientnet/efficientnet_backbone.py | 2 +- .../efficientnet/efficientnet_backbone_test.py | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 9f3825ea1b..f2a3b70912 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -307,7 +307,7 @@ def __init__( self.stackwise_strides = stackwise_strides self.stackwise_block_types = stackwise_block_types - self.stackwise_force_input_filters = (stackwise_force_input_filters,) + self.stackwise_force_input_filters = stackwise_force_input_filters self.include_stem_padding = include_stem_padding self.use_depth_divisor_as_min_depth = use_depth_divisor_as_min_depth self.cap_round_filter_decrease = cap_round_filter_decrease diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py b/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py index f31004b5dc..c11e636540 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone_test.py @@ -26,6 +26,8 @@ def setUp(self): ], "stackwise_strides": [1, 2, 2, 2, 1, 2], "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [0] * 6, + "stackwise_nores_option": [False] * 6, "width_coefficient": 1.0, "depth_coefficient": 1.0, } @@ -60,15 +62,9 @@ def test_valid_call_original_v1(self): "stackwise_output_filters": [16, 24, 40, 80, 112, 192, 320], "stackwise_expansion_ratios": [1, 6, 6, 6, 6, 6, 6], "stackwise_strides": [1, 2, 2, 2, 1, 2, 1], - "stackwise_squeeze_and_excite_ratios": [ - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - 0.25, - ], + "stackwise_squeeze_and_excite_ratios": [0.25] * 7, + "stackwise_force_input_filters": [0] * 7, + "stackwise_nores_option": [False] * 7, "width_coefficient": 1.0, "depth_coefficient": 1.0, "stackwise_block_types": ["v1"] * 7, From a935e6240b9d053bac00bf8aceca4dc5a3b353bc Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 16:12:34 -0800 Subject: [PATCH 08/17] add presets to preset file --- .../efficientnet/efficientnet_presets.py | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_presets.py b/keras_hub/src/models/efficientnet/efficientnet_presets.py index 39c9514816..2a3c279350 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_presets.py +++ b/keras_hub/src/models/efficientnet/efficientnet_presets.py @@ -17,7 +17,7 @@ "efficientnet_b1_ft_imagenet": { "metadata": { "description": ( - "EfficientNet B1 model fine-trained on the ImageNet 1k dataset." + "EfficientNet B1 model fine-tuned on the ImageNet 1k dataset." 
), "params": 7794184, "official_name": "EfficientNet", @@ -26,4 +26,43 @@ }, "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet", }, + "efficientnet_el_ra_imagenet": { + "metadata": { + "description": ( + "EfficientNet-EdgeTPU Large model trained on the ImageNet 1k " + "dataset with RandAugment recipe." + ), + "params": 10589712, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/1905.11946", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet", + }, + "efficientnet_em_ra2_imagenet": { + "metadata": { + "description": ( + "EfficientNet-EdgeTPU Medium model trained on the ImageNet 1k " + "dataset with RandAugment2 recipe." + ), + "params": 6899496, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/1905.11946", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet", + }, + "efficientnet_es_ra_imagenet": { + "metadata": { + "description": ( + "EfficientNet-EdgeTPU Small model trained on the ImageNet 1k " + "dataset with RandAugment recipe." + ), + "params": 5438392, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/1905.11946", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet", + }, } From b89638fb1296976fbf2a3841ffbca3677e43a572 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 7 Nov 2024 16:42:27 -0800 Subject: [PATCH 09/17] WIP starting changes needed for additional presets --- .../src/utils/timm/convert_efficientnet.py | 45 +++++++++++++++++++ .../convert_efficientnet_checkpoints.py | 9 ++++ 2 files changed, 54 insertions(+) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 5c58c7c04b..f256ca4e93 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -65,6 +65,51 @@ "stackwise_nores_option": [True] + [False] * 5, "activation": "relu", }, + "rw_m": { + "width_coefficient": 1.2, + "depth_coefficient": (1.2,) * 4 + (1.6,) * 2, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", + }, + "rw_s": { + "width_coefficient": 1.0, + "depth_coefficient": 1.0, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", + }, + "rw_t": { + "width_coefficient": 0.8, + "depth_coefficient": 0.9, + "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], + "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], + "stackwise_input_filters": [32, 24, 32, 48, 96, 
144], + "stackwise_output_filters": [24, 32, 48, 96, 144, 192], + "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_strides": [1, 2, 2, 2, 1, 2], + "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, + "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], + "stackwise_nores_option": [True] + [False] * 5, + "activation": "relu", + }, } diff --git a/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py b/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py index 75810a19a9..366a05a16a 100644 --- a/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py +++ b/tools/checkpoint_conversion/convert_efficientnet_checkpoints.py @@ -11,6 +11,12 @@ --preset efficientnet_em_ra2_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_em_ra2_imagenet python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ --preset efficientnet_es_ra_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_es_ra_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet2_rw_m_agc_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_el_ra_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet2_rw_s_ra2_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_em_ra2_imagenet +python tools/checkpoint_conversion/convert_efficientnet_checkpoints.py \ + --preset efficientnet2_rw_t_ra2_imagenet --upload_uri kaggle://keras/efficientnet/keras/efficientnet_es_ra_imagenet """ import os @@ -32,6 +38,9 @@ "efficientnet_el_ra_imagenet": "timm/efficientnet_el.ra_in1k", "efficientnet_em_ra2_imagenet": "timm/efficientnet_em.ra2_in1k", "efficientnet_es_ra_imagenet": "timm/efficientnet_es.ra_in1k", + "efficientnet2_rw_m_agc_imagenet": "timm/efficientnetv2_rw_m.agc_in1k", + "efficientnet2_rw_s_ra2_imagenet": "timm/efficientnetv2_rw_s.ra2_in1k", + "efficientnet2_rw_t_ra2_imagenet": "timm/efficientnetv2_rw_t.ra2_in1k", } FLAGS = flags.FLAGS From 9e1a850a47cac50842820374d18731be4436abd7 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Fri, 8 Nov 2024 16:50:03 -0800 Subject: [PATCH 10/17] WIP 2 variants working --- .../src/models/efficientnet/convbnact.py | 139 ++++++++++++++++++ .../src/models/efficientnet/convbnact_test.py | 32 ++++ .../efficientnet/efficientnet_backbone.py | 36 ++++- .../src/models/efficientnet/fusedmbconv.py | 32 ++-- keras_hub/src/models/efficientnet/mbconv.py | 11 +- .../src/utils/timm/convert_efficientnet.py | 107 ++++++++------ 6 files changed, 297 insertions(+), 60 deletions(-) create mode 100644 keras_hub/src/models/efficientnet/convbnact.py create mode 100644 keras_hub/src/models/efficientnet/convbnact_test.py diff --git a/keras_hub/src/models/efficientnet/convbnact.py b/keras_hub/src/models/efficientnet/convbnact.py new file mode 100644 index 0000000000..01ae845834 --- /dev/null +++ b/keras_hub/src/models/efficientnet/convbnact.py @@ -0,0 +1,139 @@ +import keras + +BN_AXIS = 3 + +CONV_KERNEL_INITIALIZER = { + "class_name": "VarianceScaling", + "config": { + "scale": 2.0, + "mode": "fan_out", + "distribution": "truncated_normal", + }, +} + + +class ConvBNActBlock(keras.layers.Layer): + """ + Args: + input_filters: int, the number of input filters + output_filters: int, the number of output filters + expand_ratio: default 1, the ratio by which input_filters are multiplied + to expand the structure in the middle expansion phase + kernel_size: default 3, the 
kernel_size to apply to the expansion phase + convolutions + strides: default 1, the strides to apply to the expansion phase + convolutions + se_ratio: default 0.0, The filters used in the Squeeze-Excitation phase, + and are chosen as the maximum between 1 and input_filters*se_ratio + batch_norm_momentum: default 0.9, the BatchNormalization momentum + activation: default "swish", the activation function used between + convolution operations + dropout: float, the optional dropout rate to apply before the output + convolution, defaults to 0.2 + + Returns: + A tensor representing a feature map, passed through the FusedMBConv + block + + Note: + Not intended to be used outside of the EfficientNet architecture. + """ + + def __init__( + self, + input_filters, + output_filters, + expand_ratio=1, + kernel_size=3, + strides=1, + data_format="channels_last", + batch_norm_momentum=0.9, + batch_norm_epsilon=1e-3, + activation="swish", + dropout=0.2, + nores=False, + **kwargs + ): + super().__init__(**kwargs) + self.input_filters = input_filters + self.output_filters = output_filters + self.expand_ratio = expand_ratio + self.kernel_size = kernel_size + self.strides = strides + self.data_format = data_format + self.batch_norm_momentum = batch_norm_momentum + self.batch_norm_epsilon = batch_norm_epsilon + self.activation = activation + self.dropout = dropout + self.nores = nores + self.filters = self.input_filters * self.expand_ratio + + padding_pixels = kernel_size // 2 + self.conv1_pad = keras.layers.ZeroPadding2D( + padding=(padding_pixels, padding_pixels), + name=self.name + "conv_pad", + ) + self.conv1 = keras.layers.Conv2D( + filters=self.filters, + kernel_size=kernel_size, + strides=strides, + kernel_initializer=CONV_KERNEL_INITIALIZER, + padding="valid", + data_format=data_format, + use_bias=False, + name=self.name + "conv", + ) + self.bn1 = keras.layers.BatchNormalization( + axis=BN_AXIS, + momentum=self.batch_norm_momentum, + epsilon=self.batch_norm_epsilon, + name=self.name + "bn", + ) + self.act = keras.layers.Activation( + self.activation, name=self.name + "activation" + ) + + if self.dropout: + self.dropout_layer = keras.layers.Dropout( + self.dropout, + noise_shape=(None, 1, 1, 1), + name=self.name + "drop", + ) + + def build(self, input_shape): + if self.name is None: + self.name = keras.backend.get_uid("block0") + + def call(self, inputs): + x = self.conv1_pad(inputs) + x = self.conv1(x) + x = self.bn1(x) + x = self.act(x) + + # Residual: + if ( + self.strides == 1 + and self.input_filters == self.output_filters + and not self.nores + ): + if self.dropout: + x = self.dropout_layer(x) + x = keras.layers.Add(name=self.name + "add")([x, inputs]) + return x + + def get_config(self): + config = { + "input_filters": self.input_filters, + "output_filters": self.output_filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "data_format": self.data_format, + "batch_norm_momentum": self.batch_norm_momentum, + "batch_norm_epsilon": self.batch_norm_epsilon, + "activation": self.activation, + "dropout": self.dropout, + "nores": self.nores, + } + + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_hub/src/models/efficientnet/convbnact_test.py b/keras_hub/src/models/efficientnet/convbnact_test.py new file mode 100644 index 0000000000..54f8957b04 --- /dev/null +++ b/keras_hub/src/models/efficientnet/convbnact_test.py @@ -0,0 +1,32 @@ +import keras + +from keras_hub.src.models.efficientnet.convbnact import 
ConvBNActBlock +from keras_hub.src.tests.test_case import TestCase + + +class ConvBNActBlockTest(TestCase): + def test_same_input_output_shapes(self): + inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") + layer = ConvBNActBlock(input_filters=32, output_filters=32) + + output = layer(inputs) + self.assertEquals(output.shape, (1, 64, 64, 32)) + self.assertLen(output, 1) + + def test_different_input_output_shapes(self): + inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") + layer = ConvBNActBlock(input_filters=32, output_filters=48) + + output = layer(inputs) + self.assertEquals(output.shape, (1, 64, 64, 48)) + self.assertLen(output, 1) + + def test_squeeze_excitation_ratio(self): + inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") + layer = ConvBNActBlock( + input_filters=32, output_filters=48, se_ratio=0.25 + ) + + output = layer(inputs) + self.assertEquals(output.shape, (1, 64, 64, 48)) + self.assertLen(output, 1) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index f2a3b70912..29459e65ac 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -3,6 +3,7 @@ import keras from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.efficientnet.convbnact import ConvBNActBlock from keras_hub.src.models.efficientnet.fusedmbconv import FusedMBConvBlock from keras_hub.src.models.efficientnet.mbconv import MBConvBlock from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone @@ -115,6 +116,7 @@ def __init__( batch_norm_momentum=0.9, batch_norm_epsilon=1e-5, projection_activation=None, + num_features=1280, **kwargs, ): image_input = keras.layers.Input(shape=input_shape) @@ -161,12 +163,20 @@ def __init__( self._pyramid_outputs = {} curr_pyramid_level = 1 - for i in range(len(stackwise_kernel_sizes)): + num_stacks = len(stackwise_kernel_sizes) + + if isinstance(depth_coefficient, tuple): + assert len(depth_coefficient) == num_stacks + else: + depth_coefficient = (depth_coefficient,) * num_stacks + + for i in range(num_stacks): num_repeats = stackwise_num_repeats[i] input_filters = stackwise_input_filters[i] output_filters = stackwise_output_filters[i] force_input_filters = stackwise_force_input_filters[i] nores = stackwise_nores_option[i] + stack_depth_coefficient = depth_coefficient[i] # Update block input and output filters based on depth multiplier. 
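            # Assuming the standard EfficientNet rounding, round_filters
            # scales by width_coefficient and snaps the result to a multiple
            # of depth_divisor, e.g. filters=32, width_coefficient=1.2,
            # depth_divisor=8:
            #   32 * 1.2 = 38.4 -> int(38.4 + 8 / 2) // 8 * 8 = 40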
input_filters = round_filters( @@ -188,7 +198,7 @@ def __init__( repeats = round_repeats( repeats=num_repeats, - depth_coefficient=depth_coefficient, + depth_coefficient=stack_depth_coefficient, ) strides = stackwise_strides[i] squeeze_and_excite_ratio = stackwise_squeeze_and_excite_ratios[i] @@ -234,7 +244,7 @@ def __init__( batch_norm_epsilon=batch_norm_epsilon, name=block_name, ) - else: + elif stackwise_block_type in ("fused", "unfused"): block = get_conv_constructor(stackwise_block_type)( input_filters=input_filters, output_filters=output_filters, @@ -251,11 +261,27 @@ def __init__( name=block_name, ) x = block(x) + else: # cba block + block = ConvBNActBlock( + input_filters=input_filters, + output_filters=output_filters, + expand_ratio=stackwise_expansion_ratios[i], + kernel_size=stackwise_kernel_sizes[i], + strides=strides, + data_format=data_format, + activation=activation, + dropout=dropout * block_id / blocks, + batch_norm_momentum=batch_norm_momentum, + batch_norm_epsilon=batch_norm_epsilon, + nores=nores, + name=block_name, + ) + x = block(x) block_id += 1 # Build top top_filters = round_filters( - filters=1280, + filters=num_features, width_coefficient=width_coefficient, min_depth=min_depth, depth_divisor=depth_divisor, @@ -577,6 +603,8 @@ def get_conv_constructor(conv_type): return MBConvBlock elif conv_type == "fused": return FusedMBConvBlock + elif conv_type == "cba": + return ConvBNActBlock else: raise ValueError( "Expected `conv_type` to be " diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 8d2cc2fdef..17e1351c2b 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -74,6 +74,7 @@ def __init__( activation="swish", dropout=0.2, nores=False, + projection_kernel_size=1, **kwargs ): super().__init__(**kwargs) @@ -89,15 +90,21 @@ def __init__( self.activation = activation self.dropout = dropout self.nores = nores + self.projection_kernel_size = projection_kernel_size self.filters = self.input_filters * self.expand_ratio self.filters_se = max(1, int(input_filters * se_ratio)) + padding_pixels = kernel_size // 2 + self.conv1_pad = keras.layers.ZeroPadding2D( + padding=(padding_pixels, padding_pixels), + name=self.name + "expand_conv_pad", + ) self.conv1 = keras.layers.Conv2D( filters=self.filters, kernel_size=kernel_size, strides=strides, kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", + padding="valid", data_format=data_format, use_bias=False, name=self.name + "expand_conv", @@ -132,12 +139,17 @@ def __init__( name=self.name + "se_expand", ) + padding_pixels = projection_kernel_size // 2 + self.output_conv_pad = keras.layers.ZeroPadding2D( + padding=(padding_pixels, padding_pixels), + name=self.name + "project_conv_pad", + ) self.output_conv = keras.layers.Conv2D( filters=self.output_filters, - kernel_size=1 if expand_ratio != 1 else kernel_size, + kernel_size=projection_kernel_size, strides=1, kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", + padding="valid", data_format=data_format, use_bias=False, name=self.name + "project_conv", @@ -163,12 +175,10 @@ def build(self, input_shape): def call(self, inputs): # Expansion phase - if self.expand_ratio != 1: - x = self.conv1(inputs) - x = self.bn1(x) - x = self.act(x) - else: - x = inputs + x = self.conv1_pad(inputs) + x = self.conv1(x) + x = self.bn1(x) + x = self.act(x) # Squeeze and excite if 0 < self.se_ratio <= 1: @@ -191,10 +201,9 @@ def call(self, inputs): x = 
keras.layers.multiply([x, se], name=self.name + "se_excite") # Output phase: + x = self.output_conv_pad(x) x = self.output_conv(x) x = self.bn2(x) - if self.expand_ratio == 1: - x = self.act(x) # Residual: if ( @@ -221,6 +230,7 @@ def get_config(self): "activation": self.activation, "dropout": self.dropout, "nores": self.nores, + "projection_kernel_size": self.projection_kernel_size, } base_config = super().get_config() diff --git a/keras_hub/src/models/efficientnet/mbconv.py b/keras_hub/src/models/efficientnet/mbconv.py index e9acbfeb9a..9584f7391c 100644 --- a/keras_hub/src/models/efficientnet/mbconv.py +++ b/keras_hub/src/models/efficientnet/mbconv.py @@ -148,12 +148,18 @@ def __init__( name=self.name + "se_expand", ) + projection_kernel_size = 1 if expand_ratio != 1 else kernel_size + padding_pixels = projection_kernel_size // 2 + self.output_conv_pad = keras.layers.ZeroPadding2D( + padding=(padding_pixels, padding_pixels), + name=self.name + "project_conv_pad", + ) self.output_conv = keras.layers.Conv2D( filters=self.output_filters, - kernel_size=1 if expand_ratio != 1 else kernel_size, + kernel_size=projection_kernel_size, strides=1, kernel_initializer=CONV_KERNEL_INITIALIZER, - padding="same", + padding="valid", data_format=data_format, use_bias=False, name=self.name + "project_conv", @@ -211,6 +217,7 @@ def call(self, inputs): x = keras.layers.multiply([x, se], name=self.name + "se_excite") # Output phase + x = self.output_conv_pad(x) x = self.output_conv(x) x = self.bn3(x) diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index f256ca4e93..322c578e87 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -68,47 +68,49 @@ "rw_m": { "width_coefficient": 1.2, "depth_coefficient": (1.2,) * 4 + (1.6,) * 2, - "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], - "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], - "stackwise_input_filters": [32, 24, 32, 48, 96, 144], - "stackwise_output_filters": [24, 32, 48, 96, 144, 192], - "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], + "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], + "stackwise_input_filters": [24, 24, 48, 64, 128, 160], + "stackwise_output_filters": [24, 48, 64, 128, 160, 272], + "stackwise_expansion_ratios": [1, 4, 4, 4, 6, 6], "stackwise_strides": [1, 2, 2, 2, 1, 2], - "stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_squeeze_and_excite_ratios": [0, 0, 0, 0.25, 0.25, 0.25], "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, - "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], - "stackwise_nores_option": [True] + [False] * 5, - "activation": "relu", + "stackwise_force_input_filters": [0, 0, 0, 0, 0, 0], + "stackwise_nores_option": [False] * 6, + "activation": "silu", + "num_features": 1792, }, "rw_s": { "width_coefficient": 1.0, "depth_coefficient": 1.0, - "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], - "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], - "stackwise_input_filters": [32, 24, 32, 48, 96, 144], - "stackwise_output_filters": [24, 32, 48, 96, 144, 192], - "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], + "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], + "stackwise_input_filters": [24, 24, 48, 64, 128, 160], + "stackwise_output_filters": [24, 48, 64, 128, 160, 272], + "stackwise_expansion_ratios": [1, 4, 4, 4, 6, 6], "stackwise_strides": [1, 2, 2, 2, 1, 2], - 
"stackwise_squeeze_and_excite_ratios": [0] * 6, + "stackwise_squeeze_and_excite_ratios": [0, 0, 0, 0.25, 0.25, 0.25], "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, - "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], - "stackwise_nores_option": [True] + [False] * 5, - "activation": "relu", + "stackwise_force_input_filters": [0, 0, 0, 0, 0, 0], + "stackwise_nores_option": [False] * 6, + "activation": "silu", + "num_features": 1792, }, "rw_t": { "width_coefficient": 0.8, "depth_coefficient": 0.9, - "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], - "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], - "stackwise_input_filters": [32, 24, 32, 48, 96, 144], - "stackwise_output_filters": [24, 32, 48, 96, 144, 192], - "stackwise_expansion_ratios": [4, 8, 8, 8, 8, 8], + "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], + "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], + "stackwise_input_filters": [24, 24, 48, 64, 128, 160], + "stackwise_output_filters": [24, 48, 64, 128, 160, 256], + "stackwise_expansion_ratios": [1, 4, 4, 4, 6, 6], "stackwise_strides": [1, 2, 2, 2, 1, 2], - "stackwise_squeeze_and_excite_ratios": [0] * 6, - "stackwise_block_types": ["fused"] * 3 + ["unfused"] * 3, - "stackwise_force_input_filters": [24, 0, 0, 0, 0, 0], - "stackwise_nores_option": [True] + [False] * 5, - "activation": "relu", + "stackwise_squeeze_and_excite_ratios": [0, 0, 0, 0.25, 0.25, 0.25], + "stackwise_block_types": ["cba"] + ["fused"] * 2 + ["unfused"] * 3, + "stackwise_force_input_filters": [0, 0, 0, 0, 0, 0], + "stackwise_nores_option": [False] * 6, + "activation": "silu", }, } @@ -218,15 +220,21 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): # Stages num_stacks = len(backbone.stackwise_kernel_sizes) + + depth_coefficient = VARIANT_MAP[variant]["depth_coefficient"] + if isinstance(depth_coefficient, tuple): + assert len(depth_coefficient) == num_stacks + else: + depth_coefficient = (depth_coefficient,) * num_stacks + for stack_index in range(num_stacks): block_type = backbone.stackwise_block_types[stack_index] expansion_ratio = backbone.stackwise_expansion_ratios[stack_index] repeats = backbone.stackwise_num_repeats[stack_index] + stack_depth_coefficient = depth_coefficient[stack_index] - repeats = int( - math.ceil(VARIANT_MAP[variant]["depth_coefficient"] * repeats) - ) + repeats = int(math.ceil(stack_depth_coefficient * repeats)) se_ratio = VARIANT_MAP[variant]["stackwise_squeeze_and_excite_ratios"][ stack_index ] @@ -297,18 +305,17 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): fused_block_layer = backbone.get_layer(keras_block_prefix) # Initial Expansion Conv - if expansion_ratio != 1: - port_conv2d( - fused_block_layer.conv1, - hf_block_prefix + "conv_exp", - port_bias=False, - ) - conv_pw_count += 1 - port_batch_normalization( - fused_block_layer.bn1, - hf_block_prefix + f"bn{bn_count}", - ) - bn_count += 1 + port_conv2d( + fused_block_layer.conv1, + hf_block_prefix + "conv_exp", + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + fused_block_layer.bn1, + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 if 0 < se_ratio <= 1: # Squeeze and Excite @@ -385,6 +392,20 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): hf_block_prefix + f"bn{bn_count}", ) bn_count += 1 + elif block_type == "cba": + cba_block_layer = backbone.get_layer(keras_block_prefix) + # Initial Expansion Conv + port_conv2d( + cba_block_layer.conv1, + hf_block_prefix + "conv_pw", + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + 
cba_block_layer.bn1, + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 # Head/Top port_conv2d(backbone.get_layer("top_conv"), "conv_head", port_bias=False) From e2adf4732ff06ed88608d3482bcae0aa62e65d80 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 11 Nov 2024 14:19:23 -0800 Subject: [PATCH 11/17] add ConvBNAct Block and tiny variant --- keras_hub/src/models/efficientnet/convbnact.py | 7 ++----- keras_hub/src/models/efficientnet/convbnact_test.py | 10 ---------- .../src/models/efficientnet/efficientnet_backbone.py | 3 +-- keras_hub/src/utils/timm/convert_efficientnet.py | 2 +- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/keras_hub/src/models/efficientnet/convbnact.py b/keras_hub/src/models/efficientnet/convbnact.py index 01ae845834..5d8e379708 100644 --- a/keras_hub/src/models/efficientnet/convbnact.py +++ b/keras_hub/src/models/efficientnet/convbnact.py @@ -43,7 +43,6 @@ def __init__( self, input_filters, output_filters, - expand_ratio=1, kernel_size=3, strides=1, data_format="channels_last", @@ -57,7 +56,6 @@ def __init__( super().__init__(**kwargs) self.input_filters = input_filters self.output_filters = output_filters - self.expand_ratio = expand_ratio self.kernel_size = kernel_size self.strides = strides self.data_format = data_format @@ -66,7 +64,6 @@ def __init__( self.activation = activation self.dropout = dropout self.nores = nores - self.filters = self.input_filters * self.expand_ratio padding_pixels = kernel_size // 2 self.conv1_pad = keras.layers.ZeroPadding2D( @@ -74,7 +71,7 @@ def __init__( name=self.name + "conv_pad", ) self.conv1 = keras.layers.Conv2D( - filters=self.filters, + filters=self.output_filters, kernel_size=kernel_size, strides=strides, kernel_initializer=CONV_KERNEL_INITIALIZER, @@ -120,7 +117,7 @@ def call(self, inputs): x = self.dropout_layer(x) x = keras.layers.Add(name=self.name + "add")([x, inputs]) return x - + def get_config(self): config = { "input_filters": self.input_filters, diff --git a/keras_hub/src/models/efficientnet/convbnact_test.py b/keras_hub/src/models/efficientnet/convbnact_test.py index 54f8957b04..720ed57365 100644 --- a/keras_hub/src/models/efficientnet/convbnact_test.py +++ b/keras_hub/src/models/efficientnet/convbnact_test.py @@ -20,13 +20,3 @@ def test_different_input_output_shapes(self): output = layer(inputs) self.assertEquals(output.shape, (1, 64, 64, 48)) self.assertLen(output, 1) - - def test_squeeze_excitation_ratio(self): - inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") - layer = ConvBNActBlock( - input_filters=32, output_filters=48, se_ratio=0.25 - ) - - output = layer(inputs) - self.assertEquals(output.shape, (1, 64, 64, 48)) - self.assertLen(output, 1) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 29459e65ac..880e41912f 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -261,11 +261,10 @@ def __init__( name=block_name, ) x = block(x) - else: # cba block + else: # cba block block = ConvBNActBlock( input_filters=input_filters, output_filters=output_filters, - expand_ratio=stackwise_expansion_ratios[i], kernel_size=stackwise_kernel_sizes[i], strides=strides, data_format=data_format, diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 322c578e87..58214ea806 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ 
b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -397,7 +397,7 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): # Initial Expansion Conv port_conv2d( cba_block_layer.conv1, - hf_block_prefix + "conv_pw", + hf_block_prefix + "conv", port_bias=False, ) conv_pw_count += 1 From 753caaa256e8ed205bf451600e77f36bdb585de1 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 11 Nov 2024 14:43:50 -0800 Subject: [PATCH 12/17] adds preset data --- .../efficientnet/efficientnet_presets.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/keras_hub/src/models/efficientnet/efficientnet_presets.py b/keras_hub/src/models/efficientnet/efficientnet_presets.py index 2a3c279350..a40aec949a 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_presets.py +++ b/keras_hub/src/models/efficientnet/efficientnet_presets.py @@ -65,4 +65,43 @@ }, "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b1_ft_imagenet", }, + "efficientnet2_rw_m_agc_imagenet": { + "metadata": { + "description": ( + "EfficientNet-v2 Medium model trained on the ImageNet 1k " + "dataset with adaptive gradient clipping." + ), + "params": 53236442, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/2104.00298", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet2_rw_m_agc_imagenet", + }, + "efficientnet2_rw_s_ra2_imagenet": { + "metadata": { + "description": ( + "EfficientNet-v2 Small model trained on the ImageNet 1k " + "dataset with RandAugment2 recipe." + ), + "params": 23941296, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/2104.00298", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet2_rw_s_ra2_imagenet", + }, + "efficientnet2_rw_t_ra2_imagenet": { + "metadata": { + "description": ( + "EfficientNet-v2 Tiny model trained on the ImageNet 1k " + "dataset with RandAugment2 recipe." 
+ ), + "params": 13649388, + "official_name": "EfficientNet", + "path": "efficientnet", + "model_card": "https://arxiv.org/abs/2104.00298", + }, + "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet2_rw_t_ra2_imagenet", + }, } From 041de38219c458a7231acfbf4bcba3aab1f9f421 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 11 Nov 2024 14:59:57 -0800 Subject: [PATCH 13/17] updated correct config --- .../src/models/efficientnet/efficientnet_backbone.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 880e41912f..b86a5431c8 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -169,6 +169,7 @@ def __init__( assert len(depth_coefficient) == num_stacks else: depth_coefficient = (depth_coefficient,) * num_stacks + dc_originally_scalar = True for i in range(num_stacks): num_repeats = stackwise_num_repeats[i] @@ -315,7 +316,12 @@ def __init__( # === Config === self.width_coefficient = width_coefficient - self.depth_coefficient = depth_coefficient + + if dc_originally_scalar: + self.depth_coefficient = depth_coefficient[0] + else: + self.depth_coefficient = depth_coefficient + self.dropout = dropout self.depth_divisor = depth_divisor self.min_depth = min_depth @@ -607,6 +613,6 @@ def get_conv_constructor(conv_type): else: raise ValueError( "Expected `conv_type` to be " - "one of 'unfused', 'fused', but got " + "one of 'unfused', 'fused', 'cba', but got " f"`conv_type={conv_type}`" ) From 9eacf214f852356f9117c609cc2e40372629c868 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 18 Nov 2024 12:19:06 -0800 Subject: [PATCH 14/17] resolve merge conflicts --- .../efficientnet/efficientnet_presets.py | 39 ------------------- .../src/utils/timm/convert_efficientnet.py | 20 ++++------ 2 files changed, 8 insertions(+), 51 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_presets.py b/keras_hub/src/models/efficientnet/efficientnet_presets.py index 5a538bf406..f47c72bf80 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_presets.py +++ b/keras_hub/src/models/efficientnet/efficientnet_presets.py @@ -124,45 +124,6 @@ }, "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_b5_sw_ft_imagenet/1", }, - "efficientnet_el_ra_imagenet": { - "metadata": { - "description": ( - "EfficientNet-EdgeTPU Large model trained on the ImageNet 1k " - "dataset with RandAugment recipe." - ), - "params": 10589712, - "official_name": "EfficientNet", - "path": "efficientnet", - "model_card": "https://arxiv.org/abs/1905.11946", - }, - "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_el_ra_imagenet/1", - }, - "efficientnet_em_ra2_imagenet": { - "metadata": { - "description": ( - "EfficientNet-EdgeTPU Medium model trained on the ImageNet 1k " - "dataset with RandAugment2 recipe." - ), - "params": 6899496, - "official_name": "EfficientNet", - "path": "efficientnet", - "model_card": "https://arxiv.org/abs/1905.11946", - }, - "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_em_ra2_imagenet/1", - }, - "efficientnet_es_ra_imagenet": { - "metadata": { - "description": ( - "EfficientNet-EdgeTPU Small model trained on the ImageNet 1k " - "dataset with RandAugment recipe." 
- ), - "params": 5438392, - "official_name": "EfficientNet", - "path": "efficientnet", - "model_card": "https://arxiv.org/abs/1905.11946", - }, - "kaggle_handle": "kaggle://keras/efficientnet/keras/efficientnet_es_ra_imagenet/1", - }, "efficientnet_el_ra_imagenet": { "metadata": { "description": ( diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 8a719a7f73..b4ece53b42 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -325,18 +325,14 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): port_conv2d( fused_block_layer.conv1, hf_block_prefix + "conv_exp", - if expansion_ratio != 1: - port_conv2d( - fused_block_layer.conv1, - hf_block_prefix + "conv_exp", - port_bias=False, - ) - conv_pw_count += 1 - port_batch_normalization( - fused_block_layer.bn1, - hf_block_prefix + f"bn{bn_count}", - ) - bn_count += 1 + port_bias=False, + ) + conv_pw_count += 1 + port_batch_normalization( + fused_block_layer.bn1, + hf_block_prefix + f"bn{bn_count}", + ) + bn_count += 1 if 0 < se_ratio <= 1: # Squeeze and Excite From a84bddb06df2319b83d08ffea70803429f617c21 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Thu, 21 Nov 2024 13:37:44 -0800 Subject: [PATCH 15/17] review updates --- .../efficientnet/{convbnact.py => cba.py} | 43 +++--- .../{convbnact_test.py => cba_test.py} | 8 +- .../efficientnet/efficientnet_backbone.py | 130 +++++++++--------- .../src/models/efficientnet/fusedmbconv.py | 6 + .../src/utils/timm/convert_efficientnet.py | 12 +- 5 files changed, 100 insertions(+), 99 deletions(-) rename keras_hub/src/models/efficientnet/{convbnact.py => cba.py} (77%) rename keras_hub/src/models/efficientnet/{convbnact_test.py => cba_test.py} (69%) diff --git a/keras_hub/src/models/efficientnet/convbnact.py b/keras_hub/src/models/efficientnet/cba.py similarity index 77% rename from keras_hub/src/models/efficientnet/convbnact.py rename to keras_hub/src/models/efficientnet/cba.py index 5d8e379708..54fd8d87e9 100644 --- a/keras_hub/src/models/efficientnet/convbnact.py +++ b/keras_hub/src/models/efficientnet/cba.py @@ -12,27 +12,28 @@ } -class ConvBNActBlock(keras.layers.Layer): +class CBABlock(keras.layers.Layer): """ Args: input_filters: int, the number of input filters output_filters: int, the number of output filters - expand_ratio: default 1, the ratio by which input_filters are multiplied - to expand the structure in the middle expansion phase kernel_size: default 3, the kernel_size to apply to the expansion phase convolutions strides: default 1, the strides to apply to the expansion phase convolutions - se_ratio: default 0.0, The filters used in the Squeeze-Excitation phase, - and are chosen as the maximum between 1 and input_filters*se_ratio + data_format: str, channels_last (default) or channels_first, expects + tensors to be of shape (N, H, W, C) or (N, C, H, W) respectively batch_norm_momentum: default 0.9, the BatchNormalization momentum + batch_norm_epsilon: default 1e-3, the BatchNormalization epsilon activation: default "swish", the activation function used between convolution operations dropout: float, the optional dropout rate to apply before the output convolution, defaults to 0.2 + nores: bool, default False, forces no residual connection if True, + otherwise allows it if False. 
Returns: - A tensor representing a feature map, passed through the FusedMBConv + A tensor representing a feature map, passed through the ConvBNAct block Note: @@ -119,18 +120,20 @@ def call(self, inputs): return x def get_config(self): - config = { - "input_filters": self.input_filters, - "output_filters": self.output_filters, - "kernel_size": self.kernel_size, - "strides": self.strides, - "data_format": self.data_format, - "batch_norm_momentum": self.batch_norm_momentum, - "batch_norm_epsilon": self.batch_norm_epsilon, - "activation": self.activation, - "dropout": self.dropout, - "nores": self.nores, - } + config = super().get_config() + config.update( + { + "input_filters": self.input_filters, + "output_filters": self.output_filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "data_format": self.data_format, + "batch_norm_momentum": self.batch_norm_momentum, + "batch_norm_epsilon": self.batch_norm_epsilon, + "activation": self.activation, + "dropout": self.dropout, + "nores": self.nores, + } + ) - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + return config diff --git a/keras_hub/src/models/efficientnet/convbnact_test.py b/keras_hub/src/models/efficientnet/cba_test.py similarity index 69% rename from keras_hub/src/models/efficientnet/convbnact_test.py rename to keras_hub/src/models/efficientnet/cba_test.py index 720ed57365..ec028b1239 100644 --- a/keras_hub/src/models/efficientnet/convbnact_test.py +++ b/keras_hub/src/models/efficientnet/cba_test.py @@ -1,13 +1,13 @@ import keras -from keras_hub.src.models.efficientnet.convbnact import ConvBNActBlock +from keras_hub.src.models.efficientnet.cba import CBABlock from keras_hub.src.tests.test_case import TestCase -class ConvBNActBlockTest(TestCase): +class CBABlockTest(TestCase): def test_same_input_output_shapes(self): inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") - layer = ConvBNActBlock(input_filters=32, output_filters=32) + layer = CBABlock(input_filters=32, output_filters=32) output = layer(inputs) self.assertEquals(output.shape, (1, 64, 64, 32)) @@ -15,7 +15,7 @@ def test_same_input_output_shapes(self): def test_different_input_output_shapes(self): inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32") - layer = ConvBNActBlock(input_filters=32, output_filters=48) + layer = CBABlock(input_filters=32, output_filters=48) output = layer(inputs) self.assertEquals(output.shape, (1, 64, 64, 48)) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 3f17cad590..1dd520e04e 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -3,7 +3,7 @@ import keras from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.models.efficientnet.convbnact import ConvBNActBlock +from keras_hub.src.models.efficientnet.cba import CBABlock from keras_hub.src.models.efficientnet.fusedmbconv import FusedMBConvBlock from keras_hub.src.models.efficientnet.mbconv import MBConvBlock from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone @@ -27,15 +27,12 @@ class EfficientNetBackbone(FeaturePyramidBackbone): (https://arxiv.org/abs/2104.00298) (ICML 2021) Args: - width_coefficient: float, scaling coefficient for network width. - depth_coefficient: float, scaling coefficient for network depth. - dropout: float, dropout rate at skip connections. 
The default
-            value is set to 0.2.
-        depth_divisor: integer, a unit of network width. The default value is
-            set to 8.
-        activation: activation function to use between each convolutional layer.
-        input_shape: optional shape tuple, it should have exactly 3 input
-            channels.
+        stackwise_width_coefficient: list[float] or float, scaling coefficient
+            for network width. If single float, it is assumed that this value
+            applies to all stacks.
+        stackwise_depth_coefficient: list[float] or float, scaling coefficient
+            for network depth. If single float, it is assumed that this value
+            applies to all stacks.
         stackwise_kernel_sizes: list of ints, the kernel sizes used for each
             conv block.
         stackwise_num_repeats: list of ints, number of times to repeat each
@@ -62,8 +59,17 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
         stackwise_nores_option: list of bools, toggles if residual connection
             is not used. If False (default), the stack will use residual
             connections, otherwise not.
+        dropout: float, dropout rate at skip connections. The default
+            value is set to 0.2.
+        depth_divisor: integer, a unit of network width. The default value is
+            set to 8.
         min_depth: integer, minimum number of filters. Can be None and ignored
             if use_depth_divisor_as_min_depth is set to True.
+        activation: activation function to use between each convolutional layer.
+        input_shape: optional shape tuple, it should have exactly 3 input
+            channels.
+
+
         include_initial_padding: bool, whether to include initial zero padding
             (as per v1).
         use_depth_divisor_as_min_depth: bool, whether to use depth_divisor as
@@ -100,8 +106,8 @@ class EfficientNetBackbone(FeaturePyramidBackbone):
     def __init__(
         self,
         *,
-        width_coefficient,
-        depth_coefficient,
+        stackwise_width_coefficient=None,
+        stackwise_depth_coefficient=None,
         stackwise_kernel_sizes,
         stackwise_num_repeats,
         stackwise_input_filters,
@@ -128,6 +134,16 @@ def __init__(
         num_features=1280,
         **kwargs,
     ):
+        num_stacks = len(stackwise_kernel_sizes)
+        if "depth_coefficient" in kwargs:
+            stackwise_depth_coefficient = [
+                kwargs.pop("depth_coefficient")
+            ] * num_stacks
+        if "width_coefficient" in kwargs:
+            stackwise_width_coefficient = [
+                kwargs.pop("width_coefficient")
+            ] * num_stacks
+
         image_input = keras.layers.Input(shape=input_shape)
         x = image_input  # Intermediate result.
@@ -140,7 +156,7 @@ def __init__(
         # Build stem
         stem_filters = round_filters(
             filters=stackwise_input_filters[0],
-            width_coefficient=width_coefficient,
+            width_coefficient=stackwise_width_coefficient[0],
             min_depth=min_depth,
             depth_divisor=depth_divisor,
             use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth,
@@ -172,26 +188,19 @@ def __init__(
         self._pyramid_outputs = {}
         curr_pyramid_level = 1
 
-        num_stacks = len(stackwise_kernel_sizes)
-
-        if isinstance(depth_coefficient, tuple):
-            assert len(depth_coefficient) == num_stacks
-        else:
-            depth_coefficient = (depth_coefficient,) * num_stacks
-            dc_originally_scalar = True
-
         for i in range(num_stacks):
             num_repeats = stackwise_num_repeats[i]
             input_filters = stackwise_input_filters[i]
             output_filters = stackwise_output_filters[i]
             force_input_filters = stackwise_force_input_filters[i]
             nores = stackwise_nores_option[i]
-            stack_depth_coefficient = depth_coefficient[i]
+            stack_width_coefficient = stackwise_width_coefficient[i]
+            stack_depth_coefficient = stackwise_depth_coefficient[i]
 
             # Update block input and output filters based on depth multiplier.
input_filters = round_filters( filters=input_filters, - width_coefficient=width_coefficient, + width_coefficient=stack_width_coefficient, min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -199,7 +208,7 @@ def __init__( ) output_filters = round_filters( filters=output_filters, - width_coefficient=width_coefficient, + width_coefficient=stack_width_coefficient, min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -227,7 +236,7 @@ def __init__( if force_input_filters > 0: input_filters = round_filters( filters=force_input_filters, - width_coefficient=width_coefficient, + width_coefficient=stack_width_coefficient, min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -254,44 +263,36 @@ def __init__( batch_norm_epsilon=batch_norm_epsilon, name=block_name, ) - elif stackwise_block_type in ("fused", "unfused"): - block = get_conv_constructor(stackwise_block_type)( - input_filters=input_filters, - output_filters=output_filters, - expand_ratio=stackwise_expansion_ratios[i], - kernel_size=stackwise_kernel_sizes[i], - strides=strides, - data_format=data_format, - se_ratio=squeeze_and_excite_ratio, - activation=activation, - dropout=dropout * block_id / blocks, - batch_norm_momentum=batch_norm_momentum, - batch_norm_epsilon=batch_norm_epsilon, - nores=nores, - name=block_name, - ) - x = block(x) - else: # cba block - block = ConvBNActBlock( - input_filters=input_filters, - output_filters=output_filters, - kernel_size=stackwise_kernel_sizes[i], - strides=strides, - data_format=data_format, - activation=activation, - dropout=dropout * block_id / blocks, - batch_norm_momentum=batch_norm_momentum, - batch_norm_epsilon=batch_norm_epsilon, - nores=nores, - name=block_name, - ) + else: + constructor = get_conv_constructor(stackwise_block_type) + block_kwargs = { + "input_filters": input_filters, + "output_filters": output_filters, + "kernel_size": stackwise_kernel_sizes[i], + "strides": strides, + "data_format": data_format, + "activation": activation, + "dropout": dropout * block_id / blocks, + "batch_norm_momentum": batch_norm_momentum, + "batch_norm_epsilon": batch_norm_epsilon, + "nores": nores, + "name": block_name, + } + + if stackwise_block_type in ("fused", "unfused"): + block_kwargs["expand_ratio"] = ( + stackwise_expansion_ratios[i] + ) + block_kwargs["se_ratio"] = squeeze_and_excite_ratio + + block = constructor(**block_kwargs) x = block(x) block_id += 1 # Build top top_filters = round_filters( filters=num_features, - width_coefficient=width_coefficient, + width_coefficient=stackwise_width_coefficient[-1], min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -324,13 +325,8 @@ def __init__( super().__init__(inputs=image_input, outputs=x, **kwargs) # === Config === - self.width_coefficient = width_coefficient - - if dc_originally_scalar: - self.depth_coefficient = depth_coefficient[0] - else: - self.depth_coefficient = depth_coefficient - + self.stackwise_width_coefficient = stackwise_width_coefficient + self.stackwise_depth_coefficient = stackwise_depth_coefficient self.dropout = dropout self.depth_divisor = depth_divisor self.min_depth = min_depth @@ -360,8 +356,8 @@ def get_config(self): config = super().get_config() config.update( { - "width_coefficient": self.width_coefficient, - "depth_coefficient": self.depth_coefficient, + 
"stackwise_width_coefficient": self.stackwise_width_coefficient, + "stackwise_depth_coefficient": self.stackwise_depth_coefficient, "dropout": self.dropout, "depth_divisor": self.depth_divisor, "min_depth": self.min_depth, @@ -618,7 +614,7 @@ def get_conv_constructor(conv_type): elif conv_type == "fused": return FusedMBConvBlock elif conv_type == "cba": - return ConvBNActBlock + return CBABlock else: raise ValueError( "Expected `conv_type` to be " diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 2d01c4663a..d0b7f79e70 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -44,6 +44,8 @@ class FusedMBConvBlock(keras.layers.Layer): convolutions strides: default 1, the strides to apply to the expansion phase convolutions + data_format: str, channels_last (default) or channels_first, expects + tensors to be of shape (N, H, W, C) or (N, C, H, W) respectively se_ratio: default 0.0, The filters used in the Squeeze-Excitation phase, and are chosen as the maximum between 1 and input_filters*se_ratio batch_norm_momentum: default 0.9, the BatchNormalization momentum @@ -54,6 +56,10 @@ class FusedMBConvBlock(keras.layers.Layer): convolution operations dropout: float, the optional dropout rate to apply before the output convolution, defaults to 0.2 + nores: bool, default False, forces no residual connection if True, + otherwise allows it if False. + projection_kernel_size: default 1, the kernel_size to apply to the + output projection phase convolution Returns: A tensor representing a feature map, passed through the FusedMBConv diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index c85a9efb2d..8d4600c47b 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -93,7 +93,7 @@ }, "rw_m": { "width_coefficient": 1.2, - "depth_coefficient": (1.2,) * 4 + (1.6,) * 2, + "stackwise_depth_coefficient": [1.2] * 4 + [1.6] * 2, "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], "stackwise_input_filters": [24, 24, 48, 64, 128, 160], @@ -247,18 +247,14 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): # Stages num_stacks = len(backbone.stackwise_kernel_sizes) - depth_coefficient = VARIANT_MAP[variant]["depth_coefficient"] - if isinstance(depth_coefficient, tuple): - assert len(depth_coefficient) == num_stacks - else: - depth_coefficient = (depth_coefficient,) * num_stacks - for stack_index in range(num_stacks): block_type = backbone.stackwise_block_types[stack_index] expansion_ratio = backbone.stackwise_expansion_ratios[stack_index] repeats = backbone.stackwise_num_repeats[stack_index] - stack_depth_coefficient = depth_coefficient[stack_index] + stack_depth_coefficient = backbone.stackwise_depth_coefficient[ + stack_index + ] repeats = int(math.ceil(stack_depth_coefficient * repeats)) From e1a597db14bcd39f0161c26a19ab092056d8acd0 Mon Sep 17 00:00:00 2001 From: Piseth Ky Date: Mon, 25 Nov 2024 14:46:00 -0800 Subject: [PATCH 16/17] add projection_activation argument to fused blocks to fix timm discrepancy --- .../models/efficientnet/efficientnet_backbone.py | 5 +++++ keras_hub/src/models/efficientnet/fusedmbconv.py | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py 
b/keras_hub/src/models/efficientnet/efficientnet_backbone.py
index 1dd520e04e..755e2e021e 100644
--- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py
+++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py
@@ -285,6 +285,11 @@ def __init__(
                     )
                     block_kwargs["se_ratio"] = squeeze_and_excite_ratio
 
+                    if stackwise_block_type == "fused":
+                        block_kwargs["projection_activation"] = (
+                            projection_activation
+                        )
+
                     block = constructor(**block_kwargs)
                     x = block(x)
                 block_id += 1
diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py
index d0b7f79e70..4a022d5fe2 100644
--- a/keras_hub/src/models/efficientnet/fusedmbconv.py
+++ b/keras_hub/src/models/efficientnet/fusedmbconv.py
@@ -54,6 +54,8 @@ class FusedMBConvBlock(keras.layers.Layer):
             by 0 errors.
         activation: default "swish", the activation function used between
             convolution operations
+        projection_activation: default None, the activation function to use
+            after the output projection convolution
         dropout: float, the optional dropout rate to apply before the output
             convolution, defaults to 0.2
         nores: bool, default False, forces no residual connection if True,
@@ -81,6 +83,7 @@ def __init__(
         batch_norm_momentum=0.9,
         batch_norm_epsilon=1e-3,
         activation="swish",
+        projection_activation=None,
         dropout=0.2,
         nores=False,
         projection_kernel_size=1,
@@ -97,6 +100,7 @@ def __init__(
         self.batch_norm_momentum = batch_norm_momentum
         self.batch_norm_epsilon = batch_norm_epsilon
         self.activation = activation
+        self.projection_activation = projection_activation
         self.dropout = dropout
         self.nores = nores
         self.projection_kernel_size = projection_kernel_size
@@ -171,6 +175,11 @@ def __init__(
             name=self.name + "project_bn",
         )
 
+        if self.projection_activation:
+            self.projection_act = keras.layers.Activation(
+                self.projection_activation, name=self.name + "projection_act"
+            )
+
         if self.dropout:
             self.dropout_layer = keras.layers.Dropout(
                 self.dropout,
@@ -213,8 +222,8 @@ def call(self, inputs):
         x = self.output_conv_pad(x)
         x = self.output_conv(x)
         x = self.bn2(x)
-        if self.expand_ratio == 1:
-            x = self.act(x)
+        if self.expand_ratio == 1 and self.projection_activation:
+            x = self.projection_act(x)
 
         # Residual:
         if (
@@ -239,6 +248,7 @@ def get_config(self):
             "batch_norm_momentum": self.batch_norm_momentum,
             "batch_norm_epsilon": self.batch_norm_epsilon,
             "activation": self.activation,
+            "projection_activation": self.projection_activation,
             "dropout": self.dropout,
             "nores": self.nores,
             "projection_kernel_size": self.projection_kernel_size,

From 5ed664ff96424592ec3fd6f7ea79c7aa0bb53e98 Mon Sep 17 00:00:00 2001
From: Piseth Ky
Date: Tue, 3 Dec 2024 15:13:46 -0800
Subject: [PATCH 17/17] additional review updates

---
 keras_hub/src/models/efficientnet/cba.py     | 22 ++++----
 .../efficientnet/efficientnet_backbone.py    | 28 +++++-----
 .../src/models/efficientnet/fusedmbconv.py   | 28 +++++-----
 keras_hub/src/models/efficientnet/mbconv.py  | 30 ++++++-----
 .../src/utils/timm/convert_efficientnet.py   | 54 +++++++++----------
 5 files changed, 84 insertions(+), 78 deletions(-)

diff --git a/keras_hub/src/models/efficientnet/cba.py b/keras_hub/src/models/efficientnet/cba.py
index 54fd8d87e9..4e145282aa 100644
--- a/keras_hub/src/models/efficientnet/cba.py
+++ b/keras_hub/src/models/efficientnet/cba.py
@@ -2,15 +2,6 @@
 
 BN_AXIS = 3
 
-CONV_KERNEL_INITIALIZER = {
-    "class_name": "VarianceScaling",
-    "config": {
-        "scale": 2.0,
-        "mode": "fan_out",
-        "distribution": "truncated_normal",
-    },
-}
-
 
 class CBABlock(keras.layers.Layer):
     """
@@ 
-75,7 +66,7 @@ def __init__( filters=self.output_filters, kernel_size=kernel_size, strides=strides, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), padding="valid", data_format=data_format, use_bias=False, @@ -98,6 +89,17 @@ def __init__( name=self.name + "drop", ) + def _conv_kernel_initializer( + self, + scale=2.0, + mode="fan_out", + distribution="truncated_normal", + seed=None, + ): + return keras.initializers.VarianceScaling( + scale=scale, mode=mode, distribution=distribution, seed=seed + ) + def build(self, input_shape): if self.name is None: self.name = keras.backend.get_uid("block0") diff --git a/keras_hub/src/models/efficientnet/efficientnet_backbone.py b/keras_hub/src/models/efficientnet/efficientnet_backbone.py index 755e2e021e..c71979ad0d 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_backbone.py +++ b/keras_hub/src/models/efficientnet/efficientnet_backbone.py @@ -27,10 +27,10 @@ class EfficientNetBackbone(FeaturePyramidBackbone): (https://arxiv.org/abs/2104.00298) (ICML 2021) Args: - stackwise_width_coefficient: list[float] or float, scaling coefficient + stackwise_width_coefficients: list[float], scaling coefficient for network width. If single float, it is assumed that this value applies to all stacks. - stackwise_depth_coefficient: list[float] or float, scaling coefficient + stackwise_depth_coefficients: list[float], scaling coefficient for network depth. If single float, it is assumed that this value applies to all stacks. stackwise_kernel_sizes: list of ints, the kernel sizes used for each @@ -106,8 +106,8 @@ class EfficientNetBackbone(FeaturePyramidBackbone): def __init__( self, *, - stackwise_width_coefficient=None, - stackwise_depth_coefficient=None, + stackwise_width_coefficients=None, + stackwise_depth_coefficients=None, stackwise_kernel_sizes, stackwise_num_repeats, stackwise_input_filters, @@ -136,11 +136,11 @@ def __init__( ): num_stacks = len(stackwise_kernel_sizes) if "depth_coefficient" in kwargs: - stackwise_depth_coefficient = [ + stackwise_depth_coefficients = [ kwargs.pop("depth_coefficient") ] * num_stacks if "width_coefficient" in kwargs: - stackwise_width_coefficient = [ + stackwise_width_coefficients = [ kwargs.pop("width_coefficient") ] * num_stacks @@ -156,7 +156,7 @@ def __init__( # Build stem stem_filters = round_filters( filters=stackwise_input_filters[0], - width_coefficient=stackwise_width_coefficient[0], + width_coefficient=stackwise_width_coefficients[0], min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -194,8 +194,8 @@ def __init__( output_filters = stackwise_output_filters[i] force_input_filters = stackwise_force_input_filters[i] nores = stackwise_nores_option[i] - stack_width_coefficient = stackwise_width_coefficient[i] - stack_depth_coefficient = stackwise_depth_coefficient[i] + stack_width_coefficient = stackwise_width_coefficients[i] + stack_depth_coefficient = stackwise_depth_coefficients[i] # Update block input and output filters based on depth multiplier. 
input_filters = round_filters( @@ -297,7 +297,7 @@ def __init__( # Build top top_filters = round_filters( filters=num_features, - width_coefficient=stackwise_width_coefficient[-1], + width_coefficient=stackwise_width_coefficients[-1], min_depth=min_depth, depth_divisor=depth_divisor, use_depth_divisor_as_min_depth=use_depth_divisor_as_min_depth, @@ -330,8 +330,8 @@ def __init__( super().__init__(inputs=image_input, outputs=x, **kwargs) # === Config === - self.stackwise_width_coefficient = stackwise_width_coefficient - self.stackwise_depth_coefficient = stackwise_depth_coefficient + self.stackwise_width_coefficients = stackwise_width_coefficients + self.stackwise_depth_coefficients = stackwise_depth_coefficients self.dropout = dropout self.depth_divisor = depth_divisor self.min_depth = min_depth @@ -361,8 +361,8 @@ def get_config(self): config = super().get_config() config.update( { - "stackwise_width_coefficient": self.stackwise_width_coefficient, - "stackwise_depth_coefficient": self.stackwise_depth_coefficient, + "stackwise_width_coefficients": self.stackwise_width_coefficients, + "stackwise_depth_coefficients": self.stackwise_depth_coefficients, "dropout": self.dropout, "depth_divisor": self.depth_divisor, "min_depth": self.min_depth, diff --git a/keras_hub/src/models/efficientnet/fusedmbconv.py b/keras_hub/src/models/efficientnet/fusedmbconv.py index 4a022d5fe2..01934b7622 100644 --- a/keras_hub/src/models/efficientnet/fusedmbconv.py +++ b/keras_hub/src/models/efficientnet/fusedmbconv.py @@ -2,15 +2,6 @@ BN_AXIS = 3 -CONV_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 2.0, - "mode": "fan_out", - "distribution": "truncated_normal", - }, -} - class FusedMBConvBlock(keras.layers.Layer): """Implementation of the FusedMBConv block @@ -116,7 +107,7 @@ def __init__( filters=self.filters, kernel_size=kernel_size, strides=strides, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), padding="valid", data_format=data_format, use_bias=False, @@ -138,7 +129,7 @@ def __init__( padding="same", data_format=data_format, activation=self.activation, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), name=self.name + "se_reduce", ) @@ -148,7 +139,7 @@ def __init__( padding="same", data_format=data_format, activation="sigmoid", - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), name=self.name + "se_expand", ) @@ -161,7 +152,7 @@ def __init__( filters=self.output_filters, kernel_size=projection_kernel_size, strides=1, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), padding="valid", data_format=data_format, use_bias=False, @@ -187,6 +178,17 @@ def __init__( name=self.name + "drop", ) + def _conv_kernel_initializer( + self, + scale=2.0, + mode="fan_out", + distribution="truncated_normal", + seed=None, + ): + return keras.initializers.VarianceScaling( + scale=scale, mode=mode, distribution=distribution, seed=seed + ) + def build(self, input_shape): if self.name is None: self.name = keras.backend.get_uid("block0") diff --git a/keras_hub/src/models/efficientnet/mbconv.py b/keras_hub/src/models/efficientnet/mbconv.py index 1cd9263c04..20afab4e85 100644 --- a/keras_hub/src/models/efficientnet/mbconv.py +++ b/keras_hub/src/models/efficientnet/mbconv.py @@ -2,15 +2,6 @@ BN_AXIS = 3 -CONV_KERNEL_INITIALIZER = { - "class_name": "VarianceScaling", - "config": { - "scale": 2.0, - "mode": 
"fan_out", - "distribution": "truncated_normal", - }, -} - class MBConvBlock(keras.layers.Layer): def __init__( @@ -99,7 +90,7 @@ def __init__( filters=self.filters, kernel_size=1, strides=1, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), padding="same", data_format=data_format, use_bias=False, @@ -117,7 +108,7 @@ def __init__( self.depthwise = keras.layers.DepthwiseConv2D( kernel_size=self.kernel_size, strides=self.strides, - depthwise_initializer=CONV_KERNEL_INITIALIZER, + depthwise_initializer=self._conv_kernel_initializer(), padding="same", data_format=data_format, use_bias=False, @@ -137,7 +128,7 @@ def __init__( padding="same", data_format=data_format, activation=self.activation, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), name=self.name + "se_reduce", ) @@ -147,7 +138,7 @@ def __init__( padding="same", data_format=data_format, activation="sigmoid", - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), name=self.name + "se_expand", ) @@ -161,7 +152,7 @@ def __init__( filters=self.output_filters, kernel_size=projection_kernel_size, strides=1, - kernel_initializer=CONV_KERNEL_INITIALIZER, + kernel_initializer=self._conv_kernel_initializer(), padding="valid", data_format=data_format, use_bias=False, @@ -182,6 +173,17 @@ def __init__( name=self.name + "drop", ) + def _conv_kernel_initializer( + self, + scale=2.0, + mode="fan_out", + distribution="truncated_normal", + seed=None, + ): + return keras.initializers.VarianceScaling( + scale=scale, mode=mode, distribution=distribution, seed=seed + ) + def build(self, input_shape): if self.name is None: self.name = keras.backend.get_uid("block0") diff --git a/keras_hub/src/utils/timm/convert_efficientnet.py b/keras_hub/src/utils/timm/convert_efficientnet.py index 8d4600c47b..fcedb2ecd1 100644 --- a/keras_hub/src/utils/timm/convert_efficientnet.py +++ b/keras_hub/src/utils/timm/convert_efficientnet.py @@ -11,44 +11,44 @@ VARIANT_MAP = { "b0": { - "width_coefficient": 1.0, - "depth_coefficient": 1.0, + "stackwise_width_coefficients": [1.0] * 7, + "stackwise_depth_coefficients": [1.0] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b1": { - "width_coefficient": 1.0, - "depth_coefficient": 1.1, + "stackwise_width_coefficients": [1.0] * 7, + "stackwise_depth_coefficients": [1.1] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b2": { - "width_coefficient": 1.1, - "depth_coefficient": 1.2, + "stackwise_width_coefficients": [1.1] * 7, + "stackwise_depth_coefficients": [1.2] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b3": { - "width_coefficient": 1.2, - "depth_coefficient": 1.4, + "stackwise_width_coefficients": [1.2] * 7, + "stackwise_depth_coefficients": [1.4] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b4": { - "width_coefficient": 1.4, - "depth_coefficient": 1.8, + "stackwise_width_coefficients": [1.4] * 7, + "stackwise_depth_coefficients": [1.8] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "b5": { - "width_coefficient": 1.6, - "depth_coefficient": 2.2, + "stackwise_width_coefficients": [1.6] * 7, + "stackwise_depth_coefficients": [2.2] * 7, "stackwise_squeeze_and_excite_ratios": [0.25] * 7, }, "lite0": { - "width_coefficient": 1.0, - "depth_coefficient": 1.0, + "stackwise_width_coefficients": [1.0] * 7, + "stackwise_depth_coefficients": [1.0] * 7, "stackwise_squeeze_and_excite_ratios": [0] * 7, "activation": "relu6", }, "el": { 
- "width_coefficient": 1.2, - "depth_coefficient": 1.4, + "stackwise_width_coefficients": [1.2] * 6, + "stackwise_depth_coefficients": [1.4] * 6, "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], "stackwise_input_filters": [32, 24, 32, 48, 96, 144], @@ -62,8 +62,8 @@ "activation": "relu", }, "em": { - "width_coefficient": 1.0, - "depth_coefficient": 1.1, + "stackwise_width_coefficients": [1.0] * 6, + "stackwise_depth_coefficients": [1.1] * 6, "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], "stackwise_input_filters": [32, 24, 32, 48, 96, 144], @@ -77,8 +77,8 @@ "activation": "relu", }, "es": { - "width_coefficient": 1.0, - "depth_coefficient": 1.0, + "stackwise_width_coefficients": [1.0] * 6, + "stackwise_depth_coefficients": [1.0] * 6, "stackwise_kernel_sizes": [3, 3, 3, 5, 5, 5], "stackwise_num_repeats": [1, 2, 4, 5, 4, 2], "stackwise_input_filters": [32, 24, 32, 48, 96, 144], @@ -92,8 +92,8 @@ "activation": "relu", }, "rw_m": { - "width_coefficient": 1.2, - "stackwise_depth_coefficient": [1.2] * 4 + [1.6] * 2, + "stackwise_width_coefficients": [1.2] * 6, + "stackwise_depth_coefficients": [1.2] * 4 + [1.6] * 2, "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], "stackwise_input_filters": [24, 24, 48, 64, 128, 160], @@ -108,8 +108,8 @@ "num_features": 1792, }, "rw_s": { - "width_coefficient": 1.0, - "depth_coefficient": 1.0, + "stackwise_width_coefficients": [1.0] * 6, + "stackwise_depth_coefficients": [1.0] * 6, "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], "stackwise_input_filters": [24, 24, 48, 64, 128, 160], @@ -124,8 +124,8 @@ "num_features": 1792, }, "rw_t": { - "width_coefficient": 0.8, - "depth_coefficient": 0.9, + "stackwise_width_coefficients": [0.8] * 6, + "stackwise_depth_coefficients": [0.9] * 6, "stackwise_kernel_sizes": [3, 3, 3, 3, 3, 3], "stackwise_num_repeats": [2, 4, 4, 6, 9, 15], "stackwise_input_filters": [24, 24, 48, 64, 128, 160], @@ -252,7 +252,7 @@ def port_batch_normalization(keras_layer, hf_weight_prefix): block_type = backbone.stackwise_block_types[stack_index] expansion_ratio = backbone.stackwise_expansion_ratios[stack_index] repeats = backbone.stackwise_num_repeats[stack_index] - stack_depth_coefficient = backbone.stackwise_depth_coefficient[ + stack_depth_coefficient = backbone.stackwise_depth_coefficients[ stack_index ]