Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Support for ConvTranspose Layers (1D and 2D) #644

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions hls4ml/backends/fpga/fpga_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
Activation,
BatchNormalization,
Conv1D,
Conv1DTranspose,
Conv2D,
Conv2DTranspose,
Dense,
Dot,
Embedding,
Expand Down Expand Up @@ -52,7 +54,9 @@ def __init__(self, name):
accum_layers = [
Dense,
Conv1D,
Conv1DTranspose,
Conv2D,
Conv2DTranspose,
SeparableConv1D,
SeparableConv2D,
Pooling1D,
Expand Down Expand Up @@ -158,6 +162,22 @@ def get_layer_mult_size(self, layer):
n_out = layer.get_attr('n_out')
return n_in, n_out

if 'Conv1DTranspose' in layer.class_name:
trfilt_width = (layer.get_attr('filt_width') + layer.get_attr('stride_width') - 1) \
// layer.get_attr('stride_width')
n_in = layer.get_attr('n_chan') * trfilt_width
n_out = layer.get_attr('n_filt')
return n_in, n_out

if 'Conv2DTranspose' in layer.class_name:
trfilt_width = (layer.get_attr('filt_width') + layer.get_attr('stride_width') - 1) \
// layer.get_attr('stride_width')
trfilt_height = (layer.get_attr('filt_height') + layer.get_attr('stride_height') - 1) \
// layer.get_attr('stride_height')
n_in = layer.get_attr('n_chan') * trfilt_height * trfilt_width
n_out = layer.get_attr('n_filt')
return n_in, n_out

if 'Conv1D' in layer.class_name:
n_in = layer.get_attr('n_chan') * layer.get_attr('filt_width')
n_out = layer.get_attr('n_filt')
Expand Down Expand Up @@ -713,7 +733,67 @@ def generate_conv1d_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, ke
" ) {{\n"
).format(index=layer_idx)
indent = ' '
for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
generated_code += indent * 2 + 'if (partition == {:>3}) {{\n'.format(partition_idx)
for pixel_idx, arr in enumerate(partition):
buffer_stmts = []
for j, v in enumerate(arr):
if v == 0:
val = '0'
else:
val = 'data[{}]'.format(int(v-1))
buffer_stmts.append('buffer[{}][{}] = {:>10};'.format(pixel_idx, j, val))
generated_code += indent * 3 + ' '.join(buffer_stmts) + '\n'
generated_code += '\n' + indent * 2 + '}\n'

generated_code += indent + '}\n'
generated_code += '};\n'

return generated_code

def _compute_conv1d_tr_im2col(self, input_shape, out_w, kernel=3, stride=1):
W, C = input_shape

tr_kernel = (kernel+stride-1)//stride

input_img = np.arange(1, W * C + 1)
im_matrix = np.zeros((tr_kernel * C * out_w, ))

index = 0
for i_ow in range(out_w):
for i_kw in range(tr_kernel):
for i_c in range(C):
# input column is just the output column shifted
input_col = i_ow - (tr_kernel-1) + i_kw
if (input_col >= 0 and input_col < W):
im_matrix[index] = input_img[input_col * C + i_c]
else:
im_matrix[index] = 0
index += 1
im_matrix = im_matrix.reshape(out_w, -1)
return im_matrix


def generate_conv1d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, out_W, kernel=3, stride=1):

im2col_matrix = self._compute_conv1d_tr_im2col(
(in_W, in_C),
out_W,
kernel,
stride,
)

generated_code = (
"template<class data_T, typename CONFIG_T>\n"
"class fill_buffer_{index} : public FillConv1DBuffer<data_T, CONFIG_T> {{\n"
" public:\n"
" static void fill_buffer(\n"
" data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],\n"
" data_T buffer[CONFIG_T::n_pixels][CONFIG_T::trfilt_width * CONFIG_T::n_chan],\n"
" const unsigned partition\n"
" ) {{\n"
).format(index=layer_idx)
indent = ' '
for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
generated_code += indent * 2 + f'if (partition == {partition_idx:>3}) {{\n'
for pixel_idx, arr in enumerate(partition):
Expand Down Expand Up @@ -862,6 +942,91 @@ def generate_conv2d_line_buffer_fn(

return generated_code

def _compute_conv2d_tr_im2col(self, input_shape, out_shape, kernel=(3, 3), stride=(1, 1)):
H, W, C = input_shape
kernel_h, kernel_w = kernel
stride_h, stride_w = stride
out_h, out_w = out_shape

tr_kernel_h = (kernel_h+stride_h-1)//stride_h
tr_kernel_w = (kernel_w+stride_w-1)//stride_w

input_img = np.arange(1, H * W * C + 1)
im_matrix = np.zeros((tr_kernel_h * tr_kernel_w * C * out_h * out_w, ))

index = 0
for i_oh in range(out_h):
for i_ow in range(out_w):
for i_kh in range(tr_kernel_h):
input_row = i_oh - (tr_kernel_h-1) + i_kh
for i_kw in range(tr_kernel_w):
for i_c in range(C):
if (input_row < 0 or input_row >= H):
im_matrix[index] = 0
else:
input_col = i_ow - (tr_kernel_w-1) + i_kw
if (input_col >= 0 and input_col < W):
im_matrix[index] = input_img[input_row * W * C + input_col * C + i_c]
else:
im_matrix[index] = 0
index += 1

im_matrix = im_matrix.reshape(out_h * out_w, -1)
return im_matrix


def generate_conv2d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_H, in_W, in_C, out_H, out_W, kernel=(3, 3), stride=(1, 1)):
    """Generate a C++ ``fill_buffer_<idx>`` class for a Conv2DTranspose layer.

    The generated class copies (or zero-fills) the im2col taps of one
    partition of output pixels from the flat input array into the buffer.

    Args:
        layer_idx (int): Layer index, used to name the generated class.
        n_partitions (int): Number of partitions the output pixels are split
            into; ``out_H * out_W`` must be divisible by it.
        in_H, in_W, in_C (int): Input height, width and channel count.
        out_H, out_W (int): Output height and width to process.
        kernel: Kernel size, scalar or ``(height, width)``.
        stride: Stride, scalar or ``(height, width)``.

    Returns:
        str: C++ source code of the generated fill_buffer class.
    """
    if isinstance(kernel, Iterable):
        kernel_height = kernel[0]
        kernel_width = kernel[1]
    else:
        kernel_height = kernel
        kernel_width = kernel

    if isinstance(stride, Iterable):
        stride_height = stride[0]
        stride_width = stride[1]
    else:
        stride_height = stride
        stride_width = stride

    # BUG FIX: was (out_W, out_W) — the output height was silently dropped,
    # so any layer with out_H != out_W produced a wrongly shaped im2col
    # matrix (and np.split below would fail or mis-partition).
    im2col_matrix = self._compute_conv2d_tr_im2col(
        (in_H, in_W, in_C),
        (out_H, out_W),
        (kernel_height, kernel_width),
        (stride_height, stride_width),
    )

    generated_code = (
        "template<class data_T, typename CONFIG_T>\n"
        "class fill_buffer_{index} : public FillConv2DBuffer<data_T, CONFIG_T> {{\n"
        " public:\n"
        " static void fill_buffer(\n"
        " data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],\n"
        " data_T buffer[CONFIG_T::n_pixels][CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_chan],\n"
        " const unsigned partition\n"
        " ) {{\n"
    ).format(index=layer_idx)
    indent = ' '

    # One `if (partition == k)` branch per partition; each branch assigns
    # every buffer slot of its pixels either a data element or literal 0.
    for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
        generated_code += indent * 2 + 'if (partition == {:>3}) {{\n'.format(partition_idx)
        for pixel_idx, arr in enumerate(partition):
            buffer_stmts = []
            for j, v in enumerate(arr):
                if v == 0:
                    val = '0'
                else:
                    # im2col entries are 1-based; data[] is 0-based.
                    val = 'data[{}]'.format(int(v-1))
                buffer_stmts.append('buffer[{}][{}] = {:>10};'.format(pixel_idx, j, val))
            generated_code += indent * 3 + ' '.join(buffer_stmts) + '\n'
        generated_code += '\n' + indent * 2 + '}\n'

    generated_code += indent + '}\n'
    generated_code += '};\n'

    return generated_code

@model_optimizer()
def write_hls(self, model):
    """Emit the HLS project for *model* via this backend's writer."""
    writer = self.writer
    writer.write_hls(model)
Expand Down
9 changes: 9 additions & 0 deletions hls4ml/backends/fpga/fpga_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,15 @@ def __init__(self, type_converter):

class StaticWeightVariableDefinition(VariableDefinition):
    """Emits the C++ declaration for a statically allocated weight array."""

    def definition_cpp(self, name_suffix='', as_reference=False):
        """Return the C++ array declaration string for this weight variable.

        With ``keep_dims > 0`` the first ``keep_dims`` axes are kept as
        separate C array dimensions and the remaining axes are flattened
        into a single trailing dimension; otherwise a flat 1D array of
        ``data_length`` elements is declared.
        """
        if self.keep_dims <= 0:
            return '{} {}[{}]'.format(self.type.name, self.name, self.data_length)
        kept = ''.join('[{}]'.format(d) for d in self.shape[:self.keep_dims])
        flattened = 1
        for d in self.shape[self.keep_dims:]:
            flattened *= d
        return '{} {}{}[{}]'.format(self.type.name, self.name, kept, flattened)

class StaticWeightVariableConverter(object):
Expand Down
38 changes: 35 additions & 3 deletions hls4ml/backends/fpga/passes/codegen.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
from hls4ml.model.optimizer import OptimizerPass
from hls4ml.model.layers import Conv1D, Conv2D
from hls4ml.model.layers import Conv1D, Conv2D, Conv1DTranspose, Conv2DTranspose
from hls4ml.model.types import Source

class GenerateConvIm2col(OptimizerPass):
''' Generates tcode for im2col step of 1D/2d convolution '''
def match(self, node):
return isinstance(node, (Conv1D, Conv2D)) and \
return isinstance(node, (Conv1D, Conv2D, Conv1DTranspose, Conv2DTranspose)) and \
node.model.config.get_config_value('IOType') == 'io_parallel'

def transform(self, model, node):
node_class = node.__class__.__name__
if '1D' in node_class:
if '1DTranspose' in node_class:
self._generate_im2col_1d_transpose(node)
elif '1D' in node_class:
self._generate_im2col_1d(node)
elif '2DTranspose' in node_class:
self._generate_im2col_2d_transpose(node)
elif '2D' in node_class:
self._generate_im2col_2d(node)
else:
Expand All @@ -30,6 +34,19 @@ def _generate_im2col_1d(self, node):

node.set_attr('line_buffer_codegen', Source(code_str))

def _generate_im2col_1d_transpose(self, node):
    """Generate the line-buffer fill code for a Conv1DTranspose node and
    attach it to the node as the 'line_buffer_codegen' attribute."""
    backend = node.model.config.backend
    in_shape = node.get_input_variable().shape
    code_str = backend.generate_conv1d_tr_line_buffer_fn(
        node.get_attr('index'),
        node.get_attr('n_partitions'),
        in_shape[0],
        in_shape[1],
        node.get_attr('proc_width'),
        kernel=node.get_attr('filt_width'),
        stride=node.get_attr('stride_width'),
    )
    node.set_attr('line_buffer_codegen', Source(code_str))

def _generate_im2col_2d(self, node):
code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
node.get_attr('index'),
Expand All @@ -43,3 +60,18 @@ def _generate_im2col_2d(self, node):
)

node.set_attr('line_buffer_codegen', Source(code_str))

def _generate_im2col_2d_transpose(self, node):
    """Generate the line-buffer fill code for a Conv2DTranspose node and
    attach it to the node as the 'line_buffer_codegen' attribute."""
    backend = node.model.config.backend
    in_shape = node.get_input_variable().shape
    code_str = backend.generate_conv2d_tr_line_buffer_fn(
        node.get_attr('index'),
        node.get_attr('n_partitions'),
        in_shape[0],
        in_shape[1],
        in_shape[2],
        node.get_attr('proc_height'),
        node.get_attr('proc_width'),
        kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
        stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
    )
    node.set_attr('line_buffer_codegen', Source(code_str))
Loading