Commit 815b3d0: merge develop: fix conflict

dboyliao committed May 9, 2019
2 parents 0fc08d3 + afeba1e
Showing 9 changed files with 261 additions and 5 deletions.
103 changes: 101 additions & 2 deletions utensor_cgen/backend/operators.py
@@ -1,13 +1,13 @@
# -*- coding:utf8 -*-
import os

import idx2numpy as idx2np
import numpy as np

from utensor_cgen.logger import logger
from utensor_cgen.matcher import OpEqualityDelegate, _morphism
from utensor_cgen.transformer.optimizer import RefCntOptimizer
from utensor_cgen.utils import NamescopedKWArgsParser

from .snippets import * # pylint: disable=W0401,W0614

@@ -124,6 +124,7 @@ def __init__(self, op_info, **kwargs):
to_eval = parser.get('to_eval', False)
self._snippet = MaxOpSnippet(inputs, output, out_dtype, out_shape, ref_count, to_eval)


@OperatorFactory.register
class _MaxPool(_Operator):

@@ -228,6 +229,7 @@ def __init__(self, op_info, **kwargs):
x_dtype, w_dtype, out_dtype,
ref_count, to_eval)


@OperatorFactory.register
class _QuantizedMatMulOperator(_Operator):

@@ -249,6 +251,7 @@ def __init__(self, op_info, **kwargs):
x_dtype, w_dtype, out_dtype,
ref_counts, to_eval)


@OperatorFactory.register
class _ReluOperator(_Operator):

@@ -312,6 +315,27 @@ def __init__(self, op_info, **kwargs):
x_dtype, w_dtype, out_dtype,
ref_counts, to_eval)


@OperatorFactory.register
class _QuantizedMulOperator(_Operator):

op_type = "QuantizedMul"

def __init__(self, op_info, **kwargs):
_Operator.__init__(self)
inputs = [tensor_info.name for tensor_info in op_info.input_tensors]
outputs = [tensor_info.name for tensor_info in op_info.output_tensors]
x_dtype, w_dtype, out_dtype = (op_info.input_tensors[0].dtype,
op_info.input_tensors[1].dtype,
op_info.output_tensors[0].dtype)
parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE,
op_info.op_attr)
ref_counts = parser.get('ref_counts', [])
to_eval = parser.get('to_eval', False)
self._snippet = QuantizedMulOpSnippet(inputs, outputs,
x_dtype, w_dtype, out_dtype,
ref_counts, to_eval)

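Each operator above registers itself with OperatorFactory under its declared op_type. A minimal sketch of the dispatch this decorator presumably implements (the real OperatorFactory is defined elsewhere in operators.py; the lookup-method name below is an assumption):

# Sketch only: assumed register/lookup mechanism, not the actual implementation.
class OperatorFactory:
    _operators = {}  # maps op_type -> operator class

    @classmethod
    def register(cls, op_cls):
        # Used as a class decorator: key each class by its declared op_type.
        cls._operators[op_cls.op_type] = op_cls
        return op_cls

    @classmethod
    def create_op_snippet(cls, op_info, **kwargs):  # hypothetical name
        op_cls = cls._operators.get(op_info.op_type)
        if op_cls is None:
            raise ValueError("unsupported op type: {}".format(op_info.op_type))
        return op_cls(op_info, **kwargs)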

@OperatorFactory.register
class _RequantizationRangeOperator(_Operator):
@@ -385,6 +409,7 @@ def __init__(self, op_info, **kwargs):
ref_counts=ref_counts,
to_eval=to_eval)


@OperatorFactory.register
class _CMSIS_NN_FCOperator(_Operator):

@@ -410,6 +435,7 @@ def __init__(self, op_info, **kwargs):
out_dtype=out_dtype,
to_eval=to_eval)


@OperatorFactory.register
class _Conv2DOperator(_Operator):

@@ -432,6 +458,55 @@ def __init__(self, op_info, **kwargs):
in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtype=out_dtype,
ref_count=ref_count, to_eval=to_eval)


@OperatorFactory.register
class _FusedConv2DMaxpoolOperator(_Operator):

op_type = "FusedConv2DMaxpool"

def __init__(self, op_info, **kwargs):
_Operator.__init__(self)
inputs = [tensor_info.name for tensor_info in op_info.input_tensors]
output = op_info.output_tensors[0].name
in_dtype, filter_dtype = (op_info.input_tensors[0].dtype,
op_info.input_tensors[1].dtype)
out_dtype = op_info.output_tensors[0].dtype
strides = op_info.op_attr["strides"].value.ints_value
ksize = op_info.op_attr["ksize"].value.ints_value
padding = op_info.op_attr["padding"].value.decode('utf8')
parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE,
op_info.op_attr)
ref_count = parser.get('ref_counts', [0])[0]
to_eval = parser.get('to_eval', False)
self._snippet = FusedConv2DOpMaxpoolSnippet(inputs, output, strides, ksize, padding,
in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtype=out_dtype,
ref_count=ref_count, to_eval=to_eval)


@OperatorFactory.register
class _QuantizedFusedConv2DMaxpoolOperator(_Operator):

op_type = "QuantizedFusedConv2DMaxpool"

def __init__(self, op_info, **kwargs):
_Operator.__init__(self)
inputs = [tensor_info.name for tensor_info in op_info.input_tensors]
output = op_info.output_tensors[0].name
in_dtype, filter_dtype = (op_info.input_tensors[0].dtype,
op_info.input_tensors[1].dtype)
out_dtype = op_info.output_tensors[0].dtype
strides = op_info.op_attr["strides"].value.ints_value
ksize = op_info.op_attr["ksize"].value.ints_value
padding = op_info.op_attr["padding"].value.decode('utf8')
parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE,
op_info.op_attr)
ref_count = parser.get('ref_counts', [0])[0]
to_eval = parser.get('to_eval', False)
self._snippet = QuantizedFusedConv2DOpMaxpoolSnippet(inputs, output, strides, ksize, padding,
in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtype=out_dtype,
ref_count=ref_count, to_eval=to_eval)


@OperatorFactory.register
class _Conv2DQuantOperator(_Operator):

@@ -453,6 +528,8 @@ def __init__(self, op_info, **kwargs):
self._snippet = Conv2DQuantOpSnippet(inputs, outputs, strides, padding,
in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtypes=out_dtypes,
ref_counts=ref_counts, to_eval=to_eval)


@OperatorFactory.register
class _Uint8Q7OriginOperator(_Operator):

@@ -489,6 +566,7 @@ def __init__(self, op_info, **kwargs):
to_eval = parser.get('to_eval', False)
self._snippet = QuantRangeForMultiplicationSnippet(inputs, outputs, output_type, ref_counts, to_eval)


@OperatorFactory.register
@OpEqualityDelegate.is_compatible_with("Const", _morphism.Inline2ConstMorphism)
class _InlineOperator(_Operator):
@@ -528,6 +606,7 @@ def _prepare_inline_array_name(self, tensor_name):
prepared = "inline_{}".format(inline)
return prepared


@OperatorFactory.register
@OpEqualityDelegate.is_compatible_with("Inline", _morphism.Const2InlineMorphism)
class _ConstOperator(_Operator):
@@ -568,6 +647,7 @@ def _tf_save_data(self, path, value):
idx2np.convert_to_file(fid, np_array)
logger.info("saving %s", path)


@OperatorFactory.register
class _RamOperator(_Operator):

@@ -593,6 +673,7 @@ def _prepare_tensor_name(self, tensor_name):
prepared = tensor_name.replace(":", "_").replace("/", "_")
return prepared


@OperatorFactory.register
class _ShapeOperator(_Operator):
op_type = "Shape"
@@ -633,6 +714,7 @@ def __init__(self, op_info, **kwargs):
new_axis_mask, shrink_axis_mask,
ref_count, to_eval)


@OperatorFactory.register
class _PackOperator(_Operator):
op_type = "Pack"
@@ -665,3 +747,20 @@ def __init__(self, op_info, **kwargs):
to_eval = parser.get('to_eval', True)
out_dtype = op_info.output_tensors[0].dtype
self._snippet = SoftmaxOpSnippet(inputs, output, out_dtype, ref_count, to_eval)


@OperatorFactory.register
class _GatherOperator(_Operator):

op_type = "Gather" # tf op type

def __init__(self, op_info, **kwargs):
_Operator.__init__(self)
inputs = [tensor_info.name for tensor_info in op_info.input_tensors]
output = op_info.output_tensors[0].name
tf_dtype = op_info.input_tensors[0].dtype
parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE,
op_info.op_attr)
ref_count = parser.get('ref_counts', [0])[0]
to_eval = parser.get('to_eval', False)
self._snippet = GatherOpSnippet(inputs, output, tf_dtype, ref_count, to_eval)
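
Every constructor above routes transformer hints (ref_counts, to_eval) through NamescopedKWArgsParser. A rough sketch of the lookup it presumably performs, assuming namescoped keys are stored as "<namescope>__<name>" inside op_attr (the real parser lives in utensor_cgen/utils.py):

# Assumed behavior: a namescoped key shadows the bare key; missing keys fall back to the default.
class NamescopedKWArgsParser:
    def __init__(self, namescope, kwargs):
        self._namescope = namescope
        self._kwargs = kwargs

    def get(self, name, default=None):
        scoped_name = "{}__{}".format(self._namescope, name)
        if scoped_name in self._kwargs:
            return self._kwargs[scoped_name]
        return self._kwargs.get(name, default)
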
86 changes: 86 additions & 0 deletions utensor_cgen/backend/snippets/_snippets.py
@@ -17,8 +17,11 @@
"RequantizationRangeOpSnippet", "RequantizeOpSnippet",
"CommentSnippet", "ContextHeaderSnippet",
"ContextSnippetsContainer", "QuantizedAddOpSnippet",
"QuantizedMulOpSnippet",
"CreateTensorBinarySnippet", "WeightSnippet",
"ContextGlobalArrayContainer", "QuantRangeForMultiplicationSnippet",
"FusedConv2DOpMaxpoolSnippet", "QuantizedFusedConv2DOpMaxpoolSnippet",
"GatherOpSnippet",
"CreateTensorRamSnippet", "Uint8Q7OriginSnippet"]

# TODO: Better abstraction, i.e a better backend for code generation
@@ -371,6 +374,31 @@ def __init__(self, inputs, outputs, x_dtype, w_dtype, out_dtype,
self.template_vars["out_dtype"] = NP_TYPES_MAP[out_dtype].tensor_type_str
self.template_vars["to_eval"] = to_eval

class QuantizedMulOpSnippet(Snippet):
__template_name__ = "snippets/qmul_op.cpp"
__headers__ = set(['"uTensor/ops/MathOps.hpp"'])

def __init__(self, inputs, outputs, x_dtype, w_dtype, out_dtype,
ref_counts=None,
to_eval=False):
Snippet.__init__(self)
if ref_counts is None:
ref_counts = []
# hack to handle the different input ordering between TensorFlow and uTensor
inputs = _permute_args(inputs, [0, 2, 3, 1, 4, 5])
if ref_counts:
err_msg = ("incorrect number of ref_counts and outputs: {}, {}"
.format(ref_counts, outputs))
assert len(ref_counts) == len(outputs), err_msg
self.template_vars['ref_counts'] = ref_counts

self.template_vars["inputs"] = inputs
self.template_vars["outputs"] = outputs
self.template_vars["x_dtype"] = NP_TYPES_MAP[x_dtype].tensor_type_str
self.template_vars["w_dtype"] = NP_TYPES_MAP[w_dtype].tensor_type_str
self.template_vars["out_dtype"] = NP_TYPES_MAP[out_dtype].tensor_type_str
self.template_vars["to_eval"] = to_eval

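The _permute_args call above implements the ordering fix noted in the comment: TensorFlow emits QuantizedMul inputs as (x, y, x_min, x_max, y_min, y_max), while the uTensor kernel expects each tensor grouped with its own range. A sketch under that assumption (the real _permute_args is defined earlier in this module):

# Assumed semantics: select args in the order given by perm.
def _permute_args(args, perm):
    return [args[i] for i in perm]

tf_order = ["x", "y", "x_min", "x_max", "y_min", "y_max"]
# perm [0, 2, 3, 1, 4, 5] regroups each range next to its tensor:
assert _permute_args(tf_order, [0, 2, 3, 1, 4, 5]) == [
    "x", "x_min", "x_max", "y", "y_min", "y_max"]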

class QuantizeV2OpSnippet(Snippet):
__template_name__ = "snippets/quantV2_op.cpp"
@@ -616,6 +644,48 @@ def __init__(self, inputs, output, strides, padding,
self.template_vars["padding"] = padding
self.template_vars["to_eval"] = to_eval

class FusedConv2DOpMaxpoolSnippet(Snippet):
__template_name__ = "snippets/fused_conv2d_maxpool_op.cpp"
__headers__ = set(['"uTensor/ops/MatrixOps.hpp"'])

def __init__(self, inputs, output, strides, ksize, padding,
in_dtype, filter_dtype, out_dtype,
ref_count=0,
to_eval=False):
Snippet.__init__(self)
if ref_count:
self.template_vars["ref_count"] = ref_count
self.template_vars["inputs"] = inputs
self.template_vars["output"] = output
self.template_vars["in_dtype"] = NP_TYPES_MAP[in_dtype].tensor_type_str
self.template_vars["filter_dtype"] = NP_TYPES_MAP[filter_dtype].tensor_type_str
self.template_vars["out_dtype"] = NP_TYPES_MAP[out_dtype].tensor_type_str
self.template_vars["strides"] = strides
self.template_vars["ksize"] = ksize
self.template_vars["padding"] = padding
self.template_vars["to_eval"] = to_eval

class QuantizedFusedConv2DOpMaxpoolSnippet(Snippet):
__template_name__ = "snippets/fused_conv2d_maxpool_op.cpp"
__headers__ = set(['"uTensor/ops/MatrixOps.hpp"'])

def __init__(self, inputs, output, strides, ksize, padding,
in_dtype, filter_dtype, out_dtype,
ref_count=0,
to_eval=False):
Snippet.__init__(self)
if ref_count:
self.template_vars["ref_count"] = ref_count
self.template_vars["inputs"] = inputs
self.template_vars["output"] = output
self.template_vars["in_dtype"] = NP_TYPES_MAP[in_dtype].tensor_type_str
self.template_vars["filter_dtype"] = NP_TYPES_MAP[filter_dtype].tensor_type_str
self.template_vars["out_dtype"] = NP_TYPES_MAP[out_dtype].tensor_type_str
self.template_vars["strides"] = strides
self.template_vars["ksize"] = ksize
self.template_vars["padding"] = padding
self.template_vars["to_eval"] = to_eval

class Conv2DQuantOpSnippet(Snippet):
__template_name__ = "snippets/qconv2d_op.cpp"
__headers__ = set(['"uTensor/ops/MatrixOps.hpp"'])
@@ -738,3 +808,19 @@ def __init__(self,
self.template_vars["ref_counts"] = ref_counts
self.add_header('"{}"'.format(ctx_header_name))
self.add_header('"{}"'.format(ctx_weightheader_name))

class GatherOpSnippet(Snippet):
__template_name__ = "snippets/gather_op.cpp"
__headers__ = set(['"uTensor/ops/ArrayOps.hpp"'])

def __init__(self, inputs, output, np_dtype,
ref_count=0,
to_eval=False):
Snippet.__init__(self)
if ref_count:
self.template_vars["ref_count"] = ref_count
self.template_vars["in_dtype"] = NP_TYPES_MAP[np_dtype].tensor_type_str
self.template_vars["out_dtype"] = NP_TYPES_MAP[np_dtype].tensor_type_str
self.template_vars["inputs"] = inputs
self.template_vars["output"] = output
self.template_vars["to_eval"] = to_eval
14 changes: 14 additions & 0 deletions utensor_cgen/backend/snippets/templates/snippets/fused_conv2d_maxpool_op.cpp
@@ -0,0 +1,14 @@
{
{% if ref_count %}
ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}});
{% else %}
ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}");
{% endif %}
ctx.push(new FusedConvMaxpoolOp<{{in_dtype}}, {{filter_dtype}}, {{out_dtype}}>({ {% for s in strides[:-1]%}{{s}}, {%endfor%}{{strides[-1]}} }, { {% for s in ksize[:-1]%}{{s}}, {%endfor%}{{ksize[-1]}} }, {{padding}}),
{ {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" },
{ "{{output}}"});
{% if to_eval %}
ctx.eval();
{% endif %}
}

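For concreteness, with hypothetical values — inputs "input:0" and "weight:0", output "pool:0", no ref_count, strides [1, 1, 1, 1], ksize [1, 2, 2, 1], padding VALID, and uint8_t/uint8_t/float dtypes — the template above would render roughly as:

{
    // generated code (hypothetical tensor names)
    ctx.add(new RamTensor<float>(), "pool:0");
    ctx.push(new FusedConvMaxpoolOp<uint8_t, uint8_t, float>({ 1, 1, 1, 1 }, { 1, 2, 2, 1 }, VALID),
             { "input:0", "weight:0" },
             { "pool:0" });
}
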
16 changes: 16 additions & 0 deletions utensor_cgen/backend/snippets/templates/snippets/gather_op.cpp
@@ -0,0 +1,16 @@
{% if create_sptr %}
S_TENSOR {{sptr_name}};
{% endif %}
{
{% if ref_count %}
ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}});
{% else %}
ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}");
{% endif %}
ctx.push(new GatherOp<{{in_dtype}}, {{out_dtype}}>(),
{ {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" },
{ "{{output}}" });
{% if to_eval %}
ctx.eval();
{% endif %}
}
23 changes: 23 additions & 0 deletions utensor_cgen/backend/snippets/templates/snippets/qmul_op.cpp
@@ -0,0 +1,23 @@
{% if create_sptr %}
S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_names[-1]}};
{% endif %}
{
{% if ref_counts %}
ctx.add(new RamTensor<{{out_dtype}}>(), "{{outputs[0]}}", {{ref_counts[0]}});
ctx.add(new RamTensor<float>({1}), "{{outputs[1]}}", {{ref_counts[1]}});
ctx.add(new RamTensor<float>({1}), "{{outputs[2]}}", {{ref_counts[2]}});
{% else %}
ctx.add(new RamTensor<{{out_dtype}}>(), "{{outputs[0]}}");
ctx.add(new RamTensor<float>({1}), "{{outputs[1]}}");
ctx.add(new RamTensor<float>({1}), "{{outputs[2]}}");
{% endif %}
ctx.push(new QuantizedMulOp<{{x_dtype}}, {{w_dtype}}, {{out_dtype}}>(),
{ {%for tname in inputs[:-1] %}"{{tname}}", {% endfor %} "{{inputs[-1]}}" },
{ {%for tname in outputs[:-1] %}"{{tname}}", {% endfor %} "{{outputs[-1]}}" });
{% for sptr_name, output in zip(sptr_names, outputs) %}
{{sptr_name}} = ctx.get("{{output}}");
{% endfor %}
{% if to_eval %}
ctx.eval();
{% endif %}
}
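
Quantized ops carry the value range of each result as two extra float scalars, which is why this template allocates three output tensors per node. With hypothetical tensor names, no ref_counts, and create_sptr unset (sptr_names assumed empty), the else branch renders roughly as:

{
    // generated code (hypothetical tensor names)
    ctx.add(new RamTensor<int>(), "mul:0");
    ctx.add(new RamTensor<float>({1}), "mul:1");
    ctx.add(new RamTensor<float>({1}), "mul:2");
    ctx.push(new QuantizedMulOp<uint8_t, uint8_t, int>(),
             { "x:0", "x_min:0", "x_max:0", "y:0", "y_min:0", "y_max:0" },
             { "mul:0", "mul:1", "mul:2" });
}
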
18 changes: 18 additions & 0 deletions utensor_cgen/backend/snippets/templates/snippets/quant_fused_conv2d_maxpool_op.cpp
@@ -0,0 +1,18 @@
{
{% if ref_counts %}
ctx.add(new RamTensor<{{out_dtypes[0]}}>(), "{{outputs[0]}}", {{ref_counts[0]}});
ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), "{{outputs[1]}}", {{ref_counts[1]}});
ctx.add(new RamTensor<{{out_dtypes[2]}}>({1}), "{{outputs[2]}}", {{ref_counts[2]}});
{% else %}
ctx.add(new RamTensor<{{out_dtypes[0]}}>(), "{{outputs[0]}}");
ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), "{{outputs[1]}}");
ctx.add(new RamTensor<{{out_dtypes[2]}}>({1}), "{{outputs[2]}}");
{% endif %}
ctx.push(new QuantizedFusedConvMaxpoolOp<{{in_dtype}}, {{filter_dtype}}, {{out_dtypes[0]}}>({ {% for s in strides[:-1]%}{{s}}, {%endfor%}{{strides[-1]}} }, { {% for s in ksize[:-1]%}{{s}}, {%endfor%}{{ksize[-1]}} }, {{padding}}),
{ {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" },
{ {% for tname in outputs[:-1]%}"{{tname}}", {%endfor%}"{{outputs[-1]}}" });
{% if to_eval %}
ctx.eval();
{% endif %}
}

