From 556e0bcf66cabfcc5bf2bf11f3d8981981c0baa9 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sat, 4 May 2019 23:41:24 +0800 Subject: [PATCH 01/17] linear_reorder: wip fails at quantization: missing attribute --- utensor_cgen/backend/code_generator.py | 2 + utensor_cgen/cli.py | 2 +- utensor_cgen/experimental/ugraph_builder.py | 66 ++++++++++++++++-- utensor_cgen/experimental/ugraph_matcher.py | 12 ++++ utensor_cgen/ir/base.py | 8 ++- utensor_cgen/transformer/linear_reoder.py | 77 +++++++++++++++++++++ utensor_cgen/transformer/pipline.py | 2 + 7 files changed, 159 insertions(+), 10 deletions(-) create mode 100644 utensor_cgen/transformer/linear_reoder.py diff --git a/utensor_cgen/backend/code_generator.py b/utensor_cgen/backend/code_generator.py index f01be1d2..dc0fc5ab 100644 --- a/utensor_cgen/backend/code_generator.py +++ b/utensor_cgen/backend/code_generator.py @@ -81,6 +81,8 @@ def formatter(name, kwargs): ) quant_ugraph = self._transform_graph(ugraph, self.trans_methods) + from utensor_cgen.ir.misc.graph_viz import viz_graph + viz_graph('out_graph', True, quant_ugraph) _logger.info('Graph transormation done') if self.save_graph: diff --git a/utensor_cgen/cli.py b/utensor_cgen/cli.py index a03b7ce4..7a2e948e 100644 --- a/utensor_cgen/cli.py +++ b/utensor_cgen/cli.py @@ -46,7 +46,7 @@ def cli(): help="list of output nodes") @click.option("--transform-methods", type=NArgsKwargsParam(sep='|>'), - default='dropout|>quantize|>inline|>biasAdd|>remove_id_op|>refcnt', + default='dropout|>linear_reoder|>quantize|>inline|>biasAdd|>remove_id_op|>refcnt', help='optimization pipeline', metavar='METHOD[|>METHOD|>...]', show_default=True) diff --git a/utensor_cgen/experimental/ugraph_builder.py b/utensor_cgen/experimental/ugraph_builder.py index 09f5389e..a5545b95 100644 --- a/utensor_cgen/experimental/ugraph_builder.py +++ b/utensor_cgen/experimental/ugraph_builder.py @@ -13,7 +13,8 @@ from utensor_cgen.experimental.ugraph_util_functions import * -__all__ = ["transpose_offline", "Const_Op", "Ram_Op", "Const_Reshape", "Uint8Q7Origin_Op", "CMSIS_FC_Op", "QuantRangeForMultiplicationu8u8int32_Op"] +__all__ = ["transpose_offline", "Const_Op", "Ram_Op", "Const_Reshape", "Uint8Q7Origin_Op", "CMSIS_FC_Op", "QuantRangeForMultiplicationu8u8int32_Op", + "conv2d_op", "relu_op", "maxpool_op"] # Let us get unique names for custom injected nodes def static_vars(**kwargs): @@ -67,7 +68,7 @@ def Const_Op(name, np_array, ugraph): ugraph=tmp_graph, op_attr=bs_ops_attr(np_array) ) - ugraph.add_op(const_op_info) + ugraph.add_op(const_op_info, False) return const_op_info.output_tensors @@ -99,7 +100,7 @@ def Reshape_Op(name, input_tensor, shape_tensor, ugraph): ugraph=tmp_ugraph ) - ugraph.add_op(reshape_opInfo) + ugraph.add_op(reshape_opInfo, False) return reshape_opInfo.output_tensors @@ -130,7 +131,7 @@ def Uint8Q7Origin_Op(name, inputs, ugraph): # if(name == 'convert_uint8_q7_Relu/eightbit_transpose_0_q7'): # import pdb; pdb.set_trace() - ugraph.add_op(q7_op_info) + ugraph.add_op(q7_op_info, False) return q7_op_info.output_tensors @@ -159,7 +160,7 @@ def CMSIS_FC_Op(name, in0, in1, bias, bShift, oShift, scratch, ugraph): ugraph=tmp_ugraph ) - ugraph.add_op(fc_op_info) + ugraph.add_op(fc_op_info, False) return fc_op_info.output_tensors def QuantRangeForMultiplicationu8u8int32_Op(name, a_range, b_range, ugraph): @@ -186,6 +187,57 @@ def QuantRangeForMultiplicationu8u8int32_Op(name, a_range, b_range, ugraph): ugraph=tmp_ugraph ) - ugraph.add_op(new_range_op_info) + ugraph.add_op(new_range_op_info, False) - return 
new_range_op_info.output_tensors \ No newline at end of file + return new_range_op_info.output_tensors + +def conv2d_op(name, inputs, ugraph): + tmp_ugraph = uTensorGraph(output_nodes=[name]) + conv_out = TensorInfo(name=name + ":0", + op_name=name, + dtype=np.dtype('float32'), + shape=inputs[0].shape, + ugraph=tmp_ugraph + ) + conv2d_op_info = OperationInfo(name=name, + input_tensors=inputs, + output_tensors=[conv_out], + op_type="Conv2D", + backend="tensorflow", + ugraph=tmp_ugraph) + + ugraph.add_op(conv2d_op_info, False) + return conv2d_op_info.output_tensors +def relu_op(name, inputs, ugraph): + tmp_ugraph = uTensorGraph(output_nodes=[name]) + relu_out = TensorInfo(name=name + ":0", + op_name=name, + dtype=np.dtype('float32'), + shape=inputs[0].shape, + ugraph=tmp_ugraph + ) + relu_op_info = OperationInfo(name=name, + input_tensors=inputs, + output_tensors=[relu_out], + op_type="Relu", + backend="tensorflow", + ugraph=tmp_ugraph) + + ugraph.add_op(relu_op_info, False) + return relu_op_info.output_tensors +def maxpool_op(name, inputs, ugraph): + tmp_ugraph = uTensorGraph(output_nodes=[name]) + max_out = TensorInfo(name=name + ":0", + op_name=name, + dtype=np.dtype('float32'), + shape=inputs[0].shape, + ugraph=tmp_ugraph + ) + max_op_info = OperationInfo(name=name, + input_tensors=inputs, + output_tensors=[max_out], + op_type="MaxPool", + backend="tensorflow", + ugraph=tmp_ugraph) + ugraph.add_op(max_op_info, False) + return max_op_info.output_tensors \ No newline at end of file diff --git a/utensor_cgen/experimental/ugraph_matcher.py b/utensor_cgen/experimental/ugraph_matcher.py index 2a06218b..4bc49142 100644 --- a/utensor_cgen/experimental/ugraph_matcher.py +++ b/utensor_cgen/experimental/ugraph_matcher.py @@ -40,6 +40,9 @@ def get_ops_io_info(self, op_type): ops_io_table["Const"] = [None, [0]] ops_io_table["Placeholder"] = [None, [0]] ops_io_table["Inline"] = [None, [0]] + ops_io_table["MaxPool"] = [[0],[0]] + ops_io_table["Conv2D"] = [[0, 1],[0]] + ops_io_table["Relu"] = [[0],[0]] return ops_io_table[op_type] @@ -245,6 +248,15 @@ def isomorphic_match(self, subject_graph, matcher_graph, meta): matcher_to_subject_nodes.update(partial_matcher_to_subject_nodes) matcher_to_subject_edges.update(partial_matcher_to_subject_edges) + + #including output tensors from the output node + sgraph_out_node = subject_graph.ops_info[matcher_to_subject_nodes[next(iter(matcher_output_node_names))]] + mgraph_out_node = matcher_graph.ops_info[next(iter(matcher_output_node_names))] + for mgraph_out_edge, sgraph_out_edge in zip(mgraph_out_node.output_tensors, sgraph_out_node.output_tensors): + partial_matcher_to_subject_edges[mgraph_out_edge.name] = sgraph_out_edge.name + + matcher_to_subject_edges.update(partial_matcher_to_subject_edges) + self.translator = [matcher_to_subject_nodes, matcher_to_subject_edges] return self.translator diff --git a/utensor_cgen/ir/base.py b/utensor_cgen/ir/base.py index 8ccb20ea..2a723020 100644 --- a/utensor_cgen/ir/base.py +++ b/utensor_cgen/ir/base.py @@ -240,7 +240,7 @@ def graph_def(self): def ops(self): return [self.ops_info[name] for name in self.topo_order] - def add_op(self, op): + def add_op(self, op, sort=True): if not isinstance(op, OperationInfo): raise ValueError('expecting OperationInfo, get {}'.format(type(op))) if op.name in self.ops_info: @@ -250,7 +250,11 @@ def add_op(self, op): # if(op.name == 'convert_uint8_q7_Relu/eightbit_transpose_0_q7'): # import pdb; pdb.set_trace() self.ops_info[op.name] = op - topologic_order_graph(self) + + # FIXME: forcing a 
topo-order here prevents us from dynamic graph construction
+    # The temporary fix is to disable this as an option
+    if sort:
+      topologic_order_graph(self)
 
   def drop_op(self, op_name):
     if op_name not in self.ops_info:
diff --git a/utensor_cgen/transformer/linear_reoder.py b/utensor_cgen/transformer/linear_reoder.py
new file mode 100644
index 00000000..3f3cec82
--- /dev/null
+++ b/utensor_cgen/transformer/linear_reoder.py
@@ -0,0 +1,77 @@
+# -*- coding:utf8 -*-
+r"""CMSIS-NN Transformer
+
+Node fusion and replacement for CMSIS-NN
+
+"""
+import re
+from collections import defaultdict
+from copy import deepcopy
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.tools.graph_transforms import TransformGraph
+
+from utensor_cgen.experimental.ugraph_builder import *
+from utensor_cgen.experimental.ugraph_matcher import *
+from utensor_cgen.experimental.ugraph_util_functions import *
+from utensor_cgen.frontend.tensorflow import GraphDefParser
+from utensor_cgen.ir import OperationInfo, TensorInfo, uTensorGraph
+from utensor_cgen.ir.converter import AttrValueConverter # hue hue hue hue hue
+from utensor_cgen.ir.converter import GenericTensorConverterMixin
+from utensor_cgen.ir.utils import graph_check
+from utensor_cgen.utils import parse_tensor_name, topologic_order_graph
+from utensor_cgen.ir.misc.graph_viz import viz_graph
+
+from .base import Transformer
+
+__all__ = ["CMSIS_NN_Transformer"]
+
+class Linear_Reorder_Transformer(Transformer):
+  METHOD_NAME = 'linear_reoder'
+  KWARGS_NAMESCOPE = '_linear_reoder'
+
+  def get_matcher_graph(self):
+    ugraph = uTensorGraph(output_nodes=['maxpool'])
+
+    dummpy_input0 = Const_Op('dummy_input0', np.zeros([16,16]), ugraph)
+    dummpy_input1 = Const_Op('dummy_input1', np.zeros([4,4]), ugraph)
+    conv_out = conv2d_op('convolution2d', [dummpy_input0[0], dummpy_input1[0]], ugraph)
+    relu_out = relu_op('relu', conv_out, ugraph)
+    out_tensor = maxpool_op('maxpool', relu_out, ugraph)
+    topologic_order_graph(ugraph)
+
+    #viz_graph('matcher', True, ugraph)
+
+    meta = dict()
+    meta["dummy_input0"] = ["End", "Any"]
+    meta["dummy_input1"] = ["End", "Any"]
+
+    return (ugraph, meta)
+
+  def transform(self, ugraph):
+    [matcher_ugraph, metaData] = self.get_matcher_graph()
+    while True:
+      matcher = uGraphMatcher()
+      result = matcher.isomorphic_match(ugraph, matcher_ugraph, metaData)
+      if result == False:
+        break
+      #import pdb; pdb.set_trace()
+      relu_name = matcher['relu'].name + '_'
+      maxpool_name = matcher['maxpool'].name + '_'
+
+      new_relu_out = relu_op(relu_name, [matcher['relu:0']], ugraph)
+      new_maxpool_out = maxpool_op(maxpool_name, [matcher['convolution2d:0']], ugraph)
+      matcher['relu:0'] = new_maxpool_out[0]
+      matcher['maxpool:0'] = new_relu_out[0]
+      matcher['relu'] = None
+      matcher['maxpool'] = None
+
+      topologic_order_graph(ugraph)
+      graph_validate(ugraph)
+
+    viz_graph('matcher', True, ugraph)
+    return ugraph ##remove me
+
+    # graph_check(ugraph)
+    # return ugraph
diff --git a/utensor_cgen/transformer/pipline.py b/utensor_cgen/transformer/pipline.py
index 3cff8c6b..4600ff0d 100644
--- a/utensor_cgen/transformer/pipline.py
+++ b/utensor_cgen/transformer/pipline.py
@@ -7,6 +7,7 @@
 from .optimizer import IdOpRemoveOptimizer, RefCntOptimizer
 from .quantize import QuantizeTransformer
 from .graph_viz import GraphVizTransformer
+from .linear_reoder import Linear_Reorder_Transformer
 
 class TransformerPipeline(object):
 
@@ -20,6 +21,7 @@ class TransformerPipeline(object):
     CMSIS_NN_Transformer.METHOD_NAME: CMSIS_NN_Transformer,
     IdOpRemoveOptimizer.METHOD_NAME: 
IdOpRemoveOptimizer, GraphVizTransformer.METHOD_NAME: GraphVizTransformer, + Linear_Reorder_Transformer.METHOD_NAME: Linear_Reorder_Transformer, } def __init__(self, methods): From 6f633e1b13e24cc4412705f01b44fee5d5d7a13c Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 5 May 2019 01:07:34 +0800 Subject: [PATCH 02/17] ugraph seems correct, but tf quantization complains --- .../experimental/ugraph_util_functions.py | 21 +++++++++++++++-- utensor_cgen/transformer/linear_reoder.py | 23 +++++++++++-------- utensor_cgen/utils.py | 5 +++- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/utensor_cgen/experimental/ugraph_util_functions.py b/utensor_cgen/experimental/ugraph_util_functions.py index b2428e2e..e0cb7ba8 100644 --- a/utensor_cgen/experimental/ugraph_util_functions.py +++ b/utensor_cgen/experimental/ugraph_util_functions.py @@ -9,7 +9,8 @@ __all__ = ["is_connected", "get_input_tensor_names", "get_output_tensor_names", "tensorInfo_from_name", "get_tensor_node_names", "replace_tensors_op", "replace_tensor_op_by_name", - "graph_validate", "get_input_node_names", "get_output_node_names", "replace_tensor"] + "graph_validate", "get_input_node_names", "get_output_node_names", "replace_tensor", + "update_tensor_op_names", "print_graph"] def is_connected(graph, node0, node1): @@ -94,6 +95,10 @@ def graph_validate(graph): if input_tensor_info.op_name not in graph.topo_order: print("In %r: input tensor %r points to an op (%r) that does not exist in graph.topo_order" % (op_name, input_tensor_info.name, input_tensor_info.op_name)) conflicts.append((input_tensor_info.name, input_tensor_info.op_name)) + for out_tensor in op_info.output_tensors: + if out_tensor.op_name != op_info.name: + print("In %r: output tensor %r does not point to its origin, it points to (%r)" % (op_info.name, out_tensor.name, out_tensor.op_name)) + conflicts.append((op_info.name, out_tensor.name, out_tensor.op_name)) def get_input_node_names(graph, node_name): input_op_infos = graph.ops_info[node_name].input_nodes @@ -116,4 +121,16 @@ def replace_tensor(name, new_tensorInfo, ugraph): #outputs for i, t_info in enumerate(op_info.output_tensors): if(t_info.name == name): - op_info.output_tensors[i] = new_tensorInfo \ No newline at end of file + op_info.output_tensors[i] = new_tensorInfo + +def update_tensor_op_names(graph): + for key, op_info in graph.ops_info.items(): + for out_tensor in op_info.output_tensors: + out_tensor.op_name = op_info.name + graph.ops_info[key] = op_info + +def print_graph(graph): + for key, op_info in graph.ops_info.items(): + print(key, " :\r\n") + print(" In: ", [tensor.name for tensor in op_info.input_tensors]) + print(" Out: ", [tensor.name for tensor in op_info.output_tensors]) \ No newline at end of file diff --git a/utensor_cgen/transformer/linear_reoder.py b/utensor_cgen/transformer/linear_reoder.py index 3f3cec82..751fe5a5 100644 --- a/utensor_cgen/transformer/linear_reoder.py +++ b/utensor_cgen/transformer/linear_reoder.py @@ -56,19 +56,22 @@ def transform(self, ugraph): result = matcher.isomorphic_match(ugraph, matcher_ugraph, metaData) if result == False: break - #import pdb; pdb.set_trace() - relu_name = matcher['relu'].name + '_' - maxpool_name = matcher['maxpool'].name + '_' - new_relu_out = relu_op(relu_name, [matcher['relu:0']], ugraph) - new_maxpool_out = maxpool_op(maxpool_name, [matcher['convolution2d:0']], ugraph) - matcher['relu:0'] = new_maxpool_out[0] - matcher['maxpool:0'] = new_relu_out[0] - matcher['relu'] = None - matcher['maxpool'] = None + max_pool_op = 
matcher['maxpool'] + relu_op = matcher['relu'] - topologic_order_graph(ugraph) + max_pool_op.input_tensors[0] = matcher['convolution2d:0'] + max_pool_op.output_tensors[0] = matcher['relu:0'] + relu_op.input_tensors[0] = matcher['relu:0'] + relu_op.output_tensors[0] = matcher['maxpool:0'] + + matcher['maxpool'] = max_pool_op + matcher['relu'] = relu_op + + update_tensor_op_names(ugraph) graph_validate(ugraph) + topologic_order_graph(ugraph) + #import pdb; pdb.set_trace() viz_graph('matcher', True, ugraph) return ugraph ##remove me diff --git a/utensor_cgen/utils.py b/utensor_cgen/utils.py index 8e23fb01..74de701a 100644 --- a/utensor_cgen/utils.py +++ b/utensor_cgen/utils.py @@ -240,7 +240,10 @@ def visit(node_name): op_info = ugraph.ops_info[node_name] for t_info in op_info.input_tensors: - op_name = parse_tensor_name(t_info.name)[0] + # NT: we should not rely on tensor-name conventions for back-tracing + # op_name = parse_tensor_name(t_info.name)[0] + # It would be nice to rely on something similar to get_tensor_node_names(), but based on ops_info instead of topo_order + op_name = t_info.op_name visit(op_name) perm_visit.add(node_name) From 7d59b057c729a74ff5e64247d61a0497490ba8d6 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 5 May 2019 01:56:39 +0800 Subject: [PATCH 03/17] quantization passed, good until: unsupported op type in uTensor: QuantizedMul --- .../experimental/ugraph_util_functions.py | 15 +++++++++++++-- utensor_cgen/transformer/linear_reoder.py | 9 +++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/utensor_cgen/experimental/ugraph_util_functions.py b/utensor_cgen/experimental/ugraph_util_functions.py index e0cb7ba8..5068e2b5 100644 --- a/utensor_cgen/experimental/ugraph_util_functions.py +++ b/utensor_cgen/experimental/ugraph_util_functions.py @@ -10,7 +10,7 @@ __all__ = ["is_connected", "get_input_tensor_names", "get_output_tensor_names", "tensorInfo_from_name", "get_tensor_node_names", "replace_tensors_op", "replace_tensor_op_by_name", "graph_validate", "get_input_node_names", "get_output_node_names", "replace_tensor", - "update_tensor_op_names", "print_graph"] + "update_tensor_op_names", "print_graph", "rename_tensor"] def is_connected(graph, node0, node1): @@ -133,4 +133,15 @@ def print_graph(graph): for key, op_info in graph.ops_info.items(): print(key, " :\r\n") print(" In: ", [tensor.name for tensor in op_info.input_tensors]) - print(" Out: ", [tensor.name for tensor in op_info.output_tensors]) \ No newline at end of file + print(" Out: ", [tensor.name for tensor in op_info.output_tensors]) + +def rename_tensor(name, new_name, graph): + for key, op_info in graph.ops_info.items(): + for i, tensor in enumerate(op_info.input_tensors): + if tensor.name == name: + op_info.input_tensors[i].name = new_name + for key, op_info in graph.ops_info.items(): + for i, tensor in enumerate(op_info.output_tensors): + if tensor.name == name: + op_info.output_tensors[i].name = new_name + graph.ops_info[key] = op_info \ No newline at end of file diff --git a/utensor_cgen/transformer/linear_reoder.py b/utensor_cgen/transformer/linear_reoder.py index 751fe5a5..80418ddc 100644 --- a/utensor_cgen/transformer/linear_reoder.py +++ b/utensor_cgen/transformer/linear_reoder.py @@ -57,6 +57,7 @@ def transform(self, ugraph): if result == False: break + #swapping the ops max_pool_op = matcher['maxpool'] relu_op = matcher['relu'] @@ -68,6 +69,14 @@ def transform(self, ugraph): matcher['maxpool'] = max_pool_op matcher['relu'] = relu_op + #swapping the tensor names + 
relu_tensor_name = matcher['relu:0'].name + maxpool_tensor_name = matcher['maxpool:0'].name + + rename_tensor(relu_tensor_name, 'tmp_relu_name', ugraph) + rename_tensor(maxpool_tensor_name, relu_tensor_name, ugraph) + rename_tensor('tmp_relu_name', maxpool_tensor_name, ugraph) + update_tensor_op_names(ugraph) graph_validate(ugraph) topologic_order_graph(ugraph) From b98eeab7b43931b5fa05fca65f38dddf1ae9c03a Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 5 May 2019 16:22:48 +0800 Subject: [PATCH 04/17] CONV_POOL_Transformer WIP: matcher graph completed --- utensor_cgen/cli.py | 2 +- utensor_cgen/experimental/ugraph_builder.py | 132 +++++++++++++++++++- utensor_cgen/transformer/conv_pool.py | 106 ++++++++++++++++ utensor_cgen/transformer/linear_reoder.py | 9 +- 4 files changed, 238 insertions(+), 11 deletions(-) create mode 100644 utensor_cgen/transformer/conv_pool.py diff --git a/utensor_cgen/cli.py b/utensor_cgen/cli.py index 7a2e948e..c0bf4210 100644 --- a/utensor_cgen/cli.py +++ b/utensor_cgen/cli.py @@ -46,7 +46,7 @@ def cli(): help="list of output nodes") @click.option("--transform-methods", type=NArgsKwargsParam(sep='|>'), - default='dropout|>linear_reoder|>quantize|>inline|>biasAdd|>remove_id_op|>refcnt', + default='dropout|>linear_reoder|>quantize|>conv_pool|>inline|>biasAdd|>remove_id_op|>refcnt', help='optimization pipeline', metavar='METHOD[|>METHOD|>...]', show_default=True) diff --git a/utensor_cgen/experimental/ugraph_builder.py b/utensor_cgen/experimental/ugraph_builder.py index a5545b95..d7603046 100644 --- a/utensor_cgen/experimental/ugraph_builder.py +++ b/utensor_cgen/experimental/ugraph_builder.py @@ -13,8 +13,9 @@ from utensor_cgen.experimental.ugraph_util_functions import * -__all__ = ["transpose_offline", "Const_Op", "Ram_Op", "Const_Reshape", "Uint8Q7Origin_Op", "CMSIS_FC_Op", "QuantRangeForMultiplicationu8u8int32_Op", - "conv2d_op", "relu_op", "maxpool_op"] +__all__ = ["transpose_offline", "Const_Op", "Ram_Op", "Const_Reshape", "Uint8Q7Origin_Op", + "CMSIS_FC_Op", "QuantRangeForMultiplicationu8u8int32_Op", "conv2d_op", "quantized_conv2d_op", + "relu_op", "maxpool_op", "requantize_op", "requantization_range_op", "quantized_maxpool_op"] # Let us get unique names for custom injected nodes def static_vars(**kwargs): @@ -196,7 +197,7 @@ def conv2d_op(name, inputs, ugraph): conv_out = TensorInfo(name=name + ":0", op_name=name, dtype=np.dtype('float32'), - shape=inputs[0].shape, + shape=inputs[0].shape, #FIXME: wrong shape most likely ugraph=tmp_ugraph ) conv2d_op_info = OperationInfo(name=name, @@ -208,6 +209,38 @@ def conv2d_op(name, inputs, ugraph): ugraph.add_op(conv2d_op_info, False) return conv2d_op_info.output_tensors + +def quantized_conv2d_op(name, inputs, ugraph): + tmp_ugraph = uTensorGraph(output_nodes=[name]) + conv_out = TensorInfo(name=name + ":0", + op_name=name, + dtype=np.dtype('uint8'), + shape=inputs[0].shape, #FIXME: wrong shape most likely + ugraph=tmp_ugraph + ) + min_out = TensorInfo(name=name + ":1", + op_name=name, + dtype=np.dtype('float32'), + shape=[1], + ugraph=tmp_ugraph + ) + max_out = TensorInfo(name=name + ":2", + op_name=name, + dtype=np.dtype('float32'), + shape=[1], + ugraph=tmp_ugraph + ) + + quantized_conv2d_op_info = OperationInfo(name=name, + input_tensors=inputs, + output_tensors=[conv_out, min_out, max_out], + op_type="QuantizedConv2D", + backend="tensorflow", + ugraph=tmp_ugraph) + + ugraph.add_op(quantized_conv2d_op_info, False) + return quantized_conv2d_op_info.output_tensors + def relu_op(name, inputs, ugraph): tmp_ugraph = 
uTensorGraph(output_nodes=[name]) relu_out = TensorInfo(name=name + ":0", @@ -225,6 +258,7 @@ def relu_op(name, inputs, ugraph): ugraph.add_op(relu_op_info, False) return relu_op_info.output_tensors + def maxpool_op(name, inputs, ugraph): tmp_ugraph = uTensorGraph(output_nodes=[name]) max_out = TensorInfo(name=name + ":0", @@ -240,4 +274,94 @@ def maxpool_op(name, inputs, ugraph): backend="tensorflow", ugraph=tmp_ugraph) ugraph.add_op(max_op_info, False) - return max_op_info.output_tensors \ No newline at end of file + return max_op_info.output_tensors + +def quantized_maxpool_op(name, inputs, ugraph): + tmp_ugraph = uTensorGraph(output_nodes=[name]) + max_value_out = TensorInfo(name=name + ":0", + op_name=name, + dtype=np.dtype('float32'), + shape=inputs[0].shape, + ugraph=tmp_ugraph + ) + min_out = TensorInfo(name=name + ":1", + op_name=name, + dtype=np.dtype('float32'), + shape=[1], + ugraph=tmp_ugraph + ) + max_out = TensorInfo(name=name + ":2", + op_name=name, + dtype=np.dtype('float32'), + shape=[1], + ugraph=tmp_ugraph + ) + + qnt_max_op_info = OperationInfo(name=name, + input_tensors=inputs, + output_tensors=[max_value_out, min_out, max_out], + op_type="QuantizedMaxPool", + backend="tensorflow", + ugraph=tmp_ugraph) + ugraph.add_op(qnt_max_op_info, False) + return qnt_max_op_info.output_tensors + +def requantization_range_op(name, inputs, ugraph): + tmp_ugraph = uTensorGraph(output_nodes=[name]) + + min_out = TensorInfo(name=name + ":0", + op_name=name, + dtype=np.dtype('float32'), + shape=[1], + ugraph=tmp_ugraph + ) + max_out = TensorInfo(name=name + ":1", + op_name=name, + dtype=np.dtype('float32'), + shape=[1], + ugraph=tmp_ugraph + ) + + rqntr_op_info = OperationInfo(name=name, + input_tensors=inputs, + output_tensors=[min_out, max_out], + op_type="RequantizationRange", + backend="tensorflow", + ugraph=tmp_ugraph) + + ugraph.add_op(rqntr_op_info, False) + return rqntr_op_info.output_tensors + + +def requantize_op(name, inputs, ugraph): + tmp_ugraph = uTensorGraph(output_nodes=[name]) + + value_out = TensorInfo(name=name + ":0", + op_name=name, + dtype=np.dtype('uint8'), + shape=inputs[0].shape, + ugraph=tmp_ugraph + ) + + min_out = TensorInfo(name=name + ":1", + op_name=name, + dtype=np.dtype('float32'), + shape=[1], + ugraph=tmp_ugraph + ) + max_out = TensorInfo(name=name + ":2", + op_name=name, + dtype=np.dtype('float32'), + shape=[1], + ugraph=tmp_ugraph + ) + + rqnt_op_info = OperationInfo(name=name, + input_tensors=inputs, + output_tensors=[value_out, min_out, max_out], + op_type="Requantize", + backend="tensorflow", + ugraph=tmp_ugraph) + + ugraph.add_op(rqnt_op_info, False) + return [value_out, min_out, max_out] \ No newline at end of file diff --git a/utensor_cgen/transformer/conv_pool.py b/utensor_cgen/transformer/conv_pool.py new file mode 100644 index 00000000..49669b34 --- /dev/null +++ b/utensor_cgen/transformer/conv_pool.py @@ -0,0 +1,106 @@ +# -*- coding:utf8 -*- +r"""Convolution Maxpool Fusion Transformer + +Node fusion for QuantConv2d QuantMaxPool operators + +""" +import re +from collections import defaultdict +from copy import deepcopy + +import numpy as np +import tensorflow as tf +from tensorflow.tools.graph_transforms import TransformGraph + +from utensor_cgen.experimental.ugraph_builder import * +from utensor_cgen.experimental.ugraph_matcher import * +from utensor_cgen.experimental.ugraph_util_functions import * +from utensor_cgen.frontend.tensorflow import GraphDefParser +from utensor_cgen.ir import OperationInfo, TensorInfo, uTensorGraph +from 
utensor_cgen.ir.converter import AttrValueConverter # hue hue hue hue hue +from utensor_cgen.ir.converter import GenericTensorConverterMixin +from utensor_cgen.ir.utils import graph_check +from utensor_cgen.utils import parse_tensor_name, topologic_order_graph +from utensor_cgen.ir.misc.graph_viz import viz_graph + +from .base import Transformer + +__all__ = ["CONV_POOL_Transformer"] + +class CONV_POOL_Transformer(Transformer): + METHOD_NAME = 'conv_pool' + KWARGS_NAMESCOPE = '_conv_pool' + + def get_matcher_graph(self): + ugraph = uTensorGraph(output_nodes=['quantized_maxpool'], backend="tensorflow") + + dummpy_input0 = Const_Op('dummy_input0', np.zeros([16,16], dtype=np.uint8), ugraph) + dummpy_input0_min = Const_Op('dummy_input0_min', np.zeros([1]), ugraph) + dummpy_input0_max = Const_Op('dummy_input0_max', np.zeros([1]), ugraph) + + dummpy_input1 = Const_Op('dummy_input1', np.zeros([4,4]), ugraph) + dummpy_input1_min = Const_Op('dummy_input1_min', np.zeros([1]), ugraph) + dummpy_input1_max = Const_Op('dummy_input1_max', np.zeros([1]), ugraph) + + conv_out = quantized_conv2d_op('convolution2d', [dummpy_input0[0], + dummpy_input1[0], dummpy_input0_min[0], dummpy_input0_max[0], + dummpy_input1_min[0], dummpy_input1_max[0]], ugraph) + + requantization_range_out = requantization_range_op('requantization_range', conv_out, ugraph) + + requantize_out = requantize_op('requantize', [conv_out[0], conv_out[1], conv_out[2], + requantization_range_out[0], requantization_range_out[1]], ugraph) #FIXME: check the tensor ordering here + + quantized_maxpool_op('quantized_maxpool', requantize_out, ugraph) + + topologic_order_graph(ugraph) + + #viz_graph('matcher_quant', True, ugraph) + #import pdb; pdb.set_trace() + + meta = dict() + meta["convolution2d"] = ["End"] + + return (ugraph, meta) + + def transform(self, ugraph): + [matcher_ugraph, metaData] = self.get_matcher_graph() + while True: + matcher = uGraphMatcher() + result = matcher.isomorphic_match(ugraph, matcher_ugraph, metaData) + if result == False: + break + + import pdb; pdb.set_trace() #remove me + return ugraph #remove me + + #swapping the ops + max_pool_op = matcher['maxpool'] + relu_op = matcher['relu'] + + max_pool_op.input_tensors[0] = matcher['convolution2d:0'] + max_pool_op.output_tensors[0] = matcher['relu:0'] + relu_op.input_tensors[0] = matcher['relu:0'] + relu_op.output_tensors[0] = matcher['maxpool:0'] + + matcher['maxpool'] = max_pool_op + matcher['relu'] = relu_op + + #swapping the tensor names + relu_tensor_name = matcher['relu:0'].name + maxpool_tensor_name = matcher['maxpool:0'].name + + rename_tensor(relu_tensor_name, 'tmp_relu_name', ugraph) + rename_tensor(maxpool_tensor_name, relu_tensor_name, ugraph) + rename_tensor('tmp_relu_name', maxpool_tensor_name, ugraph) + + update_tensor_op_names(ugraph) + graph_validate(ugraph) + topologic_order_graph(ugraph) + #import pdb; pdb.set_trace() + + viz_graph('matcher', True, ugraph) + return ugraph ##remove me + + # graph_check(ugraph) + # return ugraph diff --git a/utensor_cgen/transformer/linear_reoder.py b/utensor_cgen/transformer/linear_reoder.py index 80418ddc..248e836f 100644 --- a/utensor_cgen/transformer/linear_reoder.py +++ b/utensor_cgen/transformer/linear_reoder.py @@ -1,7 +1,7 @@ # -*- coding:utf8 -*- -r"""CMSIS-NN Transformer +r"""Linear Re-ordering Transformer -Node fusion and replacement for CMSIS-NN +Linear Operation Legalizations """ import re @@ -83,7 +83,4 @@ def transform(self, ugraph): #import pdb; pdb.set_trace() viz_graph('matcher', True, ugraph) - return 
ugraph ##remove me - - # graph_check(ugraph) - # return ugraph + return ugraph \ No newline at end of file From 154971fc8a7ff76eb0f686f90f4d7b295ed9f0b3 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 5 May 2019 16:42:53 +0800 Subject: [PATCH 05/17] fused, but with graph integrity problem --- utensor_cgen/experimental/ugraph_builder.py | 36 +++++++++++++++- utensor_cgen/transformer/conv_pool.py | 48 +++++++-------------- utensor_cgen/transformer/linear_reoder.py | 6 +-- 3 files changed, 51 insertions(+), 39 deletions(-) diff --git a/utensor_cgen/experimental/ugraph_builder.py b/utensor_cgen/experimental/ugraph_builder.py index d7603046..e457d214 100644 --- a/utensor_cgen/experimental/ugraph_builder.py +++ b/utensor_cgen/experimental/ugraph_builder.py @@ -15,7 +15,8 @@ __all__ = ["transpose_offline", "Const_Op", "Ram_Op", "Const_Reshape", "Uint8Q7Origin_Op", "CMSIS_FC_Op", "QuantRangeForMultiplicationu8u8int32_Op", "conv2d_op", "quantized_conv2d_op", - "relu_op", "maxpool_op", "requantize_op", "requantization_range_op", "quantized_maxpool_op"] + "relu_op", "maxpool_op", "requantize_op", "requantization_range_op", "quantized_maxpool_op", + "quantized_conv2d_pool_op"] # Let us get unique names for custom injected nodes def static_vars(**kwargs): @@ -364,4 +365,35 @@ def requantize_op(name, inputs, ugraph): ugraph=tmp_ugraph) ugraph.add_op(rqnt_op_info, False) - return [value_out, min_out, max_out] \ No newline at end of file + return [value_out, min_out, max_out] + +def quantized_conv2d_pool_op(name, inputs, ugraph): + tmp_ugraph = uTensorGraph(output_nodes=[name]) + conv_out = TensorInfo(name=name + ":0", + op_name=name, + dtype=np.dtype('uint8'), + shape=inputs[0].shape, #FIXME: wrong shape most likely + ugraph=tmp_ugraph + ) + min_out = TensorInfo(name=name + ":1", + op_name=name, + dtype=np.dtype('float32'), + shape=[1], + ugraph=tmp_ugraph + ) + max_out = TensorInfo(name=name + ":2", + op_name=name, + dtype=np.dtype('float32'), + shape=[1], + ugraph=tmp_ugraph + ) + + quantized_conv2d_op_info = OperationInfo(name=name, + input_tensors=inputs, + output_tensors=[conv_out, min_out, max_out], + op_type="FusedConv2DMaxpool", + backend="tensorflow", + ugraph=tmp_ugraph) + + ugraph.add_op(quantized_conv2d_op_info, False) + return quantized_conv2d_op_info.output_tensors diff --git a/utensor_cgen/transformer/conv_pool.py b/utensor_cgen/transformer/conv_pool.py index 49669b34..286e7392 100644 --- a/utensor_cgen/transformer/conv_pool.py +++ b/utensor_cgen/transformer/conv_pool.py @@ -42,7 +42,7 @@ def get_matcher_graph(self): dummpy_input1_min = Const_Op('dummy_input1_min', np.zeros([1]), ugraph) dummpy_input1_max = Const_Op('dummy_input1_max', np.zeros([1]), ugraph) - conv_out = quantized_conv2d_op('convolution2d', [dummpy_input0[0], + conv_out = quantized_conv2d_op('quant_convolution2d', [dummpy_input0[0], dummpy_input1[0], dummpy_input0_min[0], dummpy_input0_max[0], dummpy_input1_min[0], dummpy_input1_max[0]], ugraph) @@ -55,11 +55,9 @@ def get_matcher_graph(self): topologic_order_graph(ugraph) - #viz_graph('matcher_quant', True, ugraph) - #import pdb; pdb.set_trace() meta = dict() - meta["convolution2d"] = ["End"] + meta["quant_convolution2d"] = ["End"] return (ugraph, meta) @@ -70,37 +68,23 @@ def transform(self, ugraph): result = matcher.isomorphic_match(ugraph, matcher_ugraph, metaData) if result == False: break - - import pdb; pdb.set_trace() #remove me - return ugraph #remove me - - #swapping the ops - max_pool_op = matcher['maxpool'] - relu_op = matcher['relu'] - - 
max_pool_op.input_tensors[0] = matcher['convolution2d:0'] - max_pool_op.output_tensors[0] = matcher['relu:0'] - relu_op.input_tensors[0] = matcher['relu:0'] - relu_op.output_tensors[0] = matcher['maxpool:0'] - - matcher['maxpool'] = max_pool_op - matcher['relu'] = relu_op - - #swapping the tensor names - relu_tensor_name = matcher['relu:0'].name - maxpool_tensor_name = matcher['maxpool:0'].name - rename_tensor(relu_tensor_name, 'tmp_relu_name', ugraph) - rename_tensor(maxpool_tensor_name, relu_tensor_name, ugraph) - rename_tensor('tmp_relu_name', maxpool_tensor_name, ugraph) + fused_op_name = matcher['quant_convolution2d'].name + "_" + matcher['quantized_maxpool'].name + fused_op_out = quantized_conv2d_pool_op(fused_op_name, matcher['quant_convolution2d'].input_tensors, ugraph) + matcher['quantized_maxpool:0'] = fused_op_out[0] + matcher['quantized_maxpool:1'] = fused_op_out[1] + matcher['quantized_maxpool:2'] = fused_op_out[2] + matcher['quant_convolution2d'] = None + matcher['requantization_range'] = None + matcher['requantize'] = None + matcher['quantized_maxpool'] = None update_tensor_op_names(ugraph) - graph_validate(ugraph) topologic_order_graph(ugraph) - #import pdb; pdb.set_trace() + graph_validate(ugraph) - viz_graph('matcher', True, ugraph) - return ugraph ##remove me + + viz_graph('matcher', True, ugraph) + import pdb; pdb.set_trace() - # graph_check(ugraph) - # return ugraph + return ugraph ##remove me diff --git a/utensor_cgen/transformer/linear_reoder.py b/utensor_cgen/transformer/linear_reoder.py index 248e836f..f78683f6 100644 --- a/utensor_cgen/transformer/linear_reoder.py +++ b/utensor_cgen/transformer/linear_reoder.py @@ -40,8 +40,6 @@ def get_matcher_graph(self): relu_out = relu_op('relu', conv_out, ugraph) out_tensor = maxpool_op('maxpool', relu_out, ugraph) topologic_order_graph(ugraph) - - #viz_graph('matcher', True, ugraph) meta = dict() meta["dummy_input0"] = ["End", "Any"] @@ -78,9 +76,7 @@ def transform(self, ugraph): rename_tensor('tmp_relu_name', maxpool_tensor_name, ugraph) update_tensor_op_names(ugraph) - graph_validate(ugraph) topologic_order_graph(ugraph) - #import pdb; pdb.set_trace() + graph_validate(ugraph) - viz_graph('matcher', True, ugraph) return ugraph \ No newline at end of file From a2f7988443664cb9253207f595caf1bab615bd61 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 5 May 2019 21:17:22 +0800 Subject: [PATCH 06/17] output graph topologically correct TODO: copy attributes to the fused node --- utensor_cgen/experimental/ugraph_matcher.py | 2 +- utensor_cgen/experimental/ugraph_util_functions.py | 2 +- utensor_cgen/transformer/conv_pool.py | 2 -- utensor_cgen/transformer/pipline.py | 2 ++ 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/utensor_cgen/experimental/ugraph_matcher.py b/utensor_cgen/experimental/ugraph_matcher.py index 4bc49142..d5f9bb59 100644 --- a/utensor_cgen/experimental/ugraph_matcher.py +++ b/utensor_cgen/experimental/ugraph_matcher.py @@ -333,7 +333,7 @@ def __setitem__(self, name, info): if info == None: #TODO: tensor dependency checking here - if name in self.subject_graph.topo_order: + if self.translator[0][name] in self.subject_graph.topo_order: #FIXME: name here reference matcher name, should be subject name self.subject_graph.drop_op(self.translator[0][name]) del self.translator[0][name] else: diff --git a/utensor_cgen/experimental/ugraph_util_functions.py b/utensor_cgen/experimental/ugraph_util_functions.py index 5068e2b5..635665a3 100644 --- a/utensor_cgen/experimental/ugraph_util_functions.py +++ 
b/utensor_cgen/experimental/ugraph_util_functions.py @@ -44,7 +44,7 @@ def get_tensor_node_names(graph, t_name): start_nodes = list() end_nodes = list() - for it_node in graph.topo_order: + for it_node, info in graph.ops_info.items(): for t in graph.ops_info[it_node].input_tensors: if t.name == t_name: end_nodes.append(it_node) diff --git a/utensor_cgen/transformer/conv_pool.py b/utensor_cgen/transformer/conv_pool.py index 286e7392..931d9111 100644 --- a/utensor_cgen/transformer/conv_pool.py +++ b/utensor_cgen/transformer/conv_pool.py @@ -82,9 +82,7 @@ def transform(self, ugraph): update_tensor_op_names(ugraph) topologic_order_graph(ugraph) graph_validate(ugraph) - viz_graph('matcher', True, ugraph) - import pdb; pdb.set_trace() return ugraph ##remove me diff --git a/utensor_cgen/transformer/pipline.py b/utensor_cgen/transformer/pipline.py index 4600ff0d..c305dfbe 100644 --- a/utensor_cgen/transformer/pipline.py +++ b/utensor_cgen/transformer/pipline.py @@ -8,6 +8,7 @@ from .quantize import QuantizeTransformer from .graph_viz import GraphVizTransformer from .linear_reoder import Linear_Reorder_Transformer +from .conv_pool import CONV_POOL_Transformer class TransformerPipeline(object): @@ -22,6 +23,7 @@ class TransformerPipeline(object): IdOpRemoveOptimizer.METHOD_NAME: IdOpRemoveOptimizer, GraphVizTransformer.METHOD_NAME: GraphVizTransformer, Linear_Reorder_Transformer.METHOD_NAME: Linear_Reorder_Transformer, + CONV_POOL_Transformer.METHOD_NAME: CONV_POOL_Transformer, } def __init__(self, methods): From d267577a451b6e6efa019d89d9a0ef0f6b529ee8 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 5 May 2019 21:35:58 +0800 Subject: [PATCH 07/17] attaching op_attribs to the fused-node --- utensor_cgen/experimental/ugraph_builder.py | 5 ++++- utensor_cgen/transformer/conv_pool.py | 10 ++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/utensor_cgen/experimental/ugraph_builder.py b/utensor_cgen/experimental/ugraph_builder.py index e457d214..f93b4980 100644 --- a/utensor_cgen/experimental/ugraph_builder.py +++ b/utensor_cgen/experimental/ugraph_builder.py @@ -367,7 +367,9 @@ def requantize_op(name, inputs, ugraph): ugraph.add_op(rqnt_op_info, False) return [value_out, min_out, max_out] -def quantized_conv2d_pool_op(name, inputs, ugraph): + +#FIXME: The content in op_attr should be passed in as proper parameters +def quantized_conv2d_pool_op(name, inputs, op_attr, ugraph): tmp_ugraph = uTensorGraph(output_nodes=[name]) conv_out = TensorInfo(name=name + ":0", op_name=name, @@ -393,6 +395,7 @@ def quantized_conv2d_pool_op(name, inputs, ugraph): output_tensors=[conv_out, min_out, max_out], op_type="FusedConv2DMaxpool", backend="tensorflow", + op_attr=op_attr, ugraph=tmp_ugraph) ugraph.add_op(quantized_conv2d_op_info, False) diff --git a/utensor_cgen/transformer/conv_pool.py b/utensor_cgen/transformer/conv_pool.py index 931d9111..d989ed50 100644 --- a/utensor_cgen/transformer/conv_pool.py +++ b/utensor_cgen/transformer/conv_pool.py @@ -70,7 +70,10 @@ def transform(self, ugraph): break fused_op_name = matcher['quant_convolution2d'].name + "_" + matcher['quantized_maxpool'].name - fused_op_out = quantized_conv2d_pool_op(fused_op_name, matcher['quant_convolution2d'].input_tensors, ugraph) + op_attr = dict() + op_attr['_utensor_conv'] = matcher['quant_convolution2d'].op_attr + op_attr['_utensor_pool'] = matcher['quantized_maxpool'].op_attr + fused_op_out = quantized_conv2d_pool_op(fused_op_name, matcher['quant_convolution2d'].input_tensors, op_attr, ugraph) 
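+        # hand the old quantized_maxpool output tensors over to the fused op, then
+        # drop the four matched nodes (conv, requant range, requantize, maxpool) below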
matcher['quantized_maxpool:0'] = fused_op_out[0] matcher['quantized_maxpool:1'] = fused_op_out[1] matcher['quantized_maxpool:2'] = fused_op_out[2] @@ -83,6 +86,5 @@ def transform(self, ugraph): topologic_order_graph(ugraph) graph_validate(ugraph) - viz_graph('matcher', True, ugraph) - - return ugraph ##remove me + #viz_graph('matcher', True, ugraph) + return ugraph From dde8b5264fe548720abba18c797deaea089afd44 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Wed, 8 May 2019 00:32:58 +0800 Subject: [PATCH 08/17] fix up --- utensor_cgen/backend/operators.py | 31 +++++++++++++++++++-- utensor_cgen/backend/snippets/_snippets.py | 2 +- utensor_cgen/experimental/ugraph_builder.py | 4 +-- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/utensor_cgen/backend/operators.py b/utensor_cgen/backend/operators.py index 3c625177..e4c9b713 100644 --- a/utensor_cgen/backend/operators.py +++ b/utensor_cgen/backend/operators.py @@ -481,9 +481,9 @@ def __init__(self, op_info, **kwargs): in_dtype, filter_dtype = (op_info.input_tensors[0].dtype, op_info.input_tensors[1].dtype) out_dtype = op_info.output_tensors[0].dtype - strides = op_info.op_attr["strides"].value.ints_value - ksize = op_info.op_attr["ksize"].value.ints_value - padding = op_info.op_attr["padding"].value.decode('utf8') + strides = op_info.op_attr['_utensor_conv']["strides"].value.ints_value + ksize = op_info.op_attr['_utensor_pool']["ksize"].value.ints_value + padding = op_info.op_attr['_utensor_conv']["padding"].value.decode('utf8') parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, op_info.op_attr) ref_count = parser.get('ref_counts', [0])[0] @@ -738,3 +738,28 @@ def __init__(self, op_info, **kwargs): ref_count = parser.get('ref_counts', [0])[0] to_eval = parser.get('to_eval', False) self._snippet = GatherOpSnippet(inputs, output, tf_dtype, ref_count, to_eval) + + +@OperatorFactory.register +class _QuantizedMulOperator(_Operator): + + op_type = "QuantizedMul" + + def __init__(self, op_info, **kwargs): + _Operator.__init__(self) + inputs = [tensor_info.name for tensor_info in op_info.input_tensors] + outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + in_dtype, qout_dtype = (op_info.input_tensors[0].dtype, + op_info.output_tensors[0].dtype) #NT: why separate this out? 
+ #DB: I don't know, it's in the uTensor C code + out_dtypes = [tensor_info.dtype for tensor_info in op_info.output_tensors[1:]] + parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, + op_info.op_attr) + ref_counts = parser.get('ref_counts', []) + to_eval = parser.get('to_eval', False) + self._snippet = QuantizedReluOpSnippet(inputs, outputs, in_dtype, + out_dtypes, qout_dtype, + ref_counts, to_eval) + print("FIXME: _QuantizedMulOperator not implemented") + print("FIXME: _QuantizedMulOperator not implemented") + print("FIXME: _QuantizedMulOperator not implemented") \ No newline at end of file diff --git a/utensor_cgen/backend/snippets/_snippets.py b/utensor_cgen/backend/snippets/_snippets.py index 7deb5616..35af7bef 100644 --- a/utensor_cgen/backend/snippets/_snippets.py +++ b/utensor_cgen/backend/snippets/_snippets.py @@ -665,7 +665,7 @@ def __init__(self, inputs, output, strides, ksize, padding, self.template_vars["padding"] = padding self.template_vars["to_eval"] = to_eval -class QuantizedFusedConv2DOpMaxpoolSnippet(Snippet): +class QuantizedFusedConv2DMaxpoolOpSnippet(Snippet): __template_name__ = "snippets/fused_conv2d_maxpool_op.cpp" __headers__ = set(['"uTensor/ops/MatrixOps.hpp"']) diff --git a/utensor_cgen/experimental/ugraph_builder.py b/utensor_cgen/experimental/ugraph_builder.py index f93b4980..c112a6ad 100644 --- a/utensor_cgen/experimental/ugraph_builder.py +++ b/utensor_cgen/experimental/ugraph_builder.py @@ -373,7 +373,7 @@ def quantized_conv2d_pool_op(name, inputs, op_attr, ugraph): tmp_ugraph = uTensorGraph(output_nodes=[name]) conv_out = TensorInfo(name=name + ":0", op_name=name, - dtype=np.dtype('uint8'), + dtype=inputs[0].dtype, shape=inputs[0].shape, #FIXME: wrong shape most likely ugraph=tmp_ugraph ) @@ -393,7 +393,7 @@ def quantized_conv2d_pool_op(name, inputs, op_attr, ugraph): quantized_conv2d_op_info = OperationInfo(name=name, input_tensors=inputs, output_tensors=[conv_out, min_out, max_out], - op_type="FusedConv2DMaxpool", + op_type="QuantizedFusedConv2DMaxpool", backend="tensorflow", op_attr=op_attr, ugraph=tmp_ugraph) From 62308c17a52ddbdd0f9924e3273eac810d26da42 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Wed, 8 May 2019 00:39:35 +0800 Subject: [PATCH 09/17] fixing the types --- utensor_cgen/experimental/ugraph_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utensor_cgen/experimental/ugraph_builder.py b/utensor_cgen/experimental/ugraph_builder.py index c112a6ad..91b523f7 100644 --- a/utensor_cgen/experimental/ugraph_builder.py +++ b/utensor_cgen/experimental/ugraph_builder.py @@ -281,7 +281,7 @@ def quantized_maxpool_op(name, inputs, ugraph): tmp_ugraph = uTensorGraph(output_nodes=[name]) max_value_out = TensorInfo(name=name + ":0", op_name=name, - dtype=np.dtype('float32'), + dtype=inputs[0].dtype, shape=inputs[0].shape, ugraph=tmp_ugraph ) From c89b5e45d0af0aef513144f2f19586572405cb4a Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Wed, 8 May 2019 23:04:34 +0800 Subject: [PATCH 10/17] generates some source files, weights file missing --- utensor_cgen/backend/operators.py | 25 ---------------------- utensor_cgen/backend/snippets/_snippets.py | 2 +- utensor_cgen/transformer/ns_transformer.py | 16 ++++++++++++++ utensor_cgen/transformer/pipline.py | 4 +++- 4 files changed, 20 insertions(+), 27 deletions(-) diff --git a/utensor_cgen/backend/operators.py b/utensor_cgen/backend/operators.py index e4c9b713..a5d0209d 100644 --- a/utensor_cgen/backend/operators.py +++ b/utensor_cgen/backend/operators.py @@ -738,28 +738,3 @@ 
def __init__(self, op_info, **kwargs): ref_count = parser.get('ref_counts', [0])[0] to_eval = parser.get('to_eval', False) self._snippet = GatherOpSnippet(inputs, output, tf_dtype, ref_count, to_eval) - - -@OperatorFactory.register -class _QuantizedMulOperator(_Operator): - - op_type = "QuantizedMul" - - def __init__(self, op_info, **kwargs): - _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] - in_dtype, qout_dtype = (op_info.input_tensors[0].dtype, - op_info.output_tensors[0].dtype) #NT: why separate this out? - #DB: I don't know, it's in the uTensor C code - out_dtypes = [tensor_info.dtype for tensor_info in op_info.output_tensors[1:]] - parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, - op_info.op_attr) - ref_counts = parser.get('ref_counts', []) - to_eval = parser.get('to_eval', False) - self._snippet = QuantizedReluOpSnippet(inputs, outputs, in_dtype, - out_dtypes, qout_dtype, - ref_counts, to_eval) - print("FIXME: _QuantizedMulOperator not implemented") - print("FIXME: _QuantizedMulOperator not implemented") - print("FIXME: _QuantizedMulOperator not implemented") \ No newline at end of file diff --git a/utensor_cgen/backend/snippets/_snippets.py b/utensor_cgen/backend/snippets/_snippets.py index 35af7bef..2314a7a6 100644 --- a/utensor_cgen/backend/snippets/_snippets.py +++ b/utensor_cgen/backend/snippets/_snippets.py @@ -20,7 +20,7 @@ "QuantizedMulOpSnippet", "CreateTensorBinarySnippet", "WeightSnippet", "ContextGlobalArrayContainer", "QuantRangeForMultiplicationSnippet", - "FusedConv2DOpMaxpoolSnippet", "QuantizedFusedConv2DOpMaxpoolSnippet", + "FusedConv2DOpMaxpoolSnippet", "QuantizedFusedConv2DMaxpoolOpSnippet", "GatherOpSnippet", "CreateTensorRamSnippet", "Uint8Q7OriginSnippet"] diff --git a/utensor_cgen/transformer/ns_transformer.py b/utensor_cgen/transformer/ns_transformer.py index 90dbf1a1..2e7cb459 100644 --- a/utensor_cgen/transformer/ns_transformer.py +++ b/utensor_cgen/transformer/ns_transformer.py @@ -137,3 +137,19 @@ class BatchNormTransformer(Transformer): def transform(self, ugraph): # TODO: implement this! 
pass + + +class FakeGatherV2Transformer(Transformer): + """Force converting GatherV2 op to Gather op + """ + METHOD_NAME = 'FakeGatherV2' + KWARGS_NAMESCOPE = '_fake_gatherv2' + TARGET_NODENAME_PATTERN = re.compile(r'(GatherV2[_\w\d]*)/.*') + + def transform(self, ugraph): + print("warning: force replacing GatherV2 with Gather") + for key, op in ugraph.ops_info.items(): + if op.op_type == "GatherV2": + op.op_type = "Gather" + ugraph.ops_info[key] = op + return ugraph \ No newline at end of file diff --git a/utensor_cgen/transformer/pipline.py b/utensor_cgen/transformer/pipline.py index c305dfbe..b97ec8d3 100644 --- a/utensor_cgen/transformer/pipline.py +++ b/utensor_cgen/transformer/pipline.py @@ -3,7 +3,8 @@ from .base import Transformer from .cmsis_nn import CMSIS_NN_Transformer from .ns_transformer import (BatchNormTransformer, DropoutTransformer, - InlineTransformer, BiasAddTransformer) + InlineTransformer, BiasAddTransformer, + FakeGatherV2Transformer) from .optimizer import IdOpRemoveOptimizer, RefCntOptimizer from .quantize import QuantizeTransformer from .graph_viz import GraphVizTransformer @@ -24,6 +25,7 @@ class TransformerPipeline(object): GraphVizTransformer.METHOD_NAME: GraphVizTransformer, Linear_Reorder_Transformer.METHOD_NAME: Linear_Reorder_Transformer, CONV_POOL_Transformer.METHOD_NAME: CONV_POOL_Transformer, + FakeGatherV2Transformer.METHOD_NAME: FakeGatherV2Transformer } def __init__(self, methods): From eab8562786ac284fa023f13f436fb2224b76b5c2 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Wed, 8 May 2019 23:46:15 +0800 Subject: [PATCH 11/17] inline transformer enabler hotfix --- utensor_cgen/backend/code_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utensor_cgen/backend/code_generator.py b/utensor_cgen/backend/code_generator.py index dc0fc5ab..2fa22ff3 100644 --- a/utensor_cgen/backend/code_generator.py +++ b/utensor_cgen/backend/code_generator.py @@ -120,7 +120,7 @@ def formatter(name, kwargs): container.add_snippet(cmt_snippet) composer.add_snippet(container) - if 'inline' in self.trans_methods: + if ('inline', {}) in self.trans_methods: _logger.info("Generate weight file: %s", weightheader_fname) with open(weightheader_fname, "w") as wf: wf.write('// Auto generated by utensor-cli\n\n') From b082a5ac73b3a009b99cab059c31686843a932c6 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Thu, 9 May 2019 15:04:12 +0800 Subject: [PATCH 12/17] fixing GatherOpSnippet --- utensor_cgen/backend/snippets/templates/snippets/gather_op.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utensor_cgen/backend/snippets/templates/snippets/gather_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/gather_op.cpp index fbb763e1..57bcf4ac 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/gather_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/gather_op.cpp @@ -7,7 +7,7 @@ S_TENSOR {{sptr_name}}; {% else %} ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); {% endif %} - ctx.push(new GatherOp<{{in_dtype}}, {{out_dtype}}>(), + ctx.push(new GatherOp<{{in_dtype}}>(), { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, { "{{output}}" }); {% if to_eval %} From bbcc9e37f48b63d128a8a4313c035f986797c1bb Mon Sep 17 00:00:00 2001 From: Michael Bartling Date: Mon, 10 Jun 2019 14:10:45 -0500 Subject: [PATCH 13/17] Fix Gather and Fused quantized conv maxpool snippets --- .gitignore | 4 ++++ utensor_cgen/backend/operators.py | 10 +++++----- utensor_cgen/backend/snippets/_snippets.py | 12 +++++++----- 
.../snippets/quantized_fused_conv2d_maxpool_op.cpp | 2 +- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 6d40de30..bb03a7b6 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,7 @@ tests/*/cpp */*/models tests/deep_mlp/data .vscode +*.pyc +.*.pyc +*.swp +.*.swp diff --git a/utensor_cgen/backend/operators.py b/utensor_cgen/backend/operators.py index a5d0209d..83062db9 100644 --- a/utensor_cgen/backend/operators.py +++ b/utensor_cgen/backend/operators.py @@ -477,10 +477,10 @@ class _QuantizedFusedConv2DMaxpoolOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name + outputs = [tensor_info.name for tensor_info in op_info.output_tensors] in_dtype, filter_dtype = (op_info.input_tensors[0].dtype, op_info.input_tensors[1].dtype) - out_dtype = op_info.output_tensors[0].dtype + out_dtypes = [tensor_info.dtype for tensor_info in op_info.output_tensors] strides = op_info.op_attr['_utensor_conv']["strides"].value.ints_value ksize = op_info.op_attr['_utensor_pool']["ksize"].value.ints_value padding = op_info.op_attr['_utensor_conv']["padding"].value.decode('utf8') @@ -488,8 +488,8 @@ def __init__(self, op_info, **kwargs): op_info.op_attr) ref_count = parser.get('ref_counts', [0])[0] to_eval = parser.get('to_eval', False) - self._snippet = QuantizedFusedConv2DMaxpoolOpSnippet(inputs, output, strides, ksize, padding, - in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtype=out_dtype, + self._snippet = QuantizedFusedConv2DMaxpoolOpSnippet(inputs, outputs, strides, ksize, padding, + in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtypes=out_dtypes, ref_count=ref_count, to_eval=to_eval) @OperatorFactory.register @@ -726,7 +726,7 @@ def __init__(self, op_info, **kwargs): @OperatorFactory.register class _GatherOperator(_Operator): - op_type = "Gather" # tf op type + op_type = "GatherV2" # tf op type def __init__(self, op_info, **kwargs): _Operator.__init__(self) diff --git a/utensor_cgen/backend/snippets/_snippets.py b/utensor_cgen/backend/snippets/_snippets.py index 2314a7a6..f4e65169 100644 --- a/utensor_cgen/backend/snippets/_snippets.py +++ b/utensor_cgen/backend/snippets/_snippets.py @@ -666,21 +666,23 @@ def __init__(self, inputs, output, strides, ksize, padding, self.template_vars["to_eval"] = to_eval class QuantizedFusedConv2DMaxpoolOpSnippet(Snippet): - __template_name__ = "snippets/fused_conv2d_maxpool_op.cpp" + __template_name__ = "snippets/quantized_fused_conv2d_maxpool_op.cpp" __headers__ = set(['"uTensor/ops/MatrixOps.hpp"']) - def __init__(self, inputs, output, strides, ksize, padding, - in_dtype, filter_dtype, out_dtype, + def __init__(self, inputs, outputs, strides, ksize, padding, + in_dtype, filter_dtype, out_dtypes, ref_count=0, to_eval=False): Snippet.__init__(self) if ref_count: self.template_vars["ref_count"] = ref_count + print(outputs) + print(out_dtypes) self.template_vars["inputs"] = inputs - self.template_vars["output"] = output + self.template_vars["outputs"] = outputs self.template_vars["in_dtype"] = NP_TYPES_MAP[in_dtype].tensor_type_str self.template_vars["filter_dtype"] = NP_TYPES_MAP[filter_dtype].tensor_type_str - self.template_vars["out_dtype"] = NP_TYPES_MAP[out_dtype].tensor_type_str + self.template_vars["out_dtypes"] = [NP_TYPES_MAP[out_dtype].tensor_type_str for out_dtype in out_dtypes] self.template_vars["strides"] = strides self.template_vars["ksize"] = ksize 
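+    # each template_vars entry is interpolated into the Jinja template named by
+    # __template_name__ above (snippets/quantized_fused_conv2d_maxpool_op.cpp)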
self.template_vars["padding"] = padding diff --git a/utensor_cgen/backend/snippets/templates/snippets/quantized_fused_conv2d_maxpool_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/quantized_fused_conv2d_maxpool_op.cpp index 7aa4f07a..1161e3df 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/quantized_fused_conv2d_maxpool_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/quantized_fused_conv2d_maxpool_op.cpp @@ -8,7 +8,7 @@ ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), "{{outputs[1]}}"); ctx.add(new RamTensor<{{out_dtypes[2]}}>({1}), "{{outputs[2]}}"); {% endif %} - ctx.push(new QuantizedFusedConvMaxpoolOp<{{in_dtype}}, {{filter_dtype}}, {{out_dtype}}>({ {% for s in strides[:-1]%}{{s}}, {%endfor%}{{strides[-1]}} }, { {% for s in ksize[:-1]%}{{s}}, {%endfor%}{{ksize[-1]}} },{{padding}}), + ctx.push(new QuantizedFusedConvMaxpoolOp<{{in_dtype}}, {{filter_dtype}}, {{out_dtypes[0]}}>({ {% for s in strides[:-1]%}{{s}}, {%endfor%}{{strides[-1]}} }, { {% for s in ksize[:-1]%}{{s}}, {%endfor%}{{ksize[-1]}} },{{padding}}), { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, { {% for tname in outputs[:-1]%}"{{tname}}", {%endfor%}"{{outputs[-1]}}" }); {% if to_eval %} From 5b1e183e212bc756607ee27bceb40e6ac109d31a Mon Sep 17 00:00:00 2001 From: Michael Bartling Date: Tue, 25 Jun 2019 13:33:17 -0500 Subject: [PATCH 14/17] Remove C strings from utensor snippets This saves a buttload of space --- utensor_cgen/backend/operators.py | 274 +++++++++++------- utensor_cgen/backend/snippets/_snippets.py | 38 ++- .../templates/containers/weight_header.hpp | 1 + .../snippets/templates/snippets/add_op.cpp | 10 +- .../snippets/templates/snippets/argmax_op.cpp | 12 +- .../templates/snippets/cmsis_nn_fc_op.cpp | 20 +- .../snippets/cmsis_uint8q7origin_op.cpp | 10 +- .../snippets/templates/snippets/conv2d_op.cpp | 8 +- .../snippets/create_tensor_binary.cpp | 6 +- .../templates/snippets/create_tensor_idx.cpp | 12 +- .../templates/snippets/create_tensor_new.cpp | 6 +- .../templates/snippets/dequantize_op.cpp | 12 +- .../snippets/fused_conv2d_maxpool_op.cpp | 8 +- .../snippets/templates/snippets/gather_op.cpp | 8 +- .../snippets/templates/snippets/matmul_op.cpp | 12 +- .../snippets/templates/snippets/max_op.cpp | 12 +- .../templates/snippets/max_pool_op.cpp | 12 +- .../snippets/templates/snippets/min_op.cpp | 12 +- .../snippets/templates/snippets/pack_op.cpp | 12 +- .../snippets/templates/snippets/qadd_op.cpp | 18 +- .../templates/snippets/qconv2d_op.cpp | 18 +- .../templates/snippets/qmatmul_op.cpp | 20 +- .../templates/snippets/qmax_pool_op.cpp | 20 +- .../snippets/templates/snippets/qmul_op.cpp | 18 +- .../snippets/templates/snippets/qrelu_op.cpp | 18 +- .../templates/snippets/qreshape_op.cpp | 18 +- .../templates/snippets/quantV2_op.cpp | 20 +- .../quant_range_for_multiplication_op.cpp | 12 +- .../quantized_fused_conv2d_maxpool_op.cpp | 16 +- .../snippets/templates/snippets/relu_op.cpp | 12 +- .../templates/snippets/requant_op.cpp | 20 +- .../templates/snippets/requant_range_op.cpp | 16 +- .../templates/snippets/reshape_op.cpp | 12 +- .../snippets/templates/snippets/shape_op.cpp | 12 +- .../templates/snippets/softmax_op.cpp | 12 +- .../templates/snippets/strided_slice_op.cpp | 12 +- .../snippets/tensor_string_reference.hpp | 3 + .../templates/snippets/weight_snippet.hpp | 2 +- 38 files changed, 433 insertions(+), 331 deletions(-) create mode 100644 utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp diff --git 
a/utensor_cgen/backend/operators.py b/utensor_cgen/backend/operators.py
index 83062db9..abbeca43 100644
--- a/utensor_cgen/backend/operators.py
+++ b/utensor_cgen/backend/operators.py
@@ -10,6 +10,21 @@
 from .snippets import *  # pylint: disable=W0401,W0614

+def add_tensor_string_reference(sref_name, kwargs):
+  sref_snippet = TensorStringReferenceSnippet(sref_name)
+  weight_container = kwargs['weight_container']
+  weight_container.add_snippet(sref_snippet)
+
+def add_tensor_string_references(inputs, outputs, kwargs):
+  def add_things(mthings):
+    if isinstance(mthings, list):
+      for sref_name in mthings:
+        add_tensor_string_reference(sref_name, kwargs)
+    else:
+      add_tensor_string_reference(mthings, kwargs)
+  add_things(inputs)
+  add_things(outputs)
+
 class OperatorFactory():

   # Can easily do something smarter
@@ -52,14 +67,16 @@ class _AddOperator(_Operator):

   def __init__(self, op_info, **kwargs):
     _Operator.__init__(self)
-    inputs = [tensor_info.name for tensor_info in op_info.input_tensors]
-    output = op_info.output_tensors[0].name
+    inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors]
+    output = prepare_string_ref_name(op_info.output_tensors[0].name)
     tf_dtype = op_info.input_tensors[0].dtype
     parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE,
                                     op_info.op_attr)
     ref_count = parser.get('ref_counts', [0])[0]
     to_eval = parser.get('to_eval', False)
     self._snippet = AddOpSnippet(inputs, output, tf_dtype, ref_count, to_eval)
+
+    add_tensor_string_references(inputs, output, kwargs)

 @OperatorFactory.register
@@ -69,7 +86,7 @@ class _ArgMaxOperator(_Operator):

   def __init__(self, op_info, **kwargs):
     _Operator.__init__(self)
-    inputs = [tensor_info.name for tensor_info in op_info.input_tensors]
+    inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors]
     out_tensor_info = op_info.output_tensors[0]
     output, out_dtype = out_tensor_info.name, out_tensor_info.dtype
     in_dtype = op_info.input_tensors[0].dtype
@@ -79,6 +96,7 @@ def __init__(self, op_info, **kwargs):
     to_eval = parser.get('to_eval', False)
     self._snippet = ArgMaxOpSnippet(inputs, output, in_dtype, out_dtype, ref_count, to_eval)
+    add_tensor_string_references(inputs, output, kwargs)

 @OperatorFactory.register
 class _DequantizeOperator(_Operator):
@@ -87,7 +105,7 @@ class _DequantizeOperator(_Operator):

   def __init__(self, op_info, **kwargs):
     _Operator.__init__(self)
-    inputs = [tensor_info.name for tensor_info in op_info.input_tensors]
+    inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors]
     out_tensor_info = op_info.output_tensors[0]
     output, out_dtype = out_tensor_info.name, out_tensor_info.dtype
     parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE,
@@ -96,6 +114,7 @@ def __init__(self, op_info, **kwargs):
     ref_count = parser.get('ref_counts', [0])[0]
     to_eval = parser.get('to_eval', False)
     self._snippet = DequantizeOpSnippet(inputs, output, out_dtype, ref_count, to_eval)
+    add_tensor_string_references(inputs, output, kwargs)

 @OperatorFactory.register
 class _MaxOperator(_Operator):
@@ -104,7 +123,7 @@ class _MaxOperator(_Operator):

   def __init__(self, op_info, **kwargs):
     _Operator.__init__(self)
-    inputs = [tensor_info.name for tensor_info in op_info.input_tensors]
+    inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors]
     out_tensor_info = op_info.output_tensors[0]
     output, out_dtype, out_shape = (out_tensor_info.name,
                                     out_tensor_info.dtype,
@@ -117,6 +136,8 @@ def __init__(self, op_info, 
**kwargs): ref_count = parser.get('ref_counts', [0])[0] to_eval = parser.get('to_eval', False) self._snippet = MaxOpSnippet(inputs, output, out_dtype, out_shape, ref_count, to_eval) + + add_tensor_string_references(inputs, output, kwargs) @OperatorFactory.register class _MaxPool(_Operator): @@ -125,8 +146,8 @@ class _MaxPool(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + output = prepare_string_ref_name(op_info.output_tensors[0].name) dtype = op_info.output_tensors[0].dtype ksize = op_info.op_attr['ksize'].value.ints_value strides = op_info.op_attr['strides'].value.ints_value @@ -138,6 +159,8 @@ def __init__(self, op_info, **kwargs): self._snippet = MaxPoolSnippet(inputs, output, dtype, ksize, strides, padding, ref_count, to_eval) + + add_tensor_string_references(inputs, output, kwargs) @OperatorFactory.register @@ -147,8 +170,8 @@ class _QuantizedMaxPool(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] dtype = op_info.output_tensors[0].dtype ksize = op_info.op_attr['ksize'].value.ints_value strides = op_info.op_attr['strides'].value.ints_value @@ -161,6 +184,7 @@ def __init__(self, op_info, **kwargs): ksize, strides, padding, ref_counts, to_eval) + add_tensor_string_references(inputs, outputs, kwargs) @OperatorFactory.register class _MinOperator(_Operator): @@ -169,7 +193,7 @@ class _MinOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] out_info = op_info.output_tensors[0] output, out_dtype, out_shape = (out_info.name, out_info.dtype, @@ -183,6 +207,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', False) self._snippet = MinOpSnippet(inputs, output, out_dtype, out_shape, ref_count, to_eval) + add_tensor_string_references(inputs, output, kwargs) @OperatorFactory.register class _QuantizeV2Operator(_Operator): @@ -191,8 +216,8 @@ class _QuantizeV2Operator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] out_dtype = op_info.output_tensors[0].dtype parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, op_info.op_attr) @@ -200,6 +225,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', False) self._snippet = QuantizeV2OpSnippet(inputs, outputs, out_dtype, ref_counts, to_eval) + add_tensor_string_references(inputs, outputs, kwargs) @OperatorFactory.register class _MatMulOperator(_Operator): @@ -208,8 +234,8 @@ class _MatMulOperator(_Operator): def __init__(self, op_info, **kwargs): 
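    # For reference, prepare_string_ref_name (added to _snippets.py below) turns
    # a graph tensor name into a C-identifier-friendly reference; the tensor
    # name in this sketch is hypothetical:
    #   prepare_string_ref_name('dense/MatMul:0')  ->  'sref_dense_MatMul_0'
    # so the generated C++ refers to sref_* symbols instead of C string literals.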
_Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + output = prepare_string_ref_name(op_info.output_tensors[0].name) in_tensor_info = op_info.input_tensors[0] x_dtype, w_dtype, out_dtype = (op_info.input_tensors[0].dtype, op_info.input_tensors[1].dtype, @@ -221,6 +247,8 @@ def __init__(self, op_info, **kwargs): self._snippet = MatMulOpSnippet(inputs, output, x_dtype, w_dtype, out_dtype, ref_count, to_eval) + + add_tensor_string_references(inputs, output, kwargs) @OperatorFactory.register class _QuantizedMatMulOperator(_Operator): @@ -229,8 +257,8 @@ class _QuantizedMatMulOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] in_tensor_info = op_info.input_tensors[0] x_dtype, w_dtype, out_dtype = (op_info.input_tensors[0].dtype, op_info.input_tensors[1].dtype, @@ -243,6 +271,8 @@ def __init__(self, op_info, **kwargs): x_dtype, w_dtype, out_dtype, ref_counts, to_eval) + add_tensor_string_references(inputs, outputs, kwargs) + @OperatorFactory.register class _ReluOperator(_Operator): @@ -250,8 +280,8 @@ class _ReluOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + output = prepare_string_ref_name(op_info.output_tensors[0].name) in_dtype, out_dtype = (op_info.input_tensors[0].dtype, op_info.output_tensors[0].dtype) #NT: why separate this out? #DB: I don't know, it's in the uTensor C code @@ -263,6 +293,7 @@ def __init__(self, op_info, **kwargs): out_dtype, ref_count, to_eval) + add_tensor_string_references(inputs, output, kwargs) @OperatorFactory.register class _QuantizedReluOperator(_Operator): @@ -271,8 +302,8 @@ class _QuantizedReluOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] in_dtype, qout_dtype = (op_info.input_tensors[0].dtype, op_info.output_tensors[0].dtype) #NT: why separate this out? 
#DB: I don't know, it's in the uTensor C code @@ -285,6 +316,7 @@ def __init__(self, op_info, **kwargs): out_dtypes, qout_dtype, ref_counts, to_eval) + add_tensor_string_references(inputs, outputs, kwargs) @OperatorFactory.register class _QuantizedAddOperator(_Operator): @@ -293,8 +325,8 @@ class _QuantizedAddOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] x_dtype, w_dtype, out_dtype = (op_info.input_tensors[0].dtype, op_info.input_tensors[1].dtype, op_info.output_tensors[0].dtype) @@ -306,6 +338,7 @@ def __init__(self, op_info, **kwargs): x_dtype, w_dtype, out_dtype, ref_counts, to_eval) + add_tensor_string_references(inputs, outputs, kwargs) @OperatorFactory.register class _QuantizedMulOperator(_Operator): @@ -314,8 +347,8 @@ class _QuantizedMulOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] x_dtype, w_dtype, out_dtype = (op_info.input_tensors[0].dtype, op_info.input_tensors[1].dtype, op_info.output_tensors[0].dtype) @@ -327,6 +360,7 @@ def __init__(self, op_info, **kwargs): x_dtype, w_dtype, out_dtype, ref_counts, to_eval) + add_tensor_string_references(inputs, outputs, kwargs) @OperatorFactory.register class _RequantizationRangeOperator(_Operator): @@ -335,8 +369,8 @@ class _RequantizationRangeOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] out_dtype = op_info.output_tensors[0].dtype parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, op_info.op_attr) @@ -344,6 +378,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', False) self._snippet = RequantizationRangeOpSnippet(inputs, outputs, out_dtype, ref_counts, to_eval) + add_tensor_string_references(inputs, outputs, kwargs) @OperatorFactory.register @@ -352,8 +387,8 @@ class _RequantizeOperator(_Operator): op_type = "Requantize" def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] qout_dtype = op_info.output_tensors[0].dtype range_dtype = op_info.output_tensors[1].dtype parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, @@ -364,6 +399,7 @@ def __init__(self, op_info, **kwargs): qout_dtype, range_dtype, ref_counts, to_eval) + 
add_tensor_string_references(inputs, outputs, kwargs) @OperatorFactory.register class _ReshapeOperator(_Operator): @@ -372,14 +408,16 @@ class _ReshapeOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + output = prepare_string_ref_name(op_info.output_tensors[0].name) parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, op_info.op_attr) ref_count = parser.get('ref_counts', [0])[0] to_eval = parser.get('to_eval', False) dtype = op_info.input_tensors[0].dtype self._snippet = ReshapeOpSnippet(inputs, output, dtype, ref_count, to_eval) + + add_tensor_string_references(inputs, output, kwargs) @OperatorFactory.register @@ -389,8 +427,8 @@ class _QuantizedReshapeOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, op_info.op_attr) ref_counts = parser.get('ref_counts', []) @@ -400,6 +438,7 @@ def __init__(self, op_info, **kwargs): ref_counts=ref_counts, to_eval=to_eval) + add_tensor_string_references(inputs, outputs, kwargs) @OperatorFactory.register class _CMSIS_NN_FCOperator(_Operator): @@ -409,8 +448,8 @@ def __init__(self, op_info, **kwargs): _Operator.__init__(self) #import pdb; pdb.set_trace() # Note order of inputs/outputs is preserved - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + output = prepare_string_ref_name(op_info.output_tensors[0].name) out_dtype = op_info.output_tensors[0].dtype in_dtypes = [tensor_info.dtype for tensor_info in op_info.input_tensors] assert (op_info.input_tensors[0].shape[1] == None or op_info.input_tensors[0].shape[1] == 1) @@ -425,6 +464,7 @@ def __init__(self, op_info, **kwargs): out_dtype=out_dtype, to_eval=to_eval) + add_tensor_string_references(inputs, output, kwargs) @OperatorFactory.register class _Conv2DOperator(_Operator): @@ -432,8 +472,8 @@ class _Conv2DOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + output = prepare_string_ref_name(op_info.output_tensors[0].name) in_dtype, filter_dtype = (op_info.input_tensors[0].dtype, op_info.input_tensors[1].dtype) out_dtype = op_info.output_tensors[0].dtype @@ -446,6 +486,7 @@ def __init__(self, op_info, **kwargs): self._snippet = Conv2DOpSnippent(inputs, output, strides, padding, in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtype=out_dtype, ref_count=ref_count, to_eval=to_eval) + add_tensor_string_references(inputs, output, kwargs) @OperatorFactory.register class _FusedConv2DMaxpoolOperator(_Operator): @@ -453,8 +494,8 @@ class _FusedConv2DMaxpoolOperator(_Operator): def __init__(self, op_info, **kwargs): 
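    # A sketch of the intended flow, assuming kwargs carries 'weight_container'
    # and using hypothetical names: with inputs == ['sref_x_0', 'sref_w_0'] and
    # output == 'sref_out_0', add_tensor_string_references(inputs, output, kwargs)
    # registers one TensorStringReferenceSnippet per sref_* name; the snippet's
    # class-level registry ensures each name renders only once, even when it is
    # shared across operators.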
_Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + output = prepare_string_ref_name(op_info.output_tensors[0].name) in_dtype, filter_dtype = (op_info.input_tensors[0].dtype, op_info.input_tensors[1].dtype) out_dtype = op_info.output_tensors[0].dtype @@ -469,6 +510,8 @@ def __init__(self, op_info, **kwargs): in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtype=out_dtype, ref_count=ref_count, to_eval=to_eval) + add_tensor_string_references(inputs, output, kwargs) + @OperatorFactory.register class _QuantizedFusedConv2DMaxpoolOperator(_Operator): @@ -476,8 +519,8 @@ class _QuantizedFusedConv2DMaxpoolOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] in_dtype, filter_dtype = (op_info.input_tensors[0].dtype, op_info.input_tensors[1].dtype) out_dtypes = [tensor_info.dtype for tensor_info in op_info.output_tensors] @@ -492,6 +535,8 @@ def __init__(self, op_info, **kwargs): in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtypes=out_dtypes, ref_count=ref_count, to_eval=to_eval) + add_tensor_string_references(inputs, outputs, kwargs) + @OperatorFactory.register class _Conv2DQuantOperator(_Operator): @@ -499,8 +544,8 @@ class _Conv2DQuantOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] in_dtype, filter_dtype = (op_info.input_tensors[0].dtype, op_info.input_tensors[1].dtype) out_dtypes = [tensor_info.dtype for tensor_info in op_info.output_tensors] @@ -513,6 +558,7 @@ def __init__(self, op_info, **kwargs): self._snippet = Conv2DQuantOpSnippent(inputs, outputs, strides, padding, in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtypes=out_dtypes, ref_counts=ref_counts, to_eval=to_eval) + add_tensor_string_references(inputs, outputs, kwargs) @OperatorFactory.register class _Uint8Q7OriginOperator(_Operator): @@ -520,13 +566,14 @@ class _Uint8Q7OriginOperator(_Operator): def __init__(self, op_info, **kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + output = prepare_string_ref_name(op_info.output_tensors[0].name) parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, op_info.op_attr) ref_count = parser.get('ref_counts', [0])[0] to_eval = parser.get('to_eval', False) self._snippet = Uint8Q7OriginSnippet(inputs, output, ref_count, to_eval) + add_tensor_string_references(inputs, output, kwargs) #hard coding to uint8_t uint8_t int32_t for now @OperatorFactory.register @@ -536,8 +583,8 @@ class _QuantRangeForMultiplication_u8_u8_int32_Operator(_Operator): def __init__(self, op_info, 
**kwargs): _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - outputs = [tensor_info.name for tensor_info in op_info.output_tensors] + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + outputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.output_tensors] if op_info.output_tensors[0].dtype != op_info.output_tensors[1].dtype: assert "output tensors must have the same data type" #output_type = op_info.output_tensors[0].dtype @@ -548,6 +595,7 @@ def __init__(self, op_info, **kwargs): ref_counts = parser.get('ref_counts', []) to_eval = parser.get('to_eval', False) self._snippet = QuantRangeForMultiplicationSnippet(inputs, outputs, output_type, ref_counts, to_eval) + add_tensor_string_references(inputs, outputs, kwargs) @OperatorFactory.register class _InlineOperator(_Operator): @@ -564,6 +612,7 @@ def __init__(self, op_info, **kwargs): ref_count = parser.get('ref_counts', [0])[0] pre_tname = self._prepare_tensor_name(out_tname) inline_tname = self._prepare_inline_array_name(out_tname) + out_tname = prepare_string_ref_name(out_tname) value = op_info.op_attr['value'].value.np_array.flatten() self._snippet = CreateTensorBinarySnippet(out_tname, tensor_shape=tensor_shape, tf_dtype=out_dtype, @@ -577,6 +626,7 @@ def __init__(self, op_info, **kwargs): value) weight_container = kwargs['weight_container'] weight_container.add_snippet(weight_snippet) + add_tensor_string_references([], out_tname, kwargs) def _prepare_tensor_name(self, tensor_name): prepared = tensor_name.replace(":", "_").replace("/", "_") @@ -587,6 +637,7 @@ def _prepare_inline_array_name(self, tensor_name): preapred = "inline_{}".format(inline) return preapred + @OperatorFactory.register class _ConstOperator(_Operator): @@ -604,6 +655,7 @@ def __init__(self, op_info, **kwargs): idx_dir = kwargs['idx_dir'] embed_data_dir = kwargs.get('embed_data_dir', os.path.join("/fs", idx_dir)) + out_tname = prepare_string_ref_name(out_tname) self._snippet = CreateTensorIdxSnippet(embed_data_dir, out_tname, idx_fname=idx_fname, np_dtype=out_dtype, @@ -611,6 +663,7 @@ def __init__(self, op_info, **kwargs): idx_path = os.path.join(idx_dir, idx_fname) value = op_info.op_attr['value'].value self._tf_save_data(idx_path, value) + add_tensor_string_references([], out_tname, kwargs) def _tf_prepare_tensor_name(self, tensor_name): """Replace all ':' and '/' with '_' in a given tensor name @@ -640,88 +693,96 @@ def __init__(self, op_info, **kwargs): op_info.op_attr) ref_count = parser.get('ref_counts', [0])[0] pre_tname = self._prepare_tensor_name(out_tname) + out_tname = prepare_string_ref_name(out_tname) #inline_tname = self._prepare_inline_array_name(out_tname) #value = op_info.op_attr['value'].value.np_array.flatten() self._snippet = CreateTensorRamSnippet(out_tname, tensor_shape=tensor_shape, tf_dtype=out_dtype, sptr_name=pre_tname, ref_count=ref_count) + + add_tensor_string_references([], out_tname, kwargs) + def _prepare_tensor_name(self, tensor_name): prepared = tensor_name.replace(":", "_").replace("/", "_") return prepared @OperatorFactory.register class _ShapeOperator(_Operator): - op_type = "Shape" + op_type = "Shape" - def __init__(self, op_info, **kwargs): - _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name - parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, - op_info.op_attr) - ref_count = parser.get('ref_counts', [0])[0] - 
to_eval = parser.get('to_eval', True) - out_dtype = op_info.output_tensors[0].dtype - self._snippet = ShapeOpSnippet(inputs, output, out_dtype, ref_count, to_eval) + def __init__(self, op_info, **kwargs): + _Operator.__init__(self) + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + output = prepare_string_ref_name(op_info.output_tensors[0].name) + parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, + op_info.op_attr) + ref_count = parser.get('ref_counts', [0])[0] + to_eval = parser.get('to_eval', True) + out_dtype = op_info.output_tensors[0].dtype + self._snippet = ShapeOpSnippet(inputs, output, out_dtype, ref_count, to_eval) + add_tensor_string_references(inputs, output, kwargs) @OperatorFactory.register class _StridedSliceOperator(_Operator): - op_type = "StridedSlice" - - def __init__(self, op_info, **kwargs): - _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name - parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, - op_info.op_attr) - ref_count = parser.get('ref_counts', [0])[0] - to_eval = parser.get('to_eval', True) - dtype = op_info.input_tensors[0].dtype - out_dtype = op_info.output_tensors[0].dtype - begin_mask = op_info.op_attr['begin_mask'].value - ellipsis_mask = op_info.op_attr['ellipsis_mask'].value - end_mask = op_info.op_attr['end_mask'].value - new_axis_mask = op_info.op_attr['begin_mask'].value - shrink_axis_mask = op_info.op_attr['shrink_axis_mask'].value - self._snippet = StridedSliceOpSnippet(inputs, output, dtype, out_dtype, - begin_mask, ellipsis_mask, end_mask, - new_axis_mask, shrink_axis_mask, - ref_count, to_eval) + op_type = "StridedSlice" + + def __init__(self, op_info, **kwargs): + _Operator.__init__(self) + inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] + output = prepare_string_ref_name(op_info.output_tensors[0].name) + parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, + op_info.op_attr) + ref_count = parser.get('ref_counts', [0])[0] + to_eval = parser.get('to_eval', True) + dtype = op_info.input_tensors[0].dtype + out_dtype = op_info.output_tensors[0].dtype + begin_mask = op_info.op_attr['begin_mask'].value + ellipsis_mask = op_info.op_attr['ellipsis_mask'].value + end_mask = op_info.op_attr['end_mask'].value + new_axis_mask = op_info.op_attr['begin_mask'].value + shrink_axis_mask = op_info.op_attr['shrink_axis_mask'].value + self._snippet = StridedSliceOpSnippet(inputs, output, dtype, out_dtype, + begin_mask, ellipsis_mask, end_mask, + new_axis_mask, shrink_axis_mask, + ref_count, to_eval) + add_tensor_string_references(inputs, output, kwargs) @OperatorFactory.register class _PackOperator(_Operator): - op_type = "Pack" - - def __init__(self, op_info, **kwargs): - _Operator.__init__(self) - inputs = [tensor_info.name for tensor_info in op_info.input_tensors] - output = op_info.output_tensors[0].name - parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, - op_info.op_attr) - ref_count = parser.get('ref_counts', [0])[0] - to_eval = parser.get('to_eval', True) - dtype = op_info.input_tensors[0].dtype - out_dtype = op_info.output_tensors[0].dtype - N = op_info.op_attr['N'].value - axis = op_info.op_attr['axis'].value - self._snippet = PackOpSnippet(inputs, output, dtype, out_dtype, N, axis, ref_count, to_eval) + op_type = "Pack" + + def __init__(self, op_info, **kwargs): + _Operator.__init__(self) + inputs = 
[prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors]
+    output = prepare_string_ref_name(op_info.output_tensors[0].name)
+    parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE,
+                                    op_info.op_attr)
+    ref_count = parser.get('ref_counts', [0])[0]
+    to_eval = parser.get('to_eval', True)
+    dtype = op_info.input_tensors[0].dtype
+    out_dtype = op_info.output_tensors[0].dtype
+    N = op_info.op_attr['N'].value
+    axis = op_info.op_attr['axis'].value
+    self._snippet = PackOpSnippet(inputs, output, dtype, out_dtype, N, axis, ref_count, to_eval)
+    add_tensor_string_references(inputs, output, kwargs)

 @OperatorFactory.register
 class _SoftmaxOperator(_Operator):
-    op_type = "Softmax"
+  op_type = "Softmax"

-    def __init__(self, op_info, **kwargs):
-        _Operator.__init__(self)
-        inputs = [tensor_info.name for tensor_info in op_info.input_tensors]
-        output = op_info.output_tensors[0].name
-        parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE,
-                                        op_info.op_attr)
-        ref_count = parser.get('ref_counts', [0])[0]
-        to_eval = parser.get('to_eval', True)
-        out_dtype = op_info.output_tensors[0].dtype
-        self._snippet = SoftmaxOpSnippet(inputs, output, out_dtype, ref_count, to_eval)
+  def __init__(self, op_info, **kwargs):
+    _Operator.__init__(self)
+    inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors]
+    output = prepare_string_ref_name(op_info.output_tensors[0].name)
+    parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE,
+                                    op_info.op_attr)
+    ref_count = parser.get('ref_counts', [0])[0]
+    to_eval = parser.get('to_eval', True)
+    out_dtype = op_info.output_tensors[0].dtype
+    self._snippet = SoftmaxOpSnippet(inputs, output, out_dtype, ref_count, to_eval)
+    add_tensor_string_references(inputs, output, kwargs)

 @OperatorFactory.register
 class _GatherOperator(_Operator):
@@ -730,11 +791,12 @@ class _GatherOperator(_Operator):

   def __init__(self, op_info, **kwargs):
     _Operator.__init__(self)
-    inputs = [tensor_info.name for tensor_info in op_info.input_tensors]
-    output = op_info.output_tensors[0].name
+    inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors]
+    output = prepare_string_ref_name(op_info.output_tensors[0].name)
     tf_dtype = op_info.input_tensors[0].dtype
     parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE,
                                     op_info.op_attr)
     ref_count = parser.get('ref_counts', [0])[0]
     to_eval = parser.get('to_eval', False)
     self._snippet = GatherOpSnippet(inputs, output, tf_dtype, ref_count, to_eval)
+    add_tensor_string_references(inputs, output, kwargs)
diff --git a/utensor_cgen/backend/snippets/_snippets.py b/utensor_cgen/backend/snippets/_snippets.py
index f4e65169..158b1f68 100644
--- a/utensor_cgen/backend/snippets/_snippets.py
+++ b/utensor_cgen/backend/snippets/_snippets.py
@@ -18,12 +18,26 @@
   "CommentSnippet", "ContextHeaderSnippet",
   "ContextSnippetsContainer", "QuantizedAddOpSnippet", "QuantizedMulOpSnippet",
-  "CreateTensorBinarySnippet", "WeightSnippet",
+  "CreateTensorBinarySnippet", "WeightSnippet", "TensorStringReferenceSnippet", "prepare_string_ref_name",
   "ContextGlobalArrayContainer", "QuantRangeForMultiplicationSnippet",
   "FusedConv2DOpMaxpoolSnippet", "QuantizedFusedConv2DMaxpoolOpSnippet",
   "GatherOpSnippet", "CreateTensorRamSnippet", "Uint8Q7OriginSnippet"]

+#TODO Put this in the correct location
+def mhash(mstr):
+  """
+  Simple Java-style string hash: v = v*31 + ord(c), seeded with v = 7
+  """
+  v = 7
+  for c in mstr:
+    v = v*31 + ord(c)
+  return v
+def prepare_string_ref_name(tensor_name):
+  inline = tensor_name.replace(":", 
"_").replace("/", "_") + prepared = "sref_{}".format(inline) + return prepared + # TODO: Better abstraction, i.e a better backend for code generation class CreateTensorIdxSnippet(Snippet): __template_name__ = "snippets/create_tensor_idx.cpp" @@ -768,10 +782,32 @@ def __init__(self, guard_name, graph_name, placeholders=None): self.template_vars["graph_name"] = graph_name self.template_vars["placeholders"] = placeholders +class TensorStringReferenceSnippet(Snippet): + __template_name__ = "snippets/tensor_string_reference.hpp" + __headers__ = set([]) + __references__ = set([]) + + + def __init__(self, sref_name): + Snippet.__init__(self) + self.template_vars['sref_name'] = sref_name + self.template_vars['string_id'] = mhash(sref_name) + # Dont render duplicates + if sref_name not in __references: + self.renderable = True + else: + self.renderable = False + __references.add(sref_name) + + def render(self): + if self.renderable: + return Snippet.render(self) + class WeightSnippet(Snippet): __template_name__ = "snippets/weight_snippet.hpp" __headers__ = set([]) + def __init__(self, inline_name, type, shape, value): Snippet.__init__(self) length = np.prod(shape) diff --git a/utensor_cgen/backend/snippets/templates/containers/weight_header.hpp b/utensor_cgen/backend/snippets/templates/containers/weight_header.hpp index e1d89bcb..56e86eb3 100644 --- a/utensor_cgen/backend/snippets/templates/containers/weight_header.hpp +++ b/utensor_cgen/backend/snippets/templates/containers/weight_header.hpp @@ -1,3 +1,4 @@ +#include {% for snippet in snippets%} {{snippet.render()}} {% endfor %} diff --git a/utensor_cgen/backend/snippets/templates/snippets/add_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/add_op.cpp index 7cf9db99..8009ac6e 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/add_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/add_op.cpp @@ -3,14 +3,14 @@ S_TENSOR {{sptr_name}}; {% endif %} { {% if ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new AddOp<{{in_dtype}}, {{out_dtype}}>(), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/argmax_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/argmax_op.cpp index 0e4ebe0f..5f5196a1 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/argmax_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/argmax_op.cpp @@ -3,17 +3,17 @@ S_TENSOR {{sptr_name}}; {% endif %} { {% if ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new ArgMaxOp<{{in_dtype}}, {{out_dtype}}>(), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if create_sptr %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = 
ctx.get({{output}}); {% endif %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/cmsis_nn_fc_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/cmsis_nn_fc_op.cpp index dd871804..be36921a 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/cmsis_nn_fc_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/cmsis_nn_fc_op.cpp @@ -1,25 +1,25 @@ { {# // {%if ref_counts%} - // ctx.add(new RamTensor<{{out_dtypes[0]}}>(), "{{outputs[0]}}", {{ref_counts[0]}}); - // ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - // ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + // ctx.add(new RamTensor<{{out_dtypes[0]}}>(), {{outputs[0]}}, {{ref_counts[0]}}); + // ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), {{outputs[1]}}, {{ref_counts[1]}}); + // ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), {{outputs[2]}}, {{ref_counts[2]}}); // {%else%} - // ctx.add(new RamTensor<{{out_dtypes[0]}}>(), "{{outputs[0]}}"); - // ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), "{{outputs[1]}}"); - // ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), "{{outputs[2]}}"); + // ctx.add(new RamTensor<{{out_dtypes[0]}}>(), {{outputs[0]}}); + // ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), {{outputs[1]}}); + // ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), {{outputs[2]}}); // {%endif%} #} {%if ref_counts%} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_counts[0]}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_counts[0]}}); {%else%} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {%endif%} ctx.push(new FullyConnectedLayerCmsisOp<{{out_dtype}}>(), - { {%for tname in inputs[:-1] %}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {%for tname in inputs[:-1] %}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {%if to_eval%} ctx.eval(); {%endif%} diff --git a/utensor_cgen/backend/snippets/templates/snippets/cmsis_uint8q7origin_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/cmsis_uint8q7origin_op.cpp index a4cd9102..332428cf 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/cmsis_uint8q7origin_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/cmsis_uint8q7origin_op.cpp @@ -3,14 +3,14 @@ S_TENSOR {{sptr_name}}; {% endif %} { {% if ref_count %} - ctx.add(new RamTensor(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor(), "{{output}}"); + ctx.add(new RamTensor(), {{output}}); {% endif %} ctx.push(new Uint8Q7OriginOp(), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/conv2d_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/conv2d_op.cpp index e9245688..bbde3d1d 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/conv2d_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/conv2d_op.cpp @@ -1,12 +1,12 @@ { {% if ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + 
ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new ConvOp<{{in_dtype}}, {{filter_dtype}}, {{out_dtype}}>({ {% for s in strides[:-1]%}{{s}}, {%endfor%}{{strides[-1]}} }, {{padding}}), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}"}); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}}}); {% if to_eval %} ctx.eval(); {% endif %} diff --git a/utensor_cgen/backend/snippets/templates/snippets/create_tensor_binary.cpp b/utensor_cgen/backend/snippets/templates/snippets/create_tensor_binary.cpp index 42b2f5bc..95f95cf0 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/create_tensor_binary.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/create_tensor_binary.cpp @@ -4,14 +4,14 @@ S_TENSOR {{sptr_name}}; { {%if ref_count%} ctx.add(new {{tensor_type}}<{{dtype}}>({{tensor_shape}}, {{inline_name}}), - "{{tensor_name}}", + {{ tensor_name }}, {{ref_count}}); {% else %} ctx.add(new {{tensor_type}}<{{dtype}}>({{tensor_shape}}, {{inline_name}}), - "{{tensor_name}}"); + {{ tensor_name }}); {%endif%} {% if create_sptr %} - {{sptr_name}} = ctx.get("{{tensor_name}}"); + {{sptr_name}} = ctx.get({{ tensor_name }}); {% endif %} {%if to_eval%} ctx.eval(); diff --git a/utensor_cgen/backend/snippets/templates/snippets/create_tensor_idx.cpp b/utensor_cgen/backend/snippets/templates/snippets/create_tensor_idx.cpp index f5f80865..53298e3b 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/create_tensor_idx.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/create_tensor_idx.cpp @@ -4,17 +4,17 @@ S_TENSOR {{sptr_name}}; { TensorIdxImporter t_import; {% if ref_count %} - ctx.add(t_import.{{importer_dtype}}_import("{{idx_path}}"), - "{{tensor_name}}", + ctx.add(t_import.{{importer_dtype}}_import({{idx_path}}), + {{tensor_name}}, {{ref_count}}); {% else %} - ctx.add(t_import.{{importer_dtype}}_import("{{idx_path}}"), - "{{tensor_name}}"); + ctx.add(t_import.{{importer_dtype}}_import({{idx_path}}), + {{tensor_name}}); {% endif %} {% if create_sptr %} - {{sptr_name}} = ctx.get("{{tensor_name}}"); + {{sptr_name}} = ctx.get({{tensor_name}}); {% endif %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/create_tensor_new.cpp b/utensor_cgen/backend/snippets/templates/snippets/create_tensor_new.cpp index c00b2c0f..e97a9958 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/create_tensor_new.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/create_tensor_new.cpp @@ -2,11 +2,11 @@ S_TENSOR {{sptr_name}}; {% endif %} { - ctx.add(new {{tensor_type}}<{{dtype}}>({% if tensor_shape %}{{tensor_shape}}{%endif%}), "{{tensor_name}}"{%if ref_count%}, {{ref_count}}{%endif%}); + ctx.add(new {{tensor_type}}<{{dtype}}>({% if tensor_shape %}{{tensor_shape}}{%endif%}), {{tensor_name}}{%if ref_count%}, {{ref_count}}{%endif%}); {% if create_sptr %} - {{sptr_name}} = ctx.get("{{tensor_name}}"); + {{sptr_name}} = ctx.get({{tensor_name}}); {% endif %} {%if to_eval%} ctx.eval(); {%endif%} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/dequantize_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/dequantize_op.cpp index 58882518..944406cf 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/dequantize_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/dequantize_op.cpp @@ -3,17 +3,17 @@ S_TENSOR {{sptr_name}}; {% endif %} { {% if 
ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new DequantizeOp(), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if create_sptr %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endif %} {%if to_eval%} ctx.eval(); {%endif%} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/fused_conv2d_maxpool_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/fused_conv2d_maxpool_op.cpp index f4418541..13618c34 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/fused_conv2d_maxpool_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/fused_conv2d_maxpool_op.cpp @@ -1,12 +1,12 @@ { {% if ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new FusedConvMaxpoolOp<{{in_dtype}}, {{filter_dtype}}, {{out_dtype}}>({ {% for s in strides[:-1]%}{{s}}, {%endfor%}{{strides[-1]}} }, { {% for s in ksize[:-1]%}{{s}}, {%endfor%}{{ksize[-1]}} },{{padding}}), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}"}); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}}}); {% if to_eval %} ctx.eval(); {% endif %} diff --git a/utensor_cgen/backend/snippets/templates/snippets/gather_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/gather_op.cpp index 57bcf4ac..e3c7f3e5 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/gather_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/gather_op.cpp @@ -3,13 +3,13 @@ S_TENSOR {{sptr_name}}; {% endif %} { {% if ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new GatherOp<{{in_dtype}}>(), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if to_eval %} ctx.eval(); {% endif %} diff --git a/utensor_cgen/backend/snippets/templates/snippets/matmul_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/matmul_op.cpp index d8243054..27ded788 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/matmul_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/matmul_op.cpp @@ -3,17 +3,17 @@ S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {% if ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new MatMulOp<{{x_dtype}}, {{w_dtype}}, {{out_dtype}}>(), - { {%for tname in 
inputs[:-1] %}"{{tname}}", {% endfor %} "{{inputs[-1]}}" }, - { "{{output}}" }); + { {%for tname in inputs[:-1] %}{{tname}}, {% endfor %} {{inputs[-1]}} }, + { {{output}} }); {% for sptr_name, output in zip(sptr_names, outputs) %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endfor %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/max_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/max_op.cpp index 356a25cd..e37fe815 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/max_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/max_op.cpp @@ -9,17 +9,17 @@ S_TENSOR {{sptr_name}}; out_tensor = new RamTensor<{{out_dtype}}>(); {%endif%} {%if ref_count %} - ctx.add(out_tensor, "{{output}}", {{ref_count}}); + ctx.add(out_tensor, {{output}}, {{ref_count}}); {%else%} - ctx.add(out_tensor, "{{output}}"); + ctx.add(out_tensor, {{output}}); {%endif%} ctx.push(new MaxOp(), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if create_sptr %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endif %} {%if to_eval%} ctx.eval(); {%endif%} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/max_pool_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/max_pool_op.cpp index 0bd17e92..19607451 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/max_pool_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/max_pool_op.cpp @@ -3,22 +3,22 @@ S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {% if ref_count %} - ctx.add(new RamTensor<{{dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{dtype}}>(), {{output}}); {% endif %} ctx.push(new MaxPoolingOp<{{dtype}}>({{wind_rows}}, {{wind_cols}}, {{row_stride}}, {{col_stride}}, {{padding}}), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {# {% if create_sptr %} #} {% for sptr_name, output in zip(sptr_names, outputs) %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endfor %} {# {% endif %} #} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/min_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/min_op.cpp index 4bc2aab4..fe7678ad 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/min_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/min_op.cpp @@ -9,17 +9,17 @@ S_TENSOR {{sptr_name}}; out_tensor = new RamTensor<{{out_dtype}}>(); {% endif %} {% if ref_count%} - ctx.add(out_tensor, "{{output}}", {{ref_count}}); + ctx.add(out_tensor, {{output}}, {{ref_count}}); {% else %} - ctx.add(out_tensor, "{{output}}"); + ctx.add(out_tensor, {{output}}); {% endif %} ctx.push(new MinOp(), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if create_sptr %} - {{sptr_name}} = 
ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endif %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/pack_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/pack_op.cpp index a15de170..ba62bec1 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/pack_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/pack_op.cpp @@ -3,17 +3,17 @@ S_TENSOR {{sptr_name}}; {% endif %} { {% if ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new PackOp<{{dtype}}>({{N}}, {{axis}}), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if create_sptr %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endif %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/qadd_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/qadd_op.cpp index 9a33c897..96d21524 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/qadd_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/qadd_op.cpp @@ -3,19 +3,19 @@ S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {% if ref_counts %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - ctx.add(new RamTensor({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}, {{ref_counts[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}, {{ref_counts[2]}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{outputs[0]}}"); - ctx.add(new RamTensor({1}), "{{outputs[1]}}"); - ctx.add(new RamTensor({1}), "{{outputs[2]}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{outputs[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}); {% endif %} ctx.push(new QuantizedAddOp<{{x_dtype}}, {{w_dtype}}, {{out_dtype}}>(), - { {%for tname in inputs[:-1] %}"{{tname}}", {% endfor %} "{{inputs[-1]}}" }, - { {%for tname in outputs[:-1] %}"{{tname}}", {% endfor %} "{{outputs[-1]}}" }); + { {%for tname in inputs[:-1] %}{{tname}}, {% endfor %} {{inputs[-1]}} }, + { {%for tname in outputs[:-1] %}{{tname}}, {% endfor %} {{outputs[-1]}} }); {% for sptr_name, output in zip(sptr_names, outputs) %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endfor %} {% if to_eval %} ctx.eval(); diff --git a/utensor_cgen/backend/snippets/templates/snippets/qconv2d_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/qconv2d_op.cpp index 1c136eca..8a2e40b8 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/qconv2d_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/qconv2d_op.cpp @@ -1,17 +1,17 @@ { {% if ref_counts %} - ctx.add(new RamTensor<{{out_dtypes[0]}}>(), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - ctx.add(new 
RamTensor<{{out_dtypes[2]}}>({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + ctx.add(new RamTensor<{{out_dtypes[0]}}>(), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), {{outputs[1]}}, {{ref_counts[1]}}); + ctx.add(new RamTensor<{{out_dtypes[2]}}>({1}), {{outputs[2]}}, {{ref_counts[2]}}); {% else %} - ctx.add(new RamTensor<{{out_dtypes[0]}}>(), "{{outputs[0]}}"); - ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), "{{outputs[1]}}"); - ctx.add(new RamTensor<{{out_dtypes[2]}}>({1}), "{{outputs[2]}}"); + ctx.add(new RamTensor<{{out_dtypes[0]}}>(), {{outputs[0]}}); + ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), {{outputs[1]}}); + ctx.add(new RamTensor<{{out_dtypes[2]}}>({1}), {{outputs[2]}}); {% endif %} ctx.push(new QntConvOp<{{in_dtype}}, {{filter_dtype}}, {{out_dtypes[0]}}>({ {% for s in strides[:-1]%}{{s}}, {%endfor%}{{strides[-1]}} }, {{padding}}), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { {% for tname in outputs[:-1]%}"{{tname}}", {%endfor%}"{{outputs[-1]}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {% for tname in outputs[:-1]%}{{tname}}, {%endfor%}{{outputs[-1]}} }); {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/qmatmul_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/qmatmul_op.cpp index bf8320d1..9027c8b0 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/qmatmul_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/qmatmul_op.cpp @@ -3,21 +3,21 @@ S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {% if ref_counts %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - ctx.add(new RamTensor({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}, {{ref_counts[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}, {{ref_counts[2]}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{outputs[0]}}"); - ctx.add(new RamTensor({1}), "{{outputs[1]}}"); - ctx.add(new RamTensor({1}), "{{outputs[2]}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{outputs[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}); {% endif %} ctx.push(new QntMatMulOp<{{x_dtype}}, {{w_dtype}}, {{out_dtype}}>(), - { {%for tname in inputs[:-1] %}"{{tname}}", {% endfor %} "{{inputs[-1]}}" }, - { {%for tname in outputs[:-1] %}"{{tname}}", {% endfor %} "{{outputs[-1]}}" }); + { {%for tname in inputs[:-1] %}{{tname}}, {% endfor %} {{inputs[-1]}} }, + { {%for tname in outputs[:-1] %}{{tname}}, {% endfor %} {{outputs[-1]}} }); {% for sptr_name, output in zip(sptr_names, outputs) %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endfor %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/qmax_pool_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/qmax_pool_op.cpp index 209b8fb5..dbd088f4 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/qmax_pool_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/qmax_pool_op.cpp @@ -3,26 +3,26 @@ S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {% if ref_counts %} - 
ctx.add(new RamTensor<{{dtype}}>(), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - ctx.add(new RamTensor({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + ctx.add(new RamTensor<{{dtype}}>(), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}, {{ref_counts[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}, {{ref_counts[2]}}); {% else %} - ctx.add(new RamTensor<{{dtype}}>(), "{{outputs[0]}}"); - ctx.add(new RamTensor({1}), "{{outputs[1]}}"); - ctx.add(new RamTensor({1}), "{{outputs[2]}}"); + ctx.add(new RamTensor<{{dtype}}>(), {{outputs[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}); {% endif %} ctx.push(new QuantizedMaxPoolingOp<{{dtype}}>({{wind_rows}}, {{wind_cols}}, {{row_stride}}, {{col_stride}}, {{padding}}), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { {%for tname in outputs[:-1] %}"{{tname}}", {% endfor %} "{{outputs[-1]}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {%for tname in outputs[:-1] %}{{tname}}, {% endfor %} {{outputs[-1]}} }); {# {% if create_sptr %} #} {% for sptr_name, output in zip(sptr_names, outputs) %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endfor %} {# {% endif %} #} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/qmul_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/qmul_op.cpp index e2c83757..29ee3d95 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/qmul_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/qmul_op.cpp @@ -3,19 +3,19 @@ S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {% if ref_counts %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - ctx.add(new RamTensor({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}, {{ref_counts[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}, {{ref_counts[2]}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{outputs[0]}}"); - ctx.add(new RamTensor({1}), "{{outputs[1]}}"); - ctx.add(new RamTensor({1}), "{{outputs[2]}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{outputs[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}); {% endif %} ctx.push(new QuantizedMulOp<{{x_dtype}}, {{w_dtype}}, {{out_dtype}}>(), - { {%for tname in inputs[:-1] %}"{{tname}}", {% endfor %} "{{inputs[-1]}}" }, - { {%for tname in outputs[:-1] %}"{{tname}}", {% endfor %} "{{outputs[-1]}}" }); + { {%for tname in inputs[:-1] %}{{tname}}, {% endfor %} {{inputs[-1]}} }, + { {%for tname in outputs[:-1] %}{{tname}}, {% endfor %} {{outputs[-1]}} }); {% for sptr_name, output in zip(sptr_names, outputs) %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endfor %} {% if to_eval %} ctx.eval(); diff --git a/utensor_cgen/backend/snippets/templates/snippets/qrelu_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/qrelu_op.cpp index cac38f62..0240e994 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/qrelu_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/qrelu_op.cpp @@ -3,19 +3,19 @@ S_TENSOR {%for sptr_name 
in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {%if ref_counts%} - ctx.add(new RamTensor<{{qout_dtype}}>(), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + ctx.add(new RamTensor<{{qout_dtype}}>(), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), {{outputs[1]}}, {{ref_counts[1]}}); + ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), {{outputs[2]}}, {{ref_counts[2]}}); {%else%} - ctx.add(new RamTensor<{{qout_dtype}}>(), "{{outputs[0]}}"); - ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), "{{outputs[1]}}"); - ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), "{{outputs[2]}}"); + ctx.add(new RamTensor<{{qout_dtype}}>(), {{outputs[0]}}); + ctx.add(new RamTensor<{{out_dtypes[0]}}>({1}), {{outputs[1]}}); + ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), {{outputs[2]}}); {%endif%} ctx.push(new QuantizedReluOp<{{in_dtype}}, {{out_dtypes[0]}}, {{qout_dtype}}>(), - { {% for tname in inputs[:-1]%}"{{tname}}", {% endfor %}"{{inputs[-1]}}" }, - { {% for tname in outputs[:-1]%}"{{tname}}", {% endfor %}"{{outputs[-1]}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {% endfor %}{{inputs[-1]}} }, + { {% for tname in outputs[:-1]%}{{tname}}, {% endfor %}{{outputs[-1]}} }); {% for sptr_name, output in zip(sptr_names, outputs) %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endfor %} {% if to_eval%} ctx.eval(); diff --git a/utensor_cgen/backend/snippets/templates/snippets/qreshape_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/qreshape_op.cpp index dae714dc..b2e74724 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/qreshape_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/qreshape_op.cpp @@ -1,17 +1,17 @@ { {% if ref_counts%} - ctx.add(new RamTensor(), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - ctx.add(new RamTensor({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + ctx.add(new RamTensor(), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}, {{ref_counts[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}, {{ref_counts[2]}}); {% else %} - ctx.add(new RamTensor(), "{{outputs[0]}}"); - ctx.add(new RamTensor({1}), "{{outputs[1]}}"); - ctx.add(new RamTensor({1}), "{{outputs[2]}}"); + ctx.add(new RamTensor(), {{outputs[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}); {% endif %} ctx.push(new QuantizedReshapeOp(), - { {%for tname in inputs[:-1] %}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { {%for tname in outputs[:-1] %}"{{tname}}", {%endfor%}"{{outputs[-1]}}" }); + { {%for tname in inputs[:-1] %}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {%for tname in outputs[:-1] %}{{tname}}, {%endfor%}{{outputs[-1]}} }); {%if to_eval%} ctx.eval(); {%endif%} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/quantV2_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/quantV2_op.cpp index 6f55c1a4..cb835b09 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/quantV2_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/quantV2_op.cpp @@ -3,21 +3,21 @@ S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {% if ref_counts%} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{outputs[0]}}", 
{{ref_counts[0]}}); - ctx.add(new RamTensor({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - ctx.add(new RamTensor({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}, {{ref_counts[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}, {{ref_counts[2]}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{outputs[0]}}"); - ctx.add(new RamTensor({1}), "{{outputs[1]}}"); - ctx.add(new RamTensor({1}), "{{outputs[2]}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{outputs[0]}}); + ctx.add(new RamTensor({1}), {{outputs[1]}}); + ctx.add(new RamTensor({1}), {{outputs[2]}}); {% endif %} ctx.push(new QuantizeV2Op(), - { {% for tname in inputs[:-1]%} "{{tname}}", {% endfor %}"{{inputs[-1]}}" }, - { {% for tname in outputs[:-1]%} "{{tname}}", {% endfor %}"{{outputs[-1]}}" }); + { {% for tname in inputs[:-1]%} {{tname}}, {% endfor %}{{inputs[-1]}} }, + { {% for tname in outputs[:-1]%} {{tname}}, {% endfor %}{{outputs[-1]}} }); {%for sptr_name, output in zip(sptr_names, outputs)%} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endfor %} {% if to_eval %} ctx.eval(); {%endif%} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/quant_range_for_multiplication_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/quant_range_for_multiplication_op.cpp index db922bbb..d3a8fe08 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/quant_range_for_multiplication_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/quant_range_for_multiplication_op.cpp @@ -1,15 +1,15 @@ { {%if ref_counts%} - ctx.add(new RamTensor<{{out_dtype}}>({1}), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor<{{out_dtype}}>({1}), "{{outputs[1]}}", {{ref_counts[1]}}); + ctx.add(new RamTensor<{{out_dtype}}>({1}), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor<{{out_dtype}}>({1}), {{outputs[1]}}, {{ref_counts[1]}}); {%else%} - ctx.add(new RamTensor<{{out_dtype}}>({1}), "{{outputs[0]}}"); - ctx.add(new RamTensor<{{out_dtype}}>({1}), "{{outputs[1]}}"); + ctx.add(new RamTensor<{{out_dtype}}>({1}), {{outputs[0]}}); + ctx.add(new RamTensor<{{out_dtype}}>({1}), {{outputs[1]}}); {%endif%} ctx.push(new QuantRangeForMultiplicationOp(), - { {%for tname in inputs[:-1] %}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { {%for tname in outputs[:-1] %}"{{tname}}", {%endfor%}"{{outputs[-1]}}" }); + { {%for tname in inputs[:-1] %}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {%for tname in outputs[:-1] %}{{tname}}, {%endfor%}{{outputs[-1]}} }); {%if to_eval%} ctx.eval(); {%endif%} diff --git a/utensor_cgen/backend/snippets/templates/snippets/quantized_fused_conv2d_maxpool_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/quantized_fused_conv2d_maxpool_op.cpp index 1161e3df..27859ae5 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/quantized_fused_conv2d_maxpool_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/quantized_fused_conv2d_maxpool_op.cpp @@ -1,16 +1,16 @@ { {% if ref_counts %} - ctx.add(new RamTensor<{{out_dtypes[0]}}>(), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - ctx.add(new RamTensor<{{out_dtypes[2]}}>({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + ctx.add(new RamTensor<{{out_dtypes[0]}}>(), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), {{outputs[1]}}, 
{{ref_counts[1]}}); + ctx.add(new RamTensor<{{out_dtypes[2]}}>({1}), {{outputs[2]}}, {{ref_counts[2]}}); {% else %} - ctx.add(new RamTensor<{{out_dtypes[0]}}>(), "{{outputs[0]}}"); - ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), "{{outputs[1]}}"); - ctx.add(new RamTensor<{{out_dtypes[2]}}>({1}), "{{outputs[2]}}"); + ctx.add(new RamTensor<{{out_dtypes[0]}}>(), {{outputs[0]}}); + ctx.add(new RamTensor<{{out_dtypes[1]}}>({1}), {{outputs[1]}}); + ctx.add(new RamTensor<{{out_dtypes[2]}}>({1}), {{outputs[2]}}); {% endif %} ctx.push(new QuantizedFusedConvMaxpoolOp<{{in_dtype}}, {{filter_dtype}}, {{out_dtypes[0]}}>({ {% for s in strides[:-1]%}{{s}}, {%endfor%}{{strides[-1]}} }, { {% for s in ksize[:-1]%}{{s}}, {%endfor%}{{ksize[-1]}} },{{padding}}), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { {% for tname in outputs[:-1]%}"{{tname}}", {%endfor%}"{{outputs[-1]}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {% for tname in outputs[:-1]%}{{tname}}, {%endfor%}{{outputs[-1]}} }); {% if to_eval %} ctx.eval(); {% endif %} diff --git a/utensor_cgen/backend/snippets/templates/snippets/relu_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/relu_op.cpp index 29b7c2fa..a9195255 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/relu_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/relu_op.cpp @@ -3,17 +3,17 @@ S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {%if ref_count%} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {%else%} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {%endif%} ctx.push(new ReluOp<{{in_dtype}}, {{out_dtype}}>(), - { {% for tname in inputs[:-1]%}"{{tname}}", {% endfor %}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {% endfor %}{{inputs[-1]}} }, + { {{output}} }); {% for sptr_name, output in zip(sptr_names, outputs) %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endfor %} {% if to_eval%} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/requant_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/requant_op.cpp index 4bac1905..c9cb20a5 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/requant_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/requant_op.cpp @@ -3,21 +3,21 @@ S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {%if ref_counts%} - ctx.add(new RamTensor<{{qout_dtype}}>(), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor<{{range_dtype}}>({1}), "{{outputs[1]}}", {{ref_counts[1]}}); - ctx.add(new RamTensor<{{range_dtype}}>({1}), "{{outputs[2]}}", {{ref_counts[2]}}); + ctx.add(new RamTensor<{{qout_dtype}}>(), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor<{{range_dtype}}>({1}), {{outputs[1]}}, {{ref_counts[1]}}); + ctx.add(new RamTensor<{{range_dtype}}>({1}), {{outputs[2]}}, {{ref_counts[2]}}); {%else%} - ctx.add(new RamTensor<{{qout_dtype}}>(), "{{outputs[0]}}"); - ctx.add(new RamTensor<{{range_dtype}}>({1}), "{{outputs[1]}}"); - ctx.add(new RamTensor<{{range_dtype}}>({1}), "{{outputs[2]}}"); + ctx.add(new RamTensor<{{qout_dtype}}>(), {{outputs[0]}}); + ctx.add(new RamTensor<{{range_dtype}}>({1}), {{outputs[1]}}); + ctx.add(new 
RamTensor<{{range_dtype}}>({1}), {{outputs[2]}}); {%endif%} ctx.push(new RequantizeOp(), - { {% for tname in inputs[:-1]%}"{{tname}}", {% endfor %}"{{inputs[-1]}}" }, - { {% for tname in outputs[:-1]%}"{{tname}}", {% endfor %}"{{outputs[-1]}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {% endfor %}{{inputs[-1]}} }, + { {% for tname in outputs[:-1]%}{{tname}}, {% endfor %}{{outputs[-1]}} }); {%for sptr_name, output in zip(sptr_names, outputs)%} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {%endfor%} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/requant_range_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/requant_range_op.cpp index 97e126cf..33d5b74a 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/requant_range_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/requant_range_op.cpp @@ -3,19 +3,19 @@ S_TENSOR {%for sptr_name in sptr_names[:-1]%}{{sptr_name}}, {%endfor%} {{sptr_na {% endif %} { {%if ref_counts%} - ctx.add(new RamTensor<{{out_dtype}}>({1}), "{{outputs[0]}}", {{ref_counts[0]}}); - ctx.add(new RamTensor<{{out_dtype}}>({1}), "{{outputs[1]}}", {{ref_counts[1]}}); + ctx.add(new RamTensor<{{out_dtype}}>({1}), {{outputs[0]}}, {{ref_counts[0]}}); + ctx.add(new RamTensor<{{out_dtype}}>({1}), {{outputs[1]}}, {{ref_counts[1]}}); {%else%} - ctx.add(new RamTensor<{{out_dtype}}>({1}), "{{outputs[0]}}"); - ctx.add(new RamTensor<{{out_dtype}}>({1}), "{{outputs[1]}}"); + ctx.add(new RamTensor<{{out_dtype}}>({1}), {{outputs[0]}}); + ctx.add(new RamTensor<{{out_dtype}}>({1}), {{outputs[1]}}); {%endif%} ctx.push(new Requantization_RangeOp(), - { {%for tname in inputs[:-1]%}"{{tname}}", {% endfor %}"{{inputs[-1]}}" }, - { {%for tname in outputs[:-1]%}"{{tname}}", {% endfor %}"{{outputs[-1]}}" }); + { {%for tname in inputs[:-1]%}{{tname}}, {% endfor %}{{inputs[-1]}} }, + { {%for tname in outputs[:-1]%}{{tname}}, {% endfor %}{{outputs[-1]}} }); {% for sptr_name, output in zip(sptr_names, outputs) %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endfor %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/reshape_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/reshape_op.cpp index 092a5f3e..d05c112b 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/reshape_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/reshape_op.cpp @@ -3,17 +3,17 @@ S_TENSOR {{sptr_name}}; {% endif %} { {% if ref_count %} - ctx.add(new RamTensor<{{dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{dtype}}>(), {{output}}); {% endif %} ctx.push(new ReshapeOp(), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if create_sptr %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endif %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/shape_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/shape_op.cpp index c25d2b73..2e96e8be 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/shape_op.cpp +++ 
b/utensor_cgen/backend/snippets/templates/snippets/shape_op.cpp @@ -3,17 +3,17 @@ S_TENSOR {{sptr_name}}; {% endif %} { {% if ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new ShapeOp(), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if create_sptr %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endif %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/softmax_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/softmax_op.cpp index aab8cf95..cfe8a858 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/softmax_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/softmax_op.cpp @@ -3,17 +3,17 @@ S_TENSOR {{sptr_name}}; {% endif %} { {% if ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new SoftmaxOp(), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if create_sptr %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endif %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/strided_slice_op.cpp b/utensor_cgen/backend/snippets/templates/snippets/strided_slice_op.cpp index d67e8629..1f901a54 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/strided_slice_op.cpp +++ b/utensor_cgen/backend/snippets/templates/snippets/strided_slice_op.cpp @@ -3,17 +3,17 @@ S_TENSOR {{sptr_name}}; {% endif %} { {% if ref_count %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}", {{ref_count}}); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}, {{ref_count}}); {% else %} - ctx.add(new RamTensor<{{out_dtype}}>(), "{{output}}"); + ctx.add(new RamTensor<{{out_dtype}}>(), {{output}}); {% endif %} ctx.push(new StridedSliceOp<{{dtype}}>({{begin_mask}}, {{ellipsis_mask}}, {{end_mask}}, {{new_axis_mask}}, {{shrink_axis_mask}}), - { {% for tname in inputs[:-1]%}"{{tname}}", {%endfor%}"{{inputs[-1]}}" }, - { "{{output}}" }); + { {% for tname in inputs[:-1]%}{{tname}}, {%endfor%}{{inputs[-1]}} }, + { {{output}} }); {% if create_sptr %} - {{sptr_name}} = ctx.get("{{output}}"); + {{sptr_name}} = ctx.get({{output}}); {% endif %} {% if to_eval %} ctx.eval(); {% endif %} -} \ No newline at end of file +} diff --git a/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp b/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp new file mode 100644 index 00000000..f04c53cc --- /dev/null +++ b/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp @@ -0,0 +1,3 @@ +#include + +#define {{ sref_name }} {{ string_id }} diff --git a/utensor_cgen/backend/snippets/templates/snippets/weight_snippet.hpp 
b/utensor_cgen/backend/snippets/templates/snippets/weight_snippet.hpp index 2ccf15fc..10adc447 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/weight_snippet.hpp +++ b/utensor_cgen/backend/snippets/templates/snippets/weight_snippet.hpp @@ -1,3 +1,3 @@ -#include +//#include const {{ type }} {{ inline_name }} [ {{ length }} ] = { {% for item in value %} {{ item }}, {% endfor %} }; From 296916543b1f48b3c342f027285500693ef0c200 Mon Sep 17 00:00:00 2001 From: Michael Bartling Date: Tue, 25 Jun 2019 16:07:35 -0500 Subject: [PATCH 15/17] Use string references rather than cstrings. Note in the demo i am working on this drops the size of the model file from 25.8 KB to 403*4 B. --- utensor_cgen/backend/operators.py | 98 ++++++++++--------- utensor_cgen/backend/snippets/_snippets.py | 25 ++--- .../snippets/tensor_string_reference.hpp | 2 - 3 files changed, 65 insertions(+), 60 deletions(-) diff --git a/utensor_cgen/backend/operators.py b/utensor_cgen/backend/operators.py index abbeca43..24812049 100644 --- a/utensor_cgen/backend/operators.py +++ b/utensor_cgen/backend/operators.py @@ -19,12 +19,16 @@ def add_tensor_string_references(inputs, outputs, **kwargs): def add_things(mthings, **kwargs): if isinstance(mthings, list): for sref_name in mthings: - add_tensor_string_reference(sref_name, kwargs) + add_tensor_string_reference(sref_name, **kwargs) else: - add_tensor_string_reference(mthings, kwargs) - add_things(inputs, kwargs) - add_things(outputs, kwargs) + add_tensor_string_reference(mthings, **kwargs) + add_things(inputs, **kwargs) + add_things(outputs, **kwargs) +def prepare_string_ref_name(tensor_name): + inline = tensor_name.replace(":", "_").replace("/", "_") + prepared = "sref_{}".format(inline) + return prepared class OperatorFactory(): # Can easily do something smarter @@ -76,7 +80,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', False) self._snippet = AddOpSnippet(inputs, output, tf_dtype, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register @@ -88,7 +92,7 @@ def __init__(self, op_info, **kwargs): _Operator.__init__(self) inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] out_tensor_info = op_info.output_tensors[0] - output, out_dtype = out_tensor_info.name, out_tensor_info.dtype + output, out_dtype = prepare_string_ref_name(out_tensor_info.name), out_tensor_info.dtype in_dtype = op_info.input_tensors[0].dtype parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, op_info.op_attr) @@ -96,7 +100,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', False) self._snippet = ArgMaxOpSnippet(inputs, output, in_dtype, out_dtype, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _DequantizeOperator(_Operator): @@ -107,14 +111,14 @@ def __init__(self, op_info, **kwargs): _Operator.__init__(self) inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] out_tensor_info = op_info.output_tensors[0] - output, out_dtype = out_tensor_info.name, out_tensor_info.dtype + output, out_dtype = prepare_string_ref_name(out_tensor_info.name), out_tensor_info.dtype parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, op_info.op_attr) ref_count = parser.get('ref_counts', [0])[0] to_eval = parser.get('to_eval', False) self._snippet = 
DequantizeOpSnippet(inputs, output, out_dtype, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _MaxOperator(_Operator): @@ -125,7 +129,7 @@ def __init__(self, op_info, **kwargs): _Operator.__init__(self) inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] out_tensor_info = op_info.output_tensors[0] - output, out_dtype, out_shape = (out_tensor_info.name, + output, out_dtype, out_shape = (prepare_string_ref_name(out_tensor_info.name), out_tensor_info.dtype, out_tensor_info.shape) # FIXME: automatic alloc for uTensor fail @@ -137,7 +141,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', False) self._snippet = MaxOpSnippet(inputs, output, out_dtype, out_shape, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _MaxPool(_Operator): @@ -160,7 +164,7 @@ def __init__(self, op_info, **kwargs): ksize, strides, padding, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register @@ -184,7 +188,7 @@ def __init__(self, op_info, **kwargs): ksize, strides, padding, ref_counts, to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _MinOperator(_Operator): @@ -195,7 +199,7 @@ def __init__(self, op_info, **kwargs): _Operator.__init__(self) inputs = [prepare_string_ref_name(tensor_info.name) for tensor_info in op_info.input_tensors] out_info = op_info.output_tensors[0] - output, out_dtype, out_shape = (out_info.name, + output, out_dtype, out_shape = (prepare_string_ref_name(out_info.name), out_info.dtype, out_info.shape) # FIXME: automatic alloc for uTensor fail @@ -207,7 +211,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', False) self._snippet = MinOpSnippet(inputs, output, out_dtype, out_shape, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _QuantizeV2Operator(_Operator): @@ -225,7 +229,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', False) self._snippet = QuantizeV2OpSnippet(inputs, outputs, out_dtype, ref_counts, to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _MatMulOperator(_Operator): @@ -248,7 +252,7 @@ def __init__(self, op_info, **kwargs): x_dtype, w_dtype, out_dtype, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _QuantizedMatMulOperator(_Operator): @@ -271,7 +275,7 @@ def __init__(self, op_info, **kwargs): x_dtype, w_dtype, out_dtype, ref_counts, to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _ReluOperator(_Operator): @@ -293,7 +297,7 @@ def __init__(self, op_info, **kwargs): out_dtype, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _QuantizedReluOperator(_Operator): @@ -316,7 +320,7 @@ def 
__init__(self, op_info, **kwargs): out_dtypes, qout_dtype, ref_counts, to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _QuantizedAddOperator(_Operator): @@ -338,7 +342,7 @@ def __init__(self, op_info, **kwargs): x_dtype, w_dtype, out_dtype, ref_counts, to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _QuantizedMulOperator(_Operator): @@ -360,7 +364,7 @@ def __init__(self, op_info, **kwargs): x_dtype, w_dtype, out_dtype, ref_counts, to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _RequantizationRangeOperator(_Operator): @@ -378,7 +382,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', False) self._snippet = RequantizationRangeOpSnippet(inputs, outputs, out_dtype, ref_counts, to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register @@ -399,7 +403,7 @@ def __init__(self, op_info, **kwargs): qout_dtype, range_dtype, ref_counts, to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _ReshapeOperator(_Operator): @@ -417,7 +421,7 @@ def __init__(self, op_info, **kwargs): dtype = op_info.input_tensors[0].dtype self._snippet = ReshapeOpSnippet(inputs, output, dtype, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register @@ -438,7 +442,7 @@ def __init__(self, op_info, **kwargs): ref_counts=ref_counts, to_eval=to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _CMSIS_NN_FCOperator(_Operator): @@ -464,7 +468,7 @@ def __init__(self, op_info, **kwargs): out_dtype=out_dtype, to_eval=to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _Conv2DOperator(_Operator): @@ -486,7 +490,7 @@ def __init__(self, op_info, **kwargs): self._snippet = Conv2DOpSnippent(inputs, output, strides, padding, in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtype=out_dtype, ref_count=ref_count, to_eval=to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _FusedConv2DMaxpoolOperator(_Operator): @@ -510,7 +514,7 @@ def __init__(self, op_info, **kwargs): in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtype=out_dtype, ref_count=ref_count, to_eval=to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _QuantizedFusedConv2DMaxpoolOperator(_Operator): @@ -535,7 +539,7 @@ def __init__(self, op_info, **kwargs): in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtypes=out_dtypes, ref_count=ref_count, to_eval=to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _Conv2DQuantOperator(_Operator): @@ -558,7 +562,7 @@ def __init__(self, op_info, **kwargs): self._snippet = Conv2DQuantOpSnippent(inputs, outputs, 
strides, padding, in_dtype=in_dtype, filter_dtype=filter_dtype, out_dtypes=out_dtypes, ref_counts=ref_counts, to_eval=to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _Uint8Q7OriginOperator(_Operator): @@ -573,7 +577,7 @@ def __init__(self, op_info, **kwargs): ref_count = parser.get('ref_counts', [0])[0] to_eval = parser.get('to_eval', False) self._snippet = Uint8Q7OriginSnippet(inputs, output, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) #hard coding to uint8_t uint8_t int32_t for now @OperatorFactory.register @@ -595,7 +599,7 @@ def __init__(self, op_info, **kwargs): ref_counts = parser.get('ref_counts', []) to_eval = parser.get('to_eval', False) self._snippet = QuantRangeForMultiplicationSnippet(inputs, outputs, output_type, ref_counts, to_eval) - add_tensor_string_references(inputs, outputs, kwargs) + add_tensor_string_references(inputs, outputs, **kwargs) @OperatorFactory.register class _InlineOperator(_Operator): @@ -604,7 +608,7 @@ class _InlineOperator(_Operator): def __init__(self, op_info, **kwargs): out_tensor_info = op_info.output_tensors[0] - out_tname, out_dtype, tensor_shape = (out_tensor_info.name, + out_tname, out_dtype, tensor_shape = (prepare_string_ref_name(out_tensor_info.name), out_tensor_info.dtype, out_tensor_info.shape) parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, @@ -612,7 +616,6 @@ def __init__(self, op_info, **kwargs): ref_count = parser.get('ref_counts', [0])[0] pre_tname = self._prepare_tensor_name(out_tname) inline_tname = self._prepare_inline_array_name(out_tname) - out_tname = prepare_string_ref_name(out_tname) value = op_info.op_attr['value'].value.np_array.flatten() self._snippet = CreateTensorBinarySnippet(out_tname, tensor_shape=tensor_shape, tf_dtype=out_dtype, @@ -626,7 +629,7 @@ def __init__(self, op_info, **kwargs): value) weight_container = kwargs['weight_container'] weight_container.add_snippet(weight_snippet) - add_tensor_string_references([], out_tname, kwargs) + add_tensor_string_references([], out_tname, **kwargs) def _prepare_tensor_name(self, tensor_name): prepared = tensor_name.replace(":", "_").replace("/", "_") @@ -638,6 +641,7 @@ def _prepare_inline_array_name(self, tensor_name): return preapred +# TODO check for correctness with cstring stuffs @OperatorFactory.register class _ConstOperator(_Operator): @@ -645,7 +649,7 @@ class _ConstOperator(_Operator): def __init__(self, op_info, **kwargs): out_tensor_info = op_info.output_tensors[0] - out_tname, out_dtype = (out_tensor_info.name, + out_tname, out_dtype = (prepare_string_ref_name(out_tensor_info.name), out_tensor_info.dtype) parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, op_info.op_attr) @@ -655,7 +659,7 @@ def __init__(self, op_info, **kwargs): idx_dir = kwargs['idx_dir'] embed_data_dir = kwargs.get('embed_data_dir', os.path.join("/fs", idx_dir)) - out_tname = prepare_string_ref_name(out_tname) + #out_tname = prepare_string_ref_name(out_tname) self._snippet = CreateTensorIdxSnippet(embed_data_dir, out_tname, idx_fname=idx_fname, np_dtype=out_dtype, @@ -663,7 +667,7 @@ def __init__(self, op_info, **kwargs): idx_path = os.path.join(idx_dir, idx_fname) value = op_info.op_attr['value'].value self._tf_save_data(idx_path, value) - add_tensor_string_references([], out_tname, kwargs) + add_tensor_string_references([], out_tname, **kwargs) def 
_tf_prepare_tensor_name(self, tensor_name): """Replace all ':' and '/' with '_' in a given tensor name @@ -686,14 +690,14 @@ class _RamOperator(_Operator): def __init__(self, op_info, **kwargs): out_tensor_info = op_info.output_tensors[0] - out_tname, out_dtype, tensor_shape = (out_tensor_info.name, + out_tname, out_dtype, tensor_shape = (prepare_string_ref_name(out_tensor_info.name), out_tensor_info.dtype, out_tensor_info.shape) parser = NamescopedKWArgsParser(RefCntOptimizer.KWARGS_NAMESCOPE, op_info.op_attr) ref_count = parser.get('ref_counts', [0])[0] pre_tname = self._prepare_tensor_name(out_tname) - out_tname = prepare_string_ref_name(out_tname) + #out_tname = prepare_string_ref_name(out_tname) #inline_tname = self._prepare_inline_array_name(out_tname) #value = op_info.op_attr['value'].value.np_array.flatten() self._snippet = CreateTensorRamSnippet(out_tname, tensor_shape=tensor_shape, @@ -701,7 +705,7 @@ def __init__(self, op_info, **kwargs): sptr_name=pre_tname, ref_count=ref_count) - add_tensor_string_references([], out_tname, kwargs) + add_tensor_string_references([], out_tname, **kwargs) def _prepare_tensor_name(self, tensor_name): prepared = tensor_name.replace(":", "_").replace("/", "_") @@ -721,7 +725,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', True) out_dtype = op_info.output_tensors[0].dtype self._snippet = ShapeOpSnippet(inputs, output, out_dtype, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register @@ -747,7 +751,7 @@ def __init__(self, op_info, **kwargs): begin_mask, ellipsis_mask, end_mask, new_axis_mask, shrink_axis_mask, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _PackOperator(_Operator): @@ -766,7 +770,7 @@ def __init__(self, op_info, **kwargs): N = op_info.op_attr['N'].value axis = op_info.op_attr['axis'].value self._snippet = PackOpSnippet(inputs, output, dtype, out_dtype, N, axis, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _SoftmaxOperator(_Operator): @@ -782,7 +786,7 @@ def __init__(self, op_info, **kwargs): to_eval = parser.get('to_eval', True) out_dtype = op_info.output_tensors[0].dtype self._snippet = SoftmaxOpSnippet(inputs, output, out_dtype, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) @OperatorFactory.register class _GatherOperator(_Operator): @@ -799,4 +803,4 @@ def __init__(self, op_info, **kwargs): ref_count = parser.get('ref_counts', [0])[0] to_eval = parser.get('to_eval', False) self._snippet = GatherOpSnippet(inputs, output, tf_dtype, ref_count, to_eval) - add_tensor_string_references(inputs, output, kwargs) + add_tensor_string_references(inputs, output, **kwargs) diff --git a/utensor_cgen/backend/snippets/_snippets.py b/utensor_cgen/backend/snippets/_snippets.py index 158b1f68..1521f1fc 100644 --- a/utensor_cgen/backend/snippets/_snippets.py +++ b/utensor_cgen/backend/snippets/_snippets.py @@ -29,14 +29,11 @@ def mhash(mstr): """ Simple java string hash """ - v = 7 + v = int(7) for c in mstr: - v = v*31 + ord(c) + v = (v*31 + ord(c)) & 0xffffffff return v -def prepare_string_ref_name(tensor_name): - inline = tensor_name.replace(":", "_").replace("/", "_") - prepared = "sref_{}".format(inline) 
- return prepared + # TODO: Better abstraction, i.e a better backend for code generation class CreateTensorIdxSnippet(Snippet): @@ -787,21 +784,27 @@ class TensorStringReferenceSnippet(Snippet): __headers__ = set([]) __references__ = set([]) + @classmethod + def add_reference(cls, sref_name): + cls.__references__.add(sref_name) + + @classmethod + def have_reference(cls, sref_name): + return sref_name not in cls.__references__ def __init__(self, sref_name): Snippet.__init__(self) self.template_vars['sref_name'] = sref_name self.template_vars['string_id'] = mhash(sref_name) # Dont render duplicates - if sref_name not in __references: - self.renderable = True - else: - self.renderable = False - __references.add(sref_name) + self.renderable = self.have_reference(sref_name) + self.add_reference(sref_name) def render(self): if self.renderable: return Snippet.render(self) + else: + return '' class WeightSnippet(Snippet): __template_name__ = "snippets/weight_snippet.hpp" diff --git a/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp b/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp index f04c53cc..170cfd55 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp +++ b/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp @@ -1,3 +1 @@ -#include - #define {{ sref_name }} {{ string_id }} From 59d5604a1a5235c8c06576fe87844fd9cba89920 Mon Sep 17 00:00:00 2001 From: Michael Bartling Date: Fri, 28 Jun 2019 13:43:52 -0500 Subject: [PATCH 16/17] Replace defines with constants, will give compile time errors --- .../snippets/templates/snippets/tensor_string_reference.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp b/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp index 170cfd55..7b7466d6 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp +++ b/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp @@ -1 +1 @@ -#define {{ sref_name }} {{ string_id }} +static const uint32_t {{ sref_name }} = {{ string_id }} From 4fbc5e832ea781768e7e27bf6f8c54a8112f1793 Mon Sep 17 00:00:00 2001 From: Michael Bartling Date: Tue, 9 Jul 2019 13:23:50 -0500 Subject: [PATCH 17/17] Add ; --- .../snippets/templates/snippets/tensor_string_reference.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp b/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp index 7b7466d6..2db9d1d9 100644 --- a/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp +++ b/utensor_cgen/backend/snippets/templates/snippets/tensor_string_reference.hpp @@ -1 +1 @@ -static const uint32_t {{ sref_name }} = {{ string_id }} +static const uint32_t {{ sref_name }} = {{ string_id }};
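
Taken together, patches 15 through 17 change how generated code refers to tensors: instead of keying the context on C strings, every sanitized tensor name is reduced to a 32-bit hash, declared once via tensor_string_reference.hpp, and passed around as a uint32_t. At a call site this means that, for a hypothetical tensor named dense/MatMul:0, the quoted name in ctx.add(new RamTensor<float>(), "dense/MatMul:0") becomes ctx.add(new RamTensor<float>(), sref_dense_MatMul_0). The following is a minimal, self-contained Python sketch of the scheme, assembled from the pieces in this series (mhash and the duplicate suppression come from _snippets.py, prepare_string_ref_name from operators.py); emit_reference() is a hypothetical stand-in for TensorStringReferenceSnippet plus its template, not a real utensor_cgen API:

    # Sketch of the tensor string-reference scheme from patches 15-17.
    # mhash and prepare_string_ref_name mirror the patched sources;
    # emit_reference() is a hypothetical stand-in for
    # TensorStringReferenceSnippet and its template.

    def mhash(mstr):
        """Simple Java-style string hash, masked to 32 bits so the
        result always fits the generated uint32_t constant."""
        v = 7
        for c in mstr:
            v = (v * 31 + ord(c)) & 0xFFFFFFFF
        return v

    def prepare_string_ref_name(tensor_name):
        """Sanitize a tensor name such as 'dense/MatMul:0' into a C
        identifier carrying the 'sref_' prefix."""
        inline = tensor_name.replace(":", "_").replace("/", "_")
        return "sref_{}".format(inline)

    # Plays the role of TensorStringReferenceSnippet.__references__.
    _references = set()

    def emit_reference(tensor_name):
        """Render one string-reference constant, suppressing names
        already rendered, as the snippet's dedup logic does."""
        sref_name = prepare_string_ref_name(tensor_name)
        if sref_name in _references:
            return ""  # duplicate: render nothing
        _references.add(sref_name)
        return "static const uint32_t {} = {};".format(
            sref_name, mhash(sref_name))

    print(emit_reference("dense/MatMul:0"))
    # -> static const uint32_t sref_dense_MatMul_0 = <32-bit id>;
    print(repr(emit_reference("dense/MatMul:0")))
    # -> '' (the second occurrence is suppressed)

The 0xffffffff mask added to mhash in patch 15 is what makes the emitted value well defined: Python integers are unbounded, so without the mask a long tensor name would hash to a number too large for the uint32_t that the generated header declares.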
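
The commit message of patch 15 quantifies the saving: in the author's demo, the model file drops from 25.8 KB to 403*4 B, i.e. one uint32_t for each tensor reference. A back-of-envelope check of that figure, assuming 403 counts the distinct tensor references in that demo:

    # Rough check of the size figure quoted in patch 15's message.
    num_refs = 403                    # taken from the commit message
    bytes_per_ref = 4                 # one uint32_t id per tensor name
    print(num_refs * bytes_per_ref)   # 1612 bytes, down from ~25.8 KB

Patch 16 then replaces the generated #define with a typed constant so that problems such as conflicting redefinitions surface as compile-time errors, and patch 17 adds the trailing semicolon the constant's declaration was missing.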