diff --git a/.gitmodules b/.gitmodules index 2ee7f0e..19ec233 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,10 @@ [submodule "external/onnx"] path = external/onnx - url = https://github.com/onnx/onnx + url = https://github.com/onnx/onnx.git + ignore = dirty [submodule "test/lib/googletest"] path = test/lib/googletest url = https://github.com/google/googletest.git +[submodule "test/lib/filesystem"] + path = test/lib/filesystem + url = https://github.com/wjakob/filesystem.git diff --git a/.travis.yml b/.travis.yml index b2233ef..ac28c6d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -60,12 +60,15 @@ matrix: install: - if [ "$TRAVIS_OS_NAME" = "linux" -a "$CXX" = "g++" ]; then export CXX="g++-7" CC="gcc-7"; fi - | - bash -ex ${TRAVIS_BUILD_DIR}/.travis/install-mkldnn.sh \ + bash -ex ${TRAVIS_BUILD_DIR}/scripts/build-mkldnn.sh \ --version ${MKLDNN_VERSION} \ --download-dir ${HOME}/downloads \ - --build-dir ${HOME}/build \ + --extract-dir ${HOME}/build \ --install-dir ${HOME}/mkl-dnn-${MKLDNN_VERSION} \ --parallel ${MAKE_JOBS} + - | + bash -ex ${TRAVIS_BUILD_DIR}/scripts/install-mkldnn.sh \ + --build-dir ${HOME}/build/mkl-dnn-${MKLDNN_VERSION}/build script: true # skip build and test script: diff --git a/.travis/README.md b/.travis/README.md index 1ef0312..8f45bcc 100644 --- a/.travis/README.md +++ b/.travis/README.md @@ -12,7 +12,7 @@ Linux-based platforms requires a container image with the following softwares: Currently it uses [okapies/buildenv](https://hub.docker.com/r/okapies/buildenv/) image for linux-x86_64 platform. ## Architecture -`.travis.yml` -> `run-build.sh` -> `build.sh` -> `install-*.sh` & `build-menoh.sh` +`.travis.yml` -> `run-build.sh` -> `build.sh` -> `scripts/*.sh` 1. `run-build.sh` just calls a platform's `build.sh` for running the actual build workflow 2. 
`build.sh` diff --git a/.travis/build-menoh.sh b/.travis/build-menoh.sh deleted file mode 100644 index 4d35f01..0000000 --- a/.travis/build-menoh.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash - -# retrieve arguments -while [[ $# != 0 ]]; do - case $1 in - --) - shift - break - ;; - --source-dir) - ARG_SOURCE_DIR="$2" - shift 2 - ;; - --install-dir) - ARG_INSTALL_DIR="$2" - shift 2 - ;; - --mkldnn-dir) - ARG_MKLDNN_DIR="$2" - shift 2 - ;; - --link-static-libgcc) - ARG_LINK_STATIC_LIBGCC="$2" - shift 2 - ;; - --link-static-libstdcxx) - ARG_LINK_STATIC_LIBSTDCXX="$2" - shift 2 - ;; - --link-static-libprotobuf) - ARG_LINK_STATIC_LIBPROTOBUF="$2" - shift 2 - ;; - -*) - err Unknown option \"$1\" - exit - ;; - *) - break - ;; - - esac -done - -# validate the arguments -test -n "${ARG_SOURCE_DIR}" || { echo "--source-dir is not specified" 1>&2; exit 1; } - -test -n "${ARG_LINK_STATIC_LIBGCC}" || ARG_LINK_STATIC_LIBGCC='OFF' -test -n "${ARG_LINK_STATIC_LIBSTDCXX}" || ARG_LINK_STATIC_LIBSTDCXX='OFF' -test -n "${ARG_LINK_STATIC_LIBPROTOBUF}" || ARG_LINK_STATIC_LIBPROTOBUF='OFF' - -echo -e "\e[33;1mBuilding Menoh\e[0m" - -cd ${ARG_SOURCE_DIR} -[ -d "build" ] || mkdir -p build - -cd build -if [ -n "${ARG_INSTALL_DIR}" ]; then - OPT_CMAKE_INSTALL_PREFIX=-DCMAKE_INSTALL_PREFIX=${ARG_INSTALL_DIR} -fi -if [ -n "${ARG_MKLDNN_DIR}" ]; then - OPT_MKLDNN_INCLUDE_DIR=-DMKLDNN_INCLUDE_DIR=${ARG_MKLDNN_DIR}/include - OPT_MKLDNN_LIBRARY=-DMKLDNN_LIBRARY=${ARG_MKLDNN_DIR}/lib/libmkldnn.so -fi -cmake \ - -DCMAKE_BUILD_TYPE=Release \ - ${OPT_CMAKE_INSTALL_PREFIX} \ - ${OPT_MKLDNN_INCLUDE_DIR} \ - ${OPT_MKLDNN_LIBRARY} \ - -DLINK_STATIC_LIBGCC=${ARG_LINK_STATIC_LIBGCC} \ - -DLINK_STATIC_LIBSTDCXX=${ARG_LINK_STATIC_LIBSTDCXX} \ - -DLINK_STATIC_LIBPROTOBUF=${ARG_LINK_STATIC_LIBPROTOBUF} \ - -DENABLE_TEST=ON \ - .. - -make diff --git a/.travis/init-build-linux.sh b/.travis/init-build-linux.sh index 1c4eaba..ae5f0db 100644 --- a/.travis/init-build-linux.sh +++ b/.travis/init-build-linux.sh @@ -16,8 +16,8 @@ export MKLDNN_INSTALL_DIR=/usr/local # $HOME:$HOME = /home/travis : /home/travis # /home/travis/build : /home/travis/build # /home/travis/build// : /home/travis/build// (= ${TRAVIS_BUILD_DIR}) -SHARED_SCRIPT_DIR=$(cd $(dirname ${BASH_SOURCE:-$0}); pwd) -source ${SHARED_SCRIPT_DIR}/run-container.sh --image ${BUILDENV_IMAGE} --work-dir ${WORK_DIR} +SOURCE_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}/..")"; pwd) +source ${SOURCE_DIR}/scripts/run-container.sh --image ${BUILDENV_IMAGE} --work-dir ${WORK_DIR} test -n "${BUILDENV_CONTAINER_ID}" || { echo "BUILDENV_CONTAINER_ID can't be empty" 1>&2; exit 1; } ## define shared functions for Linux-based platforms @@ -34,42 +34,58 @@ function docker_exec_script() { return $? 
} -function install_protobuf() { +function build_protobuf() { docker_exec_script \ - ${PROJ_DIR}/.travis/install-protobuf.sh \ + "${PROJ_DIR}/scripts/build-protobuf.sh" \ --version ${PROTOBUF_VERSION} \ - --download-dir ${WORK_DIR}/downloads \ - --build-dir ${WORK_DIR}/build \ - --install-dir ${PROTOBUF_INSTALL_DIR} \ + --download-dir "${WORK_DIR}/downloads" \ + --extract-dir "${WORK_DIR}/build" \ + --install-dir "${PROTOBUF_INSTALL_DIR}" \ --parallel ${MAKE_JOBS} } -function install_mkldnn() { +function install_protobuf() { + docker_exec_script \ + "${PROJ_DIR}/scripts/install-protobuf.sh" \ + --build-dir "${WORK_DIR}/build/protobuf-${PROTOBUF_VERSION}" +} + +function build_mkldnn() { docker_exec_script \ - ${PROJ_DIR}/.travis/install-mkldnn.sh \ + "${PROJ_DIR}/scripts/build-mkldnn.sh" \ --version ${MKLDNN_VERSION} \ - --download-dir ${WORK_DIR}/downloads \ - --build-dir ${WORK_DIR}/build \ - --install-dir ${MKLDNN_INSTALL_DIR} \ + --download-dir "${WORK_DIR}/downloads" \ + --extract-dir "${WORK_DIR}/build" \ + --install-dir "${MKLDNN_INSTALL_DIR}" \ --parallel ${MAKE_JOBS} } +function install_mkldnn() { + docker_exec_script \ + "${PROJ_DIR}/scripts/install-mkldnn.sh" \ + --build-dir "${WORK_DIR}/build/mkl-dnn-${MKLDNN_VERSION}/build" +} + function prepare_menoh_data() { docker_exec_script \ - ${PROJ_DIR}/.travis/prepare-menoh-data.sh \ - --source-dir ${PROJ_DIR} \ + "${PROJ_DIR}/scripts/prepare-menoh-data.sh" \ + --source-dir "${PROJ_DIR}" \ --python-executable python3 } function build_menoh() { if [ "${LINK_STATIC}" != "true" ]; then docker_exec_script \ - ${PROJ_DIR}/.travis/build-menoh.sh \ - --source-dir ${PROJ_DIR} + "${PROJ_DIR}/scripts/build-menoh.sh" \ + --build-type Release \ + --source-dir "${PROJ_DIR}" \ + --python-executable python3 else docker_exec_script \ - ${PROJ_DIR}/.travis/build-menoh.sh \ - --source-dir ${PROJ_DIR} \ + "${PROJ_DIR}/scripts/build-menoh.sh" \ + --build-type Release \ + --source-dir "${PROJ_DIR}" \ + --python-executable python3 \ --link-static-libgcc ON \ --link-static-libstdcxx ON \ --link-static-libprotobuf ON @@ -77,9 +93,9 @@ function build_menoh() { } function test_menoh() { - docker_exec "cd ${PROJ_DIR}/build && ./test/menoh_test" + docker_exec "cd \"${PROJ_DIR}/build\" && ./test/menoh_test" } function check_menoh_artifact() { - docker_exec "ldd ${PROJ_DIR}/build/menoh/libmenoh.so" + docker_exec "ldd \"${PROJ_DIR}/build/menoh/libmenoh.so\"" } diff --git a/.travis/init-build-osx.sh b/.travis/init-build-osx.sh index 627e070..afe593a 100644 --- a/.travis/init-build-osx.sh +++ b/.travis/init-build-osx.sh @@ -8,28 +8,32 @@ export PROJ_DIR=${TRAVIS_BUILD_DIR} # = ${HOME}/build/${TRAVIS_REPO_SLUG} ## define shared functions for macOS (OSX) platforms function prepare_menoh_data() { - bash -ex ${PROJ_DIR}/.travis/prepare-menoh-data.sh \ - --source-dir ${PROJ_DIR} \ + bash -ex "${PROJ_DIR}/scripts/prepare-menoh-data.sh" \ + --source-dir "${PROJ_DIR}" \ --python-executable python } function build_menoh() { if [ "${LINK_STATIC}" != "true" ]; then - bash -ex ${PROJ_DIR}/.travis/build-menoh.sh \ - --source-dir ${PROJ_DIR} + bash -ex "${PROJ_DIR}/scripts/build-menoh.sh" \ + --build-type Release \ + --source-dir "${PROJ_DIR}" \ + --python-executable python else # Does not set --link-static-libgcc and --link-static-libstdcxx in macOS - bash -ex ${PROJ_DIR}/.travis/build-menoh.sh \ - --source-dir ${PROJ_DIR} \ + bash -ex "${PROJ_DIR}/scripts/build-menoh.sh" \ + --build-type Release \ + --source-dir "${PROJ_DIR}" \ + --python-executable python \ 
--link-static-libprotobuf ON fi } function test_menoh() { - cd ${PROJ_DIR}/build + cd "${PROJ_DIR}/build" ./test/menoh_test } function check_menoh_artifact() { - otool -L ${PROJ_DIR}/build/menoh/libmenoh.dylib + otool -L "${PROJ_DIR}/build/menoh/libmenoh.dylib" } diff --git a/.travis/linux-x86_64/build.sh b/.travis/linux-x86_64/build.sh index 0af7711..205f028 100644 --- a/.travis/linux-x86_64/build.sh +++ b/.travis/linux-x86_64/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -ex -BASE_DIR=$(cd $(dirname ${BASH_SOURCE:-$0}); pwd) +BASE_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) # initialize this script source ${BASE_DIR}/../init-build-linux.sh @@ -13,7 +13,9 @@ docker_exec "ls -l ${WORK_DIR}/build/${TRAVIS_REPO_SLUG}" docker_exec "(printenv | grep PATH) && make --version && cmake --version && g++ --version && ldd --version" # build and install prerequisites +build_protobuf install_protobuf +build_mkldnn install_mkldnn docker_exec "pip3 install --user chainer" # for generating test data diff --git a/.travis/macosx-x86_64/build.sh b/.travis/macosx-x86_64/build.sh index a81b8ca..ec672cc 100644 --- a/.travis/macosx-x86_64/build.sh +++ b/.travis/macosx-x86_64/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -BASE_DIR=$(cd $(dirname ${BASH_SOURCE:-$0}); pwd) +BASE_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) # initialize this script source ${BASE_DIR}/../init-build-osx.sh diff --git a/BUILDING.md b/BUILDING.md index 698309d..6d0915f 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -5,18 +5,25 @@ You need to install [prerequisites](#prerequisites) for your platform before [bu To build Menoh, you require the following toolchains: Unix: -- CMake 3.1 or later - GCC 4.9 or later +- CMake 3.1 or later +- Python 2.7 or later macOS (OSX): - XCode - [Homebrew](https://brew.sh/) +- CMake 3.1 or later +- Python 2.7 or later Windows: - Visual Studio 2015 +- CMake 3.1 or later +- Python 2.7 or later Windows (MINGW): - [MSYS2](http://www.msys2.org/) +- CMake 3.1 or later +- Python 2.7 or later You also need to install the dependent libraries on your system: @@ -25,7 +32,7 @@ You also need to install the dependent libraries on your system: `protobuf` can be installed through most package managers instead of building it yourself. `mkl-dnn` package, unfortunatelly, is not available in many environments at the moment (except for `brew` in macOS). -Note that you can use ProtoBuf either version 2 or 3, but, for example, if you build Menoh with `protoc` ver 3 you should use the binary with runtime ver 3. +Note that you can use ProtoBuf either version 2 or 3, but the runtime version should be the same as `protoc` in your system. ### Debian/Ubuntu ``` @@ -53,10 +60,9 @@ Download and unzip https://github.com/protocolbuffers/protobuf/releases/download ``` cd protobuf-3.6.1/cmake -mdir build +mkdir build cd build cmake .. -G "Visual Studio 14" -A x64 -Dprotobuf_MSVC_STATIC_RUNTIME=OFF -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=(CMake_Install_Dir) -cmake --build . --config Debug --target install cmake --build . --config Release --target install cd ../../.. ``` @@ -67,10 +73,9 @@ git clone https://github.com/intel/mkl-dnn.git cd mkl-dnn/scripts .\prepare_mkl.bat cd .. -mdir build +mkdir build cd build cmake .. -G "Visual Studio 14 Win64" -DCMAKE_INSTALL_PREFIX=(CMake_Install_Dir) -cmake --build . --config Debug --target install cmake --build . --config Release --target install cd ../.. ``` @@ -78,7 +83,7 @@ cd ../.. 
### Windows (MINGW) ``` pacman -S mingw-w64-x86_64-toolchain -pacman -S git +pacman -S git make pacman -S mingw-w64-x86_64-cmake pacman -S mingw-w64-x86_64-protobuf mingw-w64-x86_64-protobuf-c ``` @@ -86,7 +91,7 @@ pacman -S mingw-w64-x86_64-protobuf mingw-w64-x86_64-protobuf-c #### Installing MKL-DNN from binary package ``` curl -omingw-w64-x86_64-mkl-dnn-0.15-1-x86_64.pkg.tar.xz -L https://github.com/pfnet-research/menoh/releases/download/v1.0.3/mingw-w64-x86_64-mkl-dnn-0.15-1-x86_64.pkg.tar.xz -pacman -S --noconfirm mingw-w64-x86_64-mkl-dnn-0.15-1-x86_64.pkg.tar.xz +pacman -U --noconfirm mingw-w64-x86_64-mkl-dnn-0.15-1-x86_64.pkg.tar.xz ``` #### Installing MKL-DNN from source @@ -124,7 +129,7 @@ make install # as root To run the example, you also need to download model data: ``` -python retrieve_data.py +python scripts/retrieve_data.py ``` #### Static linking @@ -177,10 +182,9 @@ Please replace `(CMake_Install_Dir)` in the following with your working director ``` git clone https://github.com/pfnet-research/menoh.git cd menoh -mdir build +mkdir build cd build -cmake .. -G "Visual Studio 14 Win64" -DCMAKE_PREFIX_PATH=CMake_Install_Dir) -DCMAKE_INSTALL_PREFIX=CMake_Install_Dir) -DENABLE_TEST=OFF -DENABLE_BENCHMARK=OFF -DENABLE_EXAMPLE=OFF -DENABLE_TOOL=OFF -cmake --build . --config Debug --target install +cmake .. -G "Visual Studio 14 Win64" -DCMAKE_PREFIX_PATH=(CMake_Install_Dir) -DCMAKE_INSTALL_PREFIX=(CMake_Install_Dir) -DENABLE_TEST=OFF -DENABLE_BENCHMARK=OFF -DENABLE_EXAMPLE=OFF cmake --build . --config Release --target install ``` @@ -194,3 +198,12 @@ MSYS2_ARG_CONV_EXCL="-DCMAKE_INSTALL_PREFIX=" \ cmake -G "MSYS Makefiles" -DCMAKE_INSTALL_PREFIX=/mingw64 make ``` + +### Note + +#### Python command name +Menoh requires `python` command to generate source codes at build time. Add `PYTHON_EXECUTABLE` option to `cmake` if you want to use `python` command with non-standard name (e.g. `python3`). + +```bash +cmake -DPYTHON_EXECUTABLE=python3 .. +``` diff --git a/CMakeLists.txt b/CMakeLists.txt index b51806e..6aa884e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,8 +7,6 @@ set(MENOH_MINOR_VERSION 1) set(MENOH_PATCH_VERSION 0) # Options -option(BUILD_SHARED_LIBS "Build shared libs" ON) - option(USE_OLD_GLIBCXX_ABI "Generate binaries for the old libstdc++ ABI" OFF) option(LINK_STATIC_LIBPROTOBUF "Link static libprotobuf to libmenoh" OFF) @@ -52,11 +50,37 @@ mark_as_advanced(DOWNLOAD_LOCATION) # Enable ExternalProject_Add include(ExternalProject) -# Setup protobuf -include(SetupProtobuf) +# Setup protobuf (it is used in ONNX and Menoh) +if(LINK_STATIC_LIBPROTOBUF) + # Note: We can't use `set(PROTOBUF_BUILD_SHARED_LIBS OFF)` in `FindProtobuf` module + # because `libprotobuf.a` produced by the package manager is not PIC. So we need to + # build it by ourselves. + if(UNIX OR MINGW) + include(BuildProtobuf) + else() + message(FATAL_ERROR "LINK_STATIC_LIBPROTOBUF is supported only in UNIX-like environments") + endif() +else() + # Note: It may conflict with the loading mechanism in onnx's CMake configuration. + # See external/onnx/CMakeLists.txt for more details. 
+ include(FindProtobuf) + find_package(Protobuf ${PROTOBUF_VERSION} REQUIRED) +endif() + +# Build libonnx.a +message(STATUS "Adding external/onnx") + +set(ONNX_SRC_DIR ${EXTERNAL_DIR}/onnx) +execute_process(COMMAND git submodule update --init -- ${ONNX_SRC_DIR} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) + +# see https://github.com/onnx/onnx/pull/1466 +message(STATUS "Patching to external/onnx") +configure_file(${EXTERNAL_DIR}/onnx-v1.3.0-patch_CMakeLists.txt ${EXTERNAL_DIR}/onnx/CMakeLists.txt COPYONLY) -# Generate source codes from ONNX protobuf schema -include(GenerateOnnxSrc) +# TODO: enable the following option when it is ready for migrating to onnx-ml +#set(ONNX_ML 1) +add_subdirectory(external/onnx EXCLUDE_FROM_ALL) # Note: BUILD_SHARED_LIBS must be OFF in this place +include_directories("${ONNX_INCLUDE_DIRS}") # Setup MKLDNN find_package(MKLDNN "0.14") @@ -67,30 +91,24 @@ endif() include_directories("${MKLDNN_INCLUDE_DIR}") if(${ENABLE_TEST}) - enable_testing() - # GTest setup - set(GTEST_DIR test/lib/googletest) - execute_process(COMMAND git submodule update --init -- ${GTEST_DIR} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) - message(STATUS "# add_subdirectory\(${GTEST_DIR}\)") - add_subdirectory(${GTEST_DIR}) - message(STATUS "# add_subdirectory\(test\)") + message(STATUS "Adding test") add_subdirectory(test) endif() if(${ENABLE_BENCHMARK}) - message(STATUS "# add_subdirectory\(benchmark\)") + message(STATUS "Adding benchmark") add_subdirectory(benchmark) endif() if(${ENABLE_EXAMPLE}) - message(STATUS "# add_subdirectory\(example\)") + message(STATUS "Adding example") add_subdirectory(example) endif() -message(STATUS "# add_subdirectory\(menoh\)") +message(STATUS "Adding menoh") add_subdirectory(menoh) -message(STATUS "# add_subdirectory\(include\)") +message(STATUS "Adding include") add_subdirectory(include) if(SHOW_ALL_VARIABLES) diff --git a/README.md b/README.md index 3516f9c..03663ac 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ This codebase contains C API and C++ API. Execute following commands in root directory. ``` -python retrieve_data.py +python scripts/retrieve_data.py mkdir build && cd build cmake .. make @@ -111,7 +111,7 @@ Setup chainer Then, execute following commands in root directory. ``` -python gen_test_data.py +python scripts/gen_test_data.py cd build cmake -DENABLE_TEST=ON .. make @@ -155,7 +155,7 @@ make Menoh is released under MIT License. Please see the LICENSE file for details. -Note: `retrieve_data.py` downloads `data/VGG16.onnx`. `data/VGG16.onnx` is generated by onnx-chainer from pre-trained model which is uploaded +Note: `scripts/retrieve_data.py` downloads `data/VGG16.onnx`. `data/VGG16.onnx` is generated by onnx-chainer from pre-trained model which is uploaded at http://www.robots.ox.ac.uk/%7Evgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel That pre-trained model is released under Creative Commons Attribution License. 
diff --git a/appveyor.yml b/appveyor.yml index ccfe61b..9951f2c 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -40,13 +40,13 @@ install: - if [%TARGET%]==[mingw] ( pip3 install --user chainer && mkdir -p data && - python3 retrieve_data.py && - python3 gen_test_data.py + python3 scripts/retrieve_data.py && + python3 scripts/gen_test_data.py ) else ( pip install --user chainer && md data && - python retrieve_data.py && - python gen_test_data.py + python scripts/retrieve_data.py && + python scripts/gen_test_data.py ) before_build: @@ -72,7 +72,7 @@ build_script: cmake -G "MSYS Makefiles" -DENABLE_TEST=ON %STATIC_OPTION% -DCMAKE_INSTALL_PREFIX=/mingw64 .. && make ) else ( - cmake -G "Visual Studio 14 Win64" -DENABLE_TEST=OFF -DENABLE_BENCHMARK=OFF -DENABLE_EXAMPLE=OFF -DENABLE_TOOL=OFF -DCMAKE_INSTALL_PREFIX=c:\menoh-%MENOH_REV%-msvc .. && + cmake -G "Visual Studio 14 Win64" -DENABLE_TEST=OFF -DENABLE_BENCHMARK=OFF -DENABLE_EXAMPLE=OFF -DCMAKE_INSTALL_PREFIX=c:\menoh-%MENOH_REV%-msvc .. && cmake --build . --config Release --target install ) diff --git a/benchmark/vgg16_benchmark.cpp b/benchmark/vgg16_benchmark.cpp index 230edac..845b9f3 100644 --- a/benchmark/vgg16_benchmark.cpp +++ b/benchmark/vgg16_benchmark.cpp @@ -35,7 +35,7 @@ int main(int argc, char** argv) { menoh::variable_profile_table_builder vpt_builder; vpt_builder.add_input_profile(conv1_1_in_name, menoh::dtype_t::float_, {batch_size, 3, 224, 224}); - vpt_builder.add_output_profile(softmax_out_name, menoh::dtype_t::float_); + vpt_builder.add_output_name(softmax_out_name); auto vpt = vpt_builder.build_variable_profile_table(model_data); // Build model diff --git a/cmake/BuildProtobuf.cmake b/cmake/BuildProtobuf.cmake new file mode 100644 index 0000000..163411c --- /dev/null +++ b/cmake/BuildProtobuf.cmake @@ -0,0 +1,44 @@ +set(PROTOBUF_VERSION_STATIC "3.6.1") + +set(PROTOBUF_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf-${PROTOBUF_VERSION_STATIC}) +set(PROTOBUF_URL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION_STATIC}/protobuf-cpp-${PROTOBUF_VERSION_STATIC}.tar.gz") +set(PROTOBUF_HASH MD5=406d5b8636576b1c86730ca5cbd1e576) + +# Requires `-fPIC` for linking with a shared library +set(PROTOBUF_CFLAGS "-g -O2 -fPIC") +set(PROTOBUF_CXXFLAGS "-g -O2 -fPIC") +if(USE_OLD_GLIBCXX_ABI) + set(PROTOBUF_CXXFLAGS "${PROTOBUF_CXXFLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") +endif() + +ExternalProject_Add(Protobuf + PREFIX ${PROTOBUF_DIR} + URL ${PROTOBUF_URL} + URL_HASH ${PROTOBUF_HASH} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 + CONFIGURE_COMMAND bash -ex ${CMAKE_MODULE_PATH}/configure-helper.sh --CC ${CMAKE_C_COMPILER} --CXX ${CMAKE_CXX_COMPILER} -- "--prefix=${PROTOBUF_DIR}" "CFLAGS=${PROTOBUF_CFLAGS}" "CXXFLAGS=${PROTOBUF_CXXFLAGS}" + BUILD_COMMAND make -j4 + INSTALL_COMMAND make install +) + +set(PROTOBUF_LIBRARY_STATIC ${PROTOBUF_DIR}/lib/libprotobuf.a) +set(PROTOBUF_LIBRARY_SHARED ${PROTOBUF_DIR}/lib/libprotobuf.so) + +# Mimic the behavior of `FindProtobuf` module +# Use the old variable names to ensure backward compatibility +set(PROTOBUF_INCLUDE_DIR ${PROTOBUF_DIR}/include) +set(PROTOBUF_INCLUDE_DIRS ${PROTOBUF_INCLUDE_DIR}) +set(PROTOBUF_LIBRARY ${PROTOBUF_LIBRARY_STATIC}) # use the static library +set(PROTOBUF_LIBRARIES ${PROTOBUF_LIBRARY}) +set(PROTOBUF_PROTOC_EXECUTABLE ${PROTOBUF_DIR}/bin/protoc) +set(PROTOBUF_FOUND TRUE) + +add_library(protobuf::libprotobuf UNKNOWN IMPORTED) +# Note: INTERFACE_INCLUDE_DIRECTORIES can't set in this place because include/ is +# not installed during executing 
`cmake` +set_target_properties(protobuf::libprotobuf PROPERTIES + IMPORTED_LOCATION "${PROTOBUF_LIBRARY_STATIC}") +add_executable(protobuf::protoc IMPORTED) +set_target_properties(protobuf::protoc PROPERTIES + IMPORTED_LOCATION "${PROTOBUF_PROTOC_EXECUTABLE}") diff --git a/cmake/ConfigureMenoh.cmake b/cmake/ConfigureMenoh.cmake index a2f956a..1752799 100644 --- a/cmake/ConfigureMenoh.cmake +++ b/cmake/ConfigureMenoh.cmake @@ -8,6 +8,8 @@ macro(menoh_link_libraries TARGET_NAME SCOPE) target_link_libraries(${TARGET_NAME} ${SCOPE} -static-libstdc++) endif() + target_link_libraries(${TARGET_NAME} ${SCOPE} onnx) # onnx also contains protobuf + if(NOT ${SCOPE}) # PUBLIC will add transitive dependencies (`mklml_intel` and `iomp5`) to the link interface # Note: change it to PRIVATE after building mkldnn itself @@ -15,6 +17,4 @@ macro(menoh_link_libraries TARGET_NAME SCOPE) else() target_link_libraries(${TARGET_NAME} ${MKLDNN_LIBRARIES}) endif() - - target_link_libraries(${TARGET_NAME} ${SCOPE} ${PROTOBUF_LIBRARIES}) endmacro() diff --git a/cmake/GenerateOnnxSrc.cmake b/cmake/GenerateOnnxSrc.cmake deleted file mode 100644 index 0c2d8b5..0000000 --- a/cmake/GenerateOnnxSrc.cmake +++ /dev/null @@ -1,25 +0,0 @@ -set(ONNX_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/onnx) - -if(NOT EXISTS "${ONNX_OUTPUT_DIR}") - file(MAKE_DIRECTORY "${ONNX_OUTPUT_DIR}") -endif() - -set(ONNX_SRC_DIR ${EXTERNAL_DIR}/onnx) -execute_process(COMMAND git submodule update --init -- ${ONNX_SRC_DIR} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) - -set(ONNX_PROTO_HEADER ${ONNX_OUTPUT_DIR}/onnx/onnx.pb.h) -set(ONNX_PROTO_SRC ${ONNX_OUTPUT_DIR}/onnx/onnx.pb.cc) - -set(ONNX_GENERATED_OUTPUTS ${ONNX_PROTO_HEADER} ${ONNX_PROTO_SRC}) - -add_custom_target(gen_onnx_outputs DEPENDS ${ONNX_GENERATED_OUTPUTS}) -add_custom_command( - OUTPUT ${ONNX_GENERATED_OUTPUTS} - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS -I ${ONNX_SRC_DIR} --cpp_out . ${ONNX_SRC_DIR}/onnx/onnx.proto - DEPENDS ${PROTOBUF_PROTOC_EXECUTABLE} ${ONNX_SRC_DIR}/onnx/onnx.proto - COMMENT "Generating ONNX source files" - WORKING_DIRECTORY ${ONNX_OUTPUT_DIR} - VERBATIM) - -include_directories(${ONNX_OUTPUT_DIR}) # for ONNX_PROTO_HEADER diff --git a/cmake/SetupProtobuf.cmake b/cmake/SetupProtobuf.cmake deleted file mode 100644 index 0f86f77..0000000 --- a/cmake/SetupProtobuf.cmake +++ /dev/null @@ -1,56 +0,0 @@ -set(PROTOBUF_VERSION "2.6.1") - -if(LINK_STATIC_LIBPROTOBUF) - # Note: We can't use `set(PROTOBUF_BUILD_SHARED_LIBS OFF)` in `FindProtobuf` module - # because `libprotobuf.a` produced by the package manager is not PIC. So we need to - # build it by ourselves. 
- - if(UNIX OR MINGW) - set(PROTOBUF_VERSION_STATIC "3.6.1") - set(PROTOBUF_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf-${PROTOBUF_VERSION_STATIC}) - set(PROTOBUF_URL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION_STATIC}/protobuf-cpp-${PROTOBUF_VERSION_STATIC}.tar.gz") - set(PROTOBUF_HASH MD5=406d5b8636576b1c86730ca5cbd1e576) - - # Requires `-fPIC` for linking with a shared library - set(PROTOBUF_CFLAGS "-g -O2 -fPIC") - set(PROTOBUF_CXXFLAGS "-g -O2 -fPIC") - if(USE_OLD_GLIBCXX_ABI) - set(PROTOBUF_CXXFLAGS "${PROTOBUF_CXXFLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") - endif() - - ExternalProject_Add(Protobuf - PREFIX ${PROTOBUF_DIR} - URL ${PROTOBUF_URL} - URL_HASH ${PROTOBUF_HASH} - DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" - BUILD_IN_SOURCE 1 - CONFIGURE_COMMAND bash -ex ${CMAKE_MODULE_PATH}/configure-helper.sh --CC ${CMAKE_C_COMPILER} --CXX ${CMAKE_CXX_COMPILER} -- "--prefix=${PROTOBUF_DIR}" "CFLAGS=${PROTOBUF_CFLAGS}" "CXXFLAGS=${PROTOBUF_CXXFLAGS}" - BUILD_COMMAND make -j4 - INSTALL_COMMAND make install - ) - - set(PROTOBUF_LIBRARY_STATIC ${PROTOBUF_DIR}/lib/libprotobuf.a) - set(PROTOBUF_LIBRARY_SHARED ${PROTOBUF_DIR}/lib/libprotobuf.so) - - # Mimic the behavior of `FindProtobuf` module - # Use the old variable names to ensure backward compatibility - set(PROTOBUF_INCLUDE_DIR ${PROTOBUF_DIR}/include) - set(PROTOBUF_LIBRARY ${PROTOBUF_LIBRARY_STATIC}) # use the static library - set(PROTOBUF_LIBRARIES ${PROTOBUF_LIBRARY}) - set(PROTOBUF_PROTOC_EXECUTABLE ${PROTOBUF_DIR}/bin/protoc) - set(PROTOBUF_FOUND TRUE) - - add_library(protobuf::libprotobuf UNKNOWN IMPORTED) - # Note: INTERFACE_INCLUDE_DIRECTORIES can't set in this place because include/ is - # not installed during executing `cmake` - set_target_properties(protobuf::libprotobuf PROPERTIES - IMPORTED_LOCATION "${PROTOBUF_LIBRARY_STATIC}") - else() - message(FATAL_ERROR "LINK_STATIC_LIBPROTOBUF is supported only in UNIX-like environments") - endif() -else() - include(FindProtobuf) - find_package(Protobuf ${PROTOBUF_VERSION} REQUIRED) -endif() - -include_directories(${PROTOBUF_INCLUDE_DIR}) diff --git a/docs/getting_started.md b/docs/getting_started.md index 2060c40..26be544 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -39,7 +39,7 @@ make install Execute below command in root directory: ``` -python retrieve_data.py +python scripts/retrieve_data.py cd build ./example/vgg16_example ``` @@ -68,7 +68,7 @@ Please give `--help` option for details Execute below commands in root directory: ``` -python gen_test_data.py +python scripts/gen_test_data.py cd build cmake -DENABLE_TEST=ON .. make diff --git a/docs/main.md b/docs/main.md index 4b7a94d..c123ee3 100644 --- a/docs/main.md +++ b/docs/main.md @@ -43,7 +43,7 @@ Menoh is released under MIT License. Menoh is released under MIT License. Please see the LICENSE file for details. -Note: `retrieve_data.sh` downloads `data/VGG16.onnx`. +Note: `scripts/retrieve_data.sh` downloads `data/VGG16.onnx`. `data/VGG16.onnx` is generated by onnx-chainer from pre-trained model which is uploaded at http://www.robots.ox.ac.uk/%7Evgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel diff --git a/external/cmdline.h b/external/cmdline.h index f498b66..f6b3b9b 100644 --- a/external/cmdline.h +++ b/external/cmdline.h @@ -39,8 +39,12 @@ #ifdef _MSC_VER #define NOMINMAX -#include +/* + You must include Windows.h before DbgHelp.h. + See https://stackoverflow.com/a/43283926/1014818 for more details. 
+*/ #include +#include #else #include #endif diff --git a/external/onnx b/external/onnx index c7b6005..bae6333 160000 --- a/external/onnx +++ b/external/onnx @@ -1 +1 @@ -Subproject commit c7b60050ad6ae963bf2cc1d0eb3c32b07eb7eeed +Subproject commit bae6333e149a59a3faa9c4d9c44974373dcf5256 diff --git a/external/onnx-v1.3.0-patch_CMakeLists.txt b/external/onnx-v1.3.0-patch_CMakeLists.txt new file mode 100644 index 0000000..23ec68b --- /dev/null +++ b/external/onnx-v1.3.0-patch_CMakeLists.txt @@ -0,0 +1,565 @@ +# Minimum CMake required +cmake_minimum_required(VERSION 3.1) +include(cmake/Utils.cmake) +# Set default build type +if(NOT CMAKE_BUILD_TYPE) + message(STATUS "Build type not set - defaulting to Release") + set( + CMAKE_BUILD_TYPE "Release" + CACHE + STRING + "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." + FORCE) +endif() +cmake_policy(SET CMP0063 NEW) + +# Project +project(onnx C CXX) +option(ONNX_BUILD_BENCHMARKS "Build ONNX micro-benchmarks" OFF) + +option(BUILD_ONNX_PYTHON "Build Python binaries" OFF) +option(ONNX_GEN_PB_TYPE_STUBS "Generate protobuf python type stubs" ON) +option(ONNX_WERROR "Build with Werror" OFF) +option(ONNX_COVERAGE "Build with coverage instrumentation" OFF) +option(ONNX_BUILD_TESTS "Build ONNX C++ APIs Tests" OFF) +option(ONNX_USE_LITE_PROTO "Use lite protobuf instead of full." OFF) + +set(ONNX_NAMESPACE "onnx" CACHE STRING "onnx namespace") + +# Set C++11 as standard for the whole project +if(NOT MSVC) + set(CMAKE_CXX_STANDARD 11) +endif(NOT MSVC) + +set(ONNX_ROOT ${PROJECT_SOURCE_DIR}) + +# Read ONNX version +file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" ONNX_VERSION) +string(STRIP "${ONNX_VERSION}" ONNX_VERSION) + +if(NOT MSVC) + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0") + if(ONNX_COVERAGE) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage") + endif() +endif() + +if(ONNX_BUILD_TESTS) + list(APPEND CMAKE_MODULE_PATH ${ONNX_ROOT}/cmake/external) + include(googletest) +endif() + +if((ONNX_USE_LITE_PROTO AND TARGET protobuf::libprotobuf-lite) OR ((NOT ONNX_USE_LITE_PROTO) AND TARGET protobuf::libprotobuf)) + # Sometimes we need to use protoc compiled for host architecture while linking + # libprotobuf against target architecture. See https://github.com/caffe2/caffe + # 2/blob/96f35ad75480b25c1a23d6e9e97bccae9f7a7f9c/cmake/ProtoBuf.cmake#L92-L99 + if(EXISTS "${ONNX_CUSTOM_PROTOC_EXECUTABLE}") + message(STATUS "Using custom protoc executable") + set(ONNX_PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE}) + else() + set(ONNX_PROTOC_EXECUTABLE $) + endif() +else() + # Customized version of find Protobuf. We need to avoid situations mentioned + # in https://github.com/caffe2/caffe2/blob/b7d983f255ef5496474f1ea188edb5e0ac4 + # 42761/cmake/ProtoBuf.cmake#L82-L92 The following section is stolen from + # cmake/ProtoBuf.cmake in Caffe2 + find_program(Protobuf_PROTOC_EXECUTABLE + NAMES protoc + DOC "The Google Protocol Buffers Compiler") + + # Only if protoc was found, seed the include directories and libraries. We + # assume that protoc is installed at PREFIX/bin. We use get_filename_component + # to resolve PREFIX. 
+ if(Protobuf_PROTOC_EXECUTABLE) + set(ONNX_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) + get_filename_component(_PROTOBUF_INSTALL_PREFIX + ${Protobuf_PROTOC_EXECUTABLE} DIRECTORY) + get_filename_component(_PROTOBUF_INSTALL_PREFIX + ${_PROTOBUF_INSTALL_PREFIX}/.. REALPATH) + find_library(Protobuf_PROTOC_LIBRARY + NAMES protoc + PATHS ${_PROTOBUF_INSTALL_PREFIX}/lib + NO_DEFAULT_PATH) + if(ONNX_USE_LITE_PROTO) + find_library(Protobuf_LITE_LIBRARY + NAMES protobuf-lite + PATHS ${_PROTOBUF_INSTALL_PREFIX}/lib + NO_DEFAULT_PATH) + else(ONNX_USE_LITE_PROTO) + find_library(Protobuf_LIBRARY + NAMES protobuf + PATHS ${_PROTOBUF_INSTALL_PREFIX}/lib + NO_DEFAULT_PATH) + endif(ONNX_USE_LITE_PROTO) + find_path(Protobuf_INCLUDE_DIR google/protobuf/service.h + PATHS ${_PROTOBUF_INSTALL_PREFIX}/include + NO_DEFAULT_PATH) + find_package(Protobuf REQUIRED) + endif() +endif() + +# Build the libraries with -fPIC +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +# function(RELATIVE_PROTOBUF_GENERATE_CPP SRCS HDRS ROOT_DIR) from https://githu +# b.com/tensorflow/tensorflow/blob/d2c3b873c6f8ff999a2e4ee707a84ff00d9c15a5/tens +# orflow/contrib/cmake/tf_core_framework.cmake to solve the problem that +# customized dir can't be specified when calling PROTOBUF_GENERATE_CPP. +function(RELATIVE_PROTOBUF_GENERATE_CPP NAME SRCS HDRS ROOT_DIR DEPEND) + if(NOT ARGN) + message( + SEND_ERROR + "Error: RELATIVE_PROTOBUF_GENERATE_CPP() called without any proto files" + ) + return() + endif() + + if(MSVC AND BUILD_SHARED_LIBS) + set(ONNX_DLLEXPORT_STR "dllexport_decl=ONNX_API:") + else() + set(ONNX_DLLEXPORT_STR "") + endif() + + set(${SRCS}) + set(${HDRS}) + + set(GEN_PROTO_PY ${ROOT_DIR}/onnx/gen_proto.py) + foreach(INFILE ${ARGN}) + set(ABS_FILE ${ROOT_DIR}/${INFILE}) + get_filename_component(FILE_DIR ${ABS_FILE} DIRECTORY) + get_filename_component(FILE_WE ${INFILE} NAME_WE) + if(ONNX_ML) + if(ONNX_NAMESPACE STREQUAL "onnx") + set(GENERATED_FILE_WE "${FILE_WE}-ml") + else() + set(GENERATED_FILE_WE "${FILE_WE}_${ONNX_NAMESPACE}-ml") + endif() + else() + if(ONNX_NAMESPACE STREQUAL "onnx") + set(GENERATED_FILE_WE "${FILE_WE}") + else() + set(GENERATED_FILE_WE "${FILE_WE}_${ONNX_NAMESPACE}") + endif() + endif() + file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FILE_DIR}) + set(OUTPUT_PROTO_DIR "${CMAKE_CURRENT_BINARY_DIR}/${REL_DIR}") + + set(OUTPUT_PB_HEADER "${OUTPUT_PROTO_DIR}/${GENERATED_FILE_WE}.pb.h") + set(OUTPUT_PB_SRC "${OUTPUT_PROTO_DIR}/${GENERATED_FILE_WE}.pb.cc") + set(GENERATED_PROTO "${OUTPUT_PROTO_DIR}/${GENERATED_FILE_WE}.proto") + if(NOT (ONNX_NAMESPACE STREQUAL "onnx")) + # We need this dummy header generated by gen_proto.py when ONNX_NAMESPACE + # is not onnx + list(APPEND ${HDRS} "${OUTPUT_PROTO_DIR}/${GENERATED_FILE_WE}.pb.h") + endif() + list(APPEND ${SRCS} "${OUTPUT_PB_SRC}") + list(APPEND ${HDRS} "${OUTPUT_PB_HEADER}") + + if(NOT EXISTS "${OUTPUT_PROTO_DIR}") + file(MAKE_DIRECTORY "${OUTPUT_PROTO_DIR}") + endif() + + if("${PYTHON_EXECUTABLE}" STREQUAL "") + set(_python_exe "python") + else() + set(_python_exe "${PYTHON_EXECUTABLE}") + endif() + set(GEN_PROTO_ARGS + -p + "${ONNX_NAMESPACE}" + -o + "${OUTPUT_PROTO_DIR}" + "${FILE_WE}") + if(ONNX_ML) + list(APPEND GEN_PROTO_ARGS -m) + endif() + if(ONNX_USE_LITE_PROTO) + list(APPEND GEN_PROTO_ARGS -l) + endif() + add_custom_command(OUTPUT "${GENERATED_PROTO}" + COMMAND "${_python_exe}" "${GEN_PROTO_PY}" + ARGS ${GEN_PROTO_ARGS} + DEPENDS ${INFILE} + COMMENT "Running gen_proto.py on ${INFILE}" + VERBATIM) + + set(PROTOC_ARGS + ${GENERATED_PROTO} + -I + 
${CMAKE_CURRENT_BINARY_DIR} + --cpp_out + ${ONNX_DLLEXPORT_STR}${CMAKE_CURRENT_BINARY_DIR}) + if(BUILD_ONNX_PYTHON) + list(APPEND PROTOC_ARGS --python_out + ${ONNX_DLLEXPORT_STR}${CMAKE_CURRENT_BINARY_DIR}) + if(ONNX_GEN_PB_TYPE_STUBS) + # Haven't figured out how to generate mypy stubs on Windows yet + if(NOT WIN32) + # If onnx was packaged to pypi from Windows, protoc-gen-mypy.py is + # missing the +x flag. Add it. + execute_process(COMMAND chmod +x ${ROOT_DIR}/tools/protoc-gen-mypy.py) + set(PROTOC_MYPY_PLUGIN_FILE ${ROOT_DIR}/tools/protoc-gen-mypy.py) + else(NOT WIN32) + set(PROTOC_MYPY_PLUGIN_FILE ${ROOT_DIR}/tools/protoc-gen-mypy.bat) + endif() + list(APPEND PROTOC_ARGS + --plugin + protoc-gen-mypy=${PROTOC_MYPY_PLUGIN_FILE} + --mypy_out + ${ONNX_DLLEXPORT_STR}${CMAKE_CURRENT_BINARY_DIR}) + endif() + endif() + if(NOT ONNX_PROTOC_EXECUTABLE) + message(FATAL_ERROR "Protobuf compiler not found") + endif() + if(ONNX_PROTO_POST_BUILD_SCRIPT) + add_custom_command( + OUTPUT "${OUTPUT_PB_SRC}" "${OUTPUT_PB_HEADER}" + COMMAND ${ONNX_PROTOC_EXECUTABLE} ARGS ${PROTOC_ARGS} + COMMAND "${CMAKE_COMMAND}" -DFILENAME=${OUTPUT_PB_HEADER} + -DNAMESPACES=${ONNX_NAMESPACE} -P + ${ONNX_PROTO_POST_BUILD_SCRIPT} + COMMAND "${CMAKE_COMMAND}" -DFILENAME=${OUTPUT_PB_SRC} + -DNAMESPACES=${ONNX_NAMESPACE} -P + ${ONNX_PROTO_POST_BUILD_SCRIPT} + DEPENDS ${GENERATED_PROTO} ${DEPEND} + COMMENT "Running C++ protocol buffer compiler on ${GENERATED_PROTO}" + VERBATIM) + else() + add_custom_command( + OUTPUT "${OUTPUT_PB_SRC}" "${OUTPUT_PB_HEADER}" + COMMAND ${ONNX_PROTOC_EXECUTABLE} ARGS ${PROTOC_ARGS} + DEPENDS ${GENERATED_PROTO} ${DEPEND} + COMMENT "Running C++ protocol buffer compiler on ${GENERATED_PROTO}" + VERBATIM) + endif() + add_custom_target(${NAME} DEPENDS ${OUTPUT_PB_SRC} ${OUTPUT_PB_HEADER}) + endforeach() + + set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) + set(${SRCS} ${${SRCS}} PARENT_SCOPE) + set(${HDRS} ${${HDRS}} PARENT_SCOPE) +endfunction() + +relative_protobuf_generate_cpp(gen_onnx_proto + PROTO_SRCS + PROTO_HDRS + ${ONNX_ROOT} + "" + onnx/onnx.in.proto) +relative_protobuf_generate_cpp(gen_onnx_operators_proto + PROTO_SRCS2 + PROTO_HDRS2 + ${ONNX_ROOT} + gen_onnx_proto + onnx/onnx-operators.in.proto) +list(APPEND PROTO_SRCS ${PROTO_SRCS2}) +list(APPEND PROTO_HDRS ${PROTO_HDRS2}) + +file(GLOB_RECURSE onnx_src "${ONNX_ROOT}/onnx/*.h" "${ONNX_ROOT}/onnx/*.cc") +file(GLOB_RECURSE onnx_gtests_src "${ONNX_ROOT}/onnx/test/cpp/*.h" + "${ONNX_ROOT}/onnx/test/cpp/*.cc") +list(REMOVE_ITEM onnx_src "${ONNX_ROOT}/onnx/cpp2py_export.cc") +list(REMOVE_ITEM onnx_src ${onnx_gtests_src}) + +add_library(onnx_proto ${PROTO_SRCS} ${PROTO_HDRS}) +target_include_directories(onnx_proto PUBLIC + $ + $ + $) + + +if(ONNX_USE_LITE_PROTO) + if(TARGET protobuf::libprotobuf-lite) + target_link_libraries(onnx_proto PUBLIC protobuf::libprotobuf-lite) + else() + target_link_libraries(onnx_proto PUBLIC ${PROTOBUF_LITE_LIBRARIES}) + endif() +else() + if(TARGET protobuf::libprotobuf) + target_link_libraries(onnx_proto PUBLIC protobuf::libprotobuf) + else() + target_link_libraries(onnx_proto PUBLIC ${PROTOBUF_LIBRARIES}) + endif() +endif() +add_onnx_global_defines(onnx_proto) + +add_library(onnx ${onnx_src}) +target_include_directories(onnx PUBLIC + $ + $ + $) +target_link_libraries(onnx PUBLIC onnx_proto) +add_onnx_global_defines(onnx) + +if(BUILD_ONNX_PYTHON) + if("${PY_EXT_SUFFIX}" STREQUAL "") + if(MSVC) + set(PY_EXT_SUFFIX ".pyd") + else() + set(PY_EXT_SUFFIX ".so") + endif() + endif() + + 
add_library(onnx_cpp2py_export MODULE "${ONNX_ROOT}/onnx/cpp2py_export.cc") + set_target_properties(onnx_cpp2py_export PROPERTIES PREFIX "") + set_target_properties(onnx_cpp2py_export + PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") + set_target_properties(onnx_cpp2py_export PROPERTIES SUFFIX ${PY_EXT_SUFFIX}) + set_target_properties(onnx_cpp2py_export + PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + target_include_directories(onnx_cpp2py_export PRIVATE + $ + $ + $ + ${PYTHON_INCLUDE_DIR}) + + # pybind11 is a header only lib + find_package(pybind11 2.2) + if(pybind11_FOUND) + target_include_directories(onnx_cpp2py_export PUBLIC + ${pybind11_INCLUDE_DIRS}) + else() + if(EXISTS ${ONNX_ROOT}/third_party/pybind11/include/pybind11/pybind11.h) + target_include_directories(onnx_cpp2py_export PUBLIC + ${ONNX_ROOT}/third_party/pybind11/include) + else() + message(FATAL_ERROR "cannot find pybind") + endif() + endif() + + if(APPLE) + set_target_properties(onnx_cpp2py_export + PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") + target_link_libraries(onnx_cpp2py_export + PRIVATE -Wl,-force_load,$) + elseif(MSVC) + # In MSVC, we will add whole archive in default + target_link_libraries(onnx_cpp2py_export + PRIVATE -WHOLEARCHIVE:$) + else() + # Assume everything else is like gcc + target_link_libraries(onnx_cpp2py_export + PRIVATE "-Wl,--whole-archive" $ + "-Wl,--no-whole-archive") + endif() + + target_link_libraries(onnx_cpp2py_export PRIVATE onnx) + + if(MSVC) + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + set(MP_FLAG "-Xclang" "-fopenmp") + set(EXTRA_FLAGS + "-Wno-implicit-function-declaration" + "-Wno-undefined-inline" + "-Wno-incompatible-pointer-types" + "-Wno-dllexport-explicit-instantiation-decl" + "-Wno-microsoft-unqualified-friend" + "-Wno-absolute-value" + "-Wno-unused-variable" + "-Wno-writable-strings" + "-Qunused-arguments") + else() + set(MP_FLAG "/MP") + set(EXTRA_FLAGS "") + endif() + find_package(PythonInterp ${PY_VERSION} REQUIRED) + find_package(PythonLibs ${PY_VERSION} REQUIRED) + target_link_libraries(onnx_cpp2py_export PRIVATE ${PYTHON_LIBRARIES}) + target_compile_options(onnx_cpp2py_export + PRIVATE ${MP_FLAG} + /WX + /wd4800 # disable warning type' : forcing + # value to bool 'true' or 'false' + # (performance warning) + /wd4503 # identifier' : decorated name length + # exceeded, name was truncated + /wd4146 # unary minus operator applied to + # unsigned type, result still + # unsigned from include\google\protob + # uf\wire_format_lite.h + ${EXTRA_FLAGS}) + target_compile_options(onnx_cpp2py_export PRIVATE /MT) + add_onnx_global_defines(onnx_cpp2py_export) + endif() +endif() + +if(ONNX_BUILD_BENCHMARKS) + if(NOT TARGET benchmark) + # We will not need to test benchmark lib itself. + set(BENCHMARK_ENABLE_TESTING OFF + CACHE BOOL "Disable benchmark testing as we don't need it.") + # We will not need to install benchmark since we link it statically. 
+ set(BENCHMARK_ENABLE_INSTALL OFF + CACHE BOOL + "Disable benchmark install to avoid overwriting vendor install.") + add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/benchmark) + endif() + + add_executable(protobuf-bench tools/protobuf-bench.cc) + target_include_directories(protobuf-bench PUBLIC + $ + $ + $ + $) + target_link_libraries(protobuf-bench onnx_proto benchmark) +endif() + +# Export include directories +set(ONNX_INCLUDE_DIRS "${ONNX_ROOT}" "${CMAKE_CURRENT_BINARY_DIR}") +get_directory_property(hasParent PARENT_DIRECTORY) +if(hasParent) + set(ONNX_INCLUDE_DIRS ${ONNX_INCLUDE_DIRS} PARENT_SCOPE) +endif() + +if(MSVC) + target_compile_options(onnx_proto + PRIVATE ${MP_FLAG} + /WX + /wd4800 # disable warning type' : forcing value + # to bool 'true' or 'false' + # (performance warning) + /wd4503 # identifier' : decorated name length + # exceeded, name was truncated + /wd4146 # unary minus operator applied to + # unsigned type, result still unsigned: + # include\google\protobuf\wire_format_l + # ite.h + ${EXTRA_FLAGS}) + target_compile_options(onnx + PRIVATE ${MP_FLAG} + /WX + /wd4800 # disable warning type' : forcing value + # to bool 'true' or 'false' + # (performance warning) + /wd4503 # identifier' : decorated name length + # exceeded, name was truncated + /wd4146 # unary minus operator applied to + # unsigned type, result still unsigned + ${EXTRA_FLAGS}) + add_msvc_runtime_flag(onnx_proto) + add_msvc_runtime_flag(onnx) + set(onnx_static_library_flags + -IGNORE:4221 # LNK4221: This object file does not define any previously + # undefined public symbols, so it will not be used by any + # link operation that consumes this library + ) + set_target_properties(onnx + PROPERTIES STATIC_LIBRARY_FLAGS + "${onnx_static_library_flags}") +elseif(APPLE) + +else() + if(${ONNX_WERROR}) + target_compile_options(onnx PRIVATE -Werror=sign-compare -Werror=conversion) + endif() +endif() + +if(APPLE) + set_target_properties(onnx PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") +endif() + +# ---[ ONNX Interface for Framework Integratin (ONNXIFI) +add_library(onnxifi INTERFACE) +target_include_directories(onnxifi INTERFACE + $ + $) + +# ---[ ONNXIFI loader +add_library(onnxifi_loader STATIC onnx/onnxifi_loader.c) +target_include_directories(onnxifi_loader PUBLIC + $ + $) +# Users of ONNX backend API would compile it with their toolchain, so it is +# implemented in standard C89 for maximum compatibility +set_target_properties(onnxifi_loader + PROPERTIES C_STANDARD + 90 + C_EXTENSIONS + NO) +target_link_libraries(onnxifi_loader PUBLIC onnxifi ${CMAKE_DL_LIBS}) +if(MSVC) + add_msvc_runtime_flag(onnxifi_loader) +endif() + +if (NOT ANDROID AND NOT IOS) + # ---[ ONNXIFI wrapper + add_library(onnxifi_wrapper MODULE onnx/onnxifi_wrapper.c) + target_include_directories(onnxifi_wrapper PRIVATE + $ + $) + set_target_properties(onnxifi_wrapper PROPERTIES + C_STANDARD 99 + C_EXTENSIONS NO + OUTPUT_NAME "onnxifi" + POSITION_INDEPENDENT_CODE YES) + target_link_libraries(onnxifi_wrapper PRIVATE onnxifi_loader onnxifi) + if(DEFINED ONNXIFI_SEARCH_DIR) + target_compile_definitions(onnxifi_wrapper PRIVATE "ONNXIFI_SEARCH_DIR=\"${ONNXIFI_SEARCH_DIR}\"") + endif() + if(WIN32) + if(MSVC) + target_compile_definitions(onnxifi_wrapper PRIVATE "ONNXIFI_PUBLIC=__declspec(dllexport)") + else() + target_compile_definitions(onnxifi_wrapper PRIVATE "ONNXIFI_PUBLIC=__attribute__((__dllexport__))") + endif() + endif() + if(APPLE) + # By default CMake would use .so suffix on Mac + set_target_properties(onnxifi_wrapper PROPERTIES 
SUFFIX ".dylib") + endif() +endif() + +# ---[ ONNXIFI dummy backend +add_library(onnxifi_dummy SHARED onnx/onnxifi_dummy.c) +target_include_directories(onnxifi_dummy PRIVATE + $ + $) +target_link_libraries(onnxifi_dummy PUBLIC onnxifi ${CMAKE_DL_LIBS}) +target_compile_definitions(onnxifi_dummy PRIVATE ONNXIFI_BUILD_LIBRARY=TRUE) +if(MSVC) + add_msvc_runtime_flag(onnxifi_dummy) +endif() + +install(DIRECTORY ${ONNX_ROOT}/onnx + DESTINATION include + FILES_MATCHING + PATTERN "*.h") +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/onnx + DESTINATION include + FILES_MATCHING + PATTERN "*.h") + +configure_file( + ${PROJECT_SOURCE_DIR}/cmake/ONNXConfigVersion.cmake.in + ${PROJECT_BINARY_DIR}/ONNXConfigVersion.cmake + @ONLY) +configure_file( + ${PROJECT_SOURCE_DIR}/cmake/ONNXConfig.cmake.in + ${PROJECT_BINARY_DIR}/ONNXConfig.cmake + @ONLY) +install(FILES + ${PROJECT_BINARY_DIR}/ONNXConfigVersion.cmake + ${PROJECT_BINARY_DIR}/ONNXConfig.cmake + DESTINATION share/cmake/ONNX + COMPONENT dev) +install(EXPORT ONNXTargets DESTINATION share/cmake/ONNX) +install(TARGETS + onnx onnx_proto + onnxifi onnxifi_dummy onnxifi_loader + EXPORT ONNXTargets DESTINATION lib) + +if(NOT ANDROID AND NOT IOS) + install(TARGETS onnxifi_wrapper + EXPORT ONNXTargets DESTINATION lib) +endif() + +if(ONNX_BUILD_TESTS) + include(${ONNX_ROOT}/cmake/unittest.cmake) +endif() + +include(cmake/summary.cmake) +onnx_print_configuration_summary() diff --git a/include/menoh/menoh.h b/include/menoh/menoh.h index d561a1b..e8375c1 100644 --- a/include/menoh/menoh.h +++ b/include/menoh/menoh.h @@ -11,7 +11,7 @@ #define MENOH_API #endif -#define MENOH_SUPPORTED_ONNX_OPSET_VERSION 7 +#define MENOH_SUPPORTED_ONNX_OPSET_VERSION 8 #ifndef MENOH_ERROR_MESSAGE_MAX_LENGTH #define MENOH_ERROR_MESSAGE_MAX_LENGTH 1024 @@ -121,7 +121,6 @@ menoh_error_code MENOH_API menoh_make_model_data_from_onnx_data_on_memory( menoh_error_code MENOH_API menoh_make_model_data(menoh_model_data_handle* dst_handle); /*! \brief Add a new parameter in model_data - * * \note Duplication of parameter_name is not allowed and it throws error. */ menoh_error_code MENOH_API menoh_model_data_add_parameter( @@ -159,7 +158,7 @@ menoh_error_code MENOH_API menoh_model_data_add_attribute_float_to_current_node( */ menoh_error_code MENOH_API menoh_model_data_add_attribute_ints_to_current_node( menoh_model_data_handle model_data, const char* attribute_name, int32_t size, - const int* value); + const int32_t* value); /*! \brief Add a new float array attribute to latest added node in model_data * * \note Duplication of attribute_name is not allowed and it throws error. @@ -233,11 +232,22 @@ menoh_variable_profile_table_builder_add_input_profile_dims_4( menoh_dtype dtype, int32_t num, int32_t channel, int32_t height, int32_t width); -/*! \brief Add output profile +/*! \brief Add output name + * + * dims amd dtype of output are calculated automatically + * when calling of menoh_build_variable_profile_table. + */ +menoh_error_code MENOH_API menoh_variable_profile_table_builder_add_output_name( + menoh_variable_profile_table_builder_handle builder, const char* name); + +/*! \brief [DEPRECATED] Add output profile * * Output profile contains name and dtype. Its dims are calculated automatically * when calling of menoh_build_variable_profile_table. */ +MENOH_DEPRECATED_ATTRIBUTE( + "please use menoh_variable_profile_table_builder_add_output_name() instead. 
" + "dtype is totally ignored.") menoh_error_code MENOH_API menoh_variable_profile_table_builder_add_output_profile( menoh_variable_profile_table_builder_handle builder, const char* name, diff --git a/include/menoh/menoh.hpp b/include/menoh/menoh.hpp index e901f0b..b884971 100644 --- a/include/menoh/menoh.hpp +++ b/include/menoh/menoh.hpp @@ -280,13 +280,23 @@ namespace menoh { dims.size(), dims.data())); } - //! Add output profile. That profile contains name, dtype. - /*! dims of output is calculated automatically. + //! Add a name of required output + /*! dtype and dims of output are calculated automatically. */ - void add_output_profile(std::string const& name, dtype_t dtype) { + void add_output_name(std::string const& name) { MENOH_CPP_API_ERROR_CHECK( - menoh_variable_profile_table_builder_add_output_profile( - impl_.get(), name.c_str(), static_cast(dtype))); + menoh_variable_profile_table_builder_add_output_name( + impl_.get(), name.c_str())); + } + + //! Add output profile. That profile contains name and dtype + /*! dims of output are calculated automatically. + * \note This function is deprecated. Given dtype is totally ignored and + * inferenced by dtype of input. Use add_output_name() instead. + */ + [[deprecated("Use add_output_name() instead")]] + void add_output_profile(std::string const& name, dtype_t) { + add_output_name(name); } //! Factory function for variable_profile_table. diff --git a/menoh/CMakeLists.txt b/menoh/CMakeLists.txt index 9bd995b..a4fe9e0 100644 --- a/menoh/CMakeLists.txt +++ b/menoh/CMakeLists.txt @@ -2,28 +2,21 @@ option(LINK_STATIC_LIBGCC "Link static libgcc to libmenoh" OFF) option(LINK_STATIC_LIBSTDCXX "Link static libstdc++ to libmenoh" OFF) -if(NOT DEFINED ONNX_PROTO_SRC) - message(FATAL_ERROR "ONNX_PROTO_SRC is not found") -endif() -if(NOT DEFINED ONNX_PROTO_HEADER) - message(FATAL_ERROR "ONNX_PROTO_HEADER is not found") -endif() - # Note: The libraries can be static (.a) or shared (.so) +if(NOT DEFINED PROTOBUF_INCLUDE_DIRS) + message(FATAL_ERROR "PROTOBUF_INCLUDE_DIRS is not found") +endif() if(NOT DEFINED MKLDNN_LIBRARIES) message(FATAL_ERROR "MKLDNN_LIBRARIES is not found") endif() -if(NOT DEFINED PROTOBUF_LIBRARIES) - message(FATAL_ERROR "PROTOBUF_LIBRARIES is not found") -endif() file(GLOB_RECURSE SOURCES "." 
"*.cpp") # Create a object library for generating shared library add_library(menoh_objlib OBJECT ${SOURCES}) -add_dependencies(menoh_objlib gen_onnx_outputs) -target_sources(menoh_objlib PRIVATE ${ONNX_PROTO_SRC}) +add_dependencies(menoh_objlib gen_onnx_proto) # custom target defined in onnx +target_include_directories(menoh_objlib PUBLIC $) set_target_properties(menoh_objlib PROPERTIES POSITION_INDEPENDENT_CODE ON) @@ -43,9 +36,6 @@ menoh_link_libraries(menoh PRIVATE) add_library(menoh_test_target SHARED $) menoh_link_libraries(menoh_test_target PRIVATE) -set_source_files_properties(${ONNX_PROTO_SRC} PROPERTIES GENERATED TRUE) -set_source_files_properties(${ONNX_PROTO_HEADER} PROPERTIES GENERATED TRUE) - install(TARGETS menoh RUNTIME DESTINATION "bin" LIBRARY DESTINATION "lib" diff --git a/menoh/array.hpp b/menoh/array.hpp index 2bb70bc..9b9ce2c 100644 --- a/menoh/array.hpp +++ b/menoh/array.hpp @@ -11,6 +11,21 @@ namespace menoh_impl { + class array_profile { + public: + array_profile() = default; + + array_profile(dtype_t dtype, std::vector const& dims) + : dtype_(dtype), dims_(dims) {} + + dtype_t dtype() const { return dtype_; } + auto const& dims() const { return dims_; } + + private: + dtype_t dtype_ = dtype_t::undefined; + std::vector dims_; + }; + class array { public: array() = default; @@ -22,14 +37,25 @@ namespace menoh_impl { array(dtype_t d, std::vector const& dims); + array(array_profile const& profile, void* data_handle) + : array(profile.dtype(), profile.dims(), data_handle) {} + + array(array_profile const& profile, std::shared_ptr const& data) + : array(profile.dtype(), profile.dims(), data) {} + + explicit array(array_profile const& profile) + : array(profile.dtype(), profile.dims()) {} + dtype_t dtype() const { return dtype_; } auto const& dims() const { return dims_; } + auto* data() const { return data_handle_; } bool has_ownership() const { return static_cast(data_); } private: dtype_t dtype_ = dtype_t::undefined; std::vector dims_; + std::shared_ptr data_; void* data_handle_ = nullptr; }; diff --git a/menoh/attribute_completion_and_shape_inference.hpp b/menoh/attribute_completion_and_shape_inference.hpp new file mode 100644 index 0000000..dc20fd3 --- /dev/null +++ b/menoh/attribute_completion_and_shape_inference.hpp @@ -0,0 +1,888 @@ + +#ifndef MENOH_ATTRIBUTE_COMPLETION_AND_SHAPE_INFERENCE_HPP +#define MENOH_ATTRIBUTE_COMPLETION_AND_SHAPE_INFERENCE_HPP +/* + * This file is generated by gen_attribute_completion_and_shape_inference_hpp.py + * Do NOT modify this file directly + */ +#include +#include +#include +#include + +#include +#include + +namespace menoh_impl { + inline auto complete_attribute_and_infer_shape( + model_data& model_data, + std::unordered_map const& + input_profile_table) { + using ints = std::vector; + std::unordered_map variable_profile_table( + input_profile_table.begin(), input_profile_table.end()); + std::transform( + model_data.parameter_name_and_array_list.begin(), + model_data.parameter_name_and_array_list.end(), + std::inserter(variable_profile_table, + variable_profile_table.end()), + [](auto const& p){ + return std::make_pair( + p.first, + array_profile(p.second.dtype(), p.second.dims())); }); + auto profile_of = [&variable_profile_table](std::string const& name){ + assert(variable_profile_table.find(name) != + variable_profile_table.end()); + return variable_profile_table.at(name); + }; + auto dims_of = [&variable_profile_table, profile_of]( + std::string const& name){ + return profile_of(name).dims(); + }; + auto dtype_of = 
[&variable_profile_table, profile_of]( + std::string const& name){ + return profile_of(name).dtype(); + }; + auto ndims_of = [&dims_of](std::string const& parameter_name) { + return dims_of(parameter_name).size(); + }; + auto add_variable_to_table = [&variable_profile_table]( + std::string const& name, + dtype_t dtype, ints const& dims){ + variable_profile_table.emplace( + name, array_profile(dtype, dims)); + }; + + auto graph = make_graph(model_data.node_list); // FIXME reorder nodes + model_data.node_list = graph.node_list(); + for(auto& node : model_data.node_list) { + auto input = [&node](int i){ + return node.input_name_list.at(i); + }; + auto output = [&node](int i){ + return node.output_name_list.at(i); + }; + +if(node.op_type == "Abs") { + + + + { + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); + + } +} +else + + +if(node.op_type == "Add") { + + + + { + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); + + } +} +else + + +if(node.op_type == "AveragePool") { + +assert(2 <= ndims_of(input(0))); + + +{ + auto found = node.attribute_table.find("count_include_pad"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "count_include_pad", 0); + + } +} + + +{ + auto found = node.attribute_table.find("kernel_shape"); + if(found == node.attribute_table.end()) { + +assert(!"attribute not found: kernel_shape"); + + } +} + + +{ + auto found = node.attribute_table.find("pads"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "pads", ints(2*(ndims_of(input(0))-2), 0)); + + } +} + + +{ + auto found = node.attribute_table.find("strides"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "strides", ints(ndims_of(input(0))-2, 1)); + + } +} + + + { + +auto count_include_pad = get(node.attribute_table.at("count_include_pad")); + + +auto kernel_shape = get(node.attribute_table.at("kernel_shape")); + + +auto pads = get(node.attribute_table.at("pads")); + + +auto strides = get(node.attribute_table.at("strides")); + + +add_variable_to_table(output(0), dtype_of(input(0)), + calc_2d_output_dims( + dims_of(input(0)), dims_of(input(0)).at(1), + kernel_shape, strides, pads)); + + } +} +else + + +if(node.op_type == "BatchNorm") { + + +{ + auto found = node.attribute_table.find("epsilon"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "epsilon", 1.e-05f); + + } +} + + +{ + auto found = node.attribute_table.find("momentum"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "momentum", 0.9f); + + } +} + + +{ + auto found = node.attribute_table.find("spatial"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "spatial", 1); + + } +} + + + { + +auto epsilon = get(node.attribute_table.at("epsilon")); + + +auto momentum = get(node.attribute_table.at("momentum")); + + +auto spatial = get(node.attribute_table.at("spatial")); + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); + + } +} +else + + +if(node.op_type == "Concat") { + + +{ + auto found = node.attribute_table.find("axis"); + if(found == node.attribute_table.end()) { + +assert(!"attribute not found: axis"); + + } +} + + + { + +auto axis = 
get(node.attribute_table.at("axis")); + + +auto output_dims = dims_of(input(0)); +for(int i = 1; i < node.input_name_list.size(); ++i) { + // TODO dim check + output_dims.at(axis) += dims_of(input(i)).at(axis); +} +add_variable_to_table(output(0), dtype_of(input(0)), output_dims); + + } +} +else + + +if(node.op_type == "Conv") { + +auto kernel_ndims = ndims_of(input(1))-2; +auto weights_shape = dims_of(input(1)); +auto kernel_shape = ints(weights_shape.begin()+2, weights_shape.end()); + + +{ + auto found = node.attribute_table.find("dilations"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "dilations", ints(kernel_ndims, 1)); + + } +} + + +{ + auto found = node.attribute_table.find("group"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "group", 1); + + } +} + + +{ + auto found = node.attribute_table.find("kernel_shape"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "kernel_shape", kernel_shape); + + } +} + + +{ + auto found = node.attribute_table.find("pads"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "pads", ints(kernel_ndims*2, 0)); + + } +} + + +{ + auto found = node.attribute_table.find("strides"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "strides", ints(kernel_ndims, 1)); + + } +} + + + { + +auto dilations = get(node.attribute_table.at("dilations")); + + +auto group = get(node.attribute_table.at("group")); + + +auto kernel_shape = get(node.attribute_table.at("kernel_shape")); + + +auto pads = get(node.attribute_table.at("pads")); + + +auto strides = get(node.attribute_table.at("strides")); + + +add_variable_to_table(output(0), dtype_of(input(0)), + calc_2d_output_dims( + dims_of(input(0)), dims_of(input(1)).at(0), + kernel_shape, strides, pads)); + + } +} +else + + +if(node.op_type == "ConvTranspose") { + +auto kernel_ndims = ndims_of(input(1))-2; +auto weights_shape = dims_of(input(1)); +auto kernel_shape = ints(weights_shape.begin()+2, weights_shape.end()); + + +{ + auto found = node.attribute_table.find("dilations"); + if(found == node.attribute_table.end()) { + +assert(!"attribute not found: dilations"); + + } +} + + +{ + auto found = node.attribute_table.find("group"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "group", 1); + + } +} + + +{ + auto found = node.attribute_table.find("kernel_shape"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "kernel_shape", kernel_shape); + + } +} + + +{ + auto found = node.attribute_table.find("output_padding"); + if(found == node.attribute_table.end()) { + +assert(!"attribute not found: output_padding"); + + } +} + + +{ + auto found = node.attribute_table.find("strides"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "strides", ints(kernel_ndims, 1)); + + } +} + + +{ + auto found = node.attribute_table.find("output_shape"); + assert(!(found == node.attribute_table.end() && + node.attribute_table.find("pads") == node.attribute_table.end())); + if(found != node.attribute_table.end()) { + auto output_shape = get(found->second); + /* [dim0_begin, dim1_begin, ... , dim0_end, dim1_end, ..., ...] 
*/ + ints pads(kernel_ndims*2, 0); + auto output_padding = + get(node.attribute_table.at("output_padding")); + auto strides = get(node.attribute_table.at("strides")); + auto input_profile = input_profile_table.at(input(0)); + ints input_size(input_profile.dims().begin()+2, + input_profile.dims().end()); + + for(int i = 0; i < kernel_ndims; ++i) { + auto total_padding = strides[i] * (input_size[i] - 1) + + output_padding[i] + kernel_shape[i] - output_shape[i]; + pads[i] = total_padding - (total_padding/2); + pads[i+kernel_ndims] = (total_padding/2); + } + + node.attribute_table["pads"] = pads; + } +} + + { + +auto dilations = get(node.attribute_table.at("dilations")); + + +auto group = get(node.attribute_table.at("group")); + + +auto kernel_shape = get(node.attribute_table.at("kernel_shape")); + + +auto output_padding = get(node.attribute_table.at("output_padding")); + + +auto strides = get(node.attribute_table.at("strides")); + + +add_variable_to_table(output(0), dtype_of(input(0)), + calc_2d_output_dims_for_conv_transpose( + dims_of(input(0)), dims_of(input(1)).at(0), + kernel_shape, strides, get(node.attribute_table.at("pads")))); + + } +} +else + + +if(node.op_type == "Elu") { + + +{ + auto found = node.attribute_table.find("alpha"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "alpha", 1.f); + + } +} + + + { + +auto alpha = get(node.attribute_table.at("alpha")); + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); + + } +} +else + + +if(node.op_type == "FC") { + + + + { + + +auto output_dims = ints({dims_of(input(0)).at(0), dims_of(input(1)).at(0)}); +add_variable_to_table(output(0), dtype_of(input(0)), + output_dims); + + } +} +else + + +if(node.op_type == "Gemm") { + + +{ + auto found = node.attribute_table.find("alpha"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "alpha", 1.f); + + } +} + + +{ + auto found = node.attribute_table.find("beta"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "beta", 1.f); + + } +} + + +{ + auto found = node.attribute_table.find("transA"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "transA", 0); + + } +} + + +{ + auto found = node.attribute_table.find("transB"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "transB", 0); + + } +} + + + { + +auto alpha = get(node.attribute_table.at("alpha")); + + +auto beta = get(node.attribute_table.at("beta")); + + +auto transA = get(node.attribute_table.at("transA")); + + +auto transB = get(node.attribute_table.at("transB")); + + +auto a_dims = dims_of(input(0)); +assert(a_dims.size() == 2); +if(transA) { + std::swap(a_dims.at(0), a_dims.at(1)); +} + +auto b_dims = dims_of(input(1)); +assert(b_dims.size() == 2); +if(transB) { + std::swap(b_dims.at(0), b_dims.at(1)); +} + +auto output_dims = ints({a_dims.at(0), b_dims.at(1)}); +add_variable_to_table(output(0), dtype_of(input(0)), output_dims); + + } +} +else + + +if(node.op_type == "LeakyRelu") { + + +{ + auto found = node.attribute_table.find("alpha"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "alpha", 0.01f); + + } +} + + + { + +auto alpha = get(node.attribute_table.at("alpha")); + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); + + } +} +else 
+ + +if(node.op_type == "LRN") { + + +{ + auto found = node.attribute_table.find("alpha"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "alpha", 0.0001f); + + } +} + + +{ + auto found = node.attribute_table.find("beta"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "beta", 0.75f); + + } +} + + +{ + auto found = node.attribute_table.find("bias"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "bias", 1.0f); + + } +} + + +{ + auto found = node.attribute_table.find("size"); + if(found == node.attribute_table.end()) { + +assert(!"attribute not found: size"); + + } +} + + + { + +auto alpha = get(node.attribute_table.at("alpha")); + + +auto beta = get(node.attribute_table.at("beta")); + + +auto bias = get(node.attribute_table.at("bias")); + + +auto size = get(node.attribute_table.at("size")); + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); + + } +} +else + + +if(node.op_type == "MaxPool") { + + +{ + auto found = node.attribute_table.find("kernel_shape"); + if(found == node.attribute_table.end()) { + +assert(!"attribute not found: kernel_shape"); + + } +} + + +{ + auto found = node.attribute_table.find("pads"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "pads", ints(2*(ndims_of(input(0))-2), 0)); + + } +} + + +{ + auto found = node.attribute_table.find("storage_order"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "storage_order", 0); + + } +} + + +{ + auto found = node.attribute_table.find("strides"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "strides", ints(ndims_of(input(0))-2, 1)); + + } +} + + + { + +auto kernel_shape = get(node.attribute_table.at("kernel_shape")); + + +auto pads = get(node.attribute_table.at("pads")); + + +auto storage_order = get(node.attribute_table.at("storage_order")); + + +auto strides = get(node.attribute_table.at("strides")); + + +add_variable_to_table(output(0), dtype_of(input(0)), + calc_2d_output_dims( + dims_of(input(0)), dims_of(input(0)).at(1), + kernel_shape, strides, pads)); + + } +} +else + + +if(node.op_type == "Relu") { + + + + { + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); + + } +} +else + + +if(node.op_type == "Softmax") { + + +{ + auto found = node.attribute_table.find("axis"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "axis", 1); + + } +} + + + { + +auto axis = get(node.attribute_table.at("axis")); + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); + + } +} +else + + +if(node.op_type == "Sum") { + + + + { + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); + + } +} +else + + +if(node.op_type == "Sqrt") { + + + + { + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); + + } +} +else + + +if(node.op_type == "Tanh") { + + + + { + + +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), 
dtype_of(input(0)), dims_of(input(0))); + + } +} +else + + +if(node.op_type == "Transpose") { + +ints perm(ndims_of(input(0))); +for(int i = 0; i < perm.size(); ++i) {{ + perm.at(i) = perm.size()-i-1; +}} + + +{ + auto found = node.attribute_table.find("perm"); + if(found == node.attribute_table.end()) { + +node.attribute_table.emplace( + "perm", perm); + + } +} + + + { + +auto perm = get(node.attribute_table.at("perm")); + + +auto input_dims = dims_of(input(0)); +ints output_dims(input_dims.size()); +for(int i = 0; i < input_dims.size(); ++i) { + output_dims.at(i) = input_dims.at(perm.at(i)); +} +add_variable_to_table(output(0), dtype_of(input(0)), output_dims); + + } +} +else + + +{ + throw unsupported_operator(node.op_type); +} + + } + return variable_profile_table; + } +} // namespace menoh_impl + +#endif // MENOH_ATTRIBUTE_COMPLETION_AND_SHAPE_INFERENCE_HPP + diff --git a/menoh/graph.cpp b/menoh/graph.cpp index 569f193..0f8789a 100644 --- a/menoh/graph.cpp +++ b/menoh/graph.cpp @@ -221,184 +221,4 @@ namespace menoh_impl { node_list.end()); } - std::unordered_map> make_output_dims_table( - menoh_impl::model_data const& model_data, - std::vector>> const& - input_name_and_dims_pair_list) { - - std::vector supported_operator_list{{"Abs", - "Elu", - "LeakyRelu", - "Relu", - "Sqrt", - "Tanh", - "AveragePool", - "Add", - "BatchNormalization", - "Concat", - "Conv", - "ConvTranspose", - "FC", - "Gemm", - "GlobalAveragePool", - "GlobalMaxPool", - "LRN", - "MaxPool", - "Softmax", - "Sum"}}; - - std::unordered_map> variable_dims_table( - input_name_and_dims_pair_list.begin(), - input_name_and_dims_pair_list.end()); - auto graph = make_graph(model_data.node_list); - auto parameter_table = std::unordered_map( - model_data.parameter_name_and_array_list.begin(), - model_data.parameter_name_and_array_list.end()); - for(auto const& node : graph.node_list()) { - if(node.op_type == "Conv") { - auto weight_name = node.input_name_list.at(1); - auto output_channel_num = - get_output_channel_num_from_parameter_dims( - find_value(parameter_table, weight_name).dims()); - auto output_dims = calc_2d_output_dims(node, output_channel_num, - variable_dims_table); - auto dilations = - optional_attribute_ints(node, "dilations", {1, 1}); - if(dilations != std::vector({1, 1})) { - auto actual = "(" + std::to_string(dilations.at(0)) + ", " + - std::to_string(dilations.at(1)) + ")"; - throw unsupported_operator_attribute( - node.op_type, node.output_name_list.front(), "dilations", - actual, "(1, 1)"); - } - auto group = optional_attribute_int(node, "group", 1); - if(group != 1) { - throw unsupported_operator_attribute( - node.op_type, node.output_name_list.front(), "group", - std::to_string(group), "1"); - } - variable_dims_table.insert( - {node.output_name_list.at(0), output_dims}); - } else if(node.op_type == "ConvTranspose") { - auto weight_name = node.input_name_list.at(1); - auto output_channel_num = - get_output_channel_num_from_parameter_dims( - find_value(parameter_table, weight_name).dims()); - auto output_dims = calc_2d_output_dims_for_conv_transpose( - node, output_channel_num, variable_dims_table); - auto dilations = - optional_attribute_ints(node, "dilations", {1, 1}); - if(dilations != std::vector({1, 1})) { - auto actual = "(" + std::to_string(dilations.at(0)) + ", " + - std::to_string(dilations.at(1)) + ")"; - throw unsupported_operator_attribute( - node.op_type, node.output_name_list.front(), "dilations", - actual, "(1, 1)"); - } - auto group = optional_attribute_int(node, "group", 1); - if(group != 1) { 
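The get(...) calls in the generated header above are the typed get<ints>/get<float>/get<int> accessors emitted per attribute by scripts/gen_attribute_completion_and_shape_inference_hpp.py later in this patch. The 2-D handlers (Conv, MaxPool, AveragePool) pass the completed kernel_shape, strides and pads into calc_2d_output_dims; a minimal sketch of the arithmetic that helper is assumed to implement (the helper itself is not shown in this hunk):

#include <vector>

// Assumed behaviour: standard convolution/pooling output-size formula,
// with pads laid out ONNX-style as {h_begin, w_begin, h_end, w_end}.
inline std::vector<int> calc_2d_output_dims_sketch(
    std::vector<int> const& input_dims,   // {batch, channel, height, width}
    int output_channel_num,
    std::vector<int> const& kernel_shape, // {kh, kw}
    std::vector<int> const& strides,      // {sh, sw}
    std::vector<int> const& pads) {       // {h_begin, w_begin, h_end, w_end}
    auto spatial = [&](int i) {
        return (input_dims.at(2 + i) + pads.at(i) + pads.at(i + 2) -
                kernel_shape.at(i)) / strides.at(i) + 1;
    };
    return {input_dims.at(0), output_channel_num, spatial(0), spatial(1)};
}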
- throw unsupported_operator_attribute( - node.op_type, node.output_name_list.front(), "group", - std::to_string(group), "1"); - } - variable_dims_table.insert( - {node.output_name_list.at(0), output_dims}); - } else if(node.op_type == "MaxPool" || - node.op_type == "AveragePool") { - auto input_name = node.input_name_list.at(0); - auto output_channel_num = get_channel_num_from_variable_dims( - find_value(variable_dims_table, input_name)); - if(node.op_type == "AveragePool") { - auto pads = optional_attribute_ints(node, "pads", {0, 0}); - auto count_include_pad = optional_attribute_int( - node, "count_include_pad", 1); // TODO - if(pads != std::vector({0, 0}) && - count_include_pad == 0) { - throw unsupported_operator_attribute( - node.op_type, node.output_name_list.front(), - "count_include_pad", - std::to_string(count_include_pad), "0"); - } - } - auto output_dims = calc_2d_output_dims(node, output_channel_num, - variable_dims_table); - variable_dims_table.insert( - {node.output_name_list.at(0), output_dims}); - } else if(node.op_type == "GlobalMaxPool" || - node.op_type == "GlobalAveragePool") { - auto input_name = node.input_name_list.at(0); - auto input_dims = find_value(variable_dims_table, input_name); - auto output_dims = input_dims; - output_dims.at(2) = 1; - output_dims.at(3) = 1; - variable_dims_table.insert( - {node.output_name_list.at(0), output_dims}); - } else if(node.op_type == "FC") { - auto input_name = node.input_name_list.at(0); - auto input_dims = find_value(variable_dims_table, input_name); - auto batch_size = get_batch_size_from_variable_dims( - find_value(variable_dims_table, input_name)); - auto weight_dims = - find_value(parameter_table, node.input_name_list.at(1)) - .dims(); - auto input_size = - std::accumulate(input_dims.begin() + 1, input_dims.end(), 1, - std::multiplies()); - if(input_size != weight_dims[1]) { - throw dimension_mismatch( - node.op_type, node.output_name_list.front(), - "input[1] and weight[1]", std::to_string(input_size), - std::to_string(weight_dims[1])); - } - std::vector output_dims{batch_size, weight_dims[0]}; - variable_dims_table.insert( - {node.output_name_list.at(0), output_dims}); - } else if(node.op_type == "Gemm") { - auto input_name = node.input_name_list.at(0); - auto input_dims = find_value(variable_dims_table, input_name); - auto batch_size = get_batch_size_from_variable_dims( - find_value(variable_dims_table, input_name)); - auto weight_dims = - find_value(parameter_table, node.input_name_list.at(1)) - .dims(); - auto trans_a = optional_attribute_int(node, "transA", 0); - if(trans_a) { - throw unsupported_operator_attribute( - node.op_type, node.output_name_list.front(), "transA", - std::to_string(trans_a), "0"); - } - auto trans_b = optional_attribute_int(node, "transB", 0); - if(!trans_b) { - throw unsupported_operator_attribute( - node.op_type, node.output_name_list.front(), "transB", - std::to_string(trans_b), "1"); - } - auto input_size = - std::accumulate(input_dims.begin() + 1, input_dims.end(), 1, - std::multiplies()); - if(input_size != weight_dims[1]) { - throw dimension_mismatch( - node.op_type, node.output_name_list.front(), - "input[1] and weight[1]", std::to_string(input_size), - std::to_string(weight_dims[1])); - } - std::vector output_dims{batch_size, weight_dims[0]}; - variable_dims_table.insert( - {node.output_name_list.at(0), output_dims}); - } else if(std::find(supported_operator_list.begin(), - supported_operator_list.end(), - node.op_type) != - supported_operator_list - .end()) { // check if supported 
operator - auto input_name = node.input_name_list.at(0); - auto output_dims = find_value(variable_dims_table, input_name); - variable_dims_table.insert( - {node.output_name_list.at(0), output_dims}); - } else { - throw unsupported_operator(node.op_type); - } - } - return variable_dims_table; - } // namespace menoh_impl - } // namespace menoh_impl diff --git a/menoh/graph.hpp b/menoh/graph.hpp index 801a44d..7c7fe17 100644 --- a/menoh/graph.hpp +++ b/menoh/graph.hpp @@ -82,11 +82,6 @@ namespace menoh_impl { " valid value: " + valid_value) {} }; - std::unordered_map> make_output_dims_table( - menoh_impl::model_data const& model_data, - std::vector>> const& - input_dims_table); - } // namespace menoh_impl #endif // MENOH_GRAPH_HPP diff --git a/menoh/menoh.cpp b/menoh/menoh.cpp index dfc56d5..f6193eb 100644 --- a/menoh/menoh.cpp +++ b/menoh/menoh.cpp @@ -10,6 +10,8 @@ #include +#include +#include #include #include #include @@ -187,7 +189,7 @@ menoh_error_code menoh_model_data_add_attribute_array_to_current_node( menoh_error_code MENOH_API menoh_model_data_add_attribute_ints_to_current_node( menoh_model_data_handle model_data, const char* attribute_name, int32_t size, - const int* value) { + const int32_t* value) { return menoh_model_data_add_attribute_array_to_current_node( model_data, attribute_name, size, value); } @@ -226,10 +228,9 @@ menoh_error_code MENOH_API menoh_model_data_add_parameter( * variable_profile_table_builder */ struct menoh_variable_profile_table_builder { - std::vector>> - input_name_and_dtype_and_dims_list; - std::vector> - output_name_and_dtype_list; + std::vector> + input_name_and_profile_list; + std::vector required_output_name_list; }; menoh_error_code menoh_make_variable_profile_table_builder( @@ -249,9 +250,10 @@ menoh_error_code menoh_variable_profile_table_builder_add_input_profile( menoh_variable_profile_table_builder_handle builder, const char* name, menoh_dtype dtype, int32_t dims_size, const int32_t* dims) { return check_error([&]() { - builder->input_name_and_dtype_and_dims_list.push_back( - std::make_tuple(std::string(name), dtype, - std::vector(dims, dims + dims_size))); + builder->input_name_and_profile_list.emplace_back( + std::string(name), menoh_impl::array_profile( + static_cast(dtype), + std::vector(dims, dims + dims_size))); return menoh_error_code_success; }); } @@ -261,8 +263,9 @@ menoh_error_code menoh_variable_profile_table_builder_add_input_profile_dims_2( menoh_dtype dtype, int32_t num, int32_t size) { return check_error([&]() { std::vector dims = {num, size}; - builder->input_name_and_dtype_and_dims_list.push_back( - std::make_tuple(std::string(name), dtype, dims)); + builder->input_name_and_profile_list.emplace_back( + std::string(name), menoh_impl::array_profile( + static_cast(dtype), dims)); return menoh_error_code_success; }); } @@ -272,40 +275,48 @@ menoh_error_code menoh_variable_profile_table_builder_add_input_profile_dims_4( int32_t width) { return check_error([&]() { std::vector dims = {num, channel, height, width}; - builder->input_name_and_dtype_and_dims_list.push_back( - std::make_tuple(std::string(name), dtype, dims)); + builder->input_name_and_profile_list.emplace_back( + std::string(name), menoh_impl::array_profile( + static_cast(dtype), dims)); return menoh_error_code_success; }); } -menoh_error_code menoh_variable_profile_table_builder_add_output_profile( - menoh_variable_profile_table_builder_handle builder, const char* name, - menoh_dtype dtype) { +menoh_error_code menoh_variable_profile_table_builder_add_output_name( + 
menoh_variable_profile_table_builder_handle builder, const char* name) { return check_error([&]() { - auto found = std::find_if( - builder->output_name_and_dtype_list.begin(), - builder->output_name_and_dtype_list.end(), - [name](auto const& t) { return name == std::get<0>(t); }); - if(found != builder->output_name_and_dtype_list.end()) { + auto found = std::find(builder->required_output_name_list.begin(), + builder->required_output_name_list.end(), + std::string(name)); + if(found != builder->required_output_name_list.end()) { auto message = std::string("menoh same named variable already exist: ") + name; menoh_impl::set_last_error_message(message.c_str()); return menoh_error_code_same_named_variable_already_exist; } - builder->output_name_and_dtype_list.push_back( - std::make_tuple(std::string(name), dtype)); + builder->required_output_name_list.emplace_back(name); return menoh_error_code_success; }); } +/* + * deprecated. dtype is totally ignored. + */ +menoh_error_code menoh_variable_profile_table_builder_add_output_profile( + menoh_variable_profile_table_builder_handle builder, const char* name, + menoh_dtype dtype) { + return menoh_variable_profile_table_builder_add_output_name(builder, name); +} + /* * variable_profile_table */ struct menoh_variable_profile_table { - std::unordered_map>> + std::unordered_map input_profile_table; - std::unordered_map>> + std::unordered_map output_profile_table; + std::vector required_output_name_list; }; menoh_error_code menoh_build_variable_profile_table( @@ -313,49 +324,19 @@ menoh_error_code menoh_build_variable_profile_table( const menoh_model_data_handle model_data, menoh_variable_profile_table_handle* dst_handle) { return check_error([&]() { - std::unordered_map>> - input_profile_table; - std::transform( - builder->input_name_and_dtype_and_dims_list.begin(), - builder->input_name_and_dtype_and_dims_list.end(), - std::inserter(input_profile_table, input_profile_table.end()), - [](auto const& t) { - return std::make_pair( - std::get<0>(t), - std::make_tuple(std::get<1>(t), std::get<2>(t))); - }); - - std::vector>> - input_name_and_dims_pair_list; - std::transform( - builder->input_name_and_dtype_and_dims_list.begin(), - builder->input_name_and_dtype_and_dims_list.end(), - std::back_inserter(input_name_and_dims_pair_list), [](auto const& t) { - return std::make_pair(std::get<0>(t), std::get<2>(t)); - }); - auto output_dims_table = menoh_impl::make_output_dims_table( - model_data->model_data, input_name_and_dims_pair_list); - - std::unordered_map>> - output_profile_table; - std::transform( - builder->output_name_and_dtype_list.begin(), - builder->output_name_and_dtype_list.end(), - std::inserter(output_profile_table, output_profile_table.end()), - [&output_dims_table](auto const& t) { - std::string name; - menoh_dtype dtype; - std::tie(name, dtype) = t; - return std::make_pair( - name, std::make_tuple(dtype, menoh_impl::find_value( - output_dims_table, name))); - }); + std::unordered_map + input_profile_table(builder->input_name_and_profile_list.begin(), + builder->input_name_and_profile_list.end()); + + auto output_profile_table = + menoh_impl::complete_attribute_and_infer_shape( + model_data->model_data, input_profile_table); + *dst_handle = std::make_unique( menoh_variable_profile_table{std::move(input_profile_table), - std::move(output_profile_table)}) + std::move(output_profile_table), + builder->required_output_name_list}) .release(); return menoh_error_code_success; }); @@ -397,18 +378,16 @@ menoh_error_code 
menoh_variable_profile_table_get_dtype( const menoh_variable_profile_table_handle variable_profile_table, const char* name, menoh_dtype* dst_dtype) { return impl::menoh_variable_profile_table_get_variable_attribute( - variable_profile_table, name, - [&](std::tuple> const& t) { - *dst_dtype = std::get<0>(t); + variable_profile_table, name, [&](auto const& profile) { + *dst_dtype = static_cast(profile.dtype()); }); } menoh_error_code menoh_variable_profile_table_get_dims_size( const menoh_variable_profile_table_handle variable_profile_table, const char* name, int32_t* dst_size) { return impl::menoh_variable_profile_table_get_variable_attribute( - variable_profile_table, name, - [&](std::tuple> const& t) { - *dst_size = static_cast(std::get<1>(t).size()); + variable_profile_table, name, [&](auto const& profile) { + *dst_size = static_cast(profile.dims().size()); }); } menoh_error_code menoh_variable_profile_table_get_dims_at( @@ -416,9 +395,7 @@ menoh_error_code menoh_variable_profile_table_get_dims_at( const char* name, int32_t index, int32_t* dst_size) { return impl::menoh_variable_profile_table_get_variable_attribute( variable_profile_table, name, - [&](std::tuple> const& t) { - *dst_size = std::get<1>(t).at(index); - }); + [&](auto const& profile) { *dst_size = profile.dims().at(index); }); } menoh_error_code menoh_model_data_optimize( @@ -441,10 +418,11 @@ menoh_error_code menoh_model_data_optimize( * model builder */ struct menoh_model_builder { - std::unordered_map>> + std::unordered_map input_profile_table; - std::unordered_map>> + std::unordered_map output_profile_table; + std::vector required_output_name_list; std::unordered_map external_buffer_handle_table; }; @@ -452,12 +430,13 @@ menoh_error_code menoh_make_model_builder( const menoh_variable_profile_table_handle variable_profile_table, menoh_model_builder_handle* dst_handle) { return check_error([&]() { - *dst_handle = - std::make_unique( - menoh_model_builder{variable_profile_table->input_profile_table, - variable_profile_table->output_profile_table, - {}}) - .release(); + *dst_handle = std::make_unique( + menoh_model_builder{ + variable_profile_table->input_profile_table, + variable_profile_table->output_profile_table, + variable_profile_table->required_output_name_list, + {}}) + .release(); return menoh_error_code_success; }); } @@ -503,54 +482,44 @@ menoh_error_code menoh_build_model(const menoh_model_builder_handle builder, std::unordered_map input_table; for(auto p : builder->input_profile_table) { std::string name; - std::tuple> t; - std::tie(name, t) = p; - menoh_dtype dtype; - std::vector dims; - std::tie(dtype, dims) = t; + menoh_impl::array_profile profile; + std::tie(name, profile) = p; auto buff = builder->external_buffer_handle_table.find(name); if(buff == builder->external_buffer_handle_table.end()) { - input_table.insert( - {name, menoh_impl::array( - static_cast(dtype), dims)}); + input_table.emplace(name, menoh_impl::array(profile)); } else { - input_table.insert( - {name, - menoh_impl::array(static_cast(dtype), - dims, buff->second)}); + input_table.emplace(name, + menoh_impl::array(profile, buff->second)); } } - std::unordered_map output_table; - for(auto p : builder->output_profile_table) { + std::unordered_map + required_output_table; + for(auto const& required_output_name : + builder->required_output_name_list) { + auto p = *builder->output_profile_table.find(required_output_name); std::string name; - std::tuple> t; - std::tie(name, t) = p; - menoh_dtype dtype; - std::vector dims; - std::tie(dtype, dims) = 
t; + menoh_impl::array_profile profile; + std::tie(name, profile) = p; auto buff = builder->external_buffer_handle_table.find(name); if(buff == builder->external_buffer_handle_table.end()) { - output_table.insert( - {name, menoh_impl::array( - static_cast(dtype), dims)}); + required_output_table.emplace(name, menoh_impl::array(profile)); } else { - output_table.insert( - {name, - menoh_impl::array(static_cast(dtype), - dims, buff->second)}); + required_output_table.emplace( + name, menoh_impl::array(profile, buff->second)); } } *dst_model_handle = std::make_unique( - menoh_model{input_table, output_table, + menoh_model{input_table, required_output_table, menoh_impl::make_model_core( - input_table, output_table, model_data->model_data, + input_table, required_output_table, + builder->output_profile_table, model_data->model_data, backend_name, backend_config)}) .release(); return menoh_error_code_success; diff --git a/menoh/mkldnn/operator/pool.cpp b/menoh/mkldnn/operator/pool.cpp index 18f7fcf..8b6e8b3 100644 --- a/menoh/mkldnn/operator/pool.cpp +++ b/menoh/mkldnn/operator/pool.cpp @@ -9,6 +9,8 @@ #include #include +#include // for unsupported_operator error + namespace menoh_impl { namespace mkldnn_backend { @@ -94,6 +96,9 @@ namespace menoh_impl { variable_memory_table, std::unordered_map const& required_output_table, mkldnn::engine const& engine) { + if(node.output_name_list.size() != 1) { + throw unsupported_operator("MaxPool issuing multiple outputs"); + } return make_pool_primitive( node, variable_memory_table, required_output_table, engine); } diff --git a/menoh/mkldnn_with_generic_fallback/backend/generic/generic_context.cpp b/menoh/mkldnn_with_generic_fallback/backend/generic/generic_context.cpp index e810b35..2c3f964 100644 --- a/menoh/mkldnn_with_generic_fallback/backend/generic/generic_context.cpp +++ b/menoh/mkldnn_with_generic_fallback/backend/generic/generic_context.cpp @@ -18,6 +18,8 @@ namespace menoh_impl { std::unordered_map const& common_input_table, std::unordered_map const& required_output_table, + std::unordered_map const& + output_profile_table, std::vector< std::pair>> const& context_list, @@ -101,23 +103,40 @@ namespace menoh_impl { assert(is_found_from_other_context); } while(false); } + std::vector output_list; + for(auto const& output_name : node.output_name_list) { + auto found = required_output_table.find(output_name); + if(found == required_output_table.end()) { + // allocate new array by using profile + output_list.push_back( + array(output_profile_table.at(output_name))); + } else { + // use already allocated array + output_list.push_back(found->second); + } + } + procedure op_proc; - std::vector> new_outputs; try { auto factory = procedure_factory_table_.at(node.op_type); - std::tie(op_proc, new_outputs) = - factory.operator()(current_index, node_list, - input_list, required_output_table); - } catch(...) 
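With the builder now storing array_profile for inputs and bare names for outputs, a client only names the outputs it needs and lets complete_attribute_and_infer_shape work out their dtype and dims. A rough usage sketch of the revised C API (error codes unchecked; model_data is a menoh_model_data_handle loaded elsewhere, and "input"/"softmax_out" are placeholder variable names):

menoh_variable_profile_table_builder_handle builder;
menoh_make_variable_profile_table_builder(&builder);

/* inputs still carry an explicit dtype and dims */
menoh_variable_profile_table_builder_add_input_profile_dims_4(
    builder, "input", menoh_dtype_float, 1, 3, 224, 224);

/* outputs are registered by name only; the deprecated
 * ..._add_output_profile(builder, name, dtype) still works
 * but its dtype argument is ignored */
menoh_variable_profile_table_builder_add_output_name(builder, "softmax_out");

menoh_variable_profile_table_handle vpt;
menoh_build_variable_profile_table(builder, model_data, &vpt);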
{ break; } + op_proc = + factory.operator()(node, input_list, output_list); + } catch(std::exception const& e) { + *logger << e.what() << std::endl; + break; + } new_op_proc_list.push_back(op_proc); procedure_list.insert( procedure_list.end(), std::make_move_iterator(new_copy_procedure_list.begin()), std::make_move_iterator(new_copy_procedure_list.end())); - variable_table_.insert( - std::make_move_iterator(new_outputs.begin()), - std::make_move_iterator(new_outputs.end())); + + assert(node.output_name_list.size() == output_list.size()); + for(int i = 0; i < node.output_name_list.size(); ++i) { + variable_table_.emplace(node.output_name_list.at(i), + output_list.at(i)); + } } // when no nodes are processed diff --git a/menoh/mkldnn_with_generic_fallback/backend/generic/generic_context.hpp b/menoh/mkldnn_with_generic_fallback/backend/generic/generic_context.hpp index 7a1bb2b..cdfda3f 100644 --- a/menoh/mkldnn_with_generic_fallback/backend/generic/generic_context.hpp +++ b/menoh/mkldnn_with_generic_fallback/backend/generic/generic_context.hpp @@ -32,6 +32,8 @@ namespace menoh_impl { common_input_table, std::unordered_map const& required_output_table, + std::unordered_map const& + output_profile_table, std::vector< std::pair>> const& context_list, @@ -43,10 +45,11 @@ namespace menoh_impl { return variable_table_.at(name); } - using procedure_factory = std::function>>( - int, std::vector const&, std::vector const&, - std::unordered_map const&)>; + using procedure_factory = std::function const&, // input list + std::vector const& // output list + )>; optional> try_to_get_input_from_common_table( std::string const& input_name, diff --git a/menoh/mkldnn_with_generic_fallback/backend/generic/operator/relu.hpp b/menoh/mkldnn_with_generic_fallback/backend/generic/operator/relu.hpp index 092f59e..98e258a 100644 --- a/menoh/mkldnn_with_generic_fallback/backend/generic/operator/relu.hpp +++ b/menoh/mkldnn_with_generic_fallback/backend/generic/operator/relu.hpp @@ -1,46 +1,27 @@ #ifndef MENOH_IMPL_MKLDNN_WITH_GENERIC_FALLBACK_BACKEND_BACKEND_GENERIC_OPERATOR_RELU_HPP #define MENOH_IMPL_MKLDNN_WITH_GENERIC_FALLBACK_BACKEND_BACKEND_GENERIC_OPERATOR_RELU_HPP +#include #include namespace menoh_impl { namespace mkldnn_with_generic_fallback_backend { namespace generic_backend { - inline std::tuple>> - make_relu(int node_index, std::vector const& node_list, - std::vector const& input_list, - std::unordered_map const& - required_output_table) { + inline procedure make_relu(node const& node, + std::vector const& input_list, + std::vector const& output_list) { assert(input_list.size() == 1); - auto const& node = node_list.at(node_index); + assert(output_list.size() == 1); - auto const& x_arr = input_list.at(0); - - auto found = - required_output_table.find(node.output_name_list.at(0)); - optional output_opt; - if(found == required_output_table.end()) { - output_opt = array(dtype_t::float_, - x_arr.dims()); // TODO check inplace-able - } else { - output_opt = - found->second; // output is required so not inplace-able - } - - auto procedure = [x_arr, output = *output_opt]() { - for(decltype(total_size(x_arr)) i = 0; - i < total_size(x_arr); ++i) { - fat(output, i) = std::max(fat(x_arr, i), 0.f); + auto procedure = [input = input_list.at(0), + output = output_list.at(0)]() { + for(decltype(total_size(input)) i = 0; + i < total_size(input); ++i) { + fat(output, i) = std::max(fat(input, i), 0.f); } }; - std::vector> outputs; - if(found == required_output_table.end()) { - outputs.push_back(std::pair( - 
node.output_name_list.at(0), *output_opt)); - } - return std::make_tuple(procedure, outputs); + return procedure; } } // namespace generic_backend diff --git a/menoh/mkldnn_with_generic_fallback/backend/mkldnn/mkldnn_context.cpp b/menoh/mkldnn_with_generic_fallback/backend/mkldnn/mkldnn_context.cpp index 243a9e3..14ea6b8 100644 --- a/menoh/mkldnn_with_generic_fallback/backend/mkldnn/mkldnn_context.cpp +++ b/menoh/mkldnn_with_generic_fallback/backend/mkldnn/mkldnn_context.cpp @@ -22,6 +22,8 @@ namespace menoh_impl { std::unordered_map const& common_input_table, std::unordered_map const& required_output_table, + std::unordered_map const& + output_profile_table, std::vector< std::pair>> const& context_list, @@ -157,5 +159,5 @@ namespace menoh_impl { } } // namespace mkldnn_backend - } // namespace mkldnn_with_generic_fallback_backend + } // namespace mkldnn_with_generic_fallback_backend } // namespace menoh_impl diff --git a/menoh/mkldnn_with_generic_fallback/backend/mkldnn/mkldnn_context.hpp b/menoh/mkldnn_with_generic_fallback/backend/mkldnn/mkldnn_context.hpp index 65fb382..493095b 100644 --- a/menoh/mkldnn_with_generic_fallback/backend/mkldnn/mkldnn_context.hpp +++ b/menoh/mkldnn_with_generic_fallback/backend/mkldnn/mkldnn_context.hpp @@ -27,7 +27,7 @@ namespace menoh_impl { auto dims = ::menoh_impl::mkldnn_backend::extract_dims(found->second); auto variable_memory = found->second; // mutable - procedure copy_proc(nullptr); // mutable + procedure copy_proc(nullptr); // mutable if(dims.size() == 4) { auto format = static_cast( found->second.get_primitive_desc() @@ -68,6 +68,8 @@ namespace menoh_impl { common_input_table, std::unordered_map const& required_output_table, + std::unordered_map const& + output_profile_table, std::vector< std::pair>> const& context_list, diff --git a/menoh/mkldnn_with_generic_fallback/context.hpp b/menoh/mkldnn_with_generic_fallback/context.hpp index f8aafbc..de56e1f 100644 --- a/menoh/mkldnn_with_generic_fallback/context.hpp +++ b/menoh/mkldnn_with_generic_fallback/context.hpp @@ -29,6 +29,8 @@ namespace menoh_impl { std::unordered_map const& common_input_table, std::unordered_map const& required_output_table, + std::unordered_map const& + output_profile_table, std::vector< std::pair>> const& context_list, @@ -36,7 +38,8 @@ namespace menoh_impl { return do_process_node_list( context_name, current_index, node_list, common_parameter_table, common_input_table, - required_output_table, context_list, logger); + required_output_table, output_profile_table, context_list, + logger); } // for specialized optimization across backends @@ -57,6 +60,8 @@ namespace menoh_impl { std::unordered_map const& common_input_table, std::unordered_map const& required_output_table, + std::unordered_map const& + output_profile_table, std::vector< std::pair>> const& context_list, diff --git a/menoh/mkldnn_with_generic_fallback/model_core.cpp b/menoh/mkldnn_with_generic_fallback/model_core.cpp index a39c678..ad9b019 100644 --- a/menoh/mkldnn_with_generic_fallback/model_core.cpp +++ b/menoh/mkldnn_with_generic_fallback/model_core.cpp @@ -25,6 +25,8 @@ namespace menoh_impl { context_list, std::unordered_map const& input_table, std::unordered_map const& output_table, + std::unordered_map const& + output_profile_table, menoh_impl::model_data const& model_data, backend_config const& config) : menoh_impl::model_core(), @@ -72,7 +74,8 @@ namespace menoh_impl { context->process_node_list( context_name, current_index, graph.node_list(), common_parameter_table_, common_input_table_, - 
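Under the new procedure_factory signature a factory no longer allocates its outputs or consults required_output_table; the context allocates every output array from output_profile_table (or reuses a required one) and hands the factory ready-made input and output lists. A hypothetical second generic kernel in the same style as make_relu, just to illustrate the shape of such a factory ("Negate" is an invented example, not an operator added by this patch):

// lives alongside relu.hpp; same includes and namespaces assumed
inline procedure make_negate(node const& /*node*/,
                             std::vector<array> const& input_list,
                             std::vector<array> const& output_list) {
    assert(input_list.size() == 1);
    assert(output_list.size() == 1);
    // outputs are pre-allocated by generic_context, so the factory only
    // captures the arrays and fills the output when the procedure runs
    return [input = input_list.at(0), output = output_list.at(0)]() {
        for(decltype(total_size(input)) i = 0; i < total_size(input); ++i) {
            fat(output, i) = -fat(input, i);
        }
    };
}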
required_output_table_, context_list_, logger_.get()); + required_output_table_, output_profile_table, + context_list_, logger_.get()); // if succeeded processing, add procedures into // procedure_list @@ -102,9 +105,9 @@ namespace menoh_impl { } // if any context can not process the node if(!is_found) { - *logger_ << "failed to interpret" - << graph.node_list().at(current_index).op_type - << "with all context"; + *logger_ + << "failed to interpret: no contexts can interpret '" + << node.op_type << "'"; throw unsupported_operator(node.op_type); } } diff --git a/menoh/mkldnn_with_generic_fallback/model_core.hpp b/menoh/mkldnn_with_generic_fallback/model_core.hpp index e4cc405..dc19dc4 100644 --- a/menoh/mkldnn_with_generic_fallback/model_core.hpp +++ b/menoh/mkldnn_with_generic_fallback/model_core.hpp @@ -24,6 +24,8 @@ namespace menoh_impl { context_list, std::unordered_map const& input_table, std::unordered_map const& output_table, + std::unordered_map const& + output_profile_table, menoh_impl::model_data const& model_data, backend_config const& config); private: diff --git a/menoh/model.cpp b/menoh/model.cpp deleted file mode 100644 index 24be8e9..0000000 --- a/menoh/model.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include - -#include - -namespace menoh_impl { - - model::model( - std::vector< - std::tuple, void*>> const& - input_name_and_dtype_and_dims_and_data_handle_list, - std::vector> const& - required_output_name_and_dtype_and_data_handle_list, - menoh_impl::model_data const& model_data, std::string const& backend_name, - backend_config const& config) { - std::vector>> - input_name_and_dims_pair_list; - for(auto const& t : - input_name_and_dtype_and_dims_and_data_handle_list) { - std::string input_name; - dtype_t dtype; - std::vector input_dims; - void* data_handle; - std::tie(input_name, dtype, input_dims, data_handle) = t; - assert(input_table_.find(input_name) == input_table_.end()); - if(data_handle) { - input_table_.insert( - {input_name, array(dtype, input_dims, data_handle)}); - } else { - input_table_.insert({input_name, array(dtype, input_dims)}); - } - input_name_and_dims_pair_list.push_back({input_name, input_dims}); - } - - auto output_dims_table = - make_output_dims_table(model_data, input_name_and_dims_pair_list); - - std::unordered_map output_table; - for(auto const& t : - required_output_name_and_dtype_and_data_handle_list) { - std::string output_name; - dtype_t dtype; - void* data_handle; - std::tie(output_name, dtype, data_handle) = t; - assert(output_table_.find(output_name) == output_table_.end()); - if(data_handle) { - output_table_.insert( - {output_name, - array(dtype, find_value(output_dims_table, output_name), - data_handle)}); - } else { - output_table_.insert( - {output_name, - array(dtype, find_value(output_dims_table, output_name))}); - } - } - - model_ = make_model_core(input_table_, output_table_, model_data, - backend_name, config); - } - - array const& model::input(std::string const& name) const { - return find_value(input_table_, name); - } - - array const& model::output(std::string const& name) const { - return find_value(output_table_, name); - } - - void model::run() { model_->run(); } - -} // namespace menoh_impl diff --git a/menoh/model.hpp b/menoh/model.hpp deleted file mode 100644 index 0664ef9..0000000 --- a/menoh/model.hpp +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef MENOH_MODEL_HPP -#define MENOH_MODEL_HPP - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -namespace menoh_impl { - - 
class model { - public: - model( - std::vector< - std::tuple, void*>> const& - input_name_and_dtype_and_dims_and_data_handle_list, - std::vector> const& - required_output_name_and_dtype_and_data_handle_list, - menoh_impl::model_data const& model_data, std::string const& backend_name, - backend_config const& config = backend_config()); - - array const& input(std::string const& name) const; - - array const& output(std::string const& name) const; - - void run(); - - private: - std::unordered_map input_table_; - std::unordered_map output_table_; - std::unique_ptr model_; - }; - -} // namespace menoh_impl -#endif // MENOH_MODEL_HPP diff --git a/menoh/model_core_factory.cpp b/menoh/model_core_factory.cpp index 9d1a582..aa47dd4 100644 --- a/menoh/model_core_factory.cpp +++ b/menoh/model_core_factory.cpp @@ -11,13 +11,15 @@ namespace menoh_impl { std::unique_ptr make_model_core(std::unordered_map const& input_table, - std::unordered_map const& output_table, + std::unordered_map const& required_output_table, + std::unordered_map const& + output_profile_table, menoh_impl::model_data const& model_data, std::string const& backend_name, backend_config const& config) { if(backend_name == "mkldnn") { return std::make_unique( - mkldnn_backend::make_model_core(input_table, output_table, + mkldnn_backend::make_model_core(input_table, required_output_table, model_data, config)); } else if(backend_name == "mkldnn_with_generic_fallback") { using namespace mkldnn_with_generic_fallback_backend; @@ -32,8 +34,8 @@ namespace menoh_impl { generic_backend::generic_context>()); return std::make_unique< mkldnn_with_generic_fallback_backend::model_core>( - std::move(context_list), input_table, output_table, model_data, - config); + std::move(context_list), input_table, required_output_table, + output_profile_table, model_data, config); } throw invalid_backend_name(backend_name); diff --git a/menoh/model_core_factory.hpp b/menoh/model_core_factory.hpp index d249057..52037e7 100644 --- a/menoh/model_core_factory.hpp +++ b/menoh/model_core_factory.hpp @@ -22,12 +22,13 @@ namespace menoh_impl { struct model_data; - std::unique_ptr - make_model_core(std::unordered_map const& input_table, - std::unordered_map const& output_table, - menoh_impl::model_data const& model_data, - std::string const& backend_name, - backend_config const& config = backend_config()); + std::unique_ptr make_model_core( + std::unordered_map const& input_table, + std::unordered_map const& required_output_table, + std::unordered_map const& + output_profile_table, + menoh_impl::model_data const& model_data, std::string const& backend_name, + backend_config const& config = backend_config()); } // namespace menoh_impl diff --git a/menoh/onnx.cpp b/menoh/onnx.cpp index e1813e6..dce8553 100644 --- a/menoh/onnx.cpp +++ b/menoh/onnx.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -23,7 +22,7 @@ namespace menoh_impl { - auto tensor_proto_data_type_to_dtype(onnx::TensorProto_DataType tpdt) { + dtype_t tensor_proto_data_type_to_dtype(onnx::TensorProto_DataType tpdt) { if(tpdt == onnx::TensorProto_DataType_FLOAT) { return dtype_t::float_; } @@ -167,7 +166,6 @@ namespace menoh_impl { return node_list; } - model_data make_model_from_onnx(onnx::ModelProto& onnx_model) { // onnx opset version check if(onnx_model.opset_import_size() != 0) { @@ -183,10 +181,6 @@ namespace menoh_impl { trim_dropout(node_list); trim_reshape(node_list); - std::random_device rd; - std::mt19937 g(rd()); - std::shuffle(node_list.begin(), node_list.end(), g); - 
std::vector all_parameter_name_list; all_parameter_name_list.reserve( onnx_model.graph().initializer().size()); @@ -229,14 +223,17 @@ namespace menoh_impl { return make_model_from_onnx(onnx_model); } - model_data make_model_data_from_onnx_data_on_memory(const uint8_t* onnx_data, int32_t size) { + model_data + make_model_data_from_onnx_data_on_memory(const uint8_t* onnx_data, + int32_t size) { namespace gpio = ::google::protobuf::io; gpio::ArrayInputStream ais(onnx_data, size); gpio::CodedInputStream cis(&ais); cis.SetTotalBytesLimit(std::numeric_limits::max(), std::numeric_limits::max()); onnx::ModelProto onnx_model; - if(!onnx_model.ParseFromCodedStream(&cis) || !cis.ConsumedEntireMessage()) { + if(!onnx_model.ParseFromCodedStream(&cis) || + !cis.ConsumedEntireMessage()) { throw onnx_parse_error("parse binary onnx data on memory"); } return make_model_from_onnx(onnx_model); diff --git a/scripts/build-menoh.sh b/scripts/build-menoh.sh new file mode 100755 index 0000000..5740377 --- /dev/null +++ b/scripts/build-menoh.sh @@ -0,0 +1,92 @@ +#!/bin/bash -e + +BASE_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) + +# retrieve arguments +while [[ $# != 0 ]]; do + case $1 in + --) + shift + break + ;; + --build-type) + readonly ARG_BUILD_TYPE="$2" + shift 2 + ;; + --source-dir) + readonly ARG_SOURCE_DIR="$2" + shift 2 + ;; + --build-dir) + readonly ARG_BUILD_DIR="$2" + shift 2 + ;; + --install-dir) + readonly ARG_INSTALL_DIR="$2" + shift 2 + ;; + --mkldnn-dir) + readonly ARG_MKLDNN_DIR="$2" + shift 2 + ;; + --python-executable) + ARG_PYTHON_EXECUTABLE="$2" + shift 2 + ;; + --link-static-libgcc) + readonly ARG_LINK_STATIC_LIBGCC="$2" + shift 2 + ;; + --link-static-libstdcxx) + readonly ARG_LINK_STATIC_LIBSTDCXX="$2" + shift 2 + ;; + --link-static-libprotobuf) + readonly ARG_LINK_STATIC_LIBPROTOBUF="$2" + shift 2 + ;; + -*) + echo Unknown option \"$1\" 1>&2 + exit + ;; + *) + break + ;; + + esac +done + +# options that have default value +test -n "${ARG_BUILD_TYPE}" || readonly ARG_BUILD_TYPE=Debug +test -n "${ARG_SOURCE_DIR}" || readonly ARG_SOURCE_DIR="${BASE_DIR}/.." 
+test -n "${ARG_BUILD_DIR}" || readonly ARG_BUILD_DIR="${ARG_SOURCE_DIR}/build" +test -n "${ARG_INSTALL_DIR}" || readonly ARG_INSTALL_DIR=/usr/local + +if [ -n "${ARG_MKLDNN_DIR}" ]; then + OPT_MKLDNN_INCLUDE_DIR=-DMKLDNN_INCLUDE_DIR=${ARG_MKLDNN_DIR}/include + OPT_MKLDNN_LIBRARY=-DMKLDNN_LIBRARY=${ARG_MKLDNN_DIR}/lib/libmkldnn.so +fi + +test -n "${ARG_PYTHON_EXECUTABLE}" || readonly ARG_PYTHON_EXECUTABLE=python +test -n "${ARG_LINK_STATIC_LIBGCC}" || readonly ARG_LINK_STATIC_LIBGCC='OFF' +test -n "${ARG_LINK_STATIC_LIBSTDCXX}" || readonly ARG_LINK_STATIC_LIBSTDCXX='OFF' +test -n "${ARG_LINK_STATIC_LIBPROTOBUF}" || readonly ARG_LINK_STATIC_LIBPROTOBUF='OFF' + +echo -e "\e[33;1mBuilding Menoh\e[0m" + +[ -d "${ARG_BUILD_DIR}" ] || mkdir -p "${ARG_BUILD_DIR}" + +cd "${ARG_BUILD_DIR}" +cmake \ + -DCMAKE_BUILD_TYPE=${ARG_BUILD_TYPE} \ + "-DCMAKE_INSTALL_PREFIX=${ARG_INSTALL_DIR}" \ + "${OPT_MKLDNN_INCLUDE_DIR}" \ + "${OPT_MKLDNN_LIBRARY}" \ + -DPYTHON_EXECUTABLE=${ARG_PYTHON_EXECUTABLE} \ + -DLINK_STATIC_LIBGCC=${ARG_LINK_STATIC_LIBGCC} \ + -DLINK_STATIC_LIBSTDCXX=${ARG_LINK_STATIC_LIBSTDCXX} \ + -DLINK_STATIC_LIBPROTOBUF=${ARG_LINK_STATIC_LIBPROTOBUF} \ + -DENABLE_TEST=ON \ + "${ARG_SOURCE_DIR}" + +make diff --git a/.travis/install-mkldnn.sh b/scripts/build-mkldnn.sh old mode 100644 new mode 100755 similarity index 50% rename from .travis/install-mkldnn.sh rename to scripts/build-mkldnn.sh index cb797e8..52ed504 --- a/.travis/install-mkldnn.sh +++ b/scripts/build-mkldnn.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash -e # retrieve arguments while [[ $# != 0 ]]; do @@ -8,55 +8,65 @@ while [[ $# != 0 ]]; do break ;; --version) - ARG_VERSION="$2" + readonly ARG_VERSION="$2" shift 2 ;; --download-dir) - ARG_DOWNLOAD_DIR="$2" + readonly ARG_DOWNLOAD_DIR="$2" + shift 2 + ;; + --extract-dir) + readonly ARG_EXTRACT_DIR="$2" shift 2 ;; --build-dir) - ARG_BUILD_DIR="$2" + readonly ARG_BUILD_DIR="$2" shift 2 ;; --install-dir) - ARG_INSTALL_DIR="$2" + readonly ARG_INSTALL_DIR="$2" shift 2 ;; --parallel) - ARG_PARALLEL="$2" + readonly ARG_PARALLEL="$2" shift 2 ;; -*) - err Unknown option \"$1\" + echo Unknown option \"$1\" 1>&2 exit ;; *) break ;; - esac done # validate the arguments -test -n "${ARG_VERSION}" || { echo "--version is not specified" 1>&2; exit 1; } test -n "${ARG_DOWNLOAD_DIR}" || { echo "--download-dir is not specified" 1>&2; exit 1; } -test -n "${ARG_BUILD_DIR}" || { echo "--build-dir is not specified" 1>&2; exit 1; } -test -n "${ARG_INSTALL_DIR}" || { echo "--install-dir is not specified" 1>&2; exit 1; } -test -n "${ARG_PARALLEL}" || ARG_PARALLEL=1 +test -n "${ARG_EXTRACT_DIR}" || { echo "--extract-dir is not specified" 1>&2; exit 1; } + +# options that have default value +test -n "${ARG_VERSION}" || readonly ARG_VERSION=0.16 + +readonly LIBRARY_NAME=mkl-dnn-${ARG_VERSION} +readonly SOURCE_DIR="${ARG_EXTRACT_DIR}/${LIBRARY_NAME}" + +test -n "${ARG_BUILD_DIR}" || readonly ARG_BUILD_DIR="${SOURCE_DIR}/build" +test -n "${ARG_INSTALL_DIR}" || readonly ARG_INSTALL_DIR=/usr/local +test -n "${ARG_PARALLEL}" || readonly ARG_PARALLEL=1 # download (if it isn't cached) -if [ ! -e "${ARG_BUILD_DIR}/mkl-dnn-${ARG_VERSION}/LICENSE" ]; then +if [ ! -e "${SOURCE_DIR}/LICENSE" ]; then echo -e "\e[33;1mDownloading libmkldnn\e[0m" - [ -d "${ARG_DOWNLOAD_DIR}" ] || mkdir -p ${ARG_DOWNLOAD_DIR} + [ -d "${ARG_DOWNLOAD_DIR}" ] || mkdir -p "${ARG_DOWNLOAD_DIR}" - cd ${ARG_DOWNLOAD_DIR} - if [ ! -e "mkl-dnn-${ARG_VERSION}.tar.gz" ]; then + cd "${ARG_DOWNLOAD_DIR}" + if [ ! 
-e "${LIBRARY_NAME}.tar.gz" ]; then download_url="https://github.com/intel/mkl-dnn/archive/v${ARG_VERSION}.tar.gz" - wget -O mkl-dnn-${ARG_VERSION}.tar.gz ${download_url} + wget -O ${LIBRARY_NAME}.tar.gz ${download_url} fi - tar -zxf mkl-dnn-${ARG_VERSION}.tar.gz -C ${ARG_BUILD_DIR} + tar -zxf ${LIBRARY_NAME}.tar.gz -C "${ARG_EXTRACT_DIR}" echo -e "\e[32;1mlibmkldnn was successfully downloaded.\e[0m" else @@ -64,33 +74,26 @@ else fi # build (if it isn't cached) -if [ ! -e "${ARG_BUILD_DIR}/mkl-dnn-${ARG_VERSION}/build/src/libmkldnn.so" ]; then +if [ ! -e "${ARG_BUILD_DIR}/src/libmkldnn.so" ]; then echo -e "\e[33;1mBuilding libmkldnn\e[0m" - cd ${ARG_BUILD_DIR}/mkl-dnn-${ARG_VERSION}/scripts + cd "${SOURCE_DIR}/scripts" ./prepare_mkl.sh - cd ${ARG_BUILD_DIR}/mkl-dnn-${ARG_VERSION} - [ -d "build" ] || mkdir -p build + [ -d "${ARG_BUILD_DIR}" ] || mkdir -p "${ARG_BUILD_DIR}" - cd build + cd "${ARG_BUILD_DIR}" cmake \ -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=${ARG_INSTALL_DIR} \ + "-DCMAKE_INSTALL_PREFIX=${ARG_INSTALL_DIR}" \ -DWITH_TEST=OFF \ -DWITH_EXAMPLE=OFF \ -DARCH_OPT_FLAGS='' \ -Wno-error=unused-result \ - .. + "${SOURCE_DIR}" make -j${ARG_PARALLEL} echo -e "\e[32;1mlibmkldnn was successfully built.\e[0m" else echo -e "\e[32;1mlibmkldnn has been built.\e[0m" fi - -# install (always) -echo -e "\e[33;1mInstalling libmkldnn\e[0m" - -cd ${ARG_BUILD_DIR}/mkl-dnn-${ARG_VERSION}/build -make install/strip diff --git a/.travis/install-protobuf.sh b/scripts/build-protobuf.sh old mode 100644 new mode 100755 similarity index 51% rename from .travis/install-protobuf.sh rename to scripts/build-protobuf.sh index f3b4fd5..f694b8b --- a/.travis/install-protobuf.sh +++ b/scripts/build-protobuf.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash -e # retrieve arguments while [[ $# != 0 ]]; do @@ -8,55 +8,65 @@ while [[ $# != 0 ]]; do break ;; --version) - ARG_VERSION="$2" + readonly ARG_VERSION="$2" shift 2 ;; --download-dir) - ARG_DOWNLOAD_DIR="$2" + readonly ARG_DOWNLOAD_DIR="$2" + shift 2 + ;; + --extract-dir) + readonly ARG_EXTRACT_DIR="$2" shift 2 ;; --build-dir) - ARG_BUILD_DIR="$2" + readonly ARG_BUILD_DIR="$2" shift 2 ;; --install-dir) - ARG_INSTALL_DIR="$2" + readonly ARG_INSTALL_DIR="$2" shift 2 ;; --parallel) - ARG_PARALLEL="$2" + readonly ARG_PARALLEL="$2" shift 2 ;; -*) - err Unknown option \"$1\" + echo Unknown option \"$1\" 1>&2 exit ;; *) break ;; - esac done # validate the arguments -test -n "${ARG_VERSION}" || { echo "--version is not specified" 1>&2; exit 1; } test -n "${ARG_DOWNLOAD_DIR}" || { echo "--download-dir is not specified" 1>&2; exit 1; } -test -n "${ARG_BUILD_DIR}" || { echo "--build-dir is not specified" 1>&2; exit 1; } -test -n "${ARG_INSTALL_DIR}" || { echo "--install-dir is not specified" 1>&2; exit 1; } -test -n "${ARG_PARALLEL}" || ARG_PARALLEL=1 +test -n "${ARG_EXTRACT_DIR}" || { echo "--extract-dir is not specified" 1>&2; exit 1; } + +# options that have default value +test -n "${ARG_VERSION}" || readonly ARG_VERSION=3.6.1 + +readonly LIBRARY_NAME=protobuf-${ARG_VERSION} +readonly SOURCE_DIR="${ARG_EXTRACT_DIR}/${LIBRARY_NAME}" + +test -n "${ARG_BUILD_DIR}" || readonly ARG_BUILD_DIR="${SOURCE_DIR}" +test -n "${ARG_INSTALL_DIR}" || readonly ARG_INSTALL_DIR=/usr/local +test -n "${ARG_PARALLEL}" || readonly ARG_PARALLEL=1 # download (if it isn't cached) -if [ ! -e "${ARG_BUILD_DIR}/protobuf-${ARG_VERSION}/LICENSE" ]; then +if [ ! 
-e "${SOURCE_DIR}/LICENSE" ]; then echo -e "\e[33;1mDownloading libprotobuf\e[0m" - [ -d "${ARG_DOWNLOAD_DIR}" ] || mkdir -p ${ARG_DOWNLOAD_DIR} + [ -d "${ARG_DOWNLOAD_DIR}" ] || mkdir -p "${ARG_DOWNLOAD_DIR}" - cd ${ARG_DOWNLOAD_DIR} + cd "${ARG_DOWNLOAD_DIR}" if [ ! -e "protobuf-cpp-${ARG_VERSION}.tar.gz" ]; then download_dir="https://github.com/protocolbuffers/protobuf/releases/download/v${ARG_VERSION}/protobuf-cpp-${ARG_VERSION}.tar.gz" wget ${download_dir} fi - tar -zxf protobuf-cpp-${ARG_VERSION}.tar.gz -C ${ARG_BUILD_DIR} + tar -zxf protobuf-cpp-${ARG_VERSION}.tar.gz -C "${ARG_EXTRACT_DIR}" echo -e "\e[32;1mlibprotobuf was successfully downloaded.\e[0m" else @@ -64,20 +74,16 @@ else fi # build (if it isn't cached) -if [ ! -e "${ARG_BUILD_DIR}/protobuf-${ARG_VERSION}/src/libprotobuf.la" ]; then +if [ ! -e "${ARG_BUILD_DIR}/src/libprotobuf.la" ]; then echo -e "\e[33;1mBuilding libprotobuf\e[0m" - cd ${ARG_BUILD_DIR}/protobuf-${ARG_VERSION} - ./configure --prefix=${ARG_INSTALL_DIR} CFLAGS=-fPIC CXXFLAGS=-fPIC + [ -d "${ARG_BUILD_DIR}" ] || mkdir -p "${ARG_BUILD_DIR}" + + cd "${ARG_BUILD_DIR}" + "${SOURCE_DIR}/configure" --prefix="${ARG_INSTALL_DIR}" CFLAGS="-g -O2 -fPIC" CXXFLAGS="-g -O2 -fPIC" make -j${ARG_PARALLEL} echo -e "\e[32;1mlibprotobuf was successfully built.\e[0m" else echo -e "\e[32;1mlibprotobuf has been built.\e[0m" fi - -# install (always) -echo -e "\e[33;1mInstalling libprotobuf\e[0m" - -cd ${ARG_BUILD_DIR}/protobuf-${ARG_VERSION} -make install diff --git a/scripts/gen_attribute_completion_and_shape_inference_hpp.py b/scripts/gen_attribute_completion_and_shape_inference_hpp.py new file mode 100644 index 0000000..5913d0a --- /dev/null +++ b/scripts/gen_attribute_completion_and_shape_inference_hpp.py @@ -0,0 +1,322 @@ +import os + + +def make_completion_code(op_name, + attribute_list=[], + shape_inference_code=''' +assert(node.input_name_list.size() > 0); +assert(node.output_name_list.size() > 0); +add_variable_to_table(output(0), dtype_of(input(0)), dims_of(input(0))); +''', + preprocess="", + postprocess=""): + # attribute completion and definition + attribute_completion_code_list = [] + attribute_definition_list = [] + for attribute in attribute_list: + attr_name, attr_type, default_value = attribute + inner_code = '' + if default_value is None: + inner_code = ''' +assert(!"attribute not found: {attr_name}"); +'''.format(attr_name=attr_name) + else: + inner_code = ''' +node.attribute_table.emplace( + "{attr_name}", {default_value}); +'''.format(attr_name=attr_name, default_value=default_value) + + attribute_completion_code = ''' +{{ + auto found = node.attribute_table.find("{attr_name}"); + if(found == node.attribute_table.end()) {{ + {code} + }} +}} +'''.format(attr_name=attr_name, attr_type=attr_type, code=inner_code) + attribute_completion_code_list.append(attribute_completion_code) + + attribute_definition = ''' +auto {attr_name} = get<{attr_type}>(node.attribute_table.at("{attr_name}")); +'''.format(attr_name=attr_name, attr_type=attr_type) + attribute_definition_list.append(attribute_definition) + # end for + + template = ''' +if(node.op_type == "{op_name}") {{ + {preprocess} + {attribute_completion_code} + {postprocess} + {{ + {attribute_definition} + {shape_inference_code} + }} +}} +else +''' + return template.format( + op_name=op_name, + preprocess=preprocess, + attribute_definition="\n".join(attribute_definition_list), + shape_inference_code=shape_inference_code, + postprocess=postprocess, + attribute_completion_code="\n".join( + 
attribute_completion_code_list)) + + +def main(): + template = """ +#ifndef MENOH_ATTRIBUTE_COMPLETION_AND_SHAPE_INFERENCE_HPP +#define MENOH_ATTRIBUTE_COMPLETION_AND_SHAPE_INFERENCE_HPP +/* + * This file is generated by {script_name} + * Do NOT modify this file directly + */ +#include +#include +#include +#include + +#include +#include + +namespace menoh_impl {{ + inline auto complete_attribute_and_infer_shape( + model_data& model_data, + std::unordered_map const& + input_profile_table) {{ + using ints = std::vector; + std::unordered_map variable_profile_table( + input_profile_table.begin(), input_profile_table.end()); + std::transform( + model_data.parameter_name_and_array_list.begin(), + model_data.parameter_name_and_array_list.end(), + std::inserter(variable_profile_table, + variable_profile_table.end()), + [](auto const& p){{ + return std::make_pair( + p.first, + array_profile(p.second.dtype(), p.second.dims())); }}); + auto profile_of = [&variable_profile_table](std::string const& name){{ + assert(variable_profile_table.find(name) != + variable_profile_table.end()); + return variable_profile_table.at(name); + }}; + auto dims_of = [&variable_profile_table, profile_of]( + std::string const& name){{ + return profile_of(name).dims(); + }}; + auto dtype_of = [&variable_profile_table, profile_of]( + std::string const& name){{ + return profile_of(name).dtype(); + }}; + auto ndims_of = [&dims_of](std::string const& parameter_name) {{ + return dims_of(parameter_name).size(); + }}; + auto add_variable_to_table = [&variable_profile_table]( + std::string const& name, + dtype_t dtype, ints const& dims){{ + variable_profile_table.emplace( + name, array_profile(dtype, dims)); + }}; + + auto graph = make_graph(model_data.node_list); // FIXME reorder nodes + model_data.node_list = graph.node_list(); + for(auto& node : model_data.node_list) {{ + auto input = [&node](int i){{ + return node.input_name_list.at(i); + }}; + auto output = [&node](int i){{ + return node.output_name_list.at(i); + }}; + {code} + {unsupported_operator} + }} + return variable_profile_table; + }} +}} // namespace menoh_impl + +#endif // MENOH_ATTRIBUTE_COMPLETION_AND_SHAPE_INFERENCE_HPP +""" + code_list = [] + code_list.append(make_completion_code("Abs")) + code_list.append(make_completion_code("Add")) + code_list.append( + make_completion_code("AveragePool", [ + ("count_include_pad", "int", "0"), + ("kernel_shape", "ints", None), + ("pads", "ints", "ints(2*(ndims_of(input(0))-2), 0)"), + ("strides", "ints", "ints(ndims_of(input(0))-2, 1)"), # WORKAROUND: None is correct # NOQA + ], ''' +add_variable_to_table(output(0), dtype_of(input(0)), + calc_2d_output_dims( + dims_of(input(0)), dims_of(input(0)).at(1), + kernel_shape, strides, pads)); +''', preprocess=''' +assert(2 <= ndims_of(input(0))); +''')) + code_list.append( + make_completion_code("BatchNorm", [ + ("epsilon", "float", "1.e-05f"), + ("momentum", "float", "0.9f"), + ("spatial", "int", "1"), + ])) + code_list.append( + make_completion_code("Concat", [ + ("axis", "int", None), + ], ''' +auto output_dims = dims_of(input(0)); +for(int i = 1; i < node.input_name_list.size(); ++i) { + // TODO dim check + output_dims.at(axis) += dims_of(input(i)).at(axis); +} +add_variable_to_table(output(0), dtype_of(input(0)), output_dims); +''')) + code_list.append( + make_completion_code( + "Conv", [ + ("dilations", "ints", "ints(kernel_ndims, 1)"), + ("group", "int", "1"), + ("kernel_shape", "ints", "kernel_shape"), + ("pads", "ints", "ints(kernel_ndims*2, 0)"), + ("strides", "ints", 
"ints(kernel_ndims, 1)"), + ], ''' +add_variable_to_table(output(0), dtype_of(input(0)), + calc_2d_output_dims( + dims_of(input(0)), dims_of(input(1)).at(0), + kernel_shape, strides, pads)); +''', + preprocess=''' +auto kernel_ndims = ndims_of(input(1))-2; +auto weights_shape = dims_of(input(1)); +auto kernel_shape = ints(weights_shape.begin()+2, weights_shape.end()); +''')) + code_list.append( + make_completion_code( + "ConvTranspose", + [ + ("dilations", "ints", None), + ("group", "int", "1"), + ("kernel_shape", "ints", "kernel_shape"), + ("output_padding", "ints", None), + # ("output_shape", "ints", None), + # ("pads", "ints", None), + ("strides", "ints", "ints(kernel_ndims, 1)"), + ], ''' +add_variable_to_table(output(0), dtype_of(input(0)), + calc_2d_output_dims_for_conv_transpose( + dims_of(input(0)), dims_of(input(1)).at(0), + kernel_shape, strides, get(node.attribute_table.at("pads")))); +''', + preprocess=''' +auto kernel_ndims = ndims_of(input(1))-2; +auto weights_shape = dims_of(input(1)); +auto kernel_shape = ints(weights_shape.begin()+2, weights_shape.end()); +''', + postprocess=''' +{ + auto found = node.attribute_table.find("output_shape"); + assert(!(found == node.attribute_table.end() && + node.attribute_table.find("pads") == node.attribute_table.end())); + if(found != node.attribute_table.end()) { + auto output_shape = get(found->second); + /* [dim0_begin, dim1_begin, ... , dim0_end, dim1_end, ..., ...] */ + ints pads(kernel_ndims*2, 0); + auto output_padding = + get(node.attribute_table.at("output_padding")); + auto strides = get(node.attribute_table.at("strides")); + auto input_profile = input_profile_table.at(input(0)); + ints input_size(input_profile.dims().begin()+2, + input_profile.dims().end()); + + for(int i = 0; i < kernel_ndims; ++i) { + auto total_padding = strides[i] * (input_size[i] - 1) + + output_padding[i] + kernel_shape[i] - output_shape[i]; + pads[i] = total_padding - (total_padding/2); + pads[i+kernel_ndims] = (total_padding/2); + } + + node.attribute_table["pads"] = pads; + } +} +''')) + code_list.append(make_completion_code("Elu", [("alpha", "float", "1.f")])) + code_list.append( + make_completion_code("FC", [], ''' +auto output_dims = ints({dims_of(input(0)).at(0), dims_of(input(1)).at(0)}); +add_variable_to_table(output(0), dtype_of(input(0)), + output_dims); +''')) + code_list.append( + make_completion_code("Gemm", [ + ("alpha", "float", "1.f"), + ("beta", "float", "1.f"), + ("transA", "int", "0"), + ("transB", "int", "0"), + ], ''' +auto a_dims = dims_of(input(0)); +assert(a_dims.size() == 2); +if(transA) { + std::swap(a_dims.at(0), a_dims.at(1)); +} + +auto b_dims = dims_of(input(1)); +assert(b_dims.size() == 2); +if(transB) { + std::swap(b_dims.at(0), b_dims.at(1)); +} + +auto output_dims = ints({a_dims.at(0), b_dims.at(1)}); +add_variable_to_table(output(0), dtype_of(input(0)), output_dims); +''')) + code_list.append( + make_completion_code("LeakyRelu", [("alpha", "float", "0.01f")])) + code_list.append( + make_completion_code("LRN", [ + ("alpha", "float", "0.0001f"), + ("beta", "float", "0.75f"), + ("bias", "float", "1.0f"), + ("size", "float", None), + ])) + code_list.append( + make_completion_code("MaxPool", [ + ("kernel_shape", "ints", None), + ("pads", "ints", "ints(2*(ndims_of(input(0))-2), 0)"), + ("storage_order", "int", "0"), + ("strides", "ints", "ints(ndims_of(input(0))-2, 1)"), # WORKAROUND: None is correct # NOQA + ], ''' +add_variable_to_table(output(0), dtype_of(input(0)), + calc_2d_output_dims( + dims_of(input(0)), 
dims_of(input(0)).at(1), + kernel_shape, strides, pads)); +''')) + code_list.append(make_completion_code("Relu")) + code_list.append(make_completion_code("Softmax", [("axis", "int", "1")])) + code_list.append(make_completion_code("Sum")) + code_list.append(make_completion_code("Sqrt")) + code_list.append(make_completion_code("Tanh")) + code_list.append( + make_completion_code("Transpose", [ + ("perm", "ints", "perm"), + ], ''' +auto input_dims = dims_of(input(0)); +ints output_dims(input_dims.size()); +for(int i = 0; i < input_dims.size(); ++i) { + output_dims.at(i) = input_dims.at(perm.at(i)); +} +add_variable_to_table(output(0), dtype_of(input(0)), output_dims); +''', preprocess=""" +ints perm(ndims_of(input(0))); +for(int i = 0; i < perm.size(); ++i) {{ + perm.at(i) = perm.size()-i-1; +}} +""")) + print(template.format(script_name=os.path.basename(__file__), code="\n".join(code_list), unsupported_operator=''' +{ + throw unsupported_operator(node.op_type); +} +''')) + + +if __name__ == "__main__": + main() diff --git a/gen_test_data.py b/scripts/gen_test_data.py similarity index 100% rename from gen_test_data.py rename to scripts/gen_test_data.py diff --git a/scripts/install-mkldnn.sh b/scripts/install-mkldnn.sh new file mode 100755 index 0000000..763cdec --- /dev/null +++ b/scripts/install-mkldnn.sh @@ -0,0 +1,38 @@ +#!/bin/bash -e + +# retrieve arguments +while [[ $# != 0 ]]; do + case $1 in + --) + shift + break + ;; + --build-dir) + readonly ARG_BUILD_DIR="$2" + shift 2 + ;; + --dest-dir) + readonly ARG_DESTDIR="$2" + shift 2 + ;; + -*) + echo Unknown option \"$1\" 1>&2 + exit + ;; + *) + break + ;; + esac +done + +# validate the arguments +test -n "${ARG_BUILD_DIR}" || { echo "--build-dir is not specified" 1>&2; exit 1; } + +# install (always) +echo -e "\e[33;1mInstalling libmkldnn\e[0m" + +# install to ${DESTDIR}/${CMAKE_INSTALL_PREFIX} if it is specified +[ -n "${ARG_DESTDIR}" ] && export DESTDIR="${ARG_DESTDIR}" + +cd "${ARG_BUILD_DIR}" +make install/strip diff --git a/scripts/install-protobuf.sh b/scripts/install-protobuf.sh new file mode 100755 index 0000000..c5f5fd0 --- /dev/null +++ b/scripts/install-protobuf.sh @@ -0,0 +1,38 @@ +#!/bin/bash -e + +# retrieve arguments +while [[ $# != 0 ]]; do + case $1 in + --) + shift + break + ;; + --build-dir) + readonly ARG_BUILD_DIR="$2" + shift 2 + ;; + --dest-dir) + readonly ARG_DESTDIR="$2" + shift 2 + ;; + -*) + echo Unknown option \"$1\" 1>&2 + exit + ;; + *) + break + ;; + esac +done + +# validate the arguments +test -n "${ARG_BUILD_DIR}" || { echo "--build-dir is not specified" 1>&2; exit 1; } + +# install (always) +echo -e "\e[33;1mInstalling libprotobuf\e[0m" + +# install to ${DESTDIR}/`--prefix` if it is specified +[ -n "${ARG_DESTDIR}" ] && export DESTDIR="${ARG_DESTDIR}" + +cd "${ARG_BUILD_DIR}" +make install diff --git a/.travis/prepare-menoh-data.sh b/scripts/prepare-menoh-data.sh old mode 100644 new mode 100755 similarity index 52% rename from .travis/prepare-menoh-data.sh rename to scripts/prepare-menoh-data.sh index e82bf44..9f99002 --- a/.travis/prepare-menoh-data.sh +++ b/scripts/prepare-menoh-data.sh @@ -1,4 +1,6 @@ -#!/bin/bash +#!/bin/bash -e + +BASE_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) # retrieve arguments while [[ $# != 0 ]]; do @@ -26,14 +28,14 @@ while [[ $# != 0 ]]; do esac done -# validate the arguments -test -n "${ARG_SOURCE_DIR}" || { echo "ARG_SOURCE_DIR can't be empty" 1>&2; exit 1; } -test -n "${ARG_PYTHON_EXECUTABLE}" || ARG_PYTHON_EXECUTABLE=python +# options that have default value +test -n 
"${ARG_SOURCE_DIR}" || readonly ARG_SOURCE_DIR="${BASE_DIR}/.." +test -n "${ARG_PYTHON_EXECUTABLE}" || readonly ARG_PYTHON_EXECUTABLE=python -echo -e "\e[33;1mPreparing data/ for Menoh\e[0m" +echo -e "\e[33;1mPreparing data for Menoh tests and examples\e[0m" -cd ${ARG_SOURCE_DIR} +cd "${ARG_SOURCE_DIR}" [ -d "data" ] || mkdir -p data -${ARG_PYTHON_EXECUTABLE} retrieve_data.py -${ARG_PYTHON_EXECUTABLE} gen_test_data.py +${ARG_PYTHON_EXECUTABLE} scripts/retrieve_data.py +${ARG_PYTHON_EXECUTABLE} scripts/gen_test_data.py diff --git a/retrieve_data.py b/scripts/retrieve_data.py similarity index 100% rename from retrieve_data.py rename to scripts/retrieve_data.py diff --git a/.travis/run-container.sh b/scripts/run-container.sh similarity index 100% rename from .travis/run-container.sh rename to scripts/run-container.sh diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d920bce..6224acb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,18 +1,48 @@ +enable_testing() + +# GTest setup +set(GTEST_DIR "lib/googletest") +execute_process(COMMAND git submodule update --init -- test/${GTEST_DIR} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) + +message(STATUS "Adding ${GTEST_DIR}") + +# Prevent overriding the parent project's compiler/linker settings on Windows +# See https://github.com/google/googletest/blob/master/googletest/README.md#visual-studio-dynamic-vs-static-runtimes +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + +add_subdirectory(${GTEST_DIR} EXCLUDE_FROM_ALL) + +# filesystem setup +set(FILESYSTEM_DIR "lib/filesystem") +execute_process(COMMAND git submodule update --init -- test/${FILESYSTEM_DIR} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) +include_directories("${FILESYSTEM_DIR}") + add_executable(menoh_test np_io.cpp array.cpp node.cpp graph.cpp onnx.cpp - mkldnn.cpp operator.cpp - model.cpp mkldnn_with_generic_fallback_backend.cpp model_data.cpp + attribute_completion_and_shape_inference.cpp #vgg16.cpp - ) -target_link_libraries(menoh_test gtest_main menoh_test_target) + +target_include_directories(menoh_test PUBLIC $) + +if(LINK_STATIC_LIBPROTOBUF) + target_link_libraries(menoh_test gtest_main menoh_test_target) +else() + target_link_libraries(menoh_test gtest_main menoh_test_target ${PROTOBUF_LIBRARIES}) +endif() + +if(UNIX AND NOT APPLE) + set_property( + TARGET menoh_test APPEND_STRING PROPERTY + LINK_FLAGS "-Wl,--disable-new-dtags") +endif() add_test(NAME menoh_test COMMAND menoh_test) diff --git a/test/attribute_completion_and_shape_inference.cpp b/test/attribute_completion_and_shape_inference.cpp new file mode 100644 index 0000000..b998c75 --- /dev/null +++ b/test/attribute_completion_and_shape_inference.cpp @@ -0,0 +1,65 @@ +#include + +#include +#include + +#include "common.hpp" +namespace { + + class AttributeCompletionAndShapeInferenceTest : public ::testing::Test {}; + + TEST_F(AttributeCompletionAndShapeInferenceTest, conv_completion) { + menoh_impl::model_data model_data; + model_data.node_list.push_back( + menoh_impl::node{"Conv", {"x", "w"}, {"y"}, {}}); + std::unordered_map + input_profile_table; + input_profile_table.emplace( + "x", + menoh_impl::array_profile(menoh_impl::dtype_t::float_, {1, 1, 3, 3})); + input_profile_table.emplace( + "w", + menoh_impl::array_profile(menoh_impl::dtype_t::float_, {1, 1, 3, 3})); + menoh_impl::complete_attribute_and_infer_shape(model_data, + input_profile_table); + auto const& node = model_data.node_list.at(0); + auto found = node.attribute_table.find("kernel_shape"); + ASSERT_NE(found, node.attribute_table.end()); + 
menoh_impl::assert_eq_list( + menoh_impl::get>(found->second), + std::vector({3, 3})); + } + + TEST_F(AttributeCompletionAndShapeInferenceTest, sum_check) { + menoh_impl::model_data model_data; + model_data.node_list.push_back( + menoh_impl::node{"Sum", {"x0", "x1", "x2"}, {"y"}, {}}); + std::unordered_map + input_profile_table; + for(int i = 0; i < 3; ++i) { + input_profile_table.emplace( + "x" + std::to_string(i), + menoh_impl::array_profile(menoh_impl::dtype_t::float_, + {1, 1, 3, 3})); + } + ASSERT_NO_THROW(menoh_impl::complete_attribute_and_infer_shape( + model_data, input_profile_table)); + } + + TEST_F(AttributeCompletionAndShapeInferenceTest, add_check) { + menoh_impl::model_data model_data; + model_data.node_list.push_back( + menoh_impl::node{"Add", {"x0", "x1"}, {"y"}, {}}); + std::unordered_map + input_profile_table; + for(int i = 0; i < 2; ++i) { + input_profile_table.emplace( + "x" + std::to_string(i), + menoh_impl::array_profile(menoh_impl::dtype_t::float_, + {1, 1, 3, 3})); + } + ASSERT_NO_THROW(menoh_impl::complete_attribute_and_infer_shape( + model_data, input_profile_table)); + } + +} // namespace diff --git a/test/backend.hpp b/test/backend.hpp index 0f42985..f16c150 100644 --- a/test/backend.hpp +++ b/test/backend.hpp @@ -61,9 +61,8 @@ namespace menoh { std::vector data; std::tie(std::ignore, output_dims, data) = menoh_impl::load_np_array(output_filename); - dtype_t dtype = dtype_t::float_; // TODO other dtype true_output_table.insert({output_name, data}); - vpt_builder.add_output_profile(output_name, dtype); + vpt_builder.add_output_name(output_name); } auto vpt = vpt_builder.build_variable_profile_table(model_data); diff --git a/test/common.hpp b/test/common.hpp index 31474a2..3fb29a8 100644 --- a/test/common.hpp +++ b/test/common.hpp @@ -6,11 +6,11 @@ namespace menoh_impl { template - auto assert_eq_list(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2) { + auto assert_eq_list(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2, float eps=1.e-4) { ASSERT_EQ(std::distance(first1, last1), std::distance(first2, last2)) << "size is different"; while(first1 != last1) { - ASSERT_EQ(*first1, *first2) + ASSERT_NEAR(*first1, *first2, eps) << *first1 << " and " << *first2 << " are different"; ++first1; ++first2; diff --git a/test/lib/filesystem b/test/lib/filesystem new file mode 160000 index 0000000..0a539a6 --- /dev/null +++ b/test/lib/filesystem @@ -0,0 +1 @@ +Subproject commit 0a539a6c988dc8691af317e077893e831dee2908 diff --git a/test/lib/googletest b/test/lib/googletest index ba96d0b..2fe3bd9 160000 --- a/test/lib/googletest +++ b/test/lib/googletest @@ -1 +1 @@ -Subproject commit ba96d0b1161f540656efdaed035b3c062b60e006 +Subproject commit 2fe3bd994b3189899d93f1d5a881e725e046fdc2 diff --git a/test/mkldnn.cpp b/test/mkldnn.cpp deleted file mode 100644 index 8799591..0000000 --- a/test/mkldnn.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -namespace menoh_impl { - namespace { - - class MKLDNNTest : public ::testing::Test {}; - - TEST_F(MKLDNNTest, run_onnx_model) { - static mkldnn::engine engine(mkldnn::engine::cpu, 0); - auto model_data = - make_model_data_from_onnx_file("../data/VGG16.onnx"); - // model_data = trim_redundant_nodes(model_data); - - auto input_name_list = extract_model_input_name_list(model_data); - if(input_name_list.size() != 1) { - throw std::runtime_error( - "VGG16 data is invalid: input name list size is " + - 
std::to_string(input_name_list.size())); - } - auto const& input_name = input_name_list.front(); - constexpr auto batch_size = 1; - constexpr auto channel_num = 3; - constexpr auto height = 224; - constexpr auto width = 224; - std::vector input_dims{batch_size, channel_num, height, width}; - array input_arr(dtype_t::float_, input_dims); - - auto output_name_list = extract_model_output_name_list(model_data); - if(output_name_list.size() != 1) { - throw std::runtime_error( - "VGG16 data is invalid: input name list size is " + - std::to_string(output_name_list.size())); - } - auto const& output_name = output_name_list.front(); - auto output_dims_table = - make_output_dims_table(model_data, {{input_name, input_dims}}); - array output_arr(dtype_t::float_, - find_value(output_dims_table, output_name)); - auto model = menoh_impl::mkldnn_backend::model_core( - { - {input_name, input_arr}, - }, - { - {output_name, output_arr}, - }, - model_data, engine); - model.run(); - auto max_i = - std::max_element(fbegin(output_arr), fend(output_arr)) - - fbegin(output_arr); - std::cout << "max_i " << max_i << std::endl; - } - - TEST_F(MKLDNNTest, make_mkldnn_model_with_invalid_backend_config) { - // Aliases to onnx's node input and output tensor name - auto conv1_1_in_name = "140326425860192"; - auto softmax_out_name = "140326200803680"; - - constexpr auto batch_size = 1; - constexpr auto channel_num = 3; - constexpr auto height = 224; - constexpr auto width = 224; - // category_num is 1000 - - std::vector input_dims{batch_size, channel_num, height, width}; - - // Load ONNX model data - auto model_data = - menoh_impl::make_model_data_from_onnx_file("../data/VGG16.onnx"); - - auto cpu_count = - mkldnn::engine::get_count(mkldnn::engine::kind::cpu); - - // Construct computation primitive list and memories - ASSERT_THROW( - menoh_impl::model( - {std::make_tuple( - conv1_1_in_name, menoh_impl::dtype_t::float_, input_dims, - nullptr)}, // table of input_name, dtype, input_dims and - // data_handle - {std::make_tuple( - softmax_out_name, menoh_impl::dtype_t::float_, - nullptr)}, // list of output names, dtypes and data_handles - model_data, - "mkldnn", "{\"cpu_id\":" + std::to_string(cpu_count + 2) + "}"), - menoh_impl::backend_error); - } - - } // namespace -} // namespace menoh_impl diff --git a/test/mkldnn_with_generic_fallback_backend.cpp b/test/mkldnn_with_generic_fallback_backend.cpp index cfe8c45..58540ce 100644 --- a/test/mkldnn_with_generic_fallback_backend.cpp +++ b/test/mkldnn_with_generic_fallback_backend.cpp @@ -69,15 +69,14 @@ namespace menoh { std::string output_name; std::tie(output_name, std::ignore) = gemm_true_output_filename; model_data.add_output_name_to_current_node(output_name); - dtype_t dtype = dtype_t::float_; // TODO other dtype - vpt_builder.add_output_profile(output_name, dtype); + vpt_builder.add_output_name(output_name); } { model_data.add_new_node("Relu"); model_data.add_input_name_to_current_node("gemm_out"); model_data.add_output_name_to_current_node("relu_out"); - vpt_builder.add_output_profile("relu_out", dtype_t::float_); + vpt_builder.add_output_name("relu_out"); } auto vpt = vpt_builder.build_variable_profile_table(model_data); diff --git a/test/model.cpp b/test/model.cpp deleted file mode 100644 index 8509662..0000000 --- a/test/model.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include - -#include -#include -#include - -#include - -#include -#include - -namespace menoh_impl { - namespace { - class ModelTest : public ::testing::Test {}; - - TEST_F(ModelTest, make_model) { - // Aliases 
to onnx's node input and output tensor name - auto conv1_1_in_name = "140326425860192"; - auto softmax_out_name = "140326200803680"; - - constexpr auto batch_size = 1; - constexpr auto channel_num = 3; - constexpr auto height = 224; - constexpr auto width = 224; - // category_num is 1000 - - std::vector input_dims{batch_size, channel_num, height, width}; - - // Load ONNX model data - auto model_data = std::make_unique( - menoh_impl::make_model_data_from_onnx_file("../data/VGG16.onnx")); - - // Construct computation primitive list and memories - auto model = menoh_impl::model( - {std::make_tuple(conv1_1_in_name, dtype_t::float_, input_dims, - nullptr)}, - {std::make_tuple(softmax_out_name, dtype_t::float_, nullptr)}, - *model_data, "mkldnn"); - model_data.reset(); // delete model_data - - model.run(); - } - - } // namespace -} // namespace menoh_impl diff --git a/test/operator.cpp b/test/operator.cpp index 1f610ae..bef8ee3 100644 --- a/test/operator.cpp +++ b/test/operator.cpp @@ -1,1041 +1,223 @@ -#include #include + +#include #include #include -#include -#include +#include #include #include - -#include "./common.hpp" -#include "np_io.hpp" - -#include -#include -#include -#include - -namespace menoh_impl { - namespace mkldnn_backend { - - auto load_np_array_as_memory(std::string const& filename, - mkldnn::engine const& engine) { - auto data = menoh_impl::load_np_array_as_array(filename); - assert(data.dims().size() == 2 || data.dims().size() == 4); - mkldnn::memory::format format = data.dims().size() == 2 - ? mkldnn::memory::format::nc - : mkldnn::memory::format::nchw; - mkldnn::memory memory( - {{{data.dims()}, - dtype_to_mkldnn_memory_data_type(data.dtype()), - format}, - engine}); - std::copy(fbegin(data), fend(data), - static_cast(memory.get_data_handle())); - return memory; - } - - class OperatorTest : public ::testing::Test { - protected: - OperatorTest() = default; - virtual void SetUp() {} - - auto relu_test(std::string const& input_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto output_dims = extract_dims(input_memory); - auto output = array(dtype_t::float_, output_dims); - auto node = menoh_impl::node{"", {"input"}, {"output"}, {}}; - auto factory_return = make_relu_primitive( - node, 0, {node}, {}, {{"input", input_memory}}, - {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto - leaky_relu_test(std::string const& input_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto output_dims = extract_dims(input_memory); - auto output = array(dtype_t::float_, output_dims); - auto node = menoh_impl::node{ - "", {"input"}, {"output"}, {{"alpha", 0.001f}}}; - auto factory_return = make_leaky_relu_primitive( - node, 0, {node}, {}, {{"input", input_memory}}, - {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto elu_test(std::string const& 
input_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto output_dims = extract_dims(input_memory); - auto output = array(dtype_t::float_, output_dims); - auto node = menoh_impl::node{ - "", {"input"}, {"output"}, {{"alpha", 1.1f}}}; - auto factory_return = make_elu_primitive( - node, 0, {node}, {}, {{"input", input_memory}}, - {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto abs_test(std::string const& input_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto output_dims = extract_dims(input_memory); - auto output = array(dtype_t::float_, output_dims); - auto node = menoh_impl::node{"", {"input"}, {"output"}, {}}; - auto factory_return = make_abs_primitive( - node, 0, {node}, {}, {{"input", input_memory}}, - {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto sqrt_test(std::string const& input_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto output_dims = extract_dims(input_memory); - auto output = array(dtype_t::float_, output_dims); - auto node = menoh_impl::node{"", {"input"}, {"output"}, {}}; - auto factory_return = make_sqrt_primitive( - node, 0, {node}, {}, {{"input", input_memory}}, - {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto tanh_test(std::string const& input_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto output_dims = extract_dims(input_memory); - auto output = array(dtype_t::float_, output_dims); - auto node = menoh_impl::node{"", {"input"}, {"output"}, {}}; - auto factory_return = make_tanh_primitive( - node, 0, {node}, {}, {{"input", input_memory}}, - {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto softmax_test(std::string const& input_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto output_dims = extract_dims(input_memory); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_softmax_primitive( - menoh_impl::node{"", {"input"}, {"output"}, {}}, {}, - {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = 
std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto fc_test(std::string const& input_filename, - std::string const& weight_filename, - std::string const& bias_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto weight = - menoh_impl::load_np_array_as_array(weight_filename); - auto bias = menoh_impl::load_np_array_as_array(bias_filename); - std::vector output_dims{extract_dims(input_memory).at(0), - weight.dims().at(0)}; - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_fc_primitive( - menoh_impl::node{"", - {"input", "weight", "bias"}, - {"output"}, - {{"axis", 1}, {"axis_w", 1}}}, - {{"weight", weight}, {"bias", bias}}, - {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto max_pool_test(std::string const& input_filename, int k, int s, - int p) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - std::vector strides{{s, s}}; - std::vector kernel_shape{{k, k}}; - std::vector pads{{p, p, p, p}}; - auto input_dims = extract_dims(input_memory); - auto output_dims = calc_2d_output_dims( - input_dims, input_dims.at(1), kernel_shape, strides, pads); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_max_pool_primitive( - menoh_impl::node{"", - {"input"}, - {"output"}, - {{"strides", strides}, - {"kernel_shape", kernel_shape}, - {"pads", pads}}}, - {}, {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = menoh_impl::load_np_array_as_array( - "../data/max_pooling_2d_k" + std::to_string(k) + "_s" + - std::to_string(s) + "_p" + std::to_string(p) + ".txt"); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto global_max_pool_test( - std::string const& input_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto input_dims = extract_dims(input_memory); - std::vector strides{1, 1}; - std::vector kernel_shape{input_dims.at(2), - input_dims.at(3)}; - std::vector pads{0, 0, 0, 0}; - auto output_dims = calc_2d_output_dims( - input_dims, input_dims.at(1), kernel_shape, strides, pads); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_global_max_pool_primitive( - menoh_impl::node{"", {"input"}, {"output"}, {}}, {}, - {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto average_pool_test(std::string const& input_filename, int k, - int s, int p) const { - 
auto input_memory = - load_np_array_as_memory(input_filename, engine_); - std::vector strides{{s, s}}; - std::vector kernel_shape{{k, k}}; - std::vector pads{{p, p, p, p}}; - auto input_dims = extract_dims(input_memory); - auto output_dims = calc_2d_output_dims( - input_dims, input_dims.at(1), kernel_shape, strides, pads); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_average_pool_primitive( - menoh_impl::node{"", - {"input"}, - {"output"}, - {{"strides", strides}, - {"kernel_shape", kernel_shape}, - {"pads", pads}}}, - {}, {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = menoh_impl::load_np_array_as_array( - "../data/average_pooling_2d_k" + std::to_string(k) + "_s" + - std::to_string(s) + "_p" + std::to_string(p) + ".txt"); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto global_average_pool_test( - std::string const& input_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto input_dims = extract_dims(input_memory); - std::vector strides{1, 1}; - std::vector kernel_shape{input_dims.at(2), - input_dims.at(3)}; - std::vector pads{0, 0, 0, 0}; - auto output_dims = calc_2d_output_dims( - input_dims, input_dims.at(1), kernel_shape, strides, pads); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_global_average_pool_primitive( - menoh_impl::node{"", {"input"}, {"output"}, {}}, {}, - {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto conv_test(std::string const& input_filename, int k, int s, - int p) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto weight = menoh_impl::load_np_array_as_array( - "../data/random_weight_5_4_" + std::to_string(k) + "_" + - std::to_string(k) + ".txt"); - std::vector strides{{s, s}}; - std::vector kernel_shape{{k, k}}; - std::vector pads{{p, p, p, p}}; - auto input_dims = extract_dims(input_memory); - auto output_dims = calc_2d_output_dims( - input_dims, weight.dims().at(0), kernel_shape, strides, pads); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_conv_primitive( - menoh_impl::node{"", - {"input", "weight"}, - {"output"}, - {{"strides", strides}, - {"kernel_shape", kernel_shape}, - {"pads", pads}}}, - {{"weight", weight}}, {{"input", input_memory}}, - {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = menoh_impl::load_np_array_as_array( - "../data/convolution_2d_w5_4_" + std::to_string(k) + "_" + - std::to_string(k) + "_k" + std::to_string(k) + "_s" + - std::to_string(s) + "_p" + std::to_string(p) + ".txt"); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto conv_with_bias_test(std::string const& input_filename, int k, - int s, int p) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto weight = 
menoh_impl::load_np_array_as_array( - "../data/random_weight_5_4_" + std::to_string(k) + "_" + - std::to_string(k) + ".txt"); - auto bias = menoh_impl::load_np_array_as_array( - "../data/random_bias_5.txt"); - std::vector strides{{s, s}}; - std::vector kernel_shape{{k, k}}; - std::vector pads{{p, p, p, p}}; - auto input_dims = extract_dims(input_memory); - auto output_dims = calc_2d_output_dims( - input_dims, weight.dims().at(0), kernel_shape, strides, pads); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_conv_primitive( - menoh_impl::node{"", - {"input", "weight", "bias"}, - {"output"}, - {{"strides", strides}, - {"kernel_shape", kernel_shape}, - {"pads", pads}}}, - {{"weight", weight}, {"bias", bias}}, - {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = menoh_impl::load_np_array_as_array( - "../data/convolution_2d_w5_4_" + std::to_string(k) + "_" + - std::to_string(k) + "_k" + std::to_string(k) + "_s" + - std::to_string(s) + "_p" + std::to_string(p) + - "_with_bias.txt"); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto conv_transpose_test(std::string const& input_filename, int k, - int s, int p) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto weight = menoh_impl::load_np_array_as_array( - "../data/random_weight_4_5_" + std::to_string(k) + "_" + - std::to_string(k) + ".txt"); - std::vector strides{{s, s}}; - std::vector kernel_shape{{k, k}}; - std::vector pads{{p, p, p, p}}; - auto input_dims = extract_dims(input_memory); - auto output_dims = calc_2d_output_dims_for_conv_transpose( - input_dims, weight.dims().at(1), kernel_shape, strides, pads); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_conv_transpose_primitive( - menoh_impl::node{"", - {"input", "weight"}, - {"output"}, - {{"strides", strides}, - {"kernel_shape", kernel_shape}, - {"pads", pads}}}, - {{"weight", weight}}, {{"input", input_memory}}, - {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = menoh_impl::load_np_array_as_array( - "../data/deconvolution_2d_w4_5_" + std::to_string(k) + "_" + - std::to_string(k) + "_k" + std::to_string(k) + "_s" + - std::to_string(s) + "_p" + std::to_string(p) + ".txt"); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto - conv_transpose_with_bias_test(std::string const& input_filename, - int k, int s, int p) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto weight = menoh_impl::load_np_array_as_array( - "../data/random_weight_4_5_" + std::to_string(k) + "_" + - std::to_string(k) + ".txt"); - auto bias = menoh_impl::load_np_array_as_array( - "../data/random_bias_4.txt"); - std::vector strides{{s, s}}; - std::vector kernel_shape{{k, k}}; - std::vector pads{{p, p, p, p}}; - auto input_dims = extract_dims(input_memory); - auto output_dims = calc_2d_output_dims_for_conv_transpose( - input_dims, weight.dims().at(1), kernel_shape, strides, pads); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_conv_transpose_primitive( - menoh_impl::node{"", - {"input", "weight", "bias"}, - {"output"}, - {{"strides", strides}, - {"kernel_shape", 
kernel_shape}, - {"pads", pads}}}, - {{"weight", weight}, {"bias", bias}}, - {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = menoh_impl::load_np_array_as_array( - "../data/deconvolution_2d_w4_5_" + std::to_string(k) + "_" + - std::to_string(k) + "_k" + std::to_string(k) + "_s" + - std::to_string(s) + "_p" + std::to_string(p) + - "_with_bias.txt"); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto - batch_norm_test(std::string const& input_filename, - std::string const& mean_filename, - std::string const& var_fileanme, - std::string const& gamma_filename, - std::string const& beta_filename, - std::string const& true_output_filename) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto mean = menoh_impl::load_np_array_as_array(mean_filename); - auto var = menoh_impl::load_np_array_as_array(var_fileanme); - auto gamma = menoh_impl::load_np_array_as_array(gamma_filename); - auto beta = menoh_impl::load_np_array_as_array(beta_filename); - auto output_dims = extract_dims(input_memory); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_batch_norm_primitive( - menoh_impl::node{"", - {"input", "gamma", "beta", "mean", "var"}, - {"output"}, - {{"epsilon", 1e-5f}, {"is_test", 1}}}, - {{"gamma", gamma}, - {"beta", beta}, - {"mean", mean}, - {"var", var}}, - {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto add_test(std::string const& input_a_filename, - std::string const& input_b_filename, - std::string const& true_output_filename) const { - auto input_a_memory = - load_np_array_as_memory(input_a_filename, engine_); - auto input_b_memory = - load_np_array_as_memory(input_b_filename, engine_); - auto output_dims = extract_dims(input_a_memory); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_add_primitive( - menoh_impl::node{ - "", {"input_a", "input_b"}, {"output"}, {{"broadcast", 0}}}, - {}, - {{"input_a", input_a_memory}, {"input_b", input_b_memory}}, - {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto - concat_test(std::vector const& input_filename_list, - int axis, - std::string const& true_output_filename) const { - std::vector input_name_list; - std::unordered_map - input_memory_table; - auto i = 0; - for(auto const& input_filename : input_filename_list) { - auto input_name = std::to_string(i); - input_name_list.push_back(input_name); - input_memory_table.insert( - {input_name, - load_np_array_as_memory(input_filename, engine_)}); - ++i; +#include + +#include + +#include "common.hpp" + +namespace { + + struct named_array_data { + std::string name; + menoh::dtype_t dtype; + std::vector dims; + std::unique_ptr data; + }; + + auto load_param(filesystem::path const& filepath, + bool squash_dims = 
false) { + namespace gpio = ::google::protobuf::io; + + std::ifstream ifs(filepath.str(), std::ios::binary); + if(!ifs) { + std::cout << "invalid filename" << std::endl; + throw "invalid_filename"; + } + gpio::IstreamInputStream iis(&ifs); + gpio::CodedInputStream cis(&iis); + cis.SetTotalBytesLimit(std::numeric_limits::max(), + std::numeric_limits::max()); + onnx::TensorProto tensor; + if(!tensor.ParseFromCodedStream(&cis)) { + std::cout << "invalid filename" << std::endl; + throw "onnx_parse_error"; + } + + // TODO int array + std::vector dims(tensor.dims().begin(), tensor.dims().end()); + assert(2 <= dims.size()); + if(squash_dims) { + dims.at(1) = std::accumulate(dims.begin() + 1, dims.end(), 1, + std::multiplies()); + dims.erase(dims.begin() + 2, dims.end()); + } + auto total_size = + std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); + assert(tensor.has_raw_data()); + assert( + tensor.raw_data().length() == + static_cast(total_size * 4)); + + auto data = std::make_unique(total_size * 4); + std::copy(tensor.raw_data().begin(), tensor.raw_data().end(), + data.get()); + // TODO other dtype + return named_array_data{tensor.name(), menoh::dtype_t::float_, + std::move(dims), std::move(data)}; + } + + class OperatorTest : public ::testing::Test { + protected: + OperatorTest() + : onnx_test_data_dir_path_( + "../external/onnx/onnx/backend/test/data/node/") {} + + void run_test(std::string backend_name, std::string const& test_name, + float eps, bool squash_dims = false) { + auto parent_dir_path = onnx_test_data_dir_path_ / test_name; + + for(int data_set_index = 0; true; ++data_set_index) { + auto dataset_path = + parent_dir_path / + ("test_data_set_" + std::to_string(data_set_index)); + if(!dataset_path.exists()) { + break; } - auto output_dims = - extract_dims(input_memory_table.begin()->second); - output_dims.at(axis) = 0; - for(auto const& name_and_memory : input_memory_table) { - output_dims.at(axis) += - extract_dims(name_and_memory.second).at(axis); + std::vector input_list; + for(int input_index = 0;; ++input_index) { + auto input_data_path = + dataset_path / + ("input_" + std::to_string(input_index) + ".pb"); + if(!input_data_path.exists()) { + break; + } + input_list.push_back( + load_param(input_data_path, squash_dims)); + } + std::vector true_output_list; + for(int output_index = 0;; ++output_index) { + auto output_data_path = + dataset_path / + ("output_" + std::to_string(output_index) + ".pb"); + if(!output_data_path.exists()) { + break; + } + true_output_list.push_back( + load_param(output_data_path, squash_dims)); } - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_concat_primitive( - menoh_impl::node{ - "", input_name_list, {"output"}, {{"axis", axis}}}, - {}, input_memory_table, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto lrn_test(std::string const& input_filename, float alpha, - float beta, float bias, int size, - std::string const& true_output_filename) { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto output_dims = extract_dims(input_memory); - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_lrn_primitive( - menoh_impl::node{"", - {"input"}, - {"output"}, - {{"alpha", 
alpha}, - {"beta", beta}, - {"bias", bias}, - {"size", size}}}, - {}, {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - /* - auto true_output = menoh_impl::load_np_array_as_array( - "../data/lrn_alpha" + std::to_string(alpha) + "_beta" + - std::to_string(beta) + "_bias" + std::to_string(bias) + - "_size" + std::to_string(size) + ".txt"); - */ - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); - } - - auto gemm_test(std::string const& input_filename, - std::string const& weight_filename, - std::string const& bias_filename, - std::string const& true_output_filename, float alpha, - float beta, int trans_a, int trans_b) const { - auto input_memory = - load_np_array_as_memory(input_filename, engine_); - auto weight = - menoh_impl::load_np_array_as_array(weight_filename); - auto bias = menoh_impl::load_np_array_as_array(bias_filename); - std::vector output_dims{extract_dims(input_memory).at(0), - weight.dims().at(0)}; - auto output = array(dtype_t::float_, output_dims); - auto factory_return = make_gemm_primitive( - menoh_impl::node{"", - {"input", "weight", "bias"}, - {"output"}, - {{"alpha", alpha}, - {"beta", beta}, - {"transA", trans_a}, - {"transB", trans_b}}}, - {{"weight", weight}, {"bias", bias}}, - {{"input", input_memory}}, {{"output", output}}, engine_); - auto& net = std::get<0>(factory_return); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - auto true_output = - menoh_impl::load_np_array_as_array(true_output_filename); + menoh::variable_profile_table_builder vpt_builder; + for(auto const& input : input_list) { + vpt_builder.add_input_profile(input.name, input.dtype, + input.dims); + } + for(auto const& output : true_output_list) { + vpt_builder.add_output_profile(output.name, output.dtype); + } + auto onnx_model_filename = parent_dir_path / "model.onnx"; + auto model_data = + menoh::make_model_data_from_onnx(onnx_model_filename.str()); + auto vpt = vpt_builder.build_variable_profile_table(model_data); + menoh::model_builder model_builder(vpt); + + for(auto const& input : input_list) { + model_builder.attach_external_buffer( + input.name, static_cast(input.data.get())); + } + auto model = + model_builder.build_model(model_data, backend_name); + model_data.reset(); - assert_near_list(fbegin(output), fend(output), - fbegin(true_output), fend(true_output), - 10.e-4); + std::vector output_list; + for(auto const& true_output : true_output_list) { + output_list.push_back(model.get_variable(true_output.name)); + } + model.run(); + assert(true_output_list.size() == output_list.size()); + auto static_cast_to_float_ptr = [](auto p) { + return static_cast(static_cast(p)); + }; + for(int output_index = 0; + output_index < true_output_list.size(); ++output_index) { + auto const& input = input_list.front(); + auto const& output = output_list.at(output_index); + auto const& true_output = true_output_list.at(output_index); + auto total_size = std::accumulate(true_output.dims.begin(), + true_output.dims.end(), 1, + std::multiplies()); + /* + std::cout << true_output.name << std::endl; + for(auto i = 0; i < 10; ++i) { + std::cout + << *(static_cast( + static_cast(input.data.get())) + + i) + << " " + << *(static_cast(output.buffer_handle) + i) + << " " + << *(static_cast( + static_cast(true_output.data.get())) + + i) + << std::endl; + 
} + */ + menoh_impl::assert_eq_list( + static_cast(output.buffer_handle), + static_cast(output.buffer_handle) + total_size, + static_cast_to_float_ptr(true_output.data.get()), + static_cast_to_float_ptr(true_output.data.get()) + + total_size, + eps); + } } - - mkldnn::engine engine_{mkldnn::engine::cpu, 0}; - }; - - TEST_F(OperatorTest, relu_1d_test) { - relu_test("../data/random_input_3_4096.txt", "../data/relu_1d.txt"); - } - TEST_F(OperatorTest, relu_2d_test) { - relu_test("../data/random_input_3_4_32_32.txt", - "../data/relu_2d.txt"); } - TEST_F(OperatorTest, leaky_relu_1d_test) { - leaky_relu_test("../data/random_input_3_4096.txt", - "../data/leaky_relu_1d.txt"); - } - TEST_F(OperatorTest, leaky_relu_2d_test) { - leaky_relu_test("../data/random_input_3_4_32_32.txt", - "../data/leaky_relu_2d.txt"); - } + private: + filesystem::path onnx_test_data_dir_path_; + }; - TEST_F(OperatorTest, elu_1d_test) { - elu_test("../data/random_input_3_4096.txt", "../data/elu_1d.txt"); - } - TEST_F(OperatorTest, elu_2d_test) { - elu_test("../data/random_input_3_4_32_32.txt", - "../data/elu_2d.txt"); - } +#define TEST_OP_IMPL(backend_name, test_name, eps, squash) \ + TEST_F(OperatorTest, backend_name##_##test_name) { run_test(#backend_name, #test_name, eps, squash); } +#define TEST_OP(backend_name, test_name, eps) TEST_OP_IMPL(backend_name, test_name, eps, false) +#define TEST_OP_SQUASH_DIMS(backend_name, test_name, eps) TEST_OP_IMPL(backend_name, test_name, eps, true) - TEST_F(OperatorTest, abs_1d_test) { - abs_test("../data/random_input_3_4096.txt", "../data/abs_1d.txt"); - } - TEST_F(OperatorTest, abs_2d_test) { - abs_test("../data/random_input_3_4_32_32.txt", - "../data/abs_2d.txt"); - } + float eps = 1.e-4; - TEST_F(OperatorTest, sqrt_1d_test) { - sqrt_test("../data/random_positive_input_3_4096.txt", - "../data/sqrt_1d.txt"); - } - TEST_F(OperatorTest, sqrt_2d_test) { - sqrt_test("../data/random_positive_input_3_4_32_32.txt", - "../data/sqrt_2d.txt"); - } + // Tests for MKLDNN backend + TEST_OP_SQUASH_DIMS(mkldnn, test_abs, eps); + TEST_OP_SQUASH_DIMS(mkldnn, test_elu, eps); + TEST_OP_SQUASH_DIMS(mkldnn, test_elu_default, eps); + TEST_OP_SQUASH_DIMS(mkldnn, test_leakyrelu, eps); + TEST_OP_SQUASH_DIMS(mkldnn, test_leakyrelu_default, eps); + TEST_OP_SQUASH_DIMS(mkldnn, test_relu, eps); + TEST_OP_SQUASH_DIMS(mkldnn, test_sqrt, eps); + TEST_OP_SQUASH_DIMS(mkldnn, test_tanh, eps); - TEST_F(OperatorTest, tanh_1d_test) { - tanh_test("../data/random_input_3_4096.txt", "../data/tanh_1d.txt"); - } - TEST_F(OperatorTest, tanh_2d_test) { - tanh_test("../data/random_input_3_4_32_32.txt", - "../data/tanh_2d.txt"); - } - - TEST_F(OperatorTest, softmax_1d_test) { - softmax_test("../data/random_input_3_4096.txt", - "../data/softmax_1d.txt"); - } - TEST_F(OperatorTest, softmax_2d_test) { - softmax_test("../data/random_input_3_4_32_32.txt", - "../data/softmax_2d.txt"); - } - - TEST_F(OperatorTest, fc_1d_test) { - fc_test("../data/random_input_3_4096.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_1d_w256_4096_b_256.txt"); - } - TEST_F(OperatorTest, fc_2d_test) { - fc_test("../data/random_input_3_4_32_32.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_2d_w256_4096_b_256.txt"); - } - - TEST_F(OperatorTest, max_pool_2_2_0_test) { - max_pool_test("../data/random_input_3_4_32_32.txt", 2, 2, 0); - } - TEST_F(OperatorTest, max_pool_3_2_0_test) { - max_pool_test("../data/random_input_3_4_32_32.txt", 3, 2, 0); - } - 
TEST_F(OperatorTest, max_pool_3_2_1_test) { - max_pool_test("../data/random_input_3_4_32_32.txt", 3, 2, 1); - } + TEST_OP(mkldnn, test_averagepool_2d_default, eps); + TEST_OP_SQUASH_DIMS(mkldnn, test_add, eps); + // TEST_OP_SQUASH_DIMS(mkldnn, test_batchnormalization, eps); // not found + // TEST_OP(mkldnn, test_concat_2d_axis_0, eps); + // TEST_OP(mkldnn, test_concat_2d_axis_1, eps); + // TEST_OP(mkldnn, test_conv_with_strides_padding, eps); + // TEST_OP_SQUASH_DIMS(mkldnn, test_convtranspose, eps); // not found + // TEST_OP(mkldnn, test_gemm_nobroadcast, eps); + //TEST_OP(mkldnn, test_globalaveragepool, eps); + //TEST_OP(mkldnn, test_globalmaxpool, eps); + TEST_OP(mkldnn, test_maxpool_2d_default, eps); + TEST_OP_SQUASH_DIMS(mkldnn, test_softmax_axis_1, eps); + // TEST_OP_SQUASH_DIMS(mkldnn, test_sum_one_input, eps); + // TEST_OP_SQUASH_DIMS(mkldnn, test_sum_two_inputs, eps); - TEST_F(OperatorTest, average_pool_2_2_0_test) { - average_pool_test("../data/random_input_3_4_32_32.txt", 2, 2, 0); - } - TEST_F(OperatorTest, average_pool_3_2_0_test) { - average_pool_test("../data/random_input_3_4_32_32.txt", 3, 2, 0); - } - TEST_F(OperatorTest, average_pool_3_2_1_test) { - average_pool_test("../data/random_input_3_4_32_32.txt", 3, 2, 1); - } + // TEST_OP(mkldnn, test_averagepool_2d_pads, eps); + // TEST_OP(mkldnn, test_averagepool_2d_precomputed_pads, eps); + // TEST_OP(mkldnn, test_averagepool_2d_precomputed_same_upper, eps); - TEST_F(OperatorTest, global_max_pool_test) { - global_max_pool_test("../data/random_input_3_4_32_32.txt", - "../data/global_max_pooling_2d.txt"); - } - TEST_F(OperatorTest, global_average_pool_test) { - global_average_pool_test("../data/random_input_3_4_32_32.txt", - "../data/global_average_pooling_2d.txt"); - } + // Tests for MKLDNN with Generic fallback backend + TEST_OP(mkldnn_with_generic_fallback, test_relu, eps); - TEST_F(OperatorTest, conv_1_1_0_test) { - conv_test("../data/random_input_3_4_32_32.txt", 1, 1, 0); - } - TEST_F(OperatorTest, conv_2_1_0_test) { - conv_test("../data/random_input_3_4_32_32.txt", 2, 1, 0); - } - TEST_F(OperatorTest, conv_2_1_1_test) { - conv_test("../data/random_input_3_4_32_32.txt", 2, 1, 1); - } - TEST_F(OperatorTest, conv_2_2_0_test) { - conv_test("../data/random_input_3_4_32_32.txt", 2, 2, 0); - } - TEST_F(OperatorTest, conv_2_2_1_test) { - conv_test("../data/random_input_3_4_32_32.txt", 2, 2, 1); - } - TEST_F(OperatorTest, conv_3_1_1_test) { - conv_test("../data/random_input_3_4_32_32.txt", 3, 1, 1); - } - TEST_F(OperatorTest, conv_3_2_0_test) { - conv_test("../data/random_input_3_4_32_32.txt", 3, 2, 0); - } - TEST_F(OperatorTest, conv_3_2_1_test) { - conv_test("../data/random_input_3_4_32_32.txt", 3, 2, 1); - } - TEST_F(OperatorTest, conv_with_bias_1_1_0_test) { - conv_with_bias_test("../data/random_input_3_4_32_32.txt", 1, 1, 0); - } - TEST_F(OperatorTest, conv_with_bias_2_1_0_test) { - conv_test("../data/random_input_3_4_32_32.txt", 2, 1, 0); - } - TEST_F(OperatorTest, conv_with_bias_2_1_1_test) { - conv_with_bias_test("../data/random_input_3_4_32_32.txt", 2, 1, 1); - } - TEST_F(OperatorTest, conv_with_bias_2_2_0_test) { - conv_with_bias_test("../data/random_input_3_4_32_32.txt", 2, 2, 0); - } - TEST_F(OperatorTest, conv_with_bias_2_2_1_test) { - conv_with_bias_test("../data/random_input_3_4_32_32.txt", 2, 2, 1); - } - TEST_F(OperatorTest, conv_with_bias_3_1_1_test) { - conv_with_bias_test("../data/random_input_3_4_32_32.txt", 3, 1, 1); - } - TEST_F(OperatorTest, conv_with_bias_3_2_0_test) { - 
conv_with_bias_test("../data/random_input_3_4_32_32.txt", 3, 2, 0); - } - TEST_F(OperatorTest, conv_with_bias_3_2_1_test) { - conv_with_bias_test("../data/random_input_3_4_32_32.txt", 3, 2, 1); - } + //TEST_OP(mkldnn_with_generic_fallback, test_gemm_nobroadcast, eps); - TEST_F(OperatorTest, conv_transpose_1_1_0_test) { - conv_transpose_test("../data/random_input_3_4_32_32.txt", 1, 1, 0); - } - TEST_F(OperatorTest, conv_transpose_2_1_0_test) { - conv_transpose_test("../data/random_input_3_4_32_32.txt", 2, 1, 0); - } - TEST_F(OperatorTest, conv_transpose_2_1_1_test) { - conv_transpose_test("../data/random_input_3_4_32_32.txt", 2, 1, 1); - } - TEST_F(OperatorTest, conv_transpose_2_2_0_test) { - conv_transpose_test("../data/random_input_3_4_32_32.txt", 2, 2, 0); - } - TEST_F(OperatorTest, conv_transpose_2_2_1_test) { - conv_transpose_test("../data/random_input_3_4_32_32.txt", 2, 2, 1); - } - TEST_F(OperatorTest, conv_transpose_3_1_1_test) { - conv_transpose_test("../data/random_input_3_4_32_32.txt", 3, 1, 1); - } - TEST_F(OperatorTest, conv_transpose_3_2_0_test) { - conv_transpose_test("../data/random_input_3_4_32_32.txt", 3, 2, 0); - } - TEST_F(OperatorTest, conv_transpose_3_2_1_test) { - conv_transpose_test("../data/random_input_3_4_32_32.txt", 3, 2, 1); - } - - TEST_F(OperatorTest, conv_transpose_with_bias_1_1_0_test) { - conv_transpose_with_bias_test("../data/random_input_3_4_32_32.txt", - 1, 1, 0); - } - TEST_F(OperatorTest, conv_transpose_with_bias_2_1_0_test) { - conv_transpose_with_bias_test("../data/random_input_3_4_32_32.txt", - 2, 1, 0); - } - TEST_F(OperatorTest, conv_transpose_with_bias_2_1_1_test) { - conv_transpose_with_bias_test("../data/random_input_3_4_32_32.txt", - 2, 1, 1); - } - TEST_F(OperatorTest, conv_transpose_with_bias_2_2_0_test) { - conv_transpose_with_bias_test("../data/random_input_3_4_32_32.txt", - 2, 2, 0); - } - TEST_F(OperatorTest, conv_transpose_with_bias_2_2_1_test) { - conv_transpose_with_bias_test("../data/random_input_3_4_32_32.txt", - 2, 2, 1); - } - TEST_F(OperatorTest, conv_transpose_with_bias_3_1_1_test) { - conv_transpose_with_bias_test("../data/random_input_3_4_32_32.txt", - 3, 1, 1); - } - TEST_F(OperatorTest, conv_transpose_with_bias_3_2_0_test) { - conv_transpose_with_bias_test("../data/random_input_3_4_32_32.txt", - 3, 2, 0); - } - TEST_F(OperatorTest, conv_transpose_with_bias_3_2_1_test) { - conv_transpose_with_bias_test("../data/random_input_3_4_32_32.txt", - 3, 2, 1); - } - - TEST_F(OperatorTest, batch_norm_test) { - batch_norm_test( - "../data/random_input_3_4_32_32.txt", "../data/random_mean_4.txt", - "../data/random_var_4.txt", "../data/random_gamma_4.txt", - "../data/random_beta_4.txt", "../data/batch_normalization.txt"); - } - - TEST_F(OperatorTest, add_1d_test) { - add_test("../data/random_input_3_4096.txt", - "../data/random_input_3_4096.txt", "../data/add_1d.txt"); - } - TEST_F(OperatorTest, add_2d_test) { - add_test("../data/random_input_3_4_32_32.txt", - "../data/random_input_3_4_32_32.txt", - "../data/add_2d.txt"); - } - - TEST_F(OperatorTest, concat_1d_2_inputs_axis_0_test) { - concat_test({"../data/random_input_3_4096.txt", - "../data/random_input_3_4096.txt"}, - 0, "../data/concat_1d_6_4096.txt"); - } - TEST_F(OperatorTest, concat_1d_2_inputs_axis_1_test) { - concat_test({"../data/random_input_3_4096.txt", - "../data/random_input_3_4096.txt"}, - 1, "../data/concat_1d_3_8192.txt"); - } - TEST_F(OperatorTest, concat_1d_3_inputs_axis_0_test) { - concat_test({"../data/random_input_3_4096.txt", - "../data/random_input_3_4096.txt", - 
"../data/random_input_3_4096.txt"}, - 0, "../data/concat_1d_9_4096.txt"); - } - TEST_F(OperatorTest, concat_1d_3_inputs_axis_1_test) { - concat_test({"../data/random_input_3_4096.txt", - "../data/random_input_3_4096.txt", - "../data/random_input_3_4096.txt"}, - 1, "../data/concat_1d_3_12288.txt"); - } - - TEST_F(OperatorTest, lrn_alpha_00004_beta_075_bias_1_size_1_test) { - lrn_test("../data/random_input_3_4_32_32.txt", 0.0001, 0.75, 1, 1, - "../data/lrn_alpha0.0001_beta0.75_bias1_size1.txt"); - } - TEST_F(OperatorTest, lrn_alpha_00004_beta_075_bias_1_size_2_test) { - lrn_test("../data/random_input_3_4_32_32.txt", 0.0001, 0.75, 1, 2, - "../data/lrn_alpha0.0001_beta0.75_bias1_size2.txt"); - } - TEST_F(OperatorTest, lrn_alpha_00004_beta_075_bias_1_size_3_test) { - lrn_test("../data/random_input_3_4_32_32.txt", 0.0001, 0.75, 1, 3, - "../data/lrn_alpha0.0001_beta0.75_bias1_size3.txt"); - } - TEST_F(OperatorTest, lrn_alpha_00004_beta_075_bias_1_size_4_test) { - lrn_test("../data/random_input_3_4_32_32.txt", 0.0001, 0.75, 1, 4, - "../data/lrn_alpha0.0001_beta0.75_bias1_size4.txt"); - } - TEST_F(OperatorTest, lrn_alpha_00004_beta_075_bias_2_size_1_test) { - lrn_test("../data/random_input_3_4_32_32.txt", 0.0001, 0.75, 2, 1, - "../data/lrn_alpha0.0001_beta0.75_bias2_size1.txt"); - } - TEST_F(OperatorTest, lrn_alpha_00004_beta_075_bias_2_size_2_test) { - lrn_test("../data/random_input_3_4_32_32.txt", 0.0001, 0.75, 2, 2, - "../data/lrn_alpha0.0001_beta0.75_bias2_size2.txt"); - } - TEST_F(OperatorTest, lrn_alpha_00004_beta_075_bias_2_size_3_test) { - lrn_test("../data/random_input_3_4_32_32.txt", 0.0001, 0.75, 2, 3, - "../data/lrn_alpha0.0001_beta0.75_bias2_size3.txt"); - } - TEST_F(OperatorTest, lrn_alpha_00004_beta_075_bias_2_size_4_test) { - lrn_test("../data/random_input_3_4_32_32.txt", 0.0001, 0.75, 2, 4, - "../data/lrn_alpha0.0001_beta0.75_bias2_size4.txt"); - } - - TEST_F(OperatorTest, gemm_1d_test) { - gemm_test("../data/random_input_3_4096.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_1d_w256_4096_b_256.txt", 1, 1, 0, 1); - } - TEST_F(OperatorTest, gemm_2d_test) { - gemm_test("../data/random_input_3_4_32_32.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_2d_w256_4096_b_256.txt", 1, 1, 0, 1); - } - TEST_F(OperatorTest, gemm_1d_test_invalid_alpha) { - EXPECT_THROW( - { - gemm_test("../data/random_input_3_4096.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_1d_w256_4096_b_256.txt", 2, 1, 0, - 1); - }, - failed_to_configure_operator); - } - TEST_F(OperatorTest, gemm_2d_test_invalid_alpha) { - EXPECT_THROW( - { - gemm_test("../data/random_input_3_4_32_32.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_2d_w256_4096_b_256.txt", 2, 1, 0, - 1); - }, - failed_to_configure_operator); - } - TEST_F(OperatorTest, gemm_1d_test_invalid_beta) { - EXPECT_THROW( - { - gemm_test("../data/random_input_3_4096.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_1d_w256_4096_b_256.txt", 1, 2, 0, - 1); - }, - failed_to_configure_operator); - } - TEST_F(OperatorTest, gemm_2d_test_invalid_beta) { - EXPECT_THROW( - { - gemm_test("../data/random_input_3_4_32_32.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_2d_w256_4096_b_256.txt", 1, 2, 0, - 1); - }, - failed_to_configure_operator); - } - TEST_F(OperatorTest, 
gemm_1d_test_invalid_transA) { - EXPECT_THROW( - { - gemm_test("../data/random_input_3_4096.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_1d_w256_4096_b_256.txt", 1, 1, 1, - 1); - }, - failed_to_configure_operator); - } - TEST_F(OperatorTest, gemm_2d_test_invalid_transA) { - EXPECT_THROW( - { - gemm_test("../data/random_input_3_4_32_32.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_2d_w256_4096_b_256.txt", 1, 1, 1, - 1); - }, - failed_to_configure_operator); - } - TEST_F(OperatorTest, gemm_1d_test_invalid_transB) { - EXPECT_THROW( - { - gemm_test("../data/random_input_3_4096.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_1d_w256_4096_b_256.txt", 1, 1, 0, - 0); - }, - failed_to_configure_operator); - } - TEST_F(OperatorTest, gemm_2d_test_invalid_transB) { - EXPECT_THROW( - { - gemm_test("../data/random_input_3_4_32_32.txt", - "../data/random_weight_256_4096.txt", - "../data/random_bias_256.txt", - "../data/linear_2d_w256_4096_b_256.txt", 1, 1, 0, - 0); - }, - failed_to_configure_operator); - } +#undef TEST_OP_SQUASH_DIMS +#undef TEST_OP +#undef TEST_OP_IMPL - } // namespace mkldnn_backend -} // namespace menoh_impl +} // namespace
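Note on the rewritten test/operator.cpp above: the hand-written MKL-DNN primitive tests are replaced by data-driven tests that read the ONNX backend node test cases bundled in the external/onnx submodule (external/onnx/onnx/backend/test/data/node/). Each TEST_OP / TEST_OP_SQUASH_DIMS line expands, via TEST_OP_IMPL, into an ordinary googletest case that calls run_test with the backend name and the test directory name. As a minimal illustration only (not part of this patch), TEST_OP_SQUASH_DIMS(mkldnn, test_relu, eps) expands to roughly:

// Illustrative expansion of TEST_OP_SQUASH_DIMS(mkldnn, test_relu, eps),
// assuming the OperatorTest fixture and run_test defined in the diff above.
TEST_F(OperatorTest, mkldnn_test_relu) {
    // Loads input_*.pb / output_*.pb from
    // ../external/onnx/onnx/backend/test/data/node/test_relu/test_data_set_*/,
    // builds and runs the model on the "mkldnn" backend, and compares each
    // output against the expected tensors within eps. The final "true"
    // argument (squash_dims) collapses all trailing dimensions into the
    // second dimension before building the model.
    run_test("mkldnn", "test_relu", eps, true);
}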