diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..761d6132 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,254 @@ +cmake_minimum_required(VERSION 3.3) +project(CSI-NN2) + +enable_language(ASM) + +option(USE_CSI_NN2_DEBUG "option for debug" ON) + +option(BUILD_X86 "build x86" OFF) +option(BUILD_RISCV "build riscv" OFF) +option(BUILD_RISCV_ELF "build riscv elf" OFF) +option(BUILD_CSKY "build csky" OFF) +option(BUILD_CSKY_ELF "build csky elf" OFF) + +# riscv linux compiler +if (BUILD_RISCV) + set(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-gcc) + set(CMAKE_CXX_COMPILER riscv64-unknown-linux-gnu-g++) +endif() + +# riscv elf compiler +if (BUILD_RISCV_ELF) + set(CMAKE_C_COMPILER riscv64-unknown-elf-gcc) +endif() + +# csky linux compiler +if (BUILD_CSKY) + set(CMAKE_C_COMPILER csky-abiv2-linux-gcc) + set(CMAKE_ASM_COMPILER csky-abiv2-linux-gcc) +endif() + +# csky elf compiler +if (BUILD_CSKY_ELF) + set(CMAKE_C_COMPILER csky-abiv2-elf-gcc) + set(CMAKE_ASM_COMPILER csky-abiv2-elf-gcc) +endif() + +# CSI-NN2 debug module +if(USE_CSI_NN2_DEBUG) + add_definitions(-D CSI_DEBUG) +endif() + +# reduce elf size +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffunction-sections -fdata-sections -Wl,--gc-sections") + +# set warning as error +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror") + +file(GLOB_RECURSE NN2_SRCS source/nn2/*.c source/utils/*.c) +file(GLOB_RECURSE REF_SRCS source/reference/*.c) +file(GLOB_RECURSE GREF_SRCS source/graph_ref/*.c) +file(GLOB_RECURSE OPENVX_SRCS source/openvx/*.c) +file(GLOB_RECURSE PNNA_SRCS source/pnna/*.c source/pnna/*.cpp) +file(GLOB_RECURSE C906_SRCS source/c906_opt/*.c) +file(GLOB_RECURSE C908_SRCS source/c908/*.c) +file(GLOB_RECURSE THEAD_RVV_SRCS source/thead_rvv/*.c) +file(GLOB_RECURSE C860_SRCS source/c860_opt/*.S) +file(GLOB_RECURSE I805_REF_SRCS source/i805_ref/*.c) +file(GLOB_RECURSE I805_SRCS source/i805_opt/*.c source/i805_opt/*.S) +file(GLOB_RECURSE E804_SRCS source/e804_opt/*.c source/e804_opt/*.S) +file(GLOB_RECURSE CH8601_SRCS source/ch8601/*.c) +file(GLOB_RECURSE DP1K_SRCS source/dp1k/*.c) +file(GLOB_RECURSE ASP_SRCS source/asp/*.c) + +include_directories(include) + +option(CSINN_LAYER_BENCHMARK "Layer information and performance" OFF) +if(CSINN_LAYER_BENCHMARK) + add_definitions(-DCSINN_LAYER_BENCHMARK) + message(STATUS "Print the execution time of each layer - ON") +endif() + +if(BUILD_X86) + # build x86_ref so + LIST(APPEND X86_LST ${NN2_SRCS} ${REF_SRCS}) + add_library(x86_share SHARED ${X86_LST}) + SET_TARGET_PROPERTIES(x86_share PROPERTIES OUTPUT_NAME "csi_nn2_ref_x86") + set(X86_BUILD_FLAGS -DCSI_AVX_OPT -DCSI_BUILD_REF -mavx -mfma -fopenmp) + target_compile_options(x86_share PRIVATE ${X86_BUILD_FLAGS}) + + install(TARGETS x86_share DESTINATION lib) + + # build pnna x86 simulate so + LIST(APPEND PNNA_LST ${NN2_SRCS} ${REF_SRCS} ${PNNA_SRCS}) + add_library(pnna_share SHARED ${PNNA_LST}) + SET_TARGET_PROPERTIES(pnna_share PROPERTIES OUTPUT_NAME "csi_nn2_pnna_x86") + set(PNNA_BUILD_FLAGS -DCSI_BUILD_PNNA) + target_compile_options(pnna_share PRIVATE ${PNNA_BUILD_FLAGS}) + target_include_directories(pnna_share PRIVATE module/nna_ddk_install/include/) + set(PNNA_LINK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/module/nna_ddk_install/x86/) + target_link_libraries(pnna_share PRIVATE -L${PNNA_LINK_DIR} -limgdnn_csim -lnnasession_csim) + + install(TARGETS pnna_share DESTINATION lib) + + # build heterogeneous pnna x86 simulate so + LIST(APPEND HLIGHT_LST ${NN2_SRCS} ${REF_SRCS} ${GREF_SRCS} ${PNNA_SRCS}) + add_library(hlight_share SHARED ${HLIGHT_LST}) + SET_TARGET_PROPERTIES(hlight_share PROPERTIES OUTPUT_NAME "csi_nn2_hlight_x86") + set(HLIGHT_BUILD_FLAGS -DCSI_BUILD_REF -DCSI_BUILD_GREF -DCSI_BUILD_PNNA) + target_compile_options(hlight_share PRIVATE ${HLIGHT_BUILD_FLAGS}) + target_include_directories(hlight_share PRIVATE module/nna_ddk_install/include/) + set(PNNA_LINK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/module/nna_ddk_install/x86/) + target_link_libraries(hlight_share PRIVATE -L${PNNA_LINK_DIR} -limgdnn_csim -lnnasession_csim) + + install(TARGETS hlight_share DESTINATION lib) + + # build ch8601 so + LIST(APPEND CH8601_LST ${NN2_SRCS} ${CH8601_SRCS}) + add_library(ch8601_share SHARED ${CH8601_LST}) + SET_TARGET_PROPERTIES(ch8601_share PROPERTIES OUTPUT_NAME "csi_nn2_ch8601") + set(CH8601_BUILD_FLAGS -DCSI_BUILD_REF -DCSI_BUILD_CH8601) + target_compile_options(ch8601_share PRIVATE ${PNNA_BUILD_FLAGS}) + + install(TARGETS ch8601_share DESTINATION lib) + + # build dp1k so + LIST(APPEND DP1K_LST ${NN2_SRCS} ${DP1K_SRCS}) + add_library(dp1k_share SHARED ${DP1K_LST}) + SET_TARGET_PROPERTIES(dp1k_share PROPERTIES OUTPUT_NAME "csi_nn2_dp1000") + set(DP1K_BUILD_FLAGS -DCSI_BUILD_DP1K -DCSI_BUILD_REF) + target_compile_options(dp1k_share PRIVATE ${DP1K_BUILD_FLAGS}) + + install(TARGETS dp1k_share DESTINATION lib) +endif() + +if(BUILD_RISCV) + # build rvv a + LIST(APPEND RVV_LST ${NN2_SRCS} ${REF_SRCS} ${GREF_SRCS} ${THEAD_RVV_SRCS}) + add_library(rvv_static STATIC ${RVV_LST}) + SET_TARGET_PROPERTIES(rvv_static PROPERTIES OUTPUT_NAME "csi_nn2_rvv") + set(RVV_BUILD_FLAGS -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d -DCSI_BUILD_RVV -DCSI_BUILD_REF -DCSI_BUILD_GREF) + target_compile_options(rvv_static PRIVATE ${RVV_BUILD_FLAGS}) + + install(TARGETS rvv_static DESTINATION lib) + + # build c906 a + LIST(APPEND C906_LST ${NN2_SRCS} ${REF_SRCS} ${GREF_SRCS} ${THEAD_RVV_SRCS} ${C906_SRCS}) + add_library(c906_static STATIC ${C906_LST}) + SET_TARGET_PROPERTIES(c906_static PROPERTIES OUTPUT_NAME "csi_nn2_c906") + set(C906_BUILD_FLAGS -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d -DCSI_BUILD_C906 -DCSI_BUILD_REF -DCSI_BUILD_GREF) + target_compile_options(c906_static PRIVATE ${C906_BUILD_FLAGS}) + + install(TARGETS c906_static DESTINATION lib) + + # build c908 a + LIST(APPEND C908_LST ${NN2_SRCS} ${REF_SRCS} ${GREF_SRCS} ${THEAD_RVV_SRCS} ${C908_SRCS}) + add_library(c908_static STATIC ${C908_LST}) + SET_TARGET_PROPERTIES(c908_static PROPERTIES OUTPUT_NAME "csi_nn2_c908") + set(C908_BUILD_FLAGS -march=rv64gcv_zfh_xtheadc_xtheadv -mabi=lp64d -DCSI_BUILD_C908 -DCSI_BUILD_REF -DCSI_BUILD_GREF) + target_compile_options(c908_static PRIVATE ${C908_BUILD_FLAGS}) + + install(TARGETS c908_static DESTINATION lib) + + # build pnna so + LIST(APPEND PNNA_LST ${NN2_SRCS} ${REF_SRCS} ${PNNA_SRCS}) + add_library(pnna_share SHARED ${PNNA_LST}) + SET_TARGET_PROPERTIES(pnna_share PROPERTIES OUTPUT_NAME "csi_nn2_pnna") + set(PNNA_BUILD_FLAGS -DCSI_BUILD_PNNA) + target_compile_options(pnna_share PRIVATE ${PNNA_BUILD_FLAGS}) + target_include_directories(pnna_share PRIVATE module/nna_ddk_install/include/) + set(PNNA_LINK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/module/nna_ddk_install/light/) + target_link_libraries(pnna_share PRIVATE -L${PNNA_LINK_DIR} -limgdnn -lnnasession) + + install(TARGETS pnna_share DESTINATION lib) + + # build heterogeneous pnna so + LIST(APPEND HLIGHT_LST ${NN2_SRCS} ${REF_SRCS} ${GREF_SRCS} ${PNNA_SRCS}) + add_library(hlight_share SHARED ${HLIGHT_LST}) + SET_TARGET_PROPERTIES(hlight_share PROPERTIES OUTPUT_NAME "csi_nn2_hlight") + set(HLIGHT_BUILD_FLAGS -DCSI_BUILD_REF -DCSI_BUILD_GREF -DCSI_BUILD_PNNA) + target_compile_options(hlight_share PRIVATE ${HLIGHT_BUILD_FLAGS}) + target_include_directories(hlight_share PRIVATE module/nna_ddk_install/include/) + set(PNNA_LINK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/module/nna_ddk_install/light/) + target_link_libraries(hlight_share PRIVATE -L${PNNA_LINK_DIR} -limgdnn -lnnasession) + + install(TARGETS hlight_share DESTINATION lib) +endif() + +if(BUILD_RISCV_ELF) + # build c906 elf a + LIST(APPEND C906_LST ${NN2_SRCS} ${REF_SRCS} ${GREF_SRCS} ${THEAD_RVV_SRCS} ${C906_SRCS}) + add_library(c906_elf_static STATIC ${C906_LST}) + SET_TARGET_PROPERTIES(c906_elf_static PROPERTIES OUTPUT_NAME "csi_nn2_c906_rtos") + set(C906_BUILD_FLAGS -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d -DCSI_BUILD_C906 -DCSI_BUILD_REF -DCSI_BUILD_GREF -DCSI_BUILD_RTOS) + target_compile_options(c906_elf_static PRIVATE ${C906_BUILD_FLAGS}) + + install(TARGETS c906_elf_static DESTINATION lib) + + # build ASP elf a + LIST(APPEND ASP_LST ${NN2_SRCS} ${REF_SRCS} ${GREF_SRCS} ${ASP_SRCS}) + add_library(asp_elf_static STATIC ${ASP_LST}) + SET_TARGET_PROPERTIES(asp_elf_static PROPERTIES OUTPUT_NAME "csi_nn2_asp") + set(ASP_BUILD_FLAGS -march=rv32imafdcp -mabi=ilp32d -DCSI_BUILD_ASP -DCSI_BUILD_REF -DCSI_BUILD_GREF -DCSI_BUILD_RTOS) + target_compile_options(asp_elf_static PRIVATE ${ASP_BUILD_FLAGS}) + + install(TARGETS asp_elf_static DESTINATION lib) +endif() + +if(BUILD_CSKY) + # build openvx so + LIST(APPEND OPENVX_LST ${NN2_SRCS} ${OPENVX_SRCS}) + add_library(openvx_share SHARED ${OPENVX_LST}) + SET_TARGET_PROPERTIES(openvx_share PROPERTIES OUTPUT_NAME "csi_nn2_openvx") + set(OPENVX_BUILD_FLAGS -mcpu=c860v -fPIC -DCSI_BUILD_OPENVX -mhard-float) + target_compile_options(openvx_share PRIVATE ${OPENVX_BUILD_FLAGS}) + set(OPENVX_LINK_DIR ${CMAKE_CURRENT_SOURCE_DIR}/module/acuity-driver/driver/build/sdk/drivers) + target_link_libraries(openvx_share PRIVATE -mcpu=c860v -fPIC -mhard-float -L${OPENVX_LINK_DIR} -lArchModelSw -lNNArchPerf -lOpenVX -lOpenVXU -lCLC -lVSC -lGAL -lNNGPUBinary -lovxlib -lOvx12VXCBinary) + target_include_directories(openvx_share PRIVATE module/acuity-driver/lib/acuity-ovxlib-dev/include/) + target_include_directories(openvx_share PRIVATE module/acuity-driver/driver/build/sdk/include/) + + install(TARGETS openvx_share DESTINATION lib) + + # build c860 a + LIST(APPEND C860_LST ${NN2_SRCS} ${REF_SRCS} ${C860_SRCS}) + add_library(c860_static STATIC ${C860_LST}) + SET_TARGET_PROPERTIES(c860_static PROPERTIES OUTPUT_NAME "csi_nn2_c860") + set(C860_BUILD_FLAGS -mcpu=c860v -DCSI_BUILD_REF) + target_compile_options(c860_static PRIVATE ${C860_BUILD_FLAGS}) + + install(TARGETS c860_static DESTINATION lib) +endif() + +if(BUILD_CSKY_ELF) + # build i805 ref a + LIST(APPEND I805_REF_LST ${NN2_SRCS} ${REF_SRCS} ${I805_REF_SRCS}) + add_library(i805_ref_static STATIC ${I805_REF_LST}) + SET_TARGET_PROPERTIES(i805_ref_static PROPERTIES OUTPUT_NAME "csi_nn2_ref_i805") + set(I805_REF_BUILD_FLAGS -DCSI_BUILD_REF_I805 -DCSI_MATH_DSP -DCSI_BUILD_RTOS -mcpu=i805) + target_compile_options(i805_ref_static PRIVATE ${I805_REF_BUILD_FLAGS}) + target_include_directories(i805_ref_static PRIVATE include/include_xt800) + + install(TARGETS i805_ref_static DESTINATION lib) + + # build i805 a + LIST(APPEND I805_LST ${NN2_SRCS} ${REF_SRCS} ${I805_SRCS}) + add_library(i805_static STATIC ${I805_LST}) + SET_TARGET_PROPERTIES(i805_static PROPERTIES OUTPUT_NAME "csi_nn2_i805") + set(I805_BUILD_FLAGS -DCSI_BUILD_I805 -DCSI_MATH_DSP -DCSI_BUILD_RTOS -mcpu=ck805ef -mhard-float) + target_compile_options(i805_static PRIVATE ${I805_BUILD_FLAGS}) + target_include_directories(i805_static PRIVATE include/include_xt800) + + install(TARGETS i805_static DESTINATION lib) + + # build e804 a + LIST(APPEND E804_LST ${NN2_SRCS} ${REF_SRCS} ${E804_SRCS}) + add_library(e804_static STATIC ${E804_LST}) + SET_TARGET_PROPERTIES(e804_static PROPERTIES OUTPUT_NAME "csi_nn2_e804") + set(E804_BUILD_FLAGS -DCSI_BUILD_E804 -mcpu=e804d -DCSI_BUILD_RTOS -mno-required-attr-fpu-abi) + target_compile_options(e804_static PRIVATE ${E804_BUILD_FLAGS}) + target_include_directories(e804_static PRIVATE include/include_xt800) + + install(TARGETS e804_static DESTINATION lib) +endif() + +install(DIRECTORY "include/." DESTINATION "include" FILES_MATCHING PATTERN "*.h") diff --git a/Makefile b/Makefile index 087bd813..8100ba6a 100644 --- a/Makefile +++ b/Makefile @@ -1,61 +1,52 @@ -CROSS_COMPILE ?= csky-abiv2-elf- -INSTALL_DIR = ../../lib/ -NN2_ROOT := $(shell pwd) +all: nn2_ref_x86 -ifeq ($(GCOV),y) - EXTRA_CFLAGS = -fprofile-arcs -ftest-coverage -g -O0 - LIBS += -fprofile-arcs -ftest-coverage -lgcov -else - EXTRA_CFLAGS = -O2 -g -Werror -DCSI_DEBUG -endif +nn2_c860: + mkdir -p csky_build; cd csky_build; cmake ../ -DBUILD_CSKY=ON -DCMAKE_BUILD_TYPE=Release; make c860_static -j8; cd - -export CROSS_COMPILE INSTALL_DIR +nn2_rvv: + mkdir -p riscv_build; cd riscv_build; cmake ../ -DBUILD_RISCV=ON -DCMAKE_BUILD_TYPE=Release; make rvv_static -j8; cd - +nn2_c906: + mkdir -p riscv_build; cd riscv_build; cmake ../ -DBUILD_RISCV=ON -DCMAKE_BUILD_TYPE=Release; make c906_static -j8; cd - -all: nn2_ref_x86 +nn2_c906_elf: + mkdir -p riscv_elf_build; cd riscv_elf_build; cmake ../ -DBUILD_RISCV_ELF=ON -DCMAKE_BUILD_TYPE=Release; make c906_elf_static -j8; cd - -nn2_c860: - DSP_LIB="libcsi_nn2_c860" CFLAGS="-mcpu=c860v -DCSI_BUILD_REF $(EXTRA_CFLAGS)" \ - CROSS_COMPILE="csky-abiv2-linux-" NN2_ROOT=${NN2_ROOT} make -C build_script/nn2_c860 -j8 - cd source/; find . -name *.o | xargs rm; cd - +nn2_asp_elf: + mkdir -p riscv_elf_build; cd riscv_elf_build; cmake ../ -DBUILD_RISCV_ELF=ON -DCMAKE_BUILD_TYPE=Release; make asp_elf_static -j8; cd - -nn2_c906: - DSP_LIB="libcsi_nn2_c906" CFLAGS="-march=rv64gcvxthead -mabi=lp64dv -DCSI_BUILD_C906 -DCSI_BUILD_REF -DCSI_BUILD_GREF $(EXTRA_CFLAGS)" \ - CROSS_COMPILE="riscv64-unknown-linux-gnu-" NN2_ROOT=${NN2_ROOT} make -C build_script/nn2_c906 -j8 - cd source/; find . -name *.o | xargs rm; cd - +nn2_c908: + mkdir -p riscv_build; cd riscv_build; cmake ../ -DBUILD_RISCV=ON -DCMAKE_BUILD_TYPE=Release; make c908_static -j8; cd - nn2_ref_x86: - DSP_LIB="libcsi_nn2_ref_x86" CFLAGS="$(EXTRA_CFLAGS) -DCSI_BUILD_REF -fPIC -DCSI_AVX_OPT -mavx -mfma -fopenmp" \ - CROSS_COMPILE="" NN2_ROOT=${NN2_ROOT} make -C build_script/nn2_ref -j8 - cd source/; find . -name *.o | xargs rm; cd - - DSP_LIB="libcsi_nn2_ref_x86" CFLAGS="$(EXTRA_CFLAGS) -DCSI_BUILD_REF -fPIC -DCSI_AVX_OPT -mavx -mfma -fopenmp" \ - CROSS_COMPILE="" NN2_ROOT=${NN2_ROOT} make -C build_script/nn2_ref nn2_shared -j8 - cd source/; find . -name *.o | xargs rm; cd - + mkdir -p x86_build; cd x86_build; cmake ../ -DBUILD_X86=ON -DCMAKE_BUILD_TYPE=Release; make x86_share -j8; cd - + +nn2_openvx: + mkdir -p csky_build; cd csky_build; cmake ../ -DBUILD_CSKY=ON -DCMAKE_BUILD_TYPE=Release; make openvx_share -j8; cd - -nn2_ref_i805: - DSP_LIB="libcsi_nn2_ref_i805.a" CFLAGS="-DCSI_BUILD_REF_I805 -DCSI_MATH_DSP -mcpu=i805 $(EXTRA_CFLAGS)" \ - CROSS_COMPILE="csky-abiv2-elf-" NN2_ROOT=${NN2_ROOT} make -C build_script/nn2_ref_i805 -j8 - cd source/; find . -name *.o | xargs rm; cd - +nn2_pnna: + mkdir -p riscv_build; cd riscv_build; cmake ../ -DBUILD_RISCV=ON -DCMAKE_BUILD_TYPE=Release; make pnna_share -j8; cd - -nn2_e804: - DSP_LIB="libcsi_nn2_e804.a" CFLAGS="-DCSI_BUILD_E804 -mcpu=e804d -mno-required-attr-fpu-abi $(EXTRA_CFLAGS)" \ - CROSS_COMPILE="csky-abiv2-elf-" NN2_ROOT=${NN2_ROOT} make -C build_script/nn2_e804 -j8 - cd source/; find . -name *.o | xargs rm; cd - +nn2_pnna_x86: + mkdir -p x86_build; cd x86_build; cmake ../ -DBUILD_X86=ON -DCMAKE_BUILD_TYPE=Release; make pnna_share -j8; cd - -nn2_i805: - DSP_LIB="libcsi_nn2_i805.a" CFLAGS="-DCSI_BUILD_I805 -DCSI_BUILD_REF -DCSI_BUILD_GREF -mcpu=ck805ef -mhard-float $(EXTRA_CFLAGS)" \ - CROSS_COMPILE="csky-abiv2-elf-" NN2_ROOT=${NN2_ROOT} make -C build_script/nn2_i805 -j8 - cd source/; find . -name *.o | xargs rm; cd - +nn2_hlight_x86: + mkdir -p x86_build; cd x86_build; cmake ../ -DBUILD_X86=ON -DCMAKE_BUILD_TYPE=Release; make hlight_share -j8; cd - +nn2_hlight: + mkdir -p riscv_build; cd riscv_build; cmake ../ -DBUILD_RISCV=ON -DCMAKE_BUILD_TYPE=Release; make hlight_share -j8; cd - .PHONY: install_nn2 install_nn2: include mkdir -p install_nn2/lib cp include install_nn2 -r - cp lib/libcsi_nn2_* install_nn2/lib -rf + -cp riscv_build/libcsi_nn2_* install_nn2/lib -rf + -cp csky_build/libcsi_nn2_* install_nn2/lib -rf + -cp x86_build/libcsi_nn2_* install_nn2/lib -rf cp version install_nn2/ -rf +clint: + ./script/git-clang-format.sh origin/master clean: - rm lib/* -rf - find . -name *.o | xargs rm -rf + rm lib/* -rf diff --git a/build_script/nn2.objs b/build_script/nn2.objs deleted file mode 100644 index 95f00c87..00000000 --- a/build_script/nn2.objs +++ /dev/null @@ -1,161 +0,0 @@ -objects += ${NN2_ROOT}/source/nn2/elu.o -objects += ${NN2_ROOT}/source/nn2/relu.o -objects += ${NN2_ROOT}/source/nn2/relu1.o -objects += ${NN2_ROOT}/source/nn2/relu6.o -objects += ${NN2_ROOT}/source/nn2/relun.o -objects += ${NN2_ROOT}/source/nn2/prelu.o -objects += ${NN2_ROOT}/source/nn2/leaky_relu.o -objects += ${NN2_ROOT}/source/nn2/softrelu.o -objects += ${NN2_ROOT}/source/nn2/sigmoid.o -objects += ${NN2_ROOT}/source/nn2/hard_sigmoid.o -objects += ${NN2_ROOT}/source/nn2/softplus.o -objects += ${NN2_ROOT}/source/nn2/softsign.o -objects += ${NN2_ROOT}/source/nn2/softmax.o -objects += ${NN2_ROOT}/source/nn2/log_softmax.o -objects += ${NN2_ROOT}/source/nn2/flatten.o -objects += ${NN2_ROOT}/source/nn2/fsmn.o -objects += ${NN2_ROOT}/source/nn2/fullyconnected.o -objects += ${NN2_ROOT}/source/nn2/maxpool.o -objects += ${NN2_ROOT}/source/nn2/maxpool3d.o -objects += ${NN2_ROOT}/source/nn2/averagepool.o -objects += ${NN2_ROOT}/source/nn2/averagepool3d.o -objects += ${NN2_ROOT}/source/nn2/global_averagepool.o -objects += ${NN2_ROOT}/source/nn2/global_maxpool.o -objects += ${NN2_ROOT}/source/nn2/l2pool.o -objects += ${NN2_ROOT}/source/nn2/maxpool2d_locat.o -objects += ${NN2_ROOT}/source/nn2/unpooling.o -objects += ${NN2_ROOT}/source/nn2/lrn.o -objects += ${NN2_ROOT}/source/nn2/l2_normalization.o -objects += ${NN2_ROOT}/source/nn2/batch_normalization.o -objects += ${NN2_ROOT}/source/nn2/convolution.o -objects += ${NN2_ROOT}/source/nn2/convolution_relu.o -objects += ${NN2_ROOT}/source/nn2/convolution_relu6.o -objects += ${NN2_ROOT}/source/nn2/convolution3d.o -objects += ${NN2_ROOT}/source/nn2/deconvolution.o -objects += ${NN2_ROOT}/source/nn2/deconvolution3d.o -objects += ${NN2_ROOT}/source/nn2/proposal.o -objects += ${NN2_ROOT}/source/nn2/psroipooling.o -objects += ${NN2_ROOT}/source/nn2/transpose.o -objects += ${NN2_ROOT}/source/nn2/abs.o -objects += ${NN2_ROOT}/source/nn2/add.o -objects += ${NN2_ROOT}/source/nn2/cos.o -objects += ${NN2_ROOT}/source/nn2/cosh.o -objects += ${NN2_ROOT}/source/nn2/acos.o -objects += ${NN2_ROOT}/source/nn2/acosh.o -objects += ${NN2_ROOT}/source/nn2/div.o -objects += ${NN2_ROOT}/source/nn2/floor.o -objects += ${NN2_ROOT}/source/nn2/floor_divide.o -objects += ${NN2_ROOT}/source/nn2/floor_mod.o -objects += ${NN2_ROOT}/source/nn2/maximum.o -objects += ${NN2_ROOT}/source/nn2/minimum.o -objects += ${NN2_ROOT}/source/nn2/power.o -objects += ${NN2_ROOT}/source/nn2/greater.o -objects += ${NN2_ROOT}/source/nn2/less.o -objects += ${NN2_ROOT}/source/nn2/equal.o -objects += ${NN2_ROOT}/source/nn2/not_equal.o -objects += ${NN2_ROOT}/source/nn2/greater_equal.o -objects += ${NN2_ROOT}/source/nn2/less_equal.o -objects += ${NN2_ROOT}/source/nn2/logical_and.o -objects += ${NN2_ROOT}/source/nn2/logical_or.o -objects += ${NN2_ROOT}/source/nn2/logical_not.o -objects += ${NN2_ROOT}/source/nn2/logical_xor.o -objects += ${NN2_ROOT}/source/nn2/log.o -objects += ${NN2_ROOT}/source/nn2/log1p.o -objects += ${NN2_ROOT}/source/nn2/mul.o -objects += ${NN2_ROOT}/source/nn2/rsqrt.o -objects += ${NN2_ROOT}/source/nn2/select.o -objects += ${NN2_ROOT}/source/nn2/sin.o -objects += ${NN2_ROOT}/source/nn2/asin.o -objects += ${NN2_ROOT}/source/nn2/sinh.o -objects += ${NN2_ROOT}/source/nn2/asinh.o -objects += ${NN2_ROOT}/source/nn2/sqrt.o -objects += ${NN2_ROOT}/source/nn2/square.o -objects += ${NN2_ROOT}/source/nn2/sub.o -objects += ${NN2_ROOT}/source/nn2/matmul.o -objects += ${NN2_ROOT}/source/nn2/mod.o -objects += ${NN2_ROOT}/source/nn2/and.o -objects += ${NN2_ROOT}/source/nn2/xor.o -objects += ${NN2_ROOT}/source/nn2/not.o -objects += ${NN2_ROOT}/source/nn2/or.o -objects += ${NN2_ROOT}/source/nn2/col2im.o -objects += ${NN2_ROOT}/source/nn2/im2col.o -objects += ${NN2_ROOT}/source/nn2/concat.o -objects += ${NN2_ROOT}/source/nn2/pad.o -objects += ${NN2_ROOT}/source/nn2/crop.o -objects += ${NN2_ROOT}/source/nn2/reshape.o -objects += ${NN2_ROOT}/source/nn2/shape.o -objects += ${NN2_ROOT}/source/nn2/tile.o -objects += ${NN2_ROOT}/source/nn2/arange.o -objects += ${NN2_ROOT}/source/nn2/sequence_mask.o -objects += ${NN2_ROOT}/source/nn2/ndarray_size.o -objects += ${NN2_ROOT}/source/nn2/one_hot.o -objects += ${NN2_ROOT}/source/nn2/space_to_batch.o -objects += ${NN2_ROOT}/source/nn2/space_to_batch_nd.o -objects += ${NN2_ROOT}/source/nn2/batch_to_space.o -objects += ${NN2_ROOT}/source/nn2/batch_to_space_nd.o -objects += ${NN2_ROOT}/source/nn2/space_to_depth.o -objects += ${NN2_ROOT}/source/nn2/depth_to_space.o -objects += ${NN2_ROOT}/source/nn2/expand_dims.o -objects += ${NN2_ROOT}/source/nn2/slice.o -objects += ${NN2_ROOT}/source/nn2/reverse.o -objects += ${NN2_ROOT}/source/nn2/stack.o -objects += ${NN2_ROOT}/source/nn2/unstack.o -objects += ${NN2_ROOT}/source/nn2/split.o -objects += ${NN2_ROOT}/source/nn2/where.o -objects += ${NN2_ROOT}/source/nn2/gather.o -objects += ${NN2_ROOT}/source/nn2/gather_nd.o -objects += ${NN2_ROOT}/source/nn2/squeeze.o -objects += ${NN2_ROOT}/source/nn2/tan.o -objects += ${NN2_ROOT}/source/nn2/atan.o -objects += ${NN2_ROOT}/source/nn2/tanh.o -objects += ${NN2_ROOT}/source/nn2/atanh.o -objects += ${NN2_ROOT}/source/nn2/negative.o -objects += ${NN2_ROOT}/source/nn2/ceil.o -objects += ${NN2_ROOT}/source/nn2/sign.o -objects += ${NN2_ROOT}/source/nn2/trunc.o -objects += ${NN2_ROOT}/source/nn2/isnan.o -objects += ${NN2_ROOT}/source/nn2/round.o -objects += ${NN2_ROOT}/source/nn2/exp.o -objects += ${NN2_ROOT}/source/nn2/expm1.o -objects += ${NN2_ROOT}/source/nn2/resize.o -objects += ${NN2_ROOT}/source/nn2/argmax.o -objects += ${NN2_ROOT}/source/nn2/argmin.o -objects += ${NN2_ROOT}/source/nn2/sum.o -objects += ${NN2_ROOT}/source/nn2/mean.o -objects += ${NN2_ROOT}/source/nn2/max.o -objects += ${NN2_ROOT}/source/nn2/min.o -objects += ${NN2_ROOT}/source/nn2/all.o -objects += ${NN2_ROOT}/source/nn2/any.o -objects += ${NN2_ROOT}/source/nn2/prod.o -objects += ${NN2_ROOT}/source/nn2/segment_max.o -objects += ${NN2_ROOT}/source/nn2/segment_min.o -objects += ${NN2_ROOT}/source/nn2/segment_sum.o -objects += ${NN2_ROOT}/source/nn2/segment_mean.o -objects += ${NN2_ROOT}/source/nn2/segment_prod.o -objects += ${NN2_ROOT}/source/nn2/threshold_relu.o -objects += ${NN2_ROOT}/source/nn2/batch_normalization.o -objects += ${NN2_ROOT}/source/nn2/yuv_rgb_scale.o -objects += ${NN2_ROOT}/source/nn2/roialign.o -objects += ${NN2_ROOT}/source/nn2/roipool.o -objects += ${NN2_ROOT}/source/nn2/reorg.o -objects += ${NN2_ROOT}/source/nn2/cumsum.o -objects += ${NN2_ROOT}/source/nn2/cumprod.o -objects += ${NN2_ROOT}/source/nn2/reduce_max.o -objects += ${NN2_ROOT}/source/nn2/reduce_min.o -objects += ${NN2_ROOT}/source/nn2/reduce_sum.o -objects += ${NN2_ROOT}/source/nn2/reduce_prod.o -objects += ${NN2_ROOT}/source/nn2/reduce_mean.o -objects += ${NN2_ROOT}/source/nn2/reduce_logsumexp.o -objects += ${NN2_ROOT}/source/nn2/broadcast_to.o -objects += ${NN2_ROOT}/source/nn2/clip.o -objects += ${NN2_ROOT}/source/nn2/strided_slice.o -objects += ${NN2_ROOT}/source/nn2/topk.o -objects += ${NN2_ROOT}/source/nn2/non_max_suppression.o -objects += ${NN2_ROOT}/source/nn2/shuffle_channel.o -objects += ${NN2_ROOT}/source/nn2/erf.o -objects += ${NN2_ROOT}/source/nn2/utils.o -objects += ${NN2_ROOT}/source/nn2/setup.o -objects += ${NN2_ROOT}/source/nn2/node.o -objects += ${NN2_ROOT}/source/nn2/scatter.o -objects += ${NN2_ROOT}/source/nn2/memory.o -objects += ${NN2_ROOT}/source/nn2/debug.o diff --git a/build_script/nn2_c860/Makefile b/build_script/nn2_c860/Makefile deleted file mode 100644 index d4e95315..00000000 --- a/build_script/nn2_c860/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -CROSS_COMPILE ?= csky-abiv2-linux- -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar - -CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -mhard-float -INCLUDE = -I../../include - -all: nn2 - -include nn2.objs -include ../nn2.objs -include ../nn2_ref/nn2.objs - -nn2: $(objects) - $(AR) rcs $(INSTALL_DIR)/$(DSP_LIB).a $^ - -nn2_shared: $(objects) - $(CC) -shared -o $(INSTALL_DIR)/$(DSP_LIB).so $^ - -%.o: %.c - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ -%.o: %.S - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ diff --git a/build_script/nn2_c860/nn2.objs b/build_script/nn2_c860/nn2.objs deleted file mode 100644 index a168cfb9..00000000 --- a/build_script/nn2_c860/nn2.objs +++ /dev/null @@ -1 +0,0 @@ -objects += ${NN2_ROOT}/source/c860_opt/utils.o diff --git a/build_script/nn2_c906/Makefile b/build_script/nn2_c906/Makefile deleted file mode 100644 index f6237417..00000000 --- a/build_script/nn2_c906/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -CROSS_COMPILE ?= riscv64-unknown-linux-gnu- -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar - -CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -INCLUDE = -I../../include - -all: nn2_c906 - -include nn2_c906.objs ../nn2.objs ../nn2_ref/nn2.objs ../nn2_gref/nn2.objs - -nn2_c906: $(objects) - $(AR) rcs $(INSTALL_DIR)/$(DSP_LIB).a $^ - -nn2_shared: $(objects) - $(CC) -shared -o $(INSTALL_DIR)/$(DSP_LIB).so $^ - -%.o: %.c - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ -%.o: %.S - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ diff --git a/build_script/nn2_c906/nn2_c906.objs b/build_script/nn2_c906/nn2_c906.objs deleted file mode 100644 index a6519aca..00000000 --- a/build_script/nn2_c906/nn2_c906.objs +++ /dev/null @@ -1,47 +0,0 @@ -objects += ${NN2_ROOT}/source/c906_opt/abs.o - -objects += ${NN2_ROOT}/source/c906_opt/add.o -objects += ${NN2_ROOT}/source/c906_opt/sub.o -objects += ${NN2_ROOT}/source/c906_opt/mul.o -objects += ${NN2_ROOT}/source/c906_opt/minimum.o - -objects += ${NN2_ROOT}/source/c906_opt/broadcast_to.o -objects += ${NN2_ROOT}/source/c906_opt/clip.o -objects += ${NN2_ROOT}/source/c906_opt/concat.o -objects += ${NN2_ROOT}/source/c906_opt/split.o -objects += ${NN2_ROOT}/source/c906_opt/convolution.o -objects += ${NN2_ROOT}/source/c906_opt/convolution_relu.o - -objects += ${NN2_ROOT}/source/c906_opt/fullyconnected.o -objects += ${NN2_ROOT}/source/c906_opt/maxpool.o -objects += ${NN2_ROOT}/source/c906_opt/global_maxpool.o -objects += ${NN2_ROOT}/source/c906_opt/avgpool.o -objects += ${NN2_ROOT}/source/c906_opt/global_avgpool.o - -objects += ${NN2_ROOT}/source/c906_opt/pad.o -objects += ${NN2_ROOT}/source/c906_opt/prelu.o -objects += ${NN2_ROOT}/source/c906_opt/relu.o -objects += ${NN2_ROOT}/source/c906_opt/relu1.o -objects += ${NN2_ROOT}/source/c906_opt/relu6.o - -objects += ${NN2_ROOT}/source/c906_opt/leaky_relu.o -objects += ${NN2_ROOT}/source/c906_opt/utils.o - -objects += ${NN2_ROOT}/source/c906_opt/setup.o - -objects += ${NN2_ROOT}/source/c906_opt/sgemm.o -objects += ${NN2_ROOT}/source/c906_opt/gemm_fp16.o -objects += ${NN2_ROOT}/source/c906_opt/convolution_1x1.o -objects += ${NN2_ROOT}/source/c906_opt/convolution_1x1_fp16.o -objects += ${NN2_ROOT}/source/c906_opt/convolution_3x3.o -objects += ${NN2_ROOT}/source/c906_opt/convolution_3x3_fp16.o -objects += ${NN2_ROOT}/source/c906_opt/convolution_sgemm.o -objects += ${NN2_ROOT}/source/c906_opt/convolution_gemm_fp16.o -objects += ${NN2_ROOT}/source/c906_opt/depthwise_convolution_3x3.o -objects += ${NN2_ROOT}/source/c906_opt/depthwise_convolution_3x3_fp16.o -objects += ${NN2_ROOT}/source/c906_opt/depthwise_convolution_3x3_pack4.o -objects += ${NN2_ROOT}/source/c906_opt/depthwise_convolution_3x3_pack8_fp16.o -objects += ${NN2_ROOT}/source/c906_opt/depthwise_convolution_5x5.o -objects += ${NN2_ROOT}/source/c906_opt/depthwise_convolution_relu_3x3.o -objects += ${NN2_ROOT}/source/c906_opt/depthwise_convolution_relu_3x3_pack4.o -objects += ${NN2_ROOT}/source/c906_opt/depthwise_convolution_relu_5x5.o diff --git a/build_script/nn2_e804/Makefile b/build_script/nn2_e804/Makefile deleted file mode 100644 index 7b06ee9b..00000000 --- a/build_script/nn2_e804/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -CROSS_COMPILE ?= csky-abiv2-elf- -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar - -CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -INCLUDE = -I../../include/include_xt800/dsp_include/ -I../../include/include_xt800/csi_core_include/ -I../../include/include_xt800/nn_include -INCLUDE += -I../../include - -all: nn2_e804 - -include nn2_dsp2.objs -include ../nn2.objs -include ../nn2_ref/nn2.objs - -nn2_e804: $(objects) - $(AR) rcs $(INSTALL_DIR)/$(DSP_LIB) $^ - -%.o: %.c - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ -%.o: %.S - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ diff --git a/build_script/nn2_e804/nn2_dsp2.objs b/build_script/nn2_e804/nn2_dsp2.objs deleted file mode 100644 index 3284db51..00000000 --- a/build_script/nn2_e804/nn2_dsp2.objs +++ /dev/null @@ -1,38 +0,0 @@ -#activation -objects += ${NN2_ROOT}/source/e804_opt/activation/csi_xt800p_nn_activations_q15.o -objects += ${NN2_ROOT}/source/e804_opt/activation/csi_xt800p_nn_activations_q7.o -objects += ${NN2_ROOT}/source/e804_opt/activation/csi_xt800p_relu_q15.o -objects += ${NN2_ROOT}/source/e804_opt/activation/csi_xt800p_relu_q7.o - -#convolution -objects += ${NN2_ROOT}/source/e804_opt/convolution/csi_xt800p_convolve_1x1_HWC_q7_fast.o -objects += ${NN2_ROOT}/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q15_basic.o -objects += ${NN2_ROOT}/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q7_RGB.o -objects += ${NN2_ROOT}/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q7_basic.o -objects += ${NN2_ROOT}/source/e804_opt/convolution/csi_xt800p_depthwise_separable_conv_HWC_q7.o - -#fully-connect -objects += ${NN2_ROOT}/source/e804_opt/fully-connect/csi_xt800p_fully_connected_mat_q7_vec_q15.o -objects += ${NN2_ROOT}/source/e804_opt/fully-connect/csi_xt800p_fully_connected_q15.o -objects += ${NN2_ROOT}/source/e804_opt/fully-connect/csi_xt800p_fully_connected_q7.o - -#nn-support -objects += ${NN2_ROOT}/source/e804_opt/nn-support/csi_xt800p_nntables.o - -#pooling -objects += ${NN2_ROOT}/source/e804_opt/pooling/csi_xt800p_pool_q7_HWC.o - -#softmax -objects += ${NN2_ROOT}/source/e804_opt/softmax/csi_xt800p_softmax_q15.o -objects += ${NN2_ROOT}/source/e804_opt/softmax/csi_xt800p_softmax_q7.o - -objects += ${NN2_ROOT}/source/e804_opt/convolution.o -objects += ${NN2_ROOT}/source/e804_opt/fullyconnected.o -objects += ${NN2_ROOT}/source/e804_opt/avgpool.o -objects += ${NN2_ROOT}/source/e804_opt/maxpool.o -objects += ${NN2_ROOT}/source/e804_opt/softmax.o -objects += ${NN2_ROOT}/source/e804_opt/relu.o -objects += ${NN2_ROOT}/source/e804_opt/sigmoid.o -objects += ${NN2_ROOT}/source/e804_opt/tanh.o - -objects += ${NN2_ROOT}/source/e804_opt/setup.o diff --git a/build_script/nn2_gref/Makefile b/build_script/nn2_gref/Makefile deleted file mode 100644 index 3d44d008..00000000 --- a/build_script/nn2_gref/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -CROSS_COMPILE ?= csky-abiv2-linux- -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar - -CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -INCLUDE = -I../../include - -all: nn2 - -include nn2.objs -include ../nn2.objs -include ../nn2_ref/nn2.objs - -nn2: $(objects) - $(AR) rcs $(INSTALL_DIR)/$(DSP_LIB) $^ - -nn2_shared: $(objects) - $(CC) -shared -o $(INSTALL_DIR)/$(DSP_LIB) $^ - -%.o: %.c - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ -%.o: %.S - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ diff --git a/build_script/nn2_gref/nn2.objs b/build_script/nn2_gref/nn2.objs deleted file mode 100644 index 1cd45faa..00000000 --- a/build_script/nn2_gref/nn2.objs +++ /dev/null @@ -1,155 +0,0 @@ -objects += ${NN2_ROOT}/source/graph_ref/any.o -objects += ${NN2_ROOT}/source/graph_ref/all.o -objects += ${NN2_ROOT}/source/graph_ref/elu.o -objects += ${NN2_ROOT}/source/graph_ref/relu.o -objects += ${NN2_ROOT}/source/graph_ref/relu1.o -objects += ${NN2_ROOT}/source/graph_ref/relu6.o -objects += ${NN2_ROOT}/source/graph_ref/relun.o -objects += ${NN2_ROOT}/source/graph_ref/prelu.o -objects += ${NN2_ROOT}/source/graph_ref/leaky_relu.o -objects += ${NN2_ROOT}/source/graph_ref/softrelu.o -objects += ${NN2_ROOT}/source/graph_ref/sigmoid.o -objects += ${NN2_ROOT}/source/graph_ref/hard_sigmoid.o -objects += ${NN2_ROOT}/source/graph_ref/softplus.o -objects += ${NN2_ROOT}/source/graph_ref/softsign.o -objects += ${NN2_ROOT}/source/graph_ref/softmax.o -objects += ${NN2_ROOT}/source/graph_ref/log_softmax.o -objects += ${NN2_ROOT}/source/graph_ref/flatten.o -objects += ${NN2_ROOT}/source/graph_ref/fullyconnected.o -objects += ${NN2_ROOT}/source/graph_ref/maxpool.o -objects += ${NN2_ROOT}/source/graph_ref/maxpool3d.o -objects += ${NN2_ROOT}/source/graph_ref/avgpool.o -objects += ${NN2_ROOT}/source/graph_ref/avgpool3d.o -objects += ${NN2_ROOT}/source/graph_ref/global_averagepool.o -objects += ${NN2_ROOT}/source/graph_ref/global_maxpool.o -objects += ${NN2_ROOT}/source/graph_ref/l2pool.o -objects += ${NN2_ROOT}/source/graph_ref/maxpool2d_locat.o -objects += ${NN2_ROOT}/source/graph_ref/unpooling.o -objects += ${NN2_ROOT}/source/graph_ref/lrn.o -objects += ${NN2_ROOT}/source/graph_ref/l2_normalization.o -objects += ${NN2_ROOT}/source/graph_ref/bn.o -objects += ${NN2_ROOT}/source/graph_ref/convolution.o -objects += ${NN2_ROOT}/source/graph_ref/convolution3d.o -objects += ${NN2_ROOT}/source/graph_ref/deconvolution.o -objects += ${NN2_ROOT}/source/graph_ref/deconvolution3d.o -objects += ${NN2_ROOT}/source/graph_ref/proposal.o -objects += ${NN2_ROOT}/source/graph_ref/psroipooling.o -objects += ${NN2_ROOT}/source/graph_ref/transpose.o -objects += ${NN2_ROOT}/source/graph_ref/abs.o -objects += ${NN2_ROOT}/source/graph_ref/add.o -objects += ${NN2_ROOT}/source/graph_ref/cos.o -objects += ${NN2_ROOT}/source/graph_ref/cosh.o -objects += ${NN2_ROOT}/source/graph_ref/acos.o -objects += ${NN2_ROOT}/source/graph_ref/acosh.o -objects += ${NN2_ROOT}/source/graph_ref/div.o -objects += ${NN2_ROOT}/source/graph_ref/floor.o -objects += ${NN2_ROOT}/source/graph_ref/floor_divide.o -objects += ${NN2_ROOT}/source/graph_ref/floor_mod.o -objects += ${NN2_ROOT}/source/graph_ref/fsmn.o -objects += ${NN2_ROOT}/source/graph_ref/maximum.o -objects += ${NN2_ROOT}/source/graph_ref/minimum.o -objects += ${NN2_ROOT}/source/graph_ref/power.o -objects += ${NN2_ROOT}/source/graph_ref/greater.o -objects += ${NN2_ROOT}/source/graph_ref/less.o -objects += ${NN2_ROOT}/source/graph_ref/equal.o -objects += ${NN2_ROOT}/source/graph_ref/not_equal.o -objects += ${NN2_ROOT}/source/graph_ref/greater_equal.o -objects += ${NN2_ROOT}/source/graph_ref/less_equal.o -objects += ${NN2_ROOT}/source/graph_ref/logical_and.o -objects += ${NN2_ROOT}/source/graph_ref/logical_or.o -objects += ${NN2_ROOT}/source/graph_ref/logical_not.o -objects += ${NN2_ROOT}/source/graph_ref/logical_xor.o -objects += ${NN2_ROOT}/source/graph_ref/log.o -objects += ${NN2_ROOT}/source/graph_ref/log1p.o -objects += ${NN2_ROOT}/source/graph_ref/mul.o -objects += ${NN2_ROOT}/source/graph_ref/rsqrt.o -objects += ${NN2_ROOT}/source/graph_ref/select.o -objects += ${NN2_ROOT}/source/graph_ref/sin.o -objects += ${NN2_ROOT}/source/graph_ref/asin.o -objects += ${NN2_ROOT}/source/graph_ref/sinh.o -objects += ${NN2_ROOT}/source/graph_ref/asinh.o -objects += ${NN2_ROOT}/source/graph_ref/sqrt.o -objects += ${NN2_ROOT}/source/graph_ref/square.o -objects += ${NN2_ROOT}/source/graph_ref/sub.o -objects += ${NN2_ROOT}/source/graph_ref/matmul.o -objects += ${NN2_ROOT}/source/graph_ref/mod.o -objects += ${NN2_ROOT}/source/graph_ref/and.o -objects += ${NN2_ROOT}/source/graph_ref/xor.o -objects += ${NN2_ROOT}/source/graph_ref/not.o -objects += ${NN2_ROOT}/source/graph_ref/or.o -objects += ${NN2_ROOT}/source/graph_ref/col2im.o -objects += ${NN2_ROOT}/source/graph_ref/im2col.o -objects += ${NN2_ROOT}/source/graph_ref/concat.o -objects += ${NN2_ROOT}/source/graph_ref/crop.o -objects += ${NN2_ROOT}/source/graph_ref/pad.o -objects += ${NN2_ROOT}/source/graph_ref/reshape.o -objects += ${NN2_ROOT}/source/graph_ref/shape.o -objects += ${NN2_ROOT}/source/graph_ref/tile.o -objects += ${NN2_ROOT}/source/graph_ref/arange.o -objects += ${NN2_ROOT}/source/graph_ref/ndarray_size.o -objects += ${NN2_ROOT}/source/graph_ref/space_to_batch.o -objects += ${NN2_ROOT}/source/graph_ref/batch_to_space.o -objects += ${NN2_ROOT}/source/graph_ref/batch_to_space_nd.o -objects += ${NN2_ROOT}/source/graph_ref/space_to_depth.o -objects += ${NN2_ROOT}/source/graph_ref/depth_to_space.o -objects += ${NN2_ROOT}/source/graph_ref/expand_dims.o -objects += ${NN2_ROOT}/source/graph_ref/slice.o -objects += ${NN2_ROOT}/source/graph_ref/reverse.o -objects += ${NN2_ROOT}/source/graph_ref/stack.o -objects += ${NN2_ROOT}/source/graph_ref/unstack.o -objects += ${NN2_ROOT}/source/graph_ref/split.o -objects += ${NN2_ROOT}/source/graph_ref/gather.o -objects += ${NN2_ROOT}/source/graph_ref/gather_nd.o -objects += ${NN2_ROOT}/source/graph_ref/squeeze.o -objects += ${NN2_ROOT}/source/graph_ref/tan.o -objects += ${NN2_ROOT}/source/graph_ref/atan.o -objects += ${NN2_ROOT}/source/graph_ref/tanh.o -objects += ${NN2_ROOT}/source/graph_ref/atanh.o -objects += ${NN2_ROOT}/source/graph_ref/negative.o -objects += ${NN2_ROOT}/source/graph_ref/ceil.o -objects += ${NN2_ROOT}/source/graph_ref/sign.o -objects += ${NN2_ROOT}/source/graph_ref/trunc.o -objects += ${NN2_ROOT}/source/graph_ref/isnan.o -objects += ${NN2_ROOT}/source/graph_ref/round.o -objects += ${NN2_ROOT}/source/graph_ref/exp.o -objects += ${NN2_ROOT}/source/graph_ref/expm1.o -objects += ${NN2_ROOT}/source/graph_ref/resize.o -objects += ${NN2_ROOT}/source/graph_ref/argmax.o -objects += ${NN2_ROOT}/source/graph_ref/argmin.o -objects += ${NN2_ROOT}/source/graph_ref/sum.o -objects += ${NN2_ROOT}/source/graph_ref/mean.o -objects += ${NN2_ROOT}/source/graph_ref/max.o -objects += ${NN2_ROOT}/source/graph_ref/min.o -objects += ${NN2_ROOT}/source/graph_ref/prod.o -objects += ${NN2_ROOT}/source/graph_ref/segment_max.o -objects += ${NN2_ROOT}/source/graph_ref/segment_min.o -objects += ${NN2_ROOT}/source/graph_ref/segment_sum.o -objects += ${NN2_ROOT}/source/graph_ref/segment_mean.o -objects += ${NN2_ROOT}/source/graph_ref/segment_prod.o -objects += ${NN2_ROOT}/source/graph_ref/threshold_relu.o -objects += ${NN2_ROOT}/source/graph_ref/yuv_rgb_scale.o -objects += ${NN2_ROOT}/source/graph_ref/roialign.o -objects += ${NN2_ROOT}/source/graph_ref/roipool.o -objects += ${NN2_ROOT}/source/graph_ref/cumsum.o -objects += ${NN2_ROOT}/source/graph_ref/cumprod.o -objects += ${NN2_ROOT}/source/graph_ref/reduce_max.o -objects += ${NN2_ROOT}/source/graph_ref/reduce_min.o -objects += ${NN2_ROOT}/source/graph_ref/reduce_sum.o -objects += ${NN2_ROOT}/source/graph_ref/reduce_prod.o -objects += ${NN2_ROOT}/source/graph_ref/reduce_mean.o -objects += ${NN2_ROOT}/source/graph_ref/reduce_logsumexp.o -objects += ${NN2_ROOT}/source/graph_ref/broadcast_to.o -objects += ${NN2_ROOT}/source/graph_ref/clip.o -objects += ${NN2_ROOT}/source/graph_ref/strided_slice.o -objects += ${NN2_ROOT}/source/graph_ref/topk.o -objects += ${NN2_ROOT}/source/graph_ref/non_max_suppression.o -objects += ${NN2_ROOT}/source/graph_ref/shuffle_channel.o -objects += ${NN2_ROOT}/source/graph_ref/erf.o -objects += ${NN2_ROOT}/source/graph_ref/utils.o -objects += ${NN2_ROOT}/source/graph_ref/setup.o -objects += ${NN2_ROOT}/source/graph_ref/scatter.o -objects += ${NN2_ROOT}/source/graph_ref/reorg.o -objects += ${NN2_ROOT}/source/graph_ref/sequence_mask.o -objects += ${NN2_ROOT}/source/graph_ref/where.o -objects += ${NN2_ROOT}/source/graph_ref/space_to_batch_nd.o -objects += ${NN2_ROOT}/source/graph_ref/subgraph.o \ No newline at end of file diff --git a/build_script/nn2_i805/Makefile b/build_script/nn2_i805/Makefile deleted file mode 100644 index 1bcd80be..00000000 --- a/build_script/nn2_i805/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -CROSS_COMPILE ?= csky-abiv2-elf- -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar - -CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -INCLUDE = -I../../include/include_xt800/dsp_include/ -I../../include/include_xt800/csi_core_include/ -I../../include/include_xt800/nn_include -INCLUDE += -I../../include - -all: nn2_i805 - -include nn2_vdsp2.objs -include ../nn2.objs -include ../nn2_ref/nn2.objs -include ../nn2_gref/nn2.objs - -nn2_i805: $(objects) - $(AR) rcs $(INSTALL_DIR)/$(DSP_LIB) $^ - -%.o: %.c - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ -%.o: %.S - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ diff --git a/build_script/nn2_i805/nn2_vdsp2.objs b/build_script/nn2_i805/nn2_vdsp2.objs deleted file mode 100644 index 9ca2a0bb..00000000 --- a/build_script/nn2_i805/nn2_vdsp2.objs +++ /dev/null @@ -1,72 +0,0 @@ -#activation -#objects += ${NN2_ROOT}/source/i805_opt/activation/csi_xt800v_nn_activations_q15.o -objects += ${NN2_ROOT}/source/i805_opt/activation/csi_xt800v_nn_activations_q15_fast.o -#objects += ${NN2_ROOT}/source/i805_opt/activation/csi_xt800v_nn_activations_q7.o -objects += ${NN2_ROOT}/source/i805_opt/activation/csi_xt800v_nn_activations_q7_fast.o -objects += ${NN2_ROOT}/source/i805_opt/activation/csi_xt800v_relu_q15.o -objects += ${NN2_ROOT}/source/i805_opt/activation/csi_xt800v_relu_q7.o -objects += ${NN2_ROOT}/source/i805_opt/activation/csi_i805_relu_8.o -objects += ${NN2_ROOT}/source/i805_opt/activation/csi_i805_relu6_8.o -objects += ${NN2_ROOT}/source/i805_opt/activation/csi_i805_clip_8.o - -#convolution -objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_xt800v_convolve_1x1_HWC_q7_fast.o -objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q15_basic.o -objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_RGB.o -objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_basic.o -objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_fast_nonsquare.o -objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_xt800v_depthwise_separable_conv_HWC_q7.o -objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_xt800v_depthwise_separable_conv_HWC_q7_nonsquare.o -objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_i805_convolution_1x1_8.o -objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_i805_convolution_8.o -objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_i805_depthwise_convolution_8.o -#objects += ${NN2_ROOT}/source/i805_opt/convolution/csi_i805_depthwise_convolution_3x3_8.o - - -#fully-connect -objects += ${NN2_ROOT}/source/i805_opt/fully-connect/csi_xt800v_fully_connected_mat_q7_vec_q15.o -objects += ${NN2_ROOT}/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q15.o -objects += ${NN2_ROOT}/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q7x4.o -#objects += ${NN2_ROOT}/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q7x16.o -objects += ${NN2_ROOT}/source/i805_opt/fully-connect/csi_i805_fullyconnected_8.o - -#nn-support -objects += ${NN2_ROOT}/source/i805_opt/nn-support/csi_xt800v_nntables.o - -#pooling -objects += ${NN2_ROOT}/source/i805_opt/pooling/csi_xt800v_pool_q7_HWC.o -objects += ${NN2_ROOT}/source/i805_opt/pooling/csi_xt800v_avepool_q7_HWC_nonsquare.o -objects += ${NN2_ROOT}/source/i805_opt/pooling/csi_i805_maxpool_8.o - -#softmax -objects += ${NN2_ROOT}/source/i805_opt/softmax/csi_xt800v_softmax_q15.o -objects += ${NN2_ROOT}/source/i805_opt/softmax/csi_xt800v_softmax_q7.o -#objects += ${NN2_ROOT}/source/i805_opt/softmax/csi_i805_softmax_8.o - -#gemm -#objects += ${NN2_ROOT}/source/i805_opt/gemm/csi_i805_gemm_4x4_8.o -#objects += ${NN2_ROOT}/source/i805_opt/gemm/csi_i805_gemm_4x16_8.o -objects += ${NN2_ROOT}/source/i805_opt/gemm/csi_i805_vec_mat_mult_8.o -objects += ${NN2_ROOT}/source/i805_opt/gemm/csi_i805_mat_mult_nt_t_8.o - -#basic_math_func -objects += ${NN2_ROOT}/source/i805_opt/basic_math/csi_i805_elementwise_add_8.o -objects += ${NN2_ROOT}/source/i805_opt/basic_math/csi_i805_elementwise_mul_8.o - -#reshape -objects += ${NN2_ROOT}/source/i805_opt/reshape/csi_i805_reshape_8.o - -objects += ${NN2_ROOT}/source/i805_opt/convolution.o -objects += ${NN2_ROOT}/source/i805_opt/fullyconnected.o -objects += ${NN2_ROOT}/source/i805_opt/add.o -objects += ${NN2_ROOT}/source/i805_opt/avgpool.o -objects += ${NN2_ROOT}/source/i805_opt/maxpool.o -objects += ${NN2_ROOT}/source/i805_opt/mul.o -objects += ${NN2_ROOT}/source/i805_opt/softmax.o -objects += ${NN2_ROOT}/source/i805_opt/relu.o -objects += ${NN2_ROOT}/source/i805_opt/relu6.o -objects += ${NN2_ROOT}/source/i805_opt/clip.o -objects += ${NN2_ROOT}/source/i805_opt/reshape.o -objects += ${NN2_ROOT}/source/i805_opt/sigmoid.o -objects += ${NN2_ROOT}/source/i805_opt/tanh.o -objects += ${NN2_ROOT}/source/i805_opt/setup.o diff --git a/build_script/nn2_ref/Makefile b/build_script/nn2_ref/Makefile deleted file mode 100644 index 477468dc..00000000 --- a/build_script/nn2_ref/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -CROSS_COMPILE ?= csky-abiv2-linux- -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar - -CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -mhard-float -INCLUDE = -I../../include -I../../module/nna_ddk_install/include/ - -all: nn2 - -include nn2.objs -include ../nn2.objs - -nn2: $(objects) - $(AR) rcs $(INSTALL_DIR)/$(DSP_LIB).a $^ - -nn2_shared: $(objects) - $(CC) -shared -o $(INSTALL_DIR)/$(DSP_LIB).so $^ - -%.o: %.c - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ -%.o: %.cpp - $(CXX) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ -%.o: %.S - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ diff --git a/build_script/nn2_ref/nn2.objs b/build_script/nn2_ref/nn2.objs deleted file mode 100644 index bace7160..00000000 --- a/build_script/nn2_ref/nn2.objs +++ /dev/null @@ -1,151 +0,0 @@ -objects += ${NN2_ROOT}/source/reference/elu.o -objects += ${NN2_ROOT}/source/reference/relu.o -objects += ${NN2_ROOT}/source/reference/relu1.o -objects += ${NN2_ROOT}/source/reference/relu6.o -objects += ${NN2_ROOT}/source/reference/relun.o -objects += ${NN2_ROOT}/source/reference/prelu.o -objects += ${NN2_ROOT}/source/reference/leaky_relu.o -objects += ${NN2_ROOT}/source/reference/softrelu.o -objects += ${NN2_ROOT}/source/reference/sigmoid.o -objects += ${NN2_ROOT}/source/reference/hard_sigmoid.o -objects += ${NN2_ROOT}/source/reference/softplus.o -objects += ${NN2_ROOT}/source/reference/softsign.o -objects += ${NN2_ROOT}/source/reference/softmax.o -objects += ${NN2_ROOT}/source/reference/log_softmax.o -objects += ${NN2_ROOT}/source/reference/flatten.o -objects += ${NN2_ROOT}/source/reference/fsmn.o -objects += ${NN2_ROOT}/source/reference/fullyconnected.o -objects += ${NN2_ROOT}/source/reference/maxpool.o -objects += ${NN2_ROOT}/source/reference/maxpool3d.o -objects += ${NN2_ROOT}/source/reference/averagepool.o -objects += ${NN2_ROOT}/source/reference/averagepool3d.o -objects += ${NN2_ROOT}/source/reference/global_averagepool.o -objects += ${NN2_ROOT}/source/reference/global_maxpool.o -objects += ${NN2_ROOT}/source/reference/l2pool.o -objects += ${NN2_ROOT}/source/reference/maxpool2d_locat.o -objects += ${NN2_ROOT}/source/reference/unpooling.o -objects += ${NN2_ROOT}/source/reference/lrn.o -objects += ${NN2_ROOT}/source/reference/l2_normalization.o -objects += ${NN2_ROOT}/source/reference/batch_normalization.o -objects += ${NN2_ROOT}/source/reference/convolution.o -objects += ${NN2_ROOT}/source/reference/convolution_relu.o -objects += ${NN2_ROOT}/source/reference/convolution_relu6.o -objects += ${NN2_ROOT}/source/reference/convolution_channel.o -objects += ${NN2_ROOT}/source/reference/convolution3d.o -objects += ${NN2_ROOT}/source/reference/deconvolution.o -objects += ${NN2_ROOT}/source/reference/deconvolution3d.o -objects += ${NN2_ROOT}/source/reference/proposal.o -objects += ${NN2_ROOT}/source/reference/psroipooling.o -objects += ${NN2_ROOT}/source/reference/transpose.o -objects += ${NN2_ROOT}/source/reference/abs.o -objects += ${NN2_ROOT}/source/reference/add.o -objects += ${NN2_ROOT}/source/reference/cos.o -objects += ${NN2_ROOT}/source/reference/cosh.o -objects += ${NN2_ROOT}/source/reference/acos.o -objects += ${NN2_ROOT}/source/reference/acosh.o -objects += ${NN2_ROOT}/source/reference/div.o -objects += ${NN2_ROOT}/source/reference/floor.o -objects += ${NN2_ROOT}/source/reference/floor_divide.o -objects += ${NN2_ROOT}/source/reference/floor_mod.o -objects += ${NN2_ROOT}/source/reference/maximum.o -objects += ${NN2_ROOT}/source/reference/minimum.o -objects += ${NN2_ROOT}/source/reference/power.o -objects += ${NN2_ROOT}/source/reference/greater.o -objects += ${NN2_ROOT}/source/reference/less.o -objects += ${NN2_ROOT}/source/reference/equal.o -objects += ${NN2_ROOT}/source/reference/not_equal.o -objects += ${NN2_ROOT}/source/reference/greater_equal.o -objects += ${NN2_ROOT}/source/reference/less_equal.o -objects += ${NN2_ROOT}/source/reference/logical_and.o -objects += ${NN2_ROOT}/source/reference/logical_or.o -objects += ${NN2_ROOT}/source/reference/logical_not.o -objects += ${NN2_ROOT}/source/reference/logical_xor.o -objects += ${NN2_ROOT}/source/reference/log.o -objects += ${NN2_ROOT}/source/reference/log1p.o -objects += ${NN2_ROOT}/source/reference/mul.o -objects += ${NN2_ROOT}/source/reference/rsqrt.o -objects += ${NN2_ROOT}/source/reference/select.o -objects += ${NN2_ROOT}/source/reference/sin.o -objects += ${NN2_ROOT}/source/reference/asin.o -objects += ${NN2_ROOT}/source/reference/sinh.o -objects += ${NN2_ROOT}/source/reference/asinh.o -objects += ${NN2_ROOT}/source/reference/sqrt.o -objects += ${NN2_ROOT}/source/reference/square.o -objects += ${NN2_ROOT}/source/reference/sub.o -objects += ${NN2_ROOT}/source/reference/matmul.o -objects += ${NN2_ROOT}/source/reference/mod.o -objects += ${NN2_ROOT}/source/reference/and.o -objects += ${NN2_ROOT}/source/reference/xor.o -objects += ${NN2_ROOT}/source/reference/not.o -objects += ${NN2_ROOT}/source/reference/or.o -objects += ${NN2_ROOT}/source/reference/col2im.o -objects += ${NN2_ROOT}/source/reference/im2col.o -objects += ${NN2_ROOT}/source/reference/concat.o -objects += ${NN2_ROOT}/source/reference/pad.o -objects += ${NN2_ROOT}/source/reference/reshape.o -objects += ${NN2_ROOT}/source/reference/shape.o -objects += ${NN2_ROOT}/source/reference/tile.o -objects += ${NN2_ROOT}/source/reference/arange.o -objects += ${NN2_ROOT}/source/reference/ndarray_size.o -objects += ${NN2_ROOT}/source/reference/space_to_batch.o -objects += ${NN2_ROOT}/source/reference/batch_to_space.o -objects += ${NN2_ROOT}/source/reference/space_to_depth.o -objects += ${NN2_ROOT}/source/reference/depth_to_space.o -objects += ${NN2_ROOT}/source/reference/expand_dims.o -objects += ${NN2_ROOT}/source/reference/slice.o -objects += ${NN2_ROOT}/source/reference/reverse.o -objects += ${NN2_ROOT}/source/reference/stack.o -objects += ${NN2_ROOT}/source/reference/unstack.o -objects += ${NN2_ROOT}/source/reference/split.o -objects += ${NN2_ROOT}/source/reference/gather.o -objects += ${NN2_ROOT}/source/reference/gather_nd.o -objects += ${NN2_ROOT}/source/reference/squeeze.o -objects += ${NN2_ROOT}/source/reference/tan.o -objects += ${NN2_ROOT}/source/reference/atan.o -objects += ${NN2_ROOT}/source/reference/tanh.o -objects += ${NN2_ROOT}/source/reference/atanh.o -objects += ${NN2_ROOT}/source/reference/negative.o -objects += ${NN2_ROOT}/source/reference/ceil.o -objects += ${NN2_ROOT}/source/reference/sign.o -objects += ${NN2_ROOT}/source/reference/trunc.o -objects += ${NN2_ROOT}/source/reference/isnan.o -objects += ${NN2_ROOT}/source/reference/round.o -objects += ${NN2_ROOT}/source/reference/exp.o -objects += ${NN2_ROOT}/source/reference/expm1.o -objects += ${NN2_ROOT}/source/reference/resize.o -objects += ${NN2_ROOT}/source/reference/argmax.o -objects += ${NN2_ROOT}/source/reference/argmin.o -objects += ${NN2_ROOT}/source/reference/sum.o -objects += ${NN2_ROOT}/source/reference/mean.o -objects += ${NN2_ROOT}/source/reference/max.o -objects += ${NN2_ROOT}/source/reference/min.o -objects += ${NN2_ROOT}/source/reference/prod.o -objects += ${NN2_ROOT}/source/reference/segment_max.o -objects += ${NN2_ROOT}/source/reference/segment_min.o -objects += ${NN2_ROOT}/source/reference/segment_sum.o -objects += ${NN2_ROOT}/source/reference/segment_mean.o -objects += ${NN2_ROOT}/source/reference/segment_prod.o -objects += ${NN2_ROOT}/source/reference/threshold_relu.o -objects += ${NN2_ROOT}/source/reference/batch_normalization.o -objects += ${NN2_ROOT}/source/reference/yuv_rgb_scale.o -objects += ${NN2_ROOT}/source/reference/roialign.o -objects += ${NN2_ROOT}/source/reference/roipool.o -objects += ${NN2_ROOT}/source/reference/cumsum.o -objects += ${NN2_ROOT}/source/reference/cumprod.o -objects += ${NN2_ROOT}/source/reference/reduce_max.o -objects += ${NN2_ROOT}/source/reference/reduce_min.o -objects += ${NN2_ROOT}/source/reference/reduce_sum.o -objects += ${NN2_ROOT}/source/reference/reduce_prod.o -objects += ${NN2_ROOT}/source/reference/reduce_mean.o -objects += ${NN2_ROOT}/source/reference/reduce_logsumexp.o -objects += ${NN2_ROOT}/source/reference/broadcast_to.o -objects += ${NN2_ROOT}/source/reference/clip.o -objects += ${NN2_ROOT}/source/reference/strided_slice.o -objects += ${NN2_ROOT}/source/reference/topk.o -objects += ${NN2_ROOT}/source/reference/non_max_suppression.o -objects += ${NN2_ROOT}/source/reference/shuffle_channel.o -objects += ${NN2_ROOT}/source/reference/erf.o -objects += ${NN2_ROOT}/source/reference/utils.o -objects += ${NN2_ROOT}/source/reference/setup.o -objects += ${NN2_ROOT}/source/reference/scatter.o - diff --git a/build_script/nn2_ref_i805/Makefile b/build_script/nn2_ref_i805/Makefile deleted file mode 100644 index ab8cdf58..00000000 --- a/build_script/nn2_ref_i805/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -CROSS_COMPILE ?= csky-abiv2-elf- -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar - -CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -INCLUDE = -I../../include/include_xt800/dsp_include/ -I../../include/include_xt800/csi_core_include/ -I../../include/include_xt800/nn_include -I../../include - -all: nn2 - -include nn2.objs -include ../nn2.objs -include ../nn2_ref/nn2.objs - -nn2: $(objects) - $(AR) rcs $(INSTALL_DIR)/$(DSP_LIB) $^ - -%.o: %.c - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ -%.o: %.S - $(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@ diff --git a/build_script/nn2_ref_i805/nn2.objs b/build_script/nn2_ref_i805/nn2.objs deleted file mode 100644 index 75aceb88..00000000 --- a/build_script/nn2_ref_i805/nn2.objs +++ /dev/null @@ -1,52 +0,0 @@ -#activation -objects += ${NN2_ROOT}/source/i805_ref/activation/csi_nn_activations_q15.o -objects += ${NN2_ROOT}/source/i805_ref/activation/csi_nn_activations_q7.o -objects += ${NN2_ROOT}/source/i805_ref/activation/csi_relu_q15.o -objects += ${NN2_ROOT}/source/i805_ref/activation/csi_relu_q7.o - -#convolution -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_convolve_1x1_HWC_q7_fast.o -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_convolve_HWC_q15_basic.o -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_convolve_HWC_q15_fast.o -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_convolve_HWC_q7_RGB.o -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_convolve_HWC_q7_basic.o -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_convolve_HWC_q7_fast.o -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_convolve_HWC_q7_fast_nonsquare.o -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_depthwise_separable_conv_HWC_q7.o -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_depthwise_separable_conv_HWC_q7_nonsquare.o -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_nn_mat_mult_kernel_q7_q15.o -objects += ${NN2_ROOT}/source/i805_ref/convolution/csi_nn_mat_mult_kernel_q7_q15_reordered.o - -#fully-connect -objects += ${NN2_ROOT}/source/i805_ref/fully-connect/csi_fully_connected_mat_q7_vec_q15.o -objects += ${NN2_ROOT}/source/i805_ref/fully-connect/csi_fully_connected_mat_q7_vec_q15_opt.o -objects += ${NN2_ROOT}/source/i805_ref/fully-connect/csi_fully_connected_q15.o -objects += ${NN2_ROOT}/source/i805_ref/fully-connect/csi_fully_connected_q15_opt.o -objects += ${NN2_ROOT}/source/i805_ref/fully-connect/csi_fully_connected_q7.o -objects += ${NN2_ROOT}/source/i805_ref/fully-connect/csi_fully_connected_q7_opt.o - -#nn-support -objects += ${NN2_ROOT}/source/i805_ref/nn-support/csi_nntables.o -objects += ${NN2_ROOT}/source/i805_ref/nn-support/csi_q7_to_q15_no_shift.o -objects += ${NN2_ROOT}/source/i805_ref/nn-support/csi_q7_to_q15_reordered_no_shift.o - -#pooling -objects += ${NN2_ROOT}/source/i805_ref/pooling/csi_pool_q7_HWC.o -objects += ${NN2_ROOT}/source/i805_ref/pooling/csi_avepool_q7_HWC_nonsquare.o - -#softmax -objects += ${NN2_ROOT}/source/i805_ref/softmax/csi_softmax_q15.o -objects += ${NN2_ROOT}/source/i805_ref/softmax/csi_softmax_q7.o - - -objects += ${NN2_ROOT}/source/i805_ref/convolution.o -objects += ${NN2_ROOT}/source/i805_ref/fullyconnected.o -objects += ${NN2_ROOT}/source/i805_ref/avgpool.o -objects += ${NN2_ROOT}/source/i805_ref/maxpool.o -objects += ${NN2_ROOT}/source/i805_ref/softmax.o -objects += ${NN2_ROOT}/source/i805_ref/relu.o -objects += ${NN2_ROOT}/source/i805_ref/sigmoid.o -objects += ${NN2_ROOT}/source/i805_ref/tanh.o - -objects += ${NN2_ROOT}/source/i805_ref/setup.o - diff --git a/include/csi_asp.h b/include/csi_asp.h new file mode 100644 index 00000000..19094f3d --- /dev/null +++ b/include/csi_asp.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#ifndef INCLUDE_CSI_ASP_H_ +#define INCLUDE_CSI_ASP_H_ + +#include "csi_internal.h" +#include "csi_ref.h" +#include "csi_utils.h" + +int csi_asp_avgpool2d(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); +int csi_asp_conv2d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv2d_params *params); +int csi_asp_depthwise_conv2d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); +int csi_asp_fullyconnected(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct fc_params *params); +int csi_asp_maxpool2d(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); +#endif // INCLUDE_CSI_ASP_H_ diff --git a/include/csi_c860.h b/include/csi_c860.h index 579bd762..87310f63 100644 --- a/include/csi_c860.h +++ b/include/csi_c860.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,20 +16,21 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_INTERNAL_C860_H -#define _CSI_INTERNAL_C860_H +#ifndef INCLUDE_CSI_C860_H_ +#define INCLUDE_CSI_C860_H_ +#include #include #include #include -#include + #include "csi_internal.h" #include "csi_ref.h" #include "csi_utils.h" -void csi_dequantize_f32_c860(uint8_t *input, float *output, int32_t offset, - int32_t multiplier, int32_t shift, int32_t length); +void csi_dequantize_f32_c860(uint8_t *input, float *output, int32_t offset, int32_t multiplier, + int32_t shift, int32_t length); -#endif +#endif // INCLUDE_CSI_C860_H_ diff --git a/include/csi_c906.h b/include/csi_c906.h index 1a9c1998..ba17fb04 100644 --- a/include/csi_c906.h +++ b/include/csi_c906.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,144 +16,120 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_INTERNAL_C906_H -#define _CSI_INTERNAL_C906_H +#ifndef INCLUDE_CSI_C906_H_ +#define INCLUDE_CSI_C906_H_ +#include #include #include #include -#include + #include "csi_internal.h" #include "csi_ref.h" +#include "csi_thead_rvv.h" #include "csi_utils.h" /************************** f32 func declaration ***************************/ -int csi_c906_abs_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_abs_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_c906_add_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_c906_add_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_c906_sub_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_c906_sub_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_c906_mul_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_c906_mul_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_c906_minimum_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_c906_minimum_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_c906_broadcast_to_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_broadcast_to_f32(struct csi_tensor *input, struct csi_tensor *output, struct broadcast_to_params *params); -int csi_c906_clip_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_clip_f32(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params); -int csi_c906_concat_f32(struct csi_tensor **input, - struct csi_tensor *output, +int csi_c906_concat_f32(struct csi_tensor **input, struct csi_tensor *output, struct concat_params *params); -int csi_c906_split_f32(struct csi_tensor *input, - struct csi_tensor **output, +int csi_c906_split_f32(struct csi_tensor *input, struct csi_tensor **output, struct split_params *params); -int csi_c906_fullyconnected_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_c906_fullyconnected_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_c906_fullyconnected_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_c906_fullyconnected_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_c906_pad_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_pad_f32(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params); -int csi_c906_prelu_f32(struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, - struct prelu_params *params); +int csi_c906_prelu_f32(struct csi_tensor *input, struct csi_tensor *alpha, + struct csi_tensor *output, struct prelu_params *params); -int csi_c906_relu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_relu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_c906_relu1_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_relu1_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_c906_relu6_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_relu6_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_c906_leaky_relu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_leaky_relu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_c906_conv2d_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv1d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv1d_params *params); + +int csi_c906_conv2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_conv2d_relu_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv2d_relu_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_depthwise_conv2d_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_depthwise_conv2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_depthwise_conv2d_relu_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_depthwise_conv2d_relu_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_maxpool2d_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_maxpool2d_init(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_c906_global_maxpool2d_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_global_maxpool2d_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_c906_avgpool2d_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_avgpool2d_init(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_c906_global_avgpool2d_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_global_avgpool2d_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); +int csi_c906_div_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + /* pack */ void csi_c906_reorder_kernel(float *a, float *sa, int m, int k, int ldx); void csi_c906_reorder_input(float *b, float *sb, int k, int n, int ldx); +void csi_c906_reorder_input_1(float *b, float *sb, int k, int n, int ldx); + /* gemm */ -void csi_c906_sgemm_kernel_f32(float* dst, const float* sa, const float* sb, int m, int k, int n, int ldc, float* bias, bool fuse_relu); +void csi_c906_sgemm_kernel_f32(float *dst, const float *sa, const float *sb, int m, int k, int n, + int ldc, float *bias, bool fuse_relu); /* kernel transform */ void csi_c906_conv1x1s1_sgemm_transform_kernel(struct csi_tensor *kernel, @@ -174,237 +150,214 @@ void csi_c906_conv3x3s1_winograd64_transform_kernel(struct csi_tensor *o_kernel, void csi_c906_conv3x3s1_winograd64_transform_kernel_1(struct csi_tensor *o_kernel, struct csi_tensor *t_kernel); +void csi_c906_conv3x3s1_winograd64_transform_kernel_pack4(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel); + +void csi_c906_conv3x3s1_winograd43_transform_kernel_pack4(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel); + /* convolution optimization */ -int csi_c906_conv1x1s1_sgemm(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv1x1s1_sgemm(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_conv1x1s1_sgemm_fuse_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv1x1s1_sgemm_fuse_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_conv_im2col_sgemm(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv_im2col_sgemm(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_conv_im2col_sgemm_fuse_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv_im2col_sgemm_fuse_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_conv3x3s1_winograd23(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv3x3s1_winograd23(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_conv3x3s1_winograd43(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv3x3s1_winograd43(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_conv3x3s1_winograd64(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv3x3s1_winograd64(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_conv3x3s1_winograd64_1(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv3x3s1_winograd64_1(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -void csi_c906_conv3x3s1(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv3x3s1_winograd64_pack4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_c906_conv3x3s1_winograd43_pack4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_c906_conv3x3s1(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -void csi_c906_conv3x3s2(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +void csi_c906_conv3x3s2(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); /* depthwise convolution optimization */ -int csi_c906_dwconv3x3s1(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s1(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv3x3s2(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s2(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv5x5s1(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv5x5s1(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv5x5s2(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv5x5s2(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv3x3s1_pack4(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s1_pack4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv3x3s2_pack4(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s2_pack4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); /* depthwise convolution fuse relu */ -int csi_c906_dwconv3x3s1_fuse_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s1_fuse_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv3x3s2_fuse_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s2_fuse_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv5x5s1_fuse_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv5x5s1_fuse_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv5x5s2_fuse_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv5x5s2_fuse_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv3x3s1_pack4_fuse_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s1_pack4_fuse_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv3x3s2_pack4_fuse_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s2_pack4_fuse_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); +int csi_c906_dwconv2d_s1_pad0_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); /************************** fp16 func declaration ***************************/ -int csi_c906_add_fp16(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_c906_sub_fp16(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_c906_mul_fp16(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_c906_minimum_fp16(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_c906_global_avgpool2d_fp16(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_add_fp16(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_c906_sub_fp16(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_c906_mul_fp16(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_c906_minimum_fp16(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_c906_global_avgpool2d_fp16(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_c906_global_maxpool2d_fp16(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_global_maxpool2d_fp16(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_c906_pad_fp16(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_pad_fp16(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params); -int csi_c906_relu_fp16(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_relu_fp16(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_c906_relu1_fp16(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_relu1_fp16(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_c906_relu6_fp16(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_relu6_fp16(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_c906_prelu_fp16(struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, - struct prelu_params *params); +int csi_c906_prelu_fp16(struct csi_tensor *input, struct csi_tensor *alpha, + struct csi_tensor *output, struct prelu_params *params); -int csi_c906_leaky_relu_fp16(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_leaky_relu_fp16(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_c906_abs_fp16(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_abs_fp16(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_c906_clip_fp16(struct csi_tensor *input, - struct csi_tensor *output, +int csi_c906_clip_fp16(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params); -int csi_c906_concat_fp16(struct csi_tensor **input, - struct csi_tensor *output, +int csi_c906_concat_fp16(struct csi_tensor **input, struct csi_tensor *output, struct concat_params *params); -int csi_c906_split_fp16(struct csi_tensor *input, - struct csi_tensor **output, +int csi_c906_split_fp16(struct csi_tensor *input, struct csi_tensor **output, struct split_params *params); -int csi_c906_fullyconnected_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_c906_fullyconnected_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_c906_fullyconnected_fp16_1(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, - struct fc_params *params); +int csi_c906_fullyconnected_pack8_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params); + +int csi_c906_fullyconnected_pack8_fp16_1(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params); + +int csi_c906_fullyconnected_pack16_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params); -int csi_c906_fullyconnected_fp16_2(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, - struct fc_params *params); +int csi_c906_fullyconnected_pack16_output16_fp16(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *weights, + struct csi_tensor *bias, struct fc_params *params); + +void csi_c906_reorder_weight_n8_fp16(__fp16 *src, __fp16 *dst, int m, int k, int ldx); + +void csi_c906_reorder_weight_n16_fp16(__fp16 *src, __fp16 *dst, int m, int k, int ldx); /* pack fp16 */ void csi_c906_reorder_kernel_fp16(__fp16 *a, __fp16 *sa, int m, int k, int ldx); void csi_c906_reorder_input_fp16(__fp16 *b, __fp16 *sb, int k, int n, int ldx); -/* gemm fp16 */ -void csi_c906_sgemm_kernel_fp16(__fp16* dst, const __fp16* sa, const __fp16* sb, int m, int k, int n, int ldc, __fp16* bias); +void csi_c906_reorder_input_fp16_1(__fp16 *b, __fp16 *sb, int k, int n, int ldx); +void csi_c906_reorder_matrix_z8_fp16(__fp16 *src, __fp16 *dst, int k, int n, int ldx); +void csi_c906_reorder_matrix_z16_fp16(__fp16 *src, __fp16 *dst, int k, int n, int ldx); + +/* gemm fp16 */ +void csi_c906_sgemm_kernel_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int m, int k, + int n, int ldc, __fp16 *bias); +void csi_c906_sgemm_kernel_fp16_1(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int m, int k, + int n, int ldc, __fp16 *bias); + +/* gemv fp16 */ +void csi_c906_gemv_pack8_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int k, int n, + int ldc, __fp16 *bias); +void csi_c906_gemv_pack16_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int k, int n, + int ldc, __fp16 *bias); + +void csi_c906_gemv_trans_pack8_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int k, int n, + int ldc, __fp16 *bias); +void csi_c906_gemv_trans_pack16_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int k, int n, + int ldc, __fp16 *bias); /* kernel transform fp16 */ void csi_c906_conv1x1s1_sgemm_transform_kernel_fp16(struct csi_tensor *kernel, @@ -412,75 +365,58 @@ void csi_c906_conv1x1s1_sgemm_transform_kernel_fp16(struct csi_tensor *kernel, void csi_c906_conv_im2col_sgemm_transform_kernel_fp16(struct csi_tensor *kernel, struct conv2d_params *params); -void csi_c906_conv3x3s1_winograd43_transform_kernel_fp16(struct csi_tensor *o_kernel, - struct csi_tensor *t_kernel); +void csi_c906_conv3x3s1_winograd43_transform_kernel_pack8_fp16(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel); -void csi_c906_conv3x3s1_winograd64_transform_kernel_fp16(struct csi_tensor *o_kernel, - struct csi_tensor *t_kernel); +void csi_c906_conv3x3s1_winograd64_transform_kernel_pack8_fp16(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel); /* convolution optimization fp16 */ -int csi_c906_conv1x1s1_sgemm_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv1x1s1_sgemm_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_conv_im2col_sgemm_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv1x1s1_batch_gemv_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_c906_conv_im2col_sgemm_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_conv3x3s1_winograd43_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); +int csi_c906_conv3x3s1_winograd43_pack8_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); -int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -void csi_c906_conv3x3s1_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +void csi_c906_conv3x3s1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -void csi_c906_conv3x3s2_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +void csi_c906_conv3x3s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); /* depthwise convolution optimization for fp16*/ -int csi_c906_dwconv3x3s1_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv3x3s2_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv3x3s1_pack8_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s1_pack8_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_c906_dwconv3x3s2_pack8_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_c906_dwconv3x3s2_pack8_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); - /* utils */ void csi_c906_memcpy(void *dst, const void *src, size_t n); @@ -493,8 +429,92 @@ void csi_c906_crop_output(float *output_trans, float *output, int out_c, int out void csi_c906_pad_input_fp16(const __fp16 *input, __fp16 *input_padded, int inc, int inh, int inw, int padded_h, int padded_w, int pad_top, int pad_left); -void csi_c906_crop_output_fp16(__fp16 *output_trans, __fp16 *output, int out_c, int out_h, int out_w, - int wino_h, int wino_w); +void csi_c906_crop_output_fp16(__fp16 *output_trans, __fp16 *output, int out_c, int out_h, + int out_w, int wino_h, int wino_w); + +/*asr related fuctions*/ +int csi_c906_cache_matmul_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params); + +int csi_c906_cache_matmul_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params); + +int csi_c906_matmul_fp16(struct csi_tensor *mat0, struct csi_tensor *mat1, + struct csi_tensor *output, struct matmul_params *params); + +int csi_c906_layer_norm_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *gamma, struct csi_tensor *beta, + struct layer_norm_params *params); + +int csi_c906_reshape_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct reshape_params *params); + +int csi_c906_transpose_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct transpose_params *params); + +int csi_c906_gather_fp16(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_params *params); + +int csi_c906_cache_conv1d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params); + +int csi_c906_cache_conv1d_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params); + +int csi_c906_lrn_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct lrn_params *params); + +void asr_buffer_init_c906(struct asr_buffer_t *buffer, size_t buffer_size, size_t data_lenth); + +void *asr_buffer_insert_c906_front(struct asr_buffer_t *buffer, void *input, size_t len); + +void *asr_buffer_insert_c906_back(struct asr_buffer_t *buffer, void *input, size_t len); + +void *asr_buffer_get_buffer_c906(struct asr_buffer_t *buffer); + +void asr_buffer_reset_c906(struct asr_buffer_t *buffer); + +void csi_c906_reset_fcsr(); +int csi_c906_get_fcsr(); + +/* hardware performance */ +struct csi_c906_hpm { + size_t inst; + size_t cycle; + size_t l1_icache_access; + size_t l1_icache_miss; + size_t store_inst; + size_t l1_dcache_raccess; + size_t l1_dcache_rmiss; + size_t l1_dcache_waccess; + size_t l1_dcache_wmiss; +}; + +uint64_t csi_c906_get_inst(); +uint64_t csi_c906_get_cycle(); +uint64_t csi_c906_get_l1_icache_access(); +uint64_t csi_c906_get_l1_icache_miss(); +uint64_t csi_c906_get_cb_miss(); +uint64_t csi_c906_get_cb_inst(); +uint64_t csi_c906_get_store_inst(); +uint64_t csi_c906_get_l1_dcache_raccess(); +uint64_t csi_c906_get_l1_dcache_rmiss(); +uint64_t csi_c906_get_l1_dcache_waccess(); +uint64_t csi_c906_get_l1_dcache_wmiss(); + +struct csi_c906_hpm csi_c906_get_hw_perf(); + +int csi_c906_sum_stride_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct reduce_params *params); + +int csi_nn_c906_register_op_init(enum csinn_dtype_enum dtype, enum csinn_op_enum op_name, void *bc); +int csi_nn_c906_register_op(enum csinn_dtype_enum dtype, enum csinn_op_enum op_name, void *bc); +void csi_nn_c906_bc_init_reg(); +void csi_nn_c906_bc_reg(); -#endif +#endif // INCLUDE_CSI_C906_H_ diff --git a/include/csi_c908.h b/include/csi_c908.h new file mode 100644 index 00000000..eeb1c7c5 --- /dev/null +++ b/include/csi_c908.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#ifndef INCLUDE_CSI_C908_H_ +#define INCLUDE_CSI_C908_H_ + +#include +#include +#include +#include + +#include "csi_internal.h" +#include "csi_ref.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" + +int csi_nn_c908_conv2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_c908_depthwise_conv2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_c908_avgpool2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_c908_maxpool2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_c908_fullyconnected_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params); + +void csi_nn_c908_conv_im2col_sgemm_transform_kernel_fp32(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_c908_conv_im2col_gemm_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_c908_conv_im2col_sgemm_transform_kernel_fp16(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_c908_conv_im2col_gemm_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_c908_conv1x1s1_gemm_transform_kernel_fp32(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_c908_conv1x1s1_gemm_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_c908_conv1x1s1_gemm_transform_kernel_fp16(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_c908_conv1x1s1_gemm_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_c908_reorder_kernel_n8_fp32(float *src, float *dst, int m, int k, int ldc); +void csi_nn_c908_reorder_input_z12_fp32(float *src, float *dst, int k, int n, int ldc); +void csi_nn_c908_gemm_8x12_fp32(float *dst, const float *sa, const float *sb, int m, int k, int n, + int ldc, float *bias); +void csi_nn_c908_reorder_input_z8_fp32(float *src, float *dst, int k, int n, int ldc); +void csi_nn_c908_gemm_8x8_fp32(float *dst, const float *sa, const float *sb, int m, int k, int n, + int ldc, float *bias); + +void csi_nn_c908_reorder_kernel_n8_fp16(__fp16 *src, __fp16 *dst, int m, int k, int ldc); +void csi_nn_c908_reorder_input_z24_fp16(__fp16 *src, __fp16 *dst, int k, int n, int ldc); +void csi_nn_c908_gemm_8x24_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int m, int k, + int n, int ldc, __fp16 *bias); +void csi_nn_c908_reorder_input_z16_fp16(__fp16 *src, __fp16 *dst, int k, int n, int ldc); +void csi_nn_c908_gemm_8x16_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int m, int k, + int n, int ldc, __fp16 *bias); + +#endif // INCLUDE_CSI_C908_H_ diff --git a/include/csi_ch8601.h b/include/csi_ch8601.h deleted file mode 100644 index 8146f989..00000000 --- a/include/csi_ch8601.h +++ /dev/null @@ -1,540 +0,0 @@ -/* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* CSI-NN2 version 1.10.x */ - -#ifndef _CSI_NN_CH8601_H -#define _CSI_NN_CH8601_H -#include "csi_nn.h" -#include "csi_utils.h" -#include "csi_node.h" - -int csi_ch8601_conv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_ch8601_depthwise_conv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_ch8601_group_conv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_ch8601_conv2d_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_ch8601_deconv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_ch8601_depthwise_deconv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_ch8601_fullyconnected(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, - struct fc_params *params); - -int csi_ch8601_fullyconnected_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, - struct fc_params *params); - -int csi_ch8601_maxpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_ch8601_global_maxpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_ch8601_avgpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_ch8601_global_avgpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_ch8601_l2pool(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_ch8601_pool_with_argmax(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_ch8601_maxpool2d_locat(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_ch8601_unpooling(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, - struct unpooling_params *params); - -int csi_ch8601_negative(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_floor(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_ceil(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_abs(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_exp(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_sin(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_tanh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_sqrt(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_rsqrt(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_square(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_sigmoid(struct csi_tensor *input, - struct csi_tensor *output, - struct sigmoid_params *params); - -int csi_ch8601_elu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); - -int csi_ch8601_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); - -int csi_ch8601_relu1(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); - -int csi_ch8601_relu6(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); - -int csi_ch8601_relun(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); - -int csi_ch8601_leaky_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); - -int csi_ch8601_softrelu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); - -int csi_ch8601_prelu(struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, - struct prelu_params *params); - -int csi_ch8601_softplus(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); - -int csi_ch8601_softmax(struct csi_tensor *input, - struct csi_tensor *output, - struct softmax_params *params); - -int csi_ch8601_batch_normalization(struct csi_tensor *input, - struct csi_tensor *mean, - struct csi_tensor *variance, - struct csi_tensor *gamma, - struct csi_tensor *beta, - struct csi_tensor *output, - struct bn_params *params); - -int csi_ch8601_l2_normalization(struct csi_tensor *input, - struct csi_tensor *output, - struct l2n_params *params); - -int csi_ch8601_lrn(struct csi_tensor *input, - struct csi_tensor *output, - struct lrn_params *params); - -int csi_ch8601_matmul(struct csi_tensor *mat0, - struct csi_tensor *mat1, - struct csi_tensor *output, - struct matmul_params *params); - -int csi_ch8601_add(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_sub(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_mul(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_div(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_floor_divide(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_maximum(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_minimum(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_power(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_greater(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_less(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_not_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_greater_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_less_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_select(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_and(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_or(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ch8601_pad(struct csi_tensor *input, - struct csi_tensor *output, - struct pad_params *params); - -int csi_ch8601_resize(struct csi_tensor *input, - struct csi_tensor *output, - struct resize_params *params); - -int csi_ch8601_concat(struct csi_tensor **input, - struct csi_tensor *output, - struct concat_params *params); - -int csi_ch8601_proposal(struct csi_tensor *cls_prob, - struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, - struct csi_tensor *output, - struct proposal_params *params); - -int csi_ch8601_psroipooling(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct psroipooling_params *params); - -int csi_ch8601_roipool(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct roi_pool_params *params); - -int csi_ch8601_transpose(struct csi_tensor *input, - struct csi_tensor *output, - struct transpose_params *params); - -int csi_ch8601_reshape(struct csi_tensor *input, - struct csi_tensor *output, - struct reshape_params *params); - -int csi_ch8601_reshape_tail(struct csi_tensor *input, - struct csi_tensor *output, - struct reshape_params *params); - -int csi_ch8601_shape(struct csi_tensor *input, - struct csi_tensor *output, - struct shape_params *params); - -int csi_ch8601_expand_dims_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct expand_dims_params *params); - -int csi_ch8601_expand_dims_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct expand_dims_params *params); - -int csi_ch8601_reverse(struct csi_tensor *input, - struct csi_tensor *output, - struct reverse_params *params); - -int csi_ch8601_flatten(struct csi_tensor *input, - struct csi_tensor *output, - struct flatten_params *params); - -int csi_ch8601_flatten_tail(struct csi_tensor *input, - struct csi_tensor *output, - struct flatten_params *params); - -int csi_ch8601_crop(struct csi_tensor *input, - struct csi_tensor *output, - struct crop_params *params); - -int csi_ch8601_slice(struct csi_tensor *input, - struct csi_tensor *output, - struct slice_params *params); - -int csi_ch8601_slice_tail(struct csi_tensor *input, - struct csi_tensor *output, - struct slice_params *params); - -int csi_ch8601_split(struct csi_tensor *input, - struct csi_tensor **output, - struct split_params *params); - -int csi_ch8601_stack(struct csi_tensor *inputs, - struct csi_tensor *output, - struct stack_params *params); - -int csi_ch8601_tile(struct csi_tensor *inputs, - struct csi_tensor *output, - struct tile_params *params); - -int csi_ch8601_arange(struct csi_tensor *output, - struct arange_params *params); - -int csi_ch8601_where(struct csi_tensor *condition, - struct csi_tensor *x, - struct csi_tensor *y, - struct csi_tensor *output, - struct where_params *params); - -int csi_ch8601_unstack(struct csi_tensor *input, - struct csi_tensor *outputs, - struct unstack_params *params); - -int csi_ch8601_gather_nd(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_nd_params *params); - -int csi_ch8601_squeeze(struct csi_tensor *input, - struct csi_tensor *output, - struct squeeze_params *params); - -int csi_ch8601_squeeze_tail(struct csi_tensor *input, - struct csi_tensor *output, - struct squeeze_params *params); - -int csi_ch8601_ndarray_size(struct csi_tensor *input, - struct csi_tensor *output, - struct ndarray_size_params *params); - -int csi_ch8601_space_to_batch(struct csi_tensor *input, - struct csi_tensor *output, - struct space_to_batch_params *params); - -int csi_ch8601_batch_to_space(struct csi_tensor *input, - struct csi_tensor *output, - struct batch_to_space_params *params); - -int csi_ch8601_space_to_depth(struct csi_tensor *input, - struct csi_tensor *output, - struct space_to_depth_params *params); - -int csi_ch8601_depth_to_space(struct csi_tensor *input, - struct csi_tensor *output, - struct depth_to_space_params *params); - -int csi_ch8601_one_hot(struct csi_tensor *input, - struct csi_tensor *output, - struct one_hot_params *params); - -int csi_ch8601_sequence_mask(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct sequence_mask_params *params); - -int csi_ch8601_im2col(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct im2col_params *params); - -int csi_ch8601_col2im(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct col2im_params *params); - -int csi_ch8601_sum(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); - -int csi_ch8601_mean(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); - -int csi_ch8601_max(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); - -int csi_ch8601_min(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); - -int csi_ch8601_prod(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); - -int csi_ch8601_argmin(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); - -int csi_ch8601_argmax(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); - -int csi_ch8601_all(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); - -int csi_ch8601_any(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); - -struct csi_ch8601_graph -{ - struct csi_node *input[8]; - struct csi_node *output[8]; - int input_num; - int output_num; - struct csi_node **layer; - int layer_size; - int layer_index; -}; - -struct csi_ch8601_target_data { - struct csi_ch8601_graph *graph; -}; - -struct csi_ch8601_graph *csi_ch8601_get_graph(struct csi_session *sess); - -void csi_ch8601_set_tensor(struct csi_tensor *tensor, struct csi_session *sess); -void csi_ch8601_set_const_tensor(struct csi_tensor *tensor, struct csi_session *sess); -int csi_ch8601_get_tensor(int index, struct csi_tensor *ret, struct csi_session *sess); - -void csi_ch8601_get_multiplier_and_shift(double double_multiplier, int16_t* multiplier, int16_t* shift); -int csi_ch8601_get_q1(struct csi_tensor *input, struct csi_tensor *kernel, struct csi_tensor *output); -int csi_ch8601_get_q2(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *input2, struct csi_tensor *output, int q1); -int csi_ch8601_get_q3(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output); -struct csi_ch8601_graph *csi_ch8601_get_graph(struct csi_session *sess); -int csi_ch8601_siso_op(struct csi_tensor *input, - struct csi_tensor *output, - int op, - void *params); -int csi_ch8601_diso_op(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - int op, - void *params); -int csi_ch8601_sidcso_op(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *const0, - struct csi_tensor *const1, - int op, - void *params); - -int csi_ch8601_conv2d_internel(struct csi_tensor *conv2d_input, - struct csi_tensor *conv2d_output, - struct csi_tensor *conv2d_kernel, - struct csi_tensor *conv2d_bias, - struct conv2d_params *conv2d_params, - struct csi_tensor *mul_rhs, - struct csi_tensor *mul_output, - struct csi_tensor *add_rhs, - struct csi_tensor *add_output); -#endif diff --git a/include/csi_debug.h b/include/csi_debug.h index 12911ee1..8fc25c60 100644 --- a/include/csi_debug.h +++ b/include/csi_debug.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,390 +16,272 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ -#ifndef _CSI_DEBUG_H -#define _CSI_DEBUG_H +/* CSI-NN2 version 1.12.x */ +#ifndef INCLUDE_CSI_DEBUG_H_ +#define INCLUDE_CSI_DEBUG_H_ +#include "csi_internal.h" +#include "csi_node.h" enum csinn_debug_enum { - CSI_DEBUG_LEVEL_INFO = -1, + CSI_DEBUG_LEVEL_DEBUG = -2, + CSI_DEBUG_LEVEL_INFO, CSI_DEBUG_LEVEL_WARNING, CSI_DEBUG_LEVEL_ERROR, + CSI_DEBUG_LEVEL_FATAL, }; #ifdef CSI_DEBUG #define CSI_DEBUG_CALL(func) func +void csi_debug_debug(const char *format, ...); void csi_debug_info(const char *format, ...); void csi_debug_warning(const char *format, ...); void csi_debug_error(const char *format, ...); +void csi_debug_fatal(const char *format, ...); int csi_debug_callback_unset(); #else #define CSI_DEBUG_CALL(func) +inline void csi_debug_debug(const char *format, ...) {} inline void csi_debug_info(const char *format, ...) {} inline void csi_debug_warning(const char *format, ...) {} inline void csi_debug_error(const char *format, ...) {} -inline int csi_debug_callback_unset() {return CSINN_CALLBACK_UNSET;} +inline void csi_debug_fatal(const char *format, ...) {} +inline int csi_debug_callback_unset() { return CSINN_CALLBACK_UNSET; } #endif int csi_debug_get_level(); void csi_debug_set_level(int level); +int csi_benchmark_layer(struct csi_node *node, uint64_t start_time, uint64_t end_time, + int layer_idx); -int csi_conv2d_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params, - const char *name); +int csi_conv2d_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params, const char *name); -int csi_conv3d_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv3d_params *params, - const char *name); +int csi_conv1d_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv1d_params *params, const char *name); + +int csi_conv3d_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv3d_params *params, const char *name); + +int csi_fsmn_debug_info(struct csi_tensor *frame, struct csi_tensor *l_filter, + struct csi_tensor *r_filter, struct csi_tensor *frame_sequence, + struct csi_tensor *frame_counter, struct csi_tensor *output, + struct fsmn_params *params, const char *name); + +int csi_siso_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params, const char *name); + +int csi_diso_debug_info(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params, const char *name); -int csi_fsmn_debug_info(struct csi_tensor *frame, - struct csi_tensor *l_filter, - struct csi_tensor *r_filter, - struct csi_tensor *frame_sequence, - struct csi_tensor *frame_counter, - struct csi_tensor *output, - struct fsmn_params *params, - const char *name); - -int csi_siso_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params, - const char *name); - -int csi_diso_debug_info(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params, - const char *name); - -int csi_relu_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params, - const char *name); - -int csi_arange_debug_info(struct csi_tensor *output, - struct arange_params *params, +int csi_relu_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params, const char *name); + +int csi_arange_debug_info(struct csi_tensor *output, struct arange_params *params, const char *name); -int csi_pool_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params, - const char *name); - -int csi_pad_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct pad_params *params, - const char *name); - -int csi_crop_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct crop_params *params, - const char *name); - -int csi_roi_pool_debug_info(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct roi_pool_params *params, +int csi_pool_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params, const char *name); + +int csi_pad_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct pad_params *params, const char *name); + +int csi_crop_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct crop_params *params, const char *name); + +int csi_roi_pool_debug_info(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct roi_pool_params *params, const char *name); -int csi_bn_debug_info(struct csi_tensor *input, - struct csi_tensor *mean, - struct csi_tensor *variance, - struct csi_tensor *gamma, - struct csi_tensor *beta, - struct csi_tensor *output, - struct bn_params *params, +int csi_bn_debug_info(struct csi_tensor *input, struct csi_tensor *mean, + struct csi_tensor *variance, struct csi_tensor *gamma, + struct csi_tensor *beta, struct csi_tensor *output, struct bn_params *params, const char *name); -int csi_batch_to_space_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct batch_to_space_params *params, - const char *name); - -int csi_batch_to_space_nd_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct batch_to_space_nd_params *params, - const char *name); - -int csi_depth_to_space_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct depth_to_space_params *params, - const char *name); - -int csi_space_to_depth_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct space_to_depth_params *params, - const char *name); - -int csi_space_to_batch_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct space_to_batch_params *params, - const char *name); - -int csi_space_to_batch_nd_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct space_to_batch_nd_params *params, - const char *name); - -int csi_broadcast_to_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct broadcast_to_params *params, - const char *name); +int csi_batch_to_space_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct batch_to_space_params *params, const char *name); -int csi_reduce_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params, - const char *name); +int csi_batch_to_space_nd_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct batch_to_space_nd_params *params, const char *name); -int csi_clip_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct clip_params *params, - const char *name); +int csi_cache_matmul_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params, const char *name); -int csi_col2im_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct col2im_params *params, - const char *name); +int csi_cache_conv1d_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params, const char *name); -int csi_concat_debug_info(struct csi_tensor **input, - struct csi_tensor *output, - struct concat_params *params, - const char *name); +int csi_space_to_depth_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct space_to_depth_params *params, const char *name); -int csi_cumprod_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct cumprod_params *params, - const char *name); +int csi_depth_to_space_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct depth_to_space_params *params, const char *name); -int csi_cumsum_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct cumsum_params *params, - const char *name); +int csi_space_to_batch_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct space_to_batch_params *params, const char *name); -int csi_expand_dims_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct expand_dims_params *params, - const char *name); +int csi_space_to_batch_nd_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct space_to_batch_nd_params *params, const char *name); -int csi_flatten_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct flatten_params *params, - const char *name); +int csi_broadcast_to_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct broadcast_to_params *params, const char *name); + +int csi_reduce_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct reduce_params *params, const char *name); + +int csi_clip_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct clip_params *params, const char *name); + +int csi_col2im_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct col2im_params *params, const char *name); + +int csi_concat_debug_info(struct csi_tensor **input, struct csi_tensor *output, + struct concat_params *params, const char *name); + +int csi_cumprod_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct cumprod_params *params, const char *name); + +int csi_cumsum_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct cumsum_params *params, const char *name); + +int csi_expand_dims_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct expand_dims_params *params, const char *name); -int csi_fullyconnected_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, - struct fc_params *params, - const char *name); - -int csi_gather_nd_debug_info(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_nd_params *params, +int csi_flatten_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct flatten_params *params, const char *name); + +int csi_fullyconnected_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params, const char *name); + +int csi_gather_nd_debug_info(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_nd_params *params, const char *name); -int csi_gather_debug_info(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_params *params, +int csi_gather_debug_info(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_params *params, const char *name); -int csi_hard_sigmoid_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct sigmoid_params *params, - const char *name); +int csi_hard_sigmoid_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct sigmoid_params *params, const char *name); -int csi_im2col_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct im2col_params *params, - const char *name); +int csi_im2col_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct im2col_params *params, const char *name); -int csi_l2n_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct l2n_params *params, - const char *name); +int csi_l2n_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct l2n_params *params, const char *name); -int csi_softmax_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct softmax_params *params, - const char *name); +int csi_layer_norm_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *gamma, struct csi_tensor *beta, + struct layer_norm_params *params, const char *name); -int csi_lrn_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct lrn_params *params, - const char *name); +int csi_softmax_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct softmax_params *params, const char *name); + +int csi_lrn_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct lrn_params *params, const char *name); -int csi_matmul_debug_info(struct csi_tensor *mat0, - struct csi_tensor *mat1, - struct csi_tensor *output, - struct matmul_params *params, +int csi_matmul_debug_info(struct csi_tensor *mat0, struct csi_tensor *mat1, + struct csi_tensor *output, struct matmul_params *params, const char *name); -int csi_ndarray_size_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct ndarray_size_params *params, - const char *name); +int csi_ndarray_size_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct ndarray_size_params *params, const char *name); -int csi_nms_debug_info(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct non_max_suppression_params *params, +int csi_nms_debug_info(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct non_max_suppression_params *params, const char *name); -int csi_one_hot_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct one_hot_params *params, - const char *name); +int csi_one_hot_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct one_hot_params *params, const char *name); -int csi_prelu_debug_info(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct prelu_params *params, - const char *name); - -int csi_proposal_debug_info(struct csi_tensor *cls_prob, - struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, - struct csi_tensor *output, - struct proposal_params *params, - const char *name); +int csi_prelu_debug_info(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct prelu_params *params, const char *name); + +int csi_proposal_debug_info(struct csi_tensor *cls_prob, struct csi_tensor *bbox_pred, + struct csi_tensor *im_info, struct csi_tensor *output, + struct proposal_params *params, const char *name); -int csi_psroipooling_debug_info(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct psroipooling_params *params, +int csi_psroipooling_debug_info(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct psroipooling_params *params, const char *name); -int csi_reorg_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct reorg_params *params, - const char *name); +int csi_reorg_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct reorg_params *params, const char *name); -int csi_reshape_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct reshape_params *params, - const char *name); +int csi_reshape_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct reshape_params *params, const char *name); -int csi_resize_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct resize_params *params, - const char *name); +int csi_resize_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct resize_params *params, const char *name); -int csi_reverse_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct reverse_params *params, - const char *name); +int csi_reverse_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct reverse_params *params, const char *name); -int csi_roi_align_debug_info(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct roi_align_params *params, +int csi_roi_align_debug_info(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct roi_align_params *params, const char *name); -int csi_scatter_nd_debug_info(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *updates, - struct csi_tensor *output, - struct scatter_nd_params *params, - const char *name); - -int csi_segment_debug_info(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params, +int csi_scatter_nd_debug_info(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *updates, struct csi_tensor *output, + struct scatter_nd_params *params, const char *name); + +int csi_segment_debug_info(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params, const char *name); -int csi_select_debug_info(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct select_params *params, - const char *name); +int csi_select_debug_info(struct csi_tensor *condition, struct csi_tensor *input0, + struct csi_tensor *input1, struct csi_tensor *output, + struct select_params *params, const char *name); -int csi_sequence_mask_debug_info(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct sequence_mask_params *params, +int csi_sequence_mask_debug_info(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct sequence_mask_params *params, const char *name); -int csi_shape_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct shape_params *params, - const char *name); +int csi_shape_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct shape_params *params, const char *name); -int csi_shuffle_channel_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct shuffle_channel_params *params, - const char *name); +int csi_shuffle_channel_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct shuffle_channel_params *params, const char *name); -int csi_sigmoid_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct sigmoid_params *params, - const char *name); +int csi_sigmoid_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct sigmoid_params *params, const char *name); -int csi_slice_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct slice_params *params, - const char *name); +int csi_slice_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct slice_params *params, const char *name); -int csi_split_debug_info(struct csi_tensor *input, - struct csi_tensor **output, - struct split_params *params, - const char *name); +int csi_split_debug_info(struct csi_tensor *input, struct csi_tensor **output, + struct split_params *params, const char *name); -int csi_squeeze_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct squeeze_params *params, - const char *name); +int csi_squeeze_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct squeeze_params *params, const char *name); -int csi_stack_debug_info(struct csi_tensor **input, - struct csi_tensor *output, - struct stack_params *params, - const char *name); +int csi_stack_debug_info(struct csi_tensor **input, struct csi_tensor *output, + struct stack_params *params, const char *name); -int csi_strided_slice_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct strided_slice_params *params, - const char *name); +int csi_strided_slice_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct strided_slice_params *params, const char *name); -int csi_tile_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct tile_params *params, - const char *name); +int csi_tile_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct tile_params *params, const char *name); -int csi_topk_debug_info(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct topk_params *params, - const char *name); +int csi_topk_debug_info(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct topk_params *params, const char *name); -int csi_transpose_debug_info(struct csi_tensor *input, - struct csi_tensor *output, - struct transpose_params *params, - const char *name); +int csi_transpose_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct transpose_params *params, const char *name); -int csi_unpooling_debug_info(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, - struct unpooling_params *params, +int csi_unpooling_debug_info(struct csi_tensor *input, struct csi_tensor *mask, + struct csi_tensor *output, struct unpooling_params *params, const char *name); -int csi_unstack_debug_info(struct csi_tensor *input, - struct csi_tensor **output, - struct unstack_params *params, - const char *name); +int csi_unstack_debug_info(struct csi_tensor *input, struct csi_tensor **output, + struct unstack_params *params, const char *name); -int csi_where_debug_info(struct csi_tensor *condition, - struct csi_tensor *x, - struct csi_tensor *y, - struct csi_tensor *output, - struct where_params *params, - const char *name); +int csi_where_debug_info(struct csi_tensor *condition, struct csi_tensor *x, struct csi_tensor *y, + struct csi_tensor *output, struct where_params *params, const char *name); -#endif +#endif // INCLUDE_CSI_DEBUG_H_ diff --git a/include/csi_dp1k.h b/include/csi_dp1k.h deleted file mode 100644 index 91939049..00000000 --- a/include/csi_dp1k.h +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* CSI-NN2 version 1.10.x */ - -#ifndef _CSI_NN_DP1K_H -#define _CSI_NN_DP1K_H -#include "csi_nn.h" -#include "csi_utils.h" - -int csi_dp1k_add( - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_dp1k_avgpool2d( - struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_dp1k_concat( - struct csi_tensor **input, - struct csi_tensor *output, - struct concat_params *params); - -int csi_dp1k_conv2d( - struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_dp1k_deconv2d( - struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_dp1k_fullyconnected( - struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, - struct fc_params *params); - -int csi_dp1k_leaky_relu( - struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); - -int csi_dp1k_maxpool2d( - struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_dp1k_prelu( - struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, - struct prelu_params *params); - -int csi_dp1k_mul( - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_dp1k_relu( - struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); - -int csi_dp1k_reshape( - struct csi_tensor *input, - struct csi_tensor *output, - struct reshape_params *params); - -int csi_dp1k_resize( - struct csi_tensor *input, - struct csi_tensor *output, - struct resize_params *params); - -int csi_dp1k_sigmoid( - struct csi_tensor *input, - struct csi_tensor *output, - struct sigmoid_params *params); - -int csi_dp1k_softmax( - struct csi_tensor *input, - struct csi_tensor *output, - struct softmax_params *params); - -int csi_dp1k_transpose( - struct csi_tensor *input, - struct csi_tensor *output, - struct transpose_params *params); - -int csi_dp1k_strided_slice( - struct csi_tensor *input, - struct csi_tensor *output, - struct strided_slice_params *params); - -void csi_dp1k_input(struct csi_tensor *tensor, struct csi_session *sess); -void csi_dp1000_session_init(struct csi_session *sess); -void csi_dp1000_session_setup(struct csi_session *sess); -void csi_dp1000_set_input_number(int number, struct csi_session *sess); -void csi_dp1000_set_output_number(int number, struct csi_session *sess); -void csi_dp1000_set_input(int index, struct csi_tensor *input, struct csi_session *sess); -void csi_dp1000_set_output(int index, struct csi_tensor *output, struct csi_session *sess); - -typedef struct _csi_dp1000_target_data { - char* nb_model_path; -} csi_dp1000_target_data; - -struct csi_quant_info_dp1k -{ - int32_t zero_point; - float scale; - int32_t multiplier; - int32_t shift; - float min; - float max; -}; - -#define MAX_DIM 8 -struct csi_tensor_dp1k -{ - void *data; - int32_t dtype; - int32_t dim[MAX_DIM]; - int32_t dim_count; - char *name; - int32_t layout; - int32_t quant_channel; - struct csi_quant_info_dp1k *qinfo; - struct csi_session_dp1k *sess; -} __attribute__((packed)); - -#define CSINN_MAX_INPUT 4 -#define CSINN_MAX_OUTPUT 8 -struct csi_session_dp1k { - int32_t base_dtype; - int32_t base_layout; - int32_t base_api; - int32_t input_num; - int32_t output_num; - struct csi_tensor_dp1k *input[CSINN_MAX_INPUT]; - struct csi_tensor_dp1k *output[CSINN_MAX_OUTPUT]; - void *td; -}; - -struct ScaleZp_dp1k -{ - float scale; - int32_t zero_point; -}; - -struct conv2d_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - int32_t group; - int32_t stride_height; - int32_t stride_width; - int32_t pad_top; - int32_t pad_left; - int32_t pad_down; - int32_t pad_right; - int32_t dilation_height; - int32_t dilation_width; - char *name; - struct ScaleZp_dp1k *scale_zp; - struct - { - struct csi_tensor *kernel_tm; - int32_t conv_mode; - } conv_extra; -}; - -struct fc_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; - int32_t units; -}; - -struct pool_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; - int32_t pool_type; - int32_t filter_height; - int32_t filter_width; - int32_t filter_depth; - int32_t stride_height; - int32_t stride_width; - int32_t stride_depth; - int32_t pad_top; - int32_t pad_left; - int32_t pad_down; - int32_t pad_right; - int32_t pad_front; - int32_t pad_back; -}; - -struct sigmoid_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; -}; - -struct relu_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; - - /* n / alpha / threshold */ - float n; - int32_t n_multiplier; - int32_t n_shift; -}; - -struct prelu_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; - int32_t axis; -}; - -struct softmax_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; - int32_t axis; -}; - -struct diso_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; -}; - -struct transpose_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; - int32_t *permute; -}; - -struct concat_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; - int32_t inputs_count; - int32_t axis; -}; - -struct resize_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; - int32_t resize_mode; - bool align_corners; -}; - -struct reshape_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; -}; - -struct strided_slice_params_dp1k -{ - int (*bc)(); - int32_t layout; - int32_t api; - char *name; - int32_t *begin; - int32_t *end; - int32_t *stride; - int32_t slice_count; -}; - -extern int csi_dp1000_add( - struct csi_tensor_dp1k *input0, - struct csi_tensor_dp1k *input1, - struct csi_tensor_dp1k *output, - struct diso_params_dp1k *params); - -extern int csi_dp1000_avgpool2d( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct pool_params_dp1k *params); - -extern int csi_dp1000_concat( - struct csi_tensor_dp1k **input, - struct csi_tensor_dp1k *output, - struct concat_params_dp1k *params); - -extern int csi_dp1000_conv2d( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct csi_tensor_dp1k *kernel, - struct csi_tensor_dp1k *bias, - struct conv2d_params_dp1k *params); - -extern int csi_dp1000_deconv2d( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct csi_tensor_dp1k *kernel, - struct csi_tensor_dp1k *bias, - struct conv2d_params_dp1k *params); - -extern int csi_dp1000_fullyconnected( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct csi_tensor_dp1k *weights, - struct csi_tensor_dp1k *bias, - struct fc_params_dp1k *params); - -extern int csi_dp1000_leaky_relu( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct relu_params_dp1k *params); - -extern int csi_dp1000_maxpool2d( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct pool_params_dp1k *params); - -extern int csi_dp1000_prelu( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *alpha, - struct csi_tensor_dp1k *output, - struct prelu_params_dp1k *params); - -extern int csi_dp1000_mul( - struct csi_tensor_dp1k *input0, - struct csi_tensor_dp1k *input1, - struct csi_tensor_dp1k *output, - struct diso_params_dp1k *params); - -extern int csi_dp1000_relu( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct relu_params_dp1k *params); - -extern int csi_dp1000_reshape( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct reshape_params_dp1k *params); - -extern int csi_dp1000_resize( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct resize_params_dp1k *params); - -extern int csi_dp1000_sigmoid( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct sigmoid_params_dp1k *params); - -extern int csi_dp1000_softmax( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct softmax_params_dp1k *params); - -extern int csi_dp1000_transpose( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct transpose_params_dp1k *params); - -extern int csi_dp1000_strided_slice( - struct csi_tensor_dp1k *input, - struct csi_tensor_dp1k *output, - struct strided_slice_params_dp1k *params); - -extern void csi_dp1000_input(struct csi_tensor_dp1k *input); - -extern void csi_dp1000_model_setup(struct csi_session_dp1k *sess); - -#endif diff --git a/include/csi_e804.h b/include/csi_e804.h index bc6be3fa..a4a31413 100644 --- a/include/csi_e804.h +++ b/include/csi_e804.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,92 +16,69 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_INTERNAL_E804_H -#define _CSI_INTERNAL_E804_H +#ifndef INCLUDE_CSI_E804_H_ +#define INCLUDE_CSI_E804_H_ +#include #include #include #include -#include + #include "csi_internal.h" #include "csi_ref.h" #include "csi_utils.h" -// #include "csi_math.h" -// #include "csi_nnfunctions.h" #include "csky_dsp2_nnfunctions.h" - -int csi_e804_conv2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_e804_conv2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_e804_conv2d_init_q15(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_e804_conv2d_init_q15(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_e804_depthwise_conv2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_e804_depthwise_conv2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_e804_avgpool2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_e804_avgpool2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_e804_maxpool2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_e804_maxpool2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_e804_fullyconnected_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_e804_fullyconnected_q7(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_e804_fullyconnected_q15(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_e804_fullyconnected_q15(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_e804_softmax_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_e804_softmax_q7(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_e804_softmax_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_e804_softmax_q15(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_e804_relu_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_e804_relu_q7(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_e804_relu_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_e804_relu_q15(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_e804_sigmoid_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_e804_sigmoid_q7(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_e804_sigmoid_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_e804_sigmoid_q15(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_e804_tanh_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_e804_tanh_q7(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_e804_tanh_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_e804_tanh_q15(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); - -#endif +#endif // INCLUDE_CSI_E804_H_ diff --git a/include/csi_gref.h b/include/csi_gref.h index ce949fc1..18c68ac6 100644 --- a/include/csi_gref.h +++ b/include/csi_gref.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,748 +16,496 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_NN_GREF_H -#define _CSI_NN_GREF_H +#ifndef INCLUDE_CSI_GREF_H_ +#define INCLUDE_CSI_GREF_H_ #include "csi_nn.h" -#include "csi_utils.h" #include "csi_node.h" +#include "csi_utils.h" -int csi_gref_acos(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_acos(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_acosh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_acosh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_cos(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_cos(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_cosh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_cosh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_asin(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_asin(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_asinh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_asinh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_tan(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_tan(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_atan(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_atan(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_atanh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_atanh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_threshold_relu(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_threshold_relu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_gref_trunc(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_trunc(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_topk(struct csi_tensor *input, - struct csi_tensor *output1, - struct csi_tensor *output2, +int csi_gref_topk(struct csi_tensor *input, struct csi_tensor *output1, struct csi_tensor *output2, struct topk_params *params); -int csi_gref_cumprod(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_cumprod(struct csi_tensor *input, struct csi_tensor *output, struct cumprod_params *params); -int csi_gref_cumsum(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_cumsum(struct csi_tensor *input, struct csi_tensor *output, struct cumsum_params *params); -int csi_gref_conv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); +int csi_gref_conv1d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv2d_params *params); + +int csi_gref_conv2d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv2d_params *params); -int csi_gref_depthwise_conv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_gref_depthwise_conv2d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_gref_group_conv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_gref_group_conv2d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_gref_conv2d_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_gref_conv2d_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_gref_conv2d_relu6(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); +int csi_gref_conv2d_relu6(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); -int csi_gref_conv3d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv3d_params *params); +int csi_gref_conv3d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv3d_params *params); -int csi_gref_deconv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_gref_deconv2d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_gref_deconv3d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_gref_deconv3d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv3d_params *params); -int csi_gref_depthwise_deconv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_gref_depthwise_deconv2d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_gref_depthwise_conv2d_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_gref_depthwise_conv2d_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_gref_depthwise_conv2d_relu6(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_gref_depthwise_conv2d_relu6(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_gref_fsmn(struct csi_tensor *frame, - struct csi_tensor *l_filter, - struct csi_tensor *r_filter, - struct csi_tensor *frame_sequence, - struct csi_tensor *frame_counter, - struct csi_tensor *output, +int csi_gref_fsmn(struct csi_tensor *frame, struct csi_tensor *l_filter, + struct csi_tensor *r_filter, struct csi_tensor *frame_sequence, + struct csi_tensor *frame_counter, struct csi_tensor *output, struct fsmn_params *params); -int csi_gref_fullyconnected(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_gref_fullyconnected(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_gref_fullyconnected_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_gref_fullyconnected_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_gref_maxpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_gref_maxpool2d(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_gref_maxpool3d(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_maxpool3d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_gref_avgpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_gref_avgpool2d(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_gref_avgpool3d(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_avgpool3d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_gref_global_avgpool3d(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_global_avgpool3d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_gref_global_avgpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_gref_global_avgpool2d(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_gref_global_maxpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_gref_global_maxpool2d(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_gref_l2pool(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_l2pool(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_gref_pool_with_argmax(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_pool_with_argmax(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_gref_maxpool2d_locat(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_maxpool2d_locat(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_gref_mod(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_mod(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_non_max_suppression(struct csi_tensor *input0, - struct csi_tensor *input1, +int csi_gref_non_max_suppression(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct non_max_suppression_params *params); -int csi_gref_unpooling(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, +int csi_gref_unpooling(struct csi_tensor *input, struct csi_tensor *mask, struct csi_tensor *output, struct unpooling_params *params); -int csi_gref_negative(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_negative(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_floor(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_floor(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_ceil(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_ceil(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_clip(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_clip(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_abs(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_abs(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_exp(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_exp(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_sin(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_sin(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_sinh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_sinh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_tanh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_tanh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_sqrt(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_sqrt(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_rsqrt(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_rsqrt(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_square(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_square(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_sigmoid(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_sigmoid(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_gref_softsign(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_softsign(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_space_to_batch_nd(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_space_to_batch_nd(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_nd_params *params); -int csi_gref_elu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_gref_elu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_gref_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_gref_relu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_gref_relu1(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_gref_relu1(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_gref_relu6(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_gref_relu6(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_gref_relun(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_gref_relun(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_gref_roi_align(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, +int csi_gref_roi_align(struct csi_tensor *data, struct csi_tensor *rois, struct csi_tensor *output, struct roi_align_params *params); -int csi_gref_roipool(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, +int csi_gref_roipool(struct csi_tensor *data, struct csi_tensor *rois, struct csi_tensor *output, struct roi_pool_params *params); -int csi_gref_round(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_round(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_leaky_relu(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_leaky_relu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_gref_softrelu(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_softrelu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_gref_prelu(struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, +int csi_gref_prelu(struct csi_tensor *input, struct csi_tensor *alpha, struct csi_tensor *output, struct prelu_params *params); -int csi_gref_softplus(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_softplus(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_softmax(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_softmax(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_gref_batch_normalization(struct csi_tensor *input, - struct csi_tensor *mean, - struct csi_tensor *variance, - struct csi_tensor *gamma, - struct csi_tensor *beta, - struct csi_tensor *output, +int csi_gref_batch_normalization(struct csi_tensor *input, struct csi_tensor *mean, + struct csi_tensor *variance, struct csi_tensor *gamma, + struct csi_tensor *beta, struct csi_tensor *output, struct bn_params *params); -int csi_gref_l2_normalization(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_l2_normalization(struct csi_tensor *input, struct csi_tensor *output, struct l2n_params *params); -int csi_gref_lrn(struct csi_tensor *input, - struct csi_tensor *output, - struct lrn_params *params); +int csi_gref_lrn(struct csi_tensor *input, struct csi_tensor *output, struct lrn_params *params); -int csi_gref_matmul(struct csi_tensor *mat0, - struct csi_tensor *mat1, - struct csi_tensor *output, +int csi_gref_matmul(struct csi_tensor *mat0, struct csi_tensor *mat1, struct csi_tensor *output, struct matmul_params *params); -int csi_gref_add(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_add(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_sub(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_sub(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_mul(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_mul(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_div(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_div(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_floor_divide(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_gref_floor_mod(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_gref_maximum(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_gref_minimum(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_gref_power(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_floor_divide(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_gref_floor_mod(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_gref_maximum(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_gref_minimum(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_gref_power(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_greater(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_gref_greater(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_gref_less(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_less(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_log_softmax(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_log_softmax(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_gref_log(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_log(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_log1p(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_log1p(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_equal(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_not_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_gref_not_equal(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_gref_not(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_not(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_reduce_logsumexp(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_reduce_logsumexp(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_reduce_max(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_reduce_max(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_reduce_mean(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_reduce_mean(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_reduce_min(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_reduce_min(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_reduce_prod(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_reduce_prod(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_reduce_sum(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_reduce_sum(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_greater_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_gref_greater_equal(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_gref_less_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_gref_less_equal(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_gref_select(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_select(struct csi_tensor *condition, struct csi_tensor *input0, + struct csi_tensor *input1, struct csi_tensor *output, struct select_params *params); -int csi_gref_and(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_and(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_or(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_or(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_pad(struct csi_tensor *input, - struct csi_tensor *output, - struct pad_params *params); +int csi_gref_pad(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params); -int csi_gref_resize(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_resize(struct csi_tensor *input, struct csi_tensor *output, struct resize_params *params); -int csi_gref_concat(struct csi_tensor **input, - struct csi_tensor *output, +int csi_gref_concat(struct csi_tensor **input, struct csi_tensor *output, struct concat_params *params); -int csi_gref_proposal(struct csi_tensor *cls_prob, - struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, - struct csi_tensor *output, +int csi_gref_proposal(struct csi_tensor *cls_prob, struct csi_tensor *bbox_pred, + struct csi_tensor *im_info, struct csi_tensor *output, struct proposal_params *params); -int csi_gref_psroipooling(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct psroipooling_params *params); +int csi_gref_psroipooling(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct psroipooling_params *params); -int csi_gref_transpose(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_transpose(struct csi_tensor *input, struct csi_tensor *output, struct transpose_params *params); -int csi_gref_reshape(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_reshape(struct csi_tensor *input, struct csi_tensor *output, struct reshape_params *params); -int csi_gref_shape(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_shape(struct csi_tensor *input, struct csi_tensor *output, struct shape_params *params); -int csi_gref_strided_slice(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_strided_slice(struct csi_tensor *input, struct csi_tensor *output, struct strided_slice_params *params); -int csi_gref_expand_dims(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_expand_dims(struct csi_tensor *input, struct csi_tensor *output, struct expand_dims_params *params); -int csi_gref_expm1(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_expm1(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_reverse(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_reverse(struct csi_tensor *input, struct csi_tensor *output, struct reverse_params *params); -int csi_gref_flatten(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_flatten(struct csi_tensor *input, struct csi_tensor *output, struct flatten_params *params); -int csi_gref_crop(struct csi_tensor *input, - struct csi_tensor *output, - struct crop_params *params); +int csi_gref_crop(struct csi_tensor *input, struct csi_tensor *output, struct crop_params *params); -int csi_gref_slice(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_slice(struct csi_tensor *input, struct csi_tensor *output, struct slice_params *params); -int csi_gref_split(struct csi_tensor *input, - struct csi_tensor **output, +int csi_gref_split(struct csi_tensor *input, struct csi_tensor **output, struct split_params *params); -int csi_gref_stack(struct csi_tensor **input, - struct csi_tensor *output, +int csi_gref_stack(struct csi_tensor **input, struct csi_tensor *output, struct stack_params *params); -int csi_gref_tile(struct csi_tensor *inputs, - struct csi_tensor *output, - struct tile_params *params); +int csi_gref_tile(struct csi_tensor *inputs, struct csi_tensor *output, struct tile_params *params); -int csi_gref_arange(struct csi_tensor *output, - struct arange_params *params); +int csi_gref_arange(struct csi_tensor *output, struct arange_params *params); -int csi_gref_where(struct csi_tensor *condition, - struct csi_tensor *x, - struct csi_tensor *y, - struct csi_tensor *output, - struct where_params *params); +int csi_gref_where(struct csi_tensor *condition, struct csi_tensor *x, struct csi_tensor *y, + struct csi_tensor *output, struct where_params *params); -int csi_gref_unstack(struct csi_tensor *input, - struct csi_tensor **output, +int csi_gref_unstack(struct csi_tensor *input, struct csi_tensor **output, struct unstack_params *params); -int csi_gref_gather(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, +int csi_gref_gather(struct csi_tensor *input, struct csi_tensor *indices, struct csi_tensor *output, struct gather_params *params); -int csi_gref_gather_nd(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_nd_params *params); +int csi_gref_gather_nd(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_nd_params *params); -int csi_gref_hard_sigmoid(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_hard_sigmoid(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_gref_isnan_bool(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_isnan_bool(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_logical_and(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_gref_logical_and(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_gref_logical_not(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_logical_not(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_logical_or(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_gref_logical_or(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_gref_logical_xor(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_gref_logical_xor(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_gref_squeeze(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_squeeze(struct csi_tensor *input, struct csi_tensor *output, struct squeeze_params *params); -int csi_gref_segment_max(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); - -int csi_gref_segment_mean(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); - -int csi_gref_segment_min(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); - -int csi_gref_segment_prod(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); - -int csi_gref_segment_sum(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); - -int csi_gref_scatter_nd(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *updates, - struct csi_tensor *output, +int csi_gref_segment_max(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); + +int csi_gref_segment_mean(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); + +int csi_gref_segment_min(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); + +int csi_gref_segment_prod(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); + +int csi_gref_segment_sum(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); + +int csi_gref_scatter_nd(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *updates, struct csi_tensor *output, struct scatter_nd_params *params); -int csi_gref_shuffle_channel(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_shuffle_channel(struct csi_tensor *input, struct csi_tensor *output, struct shuffle_channel_params *params); -int csi_gref_sign(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_sign(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_ndarray_size(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_ndarray_size(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params); -int csi_gref_space_to_batch(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_space_to_batch(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_params *params); -int csi_gref_batch_to_space(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_batch_to_space(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_params *params); -int csi_gref_batch_to_space_nd(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_batch_to_space_nd(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_nd_params *params); -int csi_gref_space_to_depth(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_space_to_depth(struct csi_tensor *input, struct csi_tensor *output, struct space_to_depth_params *params); -int csi_gref_depth_to_space(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_depth_to_space(struct csi_tensor *input, struct csi_tensor *output, struct depth_to_space_params *params); -int csi_gref_broadcast_to(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_broadcast_to(struct csi_tensor *input, struct csi_tensor *output, struct broadcast_to_params *params); -int csi_gref_one_hot(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_one_hot(struct csi_tensor *input, struct csi_tensor *output, struct one_hot_params *params); -int csi_gref_sequence_mask(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct sequence_mask_params *params); +int csi_gref_sequence_mask(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct sequence_mask_params *params); -int csi_gref_im2col(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_im2col(struct csi_tensor *input, struct csi_tensor *output, struct im2col_params *params); -int csi_gref_col2im(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, +int csi_gref_col2im(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, struct col2im_params *params); -int csi_gref_sum(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_gref_sum(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_mean(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_mean(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_max(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_gref_max(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_min(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_gref_min(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_prod(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_prod(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_argmin(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_argmin(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_argmax(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_argmax(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_all(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_gref_all(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_any(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_gref_any(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_gref_reorg(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_reorg(struct csi_tensor *input, struct csi_tensor *output, struct reorg_params *params); -int csi_gref_erf(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_gref_erf(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_gref_xor(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_gref_xor(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_gref_yuv_rgb_scale(struct csi_tensor *input, - struct csi_tensor *output, +int csi_gref_yuv_rgb_scale(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -struct csi_ref_graph -{ +int csi_gref_layer_norm(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *gamma, struct csi_tensor *beta, + struct layer_norm_params *params); + +int csi_gref_cache_matmul(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params); + +int csi_gref_cache_conv1d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params); + +struct csi_ref_graph { struct csi_node **input; struct csi_node **output; int input_num; @@ -767,30 +515,42 @@ struct csi_ref_graph int layer_index; }; -struct csi_gref_target_data -{ +struct csi_gref_target_data { struct csi_ref_graph *graph; }; struct csi_ref_graph *csi_gref_get_graph(struct csi_session *sess); int csi_gref_graph_insert(struct csi_node *node, struct csi_ref_graph *graph); -int csi_gref_siso_op(struct csi_tensor *input, struct csi_tensor *output, - int op, void *params); +void csi_gref_post_dfs(struct csi_ref_graph *graph, + void (*fvisit)(struct csi_ref_graph *, struct csi_node *)); +int csi_gref_is_root_node(struct csi_ref_graph *graph, struct csi_node *node); +struct csi_node *csi_gref_get_input_subgraph(struct csi_ref_graph *graph, struct csi_node *node, + int index); +void csi_gref_reset_graph_visit(struct csi_ref_graph *graph); +void csi_gref_update_input_output(struct csi_ref_graph *graph, int index); +int csi_gref_siso_op(struct csi_tensor *input, struct csi_tensor *output, int op, void *params); int csi_gref_diso_op(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, int op, void *params); int csi_gref_sidcso_op(struct csi_tensor *input, struct csi_tensor *output, - struct csi_tensor *const0, struct csi_tensor *const1, - int op, void *params); + struct csi_tensor *const0, struct csi_tensor *const1, int op, void *params); void csi_gref_set_tensor(struct csi_tensor *tensor, struct csi_session *sess); void csi_gref_set_const_tensor(struct csi_tensor *tensor, struct csi_session *sess); int csi_gref_get_tensor(int index, struct csi_tensor *ret, struct csi_session *sess); -void csi_gref_nbg(struct csi_tensor **input, struct csi_tensor **output, - uint32_t inputs_count, uint32_t outputs_count, const char *url); +void csi_gref_nbg(struct csi_tensor **input, struct csi_tensor **output, uint32_t inputs_count, + uint32_t outputs_count, const char *url); -void csi_subgraph_alloc(struct csi_node *node, struct csi_ref_graph *ograph, struct csi_ref_graph *ggraph); +void csi_subgraph_alloc(struct csi_node *node, struct csi_ref_graph *ograph, + struct csi_ref_graph *ggraph); int csi_subgraph_init(struct csi_node *n); int csi_subgraph_deinit(struct csi_node *n); int csi_subgraph_run_init(struct csi_node *n); int csi_subgraph_run(struct csi_node *n); int csi_subgraph_run_deinit(struct csi_node *n); -#endif + +struct csi_ref_graph *csi_subgraph_generate(struct csi_ref_graph *ograph); +struct csi_ref_graph *csi_subgraph_rebuild(struct csi_ref_graph *subgraph); +struct csi_ref_graph *csi_subgraph_topology_sort(struct csi_ref_graph *graph); +void csi_subgraph_fvisit_fuse(struct csi_ref_graph *graph, struct csi_node *node); +void csi_subgraph_fvisit_print(struct csi_ref_graph *graph, struct csi_node *node); +int csi_subgraph_get_device(struct csi_node *node); +#endif // INCLUDE_CSI_GREF_H_ diff --git a/include/csi_i805.h b/include/csi_i805.h index ff77dcf2..1586545e 100644 --- a/include/csi_i805.h +++ b/include/csi_i805.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,185 +16,131 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_INTERNAL_I805_H -#define _CSI_INTERNAL_I805_H +#ifndef INCLUDE_CSI_I805_H_ +#define INCLUDE_CSI_I805_H_ +#include #include #include #include -#include + #include "csi_internal.h" #include "csi_ref.h" #include "csi_utils.h" -#include "csi_math.h" -// #include "csi_nnfunctions.h" -#include "csky_vdsp2_nnfunctions.h" #include "csi_i805_nnfunction.h" - -int csi_i805_conv2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_i805_conv2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_i805_conv2d_init_q15(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_i805_conv2d_init_q15(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_i805_depthwise_conv2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_i805_depthwise_conv2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_i805_avgpool2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_i805_avgpool2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_i805_maxpool2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_i805_maxpool2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_i805_fullyconnected_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_i805_fullyconnected_q7(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_i805_fullyconnected_q15(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_i805_fullyconnected_q15(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_i805_softmax_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_softmax_q7(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_i805_softmax_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_softmax_q15(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_i805_relu_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_relu_q7(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_i805_relu_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_relu_q15(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_i805_sigmoid_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_sigmoid_q7(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_i805_sigmoid_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_sigmoid_q15(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_i805_tanh_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_tanh_q7(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_i805_tanh_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_tanh_q15(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); - /*********************** u8 asym quant opt func *********************************/ -int csi_i805_add_init_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_i805_add_init_u8(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_i805_add_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_i805_add_u8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_i805_clip_init_u8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_clip_init_u8(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params); -int csi_i805_clip_u8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_clip_u8(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params); -int csi_i805_conv2d_init_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_i805_conv2d_init_u8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_i805_conv2d_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_i805_conv2d_u8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_i805_depthwise_conv2d_init_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_i805_depthwise_conv2d_init_u8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_i805_depthwise_conv2d_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_i805_depthwise_conv2d_u8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_i805_fullyconnected_init_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_i805_fullyconnected_init_u8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_i805_fullyconnected_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_i805_fullyconnected_u8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_i805_maxpool2d_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_i805_maxpool2d_u8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_i805_mul_init_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_i805_mul_init_u8(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_i805_mul_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_i805_mul_u8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_i805_relu_init_u8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_relu_init_u8(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_i805_relu_u8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_relu_u8(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_i805_relu6_init_u8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_relu6_init_u8(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_i805_relu6_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_i805_relu6_u8(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params); -int csi_i805_reshape_u8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_i805_reshape_u8(struct csi_tensor *input, struct csi_tensor *output, struct reshape_params *params); - - -#endif +#endif // INCLUDE_CSI_I805_H_ diff --git a/include/csi_internal.h b/include/csi_internal.h index 39ad572e..ddb3be7e 100644 --- a/include/csi_internal.h +++ b/include/csi_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,18 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ -#ifndef _CSI_INTERNAL_H -#define _CSI_INTERNAL_H +/* CSI-NN2 version 1.12.x */ +#ifndef INCLUDE_CSI_INTERNAL_H_ +#define INCLUDE_CSI_INTERNAL_H_ + +#include +#include +#include /* data type */ enum csinn_dtype_enum { CSINN_DTYPE_BOOL = 0, + CSINN_DTYPE_INT4, CSINN_DTYPE_UINT8, CSINN_DTYPE_INT8, CSINN_DTYPE_UINT16, @@ -30,20 +35,30 @@ enum csinn_dtype_enum { CSINN_DTYPE_UINT32, CSINN_DTYPE_INT32, CSINN_DTYPE_FLOAT16, + CSINN_DTYPE_BFLOAT16, CSINN_DTYPE_FLOAT32, CSINN_DTYPE_FLOAT64, CSINN_DTYPE_SIZE, }; -/* data type */ +/* data memory type */ +enum csinn_mem_type_enum { + CSINN_MEM_TYPE_CPU_NOT_ALIGNED = 0, + CSINN_MEM_TYPE_CPU_ALIGNED, + CSINN_MEM_TYPE_DMABUF, +}; + +/* quant type */ enum csinn_quant_enum { CSINN_QUANT_UNSET = 0, + CSINN_QUANT_INT4_SYM, CSINN_QUANT_UINT8_ASYM, CSINN_QUANT_UINT8_SYM, CSINN_QUANT_INT8_ASYM, CSINN_QUANT_INT8_SYM, CSINN_QUANT_INT16_SYM, CSINN_QUANT_FLOAT16, + CSINN_QUANT_BFLOAT16, CSINN_QUANT_FLOAT32, CSINN_QUANT_SIZE, }; @@ -62,7 +77,10 @@ enum csinn_api_enum { CSINN_I805, CSINN_E804, CSINN_REF_I805, + CSINN_C908, CSINN_TVMGEN, + CSINN_ASP, + CSINN_RVV, CSINN_API_SIZE, }; @@ -103,16 +121,20 @@ enum csinn_op_enum { CSINN_OP_BATCH_TO_SPACE, CSINN_OP_BATCH_TO_SPACE_ND, CSINN_OP_BROADCOST, + CSINN_OP_CACHE_MATMUL, + CSINN_OP_CACHE_CONV1D, CSINN_OP_CEIL, CSINN_OP_CLIP, CSINN_OP_COL2IM, CSINN_OP_CONCAT, + CSINN_OP_CONV1D, CSINN_OP_CONV2D, CSINN_OP_CONV2D_RELU, CSINN_OP_CONV2D_RELU6, CSINN_OP_CONV2D_CHANNEL, CSINN_OP_CONV2D_CHANNEL_RELU, CSINN_OP_CONV2D_CHANNEL_RELU6, + CSINN_OP_DATA_CONVERT, CSINN_OP_DEPTHWISE_CONV2D, CSINN_OP_DEPTHWISE_CONV2D_RELU, CSINN_OP_DEPTHWISE_CONV2D_RELU6, @@ -158,6 +180,7 @@ enum csinn_op_enum { CSINN_OP_ISNAN, CSINN_OP_L2N, CSINN_OP_L2POOL2D, + CSINN_OP_LAYER_NORM, CSINN_OP_LEAKY_RELU, CSINN_OP_LESS_EQUAL, CSINN_OP_LESS, @@ -334,6 +357,7 @@ enum csinn_layout_enum { CSINN_LAYOUT_OIW, CSINN_LAYOUT_OIHW, CSINN_LAYOUT_OIDHW, + CSINN_LAYOUT_O1HW, // depthwise kernel // NHWC // ACTIVITION @@ -344,7 +368,7 @@ enum csinn_layout_enum { CSINN_LAYOUT_OWI, CSINN_LAYOUT_OHWI, CSINN_LAYOUT_ODHWI, - + CSINN_LAYOUT_1HWO, // depthwise kernel }; enum csinn_status_enum { @@ -355,8 +379,12 @@ enum csinn_status_enum { CSINN_TRUE = 1, }; -struct csi_quant_info -{ +enum csinn_profiler_enum { + CSI_PROFILER_LEVEL_UNSET = 0, + CSI_PROFILER_LEVEL_TIMER, // print time +}; + +struct csi_quant_info { int32_t zero_point; float scale; int32_t multiplier; @@ -366,10 +394,10 @@ struct csi_quant_info }; #define MAX_DIM 8 -struct csi_tensor -{ +struct csi_tensor { void *data; enum csinn_dtype_enum dtype; + enum csinn_mem_type_enum mtype; int32_t dim[MAX_DIM]; int32_t dim_count; uint32_t is_const; @@ -380,20 +408,34 @@ struct csi_tensor struct csi_session *sess; }; -struct csi_scale_zp -{ +struct csi_session { + int32_t base_dtype; + int32_t base_layout; + int32_t base_api; + int32_t base_run_mode; + enum csinn_quant_enum base_quant_type; + char *model_name; + int32_t model_save; + int32_t debug_level; + int32_t profiler_level; + int32_t input_num; + int32_t output_num; + struct csi_tensor **input; + struct csi_tensor **output; + void *td; +}; + +struct csi_scale_zp { float scale; int32_t zero_point; }; -struct csi_min_max -{ +struct csi_min_max { float min; float max; }; -struct csi_params_base -{ +struct csi_params_base { int (*bc)(); char *name; int32_t layout; @@ -402,8 +444,7 @@ struct csi_params_base struct csi_session *sess; }; -struct fsmn_params -{ +struct fsmn_params { struct csi_params_base base; int32_t l_order; int32_t r_order; @@ -412,8 +453,7 @@ struct fsmn_params int32_t unavailable_frames; }; -struct conv2d_params -{ +struct conv2d_params { struct csi_params_base base; int32_t group; int32_t stride_height; @@ -424,15 +464,14 @@ struct conv2d_params int32_t pad_right; int32_t dilation_height; int32_t dilation_width; - struct - { + struct { struct csi_tensor *kernel_tm; enum csinn_conv_mode_enum conv_mode; + int32_t fuse_zp2bias; } conv_extra; }; -struct conv3d_params -{ +struct conv3d_params { struct csi_params_base base; int32_t group; int32_t stride_depth; @@ -452,14 +491,15 @@ struct conv3d_params int32_t out_pad_width; }; -struct fc_params -{ +struct fc_params { struct csi_params_base base; int32_t units; + struct { + int32_t fuse_zp2bias; + } fc_extra; }; -struct pool_params -{ +struct pool_params { struct csi_params_base base; int32_t pool_type; int32_t filter_height; @@ -478,8 +518,7 @@ struct pool_params bool count_include_pad; }; -struct unpooling_params -{ +struct unpooling_params { struct csi_params_base base; int32_t scale_height; int32_t scale_width; @@ -487,8 +526,7 @@ struct unpooling_params int32_t pad_out_width; }; -struct roi_align_params -{ +struct roi_align_params { struct csi_params_base base; int32_t pooled_size_h; int32_t pooled_size_w; @@ -498,8 +536,7 @@ struct roi_align_params int32_t sample_ratio; }; -struct roi_pool_params -{ +struct roi_pool_params { struct csi_params_base base; int32_t pooled_size_h; int32_t pooled_size_w; @@ -508,23 +545,19 @@ struct roi_pool_params int32_t spatial_scale_shift; }; -struct siso_params -{ +struct siso_params { struct csi_params_base base; }; -struct scatter_nd_params -{ +struct scatter_nd_params { struct csi_params_base base; }; -struct sigmoid_params -{ +struct sigmoid_params { struct csi_params_base base; }; -struct relu_params -{ +struct relu_params { struct csi_params_base base; /* n / alpha / threshold */ @@ -533,28 +566,24 @@ struct relu_params int32_t n_shift; }; -struct prelu_params -{ +struct prelu_params { struct csi_params_base base; int32_t axis; }; -struct softmax_params -{ +struct softmax_params { struct csi_params_base base; int32_t axis; }; -struct bn_params -{ +struct bn_params { struct csi_params_base base; float epsilon; int32_t epsilon_multiplier; int32_t epsilon_shift; }; -struct l2n_params -{ +struct l2n_params { struct csi_params_base base; float epsilon; int32_t epsilon_multiplier; @@ -563,8 +592,7 @@ struct l2n_params int32_t n; }; -struct lrn_params -{ +struct lrn_params { struct csi_params_base base; int32_t range; double bias; @@ -579,25 +607,21 @@ struct lrn_params enum csinn_lrn_enum norm_region; }; -struct matmul_params -{ +struct matmul_params { struct csi_params_base base; bool trans_a; bool trans_b; }; -struct diso_params -{ +struct diso_params { struct csi_params_base base; }; -struct select_params -{ +struct select_params { struct csi_params_base base; }; -struct pad_params -{ +struct pad_params { struct csi_params_base base; int32_t *pad_before; int32_t *pad_after; @@ -606,22 +630,19 @@ struct pad_params enum csinn_pad_enum pad_mode; }; -struct resize_params -{ +struct resize_params { struct csi_params_base base; enum csinn_resize_enum resize_mode; bool align_corners; }; -struct concat_params -{ +struct concat_params { struct csi_params_base base; int32_t inputs_count; int32_t axis; }; -struct proposal_params -{ +struct proposal_params { struct csi_params_base base; float *scales; int32_t *scale_multipliers; @@ -641,8 +662,7 @@ struct proposal_params bool iou_loss; }; -struct psroipooling_params -{ +struct psroipooling_params { struct csi_params_base base; int32_t output_dim; int32_t group_size; @@ -651,52 +671,44 @@ struct psroipooling_params int32_t spatial_scale_shift; }; -struct transpose_params -{ +struct transpose_params { struct csi_params_base base; int32_t *permute; int32_t permute_num; }; -struct reshape_params -{ +struct reshape_params { struct csi_params_base base; int32_t *shape; int32_t shape_num; }; -struct shape_params -{ +struct shape_params { struct csi_params_base base; }; -struct expand_dims_params -{ +struct expand_dims_params { struct csi_params_base base; int32_t axis; }; -struct reverse_params -{ +struct reverse_params { struct csi_params_base base; int32_t axis; }; -struct flatten_params -{ +struct flatten_params { struct csi_params_base base; }; -struct crop_params -{ +struct crop_params { struct csi_params_base base; int32_t axis; int32_t *offset; int32_t offset_num; }; -struct slice_params -{ +struct slice_params { struct csi_params_base base; int32_t *begin; int32_t *end; @@ -704,30 +716,26 @@ struct slice_params int32_t slice_num; }; -struct split_params -{ +struct split_params { struct csi_params_base base; int32_t *split_index; int32_t output_num; int32_t axis; }; -struct stack_params -{ +struct stack_params { struct csi_params_base base; int32_t inputs_count; int32_t axis; }; -struct tile_params -{ +struct tile_params { struct csi_params_base base; int32_t *reps; int32_t reps_num; }; -struct arange_params -{ +struct arange_params { struct csi_params_base base; float start; int32_t start_multiplier; @@ -740,49 +748,41 @@ struct arange_params int32_t step_shift; }; -struct where_params -{ +struct where_params { struct csi_params_base base; }; -struct unstack_params -{ +struct unstack_params { struct csi_params_base base; int32_t outputs_count; int32_t axis; }; -struct take_params -{ +struct take_params { struct csi_params_base base; int32_t axis; const char *mode; }; -struct gather_params -{ +struct gather_params { struct csi_params_base base; int32_t axis; }; -struct gather_nd_params -{ +struct gather_nd_params { struct csi_params_base base; }; -struct squeeze_params -{ +struct squeeze_params { struct csi_params_base base; int32_t *axis; int32_t axis_num; }; -struct ndarray_size_params -{ +struct ndarray_size_params { struct csi_params_base base; }; -struct space_to_batch_params -{ +struct space_to_batch_params { struct csi_params_base base; int32_t pad_top; int32_t pad_bottom; @@ -791,16 +791,14 @@ struct space_to_batch_params int32_t block_size; }; -struct space_to_batch_nd_params -{ +struct space_to_batch_nd_params { struct csi_params_base base; int32_t *paddings; int32_t *block_shape; int32_t spatial_dim_cnt; }; -struct batch_to_space_params -{ +struct batch_to_space_params { struct csi_params_base base; int32_t crop_top; int32_t crop_bottom; @@ -809,29 +807,25 @@ struct batch_to_space_params int32_t block_size; }; -struct batch_to_space_nd_params -{ +struct batch_to_space_nd_params { struct csi_params_base base; int32_t *crops; int32_t *block_shape; int32_t spatial_dim_cnt; }; -struct space_to_depth_params -{ +struct space_to_depth_params { struct csi_params_base base; int32_t block_size; }; -struct depth_to_space_params -{ +struct depth_to_space_params { struct csi_params_base base; enum csinn_depth2space_enum mode; int32_t block_size; }; -struct one_hot_params -{ +struct one_hot_params { struct csi_params_base base; float f_on_value; float f_off_value; @@ -841,8 +835,7 @@ struct one_hot_params int32_t axis; }; -struct sequence_mask_params -{ +struct sequence_mask_params { struct csi_params_base base; float mask_value; int32_t mask_value_multiplier; @@ -850,8 +843,7 @@ struct sequence_mask_params int32_t axis; }; -struct im2col_params -{ +struct im2col_params { struct csi_params_base base; int32_t pad_top; int32_t pad_down; @@ -863,8 +855,7 @@ struct im2col_params int32_t kernel_w; }; -struct col2im_params -{ +struct col2im_params { struct csi_params_base base; int32_t pad_h; int32_t pad_w; @@ -872,8 +863,7 @@ struct col2im_params int32_t stride_w; }; -struct reduce_params -{ +struct reduce_params { struct csi_params_base base; int32_t *out_strides; int32_t *out_extents; @@ -887,49 +877,42 @@ struct reduce_params bool keepdims; }; -struct reorg_params -{ +struct reorg_params { struct csi_params_base base; int32_t stride; }; -struct segment_params -{ +struct segment_params { struct csi_params_base base; int32_t num_segments; bool unsorted; }; -struct cumsum_params -{ +struct cumsum_params { struct csi_params_base base; int32_t axis; bool exclusive; }; -struct cumprod_params -{ +struct cumprod_params { struct csi_params_base base; int32_t axis; bool exclusive; }; -struct broadcast_to_params -{ +struct broadcast_to_params { struct csi_params_base base; int32_t *shape; int32_t shape_count; }; -struct clip_params -{ +struct clip_params { struct csi_params_base base; float min_value; float max_value; }; -struct strided_slice_params -{ +struct strided_slice_params { struct csi_params_base base; int32_t *begin; int32_t *end; @@ -937,24 +920,76 @@ struct strided_slice_params int32_t slice_count; }; -struct shuffle_channel_params -{ +struct shuffle_channel_params { struct csi_params_base base; int32_t group; }; -struct topk_params -{ +struct topk_params { struct csi_params_base base; int32_t k; }; -struct non_max_suppression_params -{ +struct non_max_suppression_params { struct csi_params_base base; int32_t max_output_size; float iou_threshold; // float score_threshold; }; -#endif +// modyfied to use asr model +struct layer_norm_params { + struct csi_params_base base; + float epsilon; + bool center; + bool scale; + int32_t axis; +}; + +struct asr_buffer_t { + size_t writer_index; + size_t buffer_lenth; // lenth of buffer + size_t data_lenth; // lenth of data + uint8_t *buffer; + uint8_t flag; +}; + +struct cache_matmul_params { + struct csi_params_base base; + struct asr_buffer_t asr_buffer; + int32_t *cache_shape; + int32_t *shape; + int32_t *axes; + void *data; +}; + +struct cache_conv1d_params { + struct csi_params_base base; + struct asr_buffer_t asr_buffer; + int32_t *cache_shape; + int32_t *in_shape; + int32_t group; + int32_t stride_width; + int32_t dilation_width; + int32_t pad_left; + int32_t pad_right; + void *data; +}; + +struct conv1d_params { + struct csi_params_base base; + int32_t group; + int32_t stride_width; + int32_t dilation_width; + int32_t pad_left; + int32_t pad_right; +}; + +struct csi_bc_op_list { + struct csi_bc_op_list *next; + enum csinn_dtype_enum dtype; + enum csinn_op_enum op_name; + void *bc; +}; + +#endif // INCLUDE_CSI_INTERNAL_H_ diff --git a/include/csi_memory.h b/include/csi_memory.h index 86dd1e10..26cae17f 100644 --- a/include/csi_memory.h +++ b/include/csi_memory.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,15 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ -#ifndef _CSI_MEMORY_H -#define _CSI_MEMORY_H +/* CSI-NN2 version 1.12.x */ +#ifndef INCLUDE_CSI_MEMORY_H_ +#define INCLUDE_CSI_MEMORY_H_ void csi_mem_print_map(); void *csi_mem_alloc(int64_t size); +void *csi_mem_alloc_aligned(int64_t size, int aligned_bytes); +void *csi_mem_calloc(size_t nmemb, size_t size); +void *csi_mem_realloc(void *ptr, size_t size); void csi_mem_free(void *ptr); -#endif +#endif // INCLUDE_CSI_MEMORY_H_ diff --git a/include/csi_nn.h b/include/csi_nn.h index 1fd104c9..ca7de6df 100644 --- a/include/csi_nn.h +++ b/include/csi_nn.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,1413 +16,903 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_NN_H -#define _CSI_NN_H +#ifndef INCLUDE_CSI_NN_H_ +#define INCLUDE_CSI_NN_H_ +#include #include #include #include -#include -#include "csi_internal.h" -#include "csi_utils.h" + #include "csi_debug.h" +#include "csi_internal.h" #include "csi_memory.h" +#include "csi_utils.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int csi_conv2d_init(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv2d_params *params); -int csi_conv2d_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_conv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_conv2d_relu_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_conv2d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv2d_params *params); + +int csi_conv2d_relu_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_conv2d_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); +int csi_conv2d_relu(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv2d_params *params); -int csi_conv2d_relu6_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_conv2d_relu6_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_conv2d_relu6(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); +int csi_conv2d_relu6(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv2d_params *params); -int csi_deconv2d_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_deconv2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_deconv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_conv3d_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv3d_params *params); - -int csi_conv3d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv3d_params *params); - -int csi_deconv3d_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_deconv2d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv2d_params *params); + +int csi_conv3d_init(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv3d_params *params); + +int csi_conv3d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv3d_params *params); + +int csi_deconv3d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv3d_params *params); -int csi_deconv3d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv3d_params *params); - -int csi_fsmn_init(struct csi_tensor *frame, - struct csi_tensor *l_filter, - struct csi_tensor *r_filter, - struct csi_tensor *frame_sequence, - struct csi_tensor *frame_counter, - struct csi_tensor *output, +int csi_deconv3d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv3d_params *params); + +int csi_fsmn_init(struct csi_tensor *frame, struct csi_tensor *l_filter, + struct csi_tensor *r_filter, struct csi_tensor *frame_sequence, + struct csi_tensor *frame_counter, struct csi_tensor *output, struct fsmn_params *params); -int csi_fsmn(struct csi_tensor *frame, - struct csi_tensor *l_filter, - struct csi_tensor *r_filter, - struct csi_tensor *frame_sequence, - struct csi_tensor *frame_counter, - struct csi_tensor *output, - struct fsmn_params *params); - -int csi_fullyconnected_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_fsmn(struct csi_tensor *frame, struct csi_tensor *l_filter, struct csi_tensor *r_filter, + struct csi_tensor *frame_sequence, struct csi_tensor *frame_counter, + struct csi_tensor *output, struct fsmn_params *params); + +int csi_fullyconnected_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_fullyconnected(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_fullyconnected(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_fullyconnected_relu_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_fullyconnected_relu_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_fullyconnected_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_fullyconnected_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_maxpool2d_init(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_maxpool2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_maxpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_maxpool2d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_maxpool3d_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_maxpool3d_init(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_maxpool3d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_global_maxpool2d_init(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_maxpool3d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_global_maxpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_global_maxpool2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_avgpool2d_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_global_maxpool2d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_avgpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_avgpool2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_avgpool2d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_avgpool3d_init(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_avgpool3d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_avgpool3d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_avgpool3d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_global_avgpool2d_init(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_global_avgpool2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_global_avgpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_global_avgpool2d(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_l2pool_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_l2pool_init(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_l2pool(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_l2pool(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_pool_with_argmax_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_pool_with_argmax_init(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_pool_with_argmax(struct csi_tensor *input, - struct csi_tensor *output, +int csi_pool_with_argmax(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_maxpool2d_locat_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_maxpool2d_locat_init(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_maxpool2d_locat(struct csi_tensor *input, - struct csi_tensor *output, +int csi_maxpool2d_locat(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_unpooling_init(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, +int csi_unpooling_init(struct csi_tensor *input, struct csi_tensor *mask, struct csi_tensor *output, struct unpooling_params *params); -int csi_unpooling(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, +int csi_unpooling(struct csi_tensor *input, struct csi_tensor *mask, struct csi_tensor *output, struct unpooling_params *params); -int csi_roi_align_init(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, +int csi_roi_align_init(struct csi_tensor *data, struct csi_tensor *rois, struct csi_tensor *output, struct roi_align_params *params); -int csi_roi_align(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, +int csi_roi_align(struct csi_tensor *data, struct csi_tensor *rois, struct csi_tensor *output, struct roi_align_params *params); -int csi_negative_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_negative_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_negative(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_negative(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_floor_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_floor_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_floor(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_floor(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ceil_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ceil_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ceil(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ceil(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_sign_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_sign_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_sign(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_sign(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_trunc_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_trunc_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_trunc(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_trunc(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_round_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_round_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_round(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_round(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_abs_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_abs_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_abs(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_abs(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_isnan_bool_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_isnan_bool_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_isnan_bool(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_isnan_bool(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_exp_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_exp_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_exp(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_exp(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_expm1_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_expm1_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_expm1(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_expm1(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_sin_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_sin_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_sin(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_sin(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_cos_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_cos_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_cos(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_cos(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_tanh_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_tanh_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_tanh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_tanh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_log_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_log_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_log(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_log(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_sqrt_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_sqrt_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_sqrt(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_sqrt(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_rsqrt_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_rsqrt_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_rsqrt(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_rsqrt(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_square_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_square_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_square(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_square(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_sigmoid_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_sigmoid_init(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_sigmoid(struct csi_tensor *input, - struct csi_tensor *output, - struct sigmoid_params *params); +int csi_sigmoid(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_hard_sigmoid_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_hard_sigmoid_init(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_hard_sigmoid(struct csi_tensor *input, - struct csi_tensor *output, +int csi_hard_sigmoid(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_elu_init(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_elu_init(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_elu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_elu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_relu_init(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_relu_init(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_relu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_relu1_init(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_relu1_init(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_relu1(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_relu1(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_relu6_init(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_relu6_init(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_relu6(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_relu6(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_relun_init(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_relun_init(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_relun(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_relun(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_leaky_relu_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_leaky_relu_init(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_leaky_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_leaky_relu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_softrelu_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_softrelu_init(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_softrelu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_softrelu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_prelu_init(struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, +int csi_prelu_init(struct csi_tensor *input, struct csi_tensor *alpha, struct csi_tensor *output, struct prelu_params *params); -int csi_prelu(struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, +int csi_prelu(struct csi_tensor *input, struct csi_tensor *alpha, struct csi_tensor *output, struct prelu_params *params); -int csi_softplus_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_softplus_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_softplus(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_softplus(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_softmax_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_softmax_init(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_softmax(struct csi_tensor *input, - struct csi_tensor *output, - struct softmax_params *params); +int csi_softmax(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_log_softmax_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_log_softmax_init(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_log_softmax(struct csi_tensor *input, - struct csi_tensor *output, +int csi_log_softmax(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_batch_normalization_init(struct csi_tensor *input, - struct csi_tensor *mean, - struct csi_tensor *variance, - struct csi_tensor *gamma, - struct csi_tensor *beta, - struct csi_tensor *output, +int csi_batch_normalization_init(struct csi_tensor *input, struct csi_tensor *mean, + struct csi_tensor *variance, struct csi_tensor *gamma, + struct csi_tensor *beta, struct csi_tensor *output, struct bn_params *params); -int csi_batch_normalization(struct csi_tensor *input, - struct csi_tensor *mean, - struct csi_tensor *variance, - struct csi_tensor *gamma, - struct csi_tensor *beta, - struct csi_tensor *output, +int csi_batch_normalization(struct csi_tensor *input, struct csi_tensor *mean, + struct csi_tensor *variance, struct csi_tensor *gamma, + struct csi_tensor *beta, struct csi_tensor *output, struct bn_params *params); -int csi_l2_normalization_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_l2_normalization_init(struct csi_tensor *input, struct csi_tensor *output, struct l2n_params *params); -int csi_l2_normalization(struct csi_tensor *input, - struct csi_tensor *output, +int csi_l2_normalization(struct csi_tensor *input, struct csi_tensor *output, struct l2n_params *params); -int csi_lrn_init(struct csi_tensor *input, - struct csi_tensor *output, - struct lrn_params *params); +int csi_lrn_init(struct csi_tensor *input, struct csi_tensor *output, struct lrn_params *params); -int csi_lrn(struct csi_tensor *input, - struct csi_tensor *output, - struct lrn_params *params); +int csi_lrn(struct csi_tensor *input, struct csi_tensor *output, struct lrn_params *params); -int csi_matmul_init(struct csi_tensor *mat0, - struct csi_tensor *mat1, - struct csi_tensor *output, +int csi_matmul_init(struct csi_tensor *mat0, struct csi_tensor *mat1, struct csi_tensor *output, struct matmul_params *params); -int csi_matmul(struct csi_tensor *mat0, - struct csi_tensor *mat1, - struct csi_tensor *output, +int csi_matmul(struct csi_tensor *mat0, struct csi_tensor *mat1, struct csi_tensor *output, struct matmul_params *params); -int csi_add_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_add_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_add(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_add(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_sub_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_sub_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_sub(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_sub(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_mul_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_mul_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_mul(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_mul(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_div_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_div_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_div(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_div(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_floor_divide_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_floor_divide_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_floor_divide(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_floor_divide(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_floor_mod_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_floor_mod_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_floor_mod(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_floor_mod(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_mod_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_mod_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_mod(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_mod(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_maximum_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_maximum_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_maximum(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_maximum(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_minimum_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_minimum_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_minimum(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_minimum(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_power_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_power_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_power(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_power(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_greater_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_greater_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_greater(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_greater(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_less_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_less_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_less(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_less(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_logical_and_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_logical_and_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_logical_and(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_logical_and(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_logical_or_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_logical_or_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_logical_or(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_logical_or(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_logical_not_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_logical_not_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_logical_not(struct csi_tensor *input, - struct csi_tensor *output, +int csi_logical_not(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_logical_xor_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_logical_xor_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_logical_xor(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_logical_xor(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_equal_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_equal_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_equal(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_not_equal_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_not_equal_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_not_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_not_equal(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_greater_equal_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_greater_equal_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_greater_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_greater_equal(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_less_equal_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_less_equal_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_less_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_less_equal(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_select_init(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_select_init(struct csi_tensor *condition, struct csi_tensor *input0, + struct csi_tensor *input1, struct csi_tensor *output, struct select_params *params); -int csi_select(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct select_params *params); +int csi_select(struct csi_tensor *condition, struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct select_params *params); -int csi_and_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_and_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_and(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_and(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_or_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_or_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_or(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_or(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_xor_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_xor_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_xor(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_xor(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_not_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_not_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_not(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_not(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_pad_init(struct csi_tensor *input, - struct csi_tensor *output, - struct pad_params *params); +int csi_pad_init(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params); -int csi_pad(struct csi_tensor *input, - struct csi_tensor *output, - struct pad_params *params); +int csi_pad(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params); -int csi_resize_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_resize_init(struct csi_tensor *input, struct csi_tensor *output, struct resize_params *params); -int csi_resize(struct csi_tensor *input, - struct csi_tensor *output, - struct resize_params *params); +int csi_resize(struct csi_tensor *input, struct csi_tensor *output, struct resize_params *params); -int csi_concat_init(struct csi_tensor **input, - struct csi_tensor *output, +int csi_concat_init(struct csi_tensor **input, struct csi_tensor *output, struct concat_params *params); -int csi_concat(struct csi_tensor **input, - struct csi_tensor *output, - struct concat_params *params); +int csi_concat(struct csi_tensor **input, struct csi_tensor *output, struct concat_params *params); -int csi_proposal_init(struct csi_tensor *cls_prob, - struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, - struct csi_tensor *output, +int csi_proposal_init(struct csi_tensor *cls_prob, struct csi_tensor *bbox_pred, + struct csi_tensor *im_info, struct csi_tensor *output, struct proposal_params *params); -int csi_proposal(struct csi_tensor *cls_prob, - struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, - struct csi_tensor *output, +int csi_proposal(struct csi_tensor *cls_prob, struct csi_tensor *bbox_pred, + struct csi_tensor *im_info, struct csi_tensor *output, struct proposal_params *params); -int csi_psroipooling_init(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct psroipooling_params *params); +int csi_psroipooling_init(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct psroipooling_params *params); -int csi_psroipooling(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, +int csi_psroipooling(struct csi_tensor *data, struct csi_tensor *rois, struct csi_tensor *output, struct psroipooling_params *params); -int csi_transpose_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_transpose_init(struct csi_tensor *input, struct csi_tensor *output, struct transpose_params *params); -int csi_transpose(struct csi_tensor *input, - struct csi_tensor *output, +int csi_transpose(struct csi_tensor *input, struct csi_tensor *output, struct transpose_params *params); -int csi_reshape_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reshape_init(struct csi_tensor *input, struct csi_tensor *output, struct reshape_params *params); -int csi_reshape(struct csi_tensor *input, - struct csi_tensor *output, - struct reshape_params *params); +int csi_reshape(struct csi_tensor *input, struct csi_tensor *output, struct reshape_params *params); -int csi_shape_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_shape_init(struct csi_tensor *input, struct csi_tensor *output, struct shape_params *params); -int csi_shape(struct csi_tensor *input, - struct csi_tensor *output, - struct shape_params *params); +int csi_shape(struct csi_tensor *input, struct csi_tensor *output, struct shape_params *params); -int csi_expand_dims_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_expand_dims_init(struct csi_tensor *input, struct csi_tensor *output, struct expand_dims_params *params); -int csi_expand_dims(struct csi_tensor *input, - struct csi_tensor *output, +int csi_expand_dims(struct csi_tensor *input, struct csi_tensor *output, struct expand_dims_params *params); -int csi_reverse_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reverse_init(struct csi_tensor *input, struct csi_tensor *output, struct reverse_params *params); -int csi_reverse(struct csi_tensor *input, - struct csi_tensor *output, - struct reverse_params *params); +int csi_reverse(struct csi_tensor *input, struct csi_tensor *output, struct reverse_params *params); -int csi_flatten_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_flatten_init(struct csi_tensor *input, struct csi_tensor *output, struct flatten_params *params); -int csi_flatten(struct csi_tensor *input, - struct csi_tensor *output, - struct flatten_params *params); +int csi_flatten(struct csi_tensor *input, struct csi_tensor *output, struct flatten_params *params); -int csi_crop_init(struct csi_tensor *input, - struct csi_tensor *output, - struct crop_params *params); +int csi_crop_init(struct csi_tensor *input, struct csi_tensor *output, struct crop_params *params); -int csi_crop(struct csi_tensor *input, - struct csi_tensor *output, - struct crop_params *params); +int csi_crop(struct csi_tensor *input, struct csi_tensor *output, struct crop_params *params); -int csi_slice_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_slice_init(struct csi_tensor *input, struct csi_tensor *output, struct slice_params *params); -int csi_slice(struct csi_tensor *input, - struct csi_tensor *output, - struct slice_params *params); +int csi_slice(struct csi_tensor *input, struct csi_tensor *output, struct slice_params *params); -int csi_split_init(struct csi_tensor *input, - struct csi_tensor **output, +int csi_split_init(struct csi_tensor *input, struct csi_tensor **output, struct split_params *params); -int csi_split(struct csi_tensor *input, - struct csi_tensor **output, - struct split_params *params); +int csi_split(struct csi_tensor *input, struct csi_tensor **output, struct split_params *params); -int csi_stack_init(struct csi_tensor **inputs, - struct csi_tensor *output, +int csi_stack_init(struct csi_tensor **inputs, struct csi_tensor *output, struct stack_params *params); -int csi_stack(struct csi_tensor **inputs, - struct csi_tensor *output, - struct stack_params *params); +int csi_stack(struct csi_tensor **inputs, struct csi_tensor *output, struct stack_params *params); -int csi_unstack_init(struct csi_tensor *input, - struct csi_tensor **output, +int csi_unstack_init(struct csi_tensor *input, struct csi_tensor **output, struct unstack_params *params); -int csi_unstack(struct csi_tensor *input, - struct csi_tensor **output, +int csi_unstack(struct csi_tensor *input, struct csi_tensor **output, struct unstack_params *params); -int csi_tile_init(struct csi_tensor *inputs, - struct csi_tensor *output, - struct tile_params *params); +int csi_tile_init(struct csi_tensor *inputs, struct csi_tensor *output, struct tile_params *params); -int csi_tile(struct csi_tensor *inputs, - struct csi_tensor *output, - struct tile_params *params); +int csi_tile(struct csi_tensor *inputs, struct csi_tensor *output, struct tile_params *params); -int csi_arange_init(struct csi_tensor *output, - struct arange_params *params); +int csi_arange_init(struct csi_tensor *output, struct arange_params *params); -int csi_arange(struct csi_tensor *output, - struct arange_params *params); +int csi_arange(struct csi_tensor *output, struct arange_params *params); -int csi_where_init(struct csi_tensor *condition, - struct csi_tensor *x, - struct csi_tensor *y, - struct csi_tensor *output, - struct where_params *params); +int csi_where_init(struct csi_tensor *condition, struct csi_tensor *x, struct csi_tensor *y, + struct csi_tensor *output, struct where_params *params); -int csi_where(struct csi_tensor *condition, - struct csi_tensor *x, - struct csi_tensor *y, - struct csi_tensor *output, - struct where_params *params); +int csi_where(struct csi_tensor *condition, struct csi_tensor *x, struct csi_tensor *y, + struct csi_tensor *output, struct where_params *params); -int csi_gather_init(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, +int csi_gather_init(struct csi_tensor *input, struct csi_tensor *indices, struct csi_tensor *output, struct gather_params *params); -int csi_gather(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, +int csi_gather(struct csi_tensor *input, struct csi_tensor *indices, struct csi_tensor *output, struct gather_params *params); -int csi_gather_nd_init(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_nd_params *params); +int csi_gather_nd_init(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_nd_params *params); -int csi_gather_nd(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, +int csi_gather_nd(struct csi_tensor *input, struct csi_tensor *indices, struct csi_tensor *output, struct gather_nd_params *params); -int csi_squeeze_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_squeeze_init(struct csi_tensor *input, struct csi_tensor *output, struct squeeze_params *params); -int csi_squeeze(struct csi_tensor *input, - struct csi_tensor *output, - struct squeeze_params *params); +int csi_squeeze(struct csi_tensor *input, struct csi_tensor *output, struct squeeze_params *params); -int csi_ndarray_size_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ndarray_size_init(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params); -int csi_ndarray_size(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ndarray_size(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params); -int csi_space_to_batch_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_space_to_batch_init(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_params *params); -int csi_space_to_batch(struct csi_tensor *input, - struct csi_tensor *output, +int csi_space_to_batch(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_params *params); -int csi_space_to_batch_nd_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_space_to_batch_nd_init(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_nd_params *params); -int csi_space_to_batch_nd(struct csi_tensor *input, - struct csi_tensor *output, +int csi_space_to_batch_nd(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_nd_params *params); -int csi_batch_to_space_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_batch_to_space_init(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_params *params); -int csi_batch_to_space(struct csi_tensor *input, - struct csi_tensor *output, +int csi_batch_to_space(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_params *params); -int csi_batch_to_space_nd_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_batch_to_space_nd_init(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_nd_params *params); -int csi_batch_to_space_nd(struct csi_tensor *input, - struct csi_tensor *output, +int csi_batch_to_space_nd(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_nd_params *params); -int csi_space_to_depth_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_space_to_depth_init(struct csi_tensor *input, struct csi_tensor *output, struct space_to_depth_params *params); -int csi_space_to_depth(struct csi_tensor *input, - struct csi_tensor *output, +int csi_space_to_depth(struct csi_tensor *input, struct csi_tensor *output, struct space_to_depth_params *params); -int csi_depth_to_space_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_depth_to_space_init(struct csi_tensor *input, struct csi_tensor *output, struct depth_to_space_params *params); -int csi_depth_to_space(struct csi_tensor *input, - struct csi_tensor *output, +int csi_depth_to_space(struct csi_tensor *input, struct csi_tensor *output, struct depth_to_space_params *params); -int csi_one_hot_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_one_hot_init(struct csi_tensor *input, struct csi_tensor *output, struct one_hot_params *params); -int csi_one_hot(struct csi_tensor *input, - struct csi_tensor *output, - struct one_hot_params *params); +int csi_one_hot(struct csi_tensor *input, struct csi_tensor *output, struct one_hot_params *params); -int csi_sequence_mask_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct sequence_mask_params *params); +int csi_sequence_mask_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct sequence_mask_params *params); -int csi_sequence_mask(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct sequence_mask_params *params); +int csi_sequence_mask(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct sequence_mask_params *params); -int csi_im2col_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_im2col_init(struct csi_tensor *input, struct csi_tensor *output, struct im2col_params *params); -int csi_im2col(struct csi_tensor *input, - struct csi_tensor *output, - struct im2col_params *params); +int csi_im2col(struct csi_tensor *input, struct csi_tensor *output, struct im2col_params *params); -int csi_col2im_init(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, +int csi_col2im_init(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, struct col2im_params *params); -int csi_col2im(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, +int csi_col2im(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, struct col2im_params *params); -int csi_sum_init(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_sum_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_sum(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_sum(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_mean_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_mean_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_mean(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_mean(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_max_init(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_max_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_max(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_max(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_min_init(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_min_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_min(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_min(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_prod_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_prod_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_prod(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_prod(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_argmin_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_argmin_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_argmin(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_argmin(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_argmax_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_argmax_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_argmax(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_argmax(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_all_init(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_all_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_all(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_all(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_any_init(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_any_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_any(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_any(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reorg_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reorg_init(struct csi_tensor *input, struct csi_tensor *output, struct reorg_params *params); -int csi_reorg(struct csi_tensor *input, - struct csi_tensor *output, - struct reorg_params *params); +int csi_reorg(struct csi_tensor *input, struct csi_tensor *output, struct reorg_params *params); -int csi_yuv_rgb_scale_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_yuv_rgb_scale_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_yuv_rgb_scale(struct csi_tensor *input, - struct csi_tensor *output, +int csi_yuv_rgb_scale(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_segment_max_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); +int csi_segment_max_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); -int csi_segment_max(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_segment_max(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct segment_params *params); -int csi_segment_min_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); +int csi_segment_min_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); -int csi_segment_min(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_segment_min(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct segment_params *params); -int csi_segment_sum_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); +int csi_segment_sum_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); -int csi_segment_sum(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_segment_sum(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct segment_params *params); -int csi_segment_mean_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); +int csi_segment_mean_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); -int csi_segment_mean(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); +int csi_segment_mean(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); -int csi_segment_prod_init(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); +int csi_segment_prod_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); -int csi_segment_prod(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct segment_params *params); +int csi_segment_prod(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct segment_params *params); -int csi_threshold_relu_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_threshold_relu_init(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_threshold_relu(struct csi_tensor *input, - struct csi_tensor *output, +int csi_threshold_relu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_acos_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); -int csi_acos(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_acos_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); +int csi_acos(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_acosh_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_acosh_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_acosh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_acosh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_asin_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_asin_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_asin(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_asin(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_asinh_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_asinh_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_asinh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_asinh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_atan_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_atan_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_atan(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_atan(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_atanh_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_atanh_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_atanh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_atanh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_cosh_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_cosh_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_cosh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_cosh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_sinh_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_sinh_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_sinh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_sinh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_tan_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_tan_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_tan(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_tan(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_log1p_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_log1p_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_log1p(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_log1p(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_softsign_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_softsign_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_softsign(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_softsign(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_erf_init(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_erf_init(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_erf(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_erf(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_cumsum_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_cumsum_init(struct csi_tensor *input, struct csi_tensor *output, struct cumsum_params *params); -int csi_cumsum(struct csi_tensor *input, - struct csi_tensor *output, - struct cumsum_params *params); +int csi_cumsum(struct csi_tensor *input, struct csi_tensor *output, struct cumsum_params *params); -int csi_cumprod_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_cumprod_init(struct csi_tensor *input, struct csi_tensor *output, struct cumprod_params *params); -int csi_cumprod(struct csi_tensor *input, - struct csi_tensor *output, - struct cumprod_params *params); +int csi_cumprod(struct csi_tensor *input, struct csi_tensor *output, struct cumprod_params *params); -int csi_reduce_max_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_max_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_max(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_max(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_min_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_min_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_min(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_min(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_mean_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_mean_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_mean(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_mean(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_sum_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_sum_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_sum(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_sum(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_prod_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_prod_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_prod(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_prod(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_logsumexp_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_logsumexp_init(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_reduce_logsumexp(struct csi_tensor *input, - struct csi_tensor *output, +int csi_reduce_logsumexp(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_broadcast_to_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_broadcast_to_init(struct csi_tensor *input, struct csi_tensor *output, struct broadcast_to_params *params); -int csi_broadcast_to(struct csi_tensor *input, - struct csi_tensor *output, +int csi_broadcast_to(struct csi_tensor *input, struct csi_tensor *output, struct broadcast_to_params *params); -int csi_scatter_nd_init(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *updates, - struct csi_tensor *output, +int csi_scatter_nd_init(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *updates, struct csi_tensor *output, struct scatter_nd_params *params); -int csi_scatter_nd(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *updates, - struct csi_tensor *output, - struct scatter_nd_params *params); +int csi_scatter_nd(struct csi_tensor *input, struct csi_tensor *indices, struct csi_tensor *updates, + struct csi_tensor *output, struct scatter_nd_params *params); -int csi_clip_init(struct csi_tensor *input, - struct csi_tensor *output, - struct clip_params *params); +int csi_clip_init(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params); -int csi_clip(struct csi_tensor *input, - struct csi_tensor *output, - struct clip_params *params); +int csi_clip(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params); -int csi_strided_slice_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_strided_slice_init(struct csi_tensor *input, struct csi_tensor *output, struct strided_slice_params *params); -int csi_strided_slice(struct csi_tensor *input, - struct csi_tensor *output, +int csi_strided_slice(struct csi_tensor *input, struct csi_tensor *output, struct strided_slice_params *params); -int csi_topk_init(struct csi_tensor *input, - struct csi_tensor *output1, - struct csi_tensor *output2, +int csi_topk_init(struct csi_tensor *input, struct csi_tensor *output1, struct csi_tensor *output2, struct topk_params *params); -int csi_topk(struct csi_tensor *input, - struct csi_tensor *output1, - struct csi_tensor *output2, +int csi_topk(struct csi_tensor *input, struct csi_tensor *output1, struct csi_tensor *output2, struct topk_params *params); -int csi_non_max_suppression_init(struct csi_tensor *input0, - struct csi_tensor *input1, +int csi_non_max_suppression_init(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct non_max_suppression_params *params); -int csi_non_max_suppression(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct non_max_suppression_params *params); +int csi_non_max_suppression(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct non_max_suppression_params *params); -int csi_shuffle_channel_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_shuffle_channel_init(struct csi_tensor *input, struct csi_tensor *output, struct shuffle_channel_params *params); -int csi_shuffle_channel(struct csi_tensor *input, - struct csi_tensor *output, +int csi_shuffle_channel(struct csi_tensor *input, struct csi_tensor *output, struct shuffle_channel_params *params); -int csi_roipool_init(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct roi_pool_params *params); +int csi_roipool_init(struct csi_tensor *data, struct csi_tensor *rois, struct csi_tensor *output, + struct roi_pool_params *params); -int csi_roipool(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, +int csi_roipool(struct csi_tensor *data, struct csi_tensor *rois, struct csi_tensor *output, struct roi_pool_params *params); +int csi_layer_norm_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *gamma, struct csi_tensor *beta, + struct layer_norm_params *params); + +int csi_layer_norm(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *gamma, + struct csi_tensor *beta, struct layer_norm_params *params); + +int csi_cache_matmul_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params); + +int csi_cache_matmul(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *weight, + struct csi_tensor *bias, struct cache_matmul_params *params); + +int csi_cache_conv1d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params); + +int csi_cache_conv1d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *weight, + struct csi_tensor *bias, struct cache_conv1d_params *params); + +int csi_conv1d_init(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv1d_params *params); + +int csi_conv1d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv1d_params *params); + +int csi_data_convert_init(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params); +int csi_data_convert(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params); + +#ifdef __cplusplus +} #endif + +#endif // INCLUDE_CSI_NN_H_ diff --git a/include/csi_node.h b/include/csi_node.h index 2f5712be..f48790ba 100644 --- a/include/csi_node.h +++ b/include/csi_node.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,23 +16,25 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_NN_NODE_H -#define _CSI_NN_NODE_H - -#include "csi_nn.h" +#ifndef INCLUDE_CSI_NODE_H_ +#define INCLUDE_CSI_NODE_H_ struct csi_node { int type; struct csi_node **in; struct csi_node **out; + int subgraph_idx; int in_num; int out_num; char *name; void *data; int ref_count; int ref_count_init; + int visited; + int *restricted_map; + int restricted_map_num; }; /* node */ @@ -44,8 +46,10 @@ int csi_node_add_in(struct csi_node *node, struct csi_node *in, int index); int csi_node_add_out(struct csi_node *node, struct csi_node *out, int index); int csi_node_get_in_number(struct csi_node *node); int csi_node_get_out_number(struct csi_node *node); +int csi_node_get_non_const_in_number(struct csi_node *node); struct csi_node *csi_node_get_in(struct csi_node *node, int index); struct csi_node *csi_node_get_out(struct csi_node *node, int index); +int csi_node_restrict_map_insert(int value, struct csi_node *node); +int csi_node_find(struct csi_node **list, int len, struct csi_node *node); -#endif - +#endif // INCLUDE_CSI_NODE_H_ diff --git a/include/csi_ovx.h b/include/csi_ovx.h index f6af2ba6..0ab3df51 100644 --- a/include/csi_ovx.h +++ b/include/csi_ovx.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,512 +16,325 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_NN_OVX_H -#define _CSI_NN_OVX_H +#ifndef INCLUDE_CSI_OVX_H_ +#define INCLUDE_CSI_OVX_H_ #include "csi_nn.h" #include "csi_utils.h" -int csi_ovx_conv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); +int csi_ovx_conv2d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv2d_params *params); -int csi_ovx_depthwise_conv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ovx_depthwise_conv2d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ovx_group_conv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ovx_group_conv2d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ovx_conv2d_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ovx_conv2d_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ovx_deconv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_ovx_depthwise_deconv2d(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); - -int csi_ovx_fullyconnected(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_ovx_deconv2d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, struct conv2d_params *params); + +int csi_ovx_depthwise_deconv2d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_ovx_fullyconnected(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_ovx_fullyconnected_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_ovx_fullyconnected_relu(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_ovx_maxpool2d(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ovx_maxpool2d(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_ovx_avgpool2d(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_avgpool2d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ovx_global_avgpool2d(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_global_avgpool2d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ovx_global_maxpool2d(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_global_maxpool2d(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ovx_l2pool(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ovx_l2pool(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ovx_pool_with_argmax(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_pool_with_argmax(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ovx_maxpool2d_locat(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_maxpool2d_locat(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ovx_unpooling(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, +int csi_ovx_unpooling(struct csi_tensor *input, struct csi_tensor *mask, struct csi_tensor *output, struct unpooling_params *params); -int csi_ovx_negative(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_negative(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_floor(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ovx_floor(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_ceil(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ovx_ceil(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_abs(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ovx_abs(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_exp(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ovx_exp(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_log(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ovx_log(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_sin(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ovx_sin(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_tanh(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ovx_tanh(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_sqrt(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ovx_sqrt(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_rsqrt(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ovx_rsqrt(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_square(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ovx_square(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_sigmoid(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_sigmoid(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_ovx_elu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_ovx_elu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ovx_relu(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_ovx_relu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ovx_relu1(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_ovx_relu1(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ovx_relu6(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_ovx_relu6(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ovx_relun(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params); +int csi_ovx_relun(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ovx_leaky_relu(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_leaky_relu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ovx_softrelu(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_softrelu(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ovx_prelu(struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, +int csi_ovx_prelu(struct csi_tensor *input, struct csi_tensor *alpha, struct csi_tensor *output, struct prelu_params *params); -int csi_ovx_softplus(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_softplus(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ovx_softmax(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_softmax(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_ovx_log_softmax(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_log_softmax(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_ovx_batch_normalization(struct csi_tensor *input, - struct csi_tensor *mean, - struct csi_tensor *variance, - struct csi_tensor *gamma, - struct csi_tensor *beta, - struct csi_tensor *output, +int csi_ovx_batch_normalization(struct csi_tensor *input, struct csi_tensor *mean, + struct csi_tensor *variance, struct csi_tensor *gamma, + struct csi_tensor *beta, struct csi_tensor *output, struct bn_params *params); -int csi_ovx_l2_normalization(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_l2_normalization(struct csi_tensor *input, struct csi_tensor *output, struct l2n_params *params); -int csi_ovx_lrn(struct csi_tensor *input, - struct csi_tensor *output, - struct lrn_params *params); +int csi_ovx_lrn(struct csi_tensor *input, struct csi_tensor *output, struct lrn_params *params); -int csi_ovx_matmul(struct csi_tensor *mat0, - struct csi_tensor *mat1, - struct csi_tensor *output, +int csi_ovx_matmul(struct csi_tensor *mat0, struct csi_tensor *mat1, struct csi_tensor *output, struct matmul_params *params); -int csi_ovx_add(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_add(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_sub(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_sub(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_mul(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_mul(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_div(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_div(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_floor_divide(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ovx_floor_divide(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ovx_maximum(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_maximum(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_minimum(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_minimum(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_power(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_power(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_greater(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_greater(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_less(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_less(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_equal(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_not_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ovx_greater_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ovx_less_equal(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ovx_select(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_not_equal(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_ovx_greater_equal(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_ovx_less_equal(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_ovx_select(struct csi_tensor *condition, struct csi_tensor *input0, + struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_and(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_and(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_or(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ovx_or(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ovx_pad(struct csi_tensor *input, - struct csi_tensor *output, - struct pad_params *params); +int csi_ovx_pad(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params); -int csi_ovx_resize(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_resize(struct csi_tensor *input, struct csi_tensor *output, struct resize_params *params); -int csi_ovx_concat(struct csi_tensor **input, - struct csi_tensor *output, +int csi_ovx_concat(struct csi_tensor **input, struct csi_tensor *output, struct concat_params *params); -int csi_ovx_proposal(struct csi_tensor *cls_prob, - struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, - struct csi_tensor *output, +int csi_ovx_proposal(struct csi_tensor *cls_prob, struct csi_tensor *bbox_pred, + struct csi_tensor *im_info, struct csi_tensor *output, struct proposal_params *params); -int csi_ovx_psroipooling(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct psroipooling_params *params); +int csi_ovx_psroipooling(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct psroipooling_params *params); -int csi_ovx_roipool(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, +int csi_ovx_roipool(struct csi_tensor *data, struct csi_tensor *rois, struct csi_tensor *output, struct roi_pool_params *params); -int csi_ovx_roi_align(struct csi_tensor *input, - struct csi_tensor *rois, - struct csi_tensor *output, +int csi_ovx_roi_align(struct csi_tensor *input, struct csi_tensor *rois, struct csi_tensor *output, struct roi_align_params *params); -int csi_ovx_transpose(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_transpose(struct csi_tensor *input, struct csi_tensor *output, struct transpose_params *params); -int csi_ovx_reshape(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_reshape(struct csi_tensor *input, struct csi_tensor *output, struct reshape_params *params); -int csi_ovx_reshape_tail(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_reshape_tail(struct csi_tensor *input, struct csi_tensor *output, struct reshape_params *params); -int csi_ovx_shape(struct csi_tensor *input, - struct csi_tensor *output, - struct shape_params *params); +int csi_ovx_shape(struct csi_tensor *input, struct csi_tensor *output, struct shape_params *params); -int csi_ovx_expand_dims_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct expand_dims_params *params); +int csi_ovx_expand_dims_f32(struct csi_tensor *input, struct csi_tensor *output, + struct expand_dims_params *params); -int csi_ovx_expand_dims_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct expand_dims_params *params); +int csi_ovx_expand_dims_u8(struct csi_tensor *input, struct csi_tensor *output, + struct expand_dims_params *params); -int csi_ovx_reverse(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_reverse(struct csi_tensor *input, struct csi_tensor *output, struct reverse_params *params); -int csi_ovx_flatten(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_flatten(struct csi_tensor *input, struct csi_tensor *output, struct flatten_params *params); -int csi_ovx_flatten_tail(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_flatten_tail(struct csi_tensor *input, struct csi_tensor *output, struct flatten_params *params); -int csi_ovx_crop(struct csi_tensor *input, - struct csi_tensor *output, - struct crop_params *params); +int csi_ovx_crop(struct csi_tensor *input, struct csi_tensor *output, struct crop_params *params); -int csi_ovx_slice(struct csi_tensor *input, - struct csi_tensor *output, - struct slice_params *params); +int csi_ovx_slice(struct csi_tensor *input, struct csi_tensor *output, struct slice_params *params); -int csi_ovx_slice_tail(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_slice_tail(struct csi_tensor *input, struct csi_tensor *output, struct slice_params *params); -int csi_ovx_strided_slice(struct csi_tensor *input, - struct csi_tensor *output, - struct strided_slice_params *params); +int csi_ovx_strided_slice(struct csi_tensor *input, struct csi_tensor *output, + struct strided_slice_params *params); -int csi_ovx_split(struct csi_tensor *input, - struct csi_tensor **output, +int csi_ovx_split(struct csi_tensor *input, struct csi_tensor **output, struct split_params *params); -int csi_ovx_stack(struct csi_tensor **inputs, - struct csi_tensor *output, +int csi_ovx_stack(struct csi_tensor **inputs, struct csi_tensor *output, struct stack_params *params); -int csi_ovx_tile(struct csi_tensor *inputs, - struct csi_tensor *output, - struct tile_params *params); +int csi_ovx_tile(struct csi_tensor *inputs, struct csi_tensor *output, struct tile_params *params); -int csi_ovx_arange(struct csi_tensor *output, - struct arange_params *params); +int csi_ovx_arange(struct csi_tensor *output, struct arange_params *params); -int csi_ovx_where(struct csi_tensor *condition, - struct csi_tensor *x, - struct csi_tensor *y, - struct csi_tensor *output, - struct where_params *params); +int csi_ovx_where(struct csi_tensor *condition, struct csi_tensor *x, struct csi_tensor *y, + struct csi_tensor *output, struct where_params *params); -int csi_ovx_unstack(struct csi_tensor *input, - struct csi_tensor **outputs, +int csi_ovx_unstack(struct csi_tensor *input, struct csi_tensor **outputs, struct unstack_params *params); -int csi_ovx_gather(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, +int csi_ovx_gather(struct csi_tensor *input, struct csi_tensor *indices, struct csi_tensor *output, struct gather_params *params); -int csi_ovx_gather_nd(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_nd_params *params); +int csi_ovx_gather_nd(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_nd_params *params); -int csi_ovx_squeeze(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_squeeze(struct csi_tensor *input, struct csi_tensor *output, struct squeeze_params *params); -int csi_ovx_squeeze_tail(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_squeeze_tail(struct csi_tensor *input, struct csi_tensor *output, struct squeeze_params *params); -int csi_ovx_ndarray_size(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_ndarray_size(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params); -int csi_ovx_space_to_batch(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_space_to_batch(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_params *params); -int csi_ovx_batch_to_space(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_batch_to_space(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_params *params); -int csi_ovx_space_to_depth(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_space_to_depth(struct csi_tensor *input, struct csi_tensor *output, struct space_to_depth_params *params); -int csi_ovx_depth_to_space(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_depth_to_space(struct csi_tensor *input, struct csi_tensor *output, struct depth_to_space_params *params); -int csi_ovx_one_hot(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_one_hot(struct csi_tensor *input, struct csi_tensor *output, struct one_hot_params *params); -int csi_ovx_sequence_mask(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct sequence_mask_params *params); +int csi_ovx_sequence_mask(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct sequence_mask_params *params); -int csi_ovx_im2col(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, +int csi_ovx_im2col(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, struct im2col_params *params); -int csi_ovx_col2im(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, +int csi_ovx_col2im(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, struct col2im_params *params); -int csi_ovx_sum(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_ovx_sum(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ovx_mean(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_ovx_mean(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ovx_max(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_ovx_max(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ovx_min(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_ovx_min(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ovx_prod(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_ovx_prod(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ovx_argmin(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_argmin(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ovx_argmax(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_argmax(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ovx_all(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_ovx_all(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ovx_any(struct csi_tensor *input, - struct csi_tensor *output, - struct reduce_params *params); +int csi_ovx_any(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ovx_reorg(struct csi_tensor *input, - struct csi_tensor *output, - struct reorg_params *params); +int csi_ovx_reorg(struct csi_tensor *input, struct csi_tensor *output, struct reorg_params *params); -int csi_ovx_topk(struct csi_tensor *input, - struct csi_tensor *output0, - struct csi_tensor *output1, +int csi_ovx_topk(struct csi_tensor *input, struct csi_tensor *output0, struct csi_tensor *output1, struct topk_params *params); -int csi_ovx_clip(struct csi_tensor *input, - struct csi_tensor *output, - struct clip_params *params); +int csi_ovx_clip(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params); -int csi_ovx_shuffle_channel(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ovx_shuffle_channel(struct csi_tensor *input, struct csi_tensor *output, struct shuffle_channel_params *params); int32_t csi_get_ceil_mode_fix(int32_t input, int32_t kernel, int32_t stride, int32_t pad); @@ -539,4 +352,4 @@ void csi_ovx_show_top5(int index, struct csi_session *sess); void csi_ovx_set_graph_attribute(struct csi_session *sess, int device_index); int csi_ovx_get_device_number(); -#endif +#endif // INCLUDE_CSI_OVX_H_ diff --git a/include/csi_pnna.h b/include/csi_pnna.h index 7e7ad688..f6591972 100644 --- a/include/csi_pnna.h +++ b/include/csi_pnna.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,10 +16,10 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_NN_PNNA_H -#define _CSI_NN_PNNA_H +#ifndef INCLUDE_CSI_PNNA_H_ +#define INCLUDE_CSI_PNNA_H_ #include "csi_nn.h" int csi_pnna_conv2d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, @@ -239,6 +239,7 @@ struct csi_pnna_target_data { void *attrs; void *graph; void *nodes; + void *in_buffers; void *out_buffers; void *light_hwconfig; void *light_mapconfig; @@ -246,4 +247,4 @@ struct csi_pnna_target_data { enum csinn_quant_enum quant_type; }; -#endif +#endif // INCLUDE_CSI_PNNA_H_ diff --git a/include/csi_pnna_wrapper.h b/include/csi_pnna_wrapper.h deleted file mode 100644 index 9da49e3b..00000000 --- a/include/csi_pnna_wrapper.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* CSI-NN2 version 1.10.x */ - -#ifndef _CSI_NN_PNNA_WRAPPER_H -#define _CSI_NN_PNNA_WRAPPER_H - -#ifdef __cplusplus -extern "C" { -#endif -int csi_pnna_session_init_internal(struct csi_pnna_target_data *td); -int csi_pnna_session_deinit_internal(struct csi_pnna_target_data *td); -int csi_pnna_session_setup_internal(struct csi_pnna_target_data *td); -int csi_pnna_session_create_network_binary(char *path, struct csi_pnna_target_data *td); -int csi_pnna_session_run_internal(struct csi_session *sess, int input_num, int output_num); -void csi_pnna_load_binary_model_internal(char *path, struct csi_pnna_target_data *td); -int csi_pnna_create_tensor_internal(struct csi_tensor *t, struct csi_pnna_target_data *td); -int csi_pnna_set_output_internal(int index, struct csi_tensor *t, struct csi_pnna_target_data *td); -int csi_pnna_get_output_internal(int index, struct csi_tensor *output, - struct csi_pnna_target_data *td); -void csi_pnna_set_input_strides_internal(struct csi_pnna_target_data *td, int byte_size, - int input_fix_h, int input_fix_w); - -/* internal op */ -int csi_pnna_create_argmax_internal(struct csi_tensor *input, struct csi_tensor *output, - struct reduce_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_avgpool_internal(struct csi_tensor *input, struct csi_tensor *output, - struct pool_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_batch_to_space_nd_internal(struct csi_tensor *input, struct csi_tensor *output, - struct batch_to_space_nd_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_concat_internal(struct csi_tensor **input, struct csi_tensor *output, - struct concat_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_conv2d_internal(struct csi_tensor *input, struct csi_tensor *output, - struct csi_tensor *kernel, struct csi_tensor *bias, - struct conv2d_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_deconv2d_internal(struct csi_tensor *input, struct csi_tensor *output, - struct csi_tensor *kernel, struct csi_tensor *bias, - struct conv2d_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_dense_internal(struct csi_tensor *input, struct csi_tensor *output, - struct csi_tensor *kernel, struct csi_tensor *bias, - struct fc_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_depth_to_space_internal(struct csi_tensor *input, struct csi_tensor *output, - struct depth_to_space_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_depthwise_conv2d_internal(struct csi_tensor *input, struct csi_tensor *output, - struct csi_tensor *kernel, struct csi_tensor *bias, - struct conv2d_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_diso_internal(struct csi_tensor *input0, struct csi_tensor *input1, - struct csi_tensor *output, int op, - struct csi_pnna_target_data *td); -int csi_pnna_create_flatten_internal(struct csi_tensor *input, struct csi_tensor *output, - struct flatten_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_group_conv2d_internal(struct csi_tensor *input, struct csi_tensor *output, - struct csi_tensor *kernel, struct csi_tensor *bias, - struct conv2d_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_global_avgpool_internal(struct csi_tensor *input, struct csi_tensor *output, - struct pool_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_global_maxpool_internal(struct csi_tensor *input, struct csi_tensor *output, - struct pool_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_leaky_relu_internal(struct csi_tensor *input, struct csi_tensor *output, - struct relu_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_lrn_internal(struct csi_tensor *input, struct csi_tensor *output, - struct lrn_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_mean_internal(struct csi_tensor *input, struct csi_tensor *output, - struct reduce_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_maxpool_internal(struct csi_tensor *input, struct csi_tensor *output, - struct pool_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_maxpool2d_locat_internal(struct csi_tensor *data, struct csi_tensor *output, - struct pool_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_pad_internal(struct csi_tensor *input, struct csi_tensor *output, - struct pad_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_prelu_internal(struct csi_tensor *input, struct csi_tensor *alpha, - struct csi_tensor *output, struct prelu_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_proposal_internal(struct csi_tensor *cls_prob, struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, struct csi_tensor *output, - struct proposal_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_relu1_internal(struct csi_tensor *input, struct csi_tensor *output, - struct relu_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_relu6_internal(struct csi_tensor *input, struct csi_tensor *output, - struct relu_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_reshape_internal(struct csi_tensor *input, struct csi_tensor *output, - struct reshape_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_resize_internal(struct csi_tensor *input, struct csi_tensor *output, - struct resize_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_roipool_internal(struct csi_tensor *data, struct csi_tensor *rois, - struct csi_tensor *output, struct roi_pool_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_siso_internal(struct csi_tensor *input, struct csi_tensor *output, int op, - struct csi_pnna_target_data *td); -int csi_pnna_create_softmax_internal(struct csi_tensor *input, struct csi_tensor *output, - struct softmax_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_space_to_depth_internal(struct csi_tensor *input, struct csi_tensor *output, - struct space_to_depth_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_space_to_batch_nd_internal(struct csi_tensor *input, struct csi_tensor *output, - struct space_to_batch_nd_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_split_internal(struct csi_tensor *input, struct csi_tensor **output, - struct split_params *params, struct csi_pnna_target_data *td); -int csi_pnna_create_squeeze_internal(struct csi_tensor *input, struct csi_tensor *output, - struct squeeze_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_strided_slice_internal(struct csi_tensor *input, struct csi_tensor *output, - struct strided_slice_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_cus_strided_slice_internal(struct csi_tensor *input, struct csi_tensor *output, - struct strided_slice_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_transpose_internal(struct csi_tensor *input, struct csi_tensor *output, - struct transpose_params *params, - struct csi_pnna_target_data *td); -int csi_pnna_create_unpooling_internal(struct csi_tensor *input, struct csi_tensor *mask, - struct csi_tensor *output, struct unpooling_params *params, - struct csi_pnna_target_data *td); -#ifdef __cplusplus -} -#endif - -#endif diff --git a/include/csi_ref.h b/include/csi_ref.h index bc2170e7..0c76a8ff 100644 --- a/include/csi_ref.h +++ b/include/csi_ref.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,285 +16,240 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_INTERNAL_REF_H -#define _CSI_INTERNAL_REF_H +#ifndef INCLUDE_CSI_REF_H_ +#define INCLUDE_CSI_REF_H_ +#include #include #include #include -#include -#include "csi_nn.h" + #include "csi_internal.h" +#include "csi_nn.h" #include "csi_utils.h" -int csi_ref_abs_f32(struct csi_tensor *input, - struct csi_tensor *output, +#ifdef __cplusplus +extern "C" { +#endif + +int csi_ref_abs_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_abs_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_abs_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_acos_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_acos_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_acos_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_acos_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_acosh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_acosh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_acosh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_acosh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_add_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_add_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_add_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_add_u8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_add_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_add_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_add_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_add_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_and_u32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_and_u32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_and_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_and_u8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_and_i8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_and_i8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_arange_f32(struct csi_tensor *output, - struct arange_params *params); +int csi_ref_arange_f32(struct csi_tensor *output, struct arange_params *params); -int csi_ref_arange_quant(struct csi_tensor *output, - struct arange_params *params); +int csi_ref_arange_quant(struct csi_tensor *output, struct arange_params *params); -int csi_ref_argmax_stride_i32_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_argmax_stride_i32_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_argmax_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_argmax_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_argmin_stride_i32_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_argmin_stride_i32_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_argmin_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_argmin_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_asin_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_asin_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_asin_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_asin_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_asinh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_asinh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_asinh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_asinh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_atan_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_atan_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_atan_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_atan_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_atanh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_atanh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_atanh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_atanh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_avgpool2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ref_avgpool2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_ref_avgpool2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ref_avgpool2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_ref_avgpool3d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ref_avgpool3d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_ref_avgpool3d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ref_avgpool3d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_ref_batch_normalization_f32(struct csi_tensor *input, - struct csi_tensor *mean, - struct csi_tensor *variance, - struct csi_tensor *gamma, - struct csi_tensor *beta, - struct csi_tensor *output, +int csi_ref_batch_normalization_f32(struct csi_tensor *input, struct csi_tensor *mean, + struct csi_tensor *variance, struct csi_tensor *gamma, + struct csi_tensor *beta, struct csi_tensor *output, struct bn_params *params); -int csi_ref_batch_normalization_quant(struct csi_tensor *input, - struct csi_tensor *mean, - struct csi_tensor *variance, - struct csi_tensor *gamma, - struct csi_tensor *beta, - struct csi_tensor *output, +int csi_ref_batch_normalization_quant(struct csi_tensor *input, struct csi_tensor *mean, + struct csi_tensor *variance, struct csi_tensor *gamma, + struct csi_tensor *beta, struct csi_tensor *output, struct bn_params *params); -int csi_ref_batch_to_space_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_batch_to_space_f32(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_params *params); -int csi_ref_batch_to_space_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_batch_to_space_quant(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_params *params); -int csi_ref_broadcast_to_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_broadcast_to_f32(struct csi_tensor *input, struct csi_tensor *output, struct broadcast_to_params *params); -int csi_ref_broadcast_to_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_broadcast_to_quant(struct csi_tensor *input, struct csi_tensor *output, struct broadcast_to_params *params); -int csi_ref_ceil_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ceil_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_ceil_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ceil_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_clip_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_clip_f32(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params); -int csi_ref_clip_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_clip_quant(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params); -int csi_ref_col2im_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct col2im_params *params); +int csi_ref_col2im_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct col2im_params *params); -int csi_ref_concat_f32(struct csi_tensor **input, - struct csi_tensor *output, +int csi_ref_concat_f32(struct csi_tensor **input, struct csi_tensor *output, struct concat_params *params); -int csi_ref_concat_quant(struct csi_tensor **input, - struct csi_tensor *output, +int csi_ref_concat_quant(struct csi_tensor **input, struct csi_tensor *output, struct concat_params *params); -int csi_ref_conv2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv1d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv1d_params *params); + +int csi_ref_conv1d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv1d_params *params); + +int csi_ref_conv2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_conv2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_conv2d_channel_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv2d_channel_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_conv2d_relu_f32(struct csi_tensor *o_input, - struct csi_tensor *o_output, - struct csi_tensor *o_kernel, - struct csi_tensor *o_bias, +int csi_ref_conv2d_relu_f32(struct csi_tensor *o_input, struct csi_tensor *o_output, + struct csi_tensor *o_kernel, struct csi_tensor *o_bias, struct conv2d_params *params); -int csi_ref_conv2d_relu_quant(struct csi_tensor *o_input, - struct csi_tensor *o_output, - struct csi_tensor *o_kernel, - struct csi_tensor *o_bias, +int csi_ref_conv2d_relu_quant(struct csi_tensor *o_input, struct csi_tensor *o_output, + struct csi_tensor *o_kernel, struct csi_tensor *o_bias, struct conv2d_params *params); -int csi_ref_conv2d_channel_relu_quant(struct csi_tensor *o_input, - struct csi_tensor *o_output, - struct csi_tensor *o_kernel, - struct csi_tensor *o_bias, +int csi_ref_cache_matmul_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params); + +int csi_ref_cache_matmul_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params); + +int csi_ref_cache_matmul_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params); + +int csi_ref_cache_conv1d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params); + +int csi_ref_cache_conv1d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params); + +int csi_ref_cache_conv1d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params); + +int csi_ref_conv2d_channel_relu_quant(struct csi_tensor *o_input, struct csi_tensor *o_output, + struct csi_tensor *o_kernel, struct csi_tensor *o_bias, struct conv2d_params *params); -int csi_ref_conv2d_relu6_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv2d_relu6_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_conv2d_channel_relu6_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv2d_channel_relu6_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_depthwise_conv2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_conv2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_depthwise_conv2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_conv2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_depthwise_conv2d_channel_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_conv2d_channel_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_depthwise_conv2d_relu_f32(struct csi_tensor *o_input, - struct csi_tensor *o_output, - struct csi_tensor *o_kernel, - struct csi_tensor *o_bias, +int csi_ref_depthwise_conv2d_relu_f32(struct csi_tensor *o_input, struct csi_tensor *o_output, + struct csi_tensor *o_kernel, struct csi_tensor *o_bias, struct conv2d_params *params); -int csi_ref_depthwise_conv2d_relu_quant(struct csi_tensor *o_input, - struct csi_tensor *o_output, - struct csi_tensor *o_kernel, - struct csi_tensor *o_bias, +int csi_ref_depthwise_conv2d_relu_quant(struct csi_tensor *o_input, struct csi_tensor *o_output, + struct csi_tensor *o_kernel, struct csi_tensor *o_bias, struct conv2d_params *params); int csi_ref_depthwise_conv2d_channel_relu_quant(struct csi_tensor *o_input, @@ -303,1234 +258,876 @@ int csi_ref_depthwise_conv2d_channel_relu_quant(struct csi_tensor *o_input, struct csi_tensor *o_bias, struct conv2d_params *params); -int csi_ref_depthwise_conv2d_relu6_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_conv2d_relu6_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); int csi_ref_depthwise_conv2d_channel_relu6_quant(struct csi_tensor *input, struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_group_conv2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_group_conv2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_group_conv2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_group_conv2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_group_conv2d_channel_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_group_conv2d_channel_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_group_conv2d_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_group_conv2d_relu_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_group_conv2d_relu6_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_group_conv2d_relu6_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_group_conv2d_channel_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_group_conv2d_channel_relu_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_conv3d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv3d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv3d_params *params); -int csi_ref_conv3d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv3d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv3d_params *params); -int csi_ref_cos_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cos_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_cos_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cos_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_cosh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cosh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_cosh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cosh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_cumprod_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cumprod_f32(struct csi_tensor *input, struct csi_tensor *output, struct cumprod_params *params); -int csi_ref_cumprod_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cumprod_quant(struct csi_tensor *input, struct csi_tensor *output, struct cumprod_params *params); -int csi_ref_cumsum_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cumsum_f32(struct csi_tensor *input, struct csi_tensor *output, struct cumsum_params *params); -int csi_ref_cumsum_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cumsum_quant(struct csi_tensor *input, struct csi_tensor *output, struct cumsum_params *params); -int csi_ref_deconv2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_data_convert_f32(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params); +int csi_ref_data_convert_quant(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params); + +int csi_ref_deconv2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_deconv2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_deconv2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_depthwise_deconv2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_deconv2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_depthwise_deconv2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_deconv2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_deconv3d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_deconv3d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv3d_params *params); -int csi_ref_deconv3d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_deconv3d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv3d_params *params); -int csi_ref_depth_to_space_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_depth_to_space_f32(struct csi_tensor *input, struct csi_tensor *output, struct depth_to_space_params *params); -int csi_ref_depth_to_space_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_depth_to_space_quant(struct csi_tensor *input, struct csi_tensor *output, struct depth_to_space_params *params); -int csi_ref_div_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_div_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_div_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_div_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_elu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_elu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_elu_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_elu_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_fsmn_f32(struct csi_tensor *frame, - struct csi_tensor *l_filter, - struct csi_tensor *r_filter, - struct csi_tensor *frame_sequence, - struct csi_tensor *frame_counter, - struct csi_tensor *output, +int csi_ref_fsmn_f32(struct csi_tensor *frame, struct csi_tensor *l_filter, + struct csi_tensor *r_filter, struct csi_tensor *frame_sequence, + struct csi_tensor *frame_counter, struct csi_tensor *output, struct fsmn_params *params); -int csi_ref_fsmn_quant(struct csi_tensor *frame, - struct csi_tensor *l_filter, - struct csi_tensor *r_filter, - struct csi_tensor *frame_sequence, - struct csi_tensor *frame_counter, - struct csi_tensor *output, - struct fsmn_params *params); +int csi_ref_fsmn_quant(struct csi_tensor *frame, struct csi_tensor *l_filter, + struct csi_tensor *r_filter, struct csi_tensor *frame_sequence, + struct csi_tensor *frame_counter, struct csi_tensor *output, + struct fsmn_params *params); -int csi_ref_equal_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_equal_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_equal_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_equal_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_erf_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_erf_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_erf_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_erf_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_exp_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_exp_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_exp_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_exp_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_expand_dims_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_expand_dims_f32(struct csi_tensor *input, struct csi_tensor *output, struct expand_dims_params *params); -int csi_ref_expand_dims_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_expand_dims_quant(struct csi_tensor *input, struct csi_tensor *output, struct expand_dims_params *params); -int csi_ref_expm1_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_expm1_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_expm1_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_expm1_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_flatten(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_flatten(struct csi_tensor *input, struct csi_tensor *output, struct flatten_params *params); -int csi_ref_flatten_requant(struct csi_tensor *input, - struct csi_tensor *output, - struct flatten_params *params); +int csi_ref_flatten_quant(struct csi_tensor *input, struct csi_tensor *output, + struct flatten_params *params); -int csi_ref_floor_divide_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_floor_divide_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_floor_divide_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_floor_divide_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_floor_mod_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_floor_mod_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_floor_mod_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_floor_mod_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_floor_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_floor_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_floor_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_floor_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_fullyconnected_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_ref_fullyconnected_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_ref_fullyconnected_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_ref_fullyconnected_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_ref_gather_nd_f32(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_nd_params *params); +int csi_ref_gather_nd_f32(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_nd_params *params); -int csi_ref_gather_nd_quant(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_nd_params *params); +int csi_ref_gather_nd_quant(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_nd_params *params); -int csi_ref_gather_f32(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_params *params); +int csi_ref_gather_f32(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_params *params); -int csi_ref_gather_quant(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_params *params); +int csi_ref_gather_quant(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_params *params); -int csi_ref_global_avgpool2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); - -int csi_ref_global_avgpool2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ref_global_avgpool2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_ref_global_maxpool2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ref_global_avgpool2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_ref_global_maxpool2d_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_global_maxpool2d_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ref_greater_equal_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_global_maxpool2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_ref_greater_equal_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_greater_equal_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_greater_equal_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_greater_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_greater_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_greater_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_greater_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_hard_sigmoid_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_hard_sigmoid_f32(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_ref_hard_sigmoid_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_hard_sigmoid_quant(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_ref_im2col_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_im2col_f32(struct csi_tensor *input, struct csi_tensor *output, struct im2col_params *params); -int csi_ref_im2col_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_im2col_quant(struct csi_tensor *input, struct csi_tensor *output, struct im2col_params *params); -int csi_ref_isnan_bool_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_isnan_bool_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_l2_normalization_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_l2_normalization_f32(struct csi_tensor *input, struct csi_tensor *output, struct l2n_params *params); -int csi_ref_l2_normalization_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_l2_normalization_quant(struct csi_tensor *input, struct csi_tensor *output, struct l2n_params *params); -int csi_ref_l2pool_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_l2pool_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ref_leaky_relu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_layer_norm_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *gamma, struct csi_tensor *beta, + struct layer_norm_params *params); + +int csi_ref_layer_norm_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *gamma, struct csi_tensor *beta, + struct layer_norm_params *params); + +int csi_ref_leaky_relu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_leaky_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_leaky_relu_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_less_equal_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_less_equal_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_less_equal_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_less_equal_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_less_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_less_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_less_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_less_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_log_softmax_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log_softmax_f32(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_ref_log_softmax_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log_softmax_quant(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_ref_log_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_log_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_log1p_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log1p_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_log1p_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log1p_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_logical_and_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_logical_and_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_logical_and_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_logical_and_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_logical_not_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_logical_not_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_logical_not_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_logical_not_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_logical_or_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_logical_or_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_logical_or_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_logical_or_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_logical_xor_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_logical_xor_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_logical_xor_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_logical_xor_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_lrn_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct lrn_params *params); +int csi_ref_lrn_f32(struct csi_tensor *input, struct csi_tensor *output, struct lrn_params *params); -int csi_ref_lrn_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_lrn_quant(struct csi_tensor *input, struct csi_tensor *output, struct lrn_params *params); -int csi_ref_matmul_f32(struct csi_tensor *mat0, - struct csi_tensor *mat1, - struct csi_tensor *output, +int csi_ref_matmul_f32(struct csi_tensor *mat0, struct csi_tensor *mat1, struct csi_tensor *output, struct matmul_params *params); -int csi_ref_matmul_quant(struct csi_tensor *mat0, - struct csi_tensor *mat1, - struct csi_tensor *output, - struct matmul_params *params); +int csi_ref_matmul_quant(struct csi_tensor *mat0, struct csi_tensor *mat1, + struct csi_tensor *output, struct matmul_params *params); -int csi_ref_max_stride_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_max_stride_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_max_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_max_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_maximum_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); - -int csi_ref_maximum_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_maximum_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_maxpool2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ref_maximum_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_maxpool2d_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_maxpool2d_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ref_maxpool2d_locat_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_maxpool2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_ref_maxpool2d_locat_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ref_maxpool2d_locat_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_maxpool2d_locat_quant(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ref_maxpool3d_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_maxpool3d_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ref_maxpool3d_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_maxpool3d_quant(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params); -int csi_ref_mean_stride_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_mean_stride_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_mean_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_mean_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_mean_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_mean_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_min_stride_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_min_stride_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_min_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_min_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_minimum_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_minimum_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_minimum_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_minimum_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_mod_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_mod_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_mod_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_mod_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_mul_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_mul_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_mul_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_mul_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_ndarray_size_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ndarray_size_f32(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params); -int csi_ref_ndarray_size_u8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ndarray_size_u8(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params); -int csi_ref_ndarray_size_i8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ndarray_size_i8(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params); -int csi_ref_ndarray_size_i32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ndarray_size_i32(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params); -int csi_ref_negative_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_negative_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_negative_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_negative_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_non_max_suppression_std(struct csi_tensor *input0, - struct csi_tensor *input1, +int csi_ref_non_max_suppression_std(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct non_max_suppression_params *params); -int csi_ref_not_equal_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_not_equal_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_not_equal_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_not_equal_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_not_u32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_not_u32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_not_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ref_not_u8(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_not_i8(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params); +int csi_ref_not_i8(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_or_u32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_or_u32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_or_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_or_u8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_or_i8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_or_i8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_pad_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pad_params *params); +int csi_ref_pad_f32(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params); -int csi_ref_pad_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_pad_quant(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params); -int csi_ref_power_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_power_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_power_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_power_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_prelu_f32(struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, +int csi_ref_prelu_f32(struct csi_tensor *input, struct csi_tensor *alpha, struct csi_tensor *output, struct prelu_params *params); -int csi_ref_prelu_quant(struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, - struct prelu_params *params); +int csi_ref_prelu_quant(struct csi_tensor *input, struct csi_tensor *alpha, + struct csi_tensor *output, struct prelu_params *params); -int csi_ref_prod_stride_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_prod_stride_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_prod_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_prod_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_proposal_f32(struct csi_tensor *cls_prob, - struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, - struct csi_tensor *output, +int csi_ref_proposal_f32(struct csi_tensor *cls_prob, struct csi_tensor *bbox_pred, + struct csi_tensor *im_info, struct csi_tensor *output, struct proposal_params *params); -int csi_ref_proposal_quant(struct csi_tensor *cls_prob, - struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, - struct csi_tensor *output, +int csi_ref_proposal_quant(struct csi_tensor *cls_prob, struct csi_tensor *bbox_pred, + struct csi_tensor *im_info, struct csi_tensor *output, struct proposal_params *params); -int csi_ref_psroipooling_f32(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct psroipooling_params *params); +int csi_ref_psroipooling_f32(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct psroipooling_params *params); -int csi_ref_psroipooling_quant(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct psroipooling_params *params); +int csi_ref_psroipooling_quant(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct psroipooling_params *params); -int csi_ref_reduce_logsumexp_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_logsumexp_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_logsumexp_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_logsumexp_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_max_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_max_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_max_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_max_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_mean_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_mean_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_mean_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_mean_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_min_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_min_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_min_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_min_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_prod_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_prod_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_prod_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_prod_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_sum_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_sum_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_reduce_sum_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_sum_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_relu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_relu1_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu1_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_relu1_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu1_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_relu6_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu6_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_relu6_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu6_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_relun_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relun_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_relun_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relun_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_reshape(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reshape(struct csi_tensor *input, struct csi_tensor *output, struct reshape_params *params); -int csi_ref_reshape_requant(struct csi_tensor *input, - struct csi_tensor *output, - struct reshape_params *params); +int csi_ref_reshape_quant(struct csi_tensor *input, struct csi_tensor *output, + struct reshape_params *params); -int csi_ref_resize_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_resize_f32(struct csi_tensor *input, struct csi_tensor *output, struct resize_params *params); -int csi_ref_resize_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_resize_quant(struct csi_tensor *input, struct csi_tensor *output, struct resize_params *params); -int csi_ref_reverse_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reverse_f32(struct csi_tensor *input, struct csi_tensor *output, struct reverse_params *params); -int csi_ref_reverse_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reverse_quant(struct csi_tensor *input, struct csi_tensor *output, struct reverse_params *params); -int csi_ref_roi_align_f32(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct roi_align_params *params); +int csi_ref_roi_align_f32(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct roi_align_params *params); -int csi_ref_roipool_f32(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, +int csi_ref_roipool_f32(struct csi_tensor *data, struct csi_tensor *rois, struct csi_tensor *output, struct roi_pool_params *params); -int csi_ref_roipool_quant(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct roi_pool_params *params); +int csi_ref_roipool_quant(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct roi_pool_params *params); -int csi_ref_round_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_round_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_round_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_round_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_rsqrt_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_rsqrt_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_rsqrt_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_rsqrt_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_scatter_nd_f32(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *updates, - struct csi_tensor *output, +int csi_ref_scatter_nd_f32(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *updates, struct csi_tensor *output, struct scatter_nd_params *params); -int csi_ref_scatter_nd_quant(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *updates, - struct csi_tensor *output, +int csi_ref_scatter_nd_quant(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *updates, struct csi_tensor *output, struct scatter_nd_params *params); -int csi_ref_unsorted_segment_max_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_segment_max_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_unsorted_segment_max_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_segment_max_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_unsorted_segment_mean_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_segment_mean_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_unsorted_segment_mean_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_segment_mean_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_unsorted_segment_min_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_segment_min_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_unsorted_segment_min_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_segment_min_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_unsorted_segment_prod_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_segment_prod_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_unsorted_segment_prod_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_segment_prod_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_unsorted_segment_sum_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_segment_sum_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_unsorted_segment_sum_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_segment_sum_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params); - -int csi_ref_select_f32(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_unsorted_segment_max_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_segment_max_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_unsorted_segment_max_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_segment_max_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_unsorted_segment_mean_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_segment_mean_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_unsorted_segment_mean_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_segment_mean_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_unsorted_segment_min_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_segment_min_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_unsorted_segment_min_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_segment_min_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_unsorted_segment_prod_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_segment_prod_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_unsorted_segment_prod_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_segment_prod_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_unsorted_segment_sum_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_segment_sum_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_unsorted_segment_sum_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_segment_sum_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params); + +int csi_ref_select_f32(struct csi_tensor *condition, struct csi_tensor *input0, + struct csi_tensor *input1, struct csi_tensor *output, struct select_params *params); -int csi_ref_select_u8(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_select_u8(struct csi_tensor *condition, struct csi_tensor *input0, + struct csi_tensor *input1, struct csi_tensor *output, struct select_params *params); -int csi_ref_select_i8(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_select_i8(struct csi_tensor *condition, struct csi_tensor *input0, + struct csi_tensor *input1, struct csi_tensor *output, struct select_params *params); -int csi_ref_shape_i32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_shape_i32(struct csi_tensor *input, struct csi_tensor *output, struct shape_params *params); -int csi_ref_shape_u8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_shape_u8(struct csi_tensor *input, struct csi_tensor *output, struct shape_params *params); -int csi_ref_shape_i8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_shape_i8(struct csi_tensor *input, struct csi_tensor *output, struct shape_params *params); -int csi_ref_shuffle_channel_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_shuffle_channel_f32(struct csi_tensor *input, struct csi_tensor *output, struct shuffle_channel_params *params); -int csi_ref_shuffle_channel_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_shuffle_channel_quant(struct csi_tensor *input, struct csi_tensor *output, struct shuffle_channel_params *params); -int csi_ref_sigmoid_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sigmoid_f32(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_ref_sigmoid_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sigmoid_quant(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_ref_sign_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sign_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_sign_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sign_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_sin_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sin_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_sin_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sin_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_sinh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sinh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_sinh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sinh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_slice_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_slice_f32(struct csi_tensor *input, struct csi_tensor *output, struct slice_params *params); -int csi_ref_slice_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct slice_params *params); +int csi_ref_slice_quant(struct csi_tensor *input, struct csi_tensor *output, + struct slice_params *params); -int csi_ref_softmax_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softmax_f32(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_ref_softmax_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softmax_quant(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_ref_softplus_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softplus_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_softplus_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softplus_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_softrelu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softrelu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_softrelu_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softrelu_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_softsign_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softsign_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_softsign_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softsign_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_space_to_batch_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_space_to_batch_f32(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_params *params); -int csi_ref_space_to_batch_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_space_to_batch_quant(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_params *params); -int csi_ref_space_to_depth_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_space_to_depth_f32(struct csi_tensor *input, struct csi_tensor *output, struct space_to_depth_params *params); -int csi_ref_space_to_depth_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_space_to_depth_quant(struct csi_tensor *input, struct csi_tensor *output, struct space_to_depth_params *params); -int csi_ref_split_f32(struct csi_tensor *input, - struct csi_tensor **output, +int csi_ref_split_f32(struct csi_tensor *input, struct csi_tensor **output, struct split_params *params); -int csi_ref_split_quant(struct csi_tensor *input, - struct csi_tensor **output, +int csi_ref_split_quant(struct csi_tensor *input, struct csi_tensor **output, struct split_params *params); -int csi_ref_sqrt_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sqrt_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_sqrt_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sqrt_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_square_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_square_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_squeeze(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_squeeze(struct csi_tensor *input, struct csi_tensor *output, struct squeeze_params *params); -int csi_ref_stack_f32(struct csi_tensor **input, - struct csi_tensor *output, +int csi_ref_stack_f32(struct csi_tensor **input, struct csi_tensor *output, struct stack_params *params); -int csi_ref_stack_quant(struct csi_tensor **input, - struct csi_tensor *output, +int csi_ref_stack_quant(struct csi_tensor **input, struct csi_tensor *output, struct stack_params *params); -int csi_ref_strided_slice_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_strided_slice_f32(struct csi_tensor *input, struct csi_tensor *output, struct strided_slice_params *params); -int csi_ref_strided_slice_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_strided_slice_quant(struct csi_tensor *input, struct csi_tensor *output, struct strided_slice_params *params); -int csi_ref_sub_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_sub_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_sub_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params); +int csi_ref_sub_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); -int csi_ref_sum_stride_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sum_stride_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_sum_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sum_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params); -int csi_ref_tan_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tan_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_tan_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tan_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_tanh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tanh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_tanh_f64(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tanh_f64(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_tanh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tanh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_threshold_relu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_threshold_relu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_threshold_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_threshold_relu_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_tile_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tile_f32(struct csi_tensor *input, struct csi_tensor *output, struct tile_params *params); -int csi_ref_tile_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tile_quant(struct csi_tensor *input, struct csi_tensor *output, struct tile_params *params); -int csi_ref_topk_f32(struct csi_tensor *input, - struct csi_tensor *output1, - struct csi_tensor *output2, - struct topk_params *params); +int csi_ref_topk_f32(struct csi_tensor *input, struct csi_tensor *output1, + struct csi_tensor *output2, struct topk_params *params); -int csi_ref_topk_quant(struct csi_tensor *input, - struct csi_tensor *output1, - struct csi_tensor *output2, - struct topk_params *params); +int csi_ref_topk_quant(struct csi_tensor *input, struct csi_tensor *output1, + struct csi_tensor *output2, struct topk_params *params); -int csi_ref_transpose(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_transpose(struct csi_tensor *input, struct csi_tensor *output, struct transpose_params *params); -int csi_ref_transpose_requant(struct csi_tensor *input, - struct csi_tensor *output, - struct transpose_params *params); +int csi_ref_transpose_quant(struct csi_tensor *input, struct csi_tensor *output, + struct transpose_params *params); -int csi_ref_trunc_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_trunc_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_trunc_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_trunc_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_unpooling_f32(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, - struct unpooling_params *params); +int csi_ref_unpooling_f32(struct csi_tensor *input, struct csi_tensor *mask, + struct csi_tensor *output, struct unpooling_params *params); -int csi_ref_unpooling_quant(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, - struct unpooling_params *params); +int csi_ref_unpooling_quant(struct csi_tensor *input, struct csi_tensor *mask, + struct csi_tensor *output, struct unpooling_params *params); -int csi_ref_unstack_f32(struct csi_tensor *input, - struct csi_tensor **output, +int csi_ref_unstack_f32(struct csi_tensor *input, struct csi_tensor **output, struct unstack_params *params); -int csi_ref_unstack_qunat(struct csi_tensor *input, - struct csi_tensor **output, +int csi_ref_unstack_qunat(struct csi_tensor *input, struct csi_tensor **output, struct unstack_params *params); -int csi_ref_xor_u32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_xor_u32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_xor_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_xor_u8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_xor_i8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_xor_i8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params); -int csi_ref_yuv_rgb_scale_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_yuv_rgb_scale_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_yuv_rgb_scale_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_yuv_rgb_scale_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); int32_t csi_ref_max_internal_s32(int32_t a, int32_t b); int32_t csi_ref_min_internal_s32(int32_t a, int32_t b); -int32_t csi_ref_get_index(int32_t *dim, int32_t index0, int32_t index1, int32_t index2, int32_t index3); -int32_t csi_ref_get_index_5(int32_t *dim, int32_t index0, int32_t index1, int32_t index2, int32_t index3, int32_t index4); +int32_t csi_ref_get_index(int32_t *dim, int32_t index0, int32_t index1, int32_t index2, + int32_t index3); +int32_t csi_ref_get_index_5(int32_t *dim, int32_t index0, int32_t index1, int32_t index2, + int32_t index3, int32_t index4); int32_t csi_ref_get_index_iter(int32_t *dim, int dim_count, int32_t *index); float csi_ref_get_scale(int32_t multiplier, int32_t shift); float csi_ref_dequantize_u8_to_f32(uint8_t input, struct csi_quant_info *qinfo); float csi_ref_dequantize_i8_to_f32(int8_t input, struct csi_quant_info *qinfo); uint8_t csi_ref_quantize_f32_to_u8(float input, struct csi_quant_info *qinfo); int8_t csi_ref_quantize_f32_to_i8(float input, struct csi_quant_info *qinfo); -uint8_t csi_ref_quantize_channel_u8(int32_t data, struct csi_tensor *input, struct csi_tensor *output, float wscale); -int8_t csi_ref_quantize_channel_i8(int32_t data, struct csi_tensor *input, struct csi_tensor *output, float wscale); +uint8_t csi_ref_quantize_channel_u8(int32_t data, struct csi_tensor *input, + struct csi_tensor *output, float wscale); +int8_t csi_ref_quantize_channel_i8(int32_t data, struct csi_tensor *input, + struct csi_tensor *output, float wscale); float csi_ref_uint8_to_float(uint8_t i, struct csi_tensor *t); float csi_ref_int8_to_float(int8_t i, struct csi_tensor *t); int16_t csi_ref_float32_to_float16(float value); float csi_ref_float16_to_float32(int16_t value); +int16_t csi_ref_float32_to_bfloat16(float value); +float csi_ref_bfloat16_to_float32(int16_t value); struct csi_tensor *csi_ref_nchw_to_nhwc_8(struct csi_tensor *t); void csi_ref_nhwc_to_nchw_8(struct csi_tensor *nt, struct csi_tensor *t); struct csi_tensor *csi_ref_deconv_kernel_nchw_to_nhwc_f32(struct csi_tensor *t, int32_t permute[4]); struct csi_tensor *csi_ref_nchw_to_nhwc_f32(struct csi_tensor *t); void csi_ref_nhwc_to_nchw_f32(struct csi_tensor *nt, struct csi_tensor *t); -int32_t csi_ref_get_reduction_index(int32_t k, const int32_t *strides, const int32_t *extents, int32_t n); +int32_t csi_ref_get_reduction_index(int32_t k, const int32_t *strides, const int32_t *extents, + int32_t n); struct csi_tensor *csi_ref_alloc_float_tensor(struct csi_tensor *src); void csi_ref_free_float_tensor(struct csi_tensor *src); struct csi_tensor *csi_ref_convert_float_tensor(struct csi_tensor *src); @@ -1540,8 +1137,7 @@ struct csi_tensor *csi_ref_tensor_transform_f32(struct csi_tensor *input); int csi_ref_tensor_transform_free_f32(struct csi_tensor *input); uint8_t *csi_ref_f32_to_input_dtype(uint32_t index, float *data, struct csi_session *sess); -struct csi_ref_diso_callback -{ +struct csi_ref_diso_callback { void (*bc)(); struct csi_tensor *input0; struct csi_tensor *input1; @@ -1551,31 +1147,49 @@ struct csi_ref_diso_callback void *csi_init_map_ref(int op, int dtype); -int csi_ref_diso_broadcast_base(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, - struct diso_params *params, struct csi_ref_diso_callback *cb); -int csi_ref_broadcast_to_shape(struct csi_tensor *input, struct csi_tensor *output, int32_t *shape, int32_t shape_count); -int csi_ref_broadcast_to_shape_f32(struct csi_tensor *input, struct csi_tensor *output, int32_t *shape, int32_t shape_count); -int csi_ref_broadcast_to_shape_quant(struct csi_tensor *input, struct csi_tensor *output, int32_t *shape, int32_t shape_count); +int csi_ref_diso_broadcast_base(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params, + struct csi_ref_diso_callback *cb); +int csi_ref_broadcast_to_shape(struct csi_tensor *input, struct csi_tensor *output, int32_t *shape, + int32_t shape_count); +int csi_ref_broadcast_to_shape_f32(struct csi_tensor *input, struct csi_tensor *output, + int32_t *shape, int32_t shape_count); +int csi_ref_broadcast_to_shape_quant(struct csi_tensor *input, struct csi_tensor *output, + int32_t *shape, int32_t shape_count); + +int csi_ref_siso_callback_base(struct csi_tensor *input, struct csi_tensor *output, void *params, + void *cb); +int csi_ref_diso_callback_base(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, void *params, void *cb); +int csi_ref_conv_callback_base(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, void *params, + void *cb); + +void csi_ref_nn_init(struct csi_tensor *input, struct csi_tensor *output); + +void csi_ref_nn_deinit(struct csi_tensor *input, struct csi_tensor *output); + +int csi_ref_flatten_init(struct csi_tensor *input, struct csi_tensor *output, + struct reshape_params *params); + +int csi_ref_reshape_init(struct csi_tensor *input, struct csi_tensor *output, + struct reshape_params *params); -int csi_ref_siso_callback_base(struct csi_tensor *input, struct csi_tensor *output, void *params, void *cb); -int csi_ref_diso_callback_base(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, void *params, void *cb); -int csi_ref_conv_callback_base(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, struct csi_tensor *bias, void *params, void *cb); +int csi_ref_transpose_init(struct csi_tensor *input, struct csi_tensor *output, + struct transpose_params *params); -void csi_ref_nn_init(struct csi_tensor *input, - struct csi_tensor *output); +void asr_buffer_init(struct asr_buffer_t *buffer, size_t buffer_size, size_t data_lenth); -void csi_ref_nn_deinit(struct csi_tensor *input, - struct csi_tensor *output); +void *asr_buffer_insert_front(struct asr_buffer_t *buffer, void *input, size_t len); -int csi_ref_flatten_init(struct csi_tensor *input, - struct csi_tensor *output, - struct reshape_params *params); +void *asr_buffer_insert_back(struct asr_buffer_t *buffer, void *input, size_t len); -int csi_ref_reshape_init(struct csi_tensor *input, - struct csi_tensor *output, - struct reshape_params *params); +void *asr_buffer_get_buffer(struct asr_buffer_t *buffer); -int csi_ref_transpose_init(struct csi_tensor *input, - struct csi_tensor *output, - struct transpose_params *params); +void asr_buffer_reset(struct asr_buffer_t *buffer); + +#ifdef __cplusplus +} #endif + +#endif // INCLUDE_CSI_REF_H_ diff --git a/include/csi_ref_i805.h b/include/csi_ref_i805.h index 235a537c..5bc64166 100644 --- a/include/csi_ref_i805.h +++ b/include/csi_ref_i805.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,90 +16,69 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_INTERNAL_REF_I805_H -#define _CSI_INTERNAL_REF_I805_H +#ifndef INCLUDE_CSI_REF_I805_H_ +#define INCLUDE_CSI_REF_I805_H_ +#include #include #include #include -#include + #include "csi_internal.h" +#include "csi_nnfunctions.h" #include "csi_ref.h" #include "csi_utils.h" -#include "csi_math.h" -#include "csi_nnfunctions.h" - -int csi_ref_i805_conv2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_i805_conv2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_i805_conv2d_init_q15(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_i805_conv2d_init_q15(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params); -int csi_ref_i805_depthwise_conv2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params); +int csi_ref_i805_depthwise_conv2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); -int csi_ref_i805_avgpool2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ref_i805_avgpool2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_ref_i805_maxpool2d_init_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params); +int csi_ref_i805_maxpool2d_init_q7(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); -int csi_ref_i805_fullyconnected_q7(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_ref_i805_fullyconnected_q7(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_ref_i805_fullyconnected_q15(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_ref_i805_fullyconnected_q15(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params); -int csi_ref_i805_softmax_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_i805_softmax_q7(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_ref_i805_softmax_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_i805_softmax_q15(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params); -int csi_ref_i805_relu_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_i805_relu_q7(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_i805_relu_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_i805_relu_q15(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params); -int csi_ref_i805_sigmoid_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_i805_sigmoid_q7(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_ref_i805_sigmoid_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_i805_sigmoid_q15(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params); -int csi_ref_i805_tanh_q7(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_i805_tanh_q7(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -int csi_ref_i805_tanh_q15(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_i805_tanh_q15(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params); -#endif +#endif // INCLUDE_CSI_REF_I805_H_ diff --git a/include/csi_thead_rvv.h b/include/csi_thead_rvv.h new file mode 100644 index 00000000..2f4a8da8 --- /dev/null +++ b/include/csi_thead_rvv.h @@ -0,0 +1,389 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#ifndef INCLUDE_CSI_THEAD_RVV_H_ +#define INCLUDE_CSI_THEAD_RVV_H_ + +#include +#include +#include +#include +#include + +#include "csi_internal.h" +#include "csi_ref.h" +#include "csi_utils.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int csi_nn_rvv_conv2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_rvv_depthwise_conv2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_rvv_avgpool2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_fullyconnected_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params); + +/************************************ convolution *********************************/ +void csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp32(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_rvv_conv_im2col_gemm_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp16(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_rvv_conv_im2col_gemm_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_rvv_conv_im2col_sgemm_transform_kernel_int8(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_rvv_conv_im2col_gemm_int8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_rvv_conv_im2col_sgemm_transform_kernel_int4(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_rvv_conv_im2col_gemm_int4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_rvv_conv1x1s1_gemm_transform_kernel_fp32(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_rvv_conv1x1s1_gemm_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_rvv_conv1x1s1_gemm_transform_kernel_fp16(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_rvv_conv1x1s1_gemm_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_rvv_conv1x1s1_gemm_transform_kernel_int8(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_rvv_conv1x1s1_gemm_int8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_rvv_conv1x1s1_gemm_transform_kernel_int4(struct csi_tensor *kernel, + struct conv2d_params *params); + +int csi_nn_rvv_conv1x1s1_gemm_int4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_rvv_conv3x3s1_winograd64_transform_kernel_packn_fp32(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel); + +int csi_nn_rvv_conv3x3s1_winograd64_packn_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_rvv_conv3x3s1_winograd64_transform_kernel_packn_fp16(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel); + +int csi_nn_rvv_conv3x3s1_winograd64_packn_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_rvv_dwconv3x3s1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_rvv_dwconv3x3s2_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_rvv_dwconv3x3s1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_rvv_dwconv3x3s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_rvv_dwconv3x3s1_int8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_rvv_dwconv3x3s2_int8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_rvv_dwconv3x3s1_int4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +int csi_nn_rvv_dwconv3x3s2_int4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params); + +void csi_nn_rvv_reorder_kernel_n8_fp32(float *a, float *sa, int m, int k, int ldx); +void csi_nn_rvv_reorder_input_z8_fp32(float *b, float *sb, int k, int n, int ldx); +void csi_nn_rvv_gemm_8x8_fp32(float *dst, const float *sa, const float *sb, int m, int k, int n, + int ldc, float *bias); + +void csi_nn_rvv256_reorder_input_z16_fp32(float *b, float *sb, int k, int n, int ldx); +void csi_nn_rvv256_gemm_8x16_fp32(float *dst, const float *sa, const float *sb, int m, int k, int n, + int ldc, float *bias); + +void csi_nn_rvv_reorder_kernel_n8_fp16(__fp16 *a, __fp16 *sa, int m, int k, int ldx); +void csi_nn_rvv_reorder_input_z16_fp16(__fp16 *b, __fp16 *sb, int k, int n, int ldx); +void csi_nn_rvv_gemm_8x16_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int m, int k, int n, + int ldc, __fp16 *bias); + +void csi_nn_rvv256_reorder_kernel_n16_fp16(__fp16 *a, __fp16 *sa, int m, int k, int ldx); +void csi_nn_rvv256_reorder_input_z16_fp16(__fp16 *b, __fp16 *sb, int k, int n, int ldx); +void csi_nn_rvv256_gemm_16x16_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int m, int k, + int n, int ldc, __fp16 *bias); + +void csi_nn_rvv_reorder_kernel_n8_int8(int8_t *a, int8_t *sa, int m, int k, int ldx); +void csi_nn_rvv_reorder_input_z8_int8(int8_t *b, int8_t *sb, int k, int n, int ldx); +void csi_nn_rvv_gemm_8x8_int32(int32_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, + int n, int ldc, int32_t *bias); +void csi_nn_rvv_gemm_8x8_int8(int8_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, int n, + int ldc, int32_t *bias, int32_t out_zp, int32_t *mult, + int32_t *shift); + +void csi_nn_rvv256_reorder_input_z16_int8(int8_t *b, int8_t *sb, int k, int n, int ldx); +void csi_nn_rvv256_gemm_8x16_int32(int32_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, + int n, int ldc, int32_t *bias); + +void csi_nn_rvv_reorder_input_n8_int4(int8_t *a, int8_t *sa, int m, int k, int ldx); +void csi_nn_rvv_reorder_kernel_n8_int4(int8_t *b, int8_t *sb, int n, int k, int ldx); +void csi_nn_rvv_gemm_8x8_int4(int8_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, int n, + int ldc, int32_t *bias, int32_t out_zp, int32_t *mult, + int32_t *shift); + +/************************************ pooling *********************************/ +int csi_nn_rvv_avgpool2x2s2_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_avgpool2x2s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_avgpool2x2s2_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_avgpool2x2s2_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_avgpool3x3s2_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_avgpool3x3s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_avgpool3x3s2_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_avgpool3x3s2_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_avgpool3x3s1_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_avgpool3x3s1_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool2x2s2_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool2x2s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool2x2s2_int8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool2x2s2_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool2x2s2_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool2x2s2_p1_int8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool3x3s2_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool3x3s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool3x3s2_int8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool3x3s2_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool3x3s2_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool3x3s2_p1_int8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool3x3s1_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool3x3s1_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_maxpool3x3s1_p1_int8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_global_avgpool2d_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_global_avgpool2d_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_global_maxpool2d_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +int csi_nn_rvv_global_maxpool2d_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params); + +/************************************ fullyconnected *********************************/ +void csi_nn_rvv_fc_gemv_transform_weight_fp32(struct csi_tensor *weights); + +int csi_nn_rvv_fullyconnected_packn_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params); + +void csi_nn_rvv_fc_gemv_transform_weight_fp16(struct csi_tensor *weights); + +int csi_nn_rvv_fullyconnected_packn_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params); + +void csi_nn_rvv_fc_gemv_transform_weight_int8(struct csi_tensor *weights); + +int csi_nn_rvv_fullyconnected_packn_int8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params); + +/************************************ activation *********************************/ +int csi_nn_rvv_relu_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params); + +int csi_nn_rvv_relu_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params); + +int csi_nn_rvv_relu_int8(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params); + +int csi_nn_rvv_leaky_relu_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params); + +int csi_nn_rvv_leaky_relu_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params); + +int csi_nn_rvv_leaky_relu_int8(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params); + +int csi_nn_rvv_sigmoid_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct sigmoid_params *params); + +int csi_nn_rvv_softmax_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct softmax_params *params); + +/************************************ layout/memory transform *********************************/ +int csi_nn_rvv_concat_fp32(struct csi_tensor **input, struct csi_tensor *output, + struct concat_params *params); + +int csi_nn_rvv_concat_fp16(struct csi_tensor **input, struct csi_tensor *output, + struct concat_params *params); + +int csi_nn_rvv_concat_int8(struct csi_tensor **input, struct csi_tensor *output, + struct concat_params *params); + +/************************************ basic math *********************************/ +int csi_nn_rvv_add_fp32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_nn_rvv_add_fp16(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_nn_rvv_add_int8(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_nn_rvv_mul_fp32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_nn_rvv_mul_fp16(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_nn_rvv_mul_int8(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params); + +int csi_nn_rvv_sum_stride_int8(struct csi_tensor *input, struct csi_tensor *output, + struct reduce_params *params); + +/************************************ utils *********************************/ +void csi_nn_rvv_pad_input_fp32(const float *input, float *input_padded, int inc, int inh, int inw, + int padded_h, int padded_w, int pad_top, int pad_left); + +void csi_nn_rvv_pad_input_fp16(const __fp16 *input, __fp16 *input_padded, int inc, int inh, int inw, + int padded_h, int padded_w, int pad_top, int pad_left); + +void csi_nn_rvv_pad_input_int8(const int8_t *input, int8_t *input_padded, int inc, int inh, int inw, + int padded_h, int padded_w, int pad_top, int pad_left, + int8_t pad_value); + +void csi_nn_rvv_saturated_int8(int32_t *src, int8_t *dst, int32_t out_zp, int size); + +void csi_nn_rvv_requantize(int32_t *src, int32_t multiplier, int32_t shift, int channel_size); + +void csi_nn_rvv_pad_input_int4_trans_int8(const int8_t *input, int8_t *input_padded, int inc, + int inh, int inw, int padded_h, int padded_w, int pad_top, + int pad_left, int8_t pad_value); +void csi_nn_rvv_int4_to_int8(int8_t *src, int8_t *dst, int size); +void csi_nn_rvv_int8_to_int4(int8_t *src, int8_t *dst, int size); +void csi_nn_rvv_int4_trans_int8(int8_t *src, int8_t *dst, int size); + +int csrr_vl(); +int csrr_vlenb(); + +#ifdef __cplusplus +} +#endif + +#endif // INCLUDE_CSI_THEAD_RVV_H_ diff --git a/include/csi_utils.h b/include/csi_utils.h index effa44d1..cb275726 100644 --- a/include/csi_utils.h +++ b/include/csi_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,46 +16,34 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#ifndef _CSI_NN_UTIL_H -#define _CSI_NN_UTIL_H +#ifndef INCLUDE_CSI_UTILS_H_ +#define INCLUDE_CSI_UTILS_H_ +#include +#include +#include #include -#include #include +#include #include -#include -#include -#include -#if ((!defined CSI_BUILD_I805) && (!defined CSI_BUILD_E804) && (!defined CSI_BUILD_REF_I805)) +#if (!defined CSI_BUILD_RTOS) #include #endif #include "csi_internal.h" -struct csi_session { - int32_t base_dtype; - int32_t base_layout; - int32_t base_api; - int32_t base_run_mode; - enum csinn_quant_enum base_quant_type; - char *model_name; - int32_t model_save; - int32_t debug_level; - int32_t input_num; - int32_t output_num; - struct csi_tensor **input; - struct csi_tensor **output; - void *td; -}; +#ifdef __cplusplus +extern "C" { +#endif /* misc */ void csi_get_top5(float *buf, uint32_t size, float *prob, uint32_t *cls); void csi_show_top5(struct csi_tensor *output, struct csi_session *sess); uint64_t csi_get_timespec(); +void csi_print_time_interval(uint64_t start, uint64_t end, const char *msg); void csi_statistical_mean_std(float *data, int sz); -void csi_quantize_multiplier(double double_multiplier, int32_t* quantized_multiplier, int* shift); - +void csi_quantize_multiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift); /* tensor */ int csi_tensor_size(struct csi_tensor *tensor); @@ -95,8 +83,7 @@ int csi_set_tensor_entry(struct csi_tensor *tensor, struct csi_session *sess); /* * model setup and run */ -void csi_nn_init(struct csi_tensor *input, - struct csi_tensor *output); +void csi_nn_init(struct csi_tensor *input, struct csi_tensor *output); void csi_nn_setup(void *td); @@ -104,11 +91,18 @@ void csi_nn_run(void *td); void csi_nn_postprocess(void *td); -void csi_nn_deinit(struct csi_tensor *input, - struct csi_tensor *output); +void csi_nn_deinit(struct csi_tensor *input, struct csi_tensor *output); void *csi_nn_presetup(int input, int output); void *csi_bc_map(int api, int rmode, int op, int dtype); void *csi_init_map(int api, int op, int dtype); + +struct csi_bc_op_list *csi_bc_list_end(struct csi_bc_op_list *list); +void *csi_bc_list_match(struct csi_bc_op_list *list, enum csinn_dtype_enum dtype, + enum csinn_op_enum op_name); + +#ifdef __cplusplus +} #endif +#endif // INCLUDE_CSI_UTILS_H_ diff --git a/include/include_xt800/csi_i805_nnfunction.h b/include/include_xt800/csi_i805_nnfunction.h new file mode 100644 index 00000000..11a47c42 --- /dev/null +++ b/include/include_xt800/csi_i805_nnfunction.h @@ -0,0 +1,346 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Title: csi_nnfunctions.h + * Description: Public header file for CSI NN Library + * + * -------------------------------------------------------------------- */ + +#ifndef INCLUDE_INCLUDE_XT800_CSI_I805_NNFUNCTION_H_ +#define INCLUDE_INCLUDE_XT800_CSI_I805_NNFUNCTION_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "csky_vdsp2_nnfunctions.h" + +/** + * @brief u8 asym quant generic convolution optimized function + * @param[in] input_data pointer to input tensor data + * @param[in] kernel_data pointer to kernel tensor data + * @param[in] bias_data pointer to bias tensor data + * @param[in,out] output_data pointer to output tensor data + * @param[in,out] bufferA pointer to buffer for input/im2col data + * @param[in] input_h input height + * @param[in] input_w input width + * @param[in] input_ch input channel / output_channel + * @param[in] kernel_h kernel height + * @param[in] kernel_w kernel width + * @param[in] pad_h pad on height + * @param[in] pad_w pad on width + * @param[in] stride_h stride on height + * @param[in] stride_w stride on width + * @param[in] out_h output height + * @param[in] out_w output width + * @param[in] input_zero_point input zero_point + * @param[in] kernel_zero_point weight zero_point + * @param[in] output_zero_point output zero_point + * @param[in] dst_mult multiplier for s1 * s2 / s3 + * @param[in] dst_shift output shift for s1 * s2 / s3, shift_right + * @return none. + * bufferA size: 2*input_ch*kernel_h*kernel_w + */ +void csi_i805_conv2d_opt_u8(uint8_t *input_data, uint8_t *kernel_data, int32_t *bias_data, + uint8_t *output_data, uint8_t *bufferA, int32_t input_h, + int32_t input_w, int32_t input_ch, int32_t kernel_h, int32_t kernel_w, + int32_t pad_h, int32_t pad_w, int32_t stride_h, int32_t stride_w, + int32_t out_h, int32_t out_w, int32_t out_c, int32_t input_zero_point, + int32_t weight_zero_point, int32_t output_zero_point, int32_t out_mult, + int32_t out_shift); + +/** + * @brief u8 asym quant 1x1 kernel_size convolution (pointwise convolution) optimized function + * @param[in] input_data pointer to input tensor data + * @param[in] kernel_data pointer to kernel tensor data + * @param[in] bias_data pointer to bias tensor data + * @param[in,out] output_data pointer to output tensor data + * @param[in] input_hxw input height mul width + * @param[in] input_ch input channel + * @param[in] output_ch output_channel + * @param[in] input_zero_point input zero_point + * @param[in] kernel_zero_point weight zero_point + * @param[in] output_zero_point output zero_point + * @param[in] dst_mult multiplier for s1 * s2 / s3 + * @param[in] dst_shift output shift for s1 * s2 / s3, shift_right + * @return none. + * + */ +void csi_i805_pwconv2d_opt_u8(uint8_t *input_data, uint8_t *kernel_data, int32_t *bias_data, + uint8_t *output_data, int32_t input_hxw, int32_t input_ch, + int32_t output_ch, int32_t input_zero_point, + int32_t weight_zero_point, int32_t output_zero_point, + int32_t out_mult, int32_t out_shift); + +/** + * @brief u8 asym quant depthwise convolution optimized function + * @param[in] input_data pointer to input tensor data + * @param[in] kernel_data pointer to kernel tensor data + * @param[in] bias_data pointer to bias tensor data + * @param[in,out] output_data pointer to output tensor data + * @param[in,out] bufferA pointer to buffer for input/im2col data + * @param[in] input_h input height + * @param[in] input_w input width + * @param[in] input_ch input channel / output_channel + * @param[in] kernel_h kernel height + * @param[in] kernel_w kernel width + * @param[in] pad_h pad on height + * @param[in] pad_w pad on width + * @param[in] stride_h stride on height + * @param[in] stride_w stride on width + * @param[in] out_h output height + * @param[in] out_w output width + * @param[in] input_zero_point input zero_point + * @param[in] kernel_zero_point weight zero_point + * @param[in] output_zero_point output zero_point + * @param[in] dst_mult multiplier for s1 * s2 / s3 + * @param[in] dst_shift output shift for s1 * s2 / s3, shift_right + * @return none. + * bufferA size: 4*input_ch*kernel_h*kernel_w + */ +void csi_i805_dwconv2d_opt_u8(uint8_t *input_data, uint8_t *kernel_data, int32_t *bias_data, + uint8_t *output_data, uint8_t *bufferA, int32_t input_h, + int32_t input_w, int32_t input_ch, int32_t kernel_h, int32_t kernel_w, + int32_t pad_h, int32_t pad_w, int32_t stride_h, int32_t stride_w, + int32_t out_h, int32_t out_w, int32_t input_zero_point, + int32_t weight_zero_point, int32_t output_zero_point, + int32_t out_mult, int32_t out_shift); + +/** + * @brief u8 asym quant depthwise convolution 3x3 kernel_size and 1 stride optimized function + * @param[in] input pointer to input tensor data + * @param[in] kernel pointer to kernel tensor data + * @param[in] bias pointer to bias tensor data + * @param[in,out] output pointer to output tensor data + * @param[in] input_zero_point input zero_point + * @param[in] kernel_zero_point weight zero_point + * @param[in] output_zero_point output zero_point + * @param[in] dst_mult multiplier for s1 * s2 / s3 + * @param[in] dst_shift output shift for s1 * s2 / s3, shift_right + * @return none. + * + */ +void csi_i805_dwconv2d_3x3_opt_u8(uint8_t *input, uint8_t *kernel, int32_t *bias, uint8_t *output, + int32_t input_zero_point, int32_t kernel_zero_point, + int32_t output_zero_point, int32_t dst_mult, int32_t dst_shift); + +/** + * @brief u8 asym quant fullyconnected optimized function + * @param[in] input_data pointer to input tensor data + * @param[in] weight_data pointer to weight tensor data + * @param[in] bias_data pointer to bias tensor data + * @param[in,out] output_data pointer to output tensor data + * @param[in] in_nodes input nodes (weight cols) + * @param[in] out_nodes output nodes (weight rows) + * @param[in] input_zero_point input zero_point + * @param[in] weight_zero_point weight zero_point + * @param[in] output_zero_point output zero_point + * @param[in] output_mult multiplier for s1 * s2 / s3 + * @param[in] output_shift output shift for s1 * s2 / s3. shift_right + * @return none. + * + */ +void csi_i805_fullyconnected_opt_u8(uint8_t *input_data, uint8_t *weight_data, int32_t *bias_data, + uint8_t *output_data, int32_t in_nodes, int32_t out_nodes, + int32_t input_zero_point, int32_t weight_zero_point, + int32_t output_zero_point, int32_t output_mult, + int32_t output_shift); + +/** + * @brief u8 asym quant generic maxpool optimized function + * @param[in] input_data pointer to input tensor data + * @param[in,out] output_data pointer to output tensor data + * @param[in] input_h input height + * @param[in] input_w input width + * @param[in] input_ch input channel / output_channel + * @param[in] kernel_h kernel height + * @param[in] kernel_w kernel width + * @param[in] pad_h pad on height + * @param[in] pad_w pad on width + * @param[in] stride_h stride on height + * @param[in] stride_w stride on width + * @param[in] out_h output height + * @param[in] out_w output width + * @return none. + * bufferA size: 2*input_ch*kernel_h*kernel_w + */ +void csi_i805_maxpool2d_opt_u8(uint8_t *input_data, uint8_t *output_data, int32_t input_h, + int32_t input_w, int32_t input_ch, int32_t kernel_h, + int32_t kernel_w, int32_t pad_h, int32_t pad_w, int32_t stride_h, + int32_t stride_w, int32_t output_h, int32_t output_w); + +/** + * @brief u8 asym quant relu optimized function + * @param[in,out] data pointer to input/output tensor data, compute inplace + * @param[in] size input tensor size, tensor length + * @param[in] input_zeropoint input zero_point + * @param[in] out_multiplier multiplier for sacle_in / scale_out + * @param[in] out_shift shift left > 0 + * @return none. + * can be fused with conv/fc + */ +void csi_i805_relu_opt_u8(uint8_t *data, int32_t size, int32_t input_zeropoint, + int32_t out_multiplier, int32_t out_shift); + +/** + * @brief u8 asym quant relu6 optimized function + * @param[in,out] data pointer to input/output tensor data, compute inplace + * @param[in] size input tensor size, tensor length + * @param[in] input_zeropoint input zero_point + * @param[in] out_multiplier multiplier for sacle_in / scale_out + * @param[in] out_shift shift left > 0 + * @return none. + * can be fused with conv/fc + */ +void csi_i805_relu6_opt_u8(uint8_t *data, int32_t size, int32_t input_zeropoint, + int32_t out_multiplier, int32_t out_shift); + +/** + * @brief u8 asym quant clip optimized function + * @param[in] input_data pointer to input tensor data + * @param[in,out] output_data pointer to output tensor data + * @param[in] size input tensor size, tensor length + * @param[in] clip_qmin clip min value(quant) + * @param[in] clip_qmax clip max value(quant) + * @param[in] input_zeropoint input zero_point + * @param[in] output_zeropoint output zero_point + * @param[in] out_multiplier multiplier for sacle_in / scale_out + * @param[in] out_shift shift left > 0 + * @return none. + * can be fused with conv/fc + */ +void csi_i805_clip_opt_u8(uint8_t *input_data, uint8_t *output_data, int32_t size, int32_t clip_min, + int32_t clip_max, int32_t input_zeropoint, int32_t output_zeropoint, + int32_t out_multiplier, int32_t out_shift); + +/** + * @brief u8 asym quant element add optimized function + * @param[in] input_0 pointer to input_0 tensor data + * @param[in] input_1 pointer to input_1 tensor data + * @param[in,out] output pointer to output tensor data + * @param[in] size input tensor size, tensor length, element size + * @param[in] input_0_zeroponit input_0 zero_point. Range: Range: -255 to 0 + * @param[in] input_0_mult multiplier for sacle_input_0 + * @param[in] input_0_shift input_0 shift + * @param[in] input_1_zeropoint input_1 zero_point. Range: Range: -255 to 0 + * @param[in] input_1_mult multiplier for sacle_input_1 + * @param[in] input_1_shift input_1 shift + * @param[in] output_zeropoint output zero_point + * @param[in] output_mult multiplier for scale_output + * @param[in] output_shift output shift + * @return none. + * + */ +void csi_i805_elementwise_add_opt_u8(uint8_t *input_0, uint8_t *input_1, uint8_t *output, + int32_t size, int32_t input_0_zeroponit, int32_t input_0_mult, + int32_t input_0_shift, int32_t input_1_zeropoint, + int32_t input_1_mult, int32_t input_1_shift, + int32_t output_zeropoint, int32_t output_mult, + int32_t output_shift); + +/** + * @brief u8 asym quant element mul optimized function + * @param[in] input_0 pointer to input_0 tensor data + * @param[in] input_1 pointer to input_1 tensor data + * @param[in,out] output pointer to output tensor data + * @param[in] size input tensor size, tensor length, element size + * @param[in] input_0_zeroponit input_0 zero_point + * @param[in] input_1_zeropoint input_1 zero_point + * @param[in] output_zeropoint output zero_point + * @param[in] output_mult multiplier for s1 * s2 / s3 + * @param[in] output_shift output shift for s1 * s2 / s3 + * @return none. + * + */ +void csi_i805_elementwise_mul_opt_u8(uint8_t *input_0, uint8_t *input_1, uint8_t *output, + int32_t size, int32_t input_0_zeroponit, + int32_t input_1_zeropoint, int32_t output_zeropoint, + int32_t output_mult, int32_t output_shift); + +/** + * @brief u8 asym quant softmax optimized function + * @param[in] input_data pointer to input tensor data + * @param[in,out] output_data pointer to output tensor data + * @param[in] size tensor size + * @param[in] out_mult multiplier + * @param[in] out_shift output shift + * @return none. + * + */ +void csi_i805_softmax_opt_u8(uint8_t *input_data, uint8_t *output_data, int32_t size, + int32_t out_mult, int32_t out_shift); + +/** + * @brief u8 asym quant reshape optimized function + * @param[in] input_data pointer to input tensor data + * @param[in,out] output_data pointer to output tensor data + * @param[in] size tensor size + * @return none. + * + */ +void csi_i805_reshape_opt_u8(uint8_t *input_data, uint8_t *output_data, int32_t size); + +/** + * @brief u8 asym quant vec and matrix mul optimized function + * @param[in] lhs pointer to input tensor data + * @param[in] rhs pointer to weight tensor data + * @param[in] bias pointer to bias tensor data + * @param[in,out] dst pointer to output tensor data + * @param[in] rhs_col input nodes (weight cols) + * @param[in] rhs_row output nodes (weight rows) + * @param[in] lhs_zero_point input zero_point + * @param[in] rhs_zero_point weight zero_point + * @param[in] dst_zero_point output zero_point + * @param[in] dst_mult multiplier for s1 * s2 / s3 + * @param[in] dst_shift output shift for s1 * s2 / s3 + * @return none. + * + */ +void csi_i805_vec_mat_mult_opt_u8(uint8_t *lhs, uint8_t *rhs, int32_t *bias, uint8_t *dst, + int32_t rhs_col, int32_t rhs_row, int32_t lhs_zero_point, + int32_t rhs_zero_point, int32_t dst_zero_point, int32_t dst_mult, + int32_t dst_shift); + +/** + * @brief u8 asym quant matrix mul(A * B_trans) optimized function + * @param[in] lhs pointer to input tensor data + * @param[in] rhs pointer to weight tensor data + * @param[in] bias pointer to bias tensor data + * @param[in,out] dst pointer to output tensor data + * @param[in] lhs_row input row / m + * @param[in] lhs_col input col / k + * @param[in] rhs_row weight row / n + * @param[in] lhs_zero_point input zero_point + * @param[in] rhs_zero_point weight zero_point + * @param[in] dst_zero_point output zero_point + * @param[in] dst_mult multiplier for s1 * s2 / s3 + * @param[in] dst_shift output shift for s1 * s2 / s3 + * @return none. + * + */ +void csi_i805_mat_mult_nt_t_opt_u8(uint8_t *lhs, uint8_t *rhs, int32_t *bias, uint8_t *dst, + int32_t lhs_row, int32_t lhs_col, int32_t rhs_row, + int32_t lhs_zero_point, int32_t rhs_zero_point, + int32_t dst_zero_point, int32_t dst_mult, int32_t dst_shift); + +#ifdef __cplusplus +} +#endif + +#endif // INCLUDE_INCLUDE_XT800_CSI_I805_NNFUNCTION_H_ diff --git a/include/include_xt800/csi_instance.h b/include/include_xt800/csi_instance.h new file mode 100644 index 00000000..2fe3adcd --- /dev/null +++ b/include/include_xt800/csi_instance.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2016-2020 T-head Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/****************************************************************************** + * @file csi_instance.h + * @brief Some common define + * @version V1.0 + * @date Feb. 2020 + ******************************************************************************/ + +#ifndef INCLUDE_INCLUDE_XT800_CSI_INSTANCE_H_ +#define INCLUDE_INCLUDE_XT800_CSI_INSTANCE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * @brief 8-bit fractional data type in 1.7 format. + */ +typedef int8_t q7_t; + +/** + * @brief 16-bit fractional data type in 1.15 format. + */ +typedef int16_t q15_t; + +/** + * @brief 32-bit fractional data type in 1.31 format. + */ +typedef int32_t q31_t; + +/** + * @brief 64-bit fractional data type in 1.63 format. + */ +typedef int64_t q63_t; + +/** + * @brief 32-bit floating-point type definition. + */ +typedef float float32_t; + +/** + * @brief 64-bit floating-point type definition. + */ +typedef double float64_t; + +/** + @brief definition to read/write two 16 bit values. + @deprecated + */ +#define __SIMD32_TYPE int32_t +#define __SIMD32(addr) (*(__SIMD32_TYPE **)&(addr)) + +/** + * @brief definition to pack two 16 bit values. + */ +#define __PKHBT(ARG1, ARG2, ARG3) \ + ((((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) | \ + (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000)) +#define __PKHTB(ARG1, ARG2, ARG3) \ + ((((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) | \ + (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF)) + +#ifdef __cplusplus +} +#endif + +#endif // INCLUDE_INCLUDE_XT800_CSI_INSTANCE_H_ diff --git a/include/include_xt800/nn_include/csi_nn_tables.h b/include/include_xt800/csi_nn_tables.h similarity index 87% rename from include/include_xt800/nn_include/csi_nn_tables.h rename to include/include_xt800/csi_nn_tables.h index 8e972c42..77ce9101 100644 --- a/include/include_xt800/nn_include/csi_nn_tables.h +++ b/include/include_xt800/csi_nn_tables.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Description: Extern declaration for NN tables * -------------------------------------------------------------------- */ -#ifndef _CSI_NN_TABLES_H -#define _CSI_NN_TABLES_H +#ifndef INCLUDE_INCLUDE_XT800_CSI_NN_TABLES_H_ +#define INCLUDE_INCLUDE_XT800_CSI_NN_TABLES_H_ #include "csi_instance.h" @@ -51,4 +51,4 @@ extern const q15_t sigmoidLTable_q15[128]; extern const q15_t sigmoidLTable_q15[128]; extern const q15_t sigmoidHTable_q15[192]; -#endif /* CSI_NN_TABLES_H */ +#endif // INCLUDE_INCLUDE_XT800_CSI_NN_TABLES_H_ diff --git a/include/include_xt800/csi_nnfunctions.h b/include/include_xt800/csi_nnfunctions.h new file mode 100644 index 00000000..1f530e74 --- /dev/null +++ b/include/include_xt800/csi_nnfunctions.h @@ -0,0 +1,452 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Title: csi_nnfunctions.h + * Description: Public header file for CSI NN Library + * + * -------------------------------------------------------------------- */ + +#ifndef INCLUDE_INCLUDE_XT800_CSI_NNFUNCTIONS_H_ +#define INCLUDE_INCLUDE_XT800_CSI_NNFUNCTIONS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "csi_instance.h" +#include "csi_nnsupportfunctions.h" + +/** + * @brief Struct for specifying activation function types + * + */ +typedef enum { + CSKY_SIGMOID = 0, /**< Sigmoid activation function */ + CSKY_TANH = 1, /**< Tanh activation function */ +} csi_nn_activation_type; + +/** + * @brief Basic Q7 convolution function + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + */ + +void csi_convolve_HWC_q7_basic(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, + const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, + const uint16_t padding, const uint16_t stride, const q7_t *bias, + const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, + const uint16_t dim_im_out, q15_t *bufferA); + +/** + * @brief Basic Q15 convolution function + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + */ + +void csi_convolve_HWC_q15_basic(const q15_t *Im_in, const uint16_t dim_im_in, + const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, + const uint16_t dim_kernel, const uint16_t padding, + const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, + const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, + q15_t *bufferA); + +void csi_convolve_HWC_q15_fast(const q15_t *Im_in, const uint16_t dim_im_in, + const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, + const uint16_t dim_kernel, const uint16_t padding, + const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, + const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, + q15_t *bufferA); + +/** + * @brief Fast Q7 convolution function (non-sqaure shape) + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in_x input tensor dimention x + * @param[in] dim_im_in_y input tensor dimention y + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel_x filter kernel size x + * @param[in] dim_kernel_y filter kernel size y + * @param[in] padding_x padding size x + * @param[in] padding_y padding size y + * @param[in] stride_x convolution stride x + * @param[in] stride_y convolution stride y + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out_x output tensor dimension x + * @param[in] dim_im_out_y output tensor dimension y + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + * This function is the version with full list of optimization tricks, but with + * some contraints: + * ch_im_in is multiple of 4 + * ch_im_out is multiple of 2 + */ + +void csi_convolve_HWC_q7_fast_nonsquare( + const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, + const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, + const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, + const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, + const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, + const uint16_t dim_im_out_y, q15_t *bufferA); + +/** + * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape) + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in_x input tensor dimention x + * @param[in] dim_im_in_y input tensor dimention y + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel_x filter kernel size x + * @param[in] dim_kernel_y filter kernel size y + * @param[in] padding_x padding size x + * @param[in] padding_y padding size y + * @param[in] stride_x convolution stride x + * @param[in] stride_y convolution stride y + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out_x output tensor dimension x + * @param[in] dim_im_out_y output tensor dimension y + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + * This function implement convolution with 1x1 kernel size (i.e., dim_kernel_x=1 + * and dim_kernel_y=1). It can be used for + * second half of MobileNets after depthwise separable convolution. + * + * This function is the version with full list of optimization tricks, but with + * some contraints: + * ch_im_in is multiple of 4 + * ch_im_out is multiple of 2 + */ +void csi_convolve_1x1_HWC_q7_fast(const q7_t *Im_in, const uint16_t dim_im_in_x, + const uint16_t dim_im_in_y, const uint16_t ch_im_in, + const q7_t *wt, const uint16_t ch_im_out, const q7_t *bias, + const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, + const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, + q15_t *bufferA); + +/** + * @brief Q7 version of convolution for RGB image + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + * This kernel is written exclusively for convolution with ch_im_in + * equals 3. This applies on the first layer of CNNs which has input + * image with RGB format. + */ + +void csi_convolve_HWC_q7_RGB(const q7_t *Im_in, const uint16_t dim_im_in, const q7_t *wt, + const uint16_t ch_im_out, const uint16_t dim_kernel, + const uint16_t padding, const uint16_t stride, const q7_t *bias, + const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, + const uint16_t dim_im_out, q15_t *bufferA); + +/** + * @brief Q7 depthwise separable convolution function + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + * This function is the version with full list of optimization tricks, but with + * some contraints: + * ch_im_in is multiple of 2 + * ch_im_out is multiple of 2 + */ + +void csi_depthwise_separable_conv_HWC_q7(const q7_t *Im_in, const uint16_t dim_im_in, + const uint16_t ch_im_in, const q7_t *wt, + const uint16_t ch_im_out, const uint16_t dim_kernel, + const uint16_t padding, const uint16_t stride, + const q7_t *bias, const uint16_t bias_shift, + const uint16_t out_shift, q7_t *Im_out, + const uint16_t dim_im_out, q15_t *bufferA); + +/** + * @brief Q7 depthwise separable convolution function (non-square shape) + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in_x input tensor dimention x + * @param[in] dim_im_in_y input tensor dimention y + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel_x filter kernel size x + * @param[in] dim_kernel_y filter kernel size y + * @param[in] padding_x padding sizes x + * @param[in] padding_y padding sizes y + * @param[in] stride_x convolution stride x + * @param[in] stride_y convolution stride y + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out_x output tensor dimension x + * @param[in] dim_im_out_y output tensor dimension y + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + * This function is the version with full list of optimization tricks, but with + * some contraints: + * ch_im_in is multiple of 2 + * ch_im_out is multiple of 2 + */ +void csi_depthwise_separable_conv_HWC_q7_nonsquare( + const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, + const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, + const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, + const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, + const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, + const uint16_t dim_im_out_y, q15_t *bufferA); + +/** + * @brief Q7 basic fully-connected layer function + * @param[in] pV pointer to input vector + * @param[in] pM pointer to matrix weights + * @param[in] dim_vec length of the vector + * @param[in] num_of_rows number of rows in weight matrix + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in] bias pointer to bias + * @param[in,out] pOut pointer to output vector + * @return none. + */ + +void csi_fully_connected_q7(const q7_t *pV, const q7_t *pM, const uint16_t dim_vec, + const uint16_t num_of_rows, const uint16_t bias_shift, + const uint16_t out_shift, const q7_t *bias, q7_t *pOut); + +/** + * @brief Q15 basic fully-connected layer function + * @param[in] pV pointer to input vector + * @param[in] pM pointer to matrix weights + * @param[in] dim_vec length of the vector + * @param[in] num_of_rows number of rows in weight matrix + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in] bias pointer to bias + * @param[in,out] pOut pointer to output vector + * @return none. + * + */ + +void csi_fully_connected_q15(const q15_t *pV, const q15_t *pM, const uint16_t dim_vec, + const uint16_t num_of_rows, const uint16_t bias_shift, + const uint16_t out_shift, const q15_t *bias, q15_t *pOut); + +/** + * @brief Mixed Q15-Q7 fully-connected layer function + * @param[in] pV pointer to input vector + * @param[in] pM pointer to matrix weights + * @param[in] dim_vec length of the vector + * @param[in] num_of_rows number of rows in weight matrix + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in] bias pointer to bias + * @param[in,out] pOut pointer to output vector + * @return none. + * + */ + +void csi_fully_connected_mat_q7_vec_q15(const q15_t *pV, const q7_t *pM, const uint16_t dim_vec, + const uint16_t num_of_rows, const uint16_t bias_shift, + const uint16_t out_shift, const q7_t *bias, q15_t *pOut); + +/** + * @brief Q7 RELU function + * @param[in,out] data pointer to input + * @param[in] size number of elements + * @return none. + */ + +void csi_relu_q7(q7_t *data, uint16_t size); + +/** + * @brief Q15 RELU function + * @param[in,out] data pointer to input + * @param[in] size number of elements + * @return none. + */ + +void csi_relu_q15(q15_t *data, uint16_t size); + +/** + * @brief Q7 neural network activation function using direct table look-up + * @param[in,out] data pointer to input + * @param[in] size number of elements + * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 + * @param[in] type type of activation functions + * @return none. + */ + +void csi_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, + csi_nn_activation_type type); + +/** + * @brief Q15 neural network activation function using direct table look-up + * @param[in,out] data pointer to input + * @param[in] size number of elements + * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 + * @param[in] type type of activation functions + * @return none. + */ + +void csi_nn_activations_direct_q15(q15_t *data, uint16_t size, uint16_t int_width, + csi_nn_activation_type type); + +/** + * @brief Q7 max pooling function + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] Im_out pointer to output tensor + * @return none. + * + */ + +void csi_maxpool2d_q7_HWC(q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, + const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, + const uint16_t dim_im_out, q7_t *bufferA, q7_t *Im_out); + +/** + * @brief Q7 average pooling function + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] Im_out pointer to output tensor + * @return none. + * + */ + +void csi_avepool_q7_HWC(q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, + const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, + const uint16_t dim_im_out, q7_t *bufferA, q7_t *Im_out); + +void csi_avepool_q7_HWC_nonsquare(q7_t *Im_in, // input image + const uint16_t dim_im_in_x, // input image dimension + const uint16_t dim_im_in_y, // input image dimension + const uint16_t ch_im_in, // number of input image channels + const uint16_t dim_kernel_x, // window kernel size + const uint16_t dim_kernel_y, // window kernel size + const uint16_t padding_x, // padding sizes + const uint16_t padding_y, // padding sizes + const uint16_t stride_x, // stride + const uint16_t stride_y, // stride + const uint16_t dim_im_out_x, // output image dimension + const uint16_t dim_im_out_y, // output image dimension + q7_t *bufferA, // a buffer for local storage + q7_t *Im_out, // output feature + const uint16_t out_lshift); // output left shift (scaling) + +/** + * @brief Q7 softmax function + * @param[in] vec_in pointer to input vector + * @param[in] dim_vec input vector dimention + * @param[out] p_out pointer to output vector + * @return none. + * + */ + +void csi_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out); + +/** + * @brief Q15 softmax function + * @param[in] vec_in pointer to input vector + * @param[in] dim_vec input vector dimention + * @param[out] p_out pointer to output vector + * @return none. + * + */ + +void csi_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, q15_t *p_out); + +#ifdef __cplusplus +} +#endif + +#endif // INCLUDE_INCLUDE_XT800_CSI_NNFUNCTIONS_H_ diff --git a/include/include_xt800/csi_nnsupportfunctions.h b/include/include_xt800/csi_nnsupportfunctions.h new file mode 100644 index 00000000..38a3b01f --- /dev/null +++ b/include/include_xt800/csi_nnsupportfunctions.h @@ -0,0 +1,320 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Title: csi_nnsupportfunctions.h + * Description: Public header file of support functions for CSI NN Library + * + * -------------------------------------------------------------------- */ + +#ifndef INCLUDE_INCLUDE_XT800_CSI_NNSUPPORTFUNCTIONS_H_ +#define INCLUDE_INCLUDE_XT800_CSI_NNSUPPORTFUNCTIONS_H_ + +#include "csi_instance.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Union for SIMD access of Q31/Q15/Q7 types + */ +union csi_nnword { + q31_t word; /**< Q31 type */ + q15_t half_words[2]; /**< Q15 type */ + q7_t bytes[4]; /**< Q7 type */ +}; + +/** + * @defgroup nndata_convert Neural Network Data Conversion Functions + * + * Perform data type conversion in-between neural network operations + * + */ + +/** + * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift + * @param[in] *pSrc points to the Q7 input vector + * @param[out] *pDst points to the Q15 output vector + * @param[in] blockSize length of the input vector + * @return none. + * + */ + +void csi_q7_to_q15_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize); + +/** + * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift + * @param[in] *pSrc points to the Q7 input vector + * @param[out] *pDst points to the Q15 output vector + * @param[in] blockSize length of the input vector + * @return none. + * + */ + +void csi_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize); + +#if defined(CSI_MATH_DSP) + +/* + * @brief C custom defined SXTB16 + */ +uint32_t __SXTB16(uint32_t x) +{ + return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) | + ((((q31_t)x << 8) >> 8) & (q31_t)0xFFFF0000))); +} + +/** + \brief Rotate Right in unsigned value (32 bit) + \details Rotate Right (immediate) provides the value of the contents of a register rotated by a + variable number of bits. \param [in] op1 Value to rotate \param [in] op2 Number of Bits to + rotate \return Rotated value + */ +uint32_t __ROR(uint32_t op1, uint32_t op2) { return (op1 >> op2) | (op1 << (32U - op2)); } + +int32_t __SSAT_8(int32_t x) +{ + int32_t res = x; + if (x > 0x7f) { + res = 0x7f; + } else if (x < -128) { + res = -128; + } + + return res; +} + +/** + \details This function saturates a signed value. + \param [in] x Value to be saturated + \param [in] y Bit position to saturate to [1..32] + \return Saturated value. + */ +int32_t __SSAT(int32_t x, uint32_t y) +{ + int32_t posMax, negMin; + uint32_t i; + + posMax = 1; + + for (i = 0; i < (y - 1); i++) { + posMax = posMax * 2; + } + + if (x > 0) { + posMax = (posMax - 1); + + if (x > posMax) { + x = posMax; + } + + // x &= (posMax * 2 + 1); + } else { + negMin = -posMax; + + if (x < negMin) { + x = negMin; + } + + // x &= (posMax * 2 - 1); + } + + return (x); +} + +/** + \brief Unsigned Saturate + \details Saturates an unsigned value. + \param [in] value Value to be saturated + \param [in] sat Bit position to saturate to (0..31) + \return Saturated value + */ +uint32_t __USAT(uint32_t value, uint32_t sat) +{ + uint32_t result; + + if ((((0xFFFFFFFF >> sat) << sat) & value) != 0) { + result = 0xFFFFFFFF >> (32 - sat); + } else { + result = value; + } + + return (result); +} + +/** + \brief Dual 16-bit saturating subtract. + \details This function enables you to perform two 16-bit integer subtractions in parallel, + saturating the results to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1. + \param [in] x first two 16-bit summands. + \param [in] y second two 16-bit summands. + \return the saturated subtraction of the low halfwords, in the low halfword of the return + value.\n the saturated subtraction of the high halfwords, in the high halfword of the return + value.\n The returned results are saturated to the 16-bit signed integer range -2^15 <= x <= 2^15 + - 1. \remark res[15:0] = val1[15:0] - val2[15:0] \n res[31:16] = val1[31:16] - + val2[31:16] + */ +uint32_t __QSUB16(uint32_t x, uint32_t y) +{ + int32_t r, s; + + r = __SSAT(((((int32_t)x << 16) >> 16) - (((int32_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; + s = __SSAT(((((int32_t)x) >> 16) - (((int32_t)y) >> 16)), 16) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r))); +} + +/** + \brief Quad 8-bit saturating subtract. + \details This function enables you to perform four 8-bit integer subtractions, + saturating the results to the 8-bit signed integer range -2^7 <= x <= 2^7 - 1. + \param [in] x first four 8-bit summands. + \param [in] y second four 8-bit summands. + \return the subtraction of the first byte of each operand in the first byte of the return + value.\n the subtraction of the second byte of each operand in the second byte of the return + value.\n the subtraction of the third byte of each operand in the third byte of the return + value.\n the subtraction of the fourth byte of each operand in the fourth byte of the return + value.\n The returned results are saturated to the 8-bit signed integer range -2^7 <= x <= 2^7 + - 1. \remark res[7:0] = val1[7:0] - val2[7:0] \n res[15:8] = val1[15:8] - val2[15:8] + \n res[23:16] = val1[23:16] - val2[23:16] \n res[31:24] = val1[31:24] - val2[31:24] + */ +uint32_t __QSUB8(uint32_t x, uint32_t y) +{ + int32_t r, s, t, u; + + r = __SSAT(((((int32_t)x << 24) >> 24) - (((int32_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; + s = __SSAT(((((int32_t)x << 16) >> 24) - (((int32_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; + t = __SSAT(((((int32_t)x << 8) >> 24) - (((int32_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; + u = __SSAT(((((int32_t)x) >> 24) - (((int32_t)y) >> 24)), 8) & (int32_t)0x000000FF; + + return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r))); +} + +/** + \brief Dual 16-bit signed multiply with single 32-bit accumulator. + \details This function enables you to perform two signed 16-bit multiplications, + adding both results to a 32-bit accumulate operand. + \param [in] x first 16-bit operands for each multiplication. + \param [in] y second 16-bit operands for each multiplication. + \param [in] sum accumulate value. + \return the product of each multiplication added to the accumulate value, as a 32-bit + integer. \remark p1 = val1[15:0] * val2[15:0] \n p2 = val1[31:16] * val2[31:16] \n + res[31:0] = p1 + p2 + val3[31:0] + */ + +uint32_t __SMLAD(uint32_t x, uint32_t y, uint32_t sum) +{ + return ((uint32_t)(((((int32_t)x << 16) >> 16) * (((int32_t)y << 16) >> 16)) + + ((((int32_t)x) >> 16) * (((int32_t)y) >> 16)) + (((int32_t)sum)))); +} +/** + \brief Dual 16-bit saturating addition. + \details This function enables you to perform two 16-bit integer arithmetic additions in parallel, + saturating the results to the 16-bit signed integer range -2^15 <= x <= 2^15 - 1. + \param [in] x first two 16-bit summands. + \param [in] y second two 16-bit summands. + \return the saturated addition of the low halfwords, in the low halfword of the return + value.\n the saturated addition of the high halfwords, in the high halfword of the return value.\n + The returned results are saturated to the 16-bit signed integer range -2^15 <= x <= + 2^15 - 1. \remark res[15:0] = val1[15:0] + val2[15:0] \n res[31:16] = val1[31:16] + + val2[31:16] + */ +uint32_t __QADD16(uint32_t x, uint32_t y) +{ + int32_t r = 0, s = 0; + + r = __SSAT(((((int32_t)x << 16) >> 16) + (((int32_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; + s = __SSAT(((((int32_t)x) >> 16) + (((int32_t)y) >> 16)), 16) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r))); +} + +/** + * @brief read and expand one Q7 word into two Q15 words + */ + +void *read_and_pad(void *source, q31_t *out1, q31_t *out2) +{ + q31_t inA = *__SIMD32(source)++; + q31_t inAbuf1 = __SXTB16(__ROR(inA, 8)); + q31_t inAbuf2 = __SXTB16(inA); + +#ifndef CSKY_MATH_BIG_ENDIAN + *out2 = __PKHTB(inAbuf1, inAbuf2, 16); + *out1 = __PKHBT(inAbuf2, inAbuf1, 16); +#else + *out1 = __PKHTB(inAbuf1, inAbuf2, 16); + *out2 = __PKHBT(inAbuf2, inAbuf1, 16); +#endif + + return source; +} + +/** + * @brief read and expand one Q7 word into two Q15 words with reordering + */ + +void *read_and_pad_reordered(void *source, q31_t *out1, q31_t *out2) +{ + q31_t inA = *__SIMD32(source)++; +#ifndef CSKY_MATH_BIG_ENDIAN + *out2 = __SXTB16(__ROR(inA, 8)); + *out1 = __SXTB16(inA); +#else + *out1 = __SXTB16(__ROR(inA, 8)); + *out2 = __SXTB16(inA); +#endif + + return source; +} +#endif + +q7_t *csi_nn_mat_mult_kernel_q7_q15_reordered(const q7_t *pA, const q15_t *pInBuffer, + const uint16_t ch_im_out, const uint16_t numCol_A, + const uint16_t bias_shift, const uint16_t out_shift, + const q7_t *bias, q7_t *pOut); + +q7_t *csi_nn_mat_mult_kernel_q7_q15(const q7_t *pA, const q15_t *pInBuffer, + const uint16_t ch_im_out, const uint16_t numCol_A, + const uint16_t bias_shift, const uint16_t out_shift, + const q7_t *bias, q7_t *pOut); + +/** + * @brief A few utility functions used by pooling functions + * + */ + +void buffer_scale_back_q15_to_q7(q15_t *buffer, q7_t *target, uint16_t length, uint16_t scale); + +void accumulate_q7_to_q15(q15_t *base, q7_t *target, const uint16_t length); + +/** + * @brief defition to adding rouding offset + */ +#ifndef CSKY_NN_TRUNCATE +#define NN_ROUND(out_shift) (0x1 << (out_shift - 1)) +#else +#define NN_ROUND(out_shift) 0 +#endif + +#ifdef __cplusplus +} +#endif + +#endif // INCLUDE_INCLUDE_XT800_CSI_NNSUPPORTFUNCTIONS_H_ diff --git a/include/include_xt800/nn_include/csky_dsp2_nnfunctions.h b/include/include_xt800/csky_dsp2_nnfunctions.h similarity index 99% rename from include/include_xt800/nn_include/csky_dsp2_nnfunctions.h rename to include/include_xt800/csky_dsp2_nnfunctions.h index 6aea8c43..e45e137f 100644 --- a/include/include_xt800/nn_include/csky_dsp2_nnfunctions.h +++ b/include/include_xt800/csky_dsp2_nnfunctions.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,15 +22,15 @@ * * -------------------------------------------------------------------- */ -#ifndef _CSKY_DSP2_NNFUNCTIONS_H -#define _CSKY_DSP2_NNFUNCTIONS_H +#ifndef INCLUDE_INCLUDE_XT800_CSKY_DSP2_NNFUNCTIONS_H_ +#define INCLUDE_INCLUDE_XT800_CSKY_DSP2_NNFUNCTIONS_H_ #ifdef __cplusplus extern "C" { #endif -#include"csky_math.h" +#include "csi_instance.h" /** * @brief Struct for specifying activation function types * @@ -742,4 +742,4 @@ void csky_dsp2_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, } #endif -#endif +#endif // INCLUDE_INCLUDE_XT800_CSKY_DSP2_NNFUNCTIONS_H_ diff --git a/include/include_xt800/csky_vdsp2_nnfunctions.h b/include/include_xt800/csky_vdsp2_nnfunctions.h new file mode 100644 index 00000000..52b2bbe9 --- /dev/null +++ b/include/include_xt800/csky_vdsp2_nnfunctions.h @@ -0,0 +1,450 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Title: csky_vdsp2_nnfunctions.h + * Description: Public header file for CSI NN Library + * + * -------------------------------------------------------------------- */ + +#ifndef INCLUDE_INCLUDE_XT800_CSKY_VDSP2_NNFUNCTIONS_H_ +#define INCLUDE_INCLUDE_XT800_CSKY_VDSP2_NNFUNCTIONS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "csi_instance.h" + +/** + * @brief Struct for specifying activation function types + * + */ +typedef enum { + CSKY_SIGMOID = 0, /**< Sigmoid activation function */ + CSKY_TANH = 1, /**< Tanh activation function */ +} csky_vdsp2_nn_activation_type; + +/** + * @brief Basic Q7 convolution function + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + */ + +void csky_vdsp2_convolve_HWC_q7_basic(const q7_t *Im_in, const uint16_t dim_im_in, + const uint16_t ch_im_in, const q7_t *wt, + const uint16_t ch_im_out, const uint16_t dim_kernel, + const uint16_t padding, const uint16_t stride, + const q7_t *bias, const uint16_t bias_shift, + const uint16_t out_shift, q7_t *Im_out, + const uint16_t dim_im_out, q15_t *bufferA); + +/** + * @brief Basic Q15 convolution function + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + */ + +void csky_vdsp2_convolve_HWC_q15_basic(const q15_t *Im_in, const uint16_t dim_im_in, + const uint16_t ch_im_in, const q15_t *wt, + const uint16_t ch_im_out, const uint16_t dim_kernel, + const uint16_t padding, const uint16_t stride, + const q15_t *bias, const uint16_t bias_shift, + const uint16_t out_shift, q15_t *Im_out, + const uint16_t dim_im_out, q15_t *bufferA); + +/** + * @brief Fast Q7 convolution function (non-sqaure shape) + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in_x input tensor dimention x + * @param[in] dim_im_in_y input tensor dimention y + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel_x filter kernel size x + * @param[in] dim_kernel_y filter kernel size y + * @param[in] padding_x padding size x + * @param[in] padding_y padding size y + * @param[in] stride_x convolution stride x + * @param[in] stride_y convolution stride y + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out_x output tensor dimension x + * @param[in] dim_im_out_y output tensor dimension y + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + * This function is the version with full list of optimization tricks, but with + * some contraints: + * ch_im_in is multiple of 4 + * ch_im_out is multiple of 2 + */ + +void csky_vdsp2_convolve_HWC_q7_fast_nonsquare( + const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, + const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, + const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, + const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, + const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, + const uint16_t dim_im_out_y, q15_t *bufferA); + +/** + * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape) + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in_x input tensor dimention x + * @param[in] dim_im_in_y input tensor dimention y + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel_x filter kernel size x + * @param[in] dim_kernel_y filter kernel size y + * @param[in] padding_x padding size x + * @param[in] padding_y padding size y + * @param[in] stride_x convolution stride x + * @param[in] stride_y convolution stride y + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out_x output tensor dimension x + * @param[in] dim_im_out_y output tensor dimension y + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + * This function implement convolution with 1x1 kernel size (i.e., dim_kernel_x=1 + * and dim_kernel_y=1). It can be used for + * second half of MobileNets after depthwise separable convolution. + * + * This function is the version with full list of optimization tricks, but with + * some contraints: + * ch_im_in is multiple of 4 + * ch_im_out is multiple of 2 + */ +void csky_vdsp2_convolve_1x1_HWC_q7_fast(const q7_t *Im_in, const uint16_t dim_im_in_x, + const uint16_t dim_im_in_y, const uint16_t ch_im_in, + const q7_t *wt, const uint16_t ch_im_out, const q7_t *bias, + const uint16_t bias_shift, const uint16_t out_shift, + q7_t *Im_out, const uint16_t dim_im_out_x, + const uint16_t dim_im_out_y, q15_t *bufferA); + +/** + * @brief Q7 version of convolution for RGB image + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + * This kernel is written exclusively for convolution with ch_im_in + * equals 3. This applies on the first layer of CNNs which has input + * image with RGB format. + */ + +void csky_vdsp2_convolve_HWC_q7_RGB(const q7_t *Im_in, const uint16_t dim_im_in, const q7_t *wt, + const uint16_t ch_im_out, const uint16_t dim_kernel, + const uint16_t padding, const uint16_t stride, const q7_t *bias, + const uint16_t bias_shift, const uint16_t out_shift, + q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA); + +/** + * @brief Q7 depthwise separable convolution function + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + * This function is the version with full list of optimization tricks, but with + * some contraints: + * ch_im_in is multiple of 2 + * ch_im_out is multiple of 2 + */ + +void csky_vdsp2_depthwise_separable_conv_HWC_q7(const q7_t *Im_in, const uint16_t dim_im_in, + const uint16_t ch_im_in, const q7_t *wt, + const uint16_t ch_im_out, const uint16_t dim_kernel, + const uint16_t padding, const uint16_t stride, + const q7_t *bias, const uint16_t bias_shift, + const uint16_t out_shift, q7_t *Im_out, + const uint16_t dim_im_out, q15_t *bufferA); + +/** + * @brief Q7 depthwise separable convolution function (non-square shape) + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in_x input tensor dimention x + * @param[in] dim_im_in_y input tensor dimention y + * @param[in] ch_im_in number of input tensor channels + * @param[in] wt pointer to kernel weights + * @param[in] ch_im_out number of filters, i.e., output tensor channels + * @param[in] dim_kernel_x filter kernel size x + * @param[in] dim_kernel_y filter kernel size y + * @param[in] padding_x padding sizes x + * @param[in] padding_y padding sizes y + * @param[in] stride_x convolution stride x + * @param[in] stride_y convolution stride y + * @param[in] bias pointer to bias + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in,out] Im_out pointer to output tensor + * @param[in] dim_im_out_x output tensor dimension x + * @param[in] dim_im_out_y output tensor dimension y + * @param[in,out] bufferA pointer to buffer space for input + * @return none. + * + * This function is the version with full list of optimization tricks, but with + * some contraints: + * ch_im_in is multiple of 2 + * ch_im_out is multiple of 2 + */ +void csky_vdsp2_depthwise_separable_conv_HWC_q7_nonsquare( + const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, + const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, + const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, + const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, + const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, + const uint16_t dim_im_out_y, q15_t *bufferA); + +/** + * @brief Q7 basic fully-connected layer function + * @param[in] pV pointer to input vector + * @param[in] pM pointer to matrix weights + * @param[in] dim_vec length of the vector + * @param[in] num_of_rows number of rows in weight matrix + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in] bias pointer to bias + * @param[in,out] pOut pointer to output vector + * @return none. + */ + +void csky_vdsp2_fully_connected_q7(const q7_t *pV, const q7_t *pM, const uint16_t dim_vec, + const uint16_t num_of_rows, const uint16_t bias_shift, + const uint16_t out_shift, const q7_t *bias, q7_t *pOut); + +/** + * @brief Q15 basic fully-connected layer function + * @param[in] pV pointer to input vector + * @param[in] pM pointer to matrix weights + * @param[in] dim_vec length of the vector + * @param[in] num_of_rows number of rows in weight matrix + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in] bias pointer to bias + * @param[in,out] pOut pointer to output vector + * @return none. + * + */ + +void csky_vdsp2_fully_connected_q15(const q15_t *pV, const q15_t *pM, const uint16_t dim_vec, + const uint16_t num_of_rows, const uint16_t bias_shift, + const uint16_t out_shift, const q15_t *bias, q15_t *pOut); + +/** + * @brief Mixed Q15-Q7 fully-connected layer function + * @param[in] pV pointer to input vector + * @param[in] pM pointer to matrix weights + * @param[in] dim_vec length of the vector + * @param[in] num_of_rows number of rows in weight matrix + * @param[in] bias_shift amount of left-shift for bias + * @param[in] out_shift amount of right-shift for output + * @param[in] bias pointer to bias + * @param[in,out] pOut pointer to output vector + * @return none. + * + */ + +void csky_vdsp2_fully_connected_mat_q7_vec_q15(const q15_t *pV, const q7_t *pM, + const uint16_t dim_vec, const uint16_t num_of_rows, + const uint16_t bias_shift, const uint16_t out_shift, + const q7_t *bias, q15_t *pOut); + +/** + * @brief Q7 RELU function + * @param[in,out] data pointer to input + * @param[in] size number of elements + * @return none. + */ + +void csky_vdsp2_relu_q7(q7_t *data, uint16_t size); + +/** + * @brief Q15 RELU function + * @param[in,out] data pointer to input + * @param[in] size number of elements + * @return none. + */ + +void csky_vdsp2_relu_q15(q15_t *data, uint16_t size); + +/** + * @brief Q7 neural network activation function using direct table look-up + * @param[in,out] data pointer to input + * @param[in] size number of elements + * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 + * @param[in] type type of activation functions + * @return none. + */ + +void csky_vdsp2_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, + csky_vdsp2_nn_activation_type type); + +/** + * @brief Q15 neural network activation function using direct table look-up + * @param[in,out] data pointer to input + * @param[in] size number of elements + * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 + * @param[in] type type of activation functions + * @return none. + */ + +void csky_vdsp2_nn_activations_direct_q15(q15_t *data, uint16_t size, uint16_t int_width, + csky_vdsp2_nn_activation_type type); + +/** + * @brief Q7 max pooling function + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] Im_out pointer to output tensor + * @return none. + * + */ + +void csky_vdsp2_maxpool2d_q7_HWC(q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, + const uint16_t dim_kernel, const uint16_t padding, + const uint16_t stride, const uint16_t dim_im_out, q7_t *bufferA, + q7_t *Im_out); + +/** + * @brief Q7 average pooling function + * @param[in] Im_in pointer to input tensor + * @param[in] dim_im_in input tensor dimention + * @param[in] ch_im_in number of input tensor channels + * @param[in] dim_kernel filter kernel size + * @param[in] padding padding sizes + * @param[in] stride convolution stride + * @param[in] dim_im_out output tensor dimension + * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] Im_out pointer to output tensor + * @return none. + * + */ + +void csky_vdsp2_avepool_q7_HWC(q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, + const uint16_t dim_kernel, const uint16_t padding, + const uint16_t stride, const uint16_t dim_im_out, q7_t *bufferA, + q7_t *Im_out); + +void csky_vdsp2_avepool_q7_HWC_nonsquare(q7_t *Im_in, // input image + const uint16_t dim_im_in_x, // input image dimension + const uint16_t dim_im_in_y, // input image dimension + const uint16_t ch_im_in, // number of input image channels + const uint16_t dim_kernel_x, // window kernel size + const uint16_t dim_kernel_y, // window kernel size + const uint16_t padding_x, // padding sizes + const uint16_t padding_y, // padding sizes + const uint16_t stride_x, // stride + const uint16_t stride_y, // stride + const uint16_t dim_im_out_x, // output image dimension + const uint16_t dim_im_out_y, // output image dimension + q7_t *bufferA, // a buffer for local storage + q7_t *Im_out, // output feature + const uint16_t out_lshift); // output left shift (scaling) + +/** + * @brief Q7 softmax function + * @param[in] vec_in pointer to input vector + * @param[in] dim_vec input vector dimention + * @param[out] p_out pointer to output vector + * @return none. + * + */ + +void csky_vdsp2_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out); + +/** + * @brief Q15 softmax function + * @param[in] vec_in pointer to input vector + * @param[in] dim_vec input vector dimention + * @param[out] p_out pointer to output vector + * @return none. + * + */ + +void csky_vdsp2_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, q15_t *p_out); + +#ifdef __cplusplus +} +#endif + +#endif // INCLUDE_INCLUDE_XT800_CSKY_VDSP2_NNFUNCTIONS_H_ diff --git a/include/include_xt800/dsp_include/csi_instance.h b/include/include_xt800/dsp_include/csi_instance.h deleted file mode 100644 index 88cdece3..00000000 --- a/include/include_xt800/dsp_include/csi_instance.h +++ /dev/null @@ -1,1879 +0,0 @@ -/* - * Copyright (C) 2016-2020 T-head Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/****************************************************************************** - * @file csi_instance.h - * @brief Some common define - * @version V1.0 - * @date Feb. 2020 - ******************************************************************************/ - - -#ifndef _CSI_INSTANCE_H -#define _CSI_INSTANCE_H - -#ifdef __cplusplus -extern "C" -{ -#endif - - -#include -#include -#include -#include -#include -#ifndef __CK860__ -#include "csi_core.h" -#else -#include -#endif - -#define LOW_OPTIMIZATION_ENTER -#define LOW_OPTIMIZATION_EXIT - -#define F64_MAX ((float64_t)DBL_MAX) -#define F32_MAX ((float32_t)FLT_MAX) - -#define F64_MIN (-DBL_MAX) -#define F32_MIN (-FLT_MAX) - -#define F64_ABSMAX ((float64_t)DBL_MAX) -#define F32_ABSMAX ((float32_t)FLT_MAX) - -#define F64_ABSMIN ((float64_t)0.0) -#define F32_ABSMIN ((float32_t)0.0) - -#define Q31_MAX ((q31_t)(0x7FFFFFFFL)) -#define Q15_MAX ((q15_t)(0x7FFF)) -#define Q7_MAX ((q7_t)(0x7F)) -#define Q31_MIN ((q31_t)(0x80000000L)) -#define Q15_MIN ((q15_t)(0x8000)) -#define Q7_MIN ((q7_t)(0x80)) - -#define Q31_ABSMAX ((q31_t)(0x7FFFFFFFL)) -#define Q15_ABSMAX ((q15_t)(0x7FFF)) -#define Q7_ABSMAX ((q7_t)(0x7F)) -#define Q31_ABSMIN ((q31_t)0) -#define Q15_ABSMIN ((q15_t)0) -#define Q7_ABSMIN ((q7_t)0) - -/** - * @brief Macros required for reciprocal calculation in Normalized LMS - */ - -#define DELTA_Q31 ((q31_t)(0x100)) -#define DELTA_Q15 ((q15_t)0x5) -#define INDEX_MASK 0x0000003F -#ifndef PI -#define PI 3.14159265358979f -#endif - -#ifndef UNALIGNED_SUPPORT_DISABLE - #define ALIGN4 -#else - #define ALIGN4 __attribute__((aligned(4))) -#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ - -/** - * @brief Macros required for SINE and COSINE Fast math approximations - */ - -#define FAST_MATH_TABLE_SIZE 512 -#define FAST_MATH_Q31_SHIFT (32 - 10) -#define FAST_MATH_Q15_SHIFT (16 - 10) -#define CONTROLLER_Q31_SHIFT (32 - 9) -#define TABLE_SPACING_Q31 0x400000 -#define TABLE_SPACING_Q15 0x80 - -#define __STATIC_FORCEINLINE static inline __attribute__((unused)) -#define CSI_NEWTON_SQRTF -#ifdef __CK860__ -#define __STATIC_INLINE static inline __attribute__((unused)) - -#define __ALWAYS_STATIC_INLINE __attribute__((always_inline)) static inline - -#endif - - -/** - * @brief Macros required for SINE and COSINE Controller functions - */ -/* 1.31(q31) Fixed value of 2/360 */ -/* -1 to +1 is divided into 360 values so total spacing is (2/360) */ -#define INPUT_SPACING 0xB60B61 - -/** - * @brief Macros for complex numbers - */ - -/* Dimension C vector space */ -#define CMPLX_DIM 2 - -/** - * @brief Error status returned by some functions in the library. - */ - -typedef enum { - CSI_MATH_SUCCESS = 0, /**< No error */ - CSI_MATH_ARGUMENT_ERROR = -1, /**< One or more arguments are incorrect */ - CSI_MATH_LENGTH_ERROR = -2, /**< Length of data buffer is incorrect */ - CSI_MATH_SIZE_MISMATCH = -3, /**< Size of matrices is not compatible with the operation */ - CSI_MATH_NANINF = -4, /**< Not-a-number (NaN) or infinity is generated */ - CSI_MATH_SINGULAR = -5, /**< Input matrix is singular and cannot be inverted */ - CSI_MATH_TEST_FAILURE = -6 /**< Test Failed */ -} csi_status; - -/** - * @brief 8-bit fractional data type in 1.7 format. - */ -typedef int8_t q7_t; - -/** - * @brief 16-bit fractional data type in 1.15 format. - */ -typedef int16_t q15_t; - -/** - * @brief 32-bit fractional data type in 1.31 format. - */ -typedef int32_t q31_t; - -/** - * @brief 64-bit fractional data type in 1.63 format. - */ -typedef int64_t q63_t; - -/** - * @brief 32-bit floating-point type definition. - */ -typedef float float32_t; - -/** - * @brief 64-bit floating-point type definition. - */ -typedef double float64_t; - -/** - @brief definition to read/write two 16 bit values. - @deprecated - */ -#define __SIMD32_TYPE int32_t -#define __SIMD32(addr) (*(__SIMD32_TYPE **) & (addr)) -#define __SIMD32_CONST(addr) ( (__SIMD32_TYPE * ) (addr)) -#define _SIMD32_OFFSET(addr) (*(__SIMD32_TYPE * ) (addr)) -#define __SIMD64(addr) (*( int64_t **) & (addr)) - -#define STEP(x) (x) <= 0 ? 0 : 1 -#define SQ(x) ((x) * (x)) - -__ALWAYS_STATIC_INLINE int32_t __SSAT_31(int32_t x) -{ - int32_t res = x; - if (x > 0x3fffffff) { - res = 0x3fffffff; - } else if (x < -1073741824) { - res = -1073741824; - } - - return res; -} - -__ALWAYS_STATIC_INLINE int32_t __SSAT_16(int32_t x) -{ - int32_t res = x; - if (x > 0x7fff) { - res = 0x7fff; - } else if (x < -32768) { - res = -32768; - } - - return res; -} - -__ALWAYS_STATIC_INLINE int32_t __SSAT_8(int32_t x) -{ - int32_t res = x; - if (x > 0x7f) { - res = 0x7f; - } else if (x < -128) { - res = -128; - } - - return res; -} - -/** - @brief Read 2 Q15 from Q15 pointer. - @param[in] pQ15 points to input value - @return Q31 value - */ -__STATIC_FORCEINLINE q31_t read_q15x2 ( - q15_t * pQ15) -{ - q31_t val; - memcpy (&val, pQ15, 4); - return (val); -} - -/** - @brief Read 2 Q15 from Q15 pointer and increment pointer afterwards. - @param[in] pQ15 points to input value - @return Q31 value - */ -__STATIC_FORCEINLINE q31_t read_q15x2_ia ( - q15_t ** pQ15) -{ - q31_t val; - memcpy (&val, *pQ15, 4); - *pQ15 += 2; - return (val); -} - -/** - @brief Read 2 Q15 from Q15 pointer and decrement pointer afterwards. - @param[in] pQ15 points to input value - @return Q31 value - */ -__STATIC_FORCEINLINE q31_t read_q15x2_da ( - q15_t ** pQ15) -{ - q31_t val; - memcpy (&val, *pQ15, 4); - *pQ15 -= 2; - return (val); -} - -/** - @brief Write 2 Q15 to Q15 pointer and increment pointer afterwards. - @param[in] pQ15 points to input value - @param[in] value Q31 value - @return none - */ -__STATIC_FORCEINLINE void write_q15x2_ia ( - q15_t ** pQ15, - q31_t value) -{ - q31_t val = value; - memcpy (*pQ15, &val, 4); - *pQ15 += 2; -} - -/** - @brief Write 2 Q15 to Q15 pointer. - @param[in] pQ15 points to input value - @param[in] value Q31 value - @return none - */ -__STATIC_FORCEINLINE void write_q15x2 ( - q15_t * pQ15, - q31_t value) -{ - q31_t val = value; - memcpy (pQ15, &val, 4); -} - - -/** - @brief Read 4 Q7 from Q7 pointer and increment pointer afterwards. - @param[in] pQ7 points to input value - @return Q31 value - */ -__STATIC_FORCEINLINE q31_t read_q7x4_ia ( - q7_t ** pQ7) -{ - q31_t val; - memcpy (&val, *pQ7, 4); - *pQ7 += 4; - return (val); -} - -/** - @brief Read 4 Q7 from Q7 pointer and decrement pointer afterwards. - @param[in] pQ7 points to input value - @return Q31 value - */ -__STATIC_FORCEINLINE q31_t read_q7x4_da ( - q7_t ** pQ7) -{ - q31_t val; - memcpy (&val, *pQ7, 4); - *pQ7 -= 4; - return (val); -} - -/** - @brief Write 4 Q7 to Q7 pointer and increment pointer afterwards. - @param[in] pQ7 points to input value - @param[in] value Q31 value - @return none - */ -__STATIC_FORCEINLINE void write_q7x4_ia ( - q7_t ** pQ7, - q31_t value) -{ - q31_t val = value; - memcpy (*pQ7, &val, 4); - *pQ7 += 4; -} - -#ifdef __CK860__ -__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data) -{ - if (data == 0U) { - return 32U; - } - - uint32_t count = 0U; - uint32_t mask = 0x80000000U; - - while ((data & mask) == 0U) { - count += 1U; - mask = mask >> 1U; - } - - return count; -} - -__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat) -{ - if ((sat >= 1U) && (sat <= 32U)) { - const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U); - const int32_t min = -1 - max ; - - if (val > max) { - return max; - - } else if (val < min) { - return min; - } - } - - return val; -} - -__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat) -{ - if (sat <= 31U) { - const uint32_t max = ((1U << sat) - 1U); - - if (val > (int32_t)max) { - return max; - - } else if (val < 0) { - return 0U; - } - } - - return (uint32_t)val; -} -#endif -/** - * @brief definition to pack two 16 bit values. - */ -#define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) | \ - (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000) ) -#define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) | \ - (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF) ) - - -/** -* @brief definition to pack four 8 bit values. -*/ -#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) | \ - (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \ - (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \ - (((int32_t)(v3) << 24) & (int32_t)0xFF000000) ) - -/** - * @brief Clips Q63 to Q31 values. - */ -__STATIC_FORCEINLINE q31_t clip_q63_to_q31( - q63_t x) -{ - return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? - ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x; -} - -/** - * @brief Clips Q63 to Q15 values. - */ -__STATIC_FORCEINLINE q15_t clip_q63_to_q15( - q63_t x) -{ - return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? - ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15); -} - -/** - * @brief Clips Q31 to Q7 values. - */ -__STATIC_FORCEINLINE q7_t clip_q31_to_q7( - q31_t x) -{ - return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ? - ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x; -} - -/** - * @brief Clips Q31 to Q15 values. - */ -__STATIC_FORCEINLINE q15_t clip_q31_to_q15( - q31_t x) -{ - return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ? - ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x; -} - -/** - * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format. - */ -__STATIC_FORCEINLINE q63_t mult32x64( - q63_t x, - q31_t y) -{ - return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) + - (((q63_t) (x >> 32) * y) ) ); -} - -/** - * @brief Integer exponentiation - * @param[in] x value - * @param[in] nb integer exponent >= 1 - * @return x^nb - * - */ -__STATIC_INLINE float32_t csi_exponent_f32(float32_t x, int32_t nb) -{ - float32_t r = x; - nb --; - - while(nb > 0) { - r = r * x; - nb--; - } - - return(r); -} - -/** - * @brief 64-bit to 32-bit unsigned normalization - * @param[in] in is input unsigned long long value - * @param[out] normalized is the 32-bit normalized value - * @param[out] norm is norm scale - */ -__STATIC_INLINE void csi_norm_64_to_32u(uint64_t in, int32_t * normalized, int32_t *norm) -{ - int32_t n1; - int32_t hi = (int32_t) (in >> 32); - int32_t lo = (int32_t) ((in << 32) >> 32); - n1 = __CLZ(hi) - 32; - - if (!n1) { - /* - * input fits in 32-bit - */ - n1 = __CLZ(lo); - - if (!n1) { - /* - * MSB set, need to scale down by 1 - */ - *norm = -1; - *normalized = (((uint32_t) lo) >> 1); - - } else { - if (n1 == 32) { - /* - * input is zero - */ - *norm = 0; - *normalized = 0; - - } else { - /* - * 32-bit normalization - */ - *norm = n1 - 1; - *normalized = lo << *norm; - } - } - - } else { - /* - * input fits in 64-bit - */ - n1 = 1 - n1; - *norm = -n1; - /* - * 64 bit normalization - */ - *normalized = (((uint32_t) lo) >> n1) | (hi << (32 - n1)); - } -} - -__STATIC_INLINE q31_t csi_div_q63_to_q31(q63_t num, q31_t den) -{ - q31_t result; - uint64_t absNum; - int32_t normalized; - int32_t norm; - /* - * if sum fits in 32bits - * avoid costly 64-bit division - */ - absNum = num > 0 ? num : -num; - csi_norm_64_to_32u(absNum, &normalized, &norm); - - if (norm > 0) - /* - * 32-bit division - */ - result = (q31_t) num / den; - - else - /* - * 64-bit division - */ - result = (q31_t) (num / den); - - return result; -} - -/* - * @brief C custom defined intrinsic functions - */ -#ifdef __CK860__ -/* - * @brief C custom defined QADD8 - */ -__STATIC_FORCEINLINE uint32_t __QADD8( - uint32_t x, - uint32_t y) -{ - q31_t r, s, t, u; - r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; - s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; - t = __SSAT(((((q31_t)x << 8) >> 24) + (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; - u = __SSAT(((((q31_t)x ) >> 24) + (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF; - return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r ))); -} - - -/* - * @brief C custom defined QSUB8 - */ -__STATIC_FORCEINLINE uint32_t __QSUB8( - uint32_t x, - uint32_t y) -{ - q31_t r, s, t, u; - r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; - s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; - t = __SSAT(((((q31_t)x << 8) >> 24) - (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; - u = __SSAT(((((q31_t)x ) >> 24) - (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF; - return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r ))); -} - - -/* - * @brief C custom defined QADD16 - */ -__STATIC_FORCEINLINE uint32_t __QADD16( - uint32_t x, - uint32_t y) -{ - /* q31_t r, s; without initialisation 'csi_offset_q15 test' fails but 'intrinsic' tests pass! for armCC */ - q31_t r = 0, s = 0; - r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; - s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; - return ((uint32_t)((s << 16) | (r ))); -} - - -/* - * @brief C custom defined SHADD16 - */ -__STATIC_FORCEINLINE uint32_t __SHADD16( - uint32_t x, - uint32_t y) -{ - q31_t r, s; - r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; - s = (((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; - return ((uint32_t)((s << 16) | (r ))); -} - - -/* - * @brief C custom defined QSUB16 - */ -__STATIC_FORCEINLINE uint32_t __QSUB16( - uint32_t x, - uint32_t y) -{ - q31_t r, s; - r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; - s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; - return ((uint32_t)((s << 16) | (r ))); -} - - -/* - * @brief C custom defined SHSUB16 - */ -__STATIC_FORCEINLINE uint32_t __SHSUB16( - uint32_t x, - uint32_t y) -{ - q31_t r, s; - r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; - s = (((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; - return ((uint32_t)((s << 16) | (r ))); -} - - -/* - * @brief C custom defined QASX - */ -__STATIC_FORCEINLINE uint32_t __QASX( - uint32_t x, - uint32_t y) -{ - q31_t r, s; - r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; - s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; - return ((uint32_t)((s << 16) | (r ))); -} - - -/* - * @brief C custom defined SHASX - */ -__STATIC_FORCEINLINE uint32_t __SHASX( - uint32_t x, - uint32_t y) -{ - q31_t r, s; - r = (((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; - s = (((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; - return ((uint32_t)((s << 16) | (r ))); -} - - -/* - * @brief C custom defined QSAX - */ -__STATIC_FORCEINLINE uint32_t __QSAX( - uint32_t x, - uint32_t y) -{ - q31_t r, s; - r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; - s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; - return ((uint32_t)((s << 16) | (r ))); -} - - -/* - * @brief C custom defined SHSAX - */ -__STATIC_FORCEINLINE uint32_t __SHSAX( - uint32_t x, - uint32_t y) -{ - q31_t r, s; - r = (((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; - s = (((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; - return ((uint32_t)((s << 16) | (r ))); -} - - -/* - * @brief C custom defined SMUSDX - */ -__STATIC_FORCEINLINE uint32_t __SMUSDX( - uint32_t x, - uint32_t y) -{ - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) - - ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) )); -} - -/* - * @brief C custom defined SMUADX - */ -__STATIC_FORCEINLINE uint32_t __SMUADX( - uint32_t x, - uint32_t y) -{ - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) )); -} - - -/* - * @brief C custom defined QADD - */ -__STATIC_FORCEINLINE int32_t __QADD( - int32_t x, - int32_t y) -{ - return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y))); -} - - -/* - * @brief C custom defined QSUB - */ -__STATIC_FORCEINLINE int32_t __QSUB( - int32_t x, - int32_t y) -{ - return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y))); -} - - -/* - * @brief C custom defined SMLAD - */ -__STATIC_FORCEINLINE uint32_t __SMLAD( - uint32_t x, - uint32_t y, - uint32_t sum) -{ - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) + - ( ((q31_t)sum ) ) )); -} - - -/* - * @brief C custom defined SMLADX - */ -__STATIC_FORCEINLINE uint32_t __SMLADX( - uint32_t x, - uint32_t y, - uint32_t sum) -{ - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + - ( ((q31_t)sum ) ) )); -} - - -/* - * @brief C custom defined SMLSDX - */ -__STATIC_FORCEINLINE uint32_t __SMLSDX( - uint32_t x, - uint32_t y, - uint32_t sum) -{ - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) - - ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + - ( ((q31_t)sum ) ) )); -} - - -/* - * @brief C custom defined SMLALD - */ -__STATIC_FORCEINLINE uint64_t __SMLALD( - uint32_t x, - uint32_t y, - uint64_t sum) -{ - /* return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */ - return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) + - ( ((q63_t)sum ) ) )); -} - - -/* - * @brief C custom defined SMLALDX - */ -__STATIC_FORCEINLINE uint64_t __SMLALDX( - uint32_t x, - uint32_t y, - uint64_t sum) -{ - /* return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */ - return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + - ( ((q63_t)sum ) ) )); -} - - -/* - * @brief C custom defined SMUAD - */ -__STATIC_FORCEINLINE uint32_t __SMUAD( - uint32_t x, - uint32_t y) -{ - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + - ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) )); -} - - -/* - * @brief C custom defined SMUSD - */ -__STATIC_FORCEINLINE uint32_t __SMUSD( - uint32_t x, - uint32_t y) -{ - return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) - - ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) )); -} - - -/* - * @brief C custom defined SXTB16 - */ -__STATIC_FORCEINLINE uint32_t __SXTB16( - uint32_t x) -{ - return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) | - ((((q31_t)x << 8) >> 8) & (q31_t)0xFFFF0000) )); -} -/* - * @brief C custom defined SMMLA - */ -__STATIC_FORCEINLINE int32_t __SMMLA( - int32_t x, - int32_t y, - int32_t sum) -{ - return (sum + (int32_t) (((int64_t) x * y) >> 32)); -} -#endif -/** - * @brief Instance structure for the Q7 FIR filter. - */ -typedef struct { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q7_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - const q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ -} csi_fir_instance_q7; - -/** - * @brief Instance structure for the Q15 FIR filter. - */ -typedef struct { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ -} csi_fir_instance_q15; - -/** - * @brief Instance structure for the Q31 FIR filter. - */ -typedef struct { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ -} csi_fir_instance_q31; - -/** - * @brief Instance structure for the floating-point FIR filter. - */ -typedef struct { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ -} csi_fir_instance_f32; - -/** - * @brief Instance structure for the Q15 Biquad cascade filter. - */ -typedef struct { - int8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - q15_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - const q15_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - int8_t postShift; /**< Additional shift, in bits, applied to each output sample. */ -} csi_biquad_casd_df1_inst_q15; - -/** - * @brief Instance structure for the Q31 Biquad cascade filter. - */ -typedef struct { - uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - q31_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - const q31_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - uint8_t postShift; /**< Additional shift, in bits, applied to each output sample. */ -} csi_biquad_casd_df1_inst_q31; - -/** - * @brief Instance structure for the floating-point Biquad cascade filter. - */ -typedef struct { - uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - const float32_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ -} csi_biquad_casd_df1_inst_f32; - -/** - * @brief Instance structure for the floating-point matrix structure. - */ -typedef struct { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - float32_t *pData; /**< points to the data of the matrix. */ -} csi_matrix_instance_f32; - - -/** - * @brief Instance structure for the floating-point matrix structure. - */ -typedef struct { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - float64_t *pData; /**< points to the data of the matrix. */ -} csi_matrix_instance_f64; - -/** - * @brief Instance structure for the Q15 matrix structure. - */ -typedef struct { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - q15_t *pData; /**< points to the data of the matrix. */ -} csi_matrix_instance_q15; - -/** - * @brief Instance structure for the Q31 matrix structure. - */ -typedef struct { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - q31_t *pData; /**< points to the data of the matrix. */ -} csi_matrix_instance_q31; - -/** - * @brief Instance structure for the Q15 PID Control. - */ -typedef struct { - q15_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ - q15_t A1; - q15_t A2; - q15_t state[3]; /**< The state array of length 3. */ - q15_t Kp; /**< The proportional gain. */ - q15_t Ki; /**< The integral gain. */ - q15_t Kd; /**< The derivative gain. */ -} csi_pid_instance_q15; - -/** - * @brief Instance structure for the Q31 PID Control. - */ -typedef struct { - q31_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ - q31_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */ - q31_t A2; /**< The derived gain, A2 = Kd . */ - q31_t state[3]; /**< The state array of length 3. */ - q31_t Kp; /**< The proportional gain. */ - q31_t Ki; /**< The integral gain. */ - q31_t Kd; /**< The derivative gain. */ -} csi_pid_instance_q31; - -/** - * @brief Instance structure for the floating-point PID Control. - */ -typedef struct { - float32_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ - float32_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */ - float32_t A2; /**< The derived gain, A2 = Kd . */ - float32_t state[3]; /**< The state array of length 3. */ - float32_t Kp; /**< The proportional gain. */ - float32_t Ki; /**< The integral gain. */ - float32_t Kd; /**< The derivative gain. */ -} csi_pid_instance_f32; - -/** - * @brief Instance structure for the floating-point Linear Interpolate function. - */ -typedef struct { - uint32_t nValues; /**< nValues */ - float32_t x1; /**< x1 */ - float32_t xSpacing; /**< xSpacing */ - float32_t *pYData; /**< pointer to the table of Y values */ -} csi_linear_interp_instance_f32; - -/** - * @brief Instance structure for the floating-point bilinear interpolation function. - */ -typedef struct { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - float32_t *pData; /**< points to the data table. */ -} csi_bilinear_interp_instance_f32; - -/** -* @brief Instance structure for the Q31 bilinear interpolation function. -*/ -typedef struct { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - q31_t *pData; /**< points to the data table. */ -} csi_bilinear_interp_instance_q31; - -/** -* @brief Instance structure for the Q15 bilinear interpolation function. -*/ -typedef struct { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - q15_t *pData; /**< points to the data table. */ -} csi_bilinear_interp_instance_q15; - -/** -* @brief Instance structure for the Q15 bilinear interpolation function. -*/ -typedef struct { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - q7_t *pData; /**< points to the data table. */ -} csi_bilinear_interp_instance_q7; - -/** - * @brief Instance structure for the Q15 CFFT/CIFFT function. - */ -typedef struct { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const q15_t *pTwiddle; /**< points to the Sin twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ -} csi_cfft_radix2_instance_q15; - -/** - * @brief Instance structure for the Q15 CFFT/CIFFT function. - */ -typedef struct { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const q15_t *pTwiddle; /**< points to the twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ -} csi_cfft_radix4_instance_q15; - -/** - * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function. - */ -typedef struct { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const q31_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ -} csi_cfft_radix2_instance_q31; - - -/** - * @brief Instance structure for the Q31 CFFT/CIFFT function. - */ -typedef struct { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const q31_t *pTwiddle; /**< points to the twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ -} csi_cfft_radix4_instance_q31; - -/** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ -typedef struct { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - float32_t onebyfftLen; /**< value of 1/fftLen. */ -} csi_cfft_radix2_instance_f32; - -/** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ -typedef struct { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - float32_t onebyfftLen; /**< value of 1/fftLen. */ -} csi_cfft_radix4_instance_f32; - -/** - * @brief Instance structure for the fixed-point CFFT/CIFFT function. - */ -typedef struct { - uint16_t fftLen; /**< length of the FFT. */ - const q15_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. */ -} csi_cfft_instance_q15; - -/** - * @brief Instance structure for the fixed-point CFFT/CIFFT function. - */ -typedef struct { - uint16_t fftLen; /**< length of the FFT. */ - const q31_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. */ -} csi_cfft_instance_q31; - - -/** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ -typedef struct { - uint16_t fftLen; /**< length of the FFT. */ - const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. */ -} csi_cfft_instance_f32; - - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - q31_t *pTwiddleAReal; /**< points to the A real twiddle factor table. */ - q31_t *pTwiddleBReal; /**< points to the B real twiddle factor table. */ - const csi_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */ - } csi_rfft_fast_instance_q31; - - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - q15_t *pTwiddleAReal; /**< points to the A real twiddle factor table. */ - q15_t *pTwiddleBReal; /**< points to the B real twiddle factor table. */ - const csi_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */ - } csi_rfft_fast_instance_q15; - -/** - * @brief Instance structure for the Q15 RFFT/RIFFT function. - */ -typedef struct { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ -#if (!defined __riscv_xthead) && (defined __riscv) - q15_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ -#endif - const csi_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */ -} csi_rfft_instance_q15; - - -/** - * @brief Instance structure for the Q31 RFFT/RIFFT function. - */ -typedef struct { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - q31_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ -#if (!defined __riscv_xthead) && (defined __riscv) - q31_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ -#endif - const csi_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */ -} csi_rfft_instance_q31; - - -/** - * @brief Instance structure for the floating-point RFFT/RIFFT function. - */ -typedef struct { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint16_t fftLenBy2; /**< length of the complex FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - float32_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ - float32_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ - csi_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */ -} csi_rfft_instance_f32; - -/** - * @brief Instance structure for the floating-point RFFT/RIFFT function. - */ -typedef struct { - csi_cfft_instance_f32 Sint; /**< Internal CFFT structure. */ - uint16_t fftLenRFFT; /**< length of the real sequence */ - const float32_t * pTwiddleRFFT; /**< Twiddle factors real stage */ -} csi_rfft_fast_instance_f32 ; - -/** - * @brief Instance structure for the floating-point DCT4/IDCT4 function. - */ -typedef struct { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - float32_t normalize; /**< normalizing factor. */ - const float32_t *pTwiddle; /**< points to the twiddle factor table. */ - const float32_t *pCosFactor; /**< points to the cosFactor table. */ - csi_rfft_fast_instance_f32 *pRfft; /**< points to the real FFT instance. */ - csi_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */ -} csi_dct4_instance_f32; - - -/** - * @brief Instance structure for the Q31 DCT4/IDCT4 function. - */ -typedef struct { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q31_t normalize; /**< normalizing factor. */ - const q31_t *pTwiddle; /**< points to the twiddle factor table. */ - const q31_t *pCosFactor; /**< points to the cosFactor table. */ - csi_rfft_instance_q31 *pRfft; /**< points to the real FFT instance. */ - csi_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */ -} csi_dct4_instance_q31; - - -/** - * @brief Instance structure for the Q15 DCT4/IDCT4 function. - */ -typedef struct { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q15_t normalize; /**< normalizing factor. */ - const q15_t *pTwiddle; /**< points to the twiddle factor table. */ - const q15_t *pCosFactor; /**< points to the cosFactor table. */ - csi_rfft_instance_q15 *pRfft; /**< points to the real FFT instance. */ - csi_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */ -} csi_dct4_instance_q15; - - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q15_t normalize; /**< normalizing factor. */ - q15_t *pTwiddle; /**< points to the twiddle factor table. */ - q15_t *pCosFactor; /**< points to the cosFactor table. */ - csi_rfft_fast_instance_q15 *pRfft; /**< points to the real FFT instance. */ - csi_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */ - } csi_dct4_fast_instance_q15; - - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q31_t normalize; /**< normalizing factor. */ - q31_t *pTwiddle; /**< points to the twiddle factor table. */ - q31_t *pCosFactor; /**< points to the cosFactor table. */ - csi_rfft_fast_instance_q31 *pRfft; /**< points to the real FFT instance. */ - csi_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */ - } csi_dct4_fast_instance_q31; - - csi_status csi_dct4_init_q31( - csi_dct4_instance_q31 * S, - csi_rfft_instance_q31 * S_RFFT, - csi_cfft_radix4_instance_q31 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q31_t normalize); - - void csi_dct4_q31( - const csi_dct4_instance_q31 * S, - q31_t * pState, - q31_t * pInlineBuffer); - - void csi_dct4_fast_q31( - const csi_dct4_fast_instance_q31 * S, - q31_t * pState, - q31_t * pInlineBuffer); - - -/** - * @brief Instance structure for the Q15 FIR decimator. - */ -typedef struct { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ -} csi_fir_decimate_instance_q15; - -/** - * @brief Instance structure for the Q31 FIR decimator. - */ -typedef struct { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ -} csi_fir_decimate_instance_q31; - -/** - @brief Instance structure for floating-point FIR decimator. - */ -typedef struct { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ -} csi_fir_decimate_instance_f32; - -/** - * @brief Instance structure for the Q15 FIR interpolator. - */ -typedef struct { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - q15_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */ -} csi_fir_interpolate_instance_q15; - -/** - * @brief Instance structure for the Q31 FIR interpolator. - */ -typedef struct { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - q31_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */ -} csi_fir_interpolate_instance_q31; - -/** - * @brief Instance structure for the floating-point FIR interpolator. - */ -typedef struct { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - float32_t *pState; /**< points to the state variable array. The array is of length phaseLength+numTaps-1. */ -} csi_fir_interpolate_instance_f32; - - -/** - * @brief Instance structure for the high precision Q31 Biquad cascade filter. - */ -typedef struct { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - q63_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */ - const q31_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - uint8_t postShift; /**< additional shift, in bits, applied to each output sample. */ -} csi_biquad_cas_df1_32x64_ins_q31; - -/** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ -typedef struct { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */ - const float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ -} csi_biquad_cascade_df2T_instance_f32; - -/** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ -typedef struct { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */ - const float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ -} csi_biquad_cascade_stereo_df2T_instance_f32; - -/** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ -typedef struct { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float64_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */ - float64_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ -} csi_biquad_cascade_df2T_instance_f64; - -/** - * @brief Instance structure for the Q15 FIR lattice filter. - */ -typedef struct { - uint16_t numStages; /**< number of filter stages. */ - q15_t *pState; /**< points to the state variable array. The array is of length numStages. */ - const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ -} csi_fir_lattice_instance_q15; - -/** - * @brief Instance structure for the Q31 FIR lattice filter. - */ -typedef struct { - uint16_t numStages; /**< number of filter stages. */ - q31_t *pState; /**< points to the state variable array. The array is of length numStages. */ - const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ -} csi_fir_lattice_instance_q31; - -/** - * @brief Instance structure for the floating-point FIR lattice filter. - */ -typedef struct { - uint16_t numStages; /**< number of filter stages. */ - float32_t *pState; /**< points to the state variable array. The array is of length numStages. */ - const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ -} csi_fir_lattice_instance_f32; - - -/** - * @brief Instance structure for the Q15 IIR lattice filter. - */ -typedef struct { - uint16_t numStages; /**< number of stages in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - q15_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - q15_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ -} csi_iir_lattice_instance_q15; - -/** - * @brief Instance structure for the Q31 IIR lattice filter. - */ -typedef struct { - uint16_t numStages; /**< number of stages in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - q31_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - q31_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ -} csi_iir_lattice_instance_q31; - -/** - * @brief Instance structure for the floating-point IIR lattice filter. - */ -typedef struct { - uint16_t numStages; /**< number of stages in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - float32_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - float32_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ -} csi_iir_lattice_instance_f32; - - -/** - * @brief Instance structure for the floating-point LMS filter. - */ -typedef struct { - uint16_t numTaps; /**< number of coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - float32_t mu; /**< step size that controls filter coefficient updates. */ -} csi_lms_instance_f32; - - -/** - * @brief Instance structure for the Q15 LMS filter. - */ -typedef struct { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q15_t mu; /**< step size that controls filter coefficient updates. */ - uint32_t postShift; /**< bit shift applied to coefficients. */ -} csi_lms_instance_q15; - - -/** - * @brief Instance structure for the Q31 LMS filter. - */ -typedef struct { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q31_t mu; /**< step size that controls filter coefficient updates. */ - uint32_t postShift; /**< bit shift applied to coefficients. */ -} csi_lms_instance_q31; - - -/** - * @brief Instance structure for the floating-point normalized LMS filter. - */ -typedef struct { - uint16_t numTaps; /**< number of coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - float32_t mu; /**< step size that control filter coefficient updates. */ - float32_t energy; /**< saves previous frame energy. */ - float32_t x0; /**< saves previous input sample. */ -} csi_lms_norm_instance_f32; - -/** - * @brief Instance structure for the Q31 normalized LMS filter. - */ -typedef struct { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q31_t mu; /**< step size that controls filter coefficient updates. */ - uint8_t postShift; /**< bit shift applied to coefficients. */ - const q31_t *recipTable; /**< points to the reciprocal initial value table. */ - q31_t energy; /**< saves previous frame energy. */ - q31_t x0; /**< saves previous input sample. */ -} csi_lms_norm_instance_q31; - - -/** - * @brief Instance structure for the Q15 normalized LMS filter. - */ -typedef struct { - uint16_t numTaps; /**< Number of coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q15_t mu; /**< step size that controls filter coefficient updates. */ - uint8_t postShift; /**< bit shift applied to coefficients. */ - const q15_t *recipTable; /**< Points to the reciprocal initial value table. */ - q15_t energy; /**< saves previous frame energy. */ - q15_t x0; /**< saves previous input sample. */ -} csi_lms_norm_instance_q15; - -/** - * @brief Instance structure for the floating-point sparse FIR filter. - */ -typedef struct { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - float32_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - const float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ -} csi_fir_sparse_instance_f32; - -/** - * @brief Instance structure for the Q31 sparse FIR filter. - */ -typedef struct { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q31_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - const q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ -} csi_fir_sparse_instance_q31; - -/** - * @brief Instance structure for the Q15 sparse FIR filter. - */ -typedef struct { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q15_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - const q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ -} csi_fir_sparse_instance_q15; - -/** - * @brief Instance structure for the Q7 sparse FIR filter. - */ -typedef struct { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q7_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - const q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ -} csi_fir_sparse_instance_q7; - - -/** -* @brief Struct for specifying SVM Kernel -* -*/ -typedef enum { - CSI_ML_KERNEL_LINEAR = 0, - /**< Linear kernel */ - CSI_ML_KERNEL_POLYNOMIAL = 1, - /**< Polynomial kernel */ - CSI_ML_KERNEL_RBF = 2, - /**< Radial Basis Function kernel */ - CSI_ML_KERNEL_SIGMOID = 3 - /**< Sigmoid kernel */ -} csi_ml_kernel_type; - - - -/** - * @brief Instance structure for linear SVM prediction function. - */ -typedef struct { - uint32_t nbOfSupportVectors; /**< Number of support vectors */ - uint32_t vectorDimension; /**< Dimension of vector space */ - float32_t intercept; /**< Intercept */ - const float32_t *dualCoefficients; /**< Dual coefficients */ - const float32_t *supportVectors; /**< Support vectors */ - const int32_t *classes; /**< The two SVM classes */ -} csi_svm_linear_instance_f32; - - -/** - * @brief Instance structure for polynomial SVM prediction function. - */ -typedef struct { - uint32_t nbOfSupportVectors; /**< Number of support vectors */ - uint32_t vectorDimension; /**< Dimension of vector space */ - float32_t intercept; /**< Intercept */ - const float32_t *dualCoefficients; /**< Dual coefficients */ - const float32_t *supportVectors; /**< Support vectors */ - const int32_t *classes; /**< The two SVM classes */ - int32_t degree; /**< Polynomial degree */ - float32_t coef0; /**< Polynomial constant */ - float32_t gamma; /**< Gamma factor */ -} csi_svm_polynomial_instance_f32; - -/** - * @brief Instance structure for rbf SVM prediction function. - */ -typedef struct { - uint32_t nbOfSupportVectors; /**< Number of support vectors */ - uint32_t vectorDimension; /**< Dimension of vector space */ - float32_t intercept; /**< Intercept */ - const float32_t *dualCoefficients; /**< Dual coefficients */ - const float32_t *supportVectors; /**< Support vectors */ - const int32_t *classes; /**< The two SVM classes */ - float32_t gamma; /**< Gamma factor */ -} csi_svm_rbf_instance_f32; - -/** - * @brief Instance structure for sigmoid SVM prediction function. - */ -typedef struct { - uint32_t nbOfSupportVectors; /**< Number of support vectors */ - uint32_t vectorDimension; /**< Dimension of vector space */ - float32_t intercept; /**< Intercept */ - const float32_t *dualCoefficients; /**< Dual coefficients */ - const float32_t *supportVectors; /**< Support vectors */ - const int32_t *classes; /**< The two SVM classes */ - float32_t coef0; /**< Independant constant */ - float32_t gamma; /**< Gamma factor */ -} csi_svm_sigmoid_instance_f32; - -/** - * @brief Instance structure for Naive Gaussian Bayesian estimator. - */ -typedef struct { - uint32_t vectorDimension; /**< Dimension of vector space */ - uint32_t numberOfClasses; /**< Number of different classes */ - const float32_t *theta; /**< Mean values for the Gaussians */ - const float32_t *sigma; /**< Variances for the Gaussians */ - const float32_t *classPriors; /**< Class prior probabilities */ - float32_t epsilon; /**< Additive value to variances */ -} csi_gaussian_naive_bayes_instance_f32; - -#ifdef CSI_SIMD -/* SMMLAR */ -__ALWAYS_STATIC_INLINE int32_t multAcc_32x32_keep32_R(int32_t a, int32_t x, int32_t y) -{ - __ASM volatile("mula.s32.rhs %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y) : "0" (a), "1" (x), "2" (y)); - return a; -} - -/* SMMLSR */ -__ALWAYS_STATIC_INLINE int32_t multSub_32x32_keep32_R(int32_t a, int32_t x, int32_t y) -{ - __ASM volatile("muls.s32.rhs %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -/* SMMULR */ -__ALWAYS_STATIC_INLINE int32_t mult_32x32_keep32_R(int32_t x, int32_t y) -{ - int32_t a; - __ASM volatile("mul.s32.rh %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y)); - return a; -} - -/* SMMLA */ -__ALWAYS_STATIC_INLINE int32_t multAcc_32x32_keep32(int32_t a, int32_t x, int32_t y) -{ - __ASM volatile("mula.s32.hs %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -/* SMMLS */ -__ALWAYS_STATIC_INLINE int32_t multSub_32x32_keep32(int32_t a, int32_t x, int32_t y) -{ - __ASM volatile("muls.s32.hs %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -/* SMMUL */ -__ALWAYS_STATIC_INLINE int32_t mult_32x32_keep32(int32_t x, int32_t y) -{ - int32_t a; - __ASM volatile("mul.s32.h %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int32_t multAcc_16x16_keep32(int32_t a, int16_t x, int16_t y) -{ - __ASM volatile("mulall.s16 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int64_t multAcc_16x16_keep64(int64_t a, int16_t x, int16_t y) -{ - __ASM volatile("mulall.s16.e %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int64_t mult_32x32_keep64(int32_t x, int32_t y) -{ - int64_t a; - __ASM volatile("mul.s32 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int64_t multAcc_32x32_keep64(int64_t a, int32_t x, int32_t y) -{ - __ASM volatile("mula.s32 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int32_t mult_32x32_dext_31(int32_t x, int32_t y) -{ - int64_t tmp1; - int32_t tmp2; - __ASM volatile("mul.s32 %0, %1, %2\n\t" - "dexti %3, %0, %R0, 31" - :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y)); - return tmp2; -} - -__ALWAYS_STATIC_INLINE int32_t mult_32x32_dext_30(int32_t x, int32_t y) -{ - int64_t tmp1; - int32_t tmp2; - __ASM volatile("mul.s32 %0, %1, %2\n\t" - "dexti %3, %0, %R0, 30" - :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y)); - return tmp2; -} - -__ALWAYS_STATIC_INLINE int32_t mult_32x32_dext_4(int32_t x, int32_t y) -{ - int64_t tmp1; - int32_t tmp2; - __ASM volatile("mul.s32 %0, %1, %2\n\t" - "dexti %3, %0, %R0, 4" - :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y)); - return tmp2; -} - -__ALWAYS_STATIC_INLINE int32_t mult_32x32_dext_33(int32_t x, int32_t y) -{ - int64_t tmp1; - int32_t tmp2; - __ASM volatile("mul.s32 %0, %1, %2\n\t" - "asri %3, %R0, 1" - :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y)); - return tmp2; -} - -__ALWAYS_STATIC_INLINE int32_t dext_31(int64_t x) -{ - int32_t tmp1; - __ASM volatile( - "dexti %0, %1, %R1, 31" - :"=r" (tmp1), "=r" (x) : "1" (x)); - return tmp1; -} - -__ALWAYS_STATIC_INLINE int32_t mult_l16xl16_keep32(int32_t x, int32_t y) -{ - int32_t a; - __ASM volatile("mulll.s16 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int32_t mult_h16xl16_keep32(int32_t x, int32_t y) -{ - int32_t a; - __ASM volatile("mulhl.s16 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int32_t mult_h16xh16_keep32(int32_t x, int32_t y) -{ - int32_t a; - __ASM volatile("mulhh.s16 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y)); - return a; -} - -#else - -/* SMMLAR */ -#define multAcc_32x32_keep32_R(a, x, y) \ - a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32) - -/* SMMLSR */ -#define multSub_32x32_keep32_R(a, x, y) \ - a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32) - -/* SMMULR */ -#define mult_32x32_keep32_R(a, x, y) \ - a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32) - -/* SMMLA */ -#define multAcc_32x32_keep32(a, x, y) \ - a += (q31_t) (((q63_t) x * y) >> 32) - -/* SMMLS */ -#define multSub_32x32_keep32(a, x, y) \ - a -= (q31_t) (((q63_t) x * y) >> 32) - -/* SMMUL */ -#define mult_32x32_keep32(a, x, y) \ - a = (q31_t) (((q63_t) x * y ) >> 32) -#endif -#ifdef __cplusplus -} -#endif - - -#endif /* _CSI_MATH_H */ - -/** - * - * End of file. - */ diff --git a/include/include_xt800/dsp_include/csi_math.h b/include/include_xt800/dsp_include/csi_math.h deleted file mode 100644 index 78e0bb24..00000000 --- a/include/include_xt800/dsp_include/csi_math.h +++ /dev/null @@ -1,5739 +0,0 @@ -/* - * Copyright (C) 2016-2020 T-head Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/****************************************************************************** - * @file csi_math.h - * @brief Some common define - * @version V1.0 - * @date Feb. 2020 - ******************************************************************************/ - - -#ifndef _CSI_MATH_H -#define _CSI_MATH_H - -#ifdef __cplusplus -extern "C" -{ -#endif - - -#include -#include -#include -#include -#include -#include "csi_instance.h" - - -/** - * @brief Processing function for the Q7 FIR filter. - * @param[in] S points to an instance of the Q7 FIR filter structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_fir_q7( - const csi_fir_instance_q7 * S, - const q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - -/** - * @brief Initialization function for the Q7 FIR filter. - * @param[in,out] S points to an instance of the Q7 FIR structure. - * @param[in] numTaps Number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of samples that are processed. - */ -void csi_fir_init_q7( - csi_fir_instance_q7 * S, - uint16_t numTaps, - const q7_t * pCoeffs, - q7_t * pState, - uint32_t blockSize); - -/** - * @brief Processing function for the Q15 FIR filter. - * @param[in] S points to an instance of the Q15 FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_fir_q15( - const csi_fir_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - -/** - * @brief Processing function for the fast Q15 FIR filter (fast version). - * @param[in] S points to an instance of the Q15 FIR filter structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_fir_fast_q15( - const csi_fir_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - -/** - * @brief Initialization function for the Q15 FIR filter. - * @param[in,out] S points to an instance of the Q15 FIR filter structure. - * @param[in] numTaps Number of filter coefficients in the filter. Must be even and greater than or equal to 4. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of samples that are processed at a time. - * @return The function returns either - * CSI_MATH_SUCCESS if initialization was successful or - * CSI_MATH_ARGUMENT_ERROR if numTaps is not a supported value. - */ -csi_status csi_fir_init_q15( - csi_fir_instance_q15 * S, - uint16_t numTaps, - const q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - -/** - * @brief Processing function for the Q31 FIR filter. - * @param[in] S points to an instance of the Q31 FIR filter structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_fir_q31( - const csi_fir_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - -/** - * @brief Processing function for the fast Q31 FIR filter (fast version). - * @param[in] S points to an instance of the Q31 FIR filter structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_fir_fast_q31( - const csi_fir_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - -/** - * @brief Initialization function for the Q31 FIR filter. - * @param[in,out] S points to an instance of the Q31 FIR structure. - * @param[in] numTaps Number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of samples that are processed at a time. - */ -void csi_fir_init_q31( - csi_fir_instance_q31 * S, - uint16_t numTaps, - const q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - -/** - * @brief Processing function for the floating-point FIR filter. - * @param[in] S points to an instance of the floating-point FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_fir_f32( - const csi_fir_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - -/** - * @brief Initialization function for the floating-point FIR filter. - * @param[in,out] S points to an instance of the floating-point FIR filter structure. - * @param[in] numTaps Number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of samples that are processed at a time. - */ -void csi_fir_init_f32( - csi_fir_instance_f32 * S, - uint16_t numTaps, - const float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - -/** - * @brief Processing function for the Q15 Biquad cascade filter. - * @param[in] S points to an instance of the Q15 Biquad cascade structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_biquad_cascade_df1_q15( - const csi_biquad_casd_df1_inst_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - -/** - * @brief Initialization function for the Q15 Biquad cascade filter. - * @param[in,out] S points to an instance of the Q15 Biquad cascade structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] postShift Shift to be applied to the output. Varies according to the coefficients format - */ -void csi_biquad_cascade_df1_init_q15( - csi_biquad_casd_df1_inst_q15 * S, - uint8_t numStages, - const q15_t * pCoeffs, - q15_t * pState, - int8_t postShift); - -/** - * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4. - * @param[in] S points to an instance of the Q15 Biquad cascade structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_biquad_cascade_df1_fast_q15( - const csi_biquad_casd_df1_inst_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - -/** - * @brief Processing function for the Q31 Biquad cascade filter - * @param[in] S points to an instance of the Q31 Biquad cascade structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_biquad_cascade_df1_q31( - const csi_biquad_casd_df1_inst_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - -/** - * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4. - * @param[in] S points to an instance of the Q31 Biquad cascade structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_biquad_cascade_df1_fast_q31( - const csi_biquad_casd_df1_inst_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - -/** - * @brief Initialization function for the Q31 Biquad cascade filter. - * @param[in,out] S points to an instance of the Q31 Biquad cascade structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] postShift Shift to be applied to the output. Varies according to the coefficients format - */ -void csi_biquad_cascade_df1_init_q31( - csi_biquad_casd_df1_inst_q31 * S, - uint8_t numStages, - const q31_t * pCoeffs, - q31_t * pState, - int8_t postShift); - -/** - * @brief Processing function for the floating-point Biquad cascade filter. - * @param[in] S points to an instance of the floating-point Biquad cascade structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_biquad_cascade_df1_f32( - const csi_biquad_casd_df1_inst_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - -/** - * @brief Initialization function for the floating-point Biquad cascade filter. - * @param[in,out] S points to an instance of the floating-point Biquad cascade structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - */ -void csi_biquad_cascade_df1_init_f32( - csi_biquad_casd_df1_inst_f32 * S, - uint8_t numStages, - const float32_t * pCoeffs, - float32_t * pState); - -/** - * @brief Floating-point matrix addition. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_add_f32( - const csi_matrix_instance_f32 * pSrcA, - const csi_matrix_instance_f32 * pSrcB, - csi_matrix_instance_f32 * pDst); - -/** - * @brief Q15 matrix addition. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_add_q15( - const csi_matrix_instance_q15 * pSrcA, - const csi_matrix_instance_q15 * pSrcB, - csi_matrix_instance_q15 * pDst); - -/** - * @brief Q31 matrix addition. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_add_q31( - const csi_matrix_instance_q31 * pSrcA, - const csi_matrix_instance_q31 * pSrcB, - csi_matrix_instance_q31 * pDst); - -/** - * @brief Floating-point, complex, matrix multiplication. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_cmplx_mult_f32( - const csi_matrix_instance_f32 * pSrcA, - const csi_matrix_instance_f32 * pSrcB, - csi_matrix_instance_f32 * pDst); - -/** - * @brief Q15, complex, matrix multiplication. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_cmplx_mult_q15( - const csi_matrix_instance_q15 * pSrcA, - const csi_matrix_instance_q15 * pSrcB, - csi_matrix_instance_q15 * pDst); - -void csi_mult_q15xq31_sht( - q15_t * pSrcA, - q31_t * pSrcB, - uint32_t shiftValue, - uint32_t blockSize); - -/** - * @brief Q31, complex, matrix multiplication. - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_cmplx_mult_q31( - const csi_matrix_instance_q31 * pSrcA, - const csi_matrix_instance_q31 * pSrcB, - csi_matrix_instance_q31 * pDst); - -/** - * @brief Floating-point matrix transpose. - * @param[in] pSrc points to the input matrix - * @param[out] pDst points to the output matrix - * @return The function returns either CSI_MATH_SIZE_MISMATCH - * or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_trans_f32( - const csi_matrix_instance_f32 * pSrc, - csi_matrix_instance_f32 * pDst); - -/** - * @brief Q15 matrix transpose. - * @param[in] pSrc points to the input matrix - * @param[out] pDst points to the output matrix - * @return The function returns either CSI_MATH_SIZE_MISMATCH - * or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_trans_q15( - const csi_matrix_instance_q15 * pSrc, - csi_matrix_instance_q15 * pDst); - -/** - * @brief Q31 matrix transpose. - * @param[in] pSrc points to the input matrix - * @param[out] pDst points to the output matrix - * @return The function returns either CSI_MATH_SIZE_MISMATCH - * or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_trans_q31( - const csi_matrix_instance_q31 * pSrc, - csi_matrix_instance_q31 * pDst); - -/** - * @brief Floating-point matrix multiplication - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_mult_f32( - const csi_matrix_instance_f32 * pSrcA, - const csi_matrix_instance_f32 * pSrcB, - csi_matrix_instance_f32 * pDst); - -/** - * @brief Q15 matrix multiplication - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_mult_q15( - const csi_matrix_instance_q15 * pSrcA, - const csi_matrix_instance_q15 * pSrcB, - csi_matrix_instance_q15 * pDst); - - csi_status csi_mat_mult_trans_q15( - const csi_matrix_instance_q15 * pSrcA, - const csi_matrix_instance_q15 * pSrcB, - csi_matrix_instance_q15 * pDst); - -/** - * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @param[in] pState points to the array for storing intermediate results - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_mult_fast_q15( - const csi_matrix_instance_q15 * pSrcA, - const csi_matrix_instance_q15 * pSrcB, - csi_matrix_instance_q15 * pDst); - -/** - * @brief Q31 matrix multiplication - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_mult_q31( - const csi_matrix_instance_q31 * pSrcA, - const csi_matrix_instance_q31 * pSrcB, - csi_matrix_instance_q31 * pDst); - -csi_status csi_mat_mult_trans_q31( - const csi_matrix_instance_q31 * pSrcA, - const csi_matrix_instance_q31 * pSrcB, - csi_matrix_instance_q31 * pDst); - -/** - * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_mult_fast_q31( - const csi_matrix_instance_q31 * pSrcA, - const csi_matrix_instance_q31 * pSrcB, - csi_matrix_instance_q31 * pDst); - -/** - * @brief Floating-point matrix subtraction - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_sub_f32( - const csi_matrix_instance_f32 * pSrcA, - const csi_matrix_instance_f32 * pSrcB, - csi_matrix_instance_f32 * pDst); - -/** - * @brief Q15 matrix subtraction - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_sub_q15( - const csi_matrix_instance_q15 * pSrcA, - const csi_matrix_instance_q15 * pSrcB, - csi_matrix_instance_q15 * pDst); - -/** - * @brief Q31 matrix subtraction - * @param[in] pSrcA points to the first input matrix structure - * @param[in] pSrcB points to the second input matrix structure - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_sub_q31( - const csi_matrix_instance_q31 * pSrcA, - const csi_matrix_instance_q31 * pSrcB, - csi_matrix_instance_q31 * pDst); - -void csi_sum_q15( - q15_t * pSrcA, - q63_t * pDst, - uint32_t blockSize); - -/** - * @brief Floating-point matrix scaling. - * @param[in] pSrc points to the input matrix - * @param[in] scale scale factor - * @param[out] pDst points to the output matrix - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_scale_f32( - const csi_matrix_instance_f32 * pSrc, - float32_t scale, - csi_matrix_instance_f32 * pDst); - -/** - * @brief Q15 matrix scaling. - * @param[in] pSrc points to input matrix - * @param[in] scaleFract fractional portion of the scale factor - * @param[in] shift number of bits to shift the result by - * @param[out] pDst points to output matrix - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_scale_q15( - const csi_matrix_instance_q15 * pSrc, - q15_t scaleFract, - int32_t shift, - csi_matrix_instance_q15 * pDst); - -/** - * @brief Q31 matrix scaling. - * @param[in] pSrc points to input matrix - * @param[in] scaleFract fractional portion of the scale factor - * @param[in] shift number of bits to shift the result by - * @param[out] pDst points to output matrix structure - * @return The function returns either - * CSI_MATH_SIZE_MISMATCH or CSI_MATH_SUCCESS based on the outcome of size checking. - */ -csi_status csi_mat_scale_q31( - const csi_matrix_instance_q31 * pSrc, - q31_t scaleFract, - int32_t shift, - csi_matrix_instance_q31 * pDst); - -/** - * @brief Q31 matrix initialization. - * @param[in,out] S points to an instance of the floating-point matrix structure. - * @param[in] nRows number of rows in the matrix. - * @param[in] nColumns number of columns in the matrix. - * @param[in] pData points to the matrix data array. - */ -void csi_mat_init_q31( - csi_matrix_instance_q31 * S, - uint16_t nRows, - uint16_t nColumns, - q31_t * pData); - -/** - * @brief Q15 matrix initialization. - * @param[in,out] S points to an instance of the floating-point matrix structure. - * @param[in] nRows number of rows in the matrix. - * @param[in] nColumns number of columns in the matrix. - * @param[in] pData points to the matrix data array. - */ -void csi_mat_init_q15( - csi_matrix_instance_q15 * S, - uint16_t nRows, - uint16_t nColumns, - q15_t * pData); - -/** - * @brief Floating-point matrix initialization. - * @param[in,out] S points to an instance of the floating-point matrix structure. - * @param[in] nRows number of rows in the matrix. - * @param[in] nColumns number of columns in the matrix. - * @param[in] pData points to the matrix data array. - */ -void csi_mat_init_f32( - csi_matrix_instance_f32 * S, - uint16_t nRows, - uint16_t nColumns, - float32_t * pData); - - -/** - * @brief Initialization function for the floating-point PID Control. - * @param[in,out] S points to an instance of the PID structure. - * @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state. - */ -void csi_pid_init_f32( - csi_pid_instance_f32 * S, - int32_t resetStateFlag); - - -/** - * @brief Reset function for the floating-point PID Control. - * @param[in,out] S is an instance of the floating-point PID Control structure - */ -void csi_pid_reset_f32( - csi_pid_instance_f32 * S); - - -/** - * @brief Initialization function for the Q31 PID Control. - * @param[in,out] S points to an instance of the Q15 PID structure. - * @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state. - */ -void csi_pid_init_q31( - csi_pid_instance_q31 * S, - int32_t resetStateFlag); - - -/** - * @brief Reset function for the Q31 PID Control. - * @param[in,out] S points to an instance of the Q31 PID Control structure - */ - -void csi_pid_reset_q31( - csi_pid_instance_q31 * S); - - -/** - * @brief Initialization function for the Q15 PID Control. - * @param[in,out] S points to an instance of the Q15 PID structure. - * @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state. - */ -void csi_pid_init_q15( - csi_pid_instance_q15 * S, - int32_t resetStateFlag); - - -/** - * @brief Reset function for the Q15 PID Control. - * @param[in,out] S points to an instance of the q15 PID Control structure - */ -void csi_pid_reset_q15( - csi_pid_instance_q15 * S); - -/** - * @brief Q7 vector multiplication. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_mult_q7( - const q7_t * pSrcA, - const q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q15 vector multiplication. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_mult_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - -void csi_mult_rnd_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q31 vector multiplication. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_mult_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Floating-point vector multiplication. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_mult_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - -/* Deprecated */ -csi_status csi_cfft_radix2_init_q15( - csi_cfft_radix2_instance_q15 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -/* Deprecated */ -void csi_cfft_radix2_q15( - const csi_cfft_radix2_instance_q15 * S, - q15_t * pSrc); - - -/* Deprecated */ -csi_status csi_cfft_radix4_init_q15( - csi_cfft_radix4_instance_q15 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -/* Deprecated */ -void csi_cfft_radix4_q15( - const csi_cfft_radix4_instance_q15 * S, - q15_t * pSrc); - -/* Deprecated */ -csi_status csi_cfft_radix2_init_q31( - csi_cfft_radix2_instance_q31 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -/* Deprecated */ -void csi_cfft_radix2_q31( - const csi_cfft_radix2_instance_q31 * S, - q31_t * pSrc); - - -/* Deprecated */ -void csi_cfft_radix4_q31( - const csi_cfft_radix4_instance_q31 * S, - q31_t * pSrc); - -/* Deprecated */ -csi_status csi_cfft_radix4_init_q31( - csi_cfft_radix4_instance_q31 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - -/* Deprecated */ -csi_status csi_cfft_radix2_init_f32( - csi_cfft_radix2_instance_f32 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -/* Deprecated */ - void csi_cfft_radix2_f32( - const csi_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag, - float32_t onebyfftLen); - - -/* Deprecated */ -csi_status csi_cfft_radix4_init_f32( - csi_cfft_radix4_instance_f32 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -/* Deprecated */ - void csi_cfft_radix4_f32( - const csi_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag, - float32_t onebyfftLen); - - void csi_cfft_fast_radix4_f32( - const csi_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag, - float32_t onebyfftLen); - - -void csi_cfft_q15( - const csi_cfft_instance_q15 * S, - q15_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -void csi_cfft_fast_q15( - const csi_cfft_instance_q15 * S, - q15_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -void csi_cfft_q31( - const csi_cfft_instance_q31 * S, - q31_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -void csi_cfft_fast_q31( - const csi_cfft_instance_q31 * S, - q31_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - -void csi_cfft_f32( - const csi_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - -csi_status csi_rfft_init_q15( - csi_rfft_instance_q15 * S, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - -void csi_rfft_q15( - const csi_rfft_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst); - -void csi_rfft_fast_q15( - const csi_rfft_fast_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst); - - -csi_status csi_rfft_init_q31( - csi_rfft_instance_q31 * S, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - -void csi_rfft_q31( - const csi_rfft_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst); - -void csi_rfft_fast_q31( - const csi_rfft_fast_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst); - - -csi_status csi_rfft_init_f32( - csi_rfft_instance_f32 * S, - csi_cfft_radix4_instance_f32 * S_CFFT, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - -void csi_rfft_f32( - const csi_rfft_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst); - - -csi_status csi_rfft_fast_init_f32 ( - csi_rfft_fast_instance_f32 * S, - uint16_t fftLen); - -csi_status csi_rfft_32_fast_init_f32 ( csi_rfft_fast_instance_f32 * S ); - -csi_status csi_rfft_64_fast_init_f32 ( csi_rfft_fast_instance_f32 * S ); - -csi_status csi_rfft_128_fast_init_f32 ( csi_rfft_fast_instance_f32 * S ); - -csi_status csi_rfft_256_fast_init_f32 ( csi_rfft_fast_instance_f32 * S ); - -csi_status csi_rfft_512_fast_init_f32 ( csi_rfft_fast_instance_f32 * S ); - -csi_status csi_rfft_1024_fast_init_f32 ( csi_rfft_fast_instance_f32 * S ); - -csi_status csi_rfft_2048_fast_init_f32 ( csi_rfft_fast_instance_f32 * S ); - -csi_status csi_rfft_4096_fast_init_f32 ( csi_rfft_fast_instance_f32 * S ); - - -void csi_rfft_fast_f32( - csi_rfft_fast_instance_f32 * S, - float32_t * p, float32_t * pOut, - uint8_t ifftFlag); - - -/** - * @brief Initialization function for the floating-point DCT4/IDCT4. - * @param[in,out] S points to an instance of floating-point DCT4/IDCT4 structure. - * @param[in] S_RFFT points to an instance of floating-point RFFT/RIFFT structure. - * @param[in] S_CFFT points to an instance of floating-point CFFT/CIFFT structure. - * @param[in] N length of the DCT4. - * @param[in] Nby2 half of the length of the DCT4. - * @param[in] normalize normalizing factor. - * @return csi_status function returns CSI_MATH_SUCCESS if initialization is successful or CSI_MATH_ARGUMENT_ERROR if fftLenReal is not a supported transform length. - */ -csi_status csi_dct4_init_f32( - csi_dct4_instance_f32 * S, - csi_rfft_fast_instance_f32 * S_RFFT, - csi_cfft_radix4_instance_f32 * S_CFFT, - uint16_t N, - uint16_t Nby2, - float32_t normalize); - - -/** - * @brief Processing function for the floating-point DCT4/IDCT4. - * @param[in] S points to an instance of the floating-point DCT4/IDCT4 structure. - * @param[in] pState points to state buffer. - * @param[in,out] pInlineBuffer points to the in-place input and output buffer. - */ -void csi_dct4_f32( - const csi_dct4_instance_f32 * S, - float32_t * pState, - float32_t * pInlineBuffer); - - - -/** - * @brief Initialization function for the Q31 DCT4/IDCT4. - * @param[in,out] S points to an instance of Q31 DCT4/IDCT4 structure. - * @param[in] S_RFFT points to an instance of Q31 RFFT/RIFFT structure - * @param[in] S_CFFT points to an instance of Q31 CFFT/CIFFT structure - * @param[in] N length of the DCT4. - * @param[in] Nby2 half of the length of the DCT4. - * @param[in] normalize normalizing factor. - * @return csi_status function returns CSI_MATH_SUCCESS if initialization is successful or CSI_MATH_ARGUMENT_ERROR if N is not a supported transform length. - */ -csi_status csi_dct4_init_q31( - csi_dct4_instance_q31 * S, - csi_rfft_instance_q31 * S_RFFT, - csi_cfft_radix4_instance_q31 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q31_t normalize); - - -/** - * @brief Processing function for the Q31 DCT4/IDCT4. - * @param[in] S points to an instance of the Q31 DCT4 structure. - * @param[in] pState points to state buffer. - * @param[in,out] pInlineBuffer points to the in-place input and output buffer. - */ -void csi_dct4_q31( - const csi_dct4_instance_q31 * S, - q31_t * pState, - q31_t * pInlineBuffer); - -void csi_dct4_fast_q31( - const csi_dct4_fast_instance_q31 * S, - q31_t * pState, - q31_t * pInlineBuffer); - - -/** - * @brief Initialization function for the Q15 DCT4/IDCT4. - * @param[in,out] S points to an instance of Q15 DCT4/IDCT4 structure. - * @param[in] S_RFFT points to an instance of Q15 RFFT/RIFFT structure. - * @param[in] S_CFFT points to an instance of Q15 CFFT/CIFFT structure. - * @param[in] N length of the DCT4. - * @param[in] Nby2 half of the length of the DCT4. - * @param[in] normalize normalizing factor. - * @return csi_status function returns CSI_MATH_SUCCESS if initialization is successful or CSI_MATH_ARGUMENT_ERROR if N is not a supported transform length. - */ -csi_status csi_dct4_init_q15( - csi_dct4_instance_q15 * S, - csi_rfft_instance_q15 * S_RFFT, - csi_cfft_radix4_instance_q15 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q15_t normalize); - - -/** - * @brief Processing function for the Q15 DCT4/IDCT4. - * @param[in] S points to an instance of the Q15 DCT4 structure. - * @param[in] pState points to state buffer. - * @param[in,out] pInlineBuffer points to the in-place input and output buffer. - */ -void csi_dct4_q15( - const csi_dct4_instance_q15 * S, - q15_t * pState, - q15_t * pInlineBuffer); - -void csi_dct4_fast_q15( - const csi_dct4_fast_instance_q15 * S, - q15_t * pState, - q15_t * pInlineBuffer); - - -/** - * @brief Floating-point vector addition. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_add_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q7 vector addition. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_add_q7( - const q7_t * pSrcA, - const q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q15 vector addition. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_add_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q31 vector addition. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_add_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Floating-point vector subtraction. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_sub_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q7 vector subtraction. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_sub_q7( - const q7_t * pSrcA, - const q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q15 vector subtraction. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_sub_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q31 vector subtraction. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in each vector - */ -void csi_sub_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Multiplies a floating-point vector by a scalar. - * @param[in] pSrc points to the input vector - * @param[in] scale scale factor to be applied - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_scale_f32( - const float32_t * pSrc, - float32_t scale, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Multiplies a Q7 vector by a scalar. - * @param[in] pSrc points to the input vector - * @param[in] scaleFract fractional portion of the scale value - * @param[in] shift number of bits to shift the result by - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_scale_q7( - const q7_t * pSrc, - q7_t scaleFract, - int8_t shift, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Multiplies a Q15 vector by a scalar. - * @param[in] pSrc points to the input vector - * @param[in] scaleFract fractional portion of the scale value - * @param[in] shift number of bits to shift the result by - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_scale_q15( - const q15_t * pSrc, - q15_t scaleFract, - int8_t shift, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Multiplies a Q31 vector by a scalar. - * @param[in] pSrc points to the input vector - * @param[in] scaleFract fractional portion of the scale value - * @param[in] shift number of bits to shift the result by - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_scale_q31( - const q31_t * pSrc, - q31_t scaleFract, - int8_t shift, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q7 vector absolute value. - * @param[in] pSrc points to the input buffer - * @param[out] pDst points to the output buffer - * @param[in] blockSize number of samples in each vector - */ -void csi_abs_q7( - const q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Floating-point vector absolute value. - * @param[in] pSrc points to the input buffer - * @param[out] pDst points to the output buffer - * @param[in] blockSize number of samples in each vector - */ -void csi_abs_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q15 vector absolute value. - * @param[in] pSrc points to the input buffer - * @param[out] pDst points to the output buffer - * @param[in] blockSize number of samples in each vector - */ -void csi_abs_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Q31 vector absolute value. - * @param[in] pSrc points to the input buffer - * @param[out] pDst points to the output buffer - * @param[in] blockSize number of samples in each vector - */ -void csi_abs_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csi_abs_max_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csi_abs_max_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Dot product of floating-point vectors. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] blockSize number of samples in each vector - * @param[out] result output result returned here - */ -void csi_dot_prod_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - uint32_t blockSize, - float32_t * result); - - -/** - * @brief Dot product of Q7 vectors. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] blockSize number of samples in each vector - * @param[out] result output result returned here - */ -void csi_dot_prod_q7( - const q7_t * pSrcA, - const q7_t * pSrcB, - uint32_t blockSize, - q31_t * result); - - -/** - * @brief Dot product of Q15 vectors. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] blockSize number of samples in each vector - * @param[out] result output result returned here - */ -void csi_dot_prod_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - uint32_t blockSize, - q63_t * result); - - -/** - * @brief Dot product of Q31 vectors. - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] blockSize number of samples in each vector - * @param[out] result output result returned here - */ -void csi_dot_prod_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - uint32_t blockSize, - q63_t * result); - - -/** - * @brief Shifts the elements of a Q7 vector a specified number of bits. - * @param[in] pSrc points to the input vector - * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right. - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_shift_q7( - const q7_t * pSrc, - int8_t shiftBits, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Shifts the elements of a Q15 vector a specified number of bits. - * @param[in] pSrc points to the input vector - * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right. - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_shift_q15( - const q15_t * pSrc, - int8_t shiftBits, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Shifts the elements of a Q31 vector a specified number of bits. - * @param[in] pSrc points to the input vector - * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right. - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_shift_q31( - const q31_t * pSrc, - int8_t shiftBits, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Adds a constant offset to a floating-point vector. - * @param[in] pSrc points to the input vector - * @param[in] offset is the offset to be added - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_offset_f32( - const float32_t * pSrc, - float32_t offset, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Adds a constant offset to a Q7 vector. - * @param[in] pSrc points to the input vector - * @param[in] offset is the offset to be added - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_offset_q7( - const q7_t * pSrc, - q7_t offset, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Adds a constant offset to a Q15 vector. - * @param[in] pSrc points to the input vector - * @param[in] offset is the offset to be added - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_offset_q15( - const q15_t * pSrc, - q15_t offset, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Adds a constant offset to a Q31 vector. - * @param[in] pSrc points to the input vector - * @param[in] offset is the offset to be added - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_offset_q31( - const q31_t * pSrc, - q31_t offset, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Negates the elements of a floating-point vector. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_negate_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Negates the elements of a Q7 vector. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_negate_q7( - const q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Negates the elements of a Q15 vector. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_negate_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Negates the elements of a Q31 vector. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] blockSize number of samples in the vector - */ -void csi_negate_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Copies the elements of a floating-point vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ -void csi_copy_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Copies the elements of a Q7 vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ -void csi_copy_q7( - const q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Copies the elements of a Q15 vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ -void csi_copy_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Copies the elements of a Q31 vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ -void csi_copy_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Fills a constant value into a floating-point vector. - * @param[in] value input value to be filled - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ -void csi_fill_f32( - float32_t value, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Fills a constant value into a Q7 vector. - * @param[in] value input value to be filled - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ -void csi_fill_q7( - q7_t value, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Fills a constant value into a Q15 vector. - * @param[in] value input value to be filled - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ -void csi_fill_q15( - q15_t value, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Fills a constant value into a Q31 vector. - * @param[in] value input value to be filled - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ -void csi_fill_q31( - q31_t value, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Convolution of floating-point sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the location where the output result is written. Length srcALen+srcBLen-1. - */ -void csi_conv_f32( - const float32_t * pSrcA, - uint32_t srcALen, - const float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst); - - -/** - * @brief Convolution of Q15 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). - */ -void csi_conv_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - -/** - * @brief Convolution of Q15 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the location where the output result is written. Length srcALen+srcBLen-1. - */ -void csi_conv_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - -/** - * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - */ -void csi_conv_fast_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - -/** - * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). - */ -void csi_conv_fast_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - -/** - * @brief Convolution of Q31 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - */ -void csi_conv_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - -/** - * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - */ -void csi_conv_fast_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - -/** -* @brief Convolution of Q7 sequences. -* @param[in] pSrcA points to the first input sequence. -* @param[in] srcALen length of the first input sequence. -* @param[in] pSrcB points to the second input sequence. -* @param[in] srcBLen length of the second input sequence. -* @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. -* @param[in] pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. -* @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen). -*/ -void csi_conv_opt_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - -/** - * @brief Convolution of Q7 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length srcALen+srcBLen-1. - */ -void csi_conv_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst); - - -/** - * @brief Partial convolution of floating-point sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either CSI_MATH_SUCCESS if the function completed correctly or CSI_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ -csi_status csi_conv_partial_f32( - const float32_t * pSrcA, - uint32_t srcALen, - const float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - -/** - * @brief Partial convolution of Q15 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). - * @return Returns either CSI_MATH_SUCCESS if the function completed correctly or CSI_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ -csi_status csi_conv_partial_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - -/** - * @brief Partial convolution of Q15 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either CSI_MATH_SUCCESS if the function completed correctly or CSI_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ -csi_status csi_conv_partial_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - -/** - * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either CSI_MATH_SUCCESS if the function completed correctly or CSI_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ -csi_status csi_conv_partial_fast_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - -/** - * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @param[in] pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer of size min(srcALen, srcBLen). - * @return Returns either CSI_MATH_SUCCESS if the function completed correctly or CSI_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ -csi_status csi_conv_partial_fast_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - -/** - * @brief Partial convolution of Q31 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either CSI_MATH_SUCCESS if the function completed correctly or CSI_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ -csi_status csi_conv_partial_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - -/** - * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4 - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either CSI_MATH_SUCCESS if the function completed correctly or CSI_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ -csi_status csi_conv_partial_fast_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - -/** - * @brief Partial convolution of Q7 sequences - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @param[in] pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen). - * @return Returns either CSI_MATH_SUCCESS if the function completed correctly or CSI_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ -csi_status csi_conv_partial_opt_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - -/** - * @brief Partial convolution of Q7 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data - * @param[in] firstIndex is the first output sample to start with. - * @param[in] numPoints is the number of output points to be computed. - * @return Returns either CSI_MATH_SUCCESS if the function completed correctly or CSI_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2]. - */ -csi_status csi_conv_partial_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - - -/** - @brief Processing function for floating-point FIR decimator. - @param[in] S points to an instance of the floating-point FIR decimator structure - @param[in] pSrc points to the block of input data - @param[out] pDst points to the block of output data - @param[in] blockSize number of samples to process - */ -void csi_fir_decimate_f32( - const csi_fir_decimate_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - @brief Initialization function for the floating-point FIR decimator. - @param[in,out] S points to an instance of the floating-point FIR decimator structure - @param[in] numTaps number of coefficients in the filter - @param[in] M decimation factor - @param[in] pCoeffs points to the filter coefficients - @param[in] pState points to the state buffer - @param[in] blockSize number of input samples to process per call - @return execution status - - \ref CSI_MATH_SUCCESS : Operation successful - - \ref CSI_MATH_LENGTH_ERROR : blockSize is not a multiple of M - */ -csi_status csi_fir_decimate_init_f32( - csi_fir_decimate_instance_f32 * S, - uint16_t numTaps, - uint8_t M, - const float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - -/** - * @brief Processing function for the Q15 FIR decimator. - * @param[in] S points to an instance of the Q15 FIR decimator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_decimate_q15( - const csi_fir_decimate_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4. - * @param[in] S points to an instance of the Q15 FIR decimator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_decimate_fast_q15( - const csi_fir_decimate_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q15 FIR decimator. - * @param[in,out] S points to an instance of the Q15 FIR decimator structure. - * @param[in] numTaps number of coefficients in the filter. - * @param[in] M decimation factor. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of input samples to process per call. - * @return The function returns CSI_MATH_SUCCESS if initialization is successful or CSI_MATH_LENGTH_ERROR if - * blockSize is not a multiple of M. - */ -csi_status csi_fir_decimate_init_q15( - csi_fir_decimate_instance_q15 * S, - uint16_t numTaps, - uint8_t M, - const q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - -/** - * @brief Processing function for the Q31 FIR decimator. - * @param[in] S points to an instance of the Q31 FIR decimator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_decimate_q31( - const csi_fir_decimate_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - -/** - * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4. - * @param[in] S points to an instance of the Q31 FIR decimator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_decimate_fast_q31( - const csi_fir_decimate_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q31 FIR decimator. - * @param[in,out] S points to an instance of the Q31 FIR decimator structure. - * @param[in] numTaps number of coefficients in the filter. - * @param[in] M decimation factor. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of input samples to process per call. - * @return The function returns CSI_MATH_SUCCESS if initialization is successful or CSI_MATH_LENGTH_ERROR if - * blockSize is not a multiple of M. - */ -csi_status csi_fir_decimate_init_q31( - csi_fir_decimate_instance_q31 * S, - uint16_t numTaps, - uint8_t M, - const q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - - -/** - * @brief Processing function for the Q15 FIR interpolator. - * @param[in] S points to an instance of the Q15 FIR interpolator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_interpolate_q15( - const csi_fir_interpolate_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q15 FIR interpolator. - * @param[in,out] S points to an instance of the Q15 FIR interpolator structure. - * @param[in] L upsample factor. - * @param[in] numTaps number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficient buffer. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of input samples to process per call. - * @return The function returns CSI_MATH_SUCCESS if initialization is successful or CSI_MATH_LENGTH_ERROR if - * the filter length numTaps is not a multiple of the interpolation factor L. - */ -csi_status csi_fir_interpolate_init_q15( - csi_fir_interpolate_instance_q15 * S, - uint8_t L, - uint16_t numTaps, - const q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - -/** - * @brief Processing function for the Q31 FIR interpolator. - * @param[in] S points to an instance of the Q15 FIR interpolator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_interpolate_q31( - const csi_fir_interpolate_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q31 FIR interpolator. - * @param[in,out] S points to an instance of the Q31 FIR interpolator structure. - * @param[in] L upsample factor. - * @param[in] numTaps number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficient buffer. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of input samples to process per call. - * @return The function returns CSI_MATH_SUCCESS if initialization is successful or CSI_MATH_LENGTH_ERROR if - * the filter length numTaps is not a multiple of the interpolation factor L. - */ -csi_status csi_fir_interpolate_init_q31( - csi_fir_interpolate_instance_q31 * S, - uint8_t L, - uint16_t numTaps, - const q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - -/** - * @brief Processing function for the floating-point FIR interpolator. - * @param[in] S points to an instance of the floating-point FIR interpolator structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_interpolate_f32( - const csi_fir_interpolate_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the floating-point FIR interpolator. - * @param[in,out] S points to an instance of the floating-point FIR interpolator structure. - * @param[in] L upsample factor. - * @param[in] numTaps number of filter coefficients in the filter. - * @param[in] pCoeffs points to the filter coefficient buffer. - * @param[in] pState points to the state buffer. - * @param[in] blockSize number of input samples to process per call. - * @return The function returns CSI_MATH_SUCCESS if initialization is successful or CSI_MATH_LENGTH_ERROR if - * the filter length numTaps is not a multiple of the interpolation factor L. - */ -csi_status csi_fir_interpolate_init_f32( - csi_fir_interpolate_instance_f32 * S, - uint8_t L, - uint16_t numTaps, - const float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - -/** - * @param[in] S points to an instance of the high precision Q31 Biquad cascade filter structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ -void csi_biquad_cas_df1_32x64_q31( - const csi_biquad_cas_df1_32x64_ins_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @param[in,out] S points to an instance of the high precision Q31 Biquad cascade filter structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] postShift shift to be applied to the output. Varies according to the coefficients format - */ -void csi_biquad_cas_df1_32x64_init_q31( - csi_biquad_cas_df1_32x64_ins_q31 * S, - uint8_t numStages, - const q31_t * pCoeffs, - q63_t * pState, - uint8_t postShift); - - -/** - * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. - * @param[in] S points to an instance of the filter data structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ -void csi_biquad_cascade_df2T_f32( - const csi_biquad_cascade_df2T_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. 2 channels - * @param[in] S points to an instance of the filter data structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ -void csi_biquad_cascade_stereo_df2T_f32( - const csi_biquad_cascade_stereo_df2T_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. - * @param[in] S points to an instance of the filter data structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ -void csi_biquad_cascade_df2T_f64( - const csi_biquad_cascade_df2T_instance_f64 * S, - float64_t * pSrc, - float64_t * pDst, - uint32_t blockSize); - - -#if defined(CSI_MATH_NEON) -void csi_biquad_cascade_df2T_compute_coefs_f32( - csi_biquad_cascade_df2T_instance_f32 * S, - uint8_t numStages, - float32_t * pCoeffs); -#endif -/** - * @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter. - * @param[in,out] S points to an instance of the filter data structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - */ -void csi_biquad_cascade_df2T_init_f32( - csi_biquad_cascade_df2T_instance_f32 * S, - uint8_t numStages, - const float32_t * pCoeffs, - float32_t * pState); - - -/** - * @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter. - * @param[in,out] S points to an instance of the filter data structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - */ -void csi_biquad_cascade_stereo_df2T_init_f32( - csi_biquad_cascade_stereo_df2T_instance_f32 * S, - uint8_t numStages, - const float32_t * pCoeffs, - float32_t * pState); - - -/** - * @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter. - * @param[in,out] S points to an instance of the filter data structure. - * @param[in] numStages number of 2nd order stages in the filter. - * @param[in] pCoeffs points to the filter coefficients. - * @param[in] pState points to the state buffer. - */ -void csi_biquad_cascade_df2T_init_f64( - csi_biquad_cascade_df2T_instance_f64 * S, - uint8_t numStages, - float64_t * pCoeffs, - float64_t * pState); - - -/** - * @brief Initialization function for the Q15 FIR lattice filter. - * @param[in] S points to an instance of the Q15 FIR lattice structure. - * @param[in] numStages number of filter stages. - * @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages. - * @param[in] pState points to the state buffer. The array is of length numStages. - */ -void csi_fir_lattice_init_q15( - csi_fir_lattice_instance_q15 * S, - uint16_t numStages, - const q15_t * pCoeffs, - q15_t * pState); - - -/** - * @brief Processing function for the Q15 FIR lattice filter. - * @param[in] S points to an instance of the Q15 FIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_fir_lattice_q15( - const csi_fir_lattice_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q31 FIR lattice filter. - * @param[in] S points to an instance of the Q31 FIR lattice structure. - * @param[in] numStages number of filter stages. - * @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages. - * @param[in] pState points to the state buffer. The array is of length numStages. - */ -void csi_fir_lattice_init_q31( - csi_fir_lattice_instance_q31 * S, - uint16_t numStages, - const q31_t * pCoeffs, - q31_t * pState); - - -/** - * @brief Processing function for the Q31 FIR lattice filter. - * @param[in] S points to an instance of the Q31 FIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ -void csi_fir_lattice_q31( - const csi_fir_lattice_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the floating-point FIR lattice filter. - * @param[in] S points to an instance of the floating-point FIR lattice structure. - * @param[in] numStages number of filter stages. - * @param[in] pCoeffs points to the coefficient buffer. The array is of length numStages. - * @param[in] pState points to the state buffer. The array is of length numStages. - */ -void csi_fir_lattice_init_f32( - csi_fir_lattice_instance_f32 * S, - uint16_t numStages, - const float32_t * pCoeffs, - float32_t * pState); - - -/** - * @brief Processing function for the floating-point FIR lattice filter. - * @param[in] S points to an instance of the floating-point FIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ -void csi_fir_lattice_f32( - const csi_fir_lattice_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Processing function for the floating-point IIR lattice filter. - * @param[in] S points to an instance of the floating-point IIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_iir_lattice_f32( - const csi_iir_lattice_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the floating-point IIR lattice filter. - * @param[in] S points to an instance of the floating-point IIR lattice structure. - * @param[in] numStages number of stages in the filter. - * @param[in] pkCoeffs points to the reflection coefficient buffer. The array is of length numStages. - * @param[in] pvCoeffs points to the ladder coefficient buffer. The array is of length numStages+1. - * @param[in] pState points to the state buffer. The array is of length numStages+blockSize-1. - * @param[in] blockSize number of samples to process. - */ -void csi_iir_lattice_init_f32( - csi_iir_lattice_instance_f32 * S, - uint16_t numStages, - float32_t * pkCoeffs, - float32_t * pvCoeffs, - float32_t * pState, - uint32_t blockSize); - - -/** - * @brief Processing function for the Q31 IIR lattice filter. - * @param[in] S points to an instance of the Q31 IIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_iir_lattice_q31( - const csi_iir_lattice_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q31 IIR lattice filter. - * @param[in] S points to an instance of the Q31 IIR lattice structure. - * @param[in] numStages number of stages in the filter. - * @param[in] pkCoeffs points to the reflection coefficient buffer. The array is of length numStages. - * @param[in] pvCoeffs points to the ladder coefficient buffer. The array is of length numStages+1. - * @param[in] pState points to the state buffer. The array is of length numStages+blockSize. - * @param[in] blockSize number of samples to process. - */ -void csi_iir_lattice_init_q31( - csi_iir_lattice_instance_q31 * S, - uint16_t numStages, - q31_t * pkCoeffs, - q31_t * pvCoeffs, - q31_t * pState, - uint32_t blockSize); - - -/** - * @brief Processing function for the Q15 IIR lattice filter. - * @param[in] S points to an instance of the Q15 IIR lattice structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data. - * @param[in] blockSize number of samples to process. - */ -void csi_iir_lattice_q15( - const csi_iir_lattice_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q15 IIR lattice filter. - * @param[in] S points to an instance of the fixed-point Q15 IIR lattice structure. - * @param[in] numStages number of stages in the filter. - * @param[in] pkCoeffs points to reflection coefficient buffer. The array is of length numStages. - * @param[in] pvCoeffs points to ladder coefficient buffer. The array is of length numStages+1. - * @param[in] pState points to state buffer. The array is of length numStages+blockSize. - * @param[in] blockSize number of samples to process per call. - */ -void csi_iir_lattice_init_q15( - csi_iir_lattice_instance_q15 * S, - uint16_t numStages, - q15_t * pkCoeffs, - q15_t * pvCoeffs, - q15_t * pState, - uint32_t blockSize); - - -/** - * @brief Processing function for floating-point LMS filter. - * @param[in] S points to an instance of the floating-point LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ -void csi_lms_f32( - const csi_lms_instance_f32 * S, - const float32_t * pSrc, - float32_t * pRef, - float32_t * pOut, - float32_t * pErr, - uint32_t blockSize); - - -/** - * @brief Initialization function for floating-point LMS filter. - * @param[in] S points to an instance of the floating-point LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to the coefficient buffer. - * @param[in] pState points to state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - */ -void csi_lms_init_f32( - csi_lms_instance_f32 * S, - uint16_t numTaps, - const float32_t * pCoeffs, - float32_t * pState, - float32_t mu, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q15 LMS filter. - * @param[in] S points to an instance of the Q15 LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to the coefficient buffer. - * @param[in] pState points to the state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - * @param[in] postShift bit shift applied to coefficients. - */ -void csi_lms_init_q15( - csi_lms_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - q15_t mu, - uint32_t blockSize, - uint32_t postShift); - - -/** - * @brief Processing function for Q15 LMS filter. - * @param[in] S points to an instance of the Q15 LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ -void csi_lms_q15( - const csi_lms_instance_q15 * S, - const q15_t * pSrc, - q15_t * pRef, - q15_t * pOut, - q15_t * pErr, - uint32_t blockSize); - - -/** - * @brief Processing function for Q31 LMS filter. - * @param[in] S points to an instance of the Q15 LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ -void csi_lms_q31( - const csi_lms_instance_q31 * S, - const q31_t * pSrc, - q31_t * pRef, - q31_t * pOut, - q31_t * pErr, - uint32_t blockSize); - - -/** - * @brief Initialization function for Q31 LMS filter. - * @param[in] S points to an instance of the Q31 LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to coefficient buffer. - * @param[in] pState points to state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - * @param[in] postShift bit shift applied to coefficients. - */ -void csi_lms_init_q31( - csi_lms_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - q31_t mu, - uint32_t blockSize, - uint32_t postShift); - - -/** - * @brief Processing function for floating-point normalized LMS filter. - * @param[in] S points to an instance of the floating-point normalized LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ -void csi_lms_norm_f32( - csi_lms_norm_instance_f32 * S, - const float32_t * pSrc, - float32_t * pRef, - float32_t * pOut, - float32_t * pErr, - uint32_t blockSize); - - -/** - * @brief Initialization function for floating-point normalized LMS filter. - * @param[in] S points to an instance of the floating-point LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to coefficient buffer. - * @param[in] pState points to state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - */ -void csi_lms_norm_init_f32( - csi_lms_norm_instance_f32 * S, - uint16_t numTaps, - const float32_t * pCoeffs, - float32_t * pState, - float32_t mu, - uint32_t blockSize); - - -/** - * @brief Processing function for Q31 normalized LMS filter. - * @param[in] S points to an instance of the Q31 normalized LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ -void csi_lms_norm_q31( - csi_lms_norm_instance_q31 * S, - const q31_t * pSrc, - q31_t * pRef, - q31_t * pOut, - q31_t * pErr, - uint32_t blockSize); - - -/** - * @brief Initialization function for Q31 normalized LMS filter. - * @param[in] S points to an instance of the Q31 normalized LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to coefficient buffer. - * @param[in] pState points to state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - * @param[in] postShift bit shift applied to coefficients. - */ -void csi_lms_norm_init_q31( - csi_lms_norm_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - q31_t mu, - uint32_t blockSize, - uint8_t postShift); - - -/** - * @brief Processing function for Q15 normalized LMS filter. - * @param[in] S points to an instance of the Q15 normalized LMS filter structure. - * @param[in] pSrc points to the block of input data. - * @param[in] pRef points to the block of reference data. - * @param[out] pOut points to the block of output data. - * @param[out] pErr points to the block of error data. - * @param[in] blockSize number of samples to process. - */ -void csi_lms_norm_q15( - csi_lms_norm_instance_q15 * S, - const q15_t * pSrc, - q15_t * pRef, - q15_t * pOut, - q15_t * pErr, - uint32_t blockSize); - - -/** - * @brief Initialization function for Q15 normalized LMS filter. - * @param[in] S points to an instance of the Q15 normalized LMS filter structure. - * @param[in] numTaps number of filter coefficients. - * @param[in] pCoeffs points to coefficient buffer. - * @param[in] pState points to state buffer. - * @param[in] mu step size that controls filter coefficient updates. - * @param[in] blockSize number of samples to process. - * @param[in] postShift bit shift applied to coefficients. - */ -void csi_lms_norm_init_q15( - csi_lms_norm_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - q15_t mu, - uint32_t blockSize, - uint8_t postShift); - - -/** - * @brief Correlation of floating-point sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - */ -void csi_correlate_f32( - const float32_t * pSrcA, - uint32_t srcALen, - const float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst); - - -/** - @brief Correlation of Q15 sequences - @param[in] pSrcA points to the first input sequence - @param[in] srcALen length of the first input sequence - @param[in] pSrcB points to the second input sequence - @param[in] srcBLen length of the second input sequence - @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - @param[in] pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. -*/ -void csi_correlate_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch); - - -/** - @brief Correlation of Q15 sequences. - @param[in] pSrcA points to the first input sequence - @param[in] srcALen length of the first input sequence - @param[in] pSrcB points to the second input sequence - @param[in] srcBLen length of the second input sequence - @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - */ -void csi_correlate_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - -/** - @brief Correlation of Q15 sequences (fast version). - @param[in] pSrcA points to the first input sequence - @param[in] srcALen length of the first input sequence - @param[in] pSrcB points to the second input sequence - @param[in] srcBLen length of the second input sequence - @param[out] pDst points to the location where the output result is written. Length 2 * max(srcALen, srcBLen) - 1. - @return none - */ -void csi_correlate_fast_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - -/** - @brief Correlation of Q15 sequences (fast version). - @param[in] pSrcA points to the first input sequence. - @param[in] srcALen length of the first input sequence. - @param[in] pSrcB points to the second input sequence. - @param[in] srcBLen length of the second input sequence. - @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - @param[in] pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - */ -void csi_correlate_fast_opt_q15( - const q15_t * pSrcA, - uint32_t srcALen, - const q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch); - - -/** - * @brief Correlation of Q31 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - */ -void csi_correlate_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - -/** - @brief Correlation of Q31 sequences (fast version). - @param[in] pSrcA points to the first input sequence - @param[in] srcALen length of the first input sequence - @param[in] pSrcB points to the second input sequence - @param[in] srcBLen length of the second input sequence - @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - */ -void csi_correlate_fast_q31( - const q31_t * pSrcA, - uint32_t srcALen, - const q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - -/** - * @brief Correlation of Q7 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - * @param[in] pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2. - * @param[in] pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen). - */ -void csi_correlate_opt_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - -/** - * @brief Correlation of Q7 sequences. - * @param[in] pSrcA points to the first input sequence. - * @param[in] srcALen length of the first input sequence. - * @param[in] pSrcB points to the second input sequence. - * @param[in] srcBLen length of the second input sequence. - * @param[out] pDst points to the block of output data Length 2 * max(srcALen, srcBLen) - 1. - */ -void csi_correlate_q7( - const q7_t * pSrcA, - uint32_t srcALen, - const q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst); - -/** - * @brief Processing function for the floating-point sparse FIR filter. - * @param[in] S points to an instance of the floating-point sparse FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] pScratchIn points to a temporary buffer of size blockSize. - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_sparse_f32( - csi_fir_sparse_instance_f32 * S, - const float32_t * pSrc, - float32_t * pDst, - float32_t * pScratchIn, - uint32_t blockSize); - - -/** - * @brief Initialization function for the floating-point sparse FIR filter. - * @param[in,out] S points to an instance of the floating-point sparse FIR structure. - * @param[in] numTaps number of nonzero coefficients in the filter. - * @param[in] pCoeffs points to the array of filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] pTapDelay points to the array of offset times. - * @param[in] maxDelay maximum offset time supported. - * @param[in] blockSize number of samples that will be processed per block. - */ -void csi_fir_sparse_init_f32( - csi_fir_sparse_instance_f32 * S, - uint16_t numTaps, - const float32_t * pCoeffs, - float32_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - -/** - * @brief Processing function for the Q31 sparse FIR filter. - * @param[in] S points to an instance of the Q31 sparse FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] pScratchIn points to a temporary buffer of size blockSize. - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_sparse_q31( - csi_fir_sparse_instance_q31 * S, - const q31_t * pSrc, - q31_t * pDst, - q31_t * pScratchIn, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q31 sparse FIR filter. - * @param[in,out] S points to an instance of the Q31 sparse FIR structure. - * @param[in] numTaps number of nonzero coefficients in the filter. - * @param[in] pCoeffs points to the array of filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] pTapDelay points to the array of offset times. - * @param[in] maxDelay maximum offset time supported. - * @param[in] blockSize number of samples that will be processed per block. - */ -void csi_fir_sparse_init_q31( - csi_fir_sparse_instance_q31 * S, - uint16_t numTaps, - const q31_t * pCoeffs, - q31_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - -/** - * @brief Processing function for the Q15 sparse FIR filter. - * @param[in] S points to an instance of the Q15 sparse FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] pScratchIn points to a temporary buffer of size blockSize. - * @param[in] pScratchOut points to a temporary buffer of size blockSize. - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_sparse_q15( - csi_fir_sparse_instance_q15 * S, - const q15_t * pSrc, - q15_t * pDst, - q15_t * pScratchIn, - q31_t * pScratchOut, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q15 sparse FIR filter. - * @param[in,out] S points to an instance of the Q15 sparse FIR structure. - * @param[in] numTaps number of nonzero coefficients in the filter. - * @param[in] pCoeffs points to the array of filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] pTapDelay points to the array of offset times. - * @param[in] maxDelay maximum offset time supported. - * @param[in] blockSize number of samples that will be processed per block. - */ -void csi_fir_sparse_init_q15( - csi_fir_sparse_instance_q15 * S, - uint16_t numTaps, - const q15_t * pCoeffs, - q15_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - -/** - * @brief Processing function for the Q7 sparse FIR filter. - * @param[in] S points to an instance of the Q7 sparse FIR structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] pScratchIn points to a temporary buffer of size blockSize. - * @param[in] pScratchOut points to a temporary buffer of size blockSize. - * @param[in] blockSize number of input samples to process per call. - */ -void csi_fir_sparse_q7( - csi_fir_sparse_instance_q7 * S, - const q7_t * pSrc, - q7_t * pDst, - q7_t * pScratchIn, - q31_t * pScratchOut, - uint32_t blockSize); - - -/** - * @brief Initialization function for the Q7 sparse FIR filter. - * @param[in,out] S points to an instance of the Q7 sparse FIR structure. - * @param[in] numTaps number of nonzero coefficients in the filter. - * @param[in] pCoeffs points to the array of filter coefficients. - * @param[in] pState points to the state buffer. - * @param[in] pTapDelay points to the array of offset times. - * @param[in] maxDelay maximum offset time supported. - * @param[in] blockSize number of samples that will be processed per block. - */ -void csi_fir_sparse_init_q7( - csi_fir_sparse_instance_q7 * S, - uint16_t numTaps, - const q7_t * pCoeffs, - q7_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - -/** - * @brief Floating-point sin_cos function. - * @param[in] theta input value in degrees - * @param[out] pSinVal points to the processed sine output. - * @param[out] pCosVal points to the processed cos output. - */ -void csi_sin_cos_f32( - float32_t theta, - float32_t * pSinVal, - float32_t * pCosVal); - - -/** - * @brief Q31 sin_cos function. - * @param[in] theta scaled input value in degrees - * @param[out] pSinVal points to the processed sine output. - * @param[out] pCosVal points to the processed cosine output. - */ -void csi_sin_cos_q31( - q31_t theta, - q31_t * pSinVal, - q31_t * pCosVal); - - -/** - * @brief Floating-point complex conjugate. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ -void csi_cmplx_conj_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - -/** - * @brief Q31 complex conjugate. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ -void csi_cmplx_conj_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - -/** - * @brief Q15 complex conjugate. - * @param[in] pSrc points to the input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ -void csi_cmplx_conj_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - -/** - * @brief Floating-point complex magnitude squared - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ -void csi_cmplx_mag_squared_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - -/** - * @brief Q31 complex magnitude squared - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ -void csi_cmplx_mag_squared_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - -void csi_cmplx_mag_squared_q31_basic( - q31_t * pSrc, - q63_t * pDst, - uint32_t numSamples); - - -/** - * @brief Q15 complex magnitude squared - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ -void csi_cmplx_mag_squared_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - -/** - * @ingroup groupController - */ - -/** - * @defgroup PID PID Motor Control - * - * A Proportional Integral Derivative (PID) controller is a generic feedback control - * loop mechanism widely used in industrial control systems. - * A PID controller is the most commonly used type of feedback controller. - * - * This set of functions implements (PID) controllers - * for Q15, Q31, and floating-point data types. The functions operate on a single sample - * of data and each call to the function returns a single processed value. - * S points to an instance of the PID control data structure. in - * is the input sample value. The functions return the output value. - * - * \par Algorithm: - *
- *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
- *    A0 = Kp + Ki + Kd
- *    A1 = (-Kp ) - (2 * Kd )
- *    A2 = Kd
- * 
- * - * \par - * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant - * - * \par - * \image html PID.gif "Proportional Integral Derivative Controller" - * - * \par - * The PID controller calculates an "error" value as the difference between - * the measured output and the reference input. - * The controller attempts to minimize the error by adjusting the process control inputs. - * The proportional value determines the reaction to the current error, - * the integral value determines the reaction based on the sum of recent errors, - * and the derivative value determines the reaction based on the rate at which the error has been changing. - * - * \par Instance Structure - * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure. - * A separate instance structure must be defined for each PID Controller. - * There are separate instance structure declarations for each of the 3 supported data types. - * - * \par Reset Functions - * There is also an associated reset function for each data type which clears the state array. - * - * \par Initialization Functions - * There is also an associated initialization function for each data type. - * The initialization function performs the following operations: - * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains. - * - Zeros out the values in the state buffer. - * - * \par - * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function. - * - * \par Fixed-Point Behavior - * Care must be taken when using the fixed-point versions of the PID Controller functions. - * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - -/** - * @addtogroup PID - * @{ - */ - -/** - * @brief Process function for the floating-point PID Control. - * @param[in,out] S is an instance of the floating-point PID Control structure - * @param[in] in input sample to process - * @return processed output sample. - */ -__STATIC_FORCEINLINE float32_t csi_pid_f32( - csi_pid_instance_f32 * S, - float32_t in) -{ - float32_t out; - /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2] */ - out = (S->A0 * in) + - (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]); - /* Update state */ - S->state[1] = S->state[0]; - S->state[0] = in; - S->state[2] = out; - /* return to application */ - return (out); -} - -/** - @brief Process function for the Q31 PID Control. - @param[in,out] S points to an instance of the Q31 PID Control structure - @param[in] in input sample to process - @return processed output sample. - - \par Scaling and Overflow Behavior - The function is implemented using an internal 64-bit accumulator. - The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit. - Thus, if the accumulator result overflows it wraps around rather than clip. - In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions. - After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format. - */ -__STATIC_FORCEINLINE q31_t csi_pid_q31( - csi_pid_instance_q31 * S, - q31_t in) -{ - q63_t acc; - q31_t out; - /* acc = A0 * x[n] */ - acc = (q63_t) S->A0 * in; - /* acc += A1 * x[n-1] */ - acc += (q63_t) S->A1 * S->state[0]; - /* acc += A2 * x[n-2] */ - acc += (q63_t) S->A2 * S->state[1]; - /* convert output to 1.31 format to add y[n-1] */ - out = (q31_t) (acc >> 31U); - /* out += y[n-1] */ - out += S->state[2]; - /* Update state */ - S->state[1] = S->state[0]; - S->state[0] = in; - S->state[2] = out; - /* return to application */ - return (out); -} - - -/** - @brief Process function for the Q15 PID Control. - @param[in,out] S points to an instance of the Q15 PID Control structure - @param[in] in input sample to process - @return processed output sample. - - \par Scaling and Overflow Behavior - The function is implemented using a 64-bit internal accumulator. - Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result. - The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. - There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. - After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. - Lastly, the accumulator is saturated to yield a result in 1.15 format. - */ -__STATIC_FORCEINLINE q15_t csi_pid_q15( - csi_pid_instance_q15 * S, - q15_t in) -{ - q63_t acc; - q15_t out; -#if defined (CSI_MATH_DSP) - /* Implementation of PID controller */ - /* acc = A0 * x[n] */ - acc = (q31_t) __SMUAD((uint32_t)S->A0, (uint32_t)in); - /* acc += A1 * x[n-1] + A2 * x[n-2] */ - acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)read_q15x2 (S->state), (uint64_t)acc); -#else - /* acc = A0 * x[n] */ - acc = ((q31_t) S->A0) * in; - /* acc += A1 * x[n-1] + A2 * x[n-2] */ - acc += (q31_t) S->A1 * S->state[0]; - acc += (q31_t) S->A2 * S->state[1]; -#endif - /* acc += y[n-1] */ - acc += (q31_t) S->state[2] << 15; - /* saturate the output */ - out = (q15_t) (__SSAT((q31_t)(acc >> 15), 16)); - /* Update state */ - S->state[1] = S->state[0]; - S->state[0] = in; - S->state[2] = out; - /* return to application */ - return (out); -} - -/** - * @} end of PID group - */ - - -/** - * @brief Floating-point matrix inverse. - * @param[in] src points to the instance of the input floating-point matrix structure. - * @param[out] dst points to the instance of the output floating-point matrix structure. - * @return The function returns CSI_MATH_SIZE_MISMATCH, if the dimensions do not match. - * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status CSI_MATH_SINGULAR. - */ -csi_status csi_mat_inverse_f32( - const csi_matrix_instance_f32 * src, - csi_matrix_instance_f32 * dst); - - -/** - * @brief Floating-point matrix inverse. - * @param[in] src points to the instance of the input floating-point matrix structure. - * @param[out] dst points to the instance of the output floating-point matrix structure. - * @return The function returns CSI_MATH_SIZE_MISMATCH, if the dimensions do not match. - * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status CSI_MATH_SINGULAR. - */ -csi_status csi_mat_inverse_f64( - const csi_matrix_instance_f64 * src, - csi_matrix_instance_f64 * dst); - - - -/** - * @ingroup groupController - */ - -/** - * @defgroup clarke Vector Clarke Transform - * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector. - * Generally the Clarke transform uses three-phase currents Ia, Ib and Ic to calculate currents - * in the two-phase orthogonal stator axis Ialpha and Ibeta. - * When Ialpha is superposed with Ia as shown in the figure below - * \image html clarke.gif Stator current space vector and its components in (a,b). - * and Ia + Ib + Ic = 0, in this condition Ialpha and Ibeta - * can be calculated using only Ia and Ib. - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html clarkeFormula.gif - * where Ia and Ib are the instantaneous stator phases and - * pIalpha and pIbeta are the two coordinates of time invariant vector. - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Clarke transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - -/** - * @addtogroup clarke - * @{ - */ - -/** - * - * @brief Floating-point Clarke transform - * @param[in] Ia input three-phase coordinate a - * @param[in] Ib input three-phase coordinate b - * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - * @param[out] pIbeta points to output two-phase orthogonal vector axis beta - * @return none - */ -__STATIC_FORCEINLINE void csi_clarke_f32( - float32_t Ia, - float32_t Ib, - float32_t * pIalpha, - float32_t * pIbeta) -{ - /* Calculate pIalpha using the equation, pIalpha = Ia */ - *pIalpha = Ia; - /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */ - *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib); -} - - -/** - @brief Clarke transform for Q31 version - @param[in] Ia input three-phase coordinate a - @param[in] Ib input three-phase coordinate b - @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - @param[out] pIbeta points to output two-phase orthogonal vector axis beta - @return none - - \par Scaling and Overflow Behavior - The function is implemented using an internal 32-bit accumulator. - The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - There is saturation on the addition, hence there is no risk of overflow. - */ -__STATIC_FORCEINLINE void csi_clarke_q31( - q31_t Ia, - q31_t Ib, - q31_t * pIalpha, - q31_t * pIbeta) -{ - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - /* Calculating pIalpha from Ia by equation pIalpha = Ia */ - *pIalpha = Ia; - /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */ - product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30); - /* Intermediate product is calculated by (2/sqrt(3) * Ib) */ - product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30); - /* pIbeta is calculated by adding the intermediate products */ - *pIbeta = __QADD(product1, product2); -} - -/** - * @} end of clarke group - */ - - -/** - * @ingroup groupController - */ - -/** - * @defgroup inv_clarke Vector Inverse Clarke Transform - * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases. - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html clarkeInvFormula.gif - * where pIa and pIb are the instantaneous stator phases and - * Ialpha and Ibeta are the two coordinates of time invariant vector. - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Clarke transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - -/** - * @addtogroup inv_clarke - * @{ - */ - -/** -* @brief Floating-point Inverse Clarke transform -* @param[in] Ialpha input two-phase orthogonal vector axis alpha -* @param[in] Ibeta input two-phase orthogonal vector axis beta -* @param[out] pIa points to output three-phase coordinate a -* @param[out] pIb points to output three-phase coordinate b -* @return none -*/ -__STATIC_FORCEINLINE void csi_inv_clarke_f32( - float32_t Ialpha, - float32_t Ibeta, - float32_t * pIa, - float32_t * pIb) -{ - /* Calculating pIa from Ialpha by equation pIa = Ialpha */ - *pIa = Ialpha; - /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */ - *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta; -} - - -/** - @brief Inverse Clarke transform for Q31 version - @param[in] Ialpha input two-phase orthogonal vector axis alpha - @param[in] Ibeta input two-phase orthogonal vector axis beta - @param[out] pIa points to output three-phase coordinate a - @param[out] pIb points to output three-phase coordinate b - @return none - - \par Scaling and Overflow Behavior - The function is implemented using an internal 32-bit accumulator. - The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - There is saturation on the subtraction, hence there is no risk of overflow. - */ -__STATIC_FORCEINLINE void csi_inv_clarke_q31( - q31_t Ialpha, - q31_t Ibeta, - q31_t * pIa, - q31_t * pIb) -{ - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - /* Calculating pIa from Ialpha by equation pIa = Ialpha */ - *pIa = Ialpha; - /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */ - product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31); - /* Intermediate product is calculated by (1/sqrt(3) * pIb) */ - product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31); - /* pIb is calculated by subtracting the products */ - *pIb = __QSUB(product2, product1); -} - -/** - * @} end of inv_clarke group - */ - - - -/** - * @ingroup groupController - */ - -/** - * @defgroup park Vector Park Transform - * - * Forward Park transform converts the input two-coordinate vector to flux and torque components. - * The Park transform can be used to realize the transformation of the Ialpha and the Ibeta currents - * from the stationary to the moving reference frame and control the spatial relationship between - * the stator vector current and rotor flux vector. - * If we consider the d axis aligned with the rotor flux, the diagram below shows the - * current vector and the relationship from the two reference frames: - * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame" - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html parkFormula.gif - * where Ialpha and Ibeta are the stator vector components, - * pId and pIq are rotor vector components and cosVal and sinVal are the - * cosine and sine values of theta (rotor flux position). - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Park transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - -/** - * @addtogroup park - * @{ - */ - -/** - * @brief Floating-point Park transform - * @param[in] Ialpha input two-phase vector coordinate alpha - * @param[in] Ibeta input two-phase vector coordinate beta - * @param[out] pId points to output rotor reference frame d - * @param[out] pIq points to output rotor reference frame q - * @param[in] sinVal sine value of rotation angle theta - * @param[in] cosVal cosine value of rotation angle theta - * @return none - * - * The function implements the forward Park transform. - * - */ -__STATIC_FORCEINLINE void csi_park_f32( - float32_t Ialpha, - float32_t Ibeta, - float32_t * pId, - float32_t * pIq, - float32_t sinVal, - float32_t cosVal) -{ - /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */ - *pId = Ialpha * cosVal + Ibeta * sinVal; - /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */ - *pIq = -Ialpha * sinVal + Ibeta * cosVal; -} - - -/** - @brief Park transform for Q31 version - @param[in] Ialpha input two-phase vector coordinate alpha - @param[in] Ibeta input two-phase vector coordinate beta - @param[out] pId points to output rotor reference frame d - @param[out] pIq points to output rotor reference frame q - @param[in] sinVal sine value of rotation angle theta - @param[in] cosVal cosine value of rotation angle theta - @return none - - \par Scaling and Overflow Behavior - The function is implemented using an internal 32-bit accumulator. - The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - There is saturation on the addition and subtraction, hence there is no risk of overflow. - */ -__STATIC_FORCEINLINE void csi_park_q31( - q31_t Ialpha, - q31_t Ibeta, - q31_t * pId, - q31_t * pIq, - q31_t sinVal, - q31_t cosVal) -{ -#ifdef CSI_SIMD - asm volatile( - "rmul.s32.h t0, %0, %3\n\t" - "rmul.s32.h t1, %1, %2\n\t" - "add.s32.s t0, t0, t1\n\t" - "st.w t0, (%4, 0x0)\n\t" - "rmul.s32.h t0, %0, %2\n\t" - "rmul.s32.h t1, %1, %3\n\t" - "sub.s32.s t1, t1, t0\n\t" - "st.w t1, (%5, 0x0)\n\t" - ::"r"(Ialpha),"r"(Ibeta),"r"(sinVal),"r"(cosVal),"r"(pId),"r"(pIq) - :"t0","t1", "memory"); -#else - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - q31_t product3, product4; /* Temporary variables used to store intermediate results */ - /* Intermediate product is calculated by (Ialpha * cosVal) */ - product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31); - /* Intermediate product is calculated by (Ibeta * sinVal) */ - product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31); - /* Intermediate product is calculated by (Ialpha * sinVal) */ - product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31); - /* Intermediate product is calculated by (Ibeta * cosVal) */ - product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31); - /* Calculate pId by adding the two intermediate products 1 and 2 */ - *pId = __QADD(product1, product2); - /* Calculate pIq by subtracting the two intermediate products 3 from 4 */ - *pIq = __QSUB(product4, product3); -#endif -} - -/** - * @} end of park group - */ - - -/** - * @ingroup groupController - */ - -/** - * @defgroup inv_park Vector Inverse Park transform - * Inverse Park transform converts the input flux and torque components to two-coordinate vector. - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html parkInvFormula.gif - * where pIalpha and pIbeta are the stator vector components, - * Id and Iq are rotor vector components and cosVal and sinVal are the - * cosine and sine values of theta (rotor flux position). - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Park transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - -/** - * @addtogroup inv_park - * @{ - */ - -/** -* @brief Floating-point Inverse Park transform -* @param[in] Id input coordinate of rotor reference frame d -* @param[in] Iq input coordinate of rotor reference frame q -* @param[out] pIalpha points to output two-phase orthogonal vector axis alpha -* @param[out] pIbeta points to output two-phase orthogonal vector axis beta -* @param[in] sinVal sine value of rotation angle theta -* @param[in] cosVal cosine value of rotation angle theta -* @return none -*/ -__STATIC_FORCEINLINE void csi_inv_park_f32( - float32_t Id, - float32_t Iq, - float32_t * pIalpha, - float32_t * pIbeta, - float32_t sinVal, - float32_t cosVal) -{ - /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */ - *pIalpha = Id * cosVal - Iq * sinVal; - /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */ - *pIbeta = Id * sinVal + Iq * cosVal; -} - - -/** - @brief Inverse Park transform for Q31 version - @param[in] Id input coordinate of rotor reference frame d - @param[in] Iq input coordinate of rotor reference frame q - @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - @param[out] pIbeta points to output two-phase orthogonal vector axis beta - @param[in] sinVal sine value of rotation angle theta - @param[in] cosVal cosine value of rotation angle theta - @return none - - @par Scaling and Overflow Behavior - The function is implemented using an internal 32-bit accumulator. - The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - There is saturation on the addition, hence there is no risk of overflow. - */ -__STATIC_FORCEINLINE void csi_inv_park_q31( - q31_t Id, - q31_t Iq, - q31_t * pIalpha, - q31_t * pIbeta, - q31_t sinVal, - q31_t cosVal) -{ -#ifdef CSI_SIMD - asm volatile( - "rmul.s32.h t0, %0, %3\n\t" - "rmul.s32.h t1, %1, %2\n\t" - "sub.s32.s t0, t0, t1\n\t" - "st.w t0, (%4, 0x0)\n\t" - "rmul.s32.h t0, %0, %2\n\t" - "rmul.s32.h t1, %1, %3\n\t" - "add.s32.s t0, t0, t1\n\t" - "st.w t0, (%5, 0x0)\n\t" - ::"r"(Id),"r"(Iq),"r"(sinVal),"r"(cosVal),"r"(pIalpha),"r"(pIbeta) - :"t0","t1", "memory"); -#else - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - q31_t product3, product4; /* Temporary variables used to store intermediate results */ - /* Intermediate product is calculated by (Id * cosVal) */ - product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31); - /* Intermediate product is calculated by (Iq * sinVal) */ - product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31); - /* Intermediate product is calculated by (Id * sinVal) */ - product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31); - /* Intermediate product is calculated by (Iq * cosVal) */ - product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31); - /* Calculate pIalpha by using the two intermediate products 1 and 2 */ - *pIalpha = __QSUB(product1, product2); - /* Calculate pIbeta by using the two intermediate products 3 and 4 */ - *pIbeta = __QADD(product4, product3); -#endif -} - -/** - * @} end of Inverse park group - */ - - -/** - * @ingroup groupInterpolation - */ - -/** - * @defgroup LinearInterpolate Linear Interpolation - * - * Linear interpolation is a method of curve fitting using linear polynomials. - * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line - * - * \par - * \image html LinearInterp.gif "Linear interpolation" - * - * \par - * A Linear Interpolate function calculates an output value(y), for the input(x) - * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values) - * - * \par Algorithm: - *
- *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
- *       where x0, x1 are nearest values of input x
- *             y0, y1 are nearest values to output y
- * 
- * - * \par - * This set of functions implements Linear interpolation process - * for Q7, Q15, Q31, and floating-point data types. The functions operate on a single - * sample of data and each call to the function returns a single processed value. - * S points to an instance of the Linear Interpolate function data structure. - * x is the input sample value. The functions returns the output value. - * - * \par - * if x is outside of the table boundary, Linear interpolation returns first value of the table - * if x is below input range and returns last value of table if x is above range. - */ - -/** - * @addtogroup LinearInterpolate - * @{ - */ - -/** - * @brief Process function for the floating-point Linear Interpolation Function. - * @param[in,out] S is an instance of the floating-point Linear Interpolation structure - * @param[in] x input sample to process - * @return y processed output sample. - * - */ -__STATIC_FORCEINLINE float32_t csi_linear_interp_f32( - csi_linear_interp_instance_f32 * S, - float32_t x) -{ - float32_t y; - float32_t x0, x1; /* Nearest input values */ - float32_t y0, y1; /* Nearest output values */ - float32_t xSpacing = S->xSpacing; /* spacing between input values */ - int32_t i; /* Index variable */ - float32_t *pYData = S->pYData; /* pointer to output table */ - /* Calculation of index */ - i = (int32_t) ((x - S->x1) / xSpacing); - - if (i < 0) { - /* Iniatilize output for below specified range as least output value of table */ - y = pYData[0]; - - } else if ((uint32_t)i >= (S->nValues - 1)) { - /* Iniatilize output for above specified range as last output value of table */ - y = pYData[S->nValues - 1]; - - } else { - /* Calculation of nearest input values */ - x0 = S->x1 + i * xSpacing; - x1 = S->x1 + (i + 1) * xSpacing; - /* Read of nearest output values */ - y0 = pYData[i]; - y1 = pYData[i + 1]; - /* Calculation of output */ - y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0)); - } - - /* returns output value */ - return (y); -} - - -/** -* -* @brief Process function for the Q31 Linear Interpolation Function. -* @param[in] pYData pointer to Q31 Linear Interpolation table -* @param[in] x input sample to process -* @param[in] nValues number of table values -* @return y processed output sample. -* -* \par -* Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. -* This function can support maximum of table size 2^12. -* -*/ -__STATIC_FORCEINLINE q31_t csi_linear_interp_q31( - q31_t * pYData, - q31_t x, - uint32_t nValues) -{ - q31_t y; /* output */ - q31_t y0, y1; /* Nearest output values */ - q31_t fract; /* fractional part */ - int32_t index; /* Index to read nearest output values */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - index = ((x & (q31_t)0xFFF00000) >> 20); - - if (index >= (int32_t)(nValues - 1)) { - return (pYData[nValues - 1]); - - } else if (index < 0) { - return (pYData[0]); - - } else { - /* 20 bits for the fractional part */ - /* shift left by 11 to keep fract in 1.31 format */ - fract = (x & 0x000FFFFF) << 11; - /* Read two nearest output values from the index in 1.31(q31) format */ - y0 = pYData[index]; - y1 = pYData[index + 1]; - /* Calculation of y0 * (1-fract) and y is in 2.30 format */ - y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32)); - /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */ - y += ((q31_t) (((q63_t) y1 * fract) >> 32)); - /* Convert y to 1.31 format */ - return (y << 1U); - } -} - - -/** - * - * @brief Process function for the Q15 Linear Interpolation Function. - * @param[in] pYData pointer to Q15 Linear Interpolation table - * @param[in] x input sample to process - * @param[in] nValues number of table values - * @return y processed output sample. - * - * \par - * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. - * This function can support maximum of table size 2^12. - * - */ -__STATIC_FORCEINLINE q15_t csi_linear_interp_q15( - q15_t * pYData, - q31_t x, - uint32_t nValues) -{ - q63_t y; /* output */ - q15_t y0, y1; /* Nearest output values */ - q31_t fract; /* fractional part */ - int32_t index; /* Index to read nearest output values */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - index = ((x & (int32_t)0xFFF00000) >> 20); - - if (index >= (int32_t)(nValues - 1)) { - return (pYData[nValues - 1]); - - } else if (index < 0) { - return (pYData[0]); - - } else { - /* 20 bits for the fractional part */ - /* fract is in 12.20 format */ - fract = (x & 0x000FFFFF); - /* Read two nearest output values from the index */ - y0 = pYData[index]; - y1 = pYData[index + 1]; - /* Calculation of y0 * (1-fract) and y is in 13.35 format */ - y = ((q63_t) y0 * (0xFFFFF - fract)); - /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */ - y += ((q63_t) y1 * (fract)); - /* convert y to 1.15 format */ - return (q15_t) (y >> 20); - } -} - - -/** - * - * @brief Process function for the Q7 Linear Interpolation Function. - * @param[in] pYData pointer to Q7 Linear Interpolation table - * @param[in] x input sample to process - * @param[in] nValues number of table values - * @return y processed output sample. - * - * \par - * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. - * This function can support maximum of table size 2^12. - */ -__STATIC_FORCEINLINE q7_t csi_linear_interp_q7( - q7_t * pYData, - q31_t x, - uint32_t nValues) -{ - q31_t y; /* output */ - q7_t y0, y1; /* Nearest output values */ - q31_t fract; /* fractional part */ - uint32_t index; /* Index to read nearest output values */ - - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - if (x < 0) { - return (pYData[0]); - } - - index = (x >> 20) & 0xfff; - - if (index >= (nValues - 1)) { - return (pYData[nValues - 1]); - - } else { - /* 20 bits for the fractional part */ - /* fract is in 12.20 format */ - fract = (x & 0x000FFFFF); - /* Read two nearest output values from the index and are in 1.7(q7) format */ - y0 = pYData[index]; - y1 = pYData[index + 1]; - /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */ - y = ((y0 * (0xFFFFF - fract))); - /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */ - y += (y1 * fract); - /* convert y to 1.7(q7) format */ - return (q7_t) (y >> 20); - } -} - -/** - * @} end of LinearInterpolate group - */ - -/** - * @brief Fast approximation to the trigonometric sine function for floating-point data. - * @param[in] x input value in radians. - * @return sin(x). - */ -float32_t csi_sin_f32( - float32_t x); - - -/** - * @brief Fast approximation to the trigonometric sine function for Q31 data. - * @param[in] x Scaled input value in radians. - * @return sin(x). - */ -q31_t csi_sin_q31( - q31_t x); - - -/** - * @brief Fast approximation to the trigonometric sine function for Q15 data. - * @param[in] x Scaled input value in radians. - * @return sin(x). - */ -q15_t csi_sin_q15( - q15_t x); - - -/** - * @brief Fast approximation to the trigonometric cosine function for floating-point data. - * @param[in] x input value in radians. - * @return cos(x). - */ -float32_t csi_cos_f32( - float32_t x); - - -/** - * @brief Fast approximation to the trigonometric cosine function for Q31 data. - * @param[in] x Scaled input value in radians. - * @return cos(x). - */ -q31_t csi_cos_q31( - q31_t x); - - -/** - * @brief Fast approximation to the trigonometric cosine function for Q15 data. - * @param[in] x Scaled input value in radians. - * @return cos(x). - */ -q15_t csi_cos_q15( - q15_t x); - - -/** - @brief Floating-point vector of log values. - @param[in] pSrc points to the input vector - @param[out] pDst points to the output vector - @param[in] blockSize number of samples in each vector - @return none - */ -void csi_vlog_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - -/** - @brief Floating-point vector of exp values. - @param[in] pSrc points to the input vector - @param[out] pDst points to the output vector - @param[in] blockSize number of samples in each vector - @return none - */ -void csi_vexp_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - -/** - * @ingroup groupFastMath - */ - - -/** - * @defgroup SQRT Square Root - * - * Computes the square root of a number. - * There are separate functions for Q15, Q31, and floating-point data types. - * The square root function is computed using the Newton-Raphson algorithm. - * This is an iterative algorithm of the form: - *
- *      x1 = x0 - f(x0)/f'(x0)
- * 
- * where x1 is the current estimate, - * x0 is the previous estimate, and - * f'(x0) is the derivative of f() evaluated at x0. - * For the square root function, the algorithm reduces to: - *
- *     x0 = in/2                         [initial guess]
- *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
- * 
- */ - - -/** - * @addtogroup SQRT - * @{ - */ - -/** - @brief Q15 square root function. - @param[in] in input value. The range of the input value is [0 +1) or 0x0000 to 0x7FFF - @param[out] pOut points to square root of input value - @return execution status - - \ref CSI_MATH_SUCCESS : input value is positive - - \ref CSI_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 - */ -csi_status csi_sqrt_q15( - q15_t in, - q15_t * pOut); - -/** - @brief Floating-point square root function. - @param[in] in input value - @param[out] pOut square root of input value - @return execution status - - \ref CSI_MATH_SUCCESS : input value is positive - - \ref CSI_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 - */ -#ifdef __riscv -__STATIC_FORCEINLINE csi_status csi_sqrt_f32( - float32_t in, - float32_t * pOut) -{ - if (in >= 0.0f) { -#ifdef CSI_NEWTON_SQRTF - float32_t eps = 0.000000011; - float32_t val = in / 2; - float32_t last; - - if (in <= eps) { - *pOut = 0.0f; - } else { - do { - last = val; - val = (val + in / val) / 2; - } while (fabsf(val - last) > eps); - *pOut = val; - } -#else - *pOut = sqrtf(in); -#endif - return (CSI_MATH_SUCCESS); - } else { - *pOut = 0.0f; - return (CSI_MATH_ARGUMENT_ERROR); - } -} -#else -csi_status csi_sqrt_f32( - float32_t in, - float32_t * pOut); -#endif - - -/** - @brief Q31 square root function. - @param[in] in input value. The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF - @param[out] pOut points to square root of input value - @return execution status - - \ref CSI_MATH_SUCCESS : input value is positive - - \ref CSI_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 - */ -csi_status csi_sqrt_q31( - q31_t in, - q31_t * pOut); - -/** - * @brief Vector Floating-point square root function. - * @param[in] pIn input vector. - * @param[out] pOut vector of square roots of input elements. - * @param[in] len length of input vector. - * @return The function returns CSI_MATH_SUCCESS if input value is positive value or CSI_MATH_ARGUMENT_ERROR if - * in is negative value and returns zero output for negative values. - */ -#ifdef __csky__ - -void csi_vsqrt_f32( - float32_t * pIn, - float32_t * pOut, - uint16_t len); - - void csi_vsqrt_q15( - q15_t * pIn, - q15_t * pOut, - uint16_t len); - -void csi_vsqrt_q31( - q31_t * pIn, - q31_t * pOut, - uint16_t len); - -void csi_vsqrt_q7( - q7_t * pIn, - q7_t * pOut, - uint16_t len); - - -#else -__STATIC_FORCEINLINE void csi_vsqrt_f32( - float32_t * pIn, - float32_t * pOut, - uint16_t len) -{ - for (int i = 0; i < len; i++) { - csi_sqrt_f32(pIn[i], pOut + i); - } -} - -__STATIC_FORCEINLINE void csi_vsqrt_q15( - q15_t * pIn, - q15_t * pOut, - uint16_t len -) -{ - for (int i = 0; i < len; i++) { - csi_sqrt_q15(pIn[i], pOut + i); - } -} -__STATIC_FORCEINLINE void csi_vsqrt_q31( - q31_t * pIn, - q31_t * pOut, - uint16_t len -) -{ - for (int i = 0; i < len; i++) { - csi_sqrt_q31(pIn[i], pOut + i); - } -} -#endif -/** - * @} end of SQRT group - */ - -/** - * @brief floating-point Circular write function. - a*/ -#ifndef __csky__ -__STATIC_FORCEINLINE void csi_circularWrite_f32( - int32_t * circBuffer, - int32_t L, - uint16_t * writeOffset, - int32_t bufferInc, - const int32_t * src, - int32_t srcInc, - uint32_t blockSize) -{ - uint32_t i = 0U; - int32_t wOffset; - /* Copy the value of Index pointer that points - * to the current location where the input samples to be copied */ - wOffset = *writeOffset; - /* Loop over the blockSize */ - i = blockSize; - - while (i > 0U) { - /* copy the input sample to the circular buffer */ - circBuffer[wOffset] = *src; - /* Update the input pointer */ - src += srcInc; - /* Circularly update wOffset. Watch out for positive and negative value */ - wOffset += bufferInc; - - if (wOffset >= L) - wOffset -= L; - - /* Decrement the loop counter */ - i--; - } - - /* Update the index pointer */ - *writeOffset = (uint16_t)wOffset; -} - - - -/** - * @brief floating-point Circular Read function. - */ -__STATIC_FORCEINLINE void csi_circularRead_f32( - int32_t * circBuffer, - int32_t L, - int32_t * readOffset, - int32_t bufferInc, - int32_t * dst, - int32_t * dst_base, - int32_t dst_length, - int32_t dstInc, - uint32_t blockSize) -{ - uint32_t i = 0U; - int32_t rOffset; - int32_t* dst_end; - /* Copy the value of Index pointer that points - * to the current location from where the input samples to be read */ - rOffset = *readOffset; - dst_end = dst_base + dst_length; - /* Loop over the blockSize */ - i = blockSize; - - while (i > 0U) { - /* copy the sample from the circular buffer to the destination buffer */ - *dst = circBuffer[rOffset]; - /* Update the input pointer */ - dst += dstInc; - - if (dst == dst_end) { - dst = dst_base; - } - - /* Circularly update rOffset. Watch out for positive and negative value */ - rOffset += bufferInc; - - if (rOffset >= L) { - rOffset -= L; - } - - /* Decrement the loop counter */ - i--; - } - - /* Update the index pointer */ - *readOffset = rOffset; -} -#endif -/** - * @brief Sum of the squares of the elements of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_power_q31( - const q31_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - -void csi_power_int32( - int32_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - -/** - * @brief Sum of the squares of the elements of a floating-point vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_power_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - -/** - * @brief Sum of the squares of the elements of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_power_q15( - const q15_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - -/** - * @brief Sum of the squares of the elements of a Q7 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_power_q7( - const q7_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - -/** - * @brief Mean value of a Q7 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_mean_q7( - const q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult); - - -/** - * @brief Mean value of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_mean_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - -/** - * @brief Mean value of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_mean_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - -/** - * @brief Mean value of a floating-point vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_mean_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - -/** - * @brief Variance of the elements of a floating-point vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_var_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - -/** - * @brief Variance of the elements of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_var_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - -/** - * @brief Variance of the elements of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_var_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - -/** - * @brief Root Mean Square of the elements of a floating-point vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_rms_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - -/** - * @brief Root Mean Square of the elements of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_rms_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - -/** - * @brief Root Mean Square of the elements of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_rms_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - -/** - * @brief Standard deviation of the elements of a floating-point vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_std_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - -/** - * @brief Standard deviation of the elements of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_std_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - -/** - * @brief Standard deviation of the elements of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output value. - */ -void csi_std_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - -/** - * @brief Floating-point complex magnitude - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ -void csi_cmplx_mag_f32( - const float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - -/** - * @brief Q31 complex magnitude - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ -void csi_cmplx_mag_q31( - const q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - -/** - * @brief Q15 complex magnitude - * @param[in] pSrc points to the complex input vector - * @param[out] pDst points to the real output vector - * @param[in] numSamples number of complex samples in the input vector - */ -void csi_cmplx_mag_q15( - const q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - -/** - * @brief Q15 complex dot product - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] numSamples number of complex samples in each vector - * @param[out] realResult real part of the result returned here - * @param[out] imagResult imaginary part of the result returned here - */ -void csi_cmplx_dot_prod_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - uint32_t numSamples, - q31_t * realResult, - q31_t * imagResult); - - -/** - * @brief Q31 complex dot product - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] numSamples number of complex samples in each vector - * @param[out] realResult real part of the result returned here - * @param[out] imagResult imaginary part of the result returned here - */ -void csi_cmplx_dot_prod_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - uint32_t numSamples, - q63_t * realResult, - q63_t * imagResult); - - void csi_dot_prod_u64xu8( - uint8_t * pSrcA, - uint64_t * pSrcB, - uint32_t blockSize, - uint64_t * result); - -/** - * @brief Floating-point complex dot product - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] numSamples number of complex samples in each vector - * @param[out] realResult real part of the result returned here - * @param[out] imagResult imaginary part of the result returned here - */ -void csi_cmplx_dot_prod_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - uint32_t numSamples, - float32_t * realResult, - float32_t * imagResult); - - -/** - * @brief Q15 complex-by-real multiplication - * @param[in] pSrcCmplx points to the complex input vector - * @param[in] pSrcReal points to the real input vector - * @param[out] pCmplxDst points to the complex output vector - * @param[in] numSamples number of samples in each vector - */ -void csi_cmplx_mult_real_q15( - const q15_t * pSrcCmplx, - const q15_t * pSrcReal, - q15_t * pCmplxDst, - uint32_t numSamples); - - -/** - * @brief Q31 complex-by-real multiplication - * @param[in] pSrcCmplx points to the complex input vector - * @param[in] pSrcReal points to the real input vector - * @param[out] pCmplxDst points to the complex output vector - * @param[in] numSamples number of samples in each vector - */ -void csi_cmplx_mult_real_q31( - const q31_t * pSrcCmplx, - const q31_t * pSrcReal, - q31_t * pCmplxDst, - uint32_t numSamples); - - -/** - * @brief Floating-point complex-by-real multiplication - * @param[in] pSrcCmplx points to the complex input vector - * @param[in] pSrcReal points to the real input vector - * @param[out] pCmplxDst points to the complex output vector - * @param[in] numSamples number of samples in each vector - */ -void csi_cmplx_mult_real_f32( - const float32_t * pSrcCmplx, - const float32_t * pSrcReal, - float32_t * pCmplxDst, - uint32_t numSamples); - - -/** - * @brief Minimum value of a Q7 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] result is output pointer - * @param[in] index is the array index of the minimum value in the input buffer. - */ -void csi_min_q7( - const q7_t * pSrc, - uint16_t blockSize, - q7_t * result, - uint16_t * index); - - -/** - * @brief Minimum value of a Q15 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output pointer - * @param[in] pIndex is the array index of the minimum value in the input buffer. - */ -void csi_min_q15( - const q15_t * pSrc, - uint16_t blockSize, - q15_t * pResult, - uint16_t * pIndex); - - -/** - * @brief Minimum value of a Q31 vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output pointer - * @param[out] pIndex is the array index of the minimum value in the input buffer. - */ -void csi_min_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex); - - -/** - * @brief Minimum value of a floating-point vector. - * @param[in] pSrc is input pointer - * @param[in] blockSize is the number of samples to process - * @param[out] pResult is output pointer - * @param[out] pIndex is the array index of the minimum value in the input buffer. - */ -void csi_min_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex); - - -/** - * @brief Maximum value of a Q7 vector. - * @param[in] pSrc points to the input buffer - * @param[in] blockSize length of the input vector - * @param[out] pResult maximum value returned here - * @param[out] pIndex index of maximum value returned here - */ -void csi_max_q7( - const q7_t * pSrc, - uint16_t blockSize, - q7_t * pResult, - uint16_t * pIndex); - - -/** - * @brief Maximum value of a Q15 vector. - * @param[in] pSrc points to the input buffer - * @param[in] blockSize length of the input vector - * @param[out] pResult maximum value returned here - * @param[out] pIndex index of maximum value returned here - */ -void csi_max_q15( - const q15_t * pSrc, - uint16_t blockSize, - q15_t * pResult, - uint16_t * pIndex); - - -/** - * @brief Maximum value of a Q31 vector. - * @param[in] pSrc points to the input buffer - * @param[in] blockSize length of the input vector - * @param[out] pResult maximum value returned here - * @param[out] pIndex index of maximum value returned here - */ -void csi_max_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex); - - -/** - * @brief Maximum value of a floating-point vector. - * @param[in] pSrc points to the input buffer - * @param[in] blockSize length of the input vector - * @param[out] pResult maximum value returned here - * @param[out] pIndex index of maximum value returned here - */ -void csi_max_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex); - -/** - @brief Maximum value of a floating-point vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult maximum value returned here - @return none - */ -void csi_max_no_idx_f32( - const float32_t *pSrc, - uint32_t blockSize, - float32_t *pResult); - -/** - * @brief Q15 complex-by-complex multiplication - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ -void csi_cmplx_mult_cmplx_q15( - const q15_t * pSrcA, - const q15_t * pSrcB, - q15_t * pDst, - uint32_t numSamples); - - -/** - * @brief Q31 complex-by-complex multiplication - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ -void csi_cmplx_mult_cmplx_q31( - const q31_t * pSrcA, - const q31_t * pSrcB, - q31_t * pDst, - uint32_t numSamples); - - -/** - * @brief Floating-point complex-by-complex multiplication - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[out] pDst points to the output vector - * @param[in] numSamples number of complex samples in each vector - */ -void csi_cmplx_mult_cmplx_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - float32_t * pDst, - uint32_t numSamples); - -void csi_cmplx_mult_cmplx_re_f32( - const float32_t * pSrcA, - const float32_t * pSrcB, - float32_t * pDst, - uint32_t numSamples); - - -/** - * @brief Converts the elements of the floating-point vector to Q31 vector. - * @param[in] pSrc points to the floating-point input vector - * @param[out] pDst points to the Q31 output vector - * @param[in] blockSize length of the input vector - */ -void csi_float_to_q31( - const float32_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the floating-point vector to Q15 vector. - * @param[in] pSrc points to the floating-point input vector - * @param[out] pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - */ -void csi_float_to_q15( - const float32_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the floating-point vector to Q7 vector. - * @param[in] pSrc points to the floating-point input vector - * @param[out] pDst points to the Q7 output vector - * @param[in] blockSize length of the input vector - */ -void csi_float_to_q7( - const float32_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the Q31 vector to floating-point vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ -void csi_q31_to_float( - const q31_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the Q31 vector to Q15 vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ -void csi_q31_to_q15( - const q31_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - -void csi_q31_to_q7_rs( - q31_t * pSrc, - q7_t * pDst, - uint32_t shiftValue, - uint32_t blockSize); - -void csi_q63_to_q31_rs( - q63_t * pSrc, - q31_t * pDst, - uint32_t shiftValue, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the Q31 vector to Q7 vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ -void csi_q31_to_q7( - const q31_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the Q15 vector to floating-point vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ -void csi_q15_to_float( - const q15_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the Q15 vector to Q31 vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ -void csi_q15_to_q31( - const q15_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the Q15 vector to Q7 vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ -void csi_q15_to_q7( - const q15_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the Q7 vector to floating-point vector. - * @param[in] pSrc is input pointer - * @param[out] pDst is output pointer - * @param[in] blockSize is the number of samples to process - */ -void csi_q7_to_float( - const q7_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the Q7 vector to Q31 vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ -void csi_q7_to_q31( - const q7_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - -/** - * @brief Converts the elements of the Q7 vector to Q15 vector. - * @param[in] pSrc input pointer - * @param[out] pDst output pointer - * @param[in] blockSize number of samples to process - */ -void csi_q7_to_q15( - const q7_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - -/** - * @brief SVM linear instance init function - * @param[in] S Parameters for SVM functions - * @param[in] nbOfSupportVectors Number of support vectors - * @param[in] vectorDimension Dimension of vector space - * @param[in] intercept Intercept - * @param[in] dualCoefficients Array of dual coefficients - * @param[in] supportVectors Array of support vectors - * @param[in] classes Array of 2 classes ID - * @return none. - * - */ - - -void csi_svm_linear_init_f32(csi_svm_linear_instance_f32 *S, - uint32_t nbOfSupportVectors, - uint32_t vectorDimension, - float32_t intercept, - const float32_t *dualCoefficients, - const float32_t *supportVectors, - const int32_t *classes); - -/** - * @brief SVM linear prediction - * @param[in] S Pointer to an instance of the linear SVM structure. - * @param[in] in Pointer to input vector - * @param[out] pResult Decision value - * @return none. - * - */ - -void csi_svm_linear_predict_f32(const csi_svm_linear_instance_f32 *S, - const float32_t * in, - int32_t * pResult); - - -/** - * @brief SVM polynomial instance init function - * @param[in] S points to an instance of the polynomial SVM structure. - * @param[in] nbOfSupportVectors Number of support vectors - * @param[in] vectorDimension Dimension of vector space - * @param[in] intercept Intercept - * @param[in] dualCoefficients Array of dual coefficients - * @param[in] supportVectors Array of support vectors - * @param[in] classes Array of 2 classes ID - * @param[in] degree Polynomial degree - * @param[in] coef0 coeff0 (scikit-learn terminology) - * @param[in] gamma gamma (scikit-learn terminology) - * @return none. - * - */ - - -void csi_svm_polynomial_init_f32(csi_svm_polynomial_instance_f32 *S, - uint32_t nbOfSupportVectors, - uint32_t vectorDimension, - float32_t intercept, - const float32_t *dualCoefficients, - const float32_t *supportVectors, - const int32_t *classes, - int32_t degree, - float32_t coef0, - float32_t gamma - ); - -/** - * @brief SVM polynomial prediction - * @param[in] S Pointer to an instance of the polynomial SVM structure. - * @param[in] in Pointer to input vector - * @param[out] pResult Decision value - * @return none. - * - */ -void csi_svm_polynomial_predict_f32(const csi_svm_polynomial_instance_f32 *S, - const float32_t * in, - int32_t * pResult); - - -/** - * @brief SVM radial basis function instance init function - * @param[in] S points to an instance of the polynomial SVM structure. - * @param[in] nbOfSupportVectors Number of support vectors - * @param[in] vectorDimension Dimension of vector space - * @param[in] intercept Intercept - * @param[in] dualCoefficients Array of dual coefficients - * @param[in] supportVectors Array of support vectors - * @param[in] classes Array of 2 classes ID - * @param[in] gamma gamma (scikit-learn terminology) - * @return none. - * - */ - -void csi_svm_rbf_init_f32(csi_svm_rbf_instance_f32 *S, - uint32_t nbOfSupportVectors, - uint32_t vectorDimension, - float32_t intercept, - const float32_t *dualCoefficients, - const float32_t *supportVectors, - const int32_t *classes, - float32_t gamma - ); - -/** - * @brief SVM rbf prediction - * @param[in] S Pointer to an instance of the rbf SVM structure. - * @param[in] in Pointer to input vector - * @param[out] pResult decision value - * @return none. - * - */ -void csi_svm_rbf_predict_f32(const csi_svm_rbf_instance_f32 *S, - const float32_t * in, - int32_t * pResult); - -/** - * @brief SVM sigmoid instance init function - * @param[in] S points to an instance of the rbf SVM structure. - * @param[in] nbOfSupportVectors Number of support vectors - * @param[in] vectorDimension Dimension of vector space - * @param[in] intercept Intercept - * @param[in] dualCoefficients Array of dual coefficients - * @param[in] supportVectors Array of support vectors - * @param[in] classes Array of 2 classes ID - * @param[in] coef0 coeff0 (scikit-learn terminology) - * @param[in] gamma gamma (scikit-learn terminology) - * @return none. - * - */ - -void csi_svm_sigmoid_init_f32(csi_svm_sigmoid_instance_f32 *S, - uint32_t nbOfSupportVectors, - uint32_t vectorDimension, - float32_t intercept, - const float32_t *dualCoefficients, - const float32_t *supportVectors, - const int32_t *classes, - float32_t coef0, - float32_t gamma - ); - -/** - * @brief SVM sigmoid prediction - * @param[in] S Pointer to an instance of the rbf SVM structure. - * @param[in] in Pointer to input vector - * @param[out] pResult Decision value - * @return none. - * - */ -void csi_svm_sigmoid_predict_f32(const csi_svm_sigmoid_instance_f32 *S, - const float32_t * in, - int32_t * pResult); - - -/** - * @brief Naive Gaussian Bayesian Estimator - * - * @param[in] S points to a naive bayes instance structure - * @param[in] in points to the elements of the input vector. - * @param[in] pBuffer points to a buffer of length numberOfClasses - * @return The predicted class - * - */ - - -uint32_t csi_gaussian_naive_bayes_predict_f32(const csi_gaussian_naive_bayes_instance_f32 *S, - const float32_t * in, - float32_t *pBuffer); - -/** - * @brief Computation of the LogSumExp - * - * In probabilistic computations, the dynamic of the probability values can be very - * wide because they come from gaussian functions. - * To avoid underflow and overflow issues, the values are represented by their log. - * In this representation, multiplying the original exp values is easy : their logs are added. - * But adding the original exp values is requiring some special handling and it is the - * goal of the LogSumExp function. - * - * If the values are x1...xn, the function is computing: - * - * ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that - * rounding issues are minimised. - * - * The max xm of the values is extracted and the function is computing: - * xm + ln(exp(x1 - xm) + ... + exp(xn - xm)) - * - * @param[in] *in Pointer to an array of input values. - * @param[in] blockSize Number of samples in the input array. - * @return LogSumExp - * - */ - - -float32_t csi_logsumexp_f32(const float32_t *in, uint32_t blockSize); - -/** - * @brief Dot product with log arithmetic - * - * Vectors are containing the log of the samples - * - * @param[in] pSrcA points to the first input vector - * @param[in] pSrcB points to the second input vector - * @param[in] blockSize number of samples in each vector - * @param[in] pTmpBuffer temporary buffer of length blockSize - * @return The log of the dot product . - * - */ - - -float32_t csi_logsumexp_dot_prod_f32(const float32_t * pSrcA, - const float32_t * pSrcB, - uint32_t blockSize, - float32_t *pTmpBuffer); - -/** - * @brief Entropy - * - * @param[in] pSrcA Array of input values. - * @param[in] blockSize Number of samples in the input array. - * @return Entropy -Sum(p ln p) - * - */ - - -float32_t csi_entropy_f32(const float32_t * pSrcA,uint32_t blockSize); - - -/** - * @brief Kullback-Leibler - * - * @param[in] pSrcA Pointer to an array of input values for probability distribution A. - * @param[in] pSrcB Pointer to an array of input values for probability distribution B. - * @param[in] blockSize Number of samples in the input array. - * @return Kullback-Leibler Divergence D(A || B) - * - */ -float32_t csi_kullback_leibler_f32(const float32_t * pSrcA - ,const float32_t * pSrcB - ,uint32_t blockSize); - - -/** - * @brief Weighted sum - * - * - * @param[in] *in Array of input values. - * @param[in] *weigths Weights - * @param[in] blockSize Number of samples in the input array. - * @return Weighted sum - * - */ -float32_t csi_weighted_sum_f32(const float32_t *in - , const float32_t *weigths - , uint32_t blockSize); - - -/** - * @brief Barycenter - * - * - * @param[in] in List of vectors - * @param[in] weights Weights of the vectors - * @param[out] out Barycenter - * @param[in] nbVectors Number of vectors - * @param[in] vecDim Dimension of space (vector dimension) - * @return None - * - */ -void csi_barycenter_f32(const float32_t *in - , const float32_t *weights - , float32_t *out - , uint32_t nbVectors - , uint32_t vecDim); - -/** - * @brief Euclidean distance between two vectors - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ - -float32_t csi_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - -/** - * @brief Bray-Curtis distance between two vectors - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ -float32_t csi_braycurtis_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - -/** - * @brief Canberra distance between two vectors - * - * This function may divide by zero when samples pA[i] and pB[i] are both zero. - * The result of the computation will be correct. So the division per zero may be - * ignored. - * - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ -float32_t csi_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - - -/** - * @brief Chebyshev distance between two vectors - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ -float32_t csi_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - - -/** - * @brief Cityblock (Manhattan) distance between two vectors - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ -float32_t csi_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - -/** - * @brief Correlation distance between two vectors - * - * The input vectors are modified in place ! - * - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ -float32_t csi_correlation_distance_f32(float32_t *pA,float32_t *pB, uint32_t blockSize); - -/** - * @brief Cosine distance between two vectors - * - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ - -float32_t csi_cosine_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize); - -/** - * @brief Jensen-Shannon distance between two vectors - * - * This function is assuming that elements of second vector are > 0 - * and 0 only when the corresponding element of first vector is 0. - * Otherwise the result of the computation does not make sense - * and for speed reasons, the cases returning NaN or Infinity are not - * managed. - * - * When the function is computing x log (x / y) with x 0 and y 0, - * it will compute the right value (0) but a division per zero will occur - * and shoudl be ignored in client code. - * - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] blockSize vector length - * @return distance - * - */ - -float32_t csi_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB,uint32_t blockSize); - -/** - * @brief Minkowski distance between two vectors - * - * @param[in] pA First vector - * @param[in] pB Second vector - * @param[in] n Norm order (>= 2) - * @param[in] blockSize vector length - * @return distance - * - */ - - - -float32_t csi_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize); - -/** - * @brief Dice distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] order Distance order - * @param[in] blockSize Number of samples - * @return distance - * - */ - - -float32_t csi_dice_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Hamming distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t csi_hamming_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Jaccard distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t csi_jaccard_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Kulsinski distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t csi_kulsinski_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Roger Stanimoto distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t csi_rogerstanimoto_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Russell-Rao distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t csi_russellrao_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Sokal-Michener distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t csi_sokalmichener_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Sokal-Sneath distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t csi_sokalsneath_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - -/** - * @brief Yule distance between two vectors - * - * @param[in] pA First vector of packed booleans - * @param[in] pB Second vector of packed booleans - * @param[in] numberOfBools Number of booleans - * @return distance - * - */ - -float32_t csi_yule_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools); - - -/** - * @ingroup groupInterpolation - */ - -/** - * @defgroup BilinearInterpolate Bilinear Interpolation - * - * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid. - * The underlying function f(x, y) is sampled on a regular grid and the interpolation process - * determines values between the grid points. - * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension. - * Bilinear interpolation is often used in image processing to rescale images. - * The library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types. - * - * Algorithm - * \par - * The instance structure used by the bilinear interpolation functions describes a two dimensional data table. - * For floating-point, the instance structure is defined as: - *
- *   typedef struct
- *   {
- *     uint16_t numRows;
- *     uint16_t numCols;
- *     float32_t *pData;
- * } csi_bilinear_interp_instance_f32;
- * 
- * - * \par - * where numRows specifies the number of rows in the table; - * numCols specifies the number of columns in the table; - * and pData points to an array of size numRows*numCols values. - * The data table pTable is organized in row order and the supplied data values fall on integer indexes. - * That is, table element (x,y) is located at pTable[x + y*numCols] where x and y are integers. - * - * \par - * Let (x, y) specify the desired interpolation point. Then define: - *
- *     XF = floor(x)
- *     YF = floor(y)
- * 
- * \par - * The interpolated output point is computed as: - *
- *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
- *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
- *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
- *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
- * 
- * Note that the coordinates (x, y) contain integer and fractional components. - * The integer components specify which portion of the table to use while the - * fractional components control the interpolation processor. - * - * \par - * if (x,y) are outside of the table boundary, Bilinear interpolation returns zero output. - */ - - -/** - * @addtogroup BilinearInterpolate - * @{ - */ - -/** -* @brief Floating-point bilinear interpolation. -* @param[in,out] S points to an instance of the interpolation structure. -* @param[in] X interpolation coordinate. -* @param[in] Y interpolation coordinate. -* @return out interpolated value. -*/ -__STATIC_FORCEINLINE float32_t csi_bilinear_interp_f32( - const csi_bilinear_interp_instance_f32 * S, - float32_t X, - float32_t Y) -{ - float32_t out; - float32_t f00, f01, f10, f11; - float32_t *pData = S->pData; - int32_t xIndex, yIndex, index; - float32_t xdiff, ydiff; - float32_t b1, b2, b3, b4; - xIndex = (int32_t) X; - yIndex = (int32_t) Y; - - /* Care taken for table outside boundary */ - /* Returns zero output when values are outside table boundary */ - if (xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0 || yIndex > (S->numCols - 1)) { - return (0); - } - - /* Calculation of index for two nearest points in X-direction */ - index = (xIndex - 1) + (yIndex - 1) * S->numCols; - /* Read two nearest points in X-direction */ - f00 = pData[index]; - f01 = pData[index + 1]; - /* Calculation of index for two nearest points in Y-direction */ - index = (xIndex - 1) + (yIndex) * S->numCols; - /* Read two nearest points in Y-direction */ - f10 = pData[index]; - f11 = pData[index + 1]; - /* Calculation of intermediate values */ - b1 = f00; - b2 = f01 - f00; - b3 = f10 - f00; - b4 = f00 - f01 - f10 + f11; - /* Calculation of fractional part in X */ - xdiff = X - xIndex; - /* Calculation of fractional part in Y */ - ydiff = Y - yIndex; - /* Calculation of bi-linear interpolated output */ - out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff; - /* return to application */ - return (out); -} - - -/** -* @brief Q31 bilinear interpolation. -* @param[in,out] S points to an instance of the interpolation structure. -* @param[in] X interpolation coordinate in 12.20 format. -* @param[in] Y interpolation coordinate in 12.20 format. -* @return out interpolated value. -*/ -__STATIC_FORCEINLINE q31_t csi_bilinear_interp_q31( - csi_bilinear_interp_instance_q31 * S, - q31_t X, - q31_t Y) -{ - q31_t out; /* Temporary output */ - q31_t acc = 0; /* output */ - q31_t xfract, yfract; /* X, Y fractional parts */ - q31_t x1, x2, y1, y2; /* Nearest output values */ - int32_t rI, cI; /* Row and column indices */ - q31_t *pYData = S->pData; /* pointer to output table values */ - uint32_t nCols = S->numCols; /* num of rows */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - rI = ((X & (q31_t)0xFFF00000) >> 20); - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - cI = ((Y & (q31_t)0xFFF00000) >> 20); - - /* Care taken for table outside boundary */ - /* Returns zero output when values are outside table boundary */ - if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) { - return (0); - } - - /* 20 bits for the fractional part */ - /* shift left xfract by 11 to keep 1.31 format */ - xfract = (X & 0x000FFFFF) << 11U; - /* Read two nearest output values from the index */ - x1 = pYData[(rI) + (int32_t)nCols * (cI) ]; - x2 = pYData[(rI) + (int32_t)nCols * (cI) + 1]; - /* 20 bits for the fractional part */ - /* shift left yfract by 11 to keep 1.31 format */ - yfract = (Y & 0x000FFFFF) << 11U; - /* Read two nearest output values from the index */ - y1 = pYData[(rI) + (int32_t)nCols * (cI + 1) ]; - y2 = pYData[(rI) + (int32_t)nCols * (cI + 1) + 1]; - /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */ - out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32)); - acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32)); - /* x2 * (xfract) * (1-yfract) in 3.29(q29) and adding to acc */ - out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32)); - acc += ((q31_t) ((q63_t) out * (xfract) >> 32)); - /* y1 * (1 - xfract) * (yfract) in 3.29(q29) and adding to acc */ - out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32)); - acc += ((q31_t) ((q63_t) out * (yfract) >> 32)); - /* y2 * (xfract) * (yfract) in 3.29(q29) and adding to acc */ - out = ((q31_t) ((q63_t) y2 * (xfract) >> 32)); - acc += ((q31_t) ((q63_t) out * (yfract) >> 32)); - /* Convert acc to 1.31(q31) format */ - return ((q31_t)(acc << 2)); -} - - -/** -* @brief Q15 bilinear interpolation. -* @param[in,out] S points to an instance of the interpolation structure. -* @param[in] X interpolation coordinate in 12.20 format. -* @param[in] Y interpolation coordinate in 12.20 format. -* @return out interpolated value. -*/ -__STATIC_FORCEINLINE q15_t csi_bilinear_interp_q15( - csi_bilinear_interp_instance_q15 * S, - q31_t X, - q31_t Y) -{ - q63_t acc = 0; /* output */ - q31_t out; /* Temporary output */ - q15_t x1, x2, y1, y2; /* Nearest output values */ - q31_t xfract, yfract; /* X, Y fractional parts */ - int32_t rI, cI; /* Row and column indices */ - q15_t *pYData = S->pData; /* pointer to output table values */ - uint32_t nCols = S->numCols; /* num of rows */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - rI = ((X & (q31_t)0xFFF00000) >> 20); - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - cI = ((Y & (q31_t)0xFFF00000) >> 20); - - /* Care taken for table outside boundary */ - /* Returns zero output when values are outside table boundary */ - if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) { - return (0); - } - - /* 20 bits for the fractional part */ - /* xfract should be in 12.20 format */ - xfract = (X & 0x000FFFFF); - /* Read two nearest output values from the index */ - x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) ]; - x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1]; - /* 20 bits for the fractional part */ - /* yfract should be in 12.20 format */ - yfract = (Y & 0x000FFFFF); - /* Read two nearest output values from the index */ - y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) ]; - y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1]; - /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */ - /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */ - /* convert 13.35 to 13.31 by right shifting and out is in 1.31 */ - out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4U); - acc = ((q63_t) out * (0xFFFFF - yfract)); - /* x2 * (xfract) * (1-yfract) in 1.51 and adding to acc */ - out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4U); - acc += ((q63_t) out * (xfract)); - /* y1 * (1 - xfract) * (yfract) in 1.51 and adding to acc */ - out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4U); - acc += ((q63_t) out * (yfract)); - /* y2 * (xfract) * (yfract) in 1.51 and adding to acc */ - out = (q31_t) (((q63_t) y2 * (xfract)) >> 4U); - acc += ((q63_t) out * (yfract)); - /* acc is in 13.51 format and down shift acc by 36 times */ - /* Convert out to 1.15 format */ - return ((q15_t)(acc >> 36)); -} - - -/** -* @brief Q7 bilinear interpolation. -* @param[in,out] S points to an instance of the interpolation structure. -* @param[in] X interpolation coordinate in 12.20 format. -* @param[in] Y interpolation coordinate in 12.20 format. -* @return out interpolated value. -*/ -__STATIC_FORCEINLINE q7_t csi_bilinear_interp_q7( - csi_bilinear_interp_instance_q7 * S, - q31_t X, - q31_t Y) -{ - q63_t acc = 0; /* output */ - q31_t out; /* Temporary output */ - q31_t xfract, yfract; /* X, Y fractional parts */ - q7_t x1, x2, y1, y2; /* Nearest output values */ - int32_t rI, cI; /* Row and column indices */ - q7_t *pYData = S->pData; /* pointer to output table values */ - uint32_t nCols = S->numCols; /* num of rows */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - rI = ((X & (q31_t)0xFFF00000) >> 20); - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - cI = ((Y & (q31_t)0xFFF00000) >> 20); - - /* Care taken for table outside boundary */ - /* Returns zero output when values are outside table boundary */ - if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) { - return (0); - } - - /* 20 bits for the fractional part */ - /* xfract should be in 12.20 format */ - xfract = (X & (q31_t)0x000FFFFF); - /* Read two nearest output values from the index */ - x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) ]; - x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1]; - /* 20 bits for the fractional part */ - /* yfract should be in 12.20 format */ - yfract = (Y & (q31_t)0x000FFFFF); - /* Read two nearest output values from the index */ - y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) ]; - y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1]; - /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */ - out = ((x1 * (0xFFFFF - xfract))); - acc = (((q63_t) out * (0xFFFFF - yfract))); - /* x2 * (xfract) * (1-yfract) in 2.22 and adding to acc */ - out = ((x2 * (0xFFFFF - yfract))); - acc += (((q63_t) out * (xfract))); - /* y1 * (1 - xfract) * (yfract) in 2.22 and adding to acc */ - out = ((y1 * (0xFFFFF - xfract))); - acc += (((q63_t) out * (yfract))); - /* y2 * (xfract) * (yfract) in 2.22 and adding to acc */ - out = ((y2 * (yfract))); - acc += (((q63_t) out * (xfract))); - /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */ - return ((q7_t)(acc >> 40)); -} - -/** - * @} end of BilinearInterpolate group - */ - -#ifdef __cplusplus -} -#endif - - -#endif /* _CSI_MATH_H */ - -/** - * - * End of file. - */ diff --git a/include/include_xt800/dsp_include/csky_math.h b/include/include_xt800/dsp_include/csky_math.h deleted file mode 100644 index d7adfecb..00000000 --- a/include/include_xt800/dsp_include/csky_math.h +++ /dev/null @@ -1,4637 +0,0 @@ -/* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/****************************************************************************** - * @file csky_math.h - * @brief Public header file for CSI DSP Library. - * @version V1.0 - * @date 20. Dec 2016 - ******************************************************************************/ - - -/** - * @defgroup groupMath Basic Math Functions - */ - -/** - * @defgroup groupFastMath Fast Math Functions - * This set of functions provides a fast approximation to sine, cosine, and square root. - * As compared to most of the other functions in the CSI math library, the fast math functions - * operate on individual values and not arrays. - * There are separate functions for Q15, Q31, and floating-point data. - * - */ - -/** - * @defgroup groupCmplxMath Complex Math Functions - * This set of functions operates on complex data vectors. - * The data in the complex arrays is stored in an interleaved fashion - * (real, imag, real, imag, ...). - * In the API functions, the number of samples in a complex array refers - * to the number of complex values; the array contains twice this number of - * real values. - */ - -/** - * @defgroup groupFilters Filtering Functions - */ - -/** - * @defgroup groupMatrix Matrix Functions - * - * This set of functions provides basic matrix math operations. - * The functions operate on matrix data structures. For example, - * the type - * definition for the floating-point matrix structure is shown - * below: - *
- *     typedef struct
- *     {
- *       uint16_t numRows;     // number of rows of the matrix.
- *       uint16_t numCols;     // number of columns of the matrix.
- *       float32_t *pData;     // points to the data of the matrix.
- *     } csky_matrix_instance_f32;
- * 
- * There are similar definitions for Q15 and Q31 data types. - * - * The structure specifies the size of the matrix and then points to - * an array of data. The array is of size numRows X numCols - * and the values are arranged in row order. That is, the - * matrix element (i, j) is stored at: - *
- *     pData[i*numCols + j]
- * 
- * - * \par Init Functions - * There is an associated initialization function for each type of matrix - * data structure. - * The initialization function sets the values of the internal structure fields. - * Refer to the function csky_mat_init_f32(), csky_mat_init_q31() - * and csky_mat_init_q15() for floating-point, Q31 and Q15 types, respectively. - * - * \par - * Use of the initialization function is optional. However, if initialization function is used - * then the instance structure cannot be placed into a const data section. - * To place the instance structure in a const data - * section, manually initialize the data structure. For example: - *
- * csky_matrix_instance_f32 S = {nRows, nColumns, pData};
- * csky_matrix_instance_q31 S = {nRows, nColumns, pData};
- * csky_matrix_instance_q15 S = {nRows, nColumns, pData};
- * 
- * where nRows specifies the number of rows, nColumns - * specifies the number of columns, and pData points to the - * data array. - * - * \par Size Checking - * By default all of the matrix functions perform size checking on the input and - * output matrices. For example, the matrix addition function verifies that the - * two input matrices and the output matrix all have the same number of rows and - * columns. If the size check fails the functions return: - *
- *     CSKY_MATH_SIZE_MISMATCH
- * 
- * Otherwise the functions return - *
- *     CSKY_MATH_SUCCESS
- * 
- * There is some overhead associated with this matrix size checking. - * The matrix size checking is enabled via the \#define - *
- *     CSKY_MATH_MATRIX_CHECK
- * 
- * within the library project settings. By default this macro is defined - * and size checking is enabled. By changing the project settings and - * undefining this macro size checking is eliminated and the functions - * run a bit faster. With size checking disabled the functions always - * return CSKY_MATH_SUCCESS. - */ - -/** - * @defgroup groupTransforms Transform Functions - */ - -/** - * @defgroup groupController Controller Functions - */ - -/** - * @defgroup groupStats Statistics Functions - */ -/** - * @defgroup groupSupport Support Functions - */ - -/** - * @defgroup groupInterpolation Interpolation Functions - * These functions perform 1- and 2-dimensional interpolation of data. - * Linear interpolation is used for 1-dimensional data and - * bilinear interpolation is used for 2-dimensional data. - */ - - -/** - * @defgroup groupYunvoice Yunvoice Functions - * These functions are designed for Yunvoice project, which are modified - * according to the CEVA DSP functions. So, one can porting the software - * from CEVA to CSKY straightforwardly. - */ - -/** - * @defgroup groupExamples Examples - */ - - -#ifndef _CSKY_MATH_H -#define _CSKY_MATH_H - -#define __CSI_GENERIC /* disable NVIC and Systick functions */ - -#include "csi_core.h" - -#include -#undef __CSI_GENERIC /* enable NVIC and Systick functions */ -#include "string.h" -#include "math.h" -#ifdef __cplusplus -extern "C" -{ -#endif - - - /** - * @brief Macros required for reciprocal calculation in Normalized LMS - */ - -#define DELTA_Q31 (0x100) -#define DELTA_Q15 0x5 -#define INDEX_MASK 0x0000003F -#ifndef PI -#define PI 3.14159265358979f -#endif - - /** - * @brief Macros required for SINE and COSINE Fast math approximations - */ - -#define FAST_MATH_TABLE_SIZE 512 -#define FAST_MATH_Q31_SHIFT (32 - 10) -#define FAST_MATH_Q15_SHIFT (16 - 10) -#define CONTROLLER_Q31_SHIFT (32 - 9) -#define TABLE_SIZE 256 -#define TABLE_SPACING_Q31 0x400000 -#define TABLE_SPACING_Q15 0x80 - - /** - * @brief Macros required for SINE and COSINE Controller functions - */ - /* 1.31(q31) Fixed value of 2/360 */ - /* -1 to +1 is divided into 360 values so total spacing is (2/360) */ -#define INPUT_SPACING 0xB60B61 - - /** - * @brief Macro for Unaligned Support - */ -#ifndef UNALIGNED_SUPPORT_DISABLE - #define ALIGN4 -#else - #define ALIGN4 __attribute__((aligned(4))) -#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */ - -__ALWAYS_STATIC_INLINE int32_t __SSAT_31(int32_t x) -{ - int32_t res = x; - if (x > 0x3fffffff) { - res = 0x3fffffff; - } else if (x < -1073741824) { - res = -1073741824; - } - - return res; -} - -__ALWAYS_STATIC_INLINE int32_t __SSAT_16(int32_t x) -{ - int32_t res = x; - if (x > 0x7fff) { - res = 0x7fff; - } else if (x < -32768) { - res = -32768; - } - - return res; -} - -__ALWAYS_STATIC_INLINE int32_t __SSAT_8(int32_t x) -{ - int32_t res = x; - if (x > 0x7f) { - res = 0x7f; - } else if (x < -128) { - res = -128; - } - - return res; -} - -#ifdef CSKY_SIMD -/* SMMLAR */ -__ALWAYS_STATIC_INLINE int32_t multAcc_32x32_keep32_R(int32_t a, int32_t x, int32_t y) -{ - __ASM volatile("mula.s32.rhs %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y) : "0" (a), "1" (x), "2" (y)); - return a; -} - -/* SMMLSR */ -__ALWAYS_STATIC_INLINE int32_t multSub_32x32_keep32_R(int32_t a, int32_t x, int32_t y) -{ - __ASM volatile("muls.s32.rhs %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -/* SMMULR */ -__ALWAYS_STATIC_INLINE int32_t mult_32x32_keep32_R(int32_t x, int32_t y) -{ - int32_t a; - __ASM volatile("mul.s32.rh %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y)); - return a; -} - -/* SMMLA */ -__ALWAYS_STATIC_INLINE int32_t multAcc_32x32_keep32(int32_t a, int32_t x, int32_t y) -{ - __ASM volatile("mula.s32.hs %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -/* SMMLS */ -__ALWAYS_STATIC_INLINE int32_t multSub_32x32_keep32(int32_t a, int32_t x, int32_t y) -{ - __ASM volatile("muls.s32.hs %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -/* SMMUL */ -__ALWAYS_STATIC_INLINE int32_t mult_32x32_keep32(int32_t x, int32_t y) -{ - int32_t a; - __ASM volatile("mul.s32.h %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int32_t multAcc_16x16_keep32(int32_t a, int16_t x, int16_t y) -{ - __ASM volatile("mulall.s16 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int64_t multAcc_16x16_keep64(int64_t a, int16_t x, int16_t y) -{ - __ASM volatile("mulall.s16.e %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int64_t mult_32x32_keep64(int32_t x, int32_t y) -{ - int64_t a; - __ASM volatile("mul.s32 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int64_t multAcc_32x32_keep64(int64_t a, int32_t x, int32_t y) -{ - __ASM volatile("mula.s32 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "0" (a), "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int32_t mult_32x32_dext_31(int32_t x, int32_t y) -{ - int64_t tmp1; - int32_t tmp2; - __ASM volatile("mul.s32 %0, %1, %2\n\t" - "dexti %3, %0, %R0, 31" - :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y)); - return tmp2; -} - -__ALWAYS_STATIC_INLINE int32_t mult_32x32_dext_30(int32_t x, int32_t y) -{ - int64_t tmp1; - int32_t tmp2; - __ASM volatile("mul.s32 %0, %1, %2\n\t" - "dexti %3, %0, %R0, 30" - :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y)); - return tmp2; -} - -__ALWAYS_STATIC_INLINE int32_t mult_32x32_dext_4(int32_t x, int32_t y) -{ - int64_t tmp1; - int32_t tmp2; - __ASM volatile("mul.s32 %0, %1, %2\n\t" - "dexti %3, %0, %R0, 4" - :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y)); - return tmp2; -} - -__ALWAYS_STATIC_INLINE int32_t mult_32x32_dext_33(int32_t x, int32_t y) -{ - int64_t tmp1; - int32_t tmp2; - __ASM volatile("mul.s32 %0, %1, %2\n\t" - "asri %3, %R0, 1" - :"=r" (tmp1), "=r" (x), "=r" (y), "=r" (tmp2): "1" (x), "2" (y)); - return tmp2; -} - -__ALWAYS_STATIC_INLINE int32_t dext_31(int64_t x) -{ - int32_t tmp1; - __ASM volatile( - "dexti %0, %1, %R1, 31" - :"=r" (tmp1), "=r" (x) : "1" (x)); - return tmp1; -} - -__ALWAYS_STATIC_INLINE int32_t mult_l16xl16_keep32(int32_t x, int32_t y) -{ - int32_t a; - __ASM volatile("mulll.s16 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int32_t mult_h16xl16_keep32(int32_t x, int32_t y) -{ - int32_t a; - __ASM volatile("mulhl.s16 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y)); - return a; -} - -__ALWAYS_STATIC_INLINE int32_t mult_h16xh16_keep32(int32_t x, int32_t y) -{ - int32_t a; - __ASM volatile("mulhh.s16 %0, %1, %2\n\t" - :"=r" (a), "=r" (x), "=r" (y): "1" (x), "2" (y)); - return a; -} - -#endif - - - /** - * @brief Error status returned by some functions in the library. - */ - - typedef enum - { - CSKY_MATH_SUCCESS = 0, /**< No error */ - CSKY_MATH_ARGUMENT_ERROR = -1, /**< One or more arguments are incorrect */ - CSKY_MATH_LENGTH_ERROR = -2, /**< Length of data buffer is incorrect */ - CSKY_MATH_SIZE_MISMATCH = -3, /**< Size of matrices is not compatible with the operation. */ - CSKY_MATH_NANINF = -4, /**< Not-a-number (NaN) or infinity is generated */ - CSKY_MATH_SINGULAR = -5, /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */ - CSKY_MATH_TEST_FAILURE = -6 /**< Test Failed */ - } csky_status; - - /** - * @brief 8-bit fractional data type in 1.7 format. - */ - typedef int8_t q7_t; - - /** - * @brief 16-bit fractional data type in 1.15 format. - */ - typedef int16_t q15_t; - - /** - * @brief 32-bit fractional data type in 1.31 format. - */ - typedef int32_t q31_t; - - /** - * @brief 64-bit fractional data type in 1.63 format. - */ - typedef int64_t q63_t; - - /** - * @brief 32-bit floating-point type definition. - */ - typedef float float32_t; - - /** - * @brief 64-bit floating-point type definition. - */ - typedef double float64_t; - - /** - * @brief 32-bit fractional complex data type in 1.31 format. - */ - typedef struct - { - q31_t re; - q31_t im; - } cq31_t; - /** - * @brief 16-bit fractional complex data type in 1.15 format. - */ - typedef struct - { - q15_t re; - q15_t im; - } cq15_t; - /** - * @brief definition to read/write two 16 bit values. - */ - #define __SIMD32_TYPE int32_t - #define CSI_UNUSED __attribute__((unused)) - -#define __SIMD32(addr) (*(__SIMD32_TYPE **) & (addr)) -#define __SIMD32_CONST(addr) ((__SIMD32_TYPE *)(addr)) -#define _SIMD32_OFFSET(addr) (*(__SIMD32_TYPE *) (addr)) -#define __SIMD64(addr) (*(int64_t **) & (addr)) - -#if defined (CSKY_MATH_NO_SIMD) - /** - * @brief definition to pack two 16 bit values. - */ -#define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) | \ - (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000) ) -#define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) | \ - (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF) ) - -#endif - - - /** - * @brief definition to pack four 8 bit values. - */ -#ifndef CSKY_MATH_BIG_ENDIAN - -#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) | \ - (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \ - (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \ - (((int32_t)(v3) << 24) & (int32_t)0xFF000000) ) -#else - -#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) << 0) & (int32_t)0x000000FF) | \ - (((int32_t)(v2) << 8) & (int32_t)0x0000FF00) | \ - (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \ - (((int32_t)(v0) << 24) & (int32_t)0xFF000000) ) - -#endif - - /** - * @brief Clips Q63 to Q31 values. - */ - static __INLINE q31_t clip_q63_to_q31( - q63_t x) - { - return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? - ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x; - } - - /** - * @brief Instance structure for the Q7 FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q7_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - } csky_fir_instance_q7; - - /** - * @brief Instance structure for the Q15 FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - } csky_fir_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - } csky_fir_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - } csky_fir_instance_f32; - - void csky_fir_q7( - const csky_fir_instance_q7 * S, - q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_fir_init_q7( - csky_fir_instance_q7 * S, - uint16_t numTaps, - q7_t * pCoeffs, - q7_t * pState, - uint32_t blockSize); - - void csky_fir_q15( - const csky_fir_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_fir_fast_q15( - const csky_fir_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - csky_status csky_fir_init_q15( - csky_fir_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - void csky_fir_q31( - const csky_fir_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_fir_fast_q31( - const csky_fir_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_fir_init_q31( - csky_fir_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - void csky_fir_f32( - const csky_fir_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_fir_init_f32( - csky_fir_instance_f32 * S, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 Biquad cascade filter. - */ - typedef struct - { - int8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - q15_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - q15_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - int8_t postShift; /**< Additional shift, in bits, applied to each output sample. */ - } csky_biquad_casd_df1_inst_q15; - - /** - * @brief Instance structure for the Q31 Biquad cascade filter. - */ - typedef struct - { - uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - q31_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - q31_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - uint8_t postShift; /**< Additional shift, in bits, applied to each output sample. */ - } csky_biquad_casd_df1_inst_q31; - - /** - * @brief Instance structure for the Q31 Biquad cascade filter. - */ - - /** - * @brief Instance structure for the floating-point Biquad cascade filter. - */ - typedef struct - { - uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - float32_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - } csky_biquad_casd_df1_inst_f32; - - void csky_biquad_cascade_df1_q15( - const csky_biquad_casd_df1_inst_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_biquad_cascade_df1_init_q15( - csky_biquad_casd_df1_inst_q15 * S, - uint8_t numStages, - q15_t * pCoeffs, - q15_t * pState, - int8_t postShift); - - void csky_biquad_cascade_df1_fast_q15( - const csky_biquad_casd_df1_inst_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_biquad_cascade_df1_q31( - const csky_biquad_casd_df1_inst_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_biquad_cascade_df1_fast_q31( - const csky_biquad_casd_df1_inst_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_biquad_cascade_df1_init_q31( - csky_biquad_casd_df1_inst_q31 * S, - uint8_t numStages, - q31_t * pCoeffs, - q31_t * pState, - int8_t postShift); - - void csky_biquad_cascade_df1_f32( - const csky_biquad_casd_df1_inst_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_biquad_cascade_df1_init_f32( - csky_biquad_casd_df1_inst_f32 * S, - uint8_t numStages, - float32_t * pCoeffs, - float32_t * pState); - - - /** - * @brief Instance structure for the floating-point matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - float32_t *pData; /**< points to the data of the matrix. */ - } csky_matrix_instance_f32; - - - /** - * @brief Instance structure for the floating-point matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - float64_t *pData; /**< points to the data of the matrix. */ - } csky_matrix_instance_f64; - - /** - * @brief Instance structure for the Q15 matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - q15_t *pData; /**< points to the data of the matrix. */ - } csky_matrix_instance_q15; - - /** - * @brief Instance structure for the Q31 matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - q31_t *pData; /**< points to the data of the matrix. */ - } csky_matrix_instance_q31; - - csky_status csky_mat_add_f32( - const csky_matrix_instance_f32 * pSrcA, - const csky_matrix_instance_f32 * pSrcB, - csky_matrix_instance_f32 * pDst); - - csky_status csky_mat_add_q15( - const csky_matrix_instance_q15 * pSrcA, - const csky_matrix_instance_q15 * pSrcB, - csky_matrix_instance_q15 * pDst); - - csky_status csky_mat_add_q31( - const csky_matrix_instance_q31 * pSrcA, - const csky_matrix_instance_q31 * pSrcB, - csky_matrix_instance_q31 * pDst); - - csky_status csky_mat_cmplx_mult_f32( - const csky_matrix_instance_f32 * pSrcA, - const csky_matrix_instance_f32 * pSrcB, - csky_matrix_instance_f32 * pDst); - - csky_status csky_mat_cmplx_mult_q15( - const csky_matrix_instance_q15 * pSrcA, - const csky_matrix_instance_q15 * pSrcB, - csky_matrix_instance_q15 * pDst); - - csky_status csky_mat_cmplx_mult_q31( - const csky_matrix_instance_q31 * pSrcA, - const csky_matrix_instance_q31 * pSrcB, - csky_matrix_instance_q31 * pDst); - - csky_status csky_mat_trans_f32( - const csky_matrix_instance_f32 * pSrc, - csky_matrix_instance_f32 * pDst); - - csky_status csky_mat_trans_q15( - const csky_matrix_instance_q15 * pSrc, - csky_matrix_instance_q15 * pDst); - - csky_status csky_mat_trans_q31( - const csky_matrix_instance_q31 * pSrc, - csky_matrix_instance_q31 * pDst); - - csky_status csky_mat_mult_f32( - const csky_matrix_instance_f32 * pSrcA, - const csky_matrix_instance_f32 * pSrcB, - csky_matrix_instance_f32 * pDst); - - csky_status csky_mat_mult_q15( - const csky_matrix_instance_q15 * pSrcA, - const csky_matrix_instance_q15 * pSrcB, - csky_matrix_instance_q15 * pDst); - - csky_status csky_mat_mult_fast_q15( - const csky_matrix_instance_q15 * pSrcA, - const csky_matrix_instance_q15 * pSrcB, - csky_matrix_instance_q15 * pDst); - - csky_status csky_mat_mult_q31( - const csky_matrix_instance_q31 * pSrcA, - const csky_matrix_instance_q31 * pSrcB, - csky_matrix_instance_q31 * pDst); - - csky_status csky_mat_mult_trans_q31( - const csky_matrix_instance_q31 * pSrcA, - const csky_matrix_instance_q31 * pSrcB, - csky_matrix_instance_q31 * pDst); - - csky_status csky_mat_mult_fast_q31( - const csky_matrix_instance_q31 * pSrcA, - const csky_matrix_instance_q31 * pSrcB, - csky_matrix_instance_q31 * pDst); - - csky_status csky_mat_sub_f32( - const csky_matrix_instance_f32 * pSrcA, - const csky_matrix_instance_f32 * pSrcB, - csky_matrix_instance_f32 * pDst); - - csky_status csky_mat_sub_q15( - const csky_matrix_instance_q15 * pSrcA, - const csky_matrix_instance_q15 * pSrcB, - csky_matrix_instance_q15 * pDst); - - csky_status csky_mat_sub_q31( - const csky_matrix_instance_q31 * pSrcA, - const csky_matrix_instance_q31 * pSrcB, - csky_matrix_instance_q31 * pDst); - - csky_status csky_mat_scale_f32( - const csky_matrix_instance_f32 * pSrc, - float32_t scale, - csky_matrix_instance_f32 * pDst); - - csky_status csky_mat_scale_q15( - const csky_matrix_instance_q15 * pSrc, - q15_t scaleFract, - int32_t shift, - csky_matrix_instance_q15 * pDst); - - csky_status csky_mat_scale_q31( - const csky_matrix_instance_q31 * pSrc, - q31_t scaleFract, - int32_t shift, - csky_matrix_instance_q31 * pDst); - - void csky_mat_init_q31( - csky_matrix_instance_q31 * S, - uint16_t nRows, - uint16_t nColumns, - q31_t * pData); - - void csky_mat_init_q15( - csky_matrix_instance_q15 * S, - uint16_t nRows, - uint16_t nColumns, - q15_t * pData); - - void csky_mat_init_f32( - csky_matrix_instance_f32 * S, - uint16_t nRows, - uint16_t nColumns, - float32_t * pData); - - /** - * @brief Instance structure for the Q15 PID Control. - */ - typedef struct - { - q15_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ - q15_t A1; - q15_t A2; - q15_t state[3]; /**< The state array of length 3. */ - q15_t Kp; /**< The proportional gain. */ - q15_t Ki; /**< The integral gain. */ - q15_t Kd; /**< The derivative gain. */ - } csky_pid_instance_q15; - - /** - * @brief Instance structure for the Q31 PID Control. - */ - typedef struct - { - q31_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ - q31_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */ - q31_t A2; /**< The derived gain, A2 = Kd . */ - q31_t state[3]; /**< The state array of length 3. */ - q31_t Kp; /**< The proportional gain. */ - q31_t Ki; /**< The integral gain. */ - q31_t Kd; /**< The derivative gain. */ - } csky_pid_instance_q31; - - /** - * @brief Instance structure for the floating-point PID Control. - */ - typedef struct - { - float32_t A0; /**< The derived gain, A0 = Kp + Ki + Kd . */ - float32_t A1; /**< The derived gain, A1 = -Kp - 2Kd. */ - float32_t A2; /**< The derived gain, A2 = Kd . */ - float32_t state[3]; /**< The state array of length 3. */ - float32_t Kp; /**< The proportional gain. */ - float32_t Ki; /**< The integral gain. */ - float32_t Kd; /**< The derivative gain. */ - } csky_pid_instance_f32; - - void csky_pid_init_f32( - csky_pid_instance_f32 * S, - int32_t resetStateFlag); - - void csky_pid_reset_f32( - csky_pid_instance_f32 * S); - - void csky_pid_init_q31( - csky_pid_instance_q31 * S, - int32_t resetStateFlag); - - void csky_pid_reset_q31( - csky_pid_instance_q31 * S); - - void csky_pid_init_q15( - csky_pid_instance_q15 * S, - int32_t resetStateFlag); - - void csky_pid_reset_q15( - csky_pid_instance_q15 * S); - - - /** - * @brief Instance structure for the floating-point Linear Interpolate function. - */ - typedef struct - { - uint32_t nValues; /**< nValues */ - float32_t x1; /**< x1 */ - float32_t xSpacing; /**< xSpacing */ - float32_t *pYData; /**< pointer to the table of Y values */ - } csky_linear_interp_instance_f32; - - /** - * @brief Instance structure for the floating-point bilinear interpolation function. - */ - typedef struct - { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - float32_t *pData; /**< points to the data table. */ - } csky_bilinear_interp_instance_f32; - - /** - * @brief Instance structure for the Q31 bilinear interpolation function. - */ - typedef struct - { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - q31_t *pData; /**< points to the data table. */ - } csky_bilinear_interp_instance_q31; - - /** - * @brief Instance structure for the Q15 bilinear interpolation function. - */ - typedef struct - { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - q15_t *pData; /**< points to the data table. */ - } csky_bilinear_interp_instance_q15; - - /** - * @brief Instance structure for the Q15 bilinear interpolation function. - */ - typedef struct - { - uint16_t numRows; /**< number of rows in the data table. */ - uint16_t numCols; /**< number of columns in the data table. */ - q7_t *pData; /**< points to the data table. */ - } csky_bilinear_interp_instance_q7; - - void csky_mult_q7( - q7_t * pSrcA, - q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - void csky_mult_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - void csky_mult_rnd_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - void csky_mult_q31( - q31_t * pSrcA, - q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - void csky_mult_f32( - float32_t * pSrcA, - float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - q15_t *pTwiddle; /**< points to the Sin twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } csky_cfft_radix2_instance_q15; - - /** - * @brief Instance structure for the Q15 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - q15_t *pTwiddle; /**< points to the twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } csky_cfft_radix4_instance_q15; - - /** - * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - q31_t *pTwiddle; /**< points to the Twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } csky_cfft_radix2_instance_q31; - - /** - * @brief Instance structure for the Q31 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - q31_t *pTwiddle; /**< points to the twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } csky_cfft_radix4_instance_q31; - - /** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - float32_t onebyfftLen; /**< value of 1/fftLen. */ - } csky_cfft_radix2_instance_f32; - - /** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - float32_t onebyfftLen; /**< value of 1/fftLen. */ - } csky_cfft_radix4_instance_f32; - - /** - * @brief Instance structure for the fixed-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - const q15_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. */ - } csky_cfft_instance_q15; - -void csky_cfft_q15( - const csky_cfft_instance_q15 * S, - q15_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - /** - * @brief Instance structure for the fixed-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - const q31_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. */ - } csky_cfft_instance_q31; - -void csky_cfft_q31( - const csky_cfft_instance_q31 * S, - q31_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - /** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. */ - } csky_cfft_instance_f32; - - void csky_cfft_f32( - const csky_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - /** - * @brief Instance structure for the Q15 RFFT/RIFFT function. - */ - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ - const csky_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */ - } csky_rfft_instance_q15; - - csky_status csky_rfft_init_q15( - csky_rfft_instance_q15 * S, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - - void csky_rfft_q15( - const csky_rfft_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst); - - /** - * @brief Instance structure for the Q31 RFFT/RIFFT function. - */ - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - q31_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ - const csky_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */ - } csky_rfft_instance_q31; - - csky_status csky_rfft_init_q31( - csky_rfft_instance_q31 * S, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - - void csky_rfft_q31( - const csky_rfft_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst); - - /** - * @brief Instance structure for the floating-point RFFT/RIFFT function. - */ - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint16_t fftLenBy2; /**< length of the complex FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - float32_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ - float32_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ - csky_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */ - } csky_rfft_instance_f32; - - csky_status csky_rfft_init_f32( - csky_rfft_instance_f32 * S, - csky_cfft_radix4_instance_f32 * S_CFFT, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - - void csky_rfft_f32( - const csky_rfft_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst); - - /** - * @brief Instance structure for the floating-point RFFT/RIFFT function. - */ -typedef struct - { - csky_cfft_instance_f32 Sint; /**< Internal CFFT structure. */ - uint16_t fftLenRFFT; /**< length of the real sequence */ - float32_t * pTwiddleRFFT; /**< Twiddle factors real stage */ - } csky_rfft_fast_instance_f32 ; - -csky_status csky_rfft_fast_init_f32 ( - csky_rfft_fast_instance_f32 * S, - uint16_t fftLen); - -void csky_rfft_fast_f32( - csky_rfft_fast_instance_f32 * S, - float32_t * p, float32_t * pOut, - uint8_t ifftFlag); - - /** - * @brief Instance structure for the floating-point DCT4/IDCT4 function. - */ - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - float32_t normalize; /**< normalizing factor. */ - float32_t *pTwiddle; /**< points to the twiddle factor table. */ - float32_t *pCosFactor; /**< points to the cosFactor table. */ - csky_rfft_fast_instance_f32 *pRfft; /**< points to the real FFT fast instance. */ - csky_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */ - } csky_dct4_instance_f32; - - csky_status csky_dct4_init_f32( - csky_dct4_instance_f32 * S, - csky_rfft_fast_instance_f32 * S_RFFT, - csky_cfft_radix4_instance_f32 * S_CFFT, - uint16_t N, - uint16_t Nby2, - float32_t normalize); - - void csky_dct4_f32( - const csky_dct4_instance_f32 * S, - float32_t * pState, - float32_t * pInlineBuffer); - - - /** - * @brief Instance structure for the Q31 DCT4/IDCT4 function. - */ - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q31_t normalize; /**< normalizing factor. */ - q31_t *pTwiddle; /**< points to the twiddle factor table. */ - q31_t *pCosFactor; /**< points to the cosFactor table. */ - csky_rfft_instance_q31 *pRfft; /**< points to the real FFT instance. */ - csky_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */ - } csky_dct4_instance_q31; - - csky_status csky_dct4_init_q31( - csky_dct4_instance_q31 * S, - csky_rfft_instance_q31 * S_RFFT, - csky_cfft_radix4_instance_q31 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q31_t normalize); - - void csky_dct4_q31( - const csky_dct4_instance_q31 * S, - q31_t * pState, - q31_t * pInlineBuffer); - - /** - * @brief Instance structure for the Q15 DCT4/IDCT4 function. - */ - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q15_t normalize; /**< normalizing factor. */ - q15_t *pTwiddle; /**< points to the twiddle factor table. */ - q15_t *pCosFactor; /**< points to the cosFactor table. */ - csky_rfft_instance_q15 *pRfft; /**< points to the real FFT instance. */ - csky_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */ - } csky_dct4_instance_q15; - - csky_status csky_dct4_init_q15( - csky_dct4_instance_q15 * S, - csky_rfft_instance_q15 * S_RFFT, - csky_cfft_radix4_instance_q15 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q15_t normalize); - - void csky_dct4_q15( - const csky_dct4_instance_q15 * S, - q15_t * pState, - q15_t * pInlineBuffer); - - void csky_add_f32( - float32_t * pSrcA, - float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - void csky_add_q7( - q7_t * pSrcA, - q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - void csky_add_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - void csky_add_q31( - q31_t * pSrcA, - q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - void csky_sub_f32( - float32_t * pSrcA, - float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - void csky_sub_q7( - q7_t * pSrcA, - q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - void csky_sub_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - void csky_sub_q31( - q31_t * pSrcA, - q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - void csky_scale_f32( - float32_t * pSrc, - float32_t scale, - float32_t * pDst, - uint32_t blockSize); - - void csky_scale_q7( - q7_t * pSrc, - q7_t scaleFract, - int8_t shift, - q7_t * pDst, - uint32_t blockSize); - - void csky_scale_q15( - q15_t * pSrc, - q15_t scaleFract, - int8_t shift, - q15_t * pDst, - uint32_t blockSize); - - void csky_scale_q31( - q31_t * pSrc, - q31_t scaleFract, - int8_t shift, - q31_t * pDst, - uint32_t blockSize); - - void csky_abs_q7( - q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_abs_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_abs_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_abs_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_abs_max_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_abs_max_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - void csky_dot_prod_f32( - float32_t * pSrcA, - float32_t * pSrcB, - uint32_t blockSize, - float32_t * result); - - void csky_dot_prod_q7( - q7_t * pSrcA, - q7_t * pSrcB, - uint32_t blockSize, - q31_t * result); - - void csky_dot_prod_q15( - q15_t * pSrcA, - q15_t * pSrcB, - uint32_t blockSize, - q63_t * result); - - void csky_dot_prod_q31( - q31_t * pSrcA, - q31_t * pSrcB, - uint32_t blockSize, - q63_t * result); - - void csky_shift_q7( - q7_t * pSrc, - int8_t shiftBits, - q7_t * pDst, - uint32_t blockSize); - - void csky_shift_q15( - q15_t * pSrc, - int8_t shiftBits, - q15_t * pDst, - uint32_t blockSize); - - void csky_shift_q31( - q31_t * pSrc, - int8_t shiftBits, - q31_t * pDst, - uint32_t blockSize); - - void csky_offset_f32( - float32_t * pSrc, - float32_t offset, - float32_t * pDst, - uint32_t blockSize); - - void csky_offset_q7( - q7_t * pSrc, - q7_t offset, - q7_t * pDst, - uint32_t blockSize); - - void csky_offset_q15( - q15_t * pSrc, - q15_t offset, - q15_t * pDst, - uint32_t blockSize); - - void csky_offset_q31( - q31_t * pSrc, - q31_t offset, - q31_t * pDst, - uint32_t blockSize); - - void csky_negate_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_negate_q7( - q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_negate_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_negate_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_copy_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_copy_q7( - q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_copy_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_copy_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_fill_f32( - float32_t value, - float32_t * pDst, - uint32_t blockSize); - - void csky_fill_q7( - q7_t value, - q7_t * pDst, - uint32_t blockSize); - - void csky_fill_q15( - q15_t value, - q15_t * pDst, - uint32_t blockSize); - - void csky_fill_q31( - q31_t value, - q31_t * pDst, - uint32_t blockSize); - - void csky_conv_f32( - float32_t * pSrcA, - uint32_t srcALen, - float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst); - - void csky_conv_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - void csky_conv_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - void csky_conv_fast_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - void csky_conv_fast_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - void csky_conv_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - void csky_conv_fast_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - void csky_conv_opt_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - void csky_conv_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst); - - csky_status csky_conv_partial_f32( - float32_t * pSrcA, - uint32_t srcALen, - float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - csky_status csky_conv_partial_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - csky_status csky_conv_partial_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - csky_status csky_conv_partial_fast_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - csky_status csky_conv_partial_fast_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - csky_status csky_conv_partial_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - csky_status csky_conv_partial_fast_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - csky_status csky_conv_partial_opt_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - csky_status csky_conv_partial_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - /** - * functions for the yunVoice functions. - */ - q15_t csky_dsp_lib_vec_max_abs16( - q15_t * A, - uint32_t N); - - q31_t csky_dsp_lib_vec_max_abs32( - q31_t * A, - uint32_t N); - - void csky_dsp_lib_vec_abs16( - q15_t * A, - uint32_t N, - q15_t * C); - - void csky_dsp_lib_vec_abs32( - q31_t * A, - uint32_t N, - q31_t * C); - - void csky_dsp_lib_vec_add16( - q15_t * A, - q15_t * B, - uint32_t N, - q15_t * C); - - void csky_dsp_lib_vec_add32( - q31_t * A, - q31_t * B, - uint32_t N, - q31_t * C); - - void csky_dsp_lib_vec_cx_conj_q15( - q15_t * A, - uint32_t N, - q15_t * B); - - void csky_dsp_lib_vec_cx_conj_q31( - q31_t * A, - uint32_t N, - q31_t * C); - - q31_t csky_dsp_lib_vec_dot_q15( - q15_t * A, - q15_t * B, - uint32_t N); - - q31_t csky_dsp_lib_vec_dot_q31( - q31_t * A, - q31_t * B, - uint32_t N); - - void csky_dsp_lib_mat_cx_add16( - cq15_t * A, - cq15_t * B, - uint32_t N, - uint32_t M, - cq15_t * C); - - void csky_dsp_lib_mat_cx_add32( - cq31_t * A, - cq31_t * B, - uint32_t N, - uint32_t M, - cq31_t * C); - - void csky_dsp_lib_mat_cx_mul_q15( - cq15_t * A, - cq15_t * B, - uint32_t N, - uint32_t M, - uint32_t L, - cq15_t * C); - - void csky_dsp_lib_mat_cx_mul_q31( - cq31_t * A, - cq31_t * B, - uint32_t N, - uint32_t M, - uint32_t L, - cq31_t * C); - - void csky_dsp_lib_mat_cx_sub16( - cq15_t * A, - cq15_t * B, - uint32_t N, - uint32_t M, - cq15_t * C); - - void csky_dsp_lib_mat_cx_sub32( - cq31_t * A, - cq31_t * B, - uint32_t N, - uint32_t M, - cq31_t * C); - - void csky_dsp_lib_vec_mul_q15( - q15_t * A, - q15_t * B, - uint32_t N, - q15_t * C); - - void csky_dsp_lib_vec_mul_q31( - q31_t * A, - q31_t * B, - uint32_t N, - q31_t * C); - - q31_t csky_dsp_lib_pow_int32( - q31_t arg_in_x, - q15_t arg_exp_in_x, - q31_t arg_in_y, - q15_t arg_exp_in_y, - q31_t *arg_exp_out); - - void csky_dsp_lib_vec_scale_q15( - q15_t * A, - q15_t scaleFract, - int8_t shift, - q15_t * B, - uint32_t N); - - void csky_dsp_lib_vec_scale_q31( - q31_t * A, - q31_t scaleFract, - int8_t shift, - q31_t * B, - uint32_t N); - - void csky_dsp_lib_vec_shf16( - q15_t * A, - int8_t shift_val, - uint32_t N, - q15_t * C); - - void csky_dsp_lib_vec_shf32( - q31_t * A, - q31_t shift_val, - uint32_t N, - q31_t * C); - - q15_t csky_dsp_lib_sqrt_int32( - q31_t x, - uint32_t rnd_flag); - - void csky_dsp_lib_vec_sub16( - q15_t * A, - q15_t * B, - uint32_t N, - q15_t * C); - - void csky_dsp_lib_vec_sub32( - q31_t * A, - q31_t * B, - uint32_t N, - q31_t * C); - - q63_t csky_dsp_lib_vec_sum16( - q15_t * A, - uint32_t N); - - q63_t csky_dsp_lib_vec_sum32( - q31_t * A, - uint32_t N); - - void csky_fft_lib_cx16_fft( - q31_t log2_buf_len, - q15_t * in_buf, - q15_t * out_buf, - const q15_t * twi_table, - const uint16_t * bitrev_tbl, - q15_t * temp_buf, - q7_t * ScaleShift, - q31_t br); - - void csky_fft_lib_cx32_fft( - q31_t log2_buf_len, - q31_t * in_buf, - q31_t * out_buf, - const q31_t * twi_table, - const uint16_t * bitrev_tbl, - q31_t * temp_buf, - q31_t br); - - void csky_fft_lib_cx16_ifft( - q31_t log2_buf_len, - q15_t * in_buf, - q15_t * out_buf, - const q15_t * twi_table, - const uint16_t * bitrev_tbl, - q15_t * temp_buf, - q7_t * ScaleShift, - q31_t br); - - void csky_fft_lib_cx32_ifft( - q31_t log2_buf_len, - q31_t * in_buf, - q31_t * out_buf, - const q31_t * twi_table, - const uint16_t * bitrev_tbl, - q31_t * temp_buf, - q31_t br); - - void csky_fft_lib_int16_fft( - q31_t log2_buf_len, - q15_t * in_buf, - q15_t * out_buf, - const q15_t * twi_table, - const q15_t * last_stage_twi_table, - const uint16_t * bitrev_tbl, - q15_t * temp_buf, - q7_t * ScaleShift, - q31_t br); - - void csky_fft_lib_int32_fft( - q31_t log2_buf_len, - q31_t * in_buf, - q31_t * out_buf, - const q31_t * twi_table, - const q31_t * last_stage_twi_table, - const uint16_t * bitrev_tbl, - q31_t * temp_buf, - q31_t br); - - void csky_fft_lib_int16_ifft( - q31_t log2_buf_len, - q15_t * in_buf, - q15_t * out_buf, - const q15_t * twi_table, - const q15_t * last_stage_twi_table, - const uint16_t * bitrev_tbl, - q15_t * temp_buf, - q7_t * ScaleShift, - q31_t br); - - void csky_fft_lib_int32_ifft( - q31_t log2_buf_len, - q31_t * in_buf, - q31_t * out_buf, - const q31_t * twi_table, - const q31_t * last_stage_twi_table, - const uint16_t * bitrev_tbl, - q31_t * temp_buf, - q31_t br); - - /** - * @brief Instance structure for the Q15 FIR decimator. - */ - typedef struct - { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - } csky_fir_decimate_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR decimator. - */ - typedef struct - { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - } csky_fir_decimate_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR decimator. - */ - typedef struct - { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - } csky_fir_decimate_instance_f32; - - void csky_fir_decimate_f32( - const csky_fir_decimate_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - csky_status csky_fir_decimate_init_f32( - csky_fir_decimate_instance_f32 * S, - uint16_t numTaps, - uint8_t M, - float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - void csky_fir_decimate_q15( - const csky_fir_decimate_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_fir_decimate_fast_q15( - const csky_fir_decimate_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - csky_status csky_fir_decimate_init_q15( - csky_fir_decimate_instance_q15 * S, - uint16_t numTaps, - uint8_t M, - q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - void csky_fir_decimate_q31( - const csky_fir_decimate_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_fir_decimate_fast_q31( - csky_fir_decimate_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - csky_status csky_fir_decimate_init_q31( - csky_fir_decimate_instance_q31 * S, - uint16_t numTaps, - uint8_t M, - q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 FIR interpolator. - */ - typedef struct - { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - q15_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */ - } csky_fir_interpolate_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR interpolator. - */ - typedef struct - { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - q31_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */ - } csky_fir_interpolate_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR interpolator. - */ - typedef struct - { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - float32_t *pState; /**< points to the state variable array. The array is of length phaseLength+numTaps-1. */ - } csky_fir_interpolate_instance_f32; - - void csky_fir_interpolate_q15( - const csky_fir_interpolate_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - csky_status csky_fir_interpolate_init_q15( - csky_fir_interpolate_instance_q15 * S, - uint8_t L, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - void csky_fir_interpolate_q31( - const csky_fir_interpolate_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - csky_status csky_fir_interpolate_init_q31( - csky_fir_interpolate_instance_q31 * S, - uint8_t L, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - void csky_fir_interpolate_f32( - const csky_fir_interpolate_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - csky_status csky_fir_interpolate_init_f32( - csky_fir_interpolate_instance_f32 * S, - uint8_t L, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - - /** - * @brief Instance structure for the high precision Q31 Biquad cascade filter. - */ - typedef struct - { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - q63_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */ - q31_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - uint8_t postShift; /**< additional shift, in bits, applied to each output sample. */ - } csky_biquad_cas_df1_32x64_ins_q31; - - void csky_biquad_cas_df1_32x64_q31( - const csky_biquad_cas_df1_32x64_ins_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_biquad_cas_df1_32x64_init_q31( - csky_biquad_cas_df1_32x64_ins_q31 * S, - uint8_t numStages, - q31_t * pCoeffs, - q63_t * pState, - uint8_t postShift); - - - /** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ - typedef struct - { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */ - float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - } csky_biquad_cascade_df2T_instance_f32; - - /** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ - typedef struct - { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */ - float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - } csky_biquad_cascade_stereo_df2T_instance_f32; - - /** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ - typedef struct - { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float64_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */ - float64_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - } csky_biquad_cascade_df2T_instance_f64; - - void csky_biquad_cascade_df2T_f32( - const csky_biquad_cascade_df2T_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_biquad_cascade_stereo_df2T_f32( - const csky_biquad_cascade_stereo_df2T_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_biquad_cascade_df2T_f64( - const csky_biquad_cascade_df2T_instance_f64 * S, - float64_t * pSrc, - float64_t * pDst, - uint32_t blockSize); - - void csky_biquad_cascade_df2T_init_f32( - csky_biquad_cascade_df2T_instance_f32 * S, - uint8_t numStages, - float32_t * pCoeffs, - float32_t * pState); - - void csky_biquad_cascade_stereo_df2T_init_f32( - csky_biquad_cascade_stereo_df2T_instance_f32 * S, - uint8_t numStages, - float32_t * pCoeffs, - float32_t * pState); - - - void csky_biquad_cascade_df2T_init_f64( - csky_biquad_cascade_df2T_instance_f64 * S, - uint8_t numStages, - float64_t * pCoeffs, - float64_t * pState); - - - /** - * @brief Instance structure for the Q15 FIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of filter stages. */ - q15_t *pState; /**< points to the state variable array. The array is of length numStages. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ - } csky_fir_lattice_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of filter stages. */ - q31_t *pState; /**< points to the state variable array. The array is of length numStages. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ - } csky_fir_lattice_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of filter stages. */ - float32_t *pState; /**< points to the state variable array. The array is of length numStages. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ - } csky_fir_lattice_instance_f32; - - void csky_fir_lattice_init_q15( - csky_fir_lattice_instance_q15 * S, - uint16_t numStages, - q15_t * pCoeffs, - q15_t * pState); - - void csky_fir_lattice_q15( - const csky_fir_lattice_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_fir_lattice_init_q31( - csky_fir_lattice_instance_q31 * S, - uint16_t numStages, - q31_t * pCoeffs, - q31_t * pState); - - void csky_fir_lattice_q31( - const csky_fir_lattice_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_fir_lattice_init_f32( - csky_fir_lattice_instance_f32 * S, - uint16_t numStages, - float32_t * pCoeffs, - float32_t * pState); - - void csky_fir_lattice_f32( - const csky_fir_lattice_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 IIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of stages in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - q15_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - q15_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ - } csky_iir_lattice_instance_q15; - - /** - * @brief Instance structure for the Q31 IIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of stages in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - q31_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - q31_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ - } csky_iir_lattice_instance_q31; - - /** - * @brief Instance structure for the floating-point IIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of stages in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - float32_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - float32_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ - } csky_iir_lattice_instance_f32; - - void csky_iir_lattice_f32( - const csky_iir_lattice_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_iir_lattice_init_f32( - csky_iir_lattice_instance_f32 * S, - uint16_t numStages, - float32_t * pkCoeffs, - float32_t * pvCoeffs, - float32_t * pState, - uint32_t blockSize); - - void csky_iir_lattice_q31( - const csky_iir_lattice_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_iir_lattice_init_q31( - csky_iir_lattice_instance_q31 * S, - uint16_t numStages, - q31_t * pkCoeffs, - q31_t * pvCoeffs, - q31_t * pState, - uint32_t blockSize); - - void csky_iir_lattice_q15( - const csky_iir_lattice_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_iir_lattice_init_q15( - csky_iir_lattice_instance_q15 * S, - uint16_t numStages, - q15_t * pkCoeffs, - q15_t * pvCoeffs, - q15_t * pState, - uint32_t blockSize); - - - /** - * @brief Instance structure for the floating-point LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - float32_t mu; /**< step size that controls filter coefficient updates. */ - } csky_lms_instance_f32; - - void csky_lms_f32( - const csky_lms_instance_f32 * S, - float32_t * pSrc, - float32_t * pRef, - float32_t * pOut, - float32_t * pErr, - uint32_t blockSize); - - void csky_lms_init_f32( - csky_lms_instance_f32 * S, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - float32_t mu, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q15_t mu; /**< step size that controls filter coefficient updates. */ - uint32_t postShift; /**< bit shift applied to coefficients. */ - } csky_lms_instance_q15; - - void csky_lms_init_q15( - csky_lms_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - q15_t mu, - uint32_t blockSize, - uint32_t postShift); - - void csky_lms_q15( - const csky_lms_instance_q15 * S, - q15_t * pSrc, - q15_t * pRef, - q15_t * pOut, - q15_t * pErr, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q31 LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q31_t mu; /**< step size that controls filter coefficient updates. */ - uint32_t postShift; /**< bit shift applied to coefficients. */ - } csky_lms_instance_q31; - - void csky_lms_q31( - const csky_lms_instance_q31 * S, - q31_t * pSrc, - q31_t * pRef, - q31_t * pOut, - q31_t * pErr, - uint32_t blockSize); - - void csky_lms_init_q31( - csky_lms_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - q31_t mu, - uint32_t blockSize, - uint32_t postShift); - - - /** - * @brief Instance structure for the floating-point normalized LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - float32_t mu; /**< step size that control filter coefficient updates. */ - float32_t energy; /**< saves previous frame energy. */ - float32_t x0; /**< saves previous input sample. */ - } csky_lms_norm_instance_f32; - - void csky_lms_norm_f32( - csky_lms_norm_instance_f32 * S, - float32_t * pSrc, - float32_t * pRef, - float32_t * pOut, - float32_t * pErr, - uint32_t blockSize); - - void csky_lms_norm_init_f32( - csky_lms_norm_instance_f32 * S, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - float32_t mu, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q31 normalized LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q31_t mu; /**< step size that controls filter coefficient updates. */ - uint8_t postShift; /**< bit shift applied to coefficients. */ - q31_t *recipTable; /**< points to the reciprocal initial value table. */ - q31_t energy; /**< saves previous frame energy. */ - q31_t x0; /**< saves previous input sample. */ - } csky_lms_norm_instance_q31; - - void csky_lms_norm_q31( - csky_lms_norm_instance_q31 * S, - q31_t * pSrc, - q31_t * pRef, - q31_t * pOut, - q31_t * pErr, - uint32_t blockSize); - - void csky_lms_norm_init_q31( - csky_lms_norm_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - q31_t mu, - uint32_t blockSize, - uint8_t postShift); - - - /** - * @brief Instance structure for the Q15 normalized LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< Number of coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q15_t mu; /**< step size that controls filter coefficient updates. */ - uint8_t postShift; /**< bit shift applied to coefficients. */ - q15_t *recipTable; /**< Points to the reciprocal initial value table. */ - q15_t energy; /**< saves previous frame energy. */ - q15_t x0; /**< saves previous input sample. */ - } csky_lms_norm_instance_q15; - - void csky_lms_norm_q15( - csky_lms_norm_instance_q15 * S, - q15_t * pSrc, - q15_t * pRef, - q15_t * pOut, - q15_t * pErr, - uint32_t blockSize); - - void csky_lms_norm_init_q15( - csky_lms_norm_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - q15_t mu, - uint32_t blockSize, - uint8_t postShift); - - void csky_correlate_f32( - float32_t * pSrcA, - uint32_t srcALen, - float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst); - - void csky_correlate_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch); - - void csky_correlate_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - void csky_correlate_fast_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - void csky_correlate_fast_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch); - - void csky_correlate_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - void csky_correlate_fast_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - void csky_correlate_opt_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - void csky_correlate_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst); - - - /** - * @brief Instance structure for the floating-point sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - float32_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } csky_fir_sparse_instance_f32; - - /** - * @brief Instance structure for the Q31 sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q31_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } csky_fir_sparse_instance_q31; - - /** - * @brief Instance structure for the Q15 sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q15_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } csky_fir_sparse_instance_q15; - - /** - * @brief Instance structure for the Q7 sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q7_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } csky_fir_sparse_instance_q7; - - void csky_fir_sparse_f32( - csky_fir_sparse_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - float32_t * pScratchIn, - uint32_t blockSize); - - void csky_fir_sparse_init_f32( - csky_fir_sparse_instance_f32 * S, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - void csky_fir_sparse_q31( - csky_fir_sparse_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - q31_t * pScratchIn, - uint32_t blockSize); - - void csky_fir_sparse_init_q31( - csky_fir_sparse_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - void csky_fir_sparse_q15( - csky_fir_sparse_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - q15_t * pScratchIn, - q31_t * pScratchOut, - uint32_t blockSize); - - void csky_fir_sparse_init_q15( - csky_fir_sparse_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - void csky_fir_sparse_q7( - csky_fir_sparse_instance_q7 * S, - q7_t * pSrc, - q7_t * pDst, - q7_t * pScratchIn, - q31_t * pScratchOut, - uint32_t blockSize); - - void csky_fir_sparse_init_q7( - csky_fir_sparse_instance_q7 * S, - uint16_t numTaps, - q7_t * pCoeffs, - q7_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - void csky_sin_cos_f32( - float32_t theta, - float32_t * pSinVal, - float32_t * pCosVal); - - void csky_sin_cos_q31( - q31_t theta, - q31_t * pSinVal, - q31_t * pCosVal); - - void csky_cmplx_conj_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - void csky_cmplx_conj_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - void csky_cmplx_conj_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - void csky_cmplx_mag_squared_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - void csky_cmplx_mag_squared_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - void csky_cmplx_mag_squared_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - void csky_vsqrt_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - void csky_vsqrt_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - void csky_vsqrt_q7( - q7_t * pSrc, - q7_t * pDst, - uint32_t numSamples); - -/** - * @ingroup groupController - */ - -/** - * @defgroup PID PID Motor Control - * - * A Proportional Integral Derivative (PID) controller is a generic feedback control - * loop mechanism widely used in industrial control systems. - * A PID controller is the most commonly used type of feedback controller. - * - * This set of functions implements (PID) controllers - * for Q15, Q31, and floating-point data types. The functions operate on a single sample - * of data and each call to the function returns a single processed value. - * S points to an instance of the PID control data structure. in - * is the input sample value. The functions return the output value. - * - * \par Algorithm: - *
- *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
- *    A0 = Kp + Ki + Kd
- *    A1 = (-Kp ) - (2 * Kd )
- *    A2 = Kd  
- * - * \par - * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant - * - * \par - * \image html PID.gif "Proportional Integral Derivative Controller" - * - * \par - * The PID controller calculates an "error" value as the difference between - * the measured output and the reference input. - * The controller attempts to minimize the error by adjusting the process control inputs. - * The proportional value determines the reaction to the current error, - * the integral value determines the reaction based on the sum of recent errors, - * and the derivative value determines the reaction based on the rate at which the error has been changing. - * - * \par Instance Structure - * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure. - * A separate instance structure must be defined for each PID Controller. - * There are separate instance structure declarations for each of the 3 supported data types. - * - * \par Reset Functions - * There is also an associated reset function for each data type which clears the state array. - * - * \par Initialization Functions - * There is also an associated initialization function for each data type. - * The initialization function performs the following operations: - * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains. - * - Zeros out the values in the state buffer. - * - * \par - * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function. - * - * \par Fixed-Point Behavior - * Care must be taken when using the fixed-point versions of the PID Controller functions. - * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - -/** - * @addtogroup PID - * @{ - */ - -/** - * @brief Process function for the floating-point PID Control. - * @param[in,out] S is an instance of the floating-point PID Control structure - * @param[in] in input sample to process - * @return out processed output sample. - */ - __ALWAYS_STATIC_INLINE float32_t csky_pid_f32( - csky_pid_instance_f32 * S, - float32_t in) - { - float32_t out; - - /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2] */ - out = (S->A0 * in) + - (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]); - - /* Update state */ - S->state[1] = S->state[0]; - S->state[0] = in; - S->state[2] = out; - - /* return to application */ - return (out); - } - -/** - * @} -*/ // end of PID group - - -/** - * @addtogroup PID - * @{ - */ - -/** - * @brief Process function for the Q31 PID Control. - * @param[in,out] S points to an instance of the Q31 PID Control structure - * @param[in] in input sample to process - * @return out processed output sample. - * - * Scaling and Overflow Behavior: - * \par - * The function is implemented using an internal 64-bit accumulator. - * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit. - * Thus, if the accumulator result overflows it wraps around rather than clip. - * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions. - * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format. - */ - __ALWAYS_STATIC_INLINE q31_t csky_pid_q31( - csky_pid_instance_q31 * S, - q31_t in) - { - q63_t acc; - q31_t out; - - #ifdef CSKY_SIMD - /* acc = A0 * x[n] */ - acc = mult_32x32_keep64(S->A0, in); - - /* acc += A1 * x[n-1] */ - acc = multAcc_32x32_keep64(acc, S->A1, S->state[0]); - - /* acc += A2 * x[n-2] */ - acc = multAcc_32x32_keep64(acc, S->A2, S->state[1]); - - /* convert output to 1.31 format to add y[n-1] */ - out = dext_31(acc); - #else - /* acc = A0 * x[n] */ - acc = (q63_t) S->A0 * in; - - /* acc += A1 * x[n-1] */ - acc += (q63_t) S->A1 * S->state[0]; - - /* acc += A2 * x[n-2] */ - acc += (q63_t) S->A2 * S->state[1]; - - /* convert output to 1.31 format to add y[n-1] */ - out = (q31_t) (acc >> 31u); - #endif - - /* out += y[n-1] */ - out += S->state[2]; - - /* Update state */ - S->state[1] = S->state[0]; - S->state[0] = in; - S->state[2] = out; - - /* return to application */ - return (out); - } - -/** - * @} - */ // end of PID group - -/** - * @addtogroup PID - * @{ - */ -/** - * @brief Process function for the Q15 PID Control. - * @param[in,out] S points to an instance of the Q15 PID Control structure - * @param[in] in input sample to process - * @return out processed output sample. - * - * Scaling and Overflow Behavior: - * \par - * The function is implemented using a 64-bit internal accumulator. - * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result. - * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. - * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. - * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. - * Lastly, the accumulator is saturated to yield a result in 1.15 format. - */ - __ALWAYS_STATIC_INLINE q15_t csky_pid_q15( - csky_pid_instance_q15 * S, - q15_t in) - { - q63_t acc; - q15_t out; - - /* acc = A0 * x[n] */ - acc = ((q31_t) S->A0) * in; - - /* acc += A1 * x[n-1] + A2 * x[n-2] */ - acc += (q31_t) S->A1 * S->state[0]; - acc += (q31_t) S->A2 * S->state[1]; - - /* acc += y[n-1] */ - acc += (q31_t) S->state[2] << 15; - - /* saturate the output */ - out = (q15_t) (__SSAT_16((acc >> 15))); - - /* Update state */ - S->state[1] = S->state[0]; - S->state[0] = in; - S->state[2] = out; - - /* return to application */ - return (out); - } -/** - * @} - */ // end of PID group - - csky_status csky_mat_inverse_f32( - const csky_matrix_instance_f32 * src, - csky_matrix_instance_f32 * dst); - - csky_status csky_mat_inverse_f64( - const csky_matrix_instance_f64 * src, - csky_matrix_instance_f64 * dst); - -/** - * @ingroup groupController - */ - -/** - * @defgroup clarke Vector Clarke Transform - * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector. - * Generally the Clarke transform uses three-phase currents Ia, Ib and Ic to calculate currents - * in the two-phase orthogonal stator axis Ialpha and Ibeta. - * When Ialpha is superposed with Ia as shown in the figure below - * \image html clarke.gif Stator current space vector and its components in (a,b). - * and Ia + Ib + Ic = 0, in this condition Ialpha and Ibeta - * can be calculated using only Ia and Ib. - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html clarkeFormula.gif - * where Ia and Ib are the instantaneous stator phases and - * pIalpha and pIbeta are the two coordinates of time invariant vector. - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Clarke transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - -/** - * @addtogroup clarke - * @{ - */ - -/** - * - * @brief Floating-point Clarke transform - * @param[in] Ia input three-phase coordinate a - * @param[in] Ib input three-phase coordinate b - * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - * @param[out] pIbeta points to output two-phase orthogonal vector axis beta - */ - __ALWAYS_STATIC_INLINE void csky_clarke_f32( - float32_t Ia, - float32_t Ib, - float32_t * pIalpha, - float32_t * pIbeta) - { - /* Calculate pIalpha using the equation, pIalpha = Ia */ - *pIalpha = Ia; - - /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */ - *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib); - } - -/** - * @} - */ // end of clarke group - - -/** - * @addtogroup clarke - * @{ - */ - -/** - * @brief Clarke transform for Q31 version - * @param[in] Ia input three-phase coordinate a - * @param[in] Ib input three-phase coordinate b - * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - * @param[out] pIbeta points to output two-phase orthogonal vector axis beta - * - * Scaling and Overflow Behavior: - * \par - * The function is implemented using an internal 32-bit accumulator. - * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - * There is saturation on the addition, hence there is no risk of overflow. - */ - __ALWAYS_STATIC_INLINE void csky_clarke_q31( - q31_t Ia, - q31_t Ib, - q31_t * pIalpha, - q31_t * pIbeta) - { - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - - /* Calculating pIalpha from Ia by equation pIalpha = Ia */ - *pIalpha = Ia; - - #ifdef CSKY_SIMD - /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */ - product1 = mult_32x32_dext_30(Ia, 0x24F34E8B); - - /* Intermediate product is calculated by (2/sqrt(3) * Ib) */ - product2 = mult_32x32_dext_30(Ib, 0x49E69D16); - #else - /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */ - product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30); - - /* Intermediate product is calculated by (2/sqrt(3) * Ib) */ - product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30); - #endif - - /* pIbeta is calculated by adding the intermediate products */ - *pIbeta = __QADD(product1, product2); - } - - -/** - * @} - */ // end of clarke group - - void csky_q7_to_q31( - q7_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - -/** - * @ingroup groupController - */ -/** - * @defgroup inv_clarke Vector Inverse Clarke Transform - * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases. - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html clarkeInvFormula.gif - * where pIa and pIb are the instantaneous stator phases and - * Ialpha and Ibeta are the two coordinates of time invariant vector. - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Clarke transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ - -/** - * @addtogroup inv_clarke - * @{ - */ - - /** - * @brief Floating-point Inverse Clarke transform - * @param[in] Ialpha input two-phase orthogonal vector axis alpha - * @param[in] Ibeta input two-phase orthogonal vector axis beta - * @param[out] pIa points to output three-phase coordinate a - * @param[out] pIb points to output three-phase coordinate b - */ - __ALWAYS_STATIC_INLINE void csky_inv_clarke_f32( - float32_t Ialpha, - float32_t Ibeta, - float32_t * pIa, - float32_t * pIb) - { - /* Calculating pIa from Ialpha by equation pIa = Ialpha */ - *pIa = Ialpha; - - /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */ - *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta; - } - - -/** - * @} - */ // end of inv_clarke group - -/** - * @addtogroup inv_clarke - * @{ - */ - -/** - * @brief Inverse Clarke transform for Q31 version - * @param[in] Ialpha input two-phase orthogonal vector axis alpha - * @param[in] Ibeta input two-phase orthogonal vector axis beta - * @param[out] pIa points to output three-phase coordinate a - * @param[out] pIb points to output three-phase coordinate b - * - * Scaling and Overflow Behavior: - * \par - * The function is implemented using an internal 32-bit accumulator. - * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - * There is saturation on the subtraction, hence there is no risk of overflow. - */ - __ALWAYS_STATIC_INLINE void csky_inv_clarke_q31( - q31_t Ialpha, - q31_t Ibeta, - q31_t * pIa, - q31_t * pIb) - { - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - - /* Calculating pIa from Ialpha by equation pIa = Ialpha */ - *pIa = Ialpha; - - #ifdef CSKY_SIMD - /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */ - product1 = mult_32x32_dext_31(Ialpha, 0x40000000); - - /* Intermediate product is calculated by (1/sqrt(3) * pIb) */ - product2 = mult_32x32_dext_31(Ibeta, 0x6ED9EBA1); - #else - /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */ - product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31); - - /* Intermediate product is calculated by (1/sqrt(3) * pIb) */ - product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31); - #endif - - /* pIb is calculated by subtracting the products */ - *pIb = __QSUB(product2, product1); - } - -/** - * @} - */ // end of inv_clarke group - - void csky_q7_to_q15( - q7_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - -/** - * @ingroup groupController - */ -/** - * @defgroup park Vector Park Transform - * - * Forward Park transform converts the input two-coordinate vector to flux and torque components. - * The Park transform can be used to realize the transformation of the Ialpha and the Ibeta currents - * from the stationary to the moving reference frame and control the spatial relationship between - * the stator vector current and rotor flux vector. - * If we consider the d axis aligned with the rotor flux, the diagram below shows the - * current vector and the relationship from the two reference frames: - * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame" - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html parkFormula.gif - * where Ialpha and Ibeta are the stator vector components, - * pId and pIq are rotor vector components and cosVal and sinVal are the - * cosine and sine values of theta (rotor flux position). - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Park transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ -/** - * @addtogroup park - * @{ - */ -/** - * @brief Floating-point Park transform - * @param[in] Ialpha input two-phase vector coordinate alpha - * @param[in] Ibeta input two-phase vector coordinate beta - * @param[out] pId points to output rotor reference frame d - * @param[out] pIq points to output rotor reference frame q - * @param[in] sinVal sine value of rotation angle theta - * @param[in] cosVal cosine value of rotation angle theta - * - * The function implements the forward Park transform. - * - */ - __ALWAYS_STATIC_INLINE void csky_park_f32( - float32_t Ialpha, - float32_t Ibeta, - float32_t * pId, - float32_t * pIq, - float32_t sinVal, - float32_t cosVal) -{ - /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */ - *pId = Ialpha * cosVal + Ibeta * sinVal; - /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */ - *pIq = -Ialpha * sinVal + Ibeta * cosVal; -} -/** - * @} - */ // end of park group - -/** - * @addtogroup park - * @{ - */ -/** - * @brief Park transform for Q31 version - * @param[in] Ialpha input two-phase vector coordinate alpha - * @param[in] Ibeta input two-phase vector coordinate beta - * @param[out] pId points to output rotor reference frame d - * @param[out] pIq points to output rotor reference frame q - * @param[in] sinVal sine value of rotation angle theta - * @param[in] cosVal cosine value of rotation angle theta - * - * Scaling and Overflow Behavior: - * \par - * The function is implemented using an internal 32-bit accumulator. - * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - * There is saturation on the addition and subtraction, hence there is no risk of overflow. - */ - __ALWAYS_STATIC_INLINE void csky_park_q31( - q31_t Ialpha, - q31_t Ibeta, - q31_t * pId, - q31_t * pIq, - q31_t sinVal, - q31_t cosVal) -{ -#ifdef CSKY_SIMD - __ASM volatile( - "rmul.s32.h t0, %0, %3\n\t" - "rmul.s32.h t1, %1, %2\n\t" - "add.s32.s t0, t0, t1\n\t" - "st.w t0, (%4, 0x0)\n\t" - "rmul.s32.h t0, %0, %2\n\t" - "rmul.s32.h t1, %1, %3\n\t" - "sub.s32.s t1, t1, t0\n\t" - "st.w t1, (%5, 0x0)\n\t" - ::"r"(Ialpha),"r"(Ibeta),"r"(sinVal),"r"(cosVal),"r"(pId),"r"(pIq) - :"t0","t1", "memory"); -#else - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - q31_t product3, product4; /* Temporary variables used to store intermediate results */ - /* Intermediate product is calculated by (Ialpha * cosVal) */ - product1 = clip_q63_to_q31 (((q63_t) (Ialpha) * (cosVal)) >> 31); - /* Intermediate product is calculated by (Ibeta * sinVal) */ - product2 = clip_q63_to_q31 (((q63_t) (Ibeta) * (sinVal)) >> 31); - /* Intermediate product is calculated by (Ialpha * sinVal) */ - product3 = clip_q63_to_q31 (((q63_t) (Ialpha) * (sinVal)) >> 31); - /* Intermediate product is calculated by (Ibeta * cosVal) */ - product4 = clip_q63_to_q31 (((q63_t) (Ibeta) * (cosVal)) >> 31); - /* Calculate pId by adding the two intermediate products 1 and 2 */ - *pId = __QADD(product1, product2); - /* Calculate pIq by subtracting the two intermediate products 3 from 4 */ - *pIq = __QSUB(product4, product3); -#endif -} -/** - * @} - */ // end of park group - - void csky_q7_to_float( - q7_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - -/** - * @ingroup groupController - */ -/** - * @defgroup inv_park Vector Inverse Park transform - * Inverse Park transform converts the input flux and torque components to two-coordinate vector. - * - * The function operates on a single sample of data and each call to the function returns the processed output. - * The library provides separate functions for Q31 and floating-point data types. - * \par Algorithm - * \image html parkInvFormula.gif - * where pIalpha and pIbeta are the stator vector components, - * Id and Iq are rotor vector components and cosVal and sinVal are the - * cosine and sine values of theta (rotor flux position). - * \par Fixed-Point Behavior - * Care must be taken when using the Q31 version of the Park transform. - * In particular, the overflow and saturation behavior of the accumulator used must be considered. - * Refer to the function specific documentation below for usage guidelines. - */ -/** - * @addtogroup inv_park - * @{ - */ - /** - * @brief Floating-point Inverse Park transform - * @param[in] Id input coordinate of rotor reference frame d - * @param[in] Iq input coordinate of rotor reference frame q - * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - * @param[out] pIbeta points to output two-phase orthogonal vector axis beta - * @param[in] sinVal sine value of rotation angle theta - * @param[in] cosVal cosine value of rotation angle theta - */ - __ALWAYS_STATIC_INLINE void csky_inv_park_f32( - float32_t Id, - float32_t Iq, - float32_t * pIalpha, - float32_t * pIbeta, - float32_t sinVal, - float32_t cosVal) -{ - /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */ - *pIalpha = Id * cosVal - Iq * sinVal; - /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */ - *pIbeta = Id * sinVal + Iq * cosVal; -} -/** - * @} - */ // end of inv_park group - -/** - * @addtogroup inv_park - * @{ - */ -/** - * @brief Inverse Park transform for Q31 version - * @param[in] Id input coordinate of rotor reference frame d - * @param[in] Iq input coordinate of rotor reference frame q - * @param[out] pIalpha points to output two-phase orthogonal vector axis alpha - * @param[out] pIbeta points to output two-phase orthogonal vector axis beta - * @param[in] sinVal sine value of rotation angle theta - * @param[in] cosVal cosine value of rotation angle theta - * - * Scaling and Overflow Behavior: - * \par - * The function is implemented using an internal 32-bit accumulator. - * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format. - * There is saturation on the addition, hence there is no risk of overflow. - */ - __ALWAYS_STATIC_INLINE void csky_inv_park_q31( - q31_t Id, - q31_t Iq, - q31_t * pIalpha, - q31_t * pIbeta, - q31_t sinVal, - q31_t cosVal) -{ -#ifdef CSKY_SIMD - __ASM volatile( - "rmul.s32.h t0, %0, %3\n\t" - "rmul.s32.h t1, %1, %2\n\t" - "sub.s32.s t0, t0, t1\n\t" - "st.w t0, (%4, 0x0)\n\t" - "rmul.s32.h t0, %0, %2\n\t" - "rmul.s32.h t1, %1, %3\n\t" - "add.s32.s t0, t0, t1\n\t" - "st.w t0, (%5, 0x0)\n\t" - ::"r"(Id),"r"(Iq),"r"(sinVal),"r"(cosVal),"r"(pIalpha),"r"(pIbeta) - :"t0","t1", "memory"); - -#else - q31_t product1, product2; /* Temporary variables used to store intermediate results */ - q31_t product3, product4; /* Temporary variables used to store intermediate results */ - /* Intermediate product is calculated by (Id * cosVal) */ - product1 = clip_q63_to_q31 (((q63_t) (Id) * (cosVal)) >> 31); - /* Intermediate product is calculated by (Iq * sinVal) */ - product2 = clip_q63_to_q31 (((q63_t) (Iq) * (sinVal)) >> 31); - /* Intermediate product is calculated by (Id * sinVal) */ - product3 = clip_q63_to_q31 (((q63_t) (Id) * (sinVal)) >> 31); - /* Intermediate product is calculated by (Iq * cosVal) */ - product4 = clip_q63_to_q31 (((q63_t) (Iq) * (cosVal)) >> 31); - /* Calculate pIalpha by using the two intermediate products 1 and 2 */ - *pIalpha = __QSUB(product1, product2); - /* Calculate pIbeta by using the two intermediate products 3 and 4 */ - *pIbeta = __QADD(product4, product3); -#endif -} - -/** - * @} - */ // end of inv_park group - - void csky_q31_to_float( - q31_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - -/** - * @ingroup groupInterpolation - */ -/** - * @defgroup LinearInterpolate Linear Interpolation - * - * Linear interpolation is a method of curve fitting using linear polynomials. - * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line - * - * \par - * \image html LinearInterp.gif "Linear interpolation" - * - * \par - * A Linear Interpolate function calculates an output value(y), for the input(x) - * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values) - * - * \par Algorithm: - *
- *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
- *       where x0, x1 are nearest values of input x
- *             y0, y1 are nearest values to output y
- * 
- * - * \par - * This set of functions implements Linear interpolation process - * for Q7, Q15, Q31, and floating-point data types. The functions operate on a single - * sample of data and each call to the function returns a single processed value. - * S points to an instance of the Linear Interpolate function data structure. - * x is the input sample value. The functions returns the output value. - * - * \par - * if x is outside of the table boundary, Linear interpolation returns first value of the table - * if x is below input range and returns last value of table if x is above range. - */ -/** - * @addtogroup LinearInterpolate - * @{ - */ -/** - * @brief Process function for the floating-point Linear Interpolation Function. - * @param[in,out] S is an instance of the floating-point Linear Interpolation structure - * @param[in] x input sample to process - * @return y processed output sample. - * - */ -__ALWAYS_STATIC_INLINE float32_t csky_linear_interp_f32( -csky_linear_interp_instance_f32 * S, -float32_t x) -{ - float32_t y; - float32_t x0, x1; /* Nearest input values */ - float32_t y0, y1; /* Nearest output values */ - float32_t xSpacing = S->xSpacing; /* spacing between input values */ - int32_t i; /* Index variable */ - float32_t *pYData = S->pYData; /* pointer to output table */ - /* Calculation of index */ - i = (int32_t) ((x - S->x1) / xSpacing); - if(i < 0) - { - /* Iniatilize output for below specified range as least output value of table */ - y = pYData[0]; - } - else if((uint32_t)i >= S->nValues) - { - /* Iniatilize output for above specified range as last output value of table */ - y = pYData[S->nValues - 1]; - } - else - { - /* Calculation of nearest input values */ - x0 = S->x1 + i * xSpacing; - x1 = S->x1 + (i + 1) * xSpacing; - /* Read of nearest output values */ - y0 = pYData[i]; - y1 = pYData[i + 1]; - /* Calculation of output */ - y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0)); - } - /* returns output value */ - return (y); -} -/** - * @} - */ // end of LinearInterpolate group - -/** - * @addtogroup LinearInterpolate - * @{ - */ - -/** - * @brief Process function for the Q31 Linear Interpolation Function. - * @param[in] pYData pointer to Q31 Linear Interpolation table - * @param[in] x input sample to process - * @param[in] nValues number of table values - * @return y processed output sample. - * - * \par - * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. - * This function can support maximum of table size 2^12. - * - */ -__ALWAYS_STATIC_INLINE q31_t csky_linear_interp_q31( -q31_t * pYData, -q31_t x, -uint32_t nValues) -{ - q31_t y; /* output */ - q31_t y0, y1; /* Nearest output values */ - q31_t fract; /* fractional part */ - int32_t index; /* Index to read nearest output values */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - index = ((x & (q31_t)0xFFF00000) >> 20); - if(index >= (int32_t)(nValues - 1)) - { - return (pYData[nValues - 1]); - } - else if(index < 0) - { - return (pYData[0]); - } - else - { - /* 20 bits for the fractional part */ - /* shift left by 11 to keep fract in 1.31 format */ - fract = (x & 0x000FFFFF) << 11; - /* Read two nearest output values from the index in 1.31(q31) format */ - y0 = pYData[index]; - y1 = pYData[index + 1]; -#ifdef CSKY_SIMD - /* Calculation of y0 * (1-fract) and y is in 2.30 format */ - y = mult_32x32_keep32(y0, (0x7FFFFFFF - fract)); - /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */ - y = multAcc_32x32_keep32(y, y1, fract); -#else - /* Calculation of y0 * (1-fract) and y is in 2.30 format */ - y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32)); - /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */ - y += ((q31_t) (((q63_t) y1 * fract) >> 32)); -#endif - /* Convert y to 1.31 format */ - return (y << 1u); - } -} -/** - * @} - */ // end of LinearInterpolate group - -/** - * @addtogroup LinearInterpolate - * @{ - */ -/** - * - * @brief Process function for the Q15 Linear Interpolation Function. - * @param[in] pYData pointer to Q15 Linear Interpolation table - * @param[in] x input sample to process - * @param[in] nValues number of table values - * @return y processed output sample. - * - * \par - * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. - * This function can support maximum of table size 2^12. - * - */ -__ALWAYS_STATIC_INLINE q15_t csky_linear_interp_q15( -q15_t * pYData, -q31_t x, -uint32_t nValues) -{ - q63_t y; /* output */ - q15_t y0, y1; /* Nearest output values */ - q31_t fract; /* fractional part */ - int32_t index; /* Index to read nearest output values */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - index = ((x & (int32_t)0xFFF00000) >> 20); - if(index >= (int32_t)(nValues - 1)) - { - return (pYData[nValues - 1]); - } - else if(index < 0) - { - return (pYData[0]); - } - else - { - /* 20 bits for the fractional part */ - /* fract is in 12.20 format */ - fract = (x & 0x000FFFFF); - /* Read two nearest output values from the index */ - y0 = pYData[index]; - y1 = pYData[index + 1]; -#ifdef CSKY_SIMD - /* Calculation of y0 * (1-fract) and y is in 13.35 format */ - y = mult_32x32_keep64(y0, (0xFFFFF - fract)); - /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */ - y = multAcc_32x32_keep64(y, y1, (fract)); -#else - /* Calculation of y0 * (1-fract) and y is in 13.35 format */ - y = ((q63_t) y0 * (0xFFFFF - fract)); - /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */ - y += ((q63_t) y1 * (fract)); -#endif - /* convert y to 1.15 format */ - return (q15_t) (y >> 20); - } -} -/** - * @} - */ // end of LinearInterpolate group - -/** - * @addtogroup LinearInterpolate - * @{ - */ -/** - * - * @brief Process function for the Q7 Linear Interpolation Function. - * @param[in] pYData pointer to Q7 Linear Interpolation table - * @param[in] x input sample to process - * @param[in] nValues number of table values - * @return y processed output sample. - * - * \par - * Input sample x is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part. - * This function can support maximum of table size 2^12. - */ -__ALWAYS_STATIC_INLINE q7_t csky_linear_interp_q7( -q7_t * pYData, -q31_t x, -uint32_t nValues) -{ - q31_t y; /* output */ - q7_t y0, y1; /* Nearest output values */ - q31_t fract; /* fractional part */ - uint32_t index; /* Index to read nearest output values */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - if (x < 0) - { - return (pYData[0]); - } - index = (x >> 20) & 0xfff; - if(index >= (nValues - 1)) - { - return (pYData[nValues - 1]); - } - else - { - /* 20 bits for the fractional part */ - /* fract is in 12.20 format */ - fract = (x & 0x000FFFFF); - /* Read two nearest output values from the index and are in 1.7(q7) format */ - y0 = pYData[index]; - y1 = pYData[index + 1]; - /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */ - y = ((y0 * (0xFFFFF - fract))); - /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */ - y += (y1 * fract); - /* convert y to 1.7(q7) format */ - return (q7_t) (y >> 20); - } -} -/** - * @} - */ // end of LinearInterpolate group - - float32_t csky_sin_f32( - float32_t x); - - q31_t csky_sin_q31( - q31_t x); - - q15_t csky_sin_q15( - q15_t x); - - float32_t csky_cos_f32( - float32_t x); - - q31_t csky_cos_q31( - q31_t x); - - q15_t csky_cos_q15( - q15_t x); - - csky_status csky_sqrt_f32( - float32_t in, - float32_t * pOut); - - csky_status csky_sqrt_q31( - q31_t in, - q31_t * pOut); - - csky_status csky_sqrt_q15( - q15_t in, - q15_t * pOut); - - void csky_power_q31( - q31_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - void csky_power_int32( - int32_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - void csky_power_int32( - int32_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - void csky_power_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - void csky_power_q15( - q15_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - void csky_power_q7( - q7_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - void csky_mean_q7( - q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult); - - void csky_mean_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - void csky_mean_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - void csky_mean_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - void csky_var_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - void csky_var_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - void csky_var_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - void csky_rms_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - void csky_rms_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - void csky_rms_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - void csky_std_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - void csky_std_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - void csky_std_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - void csky_cmplx_mag_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - void csky_cmplx_mag_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - void csky_cmplx_mag_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - void csky_cmplx_dot_prod_q15( - q15_t * pSrcA, - q15_t * pSrcB, - uint32_t numSamples, - q31_t * realResult, - q31_t * imagResult); - - void csky_cmplx_dot_prod_q31( - q31_t * pSrcA, - q31_t * pSrcB, - uint32_t numSamples, - q63_t * realResult, - q63_t * imagResult); - - void csky_cmplx_dot_prod_f32( - float32_t * pSrcA, - float32_t * pSrcB, - uint32_t numSamples, - float32_t * realResult, - float32_t * imagResult); - - void csky_cmplx_mult_real_q15( - q15_t * pSrcCmplx, - q15_t * pSrcReal, - q15_t * pCmplxDst, - uint32_t numSamples); - - void csky_cmplx_mult_real_q31( - q31_t * pSrcCmplx, - q31_t * pSrcReal, - q31_t * pCmplxDst, - uint32_t numSamples); - - void csky_cmplx_mult_real_f32( - float32_t * pSrcCmplx, - float32_t * pSrcReal, - float32_t * pCmplxDst, - uint32_t numSamples); - - void csky_min_q7( - q7_t * pSrc, - uint32_t blockSize, - q7_t * result, - uint32_t * index); - - void csky_min_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult, - uint32_t * pIndex); - - void csky_min_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex); - - void csky_min_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex); - - void csky_max_q7( - q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult, - uint32_t * pIndex); - - void csky_max_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult, - uint32_t * pIndex); - - void csky_max_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex); - - void csky_max_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex); - - void csky_cmplx_mult_cmplx_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t numSamples); - - void csky_cmplx_mult_cmplx_q31( - q31_t * pSrcA, - q31_t * pSrcB, - q31_t * pDst, - uint32_t numSamples); - - void csky_cmplx_mult_cmplx_f32( - float32_t * pSrcA, - float32_t * pSrcB, - float32_t * pDst, - uint32_t numSamples); - - void csky_cmplx_mult_cmplx_re_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t numSamples); - - void csky_cmplx_mult_cmplx_re_q31( - q31_t * pSrcA, - q31_t * pSrcB, - q31_t * pDst, - uint32_t numSamples); - - void csky_cmplx_mult_cmplx_re_f32( - float32_t * pSrcA, - float32_t * pSrcB, - float32_t * pDst, - uint32_t numSamples); - - - void csky_float_to_q31( - float32_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_float_to_q15( - float32_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_float_to_q7( - float32_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_q31_to_q15( - q31_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_q31_to_q7( - q31_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_q15_to_float( - q15_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_q15_to_q31( - q15_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_q15_to_q7( - q15_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - -/** - * @ingroup groupInterpolation - */ -/** - * @defgroup BilinearInterpolate Bilinear Interpolation - * - * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid. - * The underlying function f(x, y) is sampled on a regular grid and the interpolation process - * determines values between the grid points. - * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension. - * Bilinear interpolation is often used in image processing to rescale images. - * The CSI DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types. - * - * Algorithm - * \par - * The instance structure used by the bilinear interpolation functions describes a two dimensional data table. - * For floating-point, the instance structure is defined as: - *
- *   typedef struct
- *   {
- *     uint16_t numRows;
- *     uint16_t numCols;
- *     float32_t *pData;
- * } csky_bilinear_interp_instance_f32;
- * 
- * - * \par - * where numRows specifies the number of rows in the table; - * numCols specifies the number of columns in the table; - * and pData points to an array of size numRows*numCols values. - * The data table pTable is organized in row order and the supplied data values fall on integer indexes. - * That is, table element (x,y) is located at pTable[x + y*numCols] where x and y are integers. - * - * \par - * Let (x, y) specify the desired interpolation point. Then define: - *
- *     XF = floor(x)
- *     YF = floor(y)
- * 
- * \par - * The interpolated output point is computed as: - *
- *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
- *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
- *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
- *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
- * 
- * Note that the coordinates (x, y) contain integer and fractional components. - * The integer components specify which portion of the table to use while the - * fractional components control the interpolation processor. - * - * \par - * if (x,y) are outside of the table boundary, Bilinear interpolation returns zero output. - */ -/** - * @addtogroup BilinearInterpolate - * @{ - */ -/** -* -* @brief Floating-point bilinear interpolation. -* @param[in,out] S points to an instance of the interpolation structure. -* @param[in] X interpolation coordinate. -* @param[in] Y interpolation coordinate. -* @return out interpolated value. -*/ -__ALWAYS_STATIC_INLINE float32_t csky_bilinear_interp_f32( -const csky_bilinear_interp_instance_f32 * S, -float32_t X, -float32_t Y) -{ - float32_t out; - float32_t f00, f01, f10, f11; - float32_t *pData = S->pData; - int32_t xIndex, yIndex, index; - float32_t xdiff, ydiff; - float32_t b1, b2, b3, b4; - xIndex = (int32_t) X; - yIndex = (int32_t) Y; - /* Care taken for table outside boundary */ - /* Returns zero output when values are outside table boundary */ - if(xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0 || yIndex > (S->numCols - 1)) - { - return (0); - } - /* Calculation of index for two nearest points in X-direction */ - index = (xIndex - 1) + (yIndex - 1) * S->numCols; - /* Read two nearest points in X-direction */ - f00 = pData[index]; - f01 = pData[index + 1]; - /* Calculation of index for two nearest points in Y-direction */ - index = (xIndex - 1) + (yIndex) * S->numCols; - /* Read two nearest points in Y-direction */ - f10 = pData[index]; - f11 = pData[index + 1]; - /* Calculation of intermediate values */ - b1 = f00; - b2 = f01 - f00; - b3 = f10 - f00; - b4 = f00 - f01 - f10 + f11; - /* Calculation of fractional part in X */ - xdiff = X - xIndex; - /* Calculation of fractional part in Y */ - ydiff = Y - yIndex; - /* Calculation of bi-linear interpolated output */ - out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff; - /* return to application */ - return (out); -} -/** - * @} - */ // end of BilinearInterpolate group - -/** - * @addtogroup BilinearInterpolate - * @{ - */ -/** -* -* @brief Q31 bilinear interpolation. -* @param[in,out] S points to an instance of the interpolation structure. -* @param[in] X interpolation coordinate in 12.20 format. -* @param[in] Y interpolation coordinate in 12.20 format. -* @return out interpolated value. -*/ -__ALWAYS_STATIC_INLINE q31_t csky_bilinear_interp_q31( -csky_bilinear_interp_instance_q31 * S, -q31_t X, -q31_t Y) -{ - q31_t out; /* Temporary output */ - q31_t acc = 0; /* output */ - q31_t xfract, yfract; /* X, Y fractional parts */ - q31_t x1, x2, y1, y2; /* Nearest output values */ - int32_t rI, cI; /* Row and column indices */ - q31_t *pYData = S->pData; /* pointer to output table values */ - uint32_t nCols = S->numCols; /* num of rows */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - rI = ((X & (q31_t)0xFFF00000) >> 20); - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - cI = ((Y & (q31_t)0xFFF00000) >> 20); - /* Care taken for table outside boundary */ - /* Returns zero output when values are outside table boundary */ - if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) - { - return (0); - } - /* 20 bits for the fractional part */ - /* shift left xfract by 11 to keep 1.31 format */ - xfract = (X & 0x000FFFFF) << 11u; - /* Read two nearest output values from the index */ - x1 = pYData[(rI) + (int32_t)nCols * (cI) ]; - x2 = pYData[(rI) + (int32_t)nCols * (cI) + 1]; - /* 20 bits for the fractional part */ - /* shift left yfract by 11 to keep 1.31 format */ - yfract = (Y & 0x000FFFFF) << 11u; - /* Read two nearest output values from the index */ - y1 = pYData[(rI) + (int32_t)nCols * (cI + 1) ]; - y2 = pYData[(rI) + (int32_t)nCols * (cI + 1) + 1]; -#ifdef CSKY_SIMD - /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */ - out = mult_32x32_keep32(x1, (0x7FFFFFFF - xfract)); - acc = mult_32x32_keep32(out, (0x7FFFFFFF - yfract)); - /* x2 * (xfract) * (1-yfract) in 3.29(q29) and adding to acc */ - out = mult_32x32_keep32(x2, (0x7FFFFFFF - yfract)); - acc = multAcc_32x32_keep32(acc, out, xfract); - /* y1 * (1 - xfract) * (yfract) in 3.29(q29) and adding to acc */ - out = mult_32x32_keep32(y1, (0x7FFFFFFF - xfract)); - acc = multAcc_32x32_keep32(acc, out, yfract); - /* y2 * (xfract) * (yfract) in 3.29(q29) and adding to acc */ - out = mult_32x32_keep32(y2, xfract); - acc = multAcc_32x32_keep32(acc, out, yfract); -#else - /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */ - out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32)); - acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32)); - /* x2 * (xfract) * (1-yfract) in 3.29(q29) and adding to acc */ - out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32)); - acc += ((q31_t) ((q63_t) out * (xfract) >> 32)); - /* y1 * (1 - xfract) * (yfract) in 3.29(q29) and adding to acc */ - out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32)); - acc += ((q31_t) ((q63_t) out * (yfract) >> 32)); - /* y2 * (xfract) * (yfract) in 3.29(q29) and adding to acc */ - out = ((q31_t) ((q63_t) y2 * (xfract) >> 32)); - acc += ((q31_t) ((q63_t) out * (yfract) >> 32)); -#endif - /* Convert acc to 1.31(q31) format */ - return ((q31_t)(acc << 2)); -} -/** - * @} - */ // end of BilinearInterpolate group - -/** - * @addtogroup BilinearInterpolate - * @{ - */ -/** -* @brief Q15 bilinear interpolation. -* @param[in,out] S points to an instance of the interpolation structure. -* @param[in] X interpolation coordinate in 12.20 format. -* @param[in] Y interpolation coordinate in 12.20 format. -* @return out interpolated value. -*/ -__ALWAYS_STATIC_INLINE q15_t csky_bilinear_interp_q15( -csky_bilinear_interp_instance_q15 * S, -q31_t X, -q31_t Y) -{ - q63_t acc = 0; /* output */ - q31_t out; /* Temporary output */ - q15_t x1, x2, y1, y2; /* Nearest output values */ - q31_t xfract, yfract; /* X, Y fractional parts */ - int32_t rI, cI; /* Row and column indices */ - q15_t *pYData = S->pData; /* pointer to output table values */ - uint32_t nCols = S->numCols; /* num of rows */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - rI = ((X & (q31_t)0xFFF00000) >> 20); - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - cI = ((Y & (q31_t)0xFFF00000) >> 20); - /* Care taken for table outside boundary */ - /* Returns zero output when values are outside table boundary */ - if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) - { - return (0); - } - /* 20 bits for the fractional part */ - /* xfract should be in 12.20 format */ - xfract = (X & 0x000FFFFF); - /* Read two nearest output values from the index */ - x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) ]; - x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1]; - /* 20 bits for the fractional part */ - /* yfract should be in 12.20 format */ - yfract = (Y & 0x000FFFFF); - /* Read two nearest output values from the index */ - y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) ]; - y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1]; - /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */ - /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */ - /* convert 13.35 to 13.31 by right shifting and out is in 1.31 */ -#ifdef CSKY_SIMD - out = mult_32x32_dext_4(x1, (0xFFFFF - xfract)); - acc = mult_32x32_keep64(out, (0xFFFFF - yfract)); - /* x2 * (xfract) * (1-yfract) in 1.51 and adding to acc */ - out = mult_32x32_dext_4(x2, (0xFFFFF - yfract)); - acc = multAcc_32x32_keep64(acc, out, (xfract)); - /* y1 * (1 - xfract) * (yfract) in 1.51 and adding to acc */ - out = mult_32x32_dext_4(y1, (0xFFFFF - xfract)); - acc = multAcc_32x32_keep64(acc, out, (yfract)); - /* y2 * (xfract) * (yfract) in 1.51 and adding to acc */ - out = mult_32x32_dext_4(y2, (xfract)); - acc = multAcc_32x32_keep64(acc, out, (yfract)); -#else - out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u); - acc = ((q63_t) out * (0xFFFFF - yfract)); - /* x2 * (xfract) * (1-yfract) in 1.51 and adding to acc */ - out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u); - acc += ((q63_t) out * (xfract)); - /* y1 * (1 - xfract) * (yfract) in 1.51 and adding to acc */ - out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u); - acc += ((q63_t) out * (yfract)); - /* y2 * (xfract) * (yfract) in 1.51 and adding to acc */ - out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u); - acc += ((q63_t) out * (yfract)); -#endif - /* acc is in 13.51 format and down shift acc by 36 times */ - /* Convert out to 1.15 format */ - return ((q15_t)(acc >> 36)); -} -/** - * @} - */ // end of BilinearInterpolate group - -/** - * @addtogroup BilinearInterpolate - * @{ - */ -/** -* @brief Q7 bilinear interpolation. -* @param[in,out] S points to an instance of the interpolation structure. -* @param[in] X interpolation coordinate in 12.20 format. -* @param[in] Y interpolation coordinate in 12.20 format. -* @return out interpolated value. -*/ -__ALWAYS_STATIC_INLINE q7_t csky_bilinear_interp_q7( -csky_bilinear_interp_instance_q7 * S, -q31_t X, -q31_t Y) -{ - q63_t acc = 0; /* output */ - q31_t out; /* Temporary output */ - q31_t xfract, yfract; /* X, Y fractional parts */ - q7_t x1, x2, y1, y2; /* Nearest output values */ - int32_t rI, cI; /* Row and column indices */ - q7_t *pYData = S->pData; /* pointer to output table values */ - uint32_t nCols = S->numCols; /* num of rows */ - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - rI = ((X & (q31_t)0xFFF00000) >> 20); - /* Input is in 12.20 format */ - /* 12 bits for the table index */ - /* Index value calculation */ - cI = ((Y & (q31_t)0xFFF00000) >> 20); - /* Care taken for table outside boundary */ - /* Returns zero output when values are outside table boundary */ - if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1)) - { - return (0); - } - /* 20 bits for the fractional part */ - /* xfract should be in 12.20 format */ - xfract = (X & (q31_t)0x000FFFFF); - /* Read two nearest output values from the index */ - x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) ]; - x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1]; - /* 20 bits for the fractional part */ - /* yfract should be in 12.20 format */ - yfract = (Y & (q31_t)0x000FFFFF); - /* Read two nearest output values from the index */ - y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) ]; - y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1]; - /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */ - out = ((x1 * (0xFFFFF - xfract))); -#ifdef CSKY_SIMD - acc = multAcc_32x32_keep64(acc, out, (0xFFFFF - yfract)); - /* x2 * (xfract) * (1-yfract) in 2.22 and adding to acc */ - out = ((x2 * (0xFFFFF - yfract))); - acc = multAcc_32x32_keep64(acc, out, xfract); - /* y1 * (1 - xfract) * (yfract) in 2.22 and adding to acc */ - out = ((y1 * (0xFFFFF - xfract))); - acc = multAcc_32x32_keep64(acc, out, yfract); - /* y2 * (xfract) * (yfract) in 2.22 and adding to acc */ - out = ((y2 * (yfract))); - acc = multAcc_32x32_keep64(acc, out, xfract); -#else - acc = (((q63_t) out * (0xFFFFF - yfract))); - /* x2 * (xfract) * (1-yfract) in 2.22 and adding to acc */ - out = ((x2 * (0xFFFFF - yfract))); - acc += (((q63_t) out * (xfract))); - /* y1 * (1 - xfract) * (yfract) in 2.22 and adding to acc */ - out = ((y1 * (0xFFFFF - xfract))); - acc += (((q63_t) out * (yfract))); - /* y2 * (xfract) * (yfract) in 2.22 and adding to acc */ - out = ((y2 * (yfract))); - acc += (((q63_t) out * (xfract))); -#endif - /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */ - return ((q7_t)(acc >> 40)); -} -/** - * @} - */ // end of BilinearInterpolate group - -/** - * @ingroup groupMath - */ - -/** - * @defgroup ShiftRight Right Shift - * - * Shift the input value to right with appointed bits, its basic format is: - *
- *     a = (a) >> (shift),   1 =< shift <= bitof(a) - 1.
- * 
- * The basic format is only designed for q31. - * - * and the extended format should be rounding to +inf: - *
- *     a = (a + (1<<(shift - 1)) >> (shift),   1 =< shift <= bitof(a) - 1.
- * 
- * - * which are designed for q31, q31 positive and q63. - */ - -/** - * @addtogroup ShiftRight - * @{ - */ -/** - * @brief right shift Q31 version - * @param[in] a input value to be shift. - * @param[in] shift input positive value, the number of bits to be shift. - * @param[out] result the shifted a. - * - * Scaling and Overflow Behavior: - * \par - * The function is only used for right shift. So, the value of shift is - * between[1,31]. - */ - __ALWAYS_STATIC_INLINE q31_t csky_shr_q31( - q31_t a, - q31_t shift) -{ - q31_t res; -#ifdef CSKY_SIMD - __ASM volatile( - "asr %0, %1, %2\n\t" - :"=r"(res), "=r"(a),"=r"(shift):"0"(res), "1"(a), "2"(shift)); -#else - res = ((a) >> (shift)); -#endif - return res; -} - -#define SHR(a, shift) csky_shr_q31(a, shift) - -/** - * @} - */ // end of ShiftRight group - - -/** - * @addtogroup ShiftRight - * @{ - */ -/** - * @brief right shift Q31 version - * @param[in] a input value to be shift. - * @param[in] shift input positive value, the number of bits to be shift. - * @param[out] result the shifted a. - * - * Scaling and Overflow Behavior: - * \par - * The function is only used for right shift. So, the value of shift is - * between[1,31]. And the output value is rounding to +inf. - */ - __ALWAYS_STATIC_INLINE q31_t csky_pshr_q31( - q31_t a, - q31_t shift) -{ - q31_t res; -#ifdef CSKY_SIMD - __ASM volatile( - "asr.s32.r %0, %1, %2\n\t" - :"=r"(res), "=r"(a),"=r"(shift):"0"(res), "1"(a), "2"(shift)); -#else - res = (a >= 0?(SHR((a) + (1<<(shift - 1)), shift))\ - :(SHR((a) + ((1<>1) -1, shift))); -#endif - return res; -} - -/** - * @} - */ // end of ShiftRight group - - -/** - * @addtogroup ShiftRight - * @{ - */ -/** - * @brief right shift Q31 version - * @param[in] a input positive value to be shift. - * @param[in] shift input positive value, the number of bits to be shift. - * @param[out] result the shifted a. - * - * Scaling and Overflow Behavior: - * \par - * The function is only used for right shift. So, the value of shift is - * between[1,31]. And the output value is rounding to +inf. - */ - __ALWAYS_STATIC_INLINE q31_t csky_pshr_pos_q31( - q31_t a, - q31_t shift) -{ - q31_t res; -#ifdef CSKY_SIMD - __ASM volatile( - "asr.s32.r %0, %1, %2\n\t" - :"=r"(res), "=r"(a),"=r"(shift):"0"(res), "1"(a), "2"(shift)); -#else - res = SHR((a) + (1<<(shift - 1)), shift); -#endif - return res; -} - -/** - * @} - */ // end of ShiftRight group - - -/** - * @addtogroup ShiftRight - * @{ - */ -/** - * @brief right shift Q63 version - * @param[in] a input value to be shift. - * @param[in] shift input positive value, the number of bits to be shift. - * @param[out] result the shifted a. - * - * Scaling and Overflow Behavior: - * \par - * The function is only used for right shift. So, the value of shift is - * between[1,63]. And the output value is rounding to +inf. - */ - __ALWAYS_STATIC_INLINE q63_t csky_pshr_q63( - q63_t a, - q31_t shift) -{ - q63_t res; -#ifdef CSKY_SIMD - __ASM volatile( - "subi t0, %2, 1\n\t" - "cmphsi t0, 32\n\t" - "bt 1f\n\t" - "movi t1, 1\n\t" - "lsl t0, t1, t0\n\t" - "movi t1, 0\n\t" - "add.s64.s %1, %1, t0\n\t" - "dext %0, %1, %R1, %2\n\t" - "asr %R0, %R1, %2\n\t" - "br 2f\n\t" - "1:\n\t" - "subi %2, %2, 32\n\t" - "subi t0, t0, 32\n\t" - "movi t1, 1\n\t" - "lsl t1, t1, t0\n\t" - "add.s32.s %R1, %R1, t1\n\t" - "asr %0, %R1, %2\n\t" - "asri %R0, %R1, 31\n\t" - "2:\n\t" - :"=r"(res), "=r"(a),"=r"(shift):"0"(res), "1"(a), "2"(shift):"t0", "t1"); -#else - res = (a >= 0?(SHR((a) + ((q63_t)1<<(shift - 1)), shift))\ - :(SHR((a) + (((q63_t)1<>1) -1, shift))); -#endif - return res; -} - -/** - * @} - */ // end of ShiftRight group - -//#define SHR(a, shift) csky_shr_q31(a, shift) -#define PSHR(a, shift) csky_pshr_q31(a, shift) -#define PSHR_POSITIVE(a, shift) csky_pshr_pos_q31(a, shift) -#define PSHR64(a, shift) csky_pshr_q63(a, shift) - - -#ifdef CSKY_SIMD -#else -/* SMMLAR */ -#define multAcc_32x32_keep32_R(a, x, y) \ - a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32) - -/* SMMLSR */ -#define multSub_32x32_keep32_R(a, x, y) \ - a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32) - -/* SMMULR */ -#define mult_32x32_keep32_R(a, x, y) \ - a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32) - -/* SMMLA */ -#define multAcc_32x32_keep32(a, x, y) \ - a += (q31_t) (((q63_t) x * y) >> 32) - -/* SMMLS */ -#define multSub_32x32_keep32(a, x, y) \ - a -= (q31_t) (((q63_t) x * y) >> 32) - -/* SMMUL */ -#define mult_32x32_keep32(a, x, y) \ - a = (q31_t) (((q63_t) x * y ) >> 32) -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _CSKY_MATH_H */ - -/** - * - * End of file. - */ diff --git a/include/include_xt800/dsp_include/csky_vdsp2_math.h b/include/include_xt800/dsp_include/csky_vdsp2_math.h deleted file mode 100644 index 55ced0f4..00000000 --- a/include/include_xt800/dsp_include/csky_vdsp2_math.h +++ /dev/null @@ -1,2378 +0,0 @@ -/* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/****************************************************************************** - * @file csky_vdsp2_math.h - * @brief Public header file for CSI DSP Library. - * @version V1.0 - * @date 20. Dec 2016 - ******************************************************************************/ - -#ifndef _CSKY_VDSP2_MATH_H -#define _CSKY_VDSP2_MATH_H - -#include -#include - -#ifdef CSKY_VDSP2_MATH_DSP -#include "csi_core.h" -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif - /** - * @brief 8-bit fractional data type in 1.7 format. - */ - typedef int8_t q7_t; - - /** - * @brief 16-bit fractional data type in 1.15 format. - */ - typedef int16_t q15_t; - - /** - * @brief 32-bit fractional data type in 1.31 format. - */ - typedef int32_t q31_t; - - /** - * @brief 64-bit fractional data type in 1.63 format. - */ - typedef int64_t q63_t; - - /** - * @brief 32-bit floating-point type definition. - */ - typedef float float32_t; - - /** - * @brief Error status returned by some functions in the library. - */ - - typedef enum - { - CSKY_VDSP2_MATH_SUCCESS = 0, /**< No error */ - CSKY_VDSP2_MATH_ARGUMENT_ERROR = -1, /**< One or more arguments are incorrect */ - CSKY_VDSP2_MATH_LENGTH_ERROR = -2, /**< Length of data buffer is incorrect */ - CSKY_VDSP2_MATH_SIZE_MISMATCH = -3, /**< Size of matrices is not compatible with the operation. */ - CSKY_VDSP2_MATH_NANINF = -4, /**< Not-a-number (NaN) or infinity is generated */ - CSKY_VDSP2_MATH_SINGULAR = -5, /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */ - CSKY_VDSP2_MATH_TEST_FAILURE = -6 /**< Test Failed */ - } csky_vdsp2_status; - - /** - * @brief Instance structure for the Q7 FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q7_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - } csky_vdsp2_fir_instance_q7; - - /** - * @brief Instance structure for the Q15 FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - } csky_vdsp2_fir_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - } csky_vdsp2_fir_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of filter coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - } csky_vdsp2_fir_instance_f32; - - void csky_vdsp2_fir_q7( - const csky_vdsp2_fir_instance_q7 * S, - q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fir_init_q7( - csky_vdsp2_fir_instance_q7 * S, - uint16_t numTaps, - q7_t * pCoeffs, - q7_t * pState, - uint32_t blockSize); - - void csky_vdsp2_fir_q15( - const csky_vdsp2_fir_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fir_fast_q15( - const csky_vdsp2_fir_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - csky_vdsp2_status csky_vdsp2_fir_init_q15( - csky_vdsp2_fir_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - void csky_vdsp2_fir_q31( - const csky_vdsp2_fir_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fir_fast_q31( - const csky_vdsp2_fir_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fir_init_q31( - csky_vdsp2_fir_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - void csky_vdsp2_fir_f32( - const csky_vdsp2_fir_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fir_init_f32( - csky_vdsp2_fir_instance_f32 * S, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 Biquad cascade filter. - */ - typedef struct - { - int8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - q15_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - q15_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - int8_t postShift; /**< Additional shift, in bits, applied to each output sample. */ - } csky_vdsp2_biquad_casd_df1_inst_q15; - - /** - * @brief Instance structure for the Q31 Biquad cascade filter. - */ - typedef struct - { - uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - q31_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - q31_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - uint8_t postShift; /**< Additional shift, in bits, applied to each output sample. */ - } csky_vdsp2_biquad_casd_df1_inst_q31; - - /** - * @brief Instance structure for the Q31 Biquad cascade filter. - */ - - /** - * @brief Instance structure for the floating-point Biquad cascade filter. - */ - typedef struct - { - uint32_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< Points to the array of state coefficients. The array is of length 4*numStages. */ - float32_t *pCoeffs; /**< Points to the array of coefficients. The array is of length 5*numStages. */ - } csky_vdsp2_biquad_casd_df1_inst_f32; - - void csky_vdsp2_biquad_cascade_df1_q15( - const csky_vdsp2_biquad_casd_df1_inst_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_biquad_cascade_df1_init_q15( - csky_vdsp2_biquad_casd_df1_inst_q15 * S, - uint8_t numStages, - q15_t * pCoeffs, - q15_t * pState, - int8_t postShift); - - void csky_vdsp2_biquad_cascade_df1_fast_q15( - const csky_vdsp2_biquad_casd_df1_inst_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_biquad_cascade_df1_q31( - const csky_vdsp2_biquad_casd_df1_inst_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_biquad_cascade_df1_fast_q31( - const csky_vdsp2_biquad_casd_df1_inst_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_biquad_cascade_df1_init_q31( - csky_vdsp2_biquad_casd_df1_inst_q31 * S, - uint8_t numStages, - q31_t * pCoeffs, - q31_t * pState, - int8_t postShift); - - void csky_vdsp2_biquad_cascade_df1_f32( - const csky_vdsp2_biquad_casd_df1_inst_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_biquad_cascade_df1_init_f32( - csky_vdsp2_biquad_casd_df1_inst_f32 * S, - uint8_t numStages, - float32_t * pCoeffs, - float32_t * pState); - - - /** - * @brief Instance structure for the floating-point matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - float32_t *pData; /**< points to the data of the matrix. */ - } csky_vdsp2_matrix_instance_f32; - - /** - * @brief Instance structure for the Q15 matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - q15_t *pData; /**< points to the data of the matrix. */ - } csky_vdsp2_matrix_instance_q15; - - /** - * @brief Instance structure for the Q31 matrix structure. - */ - typedef struct - { - uint16_t numRows; /**< number of rows of the matrix. */ - uint16_t numCols; /**< number of columns of the matrix. */ - q31_t *pData; /**< points to the data of the matrix. */ - } csky_vdsp2_matrix_instance_q31; - - csky_vdsp2_status csky_vdsp2_mat_add_f32( - const csky_vdsp2_matrix_instance_f32 * pSrcA, - const csky_vdsp2_matrix_instance_f32 * pSrcB, - csky_vdsp2_matrix_instance_f32 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_add_q15( - const csky_vdsp2_matrix_instance_q15 * pSrcA, - const csky_vdsp2_matrix_instance_q15 * pSrcB, - csky_vdsp2_matrix_instance_q15 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_add_q31( - const csky_vdsp2_matrix_instance_q31 * pSrcA, - const csky_vdsp2_matrix_instance_q31 * pSrcB, - csky_vdsp2_matrix_instance_q31 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_cmplx_mult_f32( - const csky_vdsp2_matrix_instance_f32 * pSrcA, - const csky_vdsp2_matrix_instance_f32 * pSrcB, - csky_vdsp2_matrix_instance_f32 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_cmplx_mult_q15( - const csky_vdsp2_matrix_instance_q15 * pSrcA, - const csky_vdsp2_matrix_instance_q15 * pSrcB, - csky_vdsp2_matrix_instance_q15 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_cmplx_mult_q31( - const csky_vdsp2_matrix_instance_q31 * pSrcA, - const csky_vdsp2_matrix_instance_q31 * pSrcB, - csky_vdsp2_matrix_instance_q31 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_trans_f32( - const csky_vdsp2_matrix_instance_f32 * pSrc, - csky_vdsp2_matrix_instance_f32 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_trans_q15( - const csky_vdsp2_matrix_instance_q15 * pSrc, - csky_vdsp2_matrix_instance_q15 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_trans_q31( - const csky_vdsp2_matrix_instance_q31 * pSrc, - csky_vdsp2_matrix_instance_q31 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_mult_f32( - const csky_vdsp2_matrix_instance_f32 * pSrcA, - const csky_vdsp2_matrix_instance_f32 * pSrcB, - csky_vdsp2_matrix_instance_f32 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_mult_trans_f32( - const csky_vdsp2_matrix_instance_f32 * pSrcA, - const csky_vdsp2_matrix_instance_f32 * pSrcB, - csky_vdsp2_matrix_instance_f32 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_mult_q15( - const csky_vdsp2_matrix_instance_q15 * pSrcA, - const csky_vdsp2_matrix_instance_q15 * pSrcB, - csky_vdsp2_matrix_instance_q15 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_mult_trans_q15( - const csky_vdsp2_matrix_instance_q15 * pSrcA, - const csky_vdsp2_matrix_instance_q15 * pSrcB, - csky_vdsp2_matrix_instance_q15 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_mult_q31( - const csky_vdsp2_matrix_instance_q31 * pSrcA, - const csky_vdsp2_matrix_instance_q31 * pSrcB, - csky_vdsp2_matrix_instance_q31 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_mult_trans_q31( - const csky_vdsp2_matrix_instance_q31 * pSrcA, - const csky_vdsp2_matrix_instance_q31 * pSrcB, - csky_vdsp2_matrix_instance_q31 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_sub_f32( - const csky_vdsp2_matrix_instance_f32 * pSrcA, - const csky_vdsp2_matrix_instance_f32 * pSrcB, - csky_vdsp2_matrix_instance_f32 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_sub_q15( - const csky_vdsp2_matrix_instance_q15 * pSrcA, - const csky_vdsp2_matrix_instance_q15 * pSrcB, - csky_vdsp2_matrix_instance_q15 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_sub_q31( - const csky_vdsp2_matrix_instance_q31 * pSrcA, - const csky_vdsp2_matrix_instance_q31 * pSrcB, - csky_vdsp2_matrix_instance_q31 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_scale_f32( - const csky_vdsp2_matrix_instance_f32 * pSrc, - float32_t scale, - csky_vdsp2_matrix_instance_f32 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_scale_q15( - const csky_vdsp2_matrix_instance_q15 * pSrc, - q15_t scaleFract, - int32_t shift, - csky_vdsp2_matrix_instance_q15 * pDst); - - csky_vdsp2_status csky_vdsp2_mat_scale_q31( - const csky_vdsp2_matrix_instance_q31 * pSrc, - q31_t scaleFract, - int32_t shift, - csky_vdsp2_matrix_instance_q31 * pDst); - - void csky_vdsp2_mat_init_q31( - csky_vdsp2_matrix_instance_q31 * S, - uint16_t nRows, - uint16_t nColumns, - q31_t * pData); - - void csky_vdsp2_mat_init_q15( - csky_vdsp2_matrix_instance_q15 * S, - uint16_t nRows, - uint16_t nColumns, - q15_t * pData); - - void csky_vdsp2_mat_init_f32( - csky_vdsp2_matrix_instance_f32 * S, - uint16_t nRows, - uint16_t nColumns, - float32_t * pData); - - void csky_vdsp2_mult_q15xq31_sht( - q15_t * pSrcA, - q31_t * pSrcB, - uint32_t shiftValue, - uint32_t blockSize); - - void csky_vdsp2_mult_q7( - q7_t * pSrcA, - q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_mult_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_mult_rnd_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_mult_q31( - q31_t * pSrcA, - q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_mult_f32( - float32_t * pSrcA, - float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - q15_t *pTwiddle; /**< points to the Sin twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } csky_vdsp2_cfft_radix2_instance_q15; - - /** - * @brief Instance structure for the Q15 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - q15_t *pTwiddle; /**< points to the twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } csky_vdsp2_cfft_radix4_instance_q15; - - /** - * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - q31_t *pTwiddle; /**< points to the Twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } csky_vdsp2_cfft_radix2_instance_q31; - - /** - * @brief Instance structure for the Q31 CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - q31_t *pTwiddle; /**< points to the twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - } csky_vdsp2_cfft_radix4_instance_q31; - - /** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - float32_t onebyfftLen; /**< value of 1/fftLen. */ - } csky_vdsp2_cfft_radix2_instance_f32; - - /** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */ - uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */ - float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */ - float32_t onebyfftLen; /**< value of 1/fftLen. */ - } csky_vdsp2_cfft_radix4_instance_f32; - - /** - * @brief Instance structure for the fixed-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - const q15_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. */ - } csky_vdsp2_cfft_instance_q15; - -void csky_vdsp2_cfft_q15( - const csky_vdsp2_cfft_instance_q15 * S, - q15_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -void csky_vdsp2_cfft_fast_q15( - const csky_vdsp2_cfft_instance_q15 * S, - q15_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - /** - * @brief Instance structure for the fixed-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - const q31_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. */ - } csky_vdsp2_cfft_instance_q31; - -void csky_vdsp2_cfft_q31( - const csky_vdsp2_cfft_instance_q31 * S, - q31_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - -void csky_vdsp2_cfft_fast_q31( - const csky_vdsp2_cfft_instance_q31 * S, - q31_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - /** - * @brief Instance structure for the floating-point CFFT/CIFFT function. - */ - typedef struct - { - uint16_t fftLen; /**< length of the FFT. */ - const float32_t *pTwiddle; /**< points to the Twiddle factor table. */ - const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ - uint16_t bitRevLength; /**< bit reversal table length. */ - } csky_vdsp2_cfft_instance_f32; - - void csky_vdsp2_cfft_f32( - const csky_vdsp2_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag); - - /** - * @brief Instance structure for the Q15 RFFT/RIFFT function. - */ - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ - const csky_vdsp2_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */ - } csky_vdsp2_rfft_instance_q15; - - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - q15_t *pTwiddleAReal; /**< points to the A real twiddle factor table. */ - q15_t *pTwiddleBReal; /**< points to the B real twiddle factor table. */ - const csky_vdsp2_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */ - } csky_vdsp2_rfft_fast_instance_q15; - - csky_vdsp2_status csky_vdsp2_rfft_init_q15( - csky_vdsp2_rfft_instance_q15 * S, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - - void csky_vdsp2_rfft_q15( - const csky_vdsp2_rfft_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst); - - void csky_vdsp2_rfft_fast_q15( - const csky_vdsp2_rfft_fast_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst); - - /** - * @brief Instance structure for the Q31 RFFT/RIFFT function. - */ - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - q31_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ - const csky_vdsp2_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */ - } csky_vdsp2_rfft_instance_q31; - - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - q31_t *pTwiddleAReal; /**< points to the A real twiddle factor table. */ - q31_t *pTwiddleBReal; /**< points to the B real twiddle factor table. */ - const csky_vdsp2_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */ - } csky_vdsp2_rfft_fast_instance_q31; - - csky_vdsp2_status csky_vdsp2_rfft_init_q31( - csky_vdsp2_rfft_instance_q31 * S, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - - void csky_vdsp2_rfft_q31( - const csky_vdsp2_rfft_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst); - - void csky_vdsp2_rfft_fast_q31( - const csky_vdsp2_rfft_fast_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst); - - /** - * @brief Instance structure for the floating-point RFFT/RIFFT function. - */ - typedef struct - { - uint32_t fftLenReal; /**< length of the real FFT. */ - uint16_t fftLenBy2; /**< length of the complex FFT. */ - uint8_t ifftFlagR; /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */ - uint8_t bitReverseFlagR; /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */ - uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ - float32_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ - float32_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ - csky_vdsp2_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */ - } csky_vdsp2_rfft_instance_f32; - - csky_vdsp2_status csky_vdsp2_rfft_init_f32( - csky_vdsp2_rfft_instance_f32 * S, - csky_vdsp2_cfft_radix4_instance_f32 * S_CFFT, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag); - - void csky_vdsp2_cfft_radix4_f32( - const csky_vdsp2_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag, - float32_t onebyfftLen); - - void csky_vdsp2_cfft_fast_radix4_f32( - const csky_vdsp2_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag, - float32_t onebyfftLen); - - void csky_vdsp2_cfft_radix2_f32( - const csky_vdsp2_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag, - float32_t onebyfftLen); - - void csky_vdsp2_rfft_f32( - const csky_vdsp2_rfft_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst); - - /** - * @brief Instance structure for the floating-point RFFT/RIFFT function. - */ -typedef struct - { - csky_vdsp2_cfft_instance_f32 Sint; /**< Internal CFFT structure. */ - uint16_t fftLenRFFT; /**< length of the real sequence */ - float32_t * pTwiddleRFFT; /**< Twiddle factors real stage */ - } csky_vdsp2_rfft_fast_instance_f32 ; - -csky_vdsp2_status csky_vdsp2_rfft_fast_init_f32 ( - csky_vdsp2_rfft_fast_instance_f32 * S, - uint16_t fftLen); - -void csky_vdsp2_rfft_fast_f32( - csky_vdsp2_rfft_fast_instance_f32 * S, - float32_t * p, float32_t * pOut, - uint8_t ifftFlag); - - /** - * @brief Instance structure for the floating-point DCT4/IDCT4 function. - */ - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - float32_t normalize; /**< normalizing factor. */ - float32_t *pTwiddle; /**< points to the twiddle factor table. */ - float32_t *pCosFactor; /**< points to the cosFactor table. */ - csky_vdsp2_rfft_fast_instance_f32 *pRfft; /**< points to the real FFT fast instance. */ - csky_vdsp2_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */ - } csky_vdsp2_dct4_instance_f32; - - csky_vdsp2_status csky_vdsp2_dct4_init_f32( - csky_vdsp2_dct4_instance_f32 * S, - csky_vdsp2_rfft_fast_instance_f32 * S_RFFT, - csky_vdsp2_cfft_radix4_instance_f32 * S_CFFT, - uint16_t N, - uint16_t Nby2, - float32_t normalize); - - void csky_vdsp2_dct4_f32( - const csky_vdsp2_dct4_instance_f32 * S, - float32_t * pState, - float32_t * pInlineBuffer); - - - /** - * @brief Instance structure for the Q31 DCT4/IDCT4 function. - */ - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q31_t normalize; /**< normalizing factor. */ - q31_t *pTwiddle; /**< points to the twiddle factor table. */ - q31_t *pCosFactor; /**< points to the cosFactor table. */ - csky_vdsp2_rfft_instance_q31 *pRfft; /**< points to the real FFT instance. */ - csky_vdsp2_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */ - } csky_vdsp2_dct4_instance_q31; - - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q31_t normalize; /**< normalizing factor. */ - q31_t *pTwiddle; /**< points to the twiddle factor table. */ - q31_t *pCosFactor; /**< points to the cosFactor table. */ - csky_vdsp2_rfft_fast_instance_q31 *pRfft; /**< points to the real FFT instance. */ - csky_vdsp2_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */ - } csky_vdsp2_dct4_fast_instance_q31; - - csky_vdsp2_status csky_vdsp2_dct4_init_q31( - csky_vdsp2_dct4_instance_q31 * S, - csky_vdsp2_rfft_instance_q31 * S_RFFT, - csky_vdsp2_cfft_radix4_instance_q31 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q31_t normalize); - - void csky_vdsp2_dct4_q31( - const csky_vdsp2_dct4_instance_q31 * S, - q31_t * pState, - q31_t * pInlineBuffer); - - void csky_vdsp2_dct4_fast_q31( - const csky_vdsp2_dct4_fast_instance_q31 * S, - q31_t * pState, - q31_t * pInlineBuffer); - - /** - * @brief Instance structure for the Q15 DCT4/IDCT4 function. - */ - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q15_t normalize; /**< normalizing factor. */ - q15_t *pTwiddle; /**< points to the twiddle factor table. */ - q15_t *pCosFactor; /**< points to the cosFactor table. */ - csky_vdsp2_rfft_instance_q15 *pRfft; /**< points to the real FFT instance. */ - csky_vdsp2_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */ - } csky_vdsp2_dct4_instance_q15; - - typedef struct - { - uint16_t N; /**< length of the DCT4. */ - uint16_t Nby2; /**< half of the length of the DCT4. */ - q15_t normalize; /**< normalizing factor. */ - q15_t *pTwiddle; /**< points to the twiddle factor table. */ - q15_t *pCosFactor; /**< points to the cosFactor table. */ - csky_vdsp2_rfft_fast_instance_q15 *pRfft; /**< points to the real FFT instance. */ - csky_vdsp2_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */ - } csky_vdsp2_dct4_fast_instance_q15; - - csky_vdsp2_status csky_vdsp2_dct4_init_q15( - csky_vdsp2_dct4_instance_q15 * S, - csky_vdsp2_rfft_instance_q15 * S_RFFT, - csky_vdsp2_cfft_radix4_instance_q15 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q15_t normalize); - - void csky_vdsp2_dct4_q15( - const csky_vdsp2_dct4_instance_q15 * S, - q15_t * pState, - q15_t * pInlineBuffer); - - void csky_vdsp2_dct4_fast_q15( - const csky_vdsp2_dct4_fast_instance_q15 * S, - q15_t * pState, - q15_t * pInlineBuffer); - - void csky_vdsp2_add_f32( - float32_t * pSrcA, - float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_add_q7( - q7_t * pSrcA, - q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_add_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_add_q31( - q31_t * pSrcA, - q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_sub_f32( - float32_t * pSrcA, - float32_t * pSrcB, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_sub_q7( - q7_t * pSrcA, - q7_t * pSrcB, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_sub_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_sub_q31( - q31_t * pSrcA, - q31_t * pSrcB, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_sum_q15( - q15_t * pSrcA, - q63_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_scale_f32( - float32_t * pSrc, - float32_t scale, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_scale_q7( - q7_t * pSrc, - q7_t scaleFract, - int8_t shift, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_scale_q15( - q15_t * pSrc, - q15_t scaleFract, - int8_t shift, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_scale_q31( - q31_t * pSrc, - q31_t scaleFract, - int8_t shift, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_abs_q7( - q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_abs_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_abs_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_abs_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_abs_max_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_abs_max_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - - void csky_vdsp2_dot_prod_f32( - float32_t * pSrcA, - float32_t * pSrcB, - uint32_t blockSize, - float32_t * result); - - void csky_vdsp2_dot_prod_q7( - q7_t * pSrcA, - q7_t * pSrcB, - uint32_t blockSize, - q31_t * result); - - void csky_vdsp2_dot_prod_q15( - q15_t * pSrcA, - q15_t * pSrcB, - uint32_t blockSize, - q63_t * result); - - void csky_vdsp2_dot_prod_q31( - q31_t * pSrcA, - q31_t * pSrcB, - uint32_t blockSize, - q63_t * result); - - void csky_vdsp2_dot_prod_u64xu8( - uint8_t * pSrcA, - uint64_t * pSrcB, - uint32_t blockSize, - uint64_t * result); - - void csky_vdsp2_shift_q7( - q7_t * pSrc, - int8_t shiftBits, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_shift_q15( - q15_t * pSrc, - int8_t shiftBits, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_shift_q31( - q31_t * pSrc, - int8_t shiftBits, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_offset_f32( - float32_t * pSrc, - float32_t offset, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_offset_q7( - q7_t * pSrc, - q7_t offset, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_offset_q15( - q15_t * pSrc, - q15_t offset, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_offset_q31( - q31_t * pSrc, - q31_t offset, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_negate_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_negate_q7( - q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_negate_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_negate_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_copy_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_copy_q7( - q7_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_copy_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_copy_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fill_f32( - float32_t value, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fill_q7( - q7_t value, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fill_q15( - q15_t value, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fill_q31( - q31_t value, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_conv_f32( - float32_t * pSrcA, - uint32_t srcALen, - float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst); - - void csky_vdsp2_conv_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - void csky_vdsp2_conv_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - void csky_vdsp2_conv_fast_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - void csky_vdsp2_conv_fast_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - void csky_vdsp2_conv_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - void csky_vdsp2_conv_fast_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - void csky_vdsp2_conv_opt_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - void csky_vdsp2_conv_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst); - - csky_vdsp2_status csky_vdsp2_conv_partial_f32( - float32_t * pSrcA, - uint32_t srcALen, - float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - csky_vdsp2_status csky_vdsp2_conv_partial_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - csky_vdsp2_status csky_vdsp2_conv_partial_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - csky_vdsp2_status csky_vdsp2_conv_partial_fast_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - csky_vdsp2_status csky_vdsp2_conv_partial_fast_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - csky_vdsp2_status csky_vdsp2_conv_partial_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - csky_vdsp2_status csky_vdsp2_conv_partial_fast_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - csky_vdsp2_status csky_vdsp2_conv_partial_opt_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - uint32_t firstIndex, - uint32_t numPoints, - q15_t * pScratch1, - q15_t * pScratch2); - - csky_vdsp2_status csky_vdsp2_conv_partial_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - uint32_t firstIndex, - uint32_t numPoints); - - /** - * @brief Instance structure for the Q15 FIR decimator. - */ - typedef struct - { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - } csky_vdsp2_fir_decimate_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR decimator. - */ - typedef struct - { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - } csky_vdsp2_fir_decimate_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR decimator. - */ - typedef struct - { - uint8_t M; /**< decimation factor. */ - uint16_t numTaps; /**< number of coefficients in the filter. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - } csky_vdsp2_fir_decimate_instance_f32; - - void csky_vdsp2_fir_decimate_f32( - const csky_vdsp2_fir_decimate_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - csky_vdsp2_status csky_vdsp2_fir_decimate_init_f32( - csky_vdsp2_fir_decimate_instance_f32 * S, - uint16_t numTaps, - uint8_t M, - float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - void csky_vdsp2_fir_decimate_q15( - const csky_vdsp2_fir_decimate_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fir_decimate_fast_q15( - const csky_vdsp2_fir_decimate_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - csky_vdsp2_status csky_vdsp2_fir_decimate_init_q15( - csky_vdsp2_fir_decimate_instance_q15 * S, - uint16_t numTaps, - uint8_t M, - q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - void csky_vdsp2_fir_decimate_q31( - const csky_vdsp2_fir_decimate_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fir_decimate_fast_q31( - csky_vdsp2_fir_decimate_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - csky_vdsp2_status csky_vdsp2_fir_decimate_init_q31( - csky_vdsp2_fir_decimate_instance_q31 * S, - uint16_t numTaps, - uint8_t M, - q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 FIR interpolator. - */ - typedef struct - { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - q15_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */ - } csky_vdsp2_fir_interpolate_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR interpolator. - */ - typedef struct - { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - q31_t *pState; /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */ - } csky_vdsp2_fir_interpolate_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR interpolator. - */ - typedef struct - { - uint8_t L; /**< upsample factor. */ - uint16_t phaseLength; /**< length of each polyphase filter component. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length L*phaseLength. */ - float32_t *pState; /**< points to the state variable array. The array is of length phaseLength+numTaps-1. */ - } csky_vdsp2_fir_interpolate_instance_f32; - - void csky_vdsp2_fir_interpolate_q15( - const csky_vdsp2_fir_interpolate_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - csky_vdsp2_status csky_vdsp2_fir_interpolate_init_q15( - csky_vdsp2_fir_interpolate_instance_q15 * S, - uint8_t L, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - uint32_t blockSize); - - void csky_vdsp2_fir_interpolate_q31( - const csky_vdsp2_fir_interpolate_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - csky_vdsp2_status csky_vdsp2_fir_interpolate_init_q31( - csky_vdsp2_fir_interpolate_instance_q31 * S, - uint8_t L, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - uint32_t blockSize); - - void csky_vdsp2_fir_interpolate_f32( - const csky_vdsp2_fir_interpolate_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - csky_vdsp2_status csky_vdsp2_fir_interpolate_init_f32( - csky_vdsp2_fir_interpolate_instance_f32 * S, - uint8_t L, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - uint32_t blockSize); - - /** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ - typedef struct - { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< points to the array of state coefficients. The array is of length 2*numStages. */ - float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - } csky_vdsp2_biquad_cascade_df2T_instance_f32; - - /** - * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter. - */ - typedef struct - { - uint8_t numStages; /**< number of 2nd order stages in the filter. Overall order is 2*numStages. */ - float32_t *pState; /**< points to the array of state coefficients. The array is of length 4*numStages. */ - float32_t *pCoeffs; /**< points to the array of coefficients. The array is of length 5*numStages. */ - } csky_vdsp2_biquad_cascade_stereo_df2T_instance_f32; - - void csky_vdsp2_biquad_cascade_df2T_f32( - const csky_vdsp2_biquad_cascade_df2T_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_biquad_cascade_stereo_df2T_f32( - const csky_vdsp2_biquad_cascade_stereo_df2T_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_biquad_cascade_df2T_init_f32( - csky_vdsp2_biquad_cascade_df2T_instance_f32 * S, - uint8_t numStages, - float32_t * pCoeffs, - float32_t * pState); - - void csky_vdsp2_biquad_cascade_stereo_df2T_init_f32( - csky_vdsp2_biquad_cascade_stereo_df2T_instance_f32 * S, - uint8_t numStages, - float32_t * pCoeffs, - float32_t * pState); - - /** - * @brief Instance structure for the Q15 FIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of filter stages. */ - q15_t *pState; /**< points to the state variable array. The array is of length numStages. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ - } csky_vdsp2_fir_lattice_instance_q15; - - /** - * @brief Instance structure for the Q31 FIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of filter stages. */ - q31_t *pState; /**< points to the state variable array. The array is of length numStages. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ - } csky_vdsp2_fir_lattice_instance_q31; - - /** - * @brief Instance structure for the floating-point FIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of filter stages. */ - float32_t *pState; /**< points to the state variable array. The array is of length numStages. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numStages. */ - } csky_vdsp2_fir_lattice_instance_f32; - - void csky_vdsp2_fir_lattice_init_q15( - csky_vdsp2_fir_lattice_instance_q15 * S, - uint16_t numStages, - q15_t * pCoeffs, - q15_t * pState); - - void csky_vdsp2_fir_lattice_q15( - const csky_vdsp2_fir_lattice_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fir_lattice_init_q31( - csky_vdsp2_fir_lattice_instance_q31 * S, - uint16_t numStages, - q31_t * pCoeffs, - q31_t * pState); - - void csky_vdsp2_fir_lattice_q31( - const csky_vdsp2_fir_lattice_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_fir_lattice_init_f32( - csky_vdsp2_fir_lattice_instance_f32 * S, - uint16_t numStages, - float32_t * pCoeffs, - float32_t * pState); - - void csky_vdsp2_fir_lattice_f32( - const csky_vdsp2_fir_lattice_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 IIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of stages in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - q15_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - q15_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ - } csky_vdsp2_iir_lattice_instance_q15; - - /** - * @brief Instance structure for the Q31 IIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of stages in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - q31_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - q31_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ - } csky_vdsp2_iir_lattice_instance_q31; - - /** - * @brief Instance structure for the floating-point IIR lattice filter. - */ - typedef struct - { - uint16_t numStages; /**< number of stages in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numStages+blockSize. */ - float32_t *pkCoeffs; /**< points to the reflection coefficient array. The array is of length numStages. */ - float32_t *pvCoeffs; /**< points to the ladder coefficient array. The array is of length numStages+1. */ - } csky_vdsp2_iir_lattice_instance_f32; - - void csky_vdsp2_iir_lattice_f32( - const csky_vdsp2_iir_lattice_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_iir_lattice_init_f32( - csky_vdsp2_iir_lattice_instance_f32 * S, - uint16_t numStages, - float32_t * pkCoeffs, - float32_t * pvCoeffs, - float32_t * pState, - uint32_t blockSize); - - void csky_vdsp2_iir_lattice_q31( - const csky_vdsp2_iir_lattice_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_iir_lattice_init_q31( - csky_vdsp2_iir_lattice_instance_q31 * S, - uint16_t numStages, - q31_t * pkCoeffs, - q31_t * pvCoeffs, - q31_t * pState, - uint32_t blockSize); - - void csky_vdsp2_iir_lattice_q15( - const csky_vdsp2_iir_lattice_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_iir_lattice_init_q15( - csky_vdsp2_iir_lattice_instance_q15 * S, - uint16_t numStages, - q15_t * pkCoeffs, - q15_t * pvCoeffs, - q15_t * pState, - uint32_t blockSize); - - - /** - * @brief Instance structure for the floating-point LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - float32_t mu; /**< step size that controls filter coefficient updates. */ - } csky_vdsp2_lms_instance_f32; - - void csky_vdsp2_lms_f32( - const csky_vdsp2_lms_instance_f32 * S, - float32_t * pSrc, - float32_t * pRef, - float32_t * pOut, - float32_t * pErr, - uint32_t blockSize); - - void csky_vdsp2_lms_init_f32( - csky_vdsp2_lms_instance_f32 * S, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - float32_t mu, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q15 LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q15_t mu; /**< step size that controls filter coefficient updates. */ - uint32_t postShift; /**< bit shift applied to coefficients. */ - } csky_vdsp2_lms_instance_q15; - - void csky_vdsp2_lms_init_q15( - csky_vdsp2_lms_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - q15_t mu, - uint32_t blockSize, - uint32_t postShift); - - void csky_vdsp2_lms_q15( - const csky_vdsp2_lms_instance_q15 * S, - q15_t * pSrc, - q15_t * pRef, - q15_t * pOut, - q15_t * pErr, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q31 LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q31_t mu; /**< step size that controls filter coefficient updates. */ - uint32_t postShift; /**< bit shift applied to coefficients. */ - } csky_vdsp2_lms_instance_q31; - - void csky_vdsp2_lms_q31( - const csky_vdsp2_lms_instance_q31 * S, - q31_t * pSrc, - q31_t * pRef, - q31_t * pOut, - q31_t * pErr, - uint32_t blockSize); - - void csky_vdsp2_lms_init_q31( - csky_vdsp2_lms_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - q31_t mu, - uint32_t blockSize, - uint32_t postShift); - - - /** - * @brief Instance structure for the floating-point normalized LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - float32_t mu; /**< step size that control filter coefficient updates. */ - float32_t energy; /**< saves previous frame energy. */ - float32_t x0; /**< saves previous input sample. */ - } csky_vdsp2_lms_norm_instance_f32; - - void csky_vdsp2_lms_norm_f32( - csky_vdsp2_lms_norm_instance_f32 * S, - float32_t * pSrc, - float32_t * pRef, - float32_t * pOut, - float32_t * pErr, - uint32_t blockSize); - - void csky_vdsp2_lms_norm_init_f32( - csky_vdsp2_lms_norm_instance_f32 * S, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - float32_t mu, - uint32_t blockSize); - - - /** - * @brief Instance structure for the Q31 normalized LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - q31_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q31_t mu; /**< step size that controls filter coefficient updates. */ - uint8_t postShift; /**< bit shift applied to coefficients. */ - q31_t *recipTable; /**< points to the reciprocal initial value table. */ - q31_t energy; /**< saves previous frame energy. */ - q31_t x0; /**< saves previous input sample. */ - } csky_vdsp2_lms_norm_instance_q31; - - void csky_vdsp2_lms_norm_q31( - csky_vdsp2_lms_norm_instance_q31 * S, - q31_t * pSrc, - q31_t * pRef, - q31_t * pOut, - q31_t * pErr, - uint32_t blockSize); - - void csky_vdsp2_lms_norm_init_q31( - csky_vdsp2_lms_norm_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - q31_t mu, - uint32_t blockSize, - uint8_t postShift); - - - /** - * @brief Instance structure for the Q15 normalized LMS filter. - */ - typedef struct - { - uint16_t numTaps; /**< Number of coefficients in the filter. */ - q15_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ - q15_t mu; /**< step size that controls filter coefficient updates. */ - uint8_t postShift; /**< bit shift applied to coefficients. */ - q15_t *recipTable; /**< Points to the reciprocal initial value table. */ - q15_t energy; /**< saves previous frame energy. */ - q15_t x0; /**< saves previous input sample. */ - } csky_vdsp2_lms_norm_instance_q15; - - void csky_vdsp2_lms_norm_q15( - csky_vdsp2_lms_norm_instance_q15 * S, - q15_t * pSrc, - q15_t * pRef, - q15_t * pOut, - q15_t * pErr, - uint32_t blockSize); - - void csky_vdsp2_lms_norm_init_q15( - csky_vdsp2_lms_norm_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - q15_t mu, - uint32_t blockSize, - uint8_t postShift); - - void csky_vdsp2_correlate_f32( - float32_t * pSrcA, - uint32_t srcALen, - float32_t * pSrcB, - uint32_t srcBLen, - float32_t * pDst); - - void csky_vdsp2_correlate_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch); - - void csky_vdsp2_correlate_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - void csky_vdsp2_correlate_fast_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst); - - void csky_vdsp2_correlate_fast_opt_q15( - q15_t * pSrcA, - uint32_t srcALen, - q15_t * pSrcB, - uint32_t srcBLen, - q15_t * pDst, - q15_t * pScratch); - - void csky_vdsp2_correlate_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - void csky_vdsp2_correlate_fast_q31( - q31_t * pSrcA, - uint32_t srcALen, - q31_t * pSrcB, - uint32_t srcBLen, - q31_t * pDst); - - void csky_vdsp2_correlate_opt_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst, - q15_t * pScratch1, - q15_t * pScratch2); - - void csky_vdsp2_correlate_q7( - q7_t * pSrcA, - uint32_t srcALen, - q7_t * pSrcB, - uint32_t srcBLen, - q7_t * pDst); - - - /** - * @brief Instance structure for the floating-point sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - float32_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } csky_vdsp2_fir_sparse_instance_f32; - - /** - * @brief Instance structure for the Q31 sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q31_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - q31_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } csky_vdsp2_fir_sparse_instance_q31; - - /** - * @brief Instance structure for the Q15 sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q15_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - q15_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } csky_vdsp2_fir_sparse_instance_q15; - - /** - * @brief Instance structure for the Q7 sparse FIR filter. - */ - typedef struct - { - uint16_t numTaps; /**< number of coefficients in the filter. */ - uint16_t stateIndex; /**< state buffer index. Points to the oldest sample in the state buffer. */ - q7_t *pState; /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */ - q7_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps.*/ - uint16_t maxDelay; /**< maximum offset specified by the pTapDelay array. */ - int32_t *pTapDelay; /**< points to the array of delay values. The array is of length numTaps. */ - } csky_vdsp2_fir_sparse_instance_q7; - - void csky_vdsp2_fir_sparse_f32( - csky_vdsp2_fir_sparse_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst, - float32_t * pScratchIn, - uint32_t blockSize); - - void csky_vdsp2_fir_sparse_init_f32( - csky_vdsp2_fir_sparse_instance_f32 * S, - uint16_t numTaps, - float32_t * pCoeffs, - float32_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - void csky_vdsp2_fir_sparse_q31( - csky_vdsp2_fir_sparse_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst, - q31_t * pScratchIn, - uint32_t blockSize); - - void csky_vdsp2_fir_sparse_init_q31( - csky_vdsp2_fir_sparse_instance_q31 * S, - uint16_t numTaps, - q31_t * pCoeffs, - q31_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - void csky_vdsp2_fir_sparse_q15( - csky_vdsp2_fir_sparse_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst, - q15_t * pScratchIn, - q31_t * pScratchOut, - uint32_t blockSize); - - void csky_vdsp2_fir_sparse_init_q15( - csky_vdsp2_fir_sparse_instance_q15 * S, - uint16_t numTaps, - q15_t * pCoeffs, - q15_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - void csky_vdsp2_fir_sparse_q7( - csky_vdsp2_fir_sparse_instance_q7 * S, - q7_t * pSrc, - q7_t * pDst, - q7_t * pScratchIn, - q31_t * pScratchOut, - uint32_t blockSize); - - void csky_vdsp2_fir_sparse_init_q7( - csky_vdsp2_fir_sparse_instance_q7 * S, - uint16_t numTaps, - q7_t * pCoeffs, - q7_t * pState, - int32_t * pTapDelay, - uint16_t maxDelay, - uint32_t blockSize); - - void csky_vdsp2_sin_cos_f32( - float32_t theta, - float32_t * pSinVal, - float32_t * pCosVal); - - void csky_vdsp2_sin_cos_q31( - q31_t theta, - q31_t * pSinVal, - q31_t * pCosVal); - - void csky_vdsp2_cmplx_conj_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_conj_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_conj_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mag_squared_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mag_squared_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mag_squared_q31_basic( - q31_t * pSrc, - q63_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mag_squared_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_vsqrt_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_vsqrt_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_vsqrt_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_vsqrt_q7( - q7_t * pSrc, - q7_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_q7_to_q31( - q7_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_q7_to_q15( - q7_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_q7_to_float( - q7_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_q31_to_float( - q31_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - csky_vdsp2_status csky_vdsp2_sqrt_f32( - float32_t in, - float32_t * pOut); - - csky_vdsp2_status csky_vdsp2_sqrt_q31( - q31_t in, - q31_t * pOut); - - csky_vdsp2_status csky_vdsp2_sqrt_q15( - q15_t in, - q15_t * pOut); - - void csky_vdsp2_power_q31( - q31_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - void csky_vdsp2_power_int32( - int32_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - void csky_vdsp2_power_int32( - int32_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - void csky_vdsp2_power_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - void csky_vdsp2_power_q15( - q15_t * pSrc, - uint32_t blockSize, - q63_t * pResult); - - void csky_vdsp2_power_q7( - q7_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - void csky_vdsp2_mean_q7( - q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult); - - void csky_vdsp2_mean_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - void csky_vdsp2_mean_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - void csky_vdsp2_mean_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - void csky_vdsp2_var_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - void csky_vdsp2_var_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - void csky_vdsp2_var_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - void csky_vdsp2_rms_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - void csky_vdsp2_rms_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - void csky_vdsp2_rms_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - void csky_vdsp2_std_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult); - - void csky_vdsp2_std_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult); - - void csky_vdsp2_std_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult); - - void csky_vdsp2_cmplx_mag_f32( - float32_t * pSrc, - float32_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mag_q31( - q31_t * pSrc, - q31_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mag_q15( - q15_t * pSrc, - q15_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_dot_prod_q15( - q15_t * pSrcA, - q15_t * pSrcB, - uint32_t numSamples, - q31_t * realResult, - q31_t * imagResult); - - void csky_vdsp2_cmplx_dot_prod_q31( - q31_t * pSrcA, - q31_t * pSrcB, - uint32_t numSamples, - q63_t * realResult, - q63_t * imagResult); - - void csky_vdsp2_cmplx_dot_prod_f32( - float32_t * pSrcA, - float32_t * pSrcB, - uint32_t numSamples, - float32_t * realResult, - float32_t * imagResult); - - void csky_vdsp2_cmplx_mult_real_q15( - q15_t * pSrcCmplx, - q15_t * pSrcReal, - q15_t * pCmplxDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mult_real_q31( - q31_t * pSrcCmplx, - q31_t * pSrcReal, - q31_t * pCmplxDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mult_real_f32( - float32_t * pSrcCmplx, - float32_t * pSrcReal, - float32_t * pCmplxDst, - uint32_t numSamples); - - void csky_vdsp2_min_q7( - q7_t * pSrc, - uint32_t blockSize, - q7_t * result, - uint32_t * index); - - void csky_vdsp2_min_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult, - uint32_t * pIndex); - - void csky_vdsp2_min_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex); - - void csky_vdsp2_min_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex); - - void csky_vdsp2_max_q7( - q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult, - uint32_t * pIndex); - - void csky_vdsp2_max_q15( - q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult, - uint32_t * pIndex); - - void csky_vdsp2_max_q31( - q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex); - - void csky_vdsp2_max_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex); - - void csky_vdsp2_cmplx_mult_cmplx_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mult_cmplx_q31( - q31_t * pSrcA, - q31_t * pSrcB, - q31_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mult_cmplx_f32( - float32_t * pSrcA, - float32_t * pSrcB, - float32_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mult_cmplx_re_q15( - q15_t * pSrcA, - q15_t * pSrcB, - q15_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mult_cmplx_re_q31( - q31_t * pSrcA, - q31_t * pSrcB, - q31_t * pDst, - uint32_t numSamples); - - void csky_vdsp2_cmplx_mult_cmplx_re_f32( - float32_t * pSrcA, - float32_t * pSrcB, - float32_t * pDst, - uint32_t numSamples); - - - void csky_vdsp2_float_to_q31( - float32_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_float_to_q15( - float32_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_float_to_q7( - float32_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_q31_to_q15( - q31_t * pSrc, - q15_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_q31_to_q7( - q31_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_q31_to_q7_rs( - q31_t * pSrc, - q7_t * pDst, - uint32_t shiftValue, - uint32_t blockSize); - - void csky_vdsp2_q63_to_q31_rs( - q63_t * pSrc, - q31_t * pDst, - uint32_t shiftValue, - uint32_t blockSize); - - void csky_vdsp2_q15_to_float( - q15_t * pSrc, - float32_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_q15_to_q31( - q15_t * pSrc, - q31_t * pDst, - uint32_t blockSize); - - void csky_vdsp2_q15_to_q7( - q15_t * pSrc, - q7_t * pDst, - uint32_t blockSize); - -#ifdef __cplusplus -} -#endif -#endif /* _CSKY_VDSP2_MATH_H */ - -/** - * - * End of file. - */ diff --git a/include/include_xt800/nn_include/csi_i805_nnfunction.h b/include/include_xt800/nn_include/csi_i805_nnfunction.h deleted file mode 100644 index 7b7dc0ee..00000000 --- a/include/include_xt800/nn_include/csi_i805_nnfunction.h +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Title: csi_nnfunctions.h - * Description: Public header file for CSI NN Library - * - * -------------------------------------------------------------------- */ - -#ifndef _CSI_I805_NNFUNCTIONS_H -#define _CSI_I805_NNFUNCTIONS_H - -#ifdef __cplusplus -extern "C" -{ -#endif - - - /** - * @brief u8 asym quant generic convolution optimized function - * @param[in] input_data pointer to input tensor data - * @param[in] kernel_data pointer to kernel tensor data - * @param[in] bias_data pointer to bias tensor data - * @param[in,out] output_data pointer to output tensor data - * @param[in,out] bufferA pointer to buffer for input/im2col data - * @param[in] input_h input height - * @param[in] input_w input width - * @param[in] input_ch input channel / output_channel - * @param[in] kernel_h kernel height - * @param[in] kernel_w kernel width - * @param[in] pad_h pad on height - * @param[in] pad_w pad on width - * @param[in] stride_h stride on height - * @param[in] stride_w stride on width - * @param[in] out_h output height - * @param[in] out_w output width - * @param[in] input_zero_point input zero_point - * @param[in] kernel_zero_point weight zero_point - * @param[in] output_zero_point output zero_point - * @param[in] dst_mult multiplier for s1 * s2 / s3 - * @param[in] dst_shift output shift for s1 * s2 / s3, shift_right - * @return none. - * bufferA size: 2*input_ch*kernel_h*kernel_w - */ -void csi_i805_conv2d_opt_u8(uint8_t * input_data, - uint8_t * kernel_data, - int32_t * bias_data, - uint8_t * output_data, - uint8_t * bufferA, - int32_t input_h, - int32_t input_w, - int32_t input_ch, - int32_t kernel_h, - int32_t kernel_w, - int32_t pad_h, - int32_t pad_w, - int32_t stride_h, - int32_t stride_w, - int32_t out_h, - int32_t out_w, - int32_t out_c, - int32_t input_zero_point, - int32_t weight_zero_point, - int32_t output_zero_point, - int32_t out_mult, - int32_t out_shift); - - - - /** - * @brief u8 asym quant 1x1 kernel_size convolution (pointwise convolution) optimized function - * @param[in] input_data pointer to input tensor data - * @param[in] kernel_data pointer to kernel tensor data - * @param[in] bias_data pointer to bias tensor data - * @param[in,out] output_data pointer to output tensor data - * @param[in] input_hxw input height mul width - * @param[in] input_ch input channel - * @param[in] output_ch output_channel - * @param[in] input_zero_point input zero_point - * @param[in] kernel_zero_point weight zero_point - * @param[in] output_zero_point output zero_point - * @param[in] dst_mult multiplier for s1 * s2 / s3 - * @param[in] dst_shift output shift for s1 * s2 / s3, shift_right - * @return none. - * - */ -void csi_i805_pwconv2d_opt_u8(uint8_t * input_data, - uint8_t * kernel_data, - int32_t * bias_data, - uint8_t * output_data, - int32_t input_hxw, - int32_t input_ch, - int32_t output_ch, - int32_t input_zero_point, - int32_t weight_zero_point, - int32_t output_zero_point, - int32_t out_mult, - int32_t out_shift); - - - /** - * @brief u8 asym quant depthwise convolution optimized function - * @param[in] input_data pointer to input tensor data - * @param[in] kernel_data pointer to kernel tensor data - * @param[in] bias_data pointer to bias tensor data - * @param[in,out] output_data pointer to output tensor data - * @param[in,out] bufferA pointer to buffer for input/im2col data - * @param[in] input_h input height - * @param[in] input_w input width - * @param[in] input_ch input channel / output_channel - * @param[in] kernel_h kernel height - * @param[in] kernel_w kernel width - * @param[in] pad_h pad on height - * @param[in] pad_w pad on width - * @param[in] stride_h stride on height - * @param[in] stride_w stride on width - * @param[in] out_h output height - * @param[in] out_w output width - * @param[in] input_zero_point input zero_point - * @param[in] kernel_zero_point weight zero_point - * @param[in] output_zero_point output zero_point - * @param[in] dst_mult multiplier for s1 * s2 / s3 - * @param[in] dst_shift output shift for s1 * s2 / s3, shift_right - * @return none. - * bufferA size: 4*input_ch*kernel_h*kernel_w - */ -void csi_i805_dwconv2d_opt_u8(uint8_t * input_data, - uint8_t * kernel_data, - int32_t * bias_data, - uint8_t * output_data, - uint8_t * bufferA, - int32_t input_h, - int32_t input_w, - int32_t input_ch, - int32_t kernel_h, - int32_t kernel_w, - int32_t pad_h, - int32_t pad_w, - int32_t stride_h, - int32_t stride_w, - int32_t out_h, - int32_t out_w, - int32_t input_zero_point, - int32_t weight_zero_point, - int32_t output_zero_point, - int32_t out_mult, - int32_t out_shift); - - - - /** - * @brief u8 asym quant depthwise convolution 3x3 kernel_size and 1 stride optimized function - * @param[in] input pointer to input tensor data - * @param[in] kernel pointer to kernel tensor data - * @param[in] bias pointer to bias tensor data - * @param[in,out] output pointer to output tensor data - * @param[in] input_zero_point input zero_point - * @param[in] kernel_zero_point weight zero_point - * @param[in] output_zero_point output zero_point - * @param[in] dst_mult multiplier for s1 * s2 / s3 - * @param[in] dst_shift output shift for s1 * s2 / s3, shift_right - * @return none. - * - */ -void csi_i805_dwconv2d_3x3_opt_u8(uint8_t * input, - uint8_t * kernel, - int32_t * bias, - uint8_t * output, - int32_t input_zero_point, - int32_t kernel_zero_point, - int32_t output_zero_point, - int32_t dst_mult, - int32_t dst_shift); - - - - /** - * @brief u8 asym quant fullyconnected optimized function - * @param[in] input_data pointer to input tensor data - * @param[in] weight_data pointer to weight tensor data - * @param[in] bias_data pointer to bias tensor data - * @param[in,out] output_data pointer to output tensor data - * @param[in] in_nodes input nodes (weight cols) - * @param[in] out_nodes output nodes (weight rows) - * @param[in] input_zero_point input zero_point - * @param[in] weight_zero_point weight zero_point - * @param[in] output_zero_point output zero_point - * @param[in] output_mult multiplier for s1 * s2 / s3 - * @param[in] output_shift output shift for s1 * s2 / s3. shift_right - * @return none. - * - */ -void csi_i805_fullyconnected_opt_u8(uint8_t * input_data, - uint8_t * weight_data, - int32_t * bias_data, - uint8_t * output_data, - int32_t in_nodes, - int32_t out_nodes, - int32_t input_zero_point, - int32_t weight_zero_point, - int32_t output_zero_point, - int32_t output_mult, - int32_t output_shift); - - - /** - * @brief u8 asym quant generic maxpool optimized function - * @param[in] input_data pointer to input tensor data - * @param[in,out] output_data pointer to output tensor data - * @param[in] input_h input height - * @param[in] input_w input width - * @param[in] input_ch input channel / output_channel - * @param[in] kernel_h kernel height - * @param[in] kernel_w kernel width - * @param[in] pad_h pad on height - * @param[in] pad_w pad on width - * @param[in] stride_h stride on height - * @param[in] stride_w stride on width - * @param[in] out_h output height - * @param[in] out_w output width - * @return none. - * bufferA size: 2*input_ch*kernel_h*kernel_w - */ -void csi_i805_maxpool2d_opt_u8(uint8_t *input_data, - uint8_t *output_data, - int32_t input_h, - int32_t input_w, - int32_t input_ch, - int32_t kernel_h, - int32_t kernel_w, - int32_t pad_h, - int32_t pad_w, - int32_t stride_h, - int32_t stride_w, - int32_t output_h, - int32_t output_w); - - - - - /** - * @brief u8 asym quant relu optimized function - * @param[in,out] data pointer to input/output tensor data, compute inplace - * @param[in] size input tensor size, tensor length - * @param[in] input_zeropoint input zero_point - * @param[in] out_multiplier multiplier for sacle_in / scale_out - * @param[in] out_shift shift left > 0 - * @return none. - * can be fused with conv/fc - */ -void csi_i805_relu_opt_u8(uint8_t *data, - int32_t size, - int32_t input_zeropoint, - int32_t out_multiplier, - int32_t out_shift); - - - - /** - * @brief u8 asym quant relu6 optimized function - * @param[in,out] data pointer to input/output tensor data, compute inplace - * @param[in] size input tensor size, tensor length - * @param[in] input_zeropoint input zero_point - * @param[in] out_multiplier multiplier for sacle_in / scale_out - * @param[in] out_shift shift left > 0 - * @return none. - * can be fused with conv/fc - */ -void csi_i805_relu6_opt_u8(uint8_t *data, - int32_t size, - int32_t input_zeropoint, - int32_t out_multiplier, - int32_t out_shift); - - - - /** - * @brief u8 asym quant clip optimized function - * @param[in] input_data pointer to input tensor data - * @param[in,out] output_data pointer to output tensor data - * @param[in] size input tensor size, tensor length - * @param[in] clip_qmin clip min value(quant) - * @param[in] clip_qmax clip max value(quant) - * @param[in] input_zeropoint input zero_point - * @param[in] output_zeropoint output zero_point - * @param[in] out_multiplier multiplier for sacle_in / scale_out - * @param[in] out_shift shift left > 0 - * @return none. - * can be fused with conv/fc - */ -void csi_i805_clip_opt_u8(uint8_t *input_data, - uint8_t *output_data, - int32_t size, - int32_t clip_min, - int32_t clip_max, - int32_t input_zeropoint, - int32_t output_zeropoint, - int32_t out_multiplier, - int32_t out_shift); - - - - /** - * @brief u8 asym quant element add optimized function - * @param[in] input_0 pointer to input_0 tensor data - * @param[in] input_1 pointer to input_1 tensor data - * @param[in,out] output pointer to output tensor data - * @param[in] size input tensor size, tensor length, element size - * @param[in] input_0_zeroponit input_0 zero_point. Range: Range: -255 to 0 - * @param[in] input_0_mult multiplier for sacle_input_0 - * @param[in] input_0_shift input_0 shift - * @param[in] input_1_zeropoint input_1 zero_point. Range: Range: -255 to 0 - * @param[in] input_1_mult multiplier for sacle_input_1 - * @param[in] input_1_shift input_1 shift - * @param[in] output_zeropoint output zero_point - * @param[in] output_mult multiplier for scale_output - * @param[in] output_shift output shift - * @return none. - * - */ -void csi_i805_elementwise_add_opt_u8(uint8_t *input_0, - uint8_t *input_1, - uint8_t *output, - int32_t size, - int32_t input_0_zeroponit, - int32_t input_0_mult, - int32_t input_0_shift, - int32_t input_1_zeropoint, - int32_t input_1_mult, - int32_t input_1_shift, - int32_t output_zeropoint, - int32_t output_mult, - int32_t output_shift); - - - - /** - * @brief u8 asym quant element mul optimized function - * @param[in] input_0 pointer to input_0 tensor data - * @param[in] input_1 pointer to input_1 tensor data - * @param[in,out] output pointer to output tensor data - * @param[in] size input tensor size, tensor length, element size - * @param[in] input_0_zeroponit input_0 zero_point - * @param[in] input_1_zeropoint input_1 zero_point - * @param[in] output_zeropoint output zero_point - * @param[in] output_mult multiplier for s1 * s2 / s3 - * @param[in] output_shift output shift for s1 * s2 / s3 - * @return none. - * - */ -void csi_i805_elementwise_mul_opt_u8(uint8_t *input_0, - uint8_t *input_1, - uint8_t *output, - int32_t size, - int32_t input_0_zeroponit, - int32_t input_1_zeropoint, - int32_t output_zeropoint, - int32_t output_mult, - int32_t output_shift); - - - - /** - * @brief u8 asym quant softmax optimized function - * @param[in] input_data pointer to input tensor data - * @param[in,out] output_data pointer to output tensor data - * @param[in] size tensor size - * @param[in] out_mult multiplier - * @param[in] out_shift output shift - * @return none. - * - */ -void csi_i805_softmax_opt_u8(uint8_t * input_data, - uint8_t * output_data, - int32_t size, - int32_t out_mult, - int32_t out_shift); - - - - /** - * @brief u8 asym quant reshape optimized function - * @param[in] input_data pointer to input tensor data - * @param[in,out] output_data pointer to output tensor data - * @param[in] size tensor size - * @return none. - * - */ -void csi_i805_reshape_opt_u8(uint8_t * input_data, - uint8_t * output_data, - int32_t size); - - - - - /** - * @brief u8 asym quant vec and matrix mul optimized function - * @param[in] lhs pointer to input tensor data - * @param[in] rhs pointer to weight tensor data - * @param[in] bias pointer to bias tensor data - * @param[in,out] dst pointer to output tensor data - * @param[in] rhs_col input nodes (weight cols) - * @param[in] rhs_row output nodes (weight rows) - * @param[in] lhs_zero_point input zero_point - * @param[in] rhs_zero_point weight zero_point - * @param[in] dst_zero_point output zero_point - * @param[in] dst_mult multiplier for s1 * s2 / s3 - * @param[in] dst_shift output shift for s1 * s2 / s3 - * @return none. - * - */ -void csi_i805_vec_mat_mult_opt_u8(uint8_t * lhs, - uint8_t * rhs, - int32_t * bias, - uint8_t * dst, - int32_t rhs_col, - int32_t rhs_row, - int32_t lhs_zero_point, - int32_t rhs_zero_point, - int32_t dst_zero_point, - int32_t dst_mult, - int32_t dst_shift); - - - - /** - * @brief u8 asym quant matrix mul(A * B_trans) optimized function - * @param[in] lhs pointer to input tensor data - * @param[in] rhs pointer to weight tensor data - * @param[in] bias pointer to bias tensor data - * @param[in,out] dst pointer to output tensor data - * @param[in] lhs_row input row / m - * @param[in] lhs_col input col / k - * @param[in] rhs_row weight row / n - * @param[in] lhs_zero_point input zero_point - * @param[in] rhs_zero_point weight zero_point - * @param[in] dst_zero_point output zero_point - * @param[in] dst_mult multiplier for s1 * s2 / s3 - * @param[in] dst_shift output shift for s1 * s2 / s3 - * @return none. - * - */ -void csi_i805_mat_mult_nt_t_opt_u8(uint8_t * lhs, - uint8_t * rhs, - int32_t * bias, - uint8_t * dst, - int32_t lhs_row, - int32_t lhs_col, - int32_t rhs_row, - int32_t lhs_zero_point, - int32_t rhs_zero_point, - int32_t dst_zero_point, - int32_t dst_mult, - int32_t dst_shift); - - - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/include/include_xt800/nn_include/csi_nnfunctions.h b/include/include_xt800/nn_include/csi_nnfunctions.h deleted file mode 100644 index 020ffbf7..00000000 --- a/include/include_xt800/nn_include/csi_nnfunctions.h +++ /dev/null @@ -1,569 +0,0 @@ -/* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Title: csi_nnfunctions.h - * Description: Public header file for CSI NN Library - * - * -------------------------------------------------------------------- */ - -#ifndef _CSI_NNFUNCTIONS_H -#define _CSI_NNFUNCTIONS_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#ifdef CSI_MATH_DSP -#include "csi_instance.h" -#include "csi_nnsupportfunctions.h" -#endif - -/** - * @brief Struct for specifying activation function types - * - */ -typedef enum -{ - CSKY_SIGMOID = 0, /**< Sigmoid activation function */ - CSKY_TANH = 1, /**< Tanh activation function */ -} csi_nn_activation_type; - - /** - * @brief Basic Q7 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - */ - -void csi_convolve_HWC_q7_basic(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA); - - /** - * @brief Basic Q15 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - */ - -void csi_convolve_HWC_q15_basic(const q15_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA); - - - -void csi_convolve_HWC_q15_fast(const q15_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA); - - - /** - * @brief Fast Q7 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ - -void csi_convolve_HWC_q7_fast_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA); - - /** - * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - * This function implement convolution with 1x1 kernel size (i.e., dim_kernel_x=1 - * and dim_kernel_y=1). It can be used for - * second half of MobileNets after depthwise separable convolution. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ -void csi_convolve_1x1_HWC_q7_fast(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA); - - /** - * @brief Q7 version of convolution for RGB image - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - * This kernel is written exclusively for convolution with ch_im_in - * equals 3. This applies on the first layer of CNNs which has input - * image with RGB format. - */ - -void csi_convolve_HWC_q7_RGB(const q7_t * Im_in, - const uint16_t dim_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA); - - - /** - * @brief Q7 depthwise separable convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 2 - * ch_im_out is multiple of 2 - */ - -void csi_depthwise_separable_conv_HWC_q7(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA); - - /** - * @brief Q7 depthwise separable convolution function (non-square shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding sizes x - * @param[in] padding_y padding sizes y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 2 - * ch_im_out is multiple of 2 - */ -void csi_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA); - - - /** - * @brief Q7 basic fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @return none. - */ - -void csi_fully_connected_q7(const q7_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut); - - - /** - * @brief Q15 basic fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @return none. - * - */ - -void csi_fully_connected_q15(const q15_t * pV, - const q15_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t * bias, - q15_t * pOut); - - - /** - * @brief Mixed Q15-Q7 fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @return none. - * - */ - -void csi_fully_connected_mat_q7_vec_q15(const q15_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q15_t * pOut); - - - - /** - * @brief Q7 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @return none. - */ - -void csi_relu_q7(q7_t * data, uint16_t size); - - /** - * @brief Q15 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @return none. - */ - -void csi_relu_q15(q15_t * data, uint16_t size); - - /** - * @brief Q7 neural network activation function using direct table look-up - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 - * @param[in] type type of activation functions - * @return none. - */ - -void csi_nn_activations_direct_q7(q7_t * data, uint16_t size, - uint16_t int_width, - csi_nn_activation_type type); - - /** - * @brief Q15 neural network activation function using direct table look-up - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 - * @param[in] type type of activation functions - * @return none. - */ - -void csi_nn_activations_direct_q15(q15_t * data, uint16_t size, - uint16_t int_width, - csi_nn_activation_type type); - - /** - * @brief Q7 max pooling function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] Im_out pointer to output tensor - * @return none. - * - */ - -void csi_maxpool2d_q7_HWC(q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t * bufferA, - q7_t * Im_out); - - /** - * @brief Q7 average pooling function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] Im_out pointer to output tensor - * @return none. - * - */ - -void csi_avepool_q7_HWC(q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t * bufferA, - q7_t * Im_out); - - -void csi_avepool_q7_HWC_nonsquare( - q7_t *Im_in, // input image - const uint16_t dim_im_in_x, // input image dimension - const uint16_t dim_im_in_y, // input image dimension - const uint16_t ch_im_in, // number of input image channels - const uint16_t dim_kernel_x, // window kernel size - const uint16_t dim_kernel_y, // window kernel size - const uint16_t padding_x, // padding sizes - const uint16_t padding_y, // padding sizes - const uint16_t stride_x, // stride - const uint16_t stride_y, // stride - const uint16_t dim_im_out_x, // output image dimension - const uint16_t dim_im_out_y, // output image dimension - q7_t *bufferA, // a buffer for local storage - q7_t *Im_out, // output feature - const uint16_t out_lshift); // output left shift (scaling) - - - /** - * @brief Q7 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimention - * @param[out] p_out pointer to output vector - * @return none. - * - */ - -void csi_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out); - - /** - * @brief Q15 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimention - * @param[out] p_out pointer to output vector - * @return none. - * - */ - -void csi_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, - q15_t *p_out); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/include/include_xt800/nn_include/csi_nnsupportfunctions.h b/include/include_xt800/nn_include/csi_nnsupportfunctions.h deleted file mode 100644 index ec02cec9..00000000 --- a/include/include_xt800/nn_include/csi_nnsupportfunctions.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Title: csi_nnsupportfunctions.h - * Description: Public header file of support functions for CSI NN Library - * - * -------------------------------------------------------------------- */ - -#ifndef _CSKY_DSP2_NNSUPPORTFUNCTIONS_H_ -#define _CSKY_DSP2_NNSUPPORTFUNCTIONS_H_ - -#include "csi_instance.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - * @brief Union for SIMD access of Q31/Q15/Q7 types - */ -union csi_nnword -{ - q31_t word; /**< Q31 type */ - q15_t half_words[2]; /**< Q15 type */ - q7_t bytes[4]; /**< Q7 type */ -}; - -/** - * @defgroup nndata_convert Neural Network Data Conversion Functions - * - * Perform data type conversion in-between neural network operations - * - */ - -/** - * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift - * @param[in] *pSrc points to the Q7 input vector - * @param[out] *pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - * @return none. - * - */ - -void csi_q7_to_q15_no_shift(const q7_t * pSrc, q15_t * pDst, - uint32_t blockSize); - -/** - * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift - * @param[in] *pSrc points to the Q7 input vector - * @param[out] *pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - * @return none. - * - */ - -void csi_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, - uint32_t blockSize); - -#if defined (CSI_MATH_DSP) - -/** - * @brief read and expand one Q7 word into two Q15 words - */ - -__ALWAYS_STATIC_INLINE void *read_and_pad(void *source, q31_t *out1, - q31_t *out2) -{ - q31_t inA = *__SIMD32(source)++; - q31_t inAbuf1 = __SXTB16(__ROR(inA, 8)); - q31_t inAbuf2 = __SXTB16(inA); - -#ifndef CSKY_MATH_BIG_ENDIAN - *out2 = __PKHTB(inAbuf1, inAbuf2, 16); - *out1 = __PKHBT(inAbuf2, inAbuf1, 16); -#else - *out1 = __PKHTB(inAbuf1, inAbuf2, 16); - *out2 = __PKHBT(inAbuf2, inAbuf1, 16); -#endif - - return source; -} - -/** - * @brief read and expand one Q7 word into two Q15 words with reordering - */ - -__ALWAYS_STATIC_INLINE void *read_and_pad_reordered(void *source, q31_t * out1, - q31_t * out2) -{ - q31_t inA = *__SIMD32(source)++; -#ifndef CSKY_MATH_BIG_ENDIAN - *out2 = __SXTB16(__ROR(inA, 8)); - *out1 = __SXTB16(inA); -#else - *out1 = __SXTB16(__ROR(inA, 8)); - *out2 = __SXTB16(inA); -#endif - - return source; -} -#endif - -q7_t *csi_nn_mat_mult_kernel_q7_q15_reordered(const q7_t * pA, - const q15_t * pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut); - -q7_t *csi_nn_mat_mult_kernel_q7_q15(const q7_t * pA, - const q15_t * pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut); - -/** - * @brief A few utility functions used by pooling functions - * - */ - -void buffer_scale_back_q15_to_q7(q15_t * buffer, q7_t * target, - uint16_t length, uint16_t scale); - -void accumulate_q7_to_q15(q15_t * base, q7_t * target, - const uint16_t length); - -/** - * @brief defition to adding rouding offset - */ -#ifndef CSKY_NN_TRUNCATE - #define NN_ROUND(out_shift) ( 0x1 << (out_shift - 1) ) -#else - #define NN_ROUND(out_shift) 0 -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/include/include_xt800/nn_include/csky_dsp2_nnsupportfunctions.h b/include/include_xt800/nn_include/csky_dsp2_nnsupportfunctions.h deleted file mode 100644 index addf4279..00000000 --- a/include/include_xt800/nn_include/csky_dsp2_nnsupportfunctions.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Title: csky_dsp2_nnsupportfunctions.h - * Description: Public header file of support functions for CSI NN Library - * - * -------------------------------------------------------------------- */ - -#ifndef _CSKY_DSP2_NNSUPPORTFUNCTIONS_H_ -#define _CSKY_DSP2_NNSUPPORTFUNCTIONS_H_ - -#include "csky_math.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - * @brief Union for SIMD access of Q31/Q15/Q7 types - */ -union csky_dsp2_nnword -{ - q31_t word; /**< Q31 type */ - q15_t half_words[2]; /**< Q15 type */ - q7_t bytes[4]; /**< Q7 type */ -}; - -/** - * @defgroup nndata_convert Neural Network Data Conversion Functions - * - * Perform data type conversion in-between neural network operations - * - */ - -/** - * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift - * @param[in] *pSrc points to the Q7 input vector - * @param[out] *pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - * @return none. - * - */ - -void csky_dsp2_q7_to_q15_no_shift(const q7_t * pSrc, q15_t * pDst, - uint32_t blockSize); - -/** - * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift - * @param[in] *pSrc points to the Q7 input vector - * @param[out] *pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - * @return none. - * - */ - -void csky_dsp2_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, - uint32_t blockSize); - -#if defined (CSKY_MATH_DSP) - -/** - * @brief read and expand one Q7 word into two Q15 words - */ - -__ALWAYS_INLINE void *read_and_pad(void *source, q31_t *out1, q31_t *out2) -{ - q31_t inA = *__SIMD32(source)++; - q31_t inAbuf1 = __SXTB16(__ROR(inA, 8)); - q31_t inAbuf2 = __SXTB16(inA); - -#ifndef CSKY_MATH_BIG_ENDIAN - *out2 = __PKHTB(inAbuf1, inAbuf2, 16); - *out1 = __PKHBT(inAbuf2, inAbuf1, 16); -#else - *out1 = __PKHTB(inAbuf1, inAbuf2, 16); - *out2 = __PKHBT(inAbuf2, inAbuf1, 16); -#endif - - return source; -} - -/** - * @brief read and expand one Q7 word into two Q15 words with reordering - */ - -__ALWAYS_INLINE void *read_and_pad_reordered(void *source, q31_t * out1, - q31_t * out2) -{ - q31_t inA = *__SIMD32(source)++; -#ifndef CSKY_MATH_BIG_ENDIAN - *out2 = __SXTB16(__ROR(inA, 8)); - *out1 = __SXTB16(inA); -#else - *out1 = __SXTB16(__ROR(inA, 8)); - *out2 = __SXTB16(inA); -#endif - - return source; -} -#endif - -/** - * @brief defition to adding rouding offset - */ -#ifndef CSKY_NN_TRUNCATE - #define NN_ROUND(out_shift) ( 0x1 << (out_shift - 1) ) -#else - #define NN_ROUND(out_shift) 0 -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/include/include_xt800/nn_include/csky_vdsp2_nnfunctions.h b/include/include_xt800/nn_include/csky_vdsp2_nnfunctions.h deleted file mode 100644 index 7d89d4d2..00000000 --- a/include/include_xt800/nn_include/csky_vdsp2_nnfunctions.h +++ /dev/null @@ -1,552 +0,0 @@ -/* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Title: csky_vdsp2_nnfunctions.h - * Description: Public header file for CSI NN Library - * - * -------------------------------------------------------------------- */ - -#ifndef _CSKY_VDSP2_NNFUNCTIONS_H -#define _CSKY_VDSP2_NNFUNCTIONS_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#ifdef CSKY_VDSP2_MATH_DSP -#include "csky_vdsp2_math.h" -#include "csky_vdsp2_nnsupportfunctions.h" -#endif - -/** - * @brief Struct for specifying activation function types - * - */ -typedef enum -{ - CSKY_SIGMOID = 0, /**< Sigmoid activation function */ - CSKY_TANH = 1, /**< Tanh activation function */ -} csky_vdsp2_nn_activation_type; - - /** - * @brief Basic Q7 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - */ - -void csky_vdsp2_convolve_HWC_q7_basic(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA); - - /** - * @brief Basic Q15 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - */ - -void csky_vdsp2_convolve_HWC_q15_basic(const q15_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA); - - - /** - * @brief Fast Q7 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ - -void csky_vdsp2_convolve_HWC_q7_fast_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA); - - /** - * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - * This function implement convolution with 1x1 kernel size (i.e., dim_kernel_x=1 - * and dim_kernel_y=1). It can be used for - * second half of MobileNets after depthwise separable convolution. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ -void csky_vdsp2_convolve_1x1_HWC_q7_fast(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA); - - /** - * @brief Q7 version of convolution for RGB image - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - * This kernel is written exclusively for convolution with ch_im_in - * equals 3. This applies on the first layer of CNNs which has input - * image with RGB format. - */ - -void csky_vdsp2_convolve_HWC_q7_RGB(const q7_t * Im_in, - const uint16_t dim_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA); - - - /** - * @brief Q7 depthwise separable convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 2 - * ch_im_out is multiple of 2 - */ - -void csky_vdsp2_depthwise_separable_conv_HWC_q7(const q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA); - - /** - * @brief Q7 depthwise separable convolution function (non-square shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding sizes x - * @param[in] padding_y padding sizes y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @return none. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 2 - * ch_im_out is multiple of 2 - */ -void csky_vdsp2_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t * Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t * wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t * bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA); - - - /** - * @brief Q7 basic fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @return none. - */ - -void csky_vdsp2_fully_connected_q7(const q7_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut); - - - /** - * @brief Q15 basic fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @return none. - * - */ - -void csky_vdsp2_fully_connected_q15(const q15_t * pV, - const q15_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t * bias, - q15_t * pOut); - - - /** - * @brief Mixed Q15-Q7 fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @return none. - * - */ - -void csky_vdsp2_fully_connected_mat_q7_vec_q15(const q15_t * pV, - const q7_t * pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q15_t * pOut); - - - - /** - * @brief Q7 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @return none. - */ - -void csky_vdsp2_relu_q7(q7_t * data, uint16_t size); - - /** - * @brief Q15 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @return none. - */ - -void csky_vdsp2_relu_q15(q15_t * data, uint16_t size); - - /** - * @brief Q7 neural network activation function using direct table look-up - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 - * @param[in] type type of activation functions - * @return none. - */ - -void csky_vdsp2_nn_activations_direct_q7(q7_t * data, uint16_t size, - uint16_t int_width, - csky_vdsp2_nn_activation_type type); - - /** - * @brief Q15 neural network activation function using direct table look-up - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 - * @param[in] type type of activation functions - * @return none. - */ - -void csky_vdsp2_nn_activations_direct_q15(q15_t * data, uint16_t size, - uint16_t int_width, - csky_vdsp2_nn_activation_type type); - - /** - * @brief Q7 max pooling function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] Im_out pointer to output tensor - * @return none. - * - */ - -void csky_vdsp2_maxpool2d_q7_HWC(q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t * bufferA, - q7_t * Im_out); - - /** - * @brief Q7 average pooling function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] Im_out pointer to output tensor - * @return none. - * - */ - -void csky_vdsp2_avepool_q7_HWC(q7_t * Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t * bufferA, - q7_t * Im_out); - - -void csky_vdsp2_avepool_q7_HWC_nonsquare( - q7_t *Im_in, // input image - const uint16_t dim_im_in_x, // input image dimension - const uint16_t dim_im_in_y, // input image dimension - const uint16_t ch_im_in, // number of input image channels - const uint16_t dim_kernel_x, // window kernel size - const uint16_t dim_kernel_y, // window kernel size - const uint16_t padding_x, // padding sizes - const uint16_t padding_y, // padding sizes - const uint16_t stride_x, // stride - const uint16_t stride_y, // stride - const uint16_t dim_im_out_x, // output image dimension - const uint16_t dim_im_out_y, // output image dimension - q7_t *bufferA, // a buffer for local storage - q7_t *Im_out, // output feature - const uint16_t out_lshift); // output left shift (scaling) - - - /** - * @brief Q7 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimention - * @param[out] p_out pointer to output vector - * @return none. - * - */ - -void csky_vdsp2_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out); - - /** - * @brief Q15 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimention - * @param[out] p_out pointer to output vector - * @return none. - * - */ - -void csky_vdsp2_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, - q15_t *p_out); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/include/include_xt800/nn_include/csky_vdsp2_nnsupportfunctions.h b/include/include_xt800/nn_include/csky_vdsp2_nnsupportfunctions.h deleted file mode 100644 index 897765dc..00000000 --- a/include/include_xt800/nn_include/csky_vdsp2_nnsupportfunctions.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Title: csky_vdsp2_nnsupportfunctions.h - * Description: Public header file of support functions for CSI NN Library - * - * -------------------------------------------------------------------- */ - -#ifndef _CSKY_DSP2_NNSUPPORTFUNCTIONS_H_ -#define _CSKY_DSP2_NNSUPPORTFUNCTIONS_H_ - -#include "csky_math.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** - * @brief Union for SIMD access of Q31/Q15/Q7 types - */ -union csky_vdsp2_nnword -{ - q31_t word; /**< Q31 type */ - q15_t half_words[2]; /**< Q15 type */ - q7_t bytes[4]; /**< Q7 type */ -}; - -/** - * @defgroup nndata_convert Neural Network Data Conversion Functions - * - * Perform data type conversion in-between neural network operations - * - */ - -/** - * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift - * @param[in] *pSrc points to the Q7 input vector - * @param[out] *pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - * @return none. - * - */ - -void csky_vdsp2_q7_to_q15_no_shift(const q7_t * pSrc, q15_t * pDst, - uint32_t blockSize); - -/** - * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift - * @param[in] *pSrc points to the Q7 input vector - * @param[out] *pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - * @return none. - * - */ - -void csky_vdsp2_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, - uint32_t blockSize); - -#if defined (CSKY_VDSP2_MATH_DSP) - -/** - * @brief read and expand one Q7 word into two Q15 words - */ - -__ALWAYS_STATIC_INLINE void *read_and_pad(void *source, q31_t *out1, - q31_t *out2) -{ - q31_t inA = *__SIMD32(source)++; - q31_t inAbuf1 = __SXTB16(__ROR(inA, 8)); - q31_t inAbuf2 = __SXTB16(inA); - -#ifndef CSKY_MATH_BIG_ENDIAN - *out2 = __PKHTB(inAbuf1, inAbuf2, 16); - *out1 = __PKHBT(inAbuf2, inAbuf1, 16); -#else - *out1 = __PKHTB(inAbuf1, inAbuf2, 16); - *out2 = __PKHBT(inAbuf2, inAbuf1, 16); -#endif - - return source; -} - -/** - * @brief read and expand one Q7 word into two Q15 words with reordering - */ - -__ALWAYS_STATIC_INLINE void *read_and_pad_reordered(void *source, q31_t * out1, - q31_t * out2) -{ - q31_t inA = *__SIMD32(source)++; -#ifndef CSKY_MATH_BIG_ENDIAN - *out2 = __SXTB16(__ROR(inA, 8)); - *out1 = __SXTB16(inA); -#else - *out1 = __SXTB16(__ROR(inA, 8)); - *out2 = __SXTB16(inA); -#endif - - return source; -} -#endif - -q7_t *csky_vdsp2_nn_mat_mult_kernel_q7_q15_reordered(const q7_t * pA, - const q15_t * pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut); - -q7_t *csky_vdsp2_nn_mat_mult_kernel_q7_q15(const q7_t * pA, - const q15_t * pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut); - -/** - * @brief A few utility functions used by pooling functions - * - */ - -void buffer_scale_back_q15_to_q7(q15_t * buffer, q7_t * target, - uint16_t length, uint16_t scale); - -void accumulate_q7_to_q15(q15_t * base, q7_t * target, - const uint16_t length); - -/** - * @brief defition to adding rouding offset - */ -#ifndef CSKY_NN_TRUNCATE - #define NN_ROUND(out_shift) ( 0x1 << (out_shift - 1) ) -#else - #define NN_ROUND(out_shift) 0 -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/lib/.gitignore b/lib/.gitignore deleted file mode 100644 index 09f7cd18..00000000 --- a/lib/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.a -*.so - diff --git a/script/git-clang-format.sh b/script/git-clang-format.sh new file mode 100755 index 00000000..2f651283 --- /dev/null +++ b/script/git-clang-format.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +set -e +set -u +set -o pipefail + +if [[ "$1" == "-i" ]]; then + INPLACE_FORMAT=1 + shift 1 +else + INPLACE_FORMAT=0 +fi + +if [[ "$#" -lt 1 ]]; then + echo "Usage: tests/lint/git-clang-format.sh [-i] " + echo "" + echo "Run clang-format on files that changed since " + echo "Examples:" + echo "- Compare last one commit: tests/lint/git-clang-format.sh HEAD~1" + echo "- Compare against upstream/main: tests/lint/git-clang-format.sh upstream/main" + echo "You can also add -i option to do inplace format" + exit 1 +fi + +cleanup() +{ + rm -rf /tmp/$$.clang-format.txt +} +trap cleanup 0 + +CLANG_FORMAT=clang-format-10 + +if [ -x "$(command -v clang-format-10)" ]; then + CLANG_FORMAT=clang-format-10 +elif [ -x "$(command -v clang-format)" ]; then + echo "clang-format might be different from clang-format-10, expect potential difference." + CLANG_FORMAT=clang-format-6.0 +else + echo "Cannot find clang-format-10" + exit 1 +fi + +# Print out specific version +${CLANG_FORMAT} --version + +if [[ ${INPLACE_FORMAT} -eq 1 ]]; then + echo "Running inplace git-clang-format against" $1 + git-${CLANG_FORMAT} --extensions h,mm,c,cc --binary=${CLANG_FORMAT} $1 + exit 0 +fi + +echo "Running git-clang-format against" $1 +git-${CLANG_FORMAT} --diff --extensions h,mm,c,cc --binary=${CLANG_FORMAT} $1 1> /tmp/$$.clang-format.txt +echo "---------clang-format log----------" +cat /tmp/$$.clang-format.txt +echo "" +if grep --quiet -E "diff" < /tmp/$$.clang-format.txt; then + echo "clang-format lint error found. Consider running clang-format-10 on these files to fix them." + exit 1 +fi diff --git a/source/c860_opt/csi_u8_to_f32_c860.S b/source/c860_opt/csi_u8_to_f32_c860.S index cf645325..c4f013b9 100644 --- a/source/c860_opt/csi_u8_to_f32_c860.S +++ b/source/c860_opt/csi_u8_to_f32_c860.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /** * diff --git a/source/c860_opt/utils.S b/source/c860_opt/utils.S index cc044ce8..56e8cbf8 100644 --- a/source/c860_opt/utils.S +++ b/source/c860_opt/utils.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /** * diff --git a/source/c906_opt/abs.c b/source/c906_opt/abs.c index ba6d4f6d..6fe47754 100644 --- a/source/c906_opt/abs.c +++ b/source/c906_opt/abs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/add.c b/source/c906_opt/add.c index 65dc7f12..27247832 100644 --- a/source/c906_opt/add.c +++ b/source/c906_opt/add.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -62,6 +62,39 @@ int csi_c906_add_f32(struct csi_tensor *input0, int in_size1 = csi_tensor_size(input1); int out_size = csi_tensor_size(output); + // HACK: special case: tensorflow densenet121 + // example: [1, 64, 55, 55] + [1, 64, 1, 1] = [1, 64, 55, 55] + if ((input1->dim[2] == 1) && (input1->dim[3] == 1) && (input1->dim[1] == input0->dim[1])) { + int inner_size = input0->dim[2] * input0->dim[3]; + int outer_size = input0->dim[1]; + asm volatile( + "1:\n\t" + "flw ft0, 0(%2)\n\t" + "mv t1, %4\n\t" + "2:\n\t" + "vsetvli t0, t1, e32, m2\n\t" + "vle.v v8, (%1)\n\t" + "sub t1, t1, t0\n\t" + "slli t0, t0, 2\n\t" // element: 4 bytes + "add %1, %1, t0\n\t" + "vfadd.vf v16, v8, ft0\n\t" + "vse.v v16, (%0)\n\t" + "add %0, %0, t0\n\t" + "bnez t1, 2b\n\t" + "addi %3, %3, -1\n\t" + "addi %2, %2, 4\n\t" + "bnez %3, 1b\n\t" + + : "=r"(output_data), // %0 + "=r"(input0_data), // %1 + "=r"(input1_data), // %2 + "=r"(outer_size), // %3 + "=r"(inner_size) // %4 + : "0"(output_data), "1"(input0_data), "2"(input1_data), "3"(outer_size), "4"(inner_size) + : "v8", "v9", "v16", "v17", "t0", "t1", "ft0"); + return CSINN_TRUE; + } + // example: [1, 3, 224, 224] + [1] = [1, 3, 224, 224] if (in_size1 == 1) { asm volatile( @@ -183,6 +216,37 @@ int csi_c906_add_fp16(struct csi_tensor *input0, int in_size1 = csi_tensor_size(input1); int out_size = csi_tensor_size(output); + if ((input1->dim[2] == 1) && (input1->dim[3] == 1) && (input1->dim[1] == input0->dim[1])) { + int inner_size = input0->dim[2] * input0->dim[3]; + int outer_size = input0->dim[1]; + asm volatile( + "1:\n\t" + "flh ft0, 0(%2)\n\t" + "mv t1, %4\n\t" + "2:\n\t" + "vsetvli t0, t1, e16, m2\n\t" + "vle.v v8, (%1)\n\t" + "sub t1, t1, t0\n\t" + "slli t0, t0, 1\n\t" + "add %1, %1, t0\n\t" + "vfadd.vf v16, v8, ft0\n\t" + "vse.v v16, (%0)\n\t" + "add %0, %0, t0\n\t" + "bnez t1, 2b\n\t" + "addi %3, %3, -1\n\t" + "addi %2, %2, 2\n\t" + "bnez %3, 1b\n\t" + + : "=r"(output_data), // %0 + "=r"(input0_data), // %1 + "=r"(input1_data), // %2 + "=r"(outer_size), // %3 + "=r"(inner_size) // %4 + : "0"(output_data), "1"(input0_data), "2"(input1_data), "3"(outer_size), "4"(inner_size) + : "v8", "v9", "v16", "v17", "t0", "t1", "ft0"); + return CSINN_TRUE; + } + if (in_size1 == 1) { asm volatile( "flh ft0, 0(%2)\n\t" diff --git a/source/c906_opt/avgpool.c b/source/c906_opt/avgpool.c index 9ba00b49..6a82a177 100644 --- a/source/c906_opt/avgpool.c +++ b/source/c906_opt/avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/broadcast_to.c b/source/c906_opt/broadcast_to.c index db9c72cc..0563179d 100644 --- a/source/c906_opt/broadcast_to.c +++ b/source/c906_opt/broadcast_to.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/cache_conv1d.c b/source/c906_opt/cache_conv1d.c new file mode 100644 index 00000000..12c692d1 --- /dev/null +++ b/source/c906_opt/cache_conv1d.c @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" + +int csi_c906_cache_conv1d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params) +{ + size_t data_size = + output->dim[0] * output->dim[1] * output->dim[2] * sizeof(__fp16); // 512*13*2 + asr_buffer_init_c906(¶ms->asr_buffer, 2 * data_size, data_size); + + if (input->dtype == CSINN_DTYPE_FLOAT16) { + __fp16 *weight_data = (__fp16 *)weight->data; + + int n = weight->dim[0]; // out_nodes + int k = weight->dim[1]; // in_nodes + if (k % 16 != 0) { + csi_debug_error("out_nodes num should be multiple of 16\n"); + } + __fp16 *pa_reorder = (__fp16 *)csi_mem_alloc(n * k * sizeof(__fp16)); + csi_c906_reorder_weight_n16_fp16(weight_data, pa_reorder, n, k, k); + + csi_c906_memcpy(weight_data, pa_reorder, n * k * sizeof(__fp16)); + params->data = weight_data; + csi_mem_free(pa_reorder); + + params->base.bc = csi_c906_cache_conv1d_fp16; + } + + return CSINN_TRUE; +} + +int csi_c906_cache_conv1d_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params) +{ + __fp16 *input_data = input->data; + __fp16 *output_data = output->data; + __fp16 *weights_data = weight->data; + __fp16 *bias_data = bias->data; + const int weights_dims_count = weight->dim_count; + const int output_depth = weight->dim[weights_dims_count - 3]; + const int accum_depth = weight->dim[weights_dims_count - 2]; + const int batches = input->dim[1]; + + int packn = 16; + int vl = 16; + int b = 0; + for (; b + 3 < batches; b += 4) { + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_output2 = init_output + output_depth; + __fp16 *init_output3 = init_output2 + output_depth; + __fp16 *init_output4 = init_output3 + output_depth; + __fp16 *init_input = input_data + b * accum_depth; + __fp16 *init_input2 = init_input + accum_depth; + __fp16 *init_input3 = init_input2 + accum_depth; + __fp16 *init_input4 = init_input3 + accum_depth; + + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; + int n = output_depth; + while (n > 0) { + __fp16 *in_ptr = init_input; + __fp16 *in_ptr2 = init_input2; + __fp16 *in_ptr3 = init_input3; + __fp16 *in_ptr4 = init_input4; + + vfloat16m2_t _acc = vle16_v_f16m2(init_bias, vl); + vfloat16m2_t _acc2 = vmv_v_v_f16m2(_acc, vl); + vfloat16m2_t _acc3 = vmv_v_v_f16m2(_acc, vl); + vfloat16m2_t _acc4 = vmv_v_v_f16m2(_acc, vl); + + init_bias += vl; + int k = accum_depth; + while (k > 0) { + vfloat16m2_t _weight = vle16_v_f16m2(init_weight, vl); + _acc = vfmacc_vf_f16m2(_acc, *in_ptr, _weight, vl); + _acc2 = vfmacc_vf_f16m2(_acc2, *in_ptr2, _weight, vl); + _acc3 = vfmacc_vf_f16m2(_acc3, *in_ptr3, _weight, vl); + _acc4 = vfmacc_vf_f16m2(_acc4, *in_ptr4, _weight, vl); + init_weight += vl; + in_ptr++; + in_ptr2++; + in_ptr3++; + in_ptr4++; + k--; + } + vse16_v_f16m2(init_output, _acc, vl); + vse16_v_f16m2(init_output2, _acc2, vl); + vse16_v_f16m2(init_output3, _acc3, vl); + vse16_v_f16m2(init_output4, _acc4, vl); + init_output += vl; + init_output2 += vl; + init_output3 += vl; + init_output4 += vl; + n -= vl; + } + } + for (; b + 1 < batches; b += 2) { + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_output2 = init_output + output_depth; + __fp16 *init_input = input_data + b * accum_depth; + __fp16 *init_input2 = init_input + accum_depth; + + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; + int n = output_depth; + while (n > 0) { + __fp16 *in_ptr = init_input; + __fp16 *in_ptr2 = init_input2; + vfloat16m2_t _acc = vle16_v_f16m2(init_bias, vl); + vfloat16m2_t _acc2 = vmv_v_v_f16m2(_acc, vl); + init_bias += vl; + int k = accum_depth; + while (k > 0) { + vfloat16m2_t _weight = vle16_v_f16m2(init_weight, vl); + _acc = vfmacc_vf_f16m2(_acc, *in_ptr, _weight, vl); + _acc2 = vfmacc_vf_f16m2(_acc2, *in_ptr2, _weight, vl); + init_weight += vl; + in_ptr++; + in_ptr2++; + k--; + } + vse16_v_f16m2(init_output, _acc, vl); + vse16_v_f16m2(init_output2, _acc2, vl); + init_output += vl; + init_output2 += vl; + n -= vl; + } + } + for (; b < batches; b++) { + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_input = input_data + b * accum_depth; + + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; + int n = output_depth; + while (n > 0) { + __fp16 *in_ptr = init_input; + vfloat16m2_t _acc = vle16_v_f16m2(init_bias, vl); + init_bias += vl; + int k = accum_depth; + while (k > 0) { + vfloat16m2_t _weight = vle16_v_f16m2(init_weight, vl); + _acc = vfmacc_vf_f16m2(_acc, *in_ptr, _weight, vl); + init_weight += vl; + in_ptr++; + k--; + } + vse16_v_f16m2(init_output, _acc, vl); + init_output += vl; + n -= vl; + } + } + + size_t insert_lenth = output->dim[1] * input->dim[1]; // 512*6 + __fp16 *output_from_buffer; + output_from_buffer = asr_buffer_insert_c906_back(¶ms->asr_buffer, output_data, + insert_lenth * sizeof(__fp16)); + size_t output_lenth = output->dim[0] * output->dim[1] * output->dim[2]; + int *shape = output->dim; + + __fp16 *p_input = output_from_buffer; + __fp16 *p_output = output->data; + for (int i = 0; i < shape[2]; i++) { + int j = 0; + for (; j + 15 < shape[1]; j += 16) { + int out_pos = j * shape[2] + i; + vfloat16m2_t _output_from_buffer; + _output_from_buffer = vle16_v_f16m2(p_input + i * shape[1] + j, 16); + vsse16_v_f16m2(p_output + out_pos, 2 * shape[2], _output_from_buffer, 16); + } + if (j != shape[1]) { + int vl = shape[1] - j; + int out_pos = j * shape[2] + i; + vfloat16m2_t _output_from_buffer; + _output_from_buffer = vle16_v_f16m2(p_input + i * shape[1] + j, vl); + vsse16_v_f16m2(p_output + out_pos, 2 * shape[2], _output_from_buffer, vl); + } + } +} \ No newline at end of file diff --git a/source/c906_opt/cache_matmul.c b/source/c906_opt/cache_matmul.c new file mode 100644 index 00000000..6810ce48 --- /dev/null +++ b/source/c906_opt/cache_matmul.c @@ -0,0 +1,302 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" +#include "csi_memory.h" + +// asr data buffer +void asr_buffer_init_c906(struct asr_buffer_t *buffer, size_t buffer_size, size_t data_lenth) +{ + buffer->buffer = csi_mem_alloc(buffer_size); + buffer->buffer_lenth = buffer_size; + buffer->data_lenth = data_lenth; + buffer->writer_index = buffer_size - data_lenth; + buffer->flag = 0; //用来记录有没有经过位置0.有的话置为1. +} + +// insert front +void *asr_buffer_insert_c906_front(struct asr_buffer_t *buffer, void *input, size_t len) +{ + int start_position = buffer->writer_index - len; + uint8_t *p = NULL; + if (buffer->flag == 0) { + if (start_position < 0) { + buffer->flag = 1; + } + } + if (start_position >= 0) { + p = &buffer->buffer[start_position]; + memcpy(p, input, len); + buffer->writer_index = start_position; + if (buffer->flag == 0) { + return (void *)&buffer->buffer[0]; + } else { + return (void *)p; + } + } else { + start_position = buffer->buffer_lenth - buffer->data_lenth; + p = &buffer->buffer[start_position]; + memcpy(p, input, len); + memcpy(p + len, &buffer->buffer[buffer->writer_index], buffer->data_lenth - len); + buffer->writer_index = start_position; + return (void *)p; + } +} + +void *asr_buffer_insert_c906_back(struct asr_buffer_t *buffer, void *input, size_t len) +{ + int end_position = buffer->writer_index + len; + uint8_t *p = NULL; + if (end_position <= buffer->buffer_lenth) { + p = &buffer->buffer[buffer->writer_index]; + memcpy(p, input, len); + buffer->writer_index += len; + p -= (buffer->data_lenth - len); + } else { + p = &buffer->buffer[buffer->writer_index + len - buffer->data_lenth]; + memcpy(&buffer->buffer[0], p, buffer->data_lenth - len); + buffer->writer_index = buffer->data_lenth; + memcpy(&buffer->buffer[buffer->data_lenth - len], input, len); + p = &buffer->buffer[0]; + } + return (void *)p; +} + +// get buffer +void *asr_buffer_get_buffer_c906(struct asr_buffer_t *buffer) +{ + return asr_buffer_insert_c906_back(buffer, NULL, 0); +} + +// reset buffer +void asr_buffer_reset_c906(struct asr_buffer_t *buffer) +{ + csi_mem_free(buffer->buffer); + buffer->writer_index = 0; + buffer->buffer = NULL; + buffer->buffer_lenth = 0; + buffer->data_lenth = 0; + buffer->flag = 0; +} + +int csi_c906_cache_matmul_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params) +{ + size_t data_size = + params->shape[0] * params->shape[1] * params->shape[2] * params->shape[3] * sizeof(__fp16); + asr_buffer_init_c906(¶ms->asr_buffer, 2 * data_size, data_size); + + int accum_depth = weight->dim[0]; + int output_depth = weight->dim[1]; + + if (input->dtype == CSINN_DTYPE_FLOAT16) { + __fp16 *weight_data = (__fp16 *)weight->data; + + int n = weight->dim[0]; // out_nodes + int k = weight->dim[1]; // in_nodes + if (k % 16 != 0) { + csi_debug_error("out_nodes num should be multiple of 16\n"); + } + __fp16 *pa_reorder = (__fp16 *)csi_mem_alloc(n * k * sizeof(__fp16)); + csi_c906_reorder_weight_n16_fp16(weight_data, pa_reorder, n, k, k); + + csi_c906_memcpy(weight_data, pa_reorder, n * k * sizeof(__fp16)); + params->data = weight_data; + csi_mem_free(pa_reorder); + params->base.bc = csi_c906_cache_matmul_fp16; + } + return CSINN_TRUE; +} + +int csi_c906_cache_matmul_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params) +{ + int accum_depth = weight->dim[0]; + int output_depth = weight->dim[1]; + int batches = input->dim[1]; + + __fp16 *input_data = input->data; + __fp16 *output_data = output->data; + __fp16 *weights_data = params->data; + __fp16 *bias_data = bias->data; + + int packn = 16; + int vl = 16; + int b = 0; + for (; b + 3 < batches; b += 4) { + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_output2 = init_output + output_depth; + __fp16 *init_output3 = init_output2 + output_depth; + __fp16 *init_output4 = init_output3 + output_depth; + __fp16 *init_input = input_data + b * accum_depth; + __fp16 *init_input2 = init_input + accum_depth; + __fp16 *init_input3 = init_input2 + accum_depth; + __fp16 *init_input4 = init_input3 + accum_depth; + + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; + int n = output_depth; + while (n > 0) { + __fp16 *in_ptr = init_input; + __fp16 *in_ptr2 = init_input2; + __fp16 *in_ptr3 = init_input3; + __fp16 *in_ptr4 = init_input4; + + vfloat16m2_t _acc = vle16_v_f16m2(init_bias, vl); + vfloat16m2_t _acc2 = vmv_v_v_f16m2(_acc, vl); + vfloat16m2_t _acc3 = vmv_v_v_f16m2(_acc, vl); + vfloat16m2_t _acc4 = vmv_v_v_f16m2(_acc, vl); + + init_bias += vl; + int k = accum_depth; + while (k > 0) { + vfloat16m2_t _weight = vle16_v_f16m2(init_weight, vl); + _acc = vfmacc_vf_f16m2(_acc, *in_ptr, _weight, vl); + _acc2 = vfmacc_vf_f16m2(_acc2, *in_ptr2, _weight, vl); + _acc3 = vfmacc_vf_f16m2(_acc3, *in_ptr3, _weight, vl); + _acc4 = vfmacc_vf_f16m2(_acc4, *in_ptr4, _weight, vl); + init_weight += vl; + in_ptr++; + in_ptr2++; + in_ptr3++; + in_ptr4++; + k--; + } + vse16_v_f16m2(init_output, _acc, vl); + vse16_v_f16m2(init_output2, _acc2, vl); + vse16_v_f16m2(init_output3, _acc3, vl); + vse16_v_f16m2(init_output4, _acc4, vl); + init_output += vl; + init_output2 += vl; + init_output3 += vl; + init_output4 += vl; + n -= vl; + } + } + for (; b + 1 < batches; b += 2) { + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_output2 = init_output + output_depth; + __fp16 *init_input = input_data + b * accum_depth; + __fp16 *init_input2 = init_input + accum_depth; + + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; + int n = output_depth; + while (n > 0) { + __fp16 *in_ptr = init_input; + __fp16 *in_ptr2 = init_input2; + vfloat16m2_t _acc = vle16_v_f16m2(init_bias, vl); + vfloat16m2_t _acc2 = vmv_v_v_f16m2(_acc, vl); + init_bias += vl; + int k = accum_depth; + while (k > 0) { + vfloat16m2_t _weight = vle16_v_f16m2(init_weight, vl); + _acc = vfmacc_vf_f16m2(_acc, *in_ptr, _weight, vl); + _acc2 = vfmacc_vf_f16m2(_acc2, *in_ptr2, _weight, vl); + init_weight += vl; + in_ptr++; + in_ptr2++; + k--; + } + vse16_v_f16m2(init_output, _acc, vl); + vse16_v_f16m2(init_output2, _acc2, vl); + init_output += vl; + init_output2 += vl; + n -= vl; + } + } + for (; b < batches; b++) { + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_input = input_data + b * accum_depth; + + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; + int n = output_depth; + while (n > 0) { + __fp16 *in_ptr = init_input; + vfloat16m2_t _acc = vle16_v_f16m2(init_bias, vl); + init_bias += vl; + int k = accum_depth; + while (k > 0) { + vfloat16m2_t _weight = vle16_v_f16m2(init_weight, vl); + _acc = vfmacc_vf_f16m2(_acc, *in_ptr, _weight, vl); + init_weight += vl; + in_ptr++; + k--; + } + vse16_v_f16m2(init_output, _acc, vl); + init_output += vl; + n -= vl; + } + } + __fp16 judge = + bias_data[0] + bias_data[1] + bias_data[2] + bias_data[3] + bias_data[4] + bias_data[5]; + + size_t insert_lenth = output_depth * batches; + __fp16 *output_from_buffer; + if (fabs(judge) < 0.01) { + output_from_buffer = asr_buffer_insert_c906_front(¶ms->asr_buffer, output_data, + insert_lenth * sizeof(__fp16)); + } else { + output_from_buffer = asr_buffer_insert_c906_back(¶ms->asr_buffer, output_data, + insert_lenth * sizeof(__fp16)); + } + + // deal with reshape & transpose + int *shape = output->dim; + + // transpose can only be 0,2,3,1 or 0,2,1,3 + if (params->axes[2] == 3) // 0,2,3,1 + { + int batch = shape[3]; + int shape3 = shape[2]; + int flatten_shape = shape[1] * shape[2]; + __fp16 *ptr = output_from_buffer; + for (int i = 0; i < batch; i++) { + for (int j = 0; j < flatten_shape; j += 16) { + int out_pos = j * batch + i; + vfloat16m2_t _output_from_buffer; + _output_from_buffer = vle16_v_f16m2(ptr, 16); + vsse16_v_f16m2(output_data + out_pos, 2 * batch, _output_from_buffer, 16); + ptr += 16; + } + } + + } else // 0,2,1,3 + { + int batch = shape[2]; + int shape3 = shape[3]; + int flatten_shape = shape[1] * shape[3]; + __fp16 *ptr = output_from_buffer; + for (int i = 0; i < batch; i++) { + for (int j = 0; j < flatten_shape; j += 16) { + int out_pos = i * shape3 + j % shape3 + batch * shape3 * (j / shape3); + vfloat16m2_t v_output_from_buffer; + v_output_from_buffer = vle16_v_f16m2(ptr, 16); + vse16_v_f16m2(output_data + out_pos, v_output_from_buffer, 16); + ptr += 16; + } + } + } + + return CSINN_TRUE; +} diff --git a/source/c906_opt/clip.c b/source/c906_opt/clip.c index 4097a330..db9fc59c 100644 --- a/source/c906_opt/clip.c +++ b/source/c906_opt/clip.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/concat.c b/source/c906_opt/concat.c index 3f4cbf92..9c1c0d15 100644 --- a/source/c906_opt/concat.c +++ b/source/c906_opt/concat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/c906_opt/convolution.c b/source/c906_opt/convolution.c index 8f0dec98..cdfb544a 100644 --- a/source/c906_opt/convolution.c +++ b/source/c906_opt/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -59,15 +59,32 @@ int csi_c906_conv2d_init(struct csi_tensor *input, } else if (input->dtype == CSINN_DTYPE_FLOAT16) { csi_c906_conv1x1s1_sgemm_transform_kernel_fp16(kernel, params); params->base.bc = csi_c906_conv1x1s1_sgemm_fp16; + // params->base.bc = csi_c906_conv1x1s1_batch_gemv_fp16; } - // winograd convolution condition: } else if(kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dalition_h == 1 && dalition_w == 1) { if (input->dtype == CSINN_DTYPE_FLOAT32) { - params->conv_extra.conv_mode = CSINN_GEMM; - csi_c906_conv_im2col_sgemm_transform_kernel(kernel, params); - params->base.bc = csi_c906_conv_im2col_sgemm; + if (params->group > 1) { + params->conv_extra.conv_mode = CSINN_GEMM; + csi_c906_conv_im2col_sgemm_transform_kernel(kernel, params); + params->base.bc = csi_c906_conv_im2col_sgemm; + return CSINN_TRUE; + } + + // pack4 for winograd convolution + if ( (out_c % 4 == 0) && (in_c % 4 ==0) ) { + params->conv_extra.conv_mode = CSINN_WINOGRAD; + struct csi_tensor *t_kernel = csi_alloc_tensor(NULL); + csi_c906_conv3x3s1_winograd64_transform_kernel_pack4(kernel, t_kernel); + params->conv_extra.kernel_tm = t_kernel; + params->base.bc = csi_c906_conv3x3s1_winograd64_pack4; + } else { + params->conv_extra.conv_mode = CSINN_GEMM; + csi_c906_conv_im2col_sgemm_transform_kernel(kernel, params); + params->base.bc = csi_c906_conv_im2col_sgemm; + } + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { if (params->group > 1) { @@ -81,7 +98,7 @@ int csi_c906_conv2d_init(struct csi_tensor *input, if ( (out_c % 8 == 0) && (in_c % 8 ==0) ) { params->conv_extra.conv_mode = CSINN_WINOGRAD; struct csi_tensor *t_kernel = csi_alloc_tensor(NULL); - csi_c906_conv3x3s1_winograd64_transform_kernel_fp16(kernel, t_kernel); + csi_c906_conv3x3s1_winograd64_transform_kernel_pack8_fp16(kernel, t_kernel); params->conv_extra.kernel_tm = t_kernel; params->base.bc = csi_c906_conv3x3s1_winograd64_pack8_fp16; } else { @@ -153,8 +170,47 @@ int csi_c906_depthwise_conv2d_init(struct csi_tensor *input, if (input->dtype == CSINN_DTYPE_FLOAT32) { params->base.bc = csi_ref_depthwise_conv2d_f32; } else if (input->dtype == CSINN_DTYPE_FLOAT16) { - params->base.bc = csi_ref_depthwise_conv2d_quant; + if (params->pad_left == 0 && params->pad_top == 0 && input->dim[1] == output->dim[1]) { + params->base.bc = csi_c906_dwconv2d_s1_pad0_fp16; + } else { + params->base.bc = csi_ref_depthwise_conv2d_quant; + } } } return CSINN_TRUE; -} \ No newline at end of file +} + +int csi_c906_conv1d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv1d_params *params) +{ + int32_t out_c = kernel->dim[0]; + int32_t in_c = kernel->dim[1]; + int32_t in_w = input->dim[2]; + int32_t kernel_w = kernel->dim[2]; + int32_t stride_w = params->stride_width; + int32_t dalition_w = params->dilation_width; + + // check output_dim + int out_width = (in_w + params->pad_left + params->pad_right - kernel_w) / stride_w + 1; + if (out_width != output->dim[2]) { + printf("output dim don't match.\n"); + return CSINN_FALSE; + } + if (kernel_w == 1 && stride_w == 1 && dalition_w == 1) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + csi_c906_conv1x1s1_sgemm_transform_kernel(kernel, (struct conv2d_params *)params); + params->base.bc = csi_c906_conv1x1s1_sgemm; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + csi_c906_conv1x1s1_sgemm_transform_kernel_fp16(kernel, (struct conv2d_params *)params); + params->base.bc = csi_c906_conv1x1s1_sgemm_fp16; + } + } else { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_ref_conv1d_f32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_ref_conv1d_quant; + } + } + return CSINN_TRUE; +} diff --git a/source/c906_opt/convolution_1x1.c b/source/c906_opt/convolution_1x1.c index 7721dfd2..1cb90509 100644 --- a/source/c906_opt/convolution_1x1.c +++ b/source/c906_opt/convolution_1x1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -55,7 +55,7 @@ static int csi_c906_conv1x1s1_sgemm_base(struct csi_tensor *input, int32_t in_ch = input->dim[1]; int32_t out_ch = kernel->dim[0]; int32_t out_h = output->dim[2]; - int32_t out_w = output->dim[3]; + int32_t out_w = output->dim_count == 4 ? output->dim[3] : 1; // adapt conv1d1s1 int32_t m = out_ch / group; int32_t k = in_ch / group; @@ -69,7 +69,7 @@ static int csi_c906_conv1x1s1_sgemm_base(struct csi_tensor *input, float *pb = pb_reorder; float *pc = output_data; // pack - csi_c906_reorder_input(input_data, pb, k, n, n); + csi_c906_reorder_input_1(input_data, pb, k, n, n); // GEMM csi_c906_sgemm_kernel_f32(pc, pa, pb, m, k, n, n, bias_data + g * m, fuse_relu); input_data += k * n; diff --git a/source/c906_opt/convolution_1x1_fp16.c b/source/c906_opt/convolution_1x1_fp16.c index eff562d5..71f51ecd 100644 --- a/source/c906_opt/convolution_1x1_fp16.c +++ b/source/c906_opt/convolution_1x1_fp16.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -53,7 +53,7 @@ int csi_c906_conv1x1s1_sgemm_fp16(struct csi_tensor *input, int32_t in_ch = input->dim[1]; int32_t out_ch = kernel->dim[0]; int32_t out_h = output->dim[2]; - int32_t out_w = output->dim[3]; + int32_t out_w = output->dim_count == 4 ? output->dim[3] : 1; // adapt conv1d1s1 int32_t m = out_ch / group; int32_t k = in_ch / group; @@ -67,9 +67,12 @@ int csi_c906_conv1x1s1_sgemm_fp16(struct csi_tensor *input, __fp16 *pb = pb_reorder; __fp16 *pc = output_data; // pack - csi_c906_reorder_input_fp16(input_data, pb, k, n, n); + csi_nn_rvv_reorder_input_z16_fp16(input_data, pb, k, n, n); + // csi_c906_reorder_input_fp16_1(input_data, pb, k, n, n); // GEMM - csi_c906_sgemm_kernel_fp16(pc, pa, pb, m, k, n, n, bias_data + g * m); + csi_nn_rvv_gemm_8x16_fp16(pc, pa, pb, m, k, n, n, bias_data + g * m); + // csi_c906_sgemm_kernel_fp16(pc, pa, pb, m, k, n, n, bias_data + g * m); + input_data += k * n; output_data += m * n; } @@ -77,3 +80,64 @@ int csi_c906_conv1x1s1_sgemm_fp16(struct csi_tensor *input, csi_mem_free(pb_reorder); return CSINN_TRUE; } + +/* + matrix: input data matrix + vector: kernel data row +*/ +int csi_c906_conv1x1s1_batch_gemv_fp16(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *kernel, + struct csi_tensor *bias, + struct conv2d_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *kernel_data = (__fp16 *)kernel->data; + __fp16 *bias_data = (__fp16 *)bias->data; + + int32_t group = params->group; + int32_t batch = input->dim[0]; // assert(batch == 1); + int32_t in_ch = input->dim[1]; + int32_t out_ch = kernel->dim[0]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + int32_t m = out_ch / group; + int32_t k = in_ch / group; + int32_t n = out_h * out_w; + + bool flag_bias = 1; // default: conv2d layer include bias + if (bias_data == NULL) { + flag_bias = 0; + bias_data = (__fp16 *)csi_mem_alloc(out_ch * sizeof(__fp16)); + } + + __fp16* pb_reorder = (__fp16 *)csi_mem_alloc(k * n * sizeof(__fp16)); + + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + __fp16 *pa = kernel_data + g * m * k; + __fp16 *pb = pb_reorder; + __fp16 *pc = output_data; + __fp16 *bias_tmp = bias_data + g * m; + + // pack/reorder + csi_c906_reorder_matrix_z16_fp16(input_data, pb, k, n, n); + // batch GEMV + for (int j = 0; j < m; j++) { + csi_c906_gemv_trans_pack16_fp16(pc + j * n, pa + j * k, pb, k, n, n, bias_tmp + j); + } + + input_data += k * n; + output_data += m * n; + } + } + csi_mem_free(pb_reorder); + + if (!flag_bias) { + csi_mem_free(bias_data); + bias_data = NULL; + } + return CSINN_TRUE; +} diff --git a/source/c906_opt/convolution_3x3.c b/source/c906_opt/convolution_3x3.c index da51fba7..00dae52f 100644 --- a/source/c906_opt/convolution_3x3.c +++ b/source/c906_opt/convolution_3x3.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /* @@ -48,7 +48,7 @@ void csi_c906_conv3x3s1_winograd23_transform_kernel(struct csi_tensor *o_kernel, }; csi_tensor_copy(t_kernel, o_kernel); - o_kernel->data = kernel_tm; + t_kernel->data = kernel_tm; for (int p = 0; p < outch; p++) { for (int q = 0; q < inch; q++) { @@ -90,7 +90,7 @@ int csi_c906_conv3x3s1_winograd23(struct csi_tensor *input, { float *input_data = (float *)input->data; float *output_data = (float *)output->data; - float *kernel_data = (float *)kernel->data; + float *kernel_data = (float *)params->conv_extra.kernel_tm->data; float *bias_data = (float *)bias->data; // param @@ -353,7 +353,7 @@ int csi_c906_conv3x3s1_winograd43(struct csi_tensor *input, { float *input_data = (float *)input->data; float *output_data = (float *)output->data; - float *kernel_data = (float *)kernel->data; + float *kernel_data = (float *)params->conv_extra.kernel_tm->data; float *bias_data = (float *)bias->data; // param @@ -632,7 +632,7 @@ void csi_c906_conv3x3s1_winograd64_transform_kernel(struct csi_tensor *o_kernel, // }; csi_tensor_copy(t_kernel, o_kernel); - o_kernel->data = kernel_tm; + t_kernel->data = kernel_tm; for (int p = 0; p < outch; p++) { for (int q = 0; q < inch; q++) { @@ -676,7 +676,7 @@ int csi_c906_conv3x3s1_winograd64(struct csi_tensor *input, float *input_data = (float *)input->data; float *output_data = (float *)output->data; - float *kernel_data = (float *)kernel->data; + float *kernel_data = (float *)params->conv_extra.kernel_tm->data; float *bias_data = (float *)bias->data; // param @@ -1135,6 +1135,7 @@ void csi_c906_conv3x3s1_winograd64_transform_kernel_1(struct csi_tensor *o_kerne int remain_outch_start = outch4 << 2; // float *kernel_tm2 = (float *)csi_mem_alloc(8 * 8 * inch * 4 * (outch4 + (outch % 4 + 3) / 4) * sizeof(float)); float *kernel_tm2 = (float *)csi_mem_alloc(8 * 8 * inch * outch * sizeof(float)); + t_kernel->data = kernel_tm2; for(int pp = 0; pp < outch4; pp++) { @@ -1229,7 +1230,6 @@ void csi_c906_conv3x3s1_winograd64_transform_kernel_1(struct csi_tensor *o_kerne } } csi_mem_free(kernel_tm); - o_kernel->data = kernel_tm2; } @@ -1244,7 +1244,7 @@ int csi_c906_conv3x3s1_winograd64_1(struct csi_tensor *input, // start_time = csi_get_timespec(); float *input_data = (float *)input->data; float *output_data = (float *)output->data; - float *kernel_data = (float *)kernel->data; + float *kernel_data = (float *)params->conv_extra.kernel_tm->data; float *bias_data = (float *)bias->data; // param @@ -1586,27 +1586,2262 @@ int csi_c906_conv3x3s1_winograd64_1(struct csi_tensor *input, csi_mem_free(input_padd_buf); csi_mem_free(input_trans_buf); csi_mem_free(output_trans_buf); - // end_time = csi_get_timespec(); - // printf("f32 Run graph execution time: %.5fms, FPS=%.2f\n", ((float)(end_time-start_time))/1000000, - // 1000000000.0/((float)(end_time-start_time))); return CSINN_TRUE; } -void csi_c906_conv3x3s1(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params) +/* + padding input for winograd input transform , and change memory layout to [n c/4 h w 4] + input layout: [n c h w] + input_padded layout: [n c/4 h w 4] + constrain: input channel % 4 = 0 +*/ +void csi_c906_pad_input_pack1to4(const float *input, float *input_padded, int inc, int inh, int inw, + int padded_h, int padded_w, int pad_top, int pad_left) { - /* to do */ + int inc4= inc / 4; + int padded_hw = padded_h * padded_w; + + float *pad_ptr = input_padded; + float *inp_ptr = (float *)input; + int resi_h = padded_h - pad_top - inh; // remain to pad on h (pad_down) + int resi_w = padded_w - pad_left - inw; // remain to pad on w (pad_right) + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "vmv.v.x v2, zero\n\t" // clear v2, for memset value 0 + "mulw t1, %6, %7\n\t" // pad_top * padded_w + "mulw t2, %6, %9\n\t" // pad_down * padded_w + "mulw t0, %3, %4\n\t" // input_size per_channel + "slli t0, t0, 2\n\t" // load stride = input_size * 4 + "slli t6, t0, 2\n\t" // t6 = input_size * 4(channel) * 4 bytes + + "1:\n\t" // channel loop [inc/8] + "mv a0, %0\n\t" // update input_addr + "mv t5, %3\n\t" // t5 = in_h + "beqz %7, 3f\n\t" // if pad_top = 0 + "mv t3, t1\n\t" // t3 = num to memset + + "2:\n\t" // pad h_top + "vse.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t3, t3, -1\n\t" + "bnez t3, 2b\n\t" + + "3:\n\t" // pad h_mid + "mv t4, %4\n\t" // t4 = in_w + "beqz %8, 5f\n\t" // if pad_left = 0 + "mv t3, %8\n\t" // t3 = pad_left + + "4:\n\t" // pad w_left + "vse.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t3, t3, -1\n\t" + "bnez t3, 4b\n\t" + + "5:\n\t" // pad w_mid + "vlse.v v4, (a0), t0\n\t" + "addi a0, a0, 4\n\t" + "vse.v v4, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t4, t4, -1\n\t" + "bnez t4, 5b\n\t" + + "beqz %10, 7f\n\t" // if pad_right = 0 + "mv t3, %10\n\t" // t3 = pad_right + + "6:\n\t" // pad w_right + "vse.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t3, t3, -1\n\t" + "bnez t3, 6b\n\t" + + "7:\n\t" + "addi t5, t5, -1\n\t" + "bnez t5, 3b\n\t" + + "beqz %9, 9f\n\t" // if pad_down = 0 + "mv t3, t2\n\t" // t3 = num to memset 0 + + "8:\n\t" // pad h_down + "vse.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t3, t3, -1\n\t" + "bnez t3, 8b\n\t" + + "9:\n\t" + "add %0, %0, t6\n\t" // input_data jump to next 4 channel + + "addi %2, %2, -1\n\t" + "bnez %2, 1b\n\t" + + :"=r"(inp_ptr), // %0 + "=r"(pad_ptr), // %1 + "=r"(inc4), // %2 + "=r"(inh), // %3 + "=r"(inw), // %4 + "=r"(padded_hw), // %5 + "=r"(padded_w), // %6 + "=r"(pad_top), // %7 + "=r"(pad_left), // %8 + "=r"(resi_h), // %9 + "=r"(resi_w) // %10 + :"0"(inp_ptr), + "1"(pad_ptr), + "2"(inc4), + "3"(inh), + "4"(inw), + "5"(padded_hw), + "6"(padded_w), + "7"(pad_top), + "8"(pad_left), + "9"(resi_h), + "10"(resi_w) + :"cc", "memory", "v2", "v4", + "a0", "t0", "t1", "t2", "t3", "t4", "t5", "t6" + ); + } -void csi_c906_conv3x3s2(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params) +void csi_c906_crop_output_pack4to1(const float *output_trans, float *output, int out_c, int out_h, int out_w, + int wino_h, int wino_w) { - /* to do */ -} \ No newline at end of file + int out_c4 = out_c / 4; + float *out_tm_ptr = (float *)output_trans; + float *out_ptr = output; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + + "mulw t0, %3, %4\n\t" // output_size per_channel + "slli t0, t0, 2\n\t" // store_stride = output_size * 4 + + "slli t3, t0, 2\n\t" // t3 = output_size * 4(channel) * 4bytes + "slli t4, %6, 4\n\t" // t4 = wino_w * 4(channel) * 4 + + "mulw t5, %5, %6\n\t" // crop_size per_channel + "slli t5, t5, 4\n\t" // t5 = crop_size * 4(channel) * 4 + + "1:\n\t" // channel loop [out_ch / 4] + "mv a1, %1\n\t" // update output_addr + "mv a0, %0\n\t" // update crop_addr per-channel + + "mv t1, %3\n\t" // t1 = out_h + + "2:\n\t" // crop h + "mv t2, %4\n\t" // t2 = out_w + "mv s1, a0\n\t" // update crop_addr per-row + + "3:\n\t" // crop w + "vle.v v2, (s1)\n\t" + "addi s1, s1, 16\n\t" + "vsse.v v2, (a1), t0\n\t" + "addi a1, a1, 4\n\t" + + "addi t2, t2, -1\n\t" + "bnez t2, 3b\n\t" + + "add a0, a0, t4\n\t" // crop-data jump to next row + + "addi t1, t1, -1\n\t" + "bnez t1, 2b\n\t" + + "4:\n\t" + "add %1, %1, t3\n\t" // output_data jump to next 4 channel + "add %0, %0, t5\n\t" // crop-data jump to next 4 channel + + "addi %2, %2, -1\n\t" + "bnez %2, 1b\n\t" + + :"=r"(out_tm_ptr), // %0 + "=r"(out_ptr), // %1 + "=r"(out_c4), // %2 + "=r"(out_h), // %3 + "=r"(out_w), // %4 + "=r"(wino_h), // %5 + "=r"(wino_w) // %6 + :"0"(out_tm_ptr), + "1"(out_ptr), + "2"(out_c4), + "3"(out_h), + "4"(out_w), + "5"(wino_h), + "6"(wino_w) + :"cc", "memory", "v2", "v3", "a0", "a1", "s1", + "t0", "t1", "t2", "t3", "t4", "t5" + + ); + +} + +/* + constrain: output channel % 4 = 0 + input channel % 4 = 0 + kernel before: [O I 3*3] + kernel after : [O/4 8*8 I 4] +*/ +void csi_c906_conv3x3s1_winograd64_transform_kernel_pack4(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel) +{ + int32_t outch = o_kernel->dim[0]; + int32_t inch = o_kernel->dim[1]; + + float *kernel_data = (float *)o_kernel->data; + // for kernel transform buf, 3x3 --> 8x8 + float *kernel_tm = (float *)csi_mem_alloc(outch * inch * 8 * 8 * sizeof(float)); + // kernel transform matrix: G + const float ktm[8][3] = { + {1.0f, 0.0f, 0.0f}, + {-2.0f / 9, -2.0f / 9, -2.0f / 9}, + {-2.0f / 9, 2.0f / 9, -2.0f / 9}, + {1.0f / 90, 1.0f / 45, 2.0f / 45}, + {1.0f / 90, -1.0f / 45, 2.0f / 45}, + {1.0f / 45, 1.0f / 90, 1.0f / 180}, + {1.0f / 45, -1.0f / 90, 1.0f / 180}, + {0.0f, 0.0f, 1.0f} + }; + + // const float ktm[8][3] = { + // {1.0f, 0.0f, 0.0f}, + // {-2.0f / 9, -2.0f / 9, -2.0f / 9}, + // {-2.0f / 9, 2.0f / 9, -2.0f / 9}, + // {1.0f / 90, 1.0f / 45, 2.0f / 45}, + // {1.0f / 90, -1.0f / 45, 2.0f / 45}, + // {32.0f / 45, 16.0f / 45, 8.0f / 45}, + // {32.0f / 45, -16.0f / 45, 8.0f / 45}, + // {0.0f, 0.0f, 1.0f} + // }; + + csi_tensor_copy(t_kernel, o_kernel); + + for (int p = 0; p < outch; p++) { + for (int q = 0; q < inch; q++) { + + const float* kernel0 = kernel_data + p * inch * 9 + q * 9; + float* kernel_tmp = kernel_tm + p * inch * 64 + q * 64; + + // transform kernel + const float *k0 = kernel0; + const float *k1 = kernel0 + 3; + const float *k2 = kernel0 + 6; + + // h : first compute the transport matrix tmp = (g * GT)T + float tmp[8][3]; + for (int i = 0; i < 8; i++) { + + tmp[i][0] = k0[0] * ktm[i][0] + k0[1] * ktm[i][1] + k0[2] * ktm[i][2]; + tmp[i][1] = k1[0] * ktm[i][0] + k1[1] * ktm[i][1] + k1[2] * ktm[i][2]; + tmp[i][2] = k2[0] * ktm[i][0] + k2[1] * ktm[i][1] + k2[2] * ktm[i][2]; + } + + // U + for (int j = 0; j < 8; j++) { + float* tmpp = &tmp[j][0]; + + for (int i = 0; i < 8; i++) { + kernel_tmp[j * 8 + i] = tmpp[0] * ktm[i][0] + tmpp[1] * ktm[i][1] + tmpp[2] * ktm[i][2]; + } + } + } + } + // optimized layout for winograd64 + float *kernel_tm_pack4 = (float *)csi_mem_alloc(outch * inch * 8 * 8 * sizeof(float)); + t_kernel->data = kernel_tm_pack4; + + for (int oc = 0; oc < outch / 4; oc++) { + + float *g0 = kernel_tm_pack4 + oc * 64 * inch * 4; + + const float *k0 = kernel_tm + oc * 64 * inch * 4; + const float *k1 = k0 + 64 * inch; + const float *k2 = k1 + 64 * inch; + const float *k3 = k2 + 64 * inch; + + for (int k = 0; k < 64; k++) { + + float *g00 = g0 + k * inch * 4; + + for (int ic = 0; ic < inch / 4; ic++) { + + for (int i = 0; i < 4; i++) { + + const float *k00 = k0 + (ic * 4 + i) * 64; + const float *k10 = k1 + (ic * 4 + i) * 64; + const float *k20 = k2 + (ic * 4 + i) * 64; + const float *k30 = k3 + (ic * 4 + i) * 64; + + g00[0] = k00[k]; + g00[1] = k10[k]; + g00[2] = k20[k]; + g00[3] = k30[k]; + + g00 += 4; + } + } + } + } + + csi_mem_free(kernel_tm); +} + +/* + constrain: output channel % 4 = 0 + input channel % 4 = 0 +*/ +int csi_c906_conv3x3s1_winograd64_pack4(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *kernel, + struct csi_tensor *bias, + struct conv2d_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + float *kernel_data = (float *)params->conv_extra.kernel_tm->data; + float *bias_data = (float *)bias->data; + + // param + int kernel_h = kernel->dim[2]; + int kernel_w = kernel->dim[3]; + int stride_h = params->stride_height; + int stride_w = params->stride_width; + int dilation_h = params->dilation_height; + int dilation_w = params->dilation_width; + int pad_left = params->pad_left; + int pad_top = params->pad_top; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + int kernel_size = in_c * kernel_h * kernel_w; + + int out_c = kernel->dim[0]; + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int output_size = out_c * out_h * out_w; + + // winograd param + int block_h = (out_h + 5) / 6; + int block_w = (out_w + 5) / 6; + + int padded_in_h = block_h * 6 + 2; // block * 4 for alignment with 4,kernel = 3 * 3 ,stride = 1,thus input_size + 2 + int padded_in_w = block_w * 6 + 2; + int padded_in_hw = padded_in_h * padded_in_w; // element size after padding per channel + + /****************************** bias *****************************/ + bool flag_bias = 1; // default: conv2d layer include bias + if (bias_data == NULL) { + flag_bias = 0; + bias_data = (float *)csi_mem_alloc(out_c * sizeof(float)); + } + + + for(int n = 0; n < batch; n++) { + + // pad buffer: [in_c/8 h w 8] + float *input_padd_buf = (float *)csi_mem_alloc(in_c * padded_in_hw * sizeof(float)); + + // pad input + csi_c906_pad_input_pack1to4(input_data, input_padd_buf, in_c, in_h, in_w, padded_in_h, padded_in_w, pad_top, pad_left); + input_data += input_size; + + // input transform buffer1: [in_ch/8, 64, blocks, 8] + float *input_tm1_buf = (float *)csi_mem_alloc(in_c * block_h * block_w * 8 * 8 * sizeof(float)); + + /****************************** transform input *****************************/ + /* + BT = { + { 1 0 -5.25 0 5.25 0 -1 0 }; + { 0 1 1 -4.25 -4.25 1 1 0 }; + { 0 -1 1 4.25 -4.25 -1 1 0 }; + { 0 0.5 0.25 -2.5 -1.25 2 1 0 }; + { 0 -0.5 0.25 2.5 -1.25 -2 1 0 }; + { 0 2 4 -2.5 -5 0.5 1 0 }; + { 0 -2 4 2.5 -5 -0.5 1 0 }; + { 0 -1 0 5.25 0 -5.25 0 1 } + }; + */ + + // int in_h_tm = block_h * 8; // input height after transform + // int in_w_tm = block_w * 8; + + int tiles = block_h * block_w; + + #pragma omp parallel for num_threads(1) + for(int q = 0; q < in_c / 4; q++) { + + float *img0 = input_padd_buf + q * padded_in_h * padded_in_w * 4; // feature map after padding - q channel + float *img0_tm = input_tm1_buf + q * 64 * tiles * 4; // transform and interleave - q channel + + float *tmp = (float *)csi_mem_alloc(8 * 8 * 4 * sizeof(float)); + + for(int i = 0; i < block_h; i++) { + + for(int j = 0; j < block_w; j++) { + + float *r0 = img0 + (i * padded_in_w * 6 + j * 6) * 4; // feature map after padding 8*8 start addr + float *r0_tm = img0_tm + (i * block_w + j) * 4; // input_tm1 8*8 block start addr + + float ratio[] = {5.25, -4.25, 0.25, -1.25, 4.0, 0.5, -2.5, 2.0}; // note: in fact cannot be output constrain + float *ratio_ptr = ratio; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "li t0, 8\n\t" // m = 8 + "mv t5, %2\n\t" // t5 = tmp start addr + "slli t1, %4, 4\n\t" // t1 = padded_in_w * 4 * 4bytes + + "flw fa0, 0(%3)\n\t" // fa0 = 5.25 + "flw fa1, 4(%3)\n\t" // fa1 = -4.25 + "flw fa2, 8(%3)\n\t" // fa2 = 0.25 + "flw fa3, 12(%3)\n\t" // fa3 = -1.25 + "flw fa4, 16(%3)\n\t" // fa4 = 4.0 + "flw fa5, 20(%3)\n\t" // fa5 = 0.5 + "flw fa6, 24(%3)\n\t" // fa6 = -2.5 + "flw fa7, 28(%3)\n\t" // fa7 = 2.0 + + "1:\n\t" + "mv s1, %0\n\t" // s1 = r00 addr + + "mv a0, t5\n\t" // tmp[0][m] + "addi a1, a0, 128\n\t" // tmp[1][m] + "addi a2, a1, 128\n\t" // tmp[2][m] + "addi a3, a2, 128\n\t" // tmp[3][m] + "addi a4, a3, 128\n\t" // tmp[4][m] + "addi a5, a4, 128\n\t" // tmp[5][m] + "addi a6, a5, 128\n\t" // tmp[6][m] + "addi a7, a6, 128\n\t" // tmp[7][m] + + "vle.v v0, (s1)\n\t" // r00 + "addi s1, s1, 16\n\t" + "vle.v v1, (s1)\n\t" // r01 + "addi s1, s1, 16\n\t" + "vle.v v2, (s1)\n\t" // r02 + "addi s1, s1, 16\n\t" + "vle.v v3, (s1)\n\t" // r03 + "addi s1, s1, 16\n\t" + "vle.v v4, (s1)\n\t" // r04 + "addi s1, s1, 16\n\t" + "vle.v v5, (s1)\n\t" // r05 + "addi s1, s1, 16\n\t" + "vle.v v6, (s1)\n\t" // r06 + "addi s1, s1, 16\n\t" + "vle.v v7, (s1)\n\t" // r07 + "addi s1, s1, 16\n\t" + + "vmv.v.v v10, v6\n\t" + + //--------------------------------------------- + "vfsub.vv v8, v4, v2\n\t" // r04 - r02 + "vfsub.vv v9, v3, v5\n\t" // r03 - r05 + + "vfsub.vv v24, v0, v6\n\t" // r00 - r06 + "vfsub.vv v31, v7, v1\n\t" // r07 - r01 + + "vfmacc.vf v10, fa2, v2\n\t" // r06 + r02 * 0.25f + + "vfmul.vf v11, v1, fa5\n\t" // r01 * 0.5f + "vfmul.vf v12, v1, fa7\n\t" // r01 * 2.0f + + "vfmacc.vf v24, fa0, v8\n\t" // r00 - r06 + 5.25 * (r04 - r02) = tmp[0][m] + "vfmacc.vf v31, fa0, v9\n\t" // r07 - r01 + 5.25 * (r03 - r05) = tmp[7][m] + + //--------------------------------------------- + "vfadd.vv v8, v2, v6\n\t" // r02 + r06 + "vfadd.vv v9, v1, v5\n\t" // r01 + r05 + + "vfmacc.vf v11, fa6, v3\n\t" // r01 * 0.5f - r03 * 2.5f + "vfmacc.vf v12, fa6, v3\n\t" // r01 * 2.f - r03 * 2.5f + + "vfmacc.vf v2, fa3, v4\n\t" // r02 - r04 * 1.25f + "vfmacc.vf v10, fa3, v4\n\t" // r06 + r02 * 0.25f - r04 * 1.25f = tmp34a + + "vfmacc.vf v8, fa1, v4\n\t" // r02 + r06 - r04 * 4.25f = tmp12a + "vfmacc.vf v9, fa1, v3\n\t" // r01 + r05 - r03 * 4.25f = tmp12b + + "vfmacc.vf v11, fa7, v5\n\t" // r01 * 0.5f - r03 * 2.5f + r05 * 2.0 = tmp34b + "vfmacc.vf v12, fa5, v5\n\t" // r01 * 2.f - r03 * 2.5f + r05 * 0.5 = tmp56b + + "vse.v v24, (a0)\n\t" + "vse.v v31, (a7)\n\t" + + "vfadd.vv v25, v8, v9\n\t" // tmp12a + tmp12b = tmp[1][m] + "vfsub.vv v26, v8, v9\n\t" // tmp12a - tmp12b = tmp[2][m] + + //--------------------------------------------- + "vfmacc.vf v6, fa4, v2\n\t" // r06 + (r02 - r04 * 1.25f) * 4 = tmp56a + + "vfadd.vv v27, v10, v11\n\t" // tmp34a + tmp34b = tmp[3][m] + "vfsub.vv v28, v10, v11\n\t" // tmp34a - tmp34b = tmp[4][m] + + "vfadd.vv v29, v6, v12\n\t" // tmp56a + tmp56b = tmp[5][m] + "vfsub.vv v30, v6, v12\n\t" // tmp56a - tmp56b = tmp[6][m] + + "vse.v v25, (a1)\n\t" + "vse.v v26, (a2)\n\t" + "vse.v v27, (a3)\n\t" + "vse.v v28, (a4)\n\t" + "vse.v v29, (a5)\n\t" + "vse.v v30, (a6)\n\t" + + //--------------------------------------------- + + "add %0, %0, t1\n\t" // padding feature map 8*8 next line addr + "addi t5, t5, 16\n\t" // tmp[0][0] --> tmp[0][1] + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "2:\n\t" + + "mv t5, %2\n\t" // tmp start addr + "li t0, 8\n\t" // m = 8 + + "slli t1, %5, 4\n\t" // t1 = tiles * 4 * 4 bytes + "slli t2, %5, 7\n\t" // t2 = tiles * 8 * 4 * 4 bytes + + "3:\n\t" + + "mv a0, %1\n\t" // r0_tm_0 + "add a1, a0, t1\n\t" // r0_tm_1 + "add a2, a1, t1\n\t" // r0_tm_2 + "add a3, a2, t1\n\t" // r0_tm_3 + "add a4, a3, t1\n\t" // r0_tm_4 + "add a5, a4, t1\n\t" // r0_tm_5 + "add a6, a5, t1\n\t" // r0_tm_6 + "add a7, a6, t1\n\t" // r0_tm_7 + + "vle.v v0, (t5)\n\t" // tmp[m][0] + "addi t5, t5, 16\n\t" + "vle.v v1, (t5)\n\t" // tmp[m][1] + "addi t5, t5, 16\n\t" + "vle.v v2, (t5)\n\t" // tmp[m][2] + "addi t5, t5, 16\n\t" + "vle.v v3, (t5)\n\t" // tmp[m][3] + "addi t5, t5, 16\n\t" + "vle.v v4, (t5)\n\t" // tmp[m][4] + "addi t5, t5, 16\n\t" + "vle.v v5, (t5)\n\t" // tmp[m][5] + "addi t5, t5, 16\n\t" + "vle.v v6, (t5)\n\t" // tmp[m][6] + "addi t5, t5, 16\n\t" + "vle.v v7, (t5)\n\t" // tmp[m][7] + "addi t5, t5, 16\n\t" + + "vmv.v.v v10, v6\n\t" + + //--------------------------------------------- + "vfsub.vv v8, v4, v2\n\t" // tmp04 - tmp02 (tmp[m][4] - tmp[m][2]) + "vfsub.vv v9, v3, v5\n\t" // tmp03 - tmp05 + + "vfsub.vv v24, v0, v6\n\t" // tmp00 - tmp06 + "vfsub.vv v31, v7, v1\n\t" // tmp07 - tmp01 + + "vfmacc.vf v10, fa2, v2\n\t" // tmp06 + tmp02 * 0.25f + + "vfmul.vf v11, v1, fa5\n\t" // tmp01 * 0.5f + "vfmul.vf v12, v1, fa7\n\t" // tmp01 * 2.0f + + "vfmacc.vf v24, fa0, v8\n\t" // tmp00 - tmp06 + 5.25 * (tmp04 - tmp02) = r0_tm_0[m] + "vfmacc.vf v31, fa0, v9\n\t" // tmp07 - tmp01 + 5.25 * (tmp03 - tmp05) = r0_tm_7[m] + + //--------------------------------------------- + "vfadd.vv v8, v2, v6\n\t" // tmp02 + tmp06 + "vfadd.vv v9, v1, v5\n\t" // tmp01 + tmp05 + + "vfmacc.vf v11, fa6, v3\n\t" // tmp01 * 0.5f - tmp03 * 2.5f + "vfmacc.vf v12, fa6, v3\n\t" // tmp01 * 2.f - tmp03 * 2.5f + + "vfmacc.vf v2, fa3, v4\n\t" // tmp02 - tmp04 * 1.25f + "vfmacc.vf v10, fa3, v4\n\t" // tmp06 + tmp02 * 0.25f - tmp04 * 1.25f = tmp34a + + "vfmacc.vf v8, fa1, v4\n\t" // tmp02 + tmp06 - tmp04 * 4.25f = tmp12a + "vfmacc.vf v9, fa1, v3\n\t" // tmp01 + tmp05 - tmp03 * 4.25f = tmp12b + + "vfmacc.vf v11, fa7, v5\n\t" // tmp01 * 0.5f - tmp03 * 2.5f + tmp05 * 2.0 = tmp34b + "vfmacc.vf v12, fa5, v5\n\t" // tmp01 * 2.f - tmp03 * 2.5f + tmp05 * 0.5 = tmp56b + + "vse.v v24, (a0)\n\t" + "vse.v v31, (a7)\n\t" + + "vfadd.vv v25, v8, v9\n\t" // tmp12a + tmp12b = r0_tm_1[m] + "vfsub.vv v26, v8, v9\n\t" // tmp12a - tmp12b = r0_tm_2[m] + + //--------------------------------------------- + "vfmacc.vf v6, fa4, v2\n\t" // tmp06 + (tmp02 - tmp04 * 1.25f) * 4 = tmp56a + + "vfadd.vv v27, v10, v11\n\t" // tmp34a + tmp34b = r0_tm_3[m] + "vfsub.vv v28, v10, v11\n\t" // tmp34a - tmp34b = r0_tm_4[m] + + "vfadd.vv v29, v6, v12\n\t" // tmp56a + tmp56b = r0_tm_5[m] + "vfsub.vv v30, v6, v12\n\t" // tmp56a - tmp56b = r0_tm_6[m] + + "vse.v v25, (a1)\n\t" + "vse.v v26, (a2)\n\t" + "vse.v v27, (a3)\n\t" + "vse.v v28, (a4)\n\t" + "vse.v v29, (a5)\n\t" + "vse.v v30, (a6)\n\t" + + "add %1, %1, t2\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 3b" + + + :"=r"(r0), // %0 + "=r"(r0_tm), // %1 + "=r"(tmp), // %2 + "=r"(ratio_ptr), // %3 + "=r"(padded_in_w), // %4 + "=r"(tiles) // %5 + :"0"(r0), + "1"(r0_tm), + "2"(tmp), + "3"(ratio_ptr), + "4"(padded_in_w), + "5"(tiles) + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "t0", "t1", "t2", "t5", "s1", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7" + ); + + } + } + csi_mem_free(tmp); + } + csi_mem_free(input_padd_buf); + + /*********************************** dot ***************************************/ + // reorder input_tm1_buf + int size_input_tm2 = 0; + if (tiles >= 8) { + size_input_tm2 = 64 * (tiles / 8 + (tiles % 8) / 4 + (tiles % 4) / 2 + tiles % 2) * in_c * 8; + } else if (tiles >= 4) { + size_input_tm2 = 64 * (tiles / 4 + (tiles % 4) / 2 + tiles % 2) * in_c * 4; + } else if (tiles >= 2) { + size_input_tm2 = 64 * (tiles / 2 + tiles % 2) * in_c * 2; + } else { + size_input_tm2 = 64 * tiles * in_c; + } + float *input_tm2_buf = (float *)csi_mem_alloc(size_input_tm2 * sizeof(float)); + + #pragma omp parallel for num_threads(1) + for (int r = 0; r < 64; r++) { + + float *img_tm2 = input_tm2_buf + r * size_input_tm2 / 64; // input_tm2 r channel data + + int t = 0; + for (; t + 7 < tiles; t += 8) { + float *tm2 = img_tm2 + t * in_c; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * 4; + + //----------------- + // for (int q = 0; q < in_c / 4; q++) { + // for (int l = 0; l < 4; l++) { + // tm2[0] = tm1[l]; + // tm2[1] = tm1[l + 4 * 1]; + // tm2[2] = tm1[l + 4 * 2]; + // tm2[3] = tm1[l + 4 * 3]; + // tm2[4] = tm1[l + 4 * 4]; + // tm2[5] = tm1[l + 4 * 5]; + // tm2[6] = tm1[l + 4 * 6]; + // tm2[7] = tm1[l + 4 * 7]; + // tm2 += 8; + // } + // tm1 += 64 * tiles * 4; + // } + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "slli t1, %2, 10\n\t" // 64 * tiles * 4 * 4 bytes + "srai t2, %3, 2\n\t" // in_ch4 + + "1:\n\t" // in_ch loop4 + + "mv a0, %1\n\t" // updata tm1 addr + + "vle.v v0, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v1, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v2, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v3, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v4, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v5, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v6, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v7, (a0)\n\t" + + "vsseg8e.v v0, (%0)\n\t" + + "add %1, %1, t1\n\t" + "addi %0, %0, 128\n\t" + + "addi t2, t2, -1\n\t" + "bnez t2, 1b\n\t" + + :"=r"(tm2), // %0 + "=r"(tm1), // %1 + "=r"(tiles), // %2 + "=r"(in_c) // %3 + :"0"(tm2), + "1"(tm1), + "2"(tiles), + "3"(in_c) + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "a0", "t1", "t2" + ); + } + for (; t + 3 < tiles; t += 4) { + float *tm2 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * 4; + + // for (int q = 0; q < in_c / 4; q++) { + // for (int l = 0; l < 4; l++) { + // tm2[0] = tm1[l]; + // tm2[1] = tm1[l + 4 * 1]; + // tm2[2] = tm1[l + 4 * 2]; + // tm2[3] = tm1[l + 4 * 3]; + // tm2 += 4; + // } + // tm1 += 64 * tiles * 4; + // } + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "slli t1, %2, 10\n\t" // 64 * tiles * 4 * 4 bytes + "srai t2, %3, 2\n\t" // in_ch4 + + "1:\n\t" // in_ch loop4 + + "mv a0, %1\n\t" // updata tm1 addr + + "vle.v v0, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v1, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v2, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v3, (a0)\n\t" + + "vsseg4e.v v0, (%0)\n\t" + + "add %1, %1, t1\n\t" + "addi %0, %0, 64\n\t" + + "addi t2, t2, -1\n\t" + "bnez t2, 1b\n\t" + + :"=r"(tm2), // %0 + "=r"(tm1), // %1 + "=r"(tiles), // %2 + "=r"(in_c) // %3 + :"0"(tm2), + "1"(tm1), + "2"(tiles), + "3"(in_c) + :"cc", "memory", "v0", "v1", "v2", "v3", + "a0", "t1", "t2" + ); + } + for (; t + 1 < tiles; t += 2) { + float *tm2 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * 4; + // for (int q = 0; q < in_c / 4; q++) { + // for (int l = 0; l < 4; l++) { + // tm2[0] = tm1[l]; + // tm2[1] = tm1[l + 4]; + // tm2 += 2; + // } + // tm1 += 64 * tiles * 4; + // } + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "slli t1, %2, 10\n\t" // 64 * tiles * 4 * 4 bytes + "srai t2, %3, 2\n\t" // in_ch4 + + "1:\n\t" // in_ch loop4 + + "mv a0, %1\n\t" // updata tm1 addr + + "vle.v v0, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v1, (a0)\n\t" + + "vsseg2e.v v0, (%0)\n\t" + + "add %1, %1, t1\n\t" + "addi %0, %0, 32\n\t" + + "addi t2, t2, -1\n\t" + "bnez t2, 1b\n\t" + + :"=r"(tm2), // %0 + "=r"(tm1), // %1 + "=r"(tiles), // %2 + "=r"(in_c) // %3 + :"0"(tm2), + "1"(tm1), + "2"(tiles), + "3"(in_c) + :"cc", "memory", "v0", "v1", + "a0", "t1", "t2" + ); + + } + for (; t < tiles; t++) { + float *tm2 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2 + t % 2) * in_c * 8; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * 4; + // for (int q = 0; q < in_c / 4; q++) { + // for (int l = 0; l < 4; l++) { + // tm2[0] = tm1[l]; + // tm2++; + // } + // tm1 += 64 * tiles * 4; + // } + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "slli t1, %2, 10\n\t" // 64 * tiles * 4 * 4 bytes + "srai t2, %3, 2\n\t" // in_ch4 + + "1:\n\t" // in_ch loop4 + + "mv a0, %1\n\t" // updata tm1 addr + + "vle.v v0, (a0)\n\t" + "addi a0, a0, 16\n\t" + + "vse.v v0, (%0)\n\t" + + "add %1, %1, t1\n\t" + "addi %0, %0, 16\n\t" + + "addi t2, t2, -1\n\t" + "bnez t2, 1b\n\t" + + :"=r"(tm2), // %0 + "=r"(tm1), // %1 + "=r"(tiles), // %2 + "=r"(in_c) // %3 + :"0"(tm2), + "1"(tm1), + "2"(tiles), + "3"(in_c) + :"cc", "memory", "v0", + "a0", "t1", "t2" + ); + + } + } + + csi_mem_free(input_tm1_buf); + + // output_dot_buf: [out_c/4, 64, blocks, 4] + float *output_dot_buf = (float *)csi_mem_alloc(out_c * block_h * block_w * 8 * 8 * sizeof(float)); + + #pragma omp parallel for num_threads(1) + for (int p = 0; p < out_c / 4; p++) { + + float *output0_tm = output_dot_buf + p * 64 * tiles * 4; // 4 channel dot output + float *kernel0_tm = kernel_data + p * 64 * in_c * 4; // 4 channel kernel + + for (int r = 0; r < 64; r++) { + + float *img_tm2 = input_tm2_buf + r * size_input_tm2 / 64; // img_tm2 第r个channel + + int t = 0; + for (; t + 7 < tiles; t += 8) { + + float *r0 = img_tm2 + t * in_c; + float *k0 = kernel0_tm + r * in_c * 4; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + + "vmv.v.x v0, zero\n\t" + "vmv.v.x v1, zero\n\t" + "vmv.v.x v2, zero\n\t" + "vmv.v.x v3, zero\n\t" + "vmv.v.x v4, zero\n\t" + "vmv.v.x v5, zero\n\t" + "vmv.v.x v6, zero\n\t" + "vmv.v.x v7, zero\n\t" // clear + + "1:\n\t" + + "flw fa0, (%0)\n\t" + "flw fa1, 4(%0)\n\t" + "flw fa2, 8(%0)\n\t" + "flw fa3, 12(%0)\n\t" + "flw fa4, 16(%0)\n\t" + "flw fa5, 20(%0)\n\t" + "flw fa6, 24(%0)\n\t" + "flw fa7, 28(%0)\n\t" + "addi %0, %0, 32\n\t" + + "vle.v v8, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v8\n\t" + "vfmacc.vf v1, fa1, v8\n\t" + "vfmacc.vf v2, fa2, v8\n\t" + "vfmacc.vf v3, fa3, v8\n\t" + "vfmacc.vf v4, fa4, v8\n\t" + "vfmacc.vf v5, fa5, v8\n\t" + "vfmacc.vf v6, fa6, v8\n\t" + "vfmacc.vf v7, fa7, v8\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v1, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v3, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v4, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v5, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v6, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v7, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "t0" + + ); + } + for (; t + 3 < tiles; t += 4) { + float *r0 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; + float *k0 = kernel0_tm + r * in_c * 4; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + "vmv.v.x v0, zero\n\t" + "vmv.v.x v1, zero\n\t" + "vmv.v.x v2, zero\n\t" + "vmv.v.x v3, zero\n\t" // clear + + "1:\n\t" + + "flw fa0, (%0)\n\t" + "flw fa1, 4(%0)\n\t" + "flw fa2, 8(%0)\n\t" + "flw fa3, 12(%0)\n\t" + "addi %0, %0, 16\n\t" + + "vle.v v4, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v4\n\t" + "vfmacc.vf v1, fa1, v4\n\t" + "vfmacc.vf v2, fa2, v4\n\t" + "vfmacc.vf v3, fa3, v4\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v1, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v3, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "fa0", "fa1", "fa2", "fa3", "t0" + ); + } + for (; t + 1 < tiles; t += 2) { + float *r0 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; + float *k0 = kernel0_tm + r * in_c * 4; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + "vmv.v.x v0, zero\n\t" + "vmv.v.x v1, zero\n\t" // clear + + "1:\n\t" + + "flw fa0, (%0)\n\t" + "flw fa1, 4(%0)\n\t" + "addi %0, %0, 8\n\t" + + "vle.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v2\n\t" + "vfmacc.vf v1, fa1, v2\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v1, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + :"cc", "memory", "v0", "v1", "v2", "fa0", "fa1", "t0" + ); + } + for (; t < tiles; t++) { + + float *r0 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2 + t % 2) * in_c * 8; + float *k0 = kernel0_tm + r * in_c * 4; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + "vmv.v.x v0, zero\n\t" // clear + + "1:\n\t" + + "flw fa0, (%0)\n\t" + "addi %0, %0, 4\n\t" + + "vle.v v1, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v1\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + :"cc", "memory", "v0", "v1", "fa0", "t0" + ); + + } + + } + + } + + csi_mem_free(input_tm2_buf); + /*************************** transform output ****************************/ + // output_tm1_buf: [out_c/4, out_h6, out_w6, 4] + float *output_tm1_buf = (float *)csi_mem_alloc(out_c * block_h * block_w * 6 * 6 * sizeof(float)); + + /* + AT = { + { 1 1 1 1 1 1 1 0 }; + { 0 1 -1 2 -2 1/2 -1/2 0 }; + { 0 1 1 4 4 1/4 1/4 0 }; + { 0 1 -1 8 -8 1/8 -1/8 0 }; + { 0 1 1 16 16 1/16 1/16 0 }; + { 0 1 -1 32 -32 1/32 -1/32 1 } + }; + AT = { + { 1 1 1 1 1 32 32 0 }; + { 0 1 -1 2 -2 16 -16 0 }; + { 0 1 1 4 4 8 8 0 }; + { 0 1 -1 8 -8 4 -4 0 }; + { 0 1 1 16 16 2 2 0 }; + { 0 1 -1 32 -32 1 -1 1 } + }; + */ + + #pragma omp parallel for num_threads(1) + for (int p = 0; p < out_c / 4; p++) + { + + float *bias_tmp = bias_data + p * 4; + + float *out0_tm = output_dot_buf + p * 64 * block_h * block_w * 4; // 输出转换前/dot后 第p个channel + float *out0 = output_tm1_buf + p * 6*block_h * 6*block_w * 4; // 转换后输出 第p个channel + + float *tmp1 = (float *)csi_mem_alloc(6 * 8 * 4 * sizeof(float)); + int out_w6 = block_w * 6; + + for (int i = 0; i < block_h; i++) { + + for (int j = 0; j < block_w; j++) { + + float *output0_tm_0 = out0_tm + (i * block_w + j) * 4; // 8*8 起始地址 + + float *output0 = out0 + (i * block_w * 6 * 6 + j * 6) * 4; // 输出 6*6 的起始地址 + + float ratio[] = {2.0, 4.0, 8.0, 16.0, 32.0}; + float *ratio_ptr = ratio; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "li t0, 8\n\t" // m = 8 + "mv t5, %2\n\t" // t5 = tmp start addr + "slli t1, %4, 4\n\t" // t1 = tiles * 4 * 4 + "slli t2, %4, 7\n\t" // t2 = tiles * 8 * 4 * 4 bytes + + "flw fa0, 0(%3)\n\t" // fa0 = 2 + "flw fa1, 4(%3)\n\t" // fa1 = 4 + "flw fa2, 8(%3)\n\t" // fa2 = 8 + "flw fa3, 12(%3)\n\t" // fa3 = 16 + "flw fa4, 16(%3)\n\t" // fa4 = 32 + + "mv s1, %0\n\t" + + "1:\n\t" // shape : [6 * 8] * [8 * 8] = [6 * 8] + + "mv a0, t5\n\t" // tmp[0][m] + "addi a1, a0, 128\n\t" // tmp[1][m] + "addi a2, a1, 128\n\t" // tmp[2][m] + "addi a3, a2, 128\n\t" // tmp[3][m] + "addi a4, a3, 128\n\t" // tmp[4][m] + "addi a5, a4, 128\n\t" // tmp[5][m] + + "vle.v v0, (s1)\n\t" // r00 + "add s1, s1, t1\n\t" + "vle.v v1, (s1)\n\t" // r01 + "add s1, s1, t1\n\t" + "vle.v v2, (s1)\n\t" // r02 + "add s1, s1, t1\n\t" + "vle.v v3, (s1)\n\t" // r03 + "add s1, s1, t1\n\t" + "vle.v v4, (s1)\n\t" // r04 + "add s1, s1, t1\n\t" + "vle.v v5, (s1)\n\t" // r05 + "add s1, s1, t1\n\t" + "vle.v v6, (s1)\n\t" // r06 + "add s1, s1, t1\n\t" + "vle.v v7, (s1)\n\t" // r07 + "add s1, s1, t1\n\t" + + //--------------------------------------------- + "vfadd.vv v8, v1, v2\n\t" // r01 + r02 = tmp024a + "vfsub.vv v9, v1, v2\n\t" // r01 - r02 = tmp135a + + "vfadd.vv v10, v3, v4\n\t" // r03 + r04 = tmp024b + "vfsub.vv v11, v3, v4\n\t" // r03 - r04 = tmp135b + + "vfadd.vv v12, v5, v6\n\t" // r05 + r06 = tmp024c + "vfsub.vv v13, v5, v6\n\t" // r05 - r06 = tmp135c + + "vfadd.vv v0, v0, v8\n\t" // r00 + tmp024a + "vfadd.vv v7, v7, v9\n\t" // r07 + tmp135a + "vmv.v.v v14, v10\n\t" // v14 = tmp024b + + "vmv.v.v v26, v8\n\t" // v26 = tmp024a + "vmv.v.v v28, v8\n\t" // v28 = tmp024a + + "vfmacc.vf v26, fa1, v10\n\t" // tmp024a + tmp024b * 4 + "vfmacc.vf v14, fa4, v12\n\t" // tmp024b + tmp024c * 32 + "vfmacc.vf v28, fa3, v10\n\t" // tmp024a + tmp024b * 16 + + "vmv.v.v v15, v13\n\t" // v15 = tmp135c + "vmv.v.v v25, v9\n\t" // v25 = tmp135a + "vmv.v.v v27, v9\n\t" // v27 = tmp135a + "vfadd.vv v24, v0, v14\n\t" // r00 + tmp024a + tmp024b + tmp024c * 32 = tmp[0][m] + + "vfmacc.vf v25, fa0, v11\n\t" // tmp135a + tmp135b * 2 + "vfmacc.vf v27, fa2, v11\n\t" // tmp135a + tmp135b * 8 + + //--------------------------------------------- + "vse.v v24, (a0)\n\t" + + "vfmacc.vf v26, fa2, v12\n\t" // tmp024a + tmp024b * 4 + tmp024c * 8 = tmp[2][m] + "vfmacc.vf v28, fa0, v12\n\t" // tmp024a + tmp024b * 16 + tmp024c + tmp024c = tmp[4][m] + "vfmacc.vf v15, fa4, v11\n\t" // tmp135b * 32 + tmp135c + + "vse.v v26, (a2)\n\t" + "vse.v v28, (a4)\n\t" + + //--------------------------------------------- + "vfmacc.vf v25, fa3, v13\n\t" // tmp135a + tmp135b * 2 + tmp135c * 16 = tmp[1][m] + "vfmacc.vf v27, fa1, v13\n\t" // tmp135a + tmp135b * 8 + tmp135c * 4 = tmp[3][m] + + "vfadd.vv v29, v7, v15\n\t" // r07 + tmp135a + tmp135b * 32 + tmp135c + + "vse.v v25, (a1)\n\t" + "vse.v v27, (a3)\n\t" + "vse.v v29, (a5)\n\t" + + "addi t5, t5, 16\n\t" // tmp[0][0] --> tmp[0][1] + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "2:\n\t" + + "mv t5, %2\n\t" // tmp start addr + "li t0, 6\n\t" // m = 6 + "slli t1, %5, 4\n\t" // t1 = out_w6 * 4 * 4bytes + "vle.v v16, (%6)\n\t" // load 4 channel bias data + + "3:\n\t" // shape : [6 * 8] * [6 * 8] = [6 * 6] + + "mv a0, %1\n\t" + "addi a1, a0, 16\n\t" + "addi a2, a1, 16\n\t" + "addi a3, a2, 16\n\t" + "addi a4, a3, 16\n\t" + "addi a5, a4, 16\n\t" + + "vle.v v0, (t5)\n\t" // tmp[m][0] + "addi t5, t5, 16\n\t" + "vle.v v1, (t5)\n\t" // tmp[m][1] + "addi t5, t5, 16\n\t" + "vle.v v2, (t5)\n\t" // tmp[m][2] + "addi t5, t5, 16\n\t" + "vle.v v3, (t5)\n\t" // tmp[m][3] + "addi t5, t5, 16\n\t" + "vle.v v4, (t5)\n\t" // tmp[m][4] + "addi t5, t5, 16\n\t" + "vle.v v5, (t5)\n\t" // tmp[m][5] + "addi t5, t5, 16\n\t" + "vle.v v6, (t5)\n\t" // tmp[m][6] + "addi t5, t5, 16\n\t" + "vle.v v7, (t5)\n\t" // tmp[m][7] + "addi t5, t5, 16\n\t" + + //--------------------------------------------- + "vfadd.vv v8, v1, v2\n\t" // tmp[m][1] + tmp[m][2] = tmp024a + "vfsub.vv v9, v1, v2\n\t" // tmp[m][1] - tmp[m][2] = tmp135a + + "vfadd.vv v10, v3, v4\n\t" // tmp[m][3] + tmp[m][4] = tmp024b + "vfsub.vv v11, v3, v4\n\t" // tmp[m][3] - tmp[m][4] = tmp135b + + "vfadd.vv v12, v5, v6\n\t" // tmp[m][5] + tmp[m][6] = tmp024c + "vfsub.vv v13, v5, v6\n\t" // tmp[m][5] - tmp[m][6] = tmp135c + + "vfadd.vv v0, v0, v8\n\t" // tmp[m][0] + tmp024a + "vfadd.vv v7, v7, v9\n\t" // tmp[m][7] + tmp135a + "vmv.v.v v14, v10\n\t" // v14 = tmp024b + + "vmv.v.v v26, v8\n\t" // v26 = tmp024a + "vmv.v.v v28, v8\n\t" // v28 = tmp024a + + "vfmacc.vf v26, fa1, v10\n\t" // tmp024a + tmp024b * 4 + "vfmacc.vf v14, fa4, v12\n\t" // tmp024b + tmp024c * 32 + "vfmacc.vf v28, fa3, v10\n\t" // tmp024a + tmp024b * 16 + + "vmv.v.v v15, v13\n\t" // v15 = tmp135c + "vmv.v.v v25, v9\n\t" // v25 = tmp135a + "vmv.v.v v27, v9\n\t" // v27 = tmp135a + "vfadd.vv v24, v0, v14\n\t" // tmp[m][0] + tmp024a + tmp024b + tmp024c * 32 = tmp[0][m] + + "vfmacc.vf v25, fa0, v11\n\t" // tmp135a + tmp135b * 2 + "vfmacc.vf v27, fa2, v11\n\t" // tmp135a + tmp135b * 8 + + //--------------------------------------------- + "vfadd.vv v24, v24, v16\n\t" // + bias + + "vfmacc.vf v26, fa2, v12\n\t" // tmp024a + tmp024b * 4 + tmp024c * 8 = tmp[2][m] + "vfmacc.vf v28, fa0, v12\n\t" // tmp024a + tmp024b * 16 + tmp024c + tmp024c = tmp[4][m] + "vfmacc.vf v15, fa4, v11\n\t" // tmp135b * 32 + tmp135c + + "vse.v v24, (a0)\n\t" + + "vfmacc.vf v25, fa3, v13\n\t" // tmp135a + tmp135b * 2 + tmp135c * 16 = tmp[1][m] + "vfmacc.vf v27, fa1, v13\n\t" // tmp135a + tmp135b * 8 + tmp135c * 4 = tmp[3][m] + + "vfadd.vv v26, v26, v16\n\t" // + bias + "vfadd.vv v28, v28, v16\n\t" // + bias + + "vfadd.vv v29, v7, v15\n\t" // tmp[m][7] + tmp135a + tmp135b * 32 + tmp135c + + "vse.v v26, (a2)\n\t" + "vse.v v28, (a4)\n\t" + + //--------------------------------------------- + + "vfadd.vv v25, v25, v16\n\t" // + bias + "vfadd.vv v27, v27, v16\n\t" // + bias + "vfadd.vv v29, v29, v16\n\t" // + bias + + "vse.v v25, (a1)\n\t" + "vse.v v27, (a3)\n\t" + "vse.v v29, (a5)\n\t" + + "add %1, %1, t1\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 3b" + + :"=r"(output0_tm_0), // %0 + "=r"(output0), // %1 + "=r"(tmp1), // %2 + "=r"(ratio_ptr), // %3 + "=r"(tiles), // %4 + "=r"(out_w6), // %5 + "=r"(bias_tmp) // %6 + :"0"(output0_tm_0), + "1"(output0), + "2"(tmp1), + "3"(ratio_ptr), + "4"(tiles), + "5"(out_w6), + "6"(bias_tmp) + + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v24", "v25", "v26", "v27", "v28", "v29", + "t0", "t1", "t2", "t5", "s1", "a0", "a1", "a2", "a3", "a4", "a5", + "fa0", "fa1", "fa2", "fa3", "fa4" + ); + } + } + csi_mem_free(tmp1); + } + + csi_mem_free(output_dot_buf); + // crop the output after transform: cut extra part (right , bottom) + csi_c906_crop_output_pack4to1(output_tm1_buf, output_data, out_c, out_h, out_w, block_h * 6, block_w * 6); + output_data += output_size; + csi_mem_free(output_tm1_buf); + } + + if (!flag_bias) { + csi_mem_free(bias_data); + bias_data = NULL; + } + return CSINN_TRUE; +} + + + +void csi_c906_conv3x3s1_winograd43_transform_kernel_pack4(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel) +{ + int32_t outch = o_kernel->dim[0]; + int32_t inch = o_kernel->dim[1]; + + float *kernel_data = (float *)o_kernel->data; + // for kernel transform buf, 3x3 --> 6x6 + float *kernel_tm = (float *)csi_mem_alloc(outch * inch * 6 * 6 * sizeof(float)); + + // kernel transform matrix: G + const float ktm[6][3] = { + { 1.0f/4, 0.0f, 0.0f}, + { -1.0f/6, -1.0f/6, -1.0f/6}, + { -1.0f/6, 1.0f/6, -1.0f/6}, + { 1.0f/24, 1.0f/12, 1.0f/6}, + { 1.0f/24, -1.0f/12, 1.0f/6}, + { 0.0f, 0.0f, 1.0f} + }; + + csi_tensor_copy(t_kernel, o_kernel); + + for (int p = 0; p < outch; p++) { + for (int q = 0; q < inch; q++) { + + const float* kernel0 = kernel_data + p * inch * 9 + q * 9; + float* kernel_tm0 = kernel_tm + p * inch * 36 + q * 36; + + // transform kernel + const float *k0 = kernel0; + const float *k1 = kernel0 + 3; + const float *k2 = kernel0 + 6; + + // h : first compute the transport matrix tmp = (g * GT)T + float tmp[6][3]; + for (int i = 0; i < 6; i++) { + + tmp[i][0] = k0[0] * ktm[i][0] + k0[1] * ktm[i][1] + k0[2] * ktm[i][2]; + tmp[i][1] = k1[0] * ktm[i][0] + k1[1] * ktm[i][1] + k1[2] * ktm[i][2]; + tmp[i][2] = k2[0] * ktm[i][0] + k2[1] * ktm[i][1] + k2[2] * ktm[i][2]; + } + + // U + for (int j = 0; j < 6; j++) { + float* tmpp = &tmp[j][0]; + + for (int i = 0; i < 6; i++) { + kernel_tm0[j * 6 + i] = tmpp[0] * ktm[i][0] + tmpp[1] * ktm[i][1] + tmpp[2] * ktm[i][2]; + } + } + } + } + + // [O, I, 6, 6] --> [O/4, 6*6, I, 4] + float *kernel_tm_pack4 = (float *)csi_mem_alloc(outch * inch * 6 * 6 * sizeof(float)); + t_kernel->data = kernel_tm_pack4; + + for (int oc = 0; oc < outch / 4; oc++) { + + float *g0 = kernel_tm_pack4 + oc * 36 * inch * 4; + + const float *k0 = kernel_tm + oc * 36 * inch * 4; + const float *k1 = k0 + 36 * inch; + const float *k2 = k1 + 36 * inch; + const float *k3 = k2 + 36 * inch; + + for (int k = 0; k < 36; k++) { + + float *g00 = g0 + k * inch * 4; + + for (int ic = 0; ic < inch / 4; ic++) { + + for (int i = 0; i < 4; i++) { + + const float *k00 = k0 + (ic * 4 + i) * 36; + const float *k10 = k1 + (ic * 4 + i) * 36; + const float *k20 = k2 + (ic * 4 + i) * 36; + const float *k30 = k3 + (ic * 4 + i) * 36; + + g00[0] = k00[k]; + g00[1] = k10[k]; + g00[2] = k20[k]; + g00[3] = k30[k]; + + g00 += 4; + } + } + } + } + + csi_mem_free(kernel_tm); +} + + +int csi_c906_conv3x3s1_winograd43_pack4(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *kernel, + struct csi_tensor *bias, + struct conv2d_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + float *kernel_data = (float *)params->conv_extra.kernel_tm->data; + float *bias_data = (float *)bias->data; + + // param + int kernel_h = kernel->dim[2]; + int kernel_w = kernel->dim[3]; + int stride_h = params->stride_height; + int stride_w = params->stride_width; + int dilation_h = params->dilation_height; + int dilation_w = params->dilation_width; + int pad_left = params->pad_left; + int pad_top = params->pad_top; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + int kernel_size = in_c * kernel_h * kernel_w; + + int out_c = kernel->dim[0]; + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int output_size = out_c * out_h * out_w; + + // winograd param + int block_h = (out_h + 3) / 4; + int block_w = (out_w + 3) / 4; + + int padded_in_h = block_h * 4 + 2; // block * 4 for alignment with 4,kernel = 3 * 3, stride = 1,thus input_size + 2 + int padded_in_w = block_w * 4 + 2; + int padded_in_hw = padded_in_h * padded_in_w; // element size after padding per channel + + /****************************** bias *****************************/ + bool flag_bias = 1; // default: conv2d layer include bias + if (bias_data == NULL) { + flag_bias = 0; + bias_data = (float *)csi_mem_alloc(out_c * sizeof(float)); + } + + + for(int n = 0; n < batch; n++) { + + // pad buffer: [in_c/4 h w 4] + float *input_padd_buf = (float *)csi_mem_alloc(in_c * padded_in_hw * sizeof(float)); + + // pad input + csi_c906_pad_input_pack1to4(input_data, input_padd_buf, in_c, in_h, in_w, padded_in_h, padded_in_w, pad_top, pad_left); + input_data += input_size; + + // input transform buffer1: [in_ch/4, 36, blocks, 6] + float *input_tm1_buf = (float *)csi_mem_alloc(in_c * block_h * block_w * 6 * 6 * sizeof(float)); + + /****************************** transform input *****************************/ + /* + BT = { + { 4 0 -5 0 1 0 }; + { 0 -4 -4 1 1 0 }; + { 0 4 -4 -1 1 0 }; + { 0 -2 -1 2 1 0 }; + { 0 2 -1 -2 1 0 }; + { 0 4 0 -5 0 1 } + }; + */ + + int tiles = block_h * block_w; + + #pragma omp parallel for num_threads(1) + for(int q = 0; q < in_c / 4; q++) { + + float *img0 = input_padd_buf + q * padded_in_h * padded_in_w * 4; // feature map after padding - q channel + float *img0_tm = input_tm1_buf + q * 36 * tiles * 4; // transform and interleave - q channel + + float *tmp = (float *)csi_mem_alloc(6 * 6 * 4 * sizeof(float)); + + for(int i = 0; i < block_h; i++) { + + for(int j = 0; j < block_w; j++) { + + float *r0 = img0 + (i * padded_in_w * 4 + j * 4) * 4; // feature map after padding 6*6 start addr + float *r0_tm = img0_tm + (i * block_w + j) * 4; // input_tm1 6*6 block start addr + + float ratio[] = {4, -4, 2, -2, -5}; // note: in fact cannot be output constrain + float *ratio_ptr = ratio; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "li t0, 6\n\t" // m = 6 + "mv t5, %2\n\t" // t5 = tmp start addr + "slli t1, %4, 4\n\t" // t1 = padded_in_w * 4 * 4bytes + + "flw fa0, 0(%3)\n\t" // fa0 = 4 + "flw fa1, 4(%3)\n\t" // fa1 = -4 + "flw fa2, 8(%3)\n\t" // fa2 = 2 + "flw fa3, 12(%3)\n\t" // fa3 = -2 + "flw fa4, 16(%3)\n\t" // fa4 = -5 + + "1:\n\t" + "mv s1, %0\n\t" // s1 = r00 addr + + "mv a0, t5\n\t" // tmp[0][m] + "addi a1, a0, 96\n\t" // tmp[1][m] + "addi a2, a1, 96\n\t" // tmp[2][m] + "addi a3, a2, 96\n\t" // tmp[3][m] + "addi a4, a3, 96\n\t" // tmp[4][m] + "addi a5, a4, 96\n\t" // tmp[5][m] + + "vle.v v0, (s1)\n\t" // r00 + "addi s1, s1, 16\n\t" + "vle.v v1, (s1)\n\t" // r01 + "addi s1, s1, 16\n\t" + "vle.v v2, (s1)\n\t" // r02 + "addi s1, s1, 16\n\t" + "vle.v v3, (s1)\n\t" // r03 + "addi s1, s1, 16\n\t" + "vle.v v4, (s1)\n\t" // r04 + "addi s1, s1, 16\n\t" + "vle.v v5, (s1)\n\t" // r05 + "addi s1, s1, 16\n\t" + + "vmv.v.v v24, v4\n\t" + "vmv.v.v v29, v5\n\t" + //--------------------------------------------- + "vfmacc.vf v24, fa0, v0\n\t" // r04 + 4 * r00 + "vfmacc.vf v24, fa4, v2\n\t" // r04 + 4 * r00 - 5 * r02 + + "vse.v v24, (a0)\n\t" + //--------------------------------------------- + "vfadd.vv v25, v3, v4\n\t" // r03 + r04 + "vfadd.vv v6, v1, v2\n\t" // r01 + r02 + "vfmacc.vf v25, fa1, v6\n\t" // r03 + r04 - 4 * (r01 - r02) + + "vse.v v25, (a1)\n\t" + //--------------------------------------------- + "vfsub.vv v26, v4, v3\n\t" // r04 - r03 + "vfsub.vv v7, v1, v2\n\t" // r01 - r02 + "vfmacc.vf v26, fa0, v7\n\t" // r04 - r03 + 4 * (r01 - r02) + + "vse.v v26, (a2)\n\t" + //--------------------------------------------- + "vfsub.vv v8, v1, v3\n\t" // r01 - r03 + "vfsub.vv v27, v4, v2\n\t" // r04 - r02 + "vfsub.vv v28, v4, v2\n\t" // r04 - r02 + + "vfmacc.vf v27, fa3, v8\n\t" // r04 - r02 - 2 * (r01 - r03) + "vse.v v27, (a3)\n\t" + + "vfmacc.vf v28, fa2, v8\n\t" // r04 - r02 + 2 * (r01 - r03) + "vse.v v28, (a4)\n\t" + //--------------------------------------------- + "vfmacc.vf v29, fa0, v1\n\t" // r05 + 4 * r01 + "vfmacc.vf v29, fa4, v3\n\t" // r05 + 4 * r01 - 5 * r03 + + "vse.v v29, (a5)\n\t" + //--------------------------------------------- + + "add %0, %0, t1\n\t" // padding feature map 6*6 next line addr + "addi t5, t5, 16\n\t" // tmp[0][0] --> tmp[0][1] + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "2:\n\t" + + "mv t5, %2\n\t" // tmp start addr + "li t0, 6\n\t" // m = 6 + + "slli t1, %5, 4\n\t" // t1 = tiles * 4 * 4 bytes + "mulw t2, t0, t1\n\t" // t2 = tiles * 6 blocks * 4 channels * 4 bytes + + "3:\n\t" + + "mv a0, %1\n\t" // r0_tm_0 + "add a1, a0, t1\n\t" // r0_tm_1 + "add a2, a1, t1\n\t" // r0_tm_2 + "add a3, a2, t1\n\t" // r0_tm_3 + "add a4, a3, t1\n\t" // r0_tm_4 + "add a5, a4, t1\n\t" // r0_tm_5 + + "vle.v v0, (t5)\n\t" // tmp[m][0] + "addi t5, t5, 16\n\t" + "vle.v v1, (t5)\n\t" // tmp[m][1] + "addi t5, t5, 16\n\t" + "vle.v v2, (t5)\n\t" // tmp[m][2] + "addi t5, t5, 16\n\t" + "vle.v v3, (t5)\n\t" // tmp[m][3] + "addi t5, t5, 16\n\t" + "vle.v v4, (t5)\n\t" // tmp[m][4] + "addi t5, t5, 16\n\t" + "vle.v v5, (t5)\n\t" // tmp[m][5] + "addi t5, t5, 16\n\t" + + "vmv.v.v v24, v4\n\t" + "vmv.v.v v29, v5\n\t" + //--------------------------------------------- + "vfmacc.vf v24, fa0, v0\n\t" // r04 + 4 * r00 + "vfmacc.vf v24, fa4, v2\n\t" // r04 * 4 * r00 - 5 * r02 + + "vse.v v24, (a0)\n\t" + //--------------------------------------------- + "vfadd.vv v25, v3, v4\n\t" // r03 + r04 + "vfadd.vv v6, v1, v2\n\t" // r01 + r02 + "vfmacc.vf v25, fa1, v6\n\t" // r03 + r04 - 4 * (r01 - r02) + + "vse.v v25, (a1)\n\t" + //--------------------------------------------- + "vfsub.vv v26, v4, v3\n\t" // r04 - r03 + "vfsub.vv v7, v1, v2\n\t" // r01 - r02 + "vfmacc.vf v26, fa0, v7\n\t" // r04 - r03 + 4 * (r01 - r02) + + "vse.v v26, (a2)\n\t" + //--------------------------------------------- + "vfsub.vv v8, v1, v3\n\t" // r01 - r03 + "vfsub.vv v27, v4, v2\n\t" // r04 - r02 + "vfsub.vv v28, v4, v2\n\t" // r04 - r02 + + "vfmacc.vf v27, fa3, v8\n\t" // r04 - r02 - 2 * (r01 - r03) + "vse.v v27, (a3)\n\t" + + "vfmacc.vf v28, fa2, v8\n\t" // r04 - r02 + 2 * (r01 - r03) + "vse.v v28, (a4)\n\t" + //--------------------------------------------- + "vfmacc.vf v29, fa0, v1\n\t" // r05 + 4 * r01 + "vfmacc.vf v29, fa4, v3\n\t" // r05 + 4 * r01 - 5 * r03 + + "vse.v v29, (a5)\n\t" + //--------------------------------------------- + + "add %1, %1, t2\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 3b" + + + :"=r"(r0), // %0 + "=r"(r0_tm), // %1 + "=r"(tmp), // %2 + "=r"(ratio_ptr), // %3 + "=r"(padded_in_w), // %4 + "=r"(tiles) // %5 + :"0"(r0), + "1"(r0_tm), + "2"(tmp), + "3"(ratio_ptr), + "4"(padded_in_w), + "5"(tiles) + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v24", "v25", "v26", "v27", "v28", "v29", + "t0", "t1", "t2", "t5", "s1", "a0", "a1", "a2", "a3", "a4", "a5", + "fa0", "fa1", "fa2", "fa3", "fa4", "fa5" + ); + + } + } + csi_mem_free(tmp); + } + csi_mem_free(input_padd_buf); + + /*********************************** dot ***************************************/ + // reorder input_tm1_buf + int size_input_tm2 = 0; + if (tiles >= 8) { + size_input_tm2 = 36 * (tiles / 8 + (tiles % 8) / 4 + (tiles % 4) / 2 + tiles % 2) * in_c * 8; + } else if (tiles >= 4) { + size_input_tm2 = 36 * (tiles / 4 + (tiles % 4) / 2 + tiles % 2) * in_c * 4; + } else if (tiles >= 2) { + size_input_tm2 = 36 * (tiles / 2 + tiles % 2) * in_c * 2; + } else { + size_input_tm2 = 36 * tiles * in_c; + } + float *input_tm2_buf = (float *)csi_mem_alloc(size_input_tm2 * sizeof(float)); + + #pragma omp parallel for num_threads(1) + for (int r = 0; r < 36; r++) { + + float *img_tm2 = input_tm2_buf + r * size_input_tm2 / 36; // input_tm2 r channel data + + int t = 0; + for (; t + 7 < tiles; t += 8) { + float *tm2 = img_tm2 + t * in_c; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * 4; + + //----------------- + for (int q = 0; q < in_c / 4; q++) { + for (int l = 0; l < 4; l++) { + tm2[0] = tm1[l]; + tm2[1] = tm1[l + 4 * 1]; + tm2[2] = tm1[l + 4 * 2]; + tm2[3] = tm1[l + 4 * 3]; + tm2[4] = tm1[l + 4 * 4]; + tm2[5] = tm1[l + 4 * 5]; + tm2[6] = tm1[l + 4 * 6]; + tm2[7] = tm1[l + 4 * 7]; + tm2 += 8; + } + tm1 += 36 * tiles * 4; + } + } + for (; t + 3 < tiles; t += 4) { + float *tm2 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * 4; + + for (int q = 0; q < in_c / 4; q++) { + for (int l = 0; l < 4; l++) { + tm2[0] = tm1[l]; + tm2[1] = tm1[l + 4 * 1]; + tm2[2] = tm1[l + 4 * 2]; + tm2[3] = tm1[l + 4 * 3]; + tm2 += 4; + } + tm1 += 36 * tiles * 4; + } + } + for (; t + 1 < tiles; t += 2) { + float *tm2 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * 4; + for (int q = 0; q < in_c / 4; q++) { + for (int l = 0; l < 4; l++) { + tm2[0] = tm1[l]; + tm2[1] = tm1[l + 4]; + tm2 += 2; + } + tm1 += 36 * tiles * 4; + } + + } + for (; t < tiles; t++) { + float *tm2 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2 + t % 2) * in_c * 8; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * 4; + for (int q = 0; q < in_c / 4; q++) { + for (int l = 0; l < 4; l++) { + tm2[0] = tm1[l]; + tm2++; + } + tm1 += 36 * tiles * 4; + } + } + } + + csi_mem_free(input_tm1_buf); + + // output_dot_buf: [out_c/4, 36, blocks, 4] + float *output_dot_buf = (float *)csi_mem_alloc(out_c * block_h * block_w * 6 * 6 * sizeof(float)); + + #pragma omp parallel for num_threads(1) + for (int p = 0; p < out_c / 4; p++) { + + float *output0_tm = output_dot_buf + p * 36 * tiles * 4; // 4 channel dot output + float *kernel0_tm = kernel_data + p * 36 * in_c * 4; // 4 channel kernel + + for (int r = 0; r < 36; r++) { + + float *img_tm2 = input_tm2_buf + r * size_input_tm2 / 36; // img_tm2 第r个channel + + int t = 0; + for (; t + 7 < tiles; t += 8) { + + float *r0 = img_tm2 + t * in_c; + float *k0 = kernel0_tm + r * in_c * 4; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + + "vmv.v.x v0, zero\n\t" + "vmv.v.x v1, zero\n\t" + "vmv.v.x v2, zero\n\t" + "vmv.v.x v3, zero\n\t" + "vmv.v.x v4, zero\n\t" + "vmv.v.x v5, zero\n\t" + "vmv.v.x v6, zero\n\t" + "vmv.v.x v7, zero\n\t" // clear + + "1:\n\t" + + "flw fa0, (%0)\n\t" + "flw fa1, 4(%0)\n\t" + "flw fa2, 8(%0)\n\t" + "flw fa3, 12(%0)\n\t" + "flw fa4, 16(%0)\n\t" + "flw fa5, 20(%0)\n\t" + "flw fa6, 24(%0)\n\t" + "flw fa7, 28(%0)\n\t" + "addi %0, %0, 32\n\t" + + "vle.v v8, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v8\n\t" + "vfmacc.vf v1, fa1, v8\n\t" + "vfmacc.vf v2, fa2, v8\n\t" + "vfmacc.vf v3, fa3, v8\n\t" + "vfmacc.vf v4, fa4, v8\n\t" + "vfmacc.vf v5, fa5, v8\n\t" + "vfmacc.vf v6, fa6, v8\n\t" + "vfmacc.vf v7, fa7, v8\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v1, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v3, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v4, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v5, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v6, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v7, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "t0" + + ); + } + for (; t + 3 < tiles; t += 4) { + float *r0 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; + float *k0 = kernel0_tm + r * in_c * 4; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + "vmv.v.x v0, zero\n\t" + "vmv.v.x v1, zero\n\t" + "vmv.v.x v2, zero\n\t" + "vmv.v.x v3, zero\n\t" // clear + + "1:\n\t" + + "flw fa0, (%0)\n\t" + "flw fa1, 4(%0)\n\t" + "flw fa2, 8(%0)\n\t" + "flw fa3, 12(%0)\n\t" + "addi %0, %0, 16\n\t" + + "vle.v v4, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v4\n\t" + "vfmacc.vf v1, fa1, v4\n\t" + "vfmacc.vf v2, fa2, v4\n\t" + "vfmacc.vf v3, fa3, v4\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v1, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v3, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "fa0", "fa1", "fa2", "fa3", "t0" + ); + } + for (; t + 1 < tiles; t += 2) { + float *r0 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; + float *k0 = kernel0_tm + r * in_c * 4; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + "vmv.v.x v0, zero\n\t" + "vmv.v.x v1, zero\n\t" // clear + + "1:\n\t" + + "flw fa0, (%0)\n\t" + "flw fa1, 4(%0)\n\t" + "addi %0, %0, 8\n\t" + + "vle.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v2\n\t" + "vfmacc.vf v1, fa1, v2\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v1, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + :"cc", "memory", "v0", "v1", "v2", "fa0", "fa1", "t0" + ); + } + for (; t < tiles; t++) { + + float *r0 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2 + t % 2) * in_c * 8; + float *k0 = kernel0_tm + r * in_c * 4; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + "vmv.v.x v0, zero\n\t" // clear + + "1:\n\t" + + "flw fa0, (%0)\n\t" + "addi %0, %0, 4\n\t" + + "vle.v v1, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v1\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + :"cc", "memory", "v0", "v1", "fa0", "t0" + ); + + } + + } + + } + + csi_mem_free(input_tm2_buf); + /*************************** transform output ****************************/ + // output_tm1_buf: [out_c/4, out_h4, out_w4, 4] + float *output_tm1_buf = (float *)csi_mem_alloc(out_c * block_h * block_w * 4 * 4 * sizeof(float)); + + /* + AT = { + { 1 1 1 1 1 0 }, + { 0 1 -1 2 -2 0 }, + { 0 1 1 4 4 0 }, + { 0 1 -1 8 -8 1 } + }; + */ + + #pragma omp parallel for num_threads(1) + for (int p = 0; p < out_c / 4; p++) + { + + float *bias_tmp = bias_data + p * 4; + + float *out0_tm = output_dot_buf + p * 36 * block_h * block_w * 4; // 输出转换前/dot后 第p个channel + float *out0 = output_tm1_buf + p * 4*block_h * 4*block_w * 4; // 转换后输出 第p个channel + + float *tmp1 = (float *)csi_mem_alloc(4 * 6 * 4 * sizeof(float)); + int out_w4 = block_w * 4; + + for (int i = 0; i < block_h; i++) { + + for (int j = 0; j < block_w; j++) { + + float *output0_tm_0 = out0_tm + (i * block_w + j) * 4; // 6*6 起始地址 + + float *output0 = out0 + (i * block_w * 4 * 4 + j * 4) * 4; // 输出 4*4 的起始地址 + + float ratio[] = {2.0, 4.0, 8.0}; + float *ratio_ptr = ratio; + + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" + "li t0, 6\n\t" // m = 6 + "mv t5, %2\n\t" // t5 = tmp start addr + "slli t1, %4, 4\n\t" // t1 = tiles * 4 * 4 + "mulw t2, t0, t1\n\t" // t2 = tiles * 6 blocks * 4 channels * 4 bytes + + "flw fa0, 0(%3)\n\t" // fa0 = 2 + "flw fa1, 4(%3)\n\t" // fa1 = 4 + "flw fa2, 8(%3)\n\t" // fa2 = 8 + + "mv s1, %0\n\t" + + "1:\n\t" // shape : [4 * 6] * [6 * 6] = [4 * 6] + + "mv a0, t5\n\t" // tmp[0][m] + "addi a1, a0, 96\n\t" // tmp[1][m] + "addi a2, a1, 96\n\t" // tmp[2][m] + "addi a3, a2, 96\n\t" // tmp[3][m] + + "vle.v v0, (s1)\n\t" // r00 + "add s1, s1, t1\n\t" + "vle.v v1, (s1)\n\t" // r01 + "add s1, s1, t1\n\t" + "vle.v v2, (s1)\n\t" // r02 + "add s1, s1, t1\n\t" + "vle.v v3, (s1)\n\t" // r03 + "add s1, s1, t1\n\t" + "vle.v v4, (s1)\n\t" // r04 + "add s1, s1, t1\n\t" + "vle.v v5, (s1)\n\t" // r05 + "add s1, s1, t1\n\t" + + //--------------------------------------------- + "vfadd.vv v26, v1, v2\n\t" // r01 + r02 = tmp02a + "vfsub.vv v6, v1, v2\n\t" // r01 - r02 = tmp13a + + "vfadd.vv v7, v3, v4\n\t" // r03 + r04 = tmp02b + "vfsub.vv v8, v3, v4\n\t" // r03 - r04 = tmp13b + "vmv.v.v v25, v6\n\t" // v25 = tmp13a + //--------------------------------------------- + "vfadd.vv v24, v0, v26\n\t" // r00 + tmp02a + "vfadd.vv v24, v24, v7\n\t" // r00 + tmp02a + tmp02b + "vse.v v24, (a0)\n\t" + + "vfmacc.vf v25, fa0, v8\n\t" // tmp13a + 2 * tmp13b + "vse.v v25, (a1)\n\t" + + "vfmacc.vf v26, fa1, v7\n\t" // tmp02a + 4 * tmp02b + "vse.v v26, (a2)\n\t" + + "vfadd.vv v27, v5, v6\n\t" // r05 + tmp13a + "vfmacc.vf v27, fa2, v8\n\t" // r05 + tmp13a * 8 tmp13b + "vse.v v27, (a3)\n\t" + //--------------------------------------------- + + "addi t5, t5, 16\n\t" // tmp[0][0] --> tmp[0][1] + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "2:\n\t" + + "mv t5, %2\n\t" // tmp start addr + "li t0, 4\n\t" // m = 4 + "slli t1, %5, 4\n\t" // t1 = out_w4 * 4 * 4bytes + "vle.v v16, (%6)\n\t" // load 4 channel bias data + + "3:\n\t" // shape : [4 * 6] * [6 * 4] = [4 * 4] + + "mv a0, %1\n\t" + "addi a1, a0, 16\n\t" + "addi a2, a1, 16\n\t" + "addi a3, a2, 16\n\t" + + "vle.v v0, (t5)\n\t" // tmp[m][0] + "addi t5, t5, 16\n\t" + "vle.v v1, (t5)\n\t" // tmp[m][1] + "addi t5, t5, 16\n\t" + "vle.v v2, (t5)\n\t" // tmp[m][2] + "addi t5, t5, 16\n\t" + "vle.v v3, (t5)\n\t" // tmp[m][3] + "addi t5, t5, 16\n\t" + "vle.v v4, (t5)\n\t" // tmp[m][4] + "addi t5, t5, 16\n\t" + "vle.v v5, (t5)\n\t" // tmp[m][5] + "addi t5, t5, 16\n\t" + + //--------------------------------------------- + "vfadd.vv v26, v1, v2\n\t" // r01 + r02 = tmp02a + "vfsub.vv v6, v1, v2\n\t" // r01 - r02 = tmp13a + + "vfadd.vv v7, v3, v4\n\t" // r03 + r04 = tmp02b + "vfsub.vv v8, v3, v4\n\t" // r03 - r04 = tmp13b + "vmv.v.v v25, v6\n\t" // v25 = tmp13a + //--------------------------------------------- + "vfadd.vv v24, v0, v26\n\t" // r00 + tmp02a + "vfadd.vv v24, v24, v7\n\t" // r00 + tmp02a + tmp02b + "vfadd.vv v24, v24, v16\n\t" // add bias + "vse.v v24, (a0)\n\t" + + "vfmacc.vf v25, fa0, v8\n\t" // tmp13a + 2 * tmp13b + "vfadd.vv v25, v25, v16\n\t" // add bias + "vse.v v25, (a1)\n\t" + + "vfmacc.vf v26, fa1, v7\n\t" // tmp02a + 4 * tmp02b + "vfadd.vv v26, v26, v16\n\t" // add bias + "vse.v v26, (a2)\n\t" + + "vfadd.vv v27, v5, v6\n\t" // r05 + tmp13a + "vfmacc.vf v27, fa2, v8\n\t" // r05 + tmp13a * 8 tmp13b + "vfadd.vv v27, v27, v16\n\t" // add bias + "vse.v v27, (a3)\n\t" + + "add %1, %1, t1\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 3b" + + :"=r"(output0_tm_0), // %0 + "=r"(output0), // %1 + "=r"(tmp1), // %2 + "=r"(ratio_ptr), // %3 + "=r"(tiles), // %4 + "=r"(out_w4), // %5 + "=r"(bias_tmp) // %6 + :"0"(output0_tm_0), + "1"(output0), + "2"(tmp1), + "3"(ratio_ptr), + "4"(tiles), + "5"(out_w4), + "6"(bias_tmp) + + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v16", "v24", "v25", "v26", "v27", + "t0", "t1", "t2", "t5", "s1", "a0", "a1", "a2", "a3", + "fa0", "fa1", "fa2" + ); + } + } + csi_mem_free(tmp1); + } + + csi_mem_free(output_dot_buf); + // crop the output after transform: cut extra part (right , bottom) + csi_c906_crop_output_pack4to1(output_tm1_buf, output_data, out_c, out_h, out_w, block_h * 4, block_w * 4); + output_data += output_size; + csi_mem_free(output_tm1_buf); + } + + if (!flag_bias) { + csi_mem_free(bias_data); + bias_data = NULL; + } + return CSINN_TRUE; +} + +void csi_c906_conv3x3s1(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *kernel, + struct csi_tensor *bias, + struct conv2d_params *params) +{ + /* to do */ +} + +void csi_c906_conv3x3s2(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *kernel, + struct csi_tensor *bias, + struct conv2d_params *params) +{ + /* to do */ +} diff --git a/source/c906_opt/convolution_3x3_fp16.c b/source/c906_opt/convolution_3x3_fp16.c index c1b94866..4b640794 100644 --- a/source/c906_opt/convolution_3x3_fp16.c +++ b/source/c906_opt/convolution_3x3_fp16.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.11.x */ +/* CSI-NN2 version 1.12.x */ /* @@ -175,11 +175,11 @@ void csi_c906_crop_output_pack8to1_fp16(const __fp16 *output_trans, __fp16 *outp "2:\n\t" // crop h "mv t2, %4\n\t" // t2 = out_w - "mv s0, a0\n\t" // update crop_addr per-row + "mv s1, a0\n\t" // update crop_addr per-row "3:\n\t" // crop w - "vle.v v2, (s0)\n\t" - "addi s0, s0, 16\n\t" + "vle.v v2, (s1)\n\t" + "addi s1, s1, 16\n\t" "vsse.v v2, (a1), t0\n\t" "addi a1, a1, 2\n\t" @@ -212,11 +212,10 @@ void csi_c906_crop_output_pack8to1_fp16(const __fp16 *output_trans, __fp16 *outp "4"(out_w), "5"(wino_h), "6"(wino_w) - :"cc", "memory", "v2", "v3", "a0", "a1", "s0", + :"cc", "memory", "v2", "v3", "a0", "a1", "s1", "t0", "t1", "t2", "t3", "t4", "t5" ); - } /* @@ -225,8 +224,8 @@ void csi_c906_crop_output_pack8to1_fp16(const __fp16 *output_trans, __fp16 *outp kernel before: [O I 3*3] kernel after : [O/8 8*8 I 8] */ -void csi_c906_conv3x3s1_winograd64_transform_kernel_fp16(struct csi_tensor *o_kernel, - struct csi_tensor *t_kernel) +void csi_c906_conv3x3s1_winograd64_transform_kernel_pack8_fp16(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel) { int32_t outch = o_kernel->dim[0]; int32_t inch = o_kernel->dim[1]; @@ -291,6 +290,7 @@ void csi_c906_conv3x3s1_winograd64_transform_kernel_fp16(struct csi_tensor *o_ke } // optimized layout for winograd64 __fp16 *kernel_tm_pack8 = (__fp16 *)csi_mem_alloc(outch * inch * 8 * 8 * sizeof(__fp16)); + t_kernel->data = kernel_tm_pack8; for (int oc = 0; oc < outch / 8; oc++) { @@ -338,7 +338,6 @@ void csi_c906_conv3x3s1_winograd64_transform_kernel_fp16(struct csi_tensor *o_ke } csi_mem_free(kernel_tm); - o_kernel->data = kernel_tm_pack8; } @@ -357,7 +356,7 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, __fp16 *input_data = (__fp16 *)input->data; __fp16 *output_data = (__fp16 *)output->data; - __fp16 *kernel_data = (__fp16 *)kernel->data; + __fp16 *kernel_data = (__fp16 *)params->conv_extra.kernel_tm->data; __fp16 *bias_data = (__fp16 *)bias->data; // param @@ -445,6 +444,7 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, __fp16 *r0_tm = img0_tm + (i * block_w + j) * 8; // input_tm1 8*8 block start addr __fp16 ratio[] = {5.25, -4.25, 0.25, -1.25, 4.0, 0.5, -2.5, 2.0}; // note: in fact cannot be output constrain + __fp16 *ratio_ptr = ratio; asm volatile( "vsetvli zero, zero, e16, m1\n\t" @@ -462,7 +462,7 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "flh fa7, 14(%3)\n\t" // fa7 = 2.0 "1:\n\t" - "mv s0, %0\n\t" // s0 = r00 addr + "mv s1, %0\n\t" // s1 = r00 addr "mv a0, t5\n\t" // tmp[0][m] "addi a1, a0, 128\n\t" // tmp[1][m] @@ -473,22 +473,24 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "addi a6, a5, 128\n\t" // tmp[6][m] "addi a7, a6, 128\n\t" // tmp[7][m] - "vle.v v0, (s0)\n\t" // r00 - "addi s0, s0, 16\n\t" - "vle.v v1, (s0)\n\t" // r01 - "addi s0, s0, 16\n\t" - "vle.v v2, (s0)\n\t" // r02 - "addi s0, s0, 16\n\t" - "vle.v v3, (s0)\n\t" // r03 - "addi s0, s0, 16\n\t" - "vle.v v4, (s0)\n\t" // r04 - "addi s0, s0, 16\n\t" - "vle.v v5, (s0)\n\t" // r05 - "addi s0, s0, 16\n\t" - "vle.v v6, (s0)\n\t" // r06 - "addi s0, s0, 16\n\t" - "vle.v v7, (s0)\n\t" // r07 - "addi s0, s0, 16\n\t" + "vle.v v0, (s1)\n\t" // r00 + "addi s1, s1, 16\n\t" + "vle.v v1, (s1)\n\t" // r01 + "addi s1, s1, 16\n\t" + "vle.v v2, (s1)\n\t" // r02 + "addi s1, s1, 16\n\t" + "vle.v v3, (s1)\n\t" // r03 + "addi s1, s1, 16\n\t" + "vle.v v4, (s1)\n\t" // r04 + "addi s1, s1, 16\n\t" + "vle.v v5, (s1)\n\t" // r05 + "addi s1, s1, 16\n\t" + "vle.v v6, (s1)\n\t" // r06 + "addi s1, s1, 16\n\t" + "vle.v v7, (s1)\n\t" // r07 + "addi s1, s1, 16\n\t" + + "vmv.v.v v10, v6\n\t" //--------------------------------------------- "vfsub.vv v8, v4, v2\n\t" // r04 - r02 @@ -497,48 +499,47 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "vfsub.vv v24, v0, v6\n\t" // r00 - r06 "vfsub.vv v31, v7, v1\n\t" // r07 - r01 + "vfmacc.vf v10, fa2, v2\n\t" // r06 + r02 * 0.25f + + "vfmul.vf v11, v1, fa5\n\t" // r01 * 0.5f + "vfmul.vf v12, v1, fa7\n\t" // r01 * 2.0f + "vfmacc.vf v24, fa0, v8\n\t" // r00 - r06 + 5.25 * (r04 - r02) = tmp[0][m] "vfmacc.vf v31, fa0, v9\n\t" // r07 - r01 + 5.25 * (r03 - r05) = tmp[7][m] - "vse.v v24, (a0)\n\t" - "vse.v v31, (a7)\n\t" - //--------------------------------------------- "vfadd.vv v8, v2, v6\n\t" // r02 + r06 "vfadd.vv v9, v1, v5\n\t" // r01 + r05 + "vfmacc.vf v11, fa6, v3\n\t" // r01 * 0.5f - r03 * 2.5f + "vfmacc.vf v12, fa6, v3\n\t" // r01 * 2.f - r03 * 2.5f + + "vfmacc.vf v2, fa3, v4\n\t" // r02 - r04 * 1.25f 注意 + "vfmacc.vf v10, fa3, v4\n\t" // r06 + r02 * 0.25f - r04 * 1.25f = tmp34a + "vfmacc.vf v8, fa1, v4\n\t" // r02 + r06 - r04 * 4.25f = tmp12a "vfmacc.vf v9, fa1, v3\n\t" // r01 + r05 - r03 * 4.25f = tmp12b + "vfmacc.vf v11, fa7, v5\n\t" // r01 * 0.5f - r03 * 2.5f + r05 * 2.0 = tmp34b + "vfmacc.vf v12, fa5, v5\n\t" // r01 * 2.f - r03 * 2.5f + r05 * 0.5 = tmp56b + + "vse.v v24, (a0)\n\t" + "vse.v v31, (a7)\n\t" + "vfadd.vv v25, v8, v9\n\t" // tmp12a + tmp12b = tmp[1][m] "vfsub.vv v26, v8, v9\n\t" // tmp12a - tmp12b = tmp[2][m] - "vse.v v25, (a1)\n\t" - "vse.v v26, (a2)\n\t" - //--------------------------------------------- - "vmv.v.v v10, v6\n\t" - - "vfmacc.vf v10, fa2, v2\n\t" // r06 + r02 * 0.25f - "vfmacc.vf v10, fa3, v4\n\t" // r06 + r02 * 0.25f - r04 * 1.25f = tmp34a - - "vfmacc.vf v2, fa3, v4\n\t" // r02 - r04 * 1.25f "vfmacc.vf v6, fa4, v2\n\t" // r06 + (r02 - r04 * 1.25f) * 4 = tmp56a - "vfmul.vf v11, v1, fa5\n\t" // r01 * 0.5f - "vfmacc.vf v11, fa6, v3\n\t" // r01 * 0.5f - r03 * 2.5f - "vfmacc.vf v11, fa7, v5\n\t" // r01 * 0.5f - r03 * 2.5f + r05 * 2.0 = tmp34b - - "vfmul.vf v12, v1, fa7\n\t" // r01 * 2.0f - "vfmacc.vf v12, fa6, v3\n\t" // r01 * 2.f - r03 * 2.5f - "vfmacc.vf v12, fa5, v5\n\t" // r01 * 2.f - r03 * 2.5f + r05 * 0.5 = tmp56b - "vfadd.vv v27, v10, v11\n\t" // tmp34a + tmp34b = tmp[3][m] "vfsub.vv v28, v10, v11\n\t" // tmp34a - tmp34b = tmp[4][m] "vfadd.vv v29, v6, v12\n\t" // tmp56a + tmp56b = tmp[5][m] "vfsub.vv v30, v6, v12\n\t" // tmp56a - tmp56b = tmp[6][m] + "vse.v v25, (a1)\n\t" + "vse.v v26, (a2)\n\t" "vse.v v27, (a3)\n\t" "vse.v v28, (a4)\n\t" "vse.v v29, (a5)\n\t" @@ -588,6 +589,8 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "vle.v v7, (t5)\n\t" // tmp[m][7] "addi t5, t5, 16\n\t" + "vmv.v.v v10, v6\n\t" + //--------------------------------------------- "vfsub.vv v8, v4, v2\n\t" // tmp04 - tmp02 (tmp[m][4] - tmp[m][2]) "vfsub.vv v9, v3, v5\n\t" // tmp03 - tmp05 @@ -595,48 +598,48 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "vfsub.vv v24, v0, v6\n\t" // tmp00 - tmp06 "vfsub.vv v31, v7, v1\n\t" // tmp07 - tmp01 + "vfmacc.vf v10, fa2, v2\n\t" // tmp06 + tmp02 * 0.25f + + "vfmul.vf v11, v1, fa5\n\t" // tmp01 * 0.5f + "vfmul.vf v12, v1, fa7\n\t" // tmp01 * 2.0f + "vfmacc.vf v24, fa0, v8\n\t" // tmp00 - tmp06 + 5.25 * (tmp04 - tmp02) = r0_tm_0[m] "vfmacc.vf v31, fa0, v9\n\t" // tmp07 - tmp01 + 5.25 * (tmp03 - tmp05) = r0_tm_7[m] - "vse.v v24, (a0)\n\t" - "vse.v v31, (a7)\n\t" - //--------------------------------------------- "vfadd.vv v8, v2, v6\n\t" // tmp02 + tmp06 "vfadd.vv v9, v1, v5\n\t" // tmp01 + tmp05 + "vfmacc.vf v11, fa6, v3\n\t" // tmp01 * 0.5f - tmp03 * 2.5f + "vfmacc.vf v12, fa6, v3\n\t" // tmp01 * 2.f - tmp03 * 2.5f + + "vfmacc.vf v2, fa3, v4\n\t" // tmp02 - tmp04 * 1.25f + "vfmacc.vf v10, fa3, v4\n\t" // tmp06 + tmp02 * 0.25f - tmp04 * 1.25f = tmp34a + "vfmacc.vf v8, fa1, v4\n\t" // tmp02 + tmp06 - tmp04 * 4.25f = tmp12a "vfmacc.vf v9, fa1, v3\n\t" // tmp01 + tmp05 - tmp03 * 4.25f = tmp12b + "vfmacc.vf v11, fa7, v5\n\t" // tmp01 * 0.5f - tmp03 * 2.5f + tmp05 * 2.0 = tmp34b + "vfmacc.vf v12, fa5, v5\n\t" // tmp01 * 2.f - tmp03 * 2.5f + tmp05 * 0.5 = tmp56b + + "vse.v v24, (a0)\n\t" + "vse.v v31, (a7)\n\t" + "vfadd.vv v25, v8, v9\n\t" // tmp12a + tmp12b = r0_tm_1[m] "vfsub.vv v26, v8, v9\n\t" // tmp12a - tmp12b = r0_tm_2[m] - "vse.v v25, (a1)\n\t" - "vse.v v26, (a2)\n\t" - //--------------------------------------------- - "vmv.v.v v10, v6\n\t" - - "vfmacc.vf v10, fa2, v2\n\t" // tmp06 + tmp02 * 0.25f - "vfmacc.vf v10, fa3, v4\n\t" // tmp06 + tmp02 * 0.25f - tmp04 * 1.25f = tmp34a - "vfmacc.vf v2, fa3, v4\n\t" // tmp02 - tmp04 * 1.25f "vfmacc.vf v6, fa4, v2\n\t" // tmp06 + (tmp02 - tmp04 * 1.25f) * 4 = tmp56a - "vfmul.vf v11, v1, fa5\n\t" // tmp01 * 0.5f - "vfmacc.vf v11, fa6, v3\n\t" // tmp01 * 0.5f - tmp03 * 2.5f - "vfmacc.vf v11, fa7, v5\n\t" // tmp01 * 0.5f - tmp03 * 2.5f + tmp05 * 2.0 = tmp34b - - "vfmul.vf v12, v1, fa7\n\t" // tmp01 * 2.0f - "vfmacc.vf v12, fa6, v3\n\t" // tmp01 * 2.f - tmp03 * 2.5f - "vfmacc.vf v12, fa5, v5\n\t" // tmp01 * 2.f - tmp03 * 2.5f + tmp05 * 0.5 = tmp56b - "vfadd.vv v27, v10, v11\n\t" // tmp34a + tmp34b = r0_tm_3[m] "vfsub.vv v28, v10, v11\n\t" // tmp34a - tmp34b = r0_tm_4[m] "vfadd.vv v29, v6, v12\n\t" // tmp56a + tmp56b = r0_tm_5[m] "vfsub.vv v30, v6, v12\n\t" // tmp56a - tmp56b = r0_tm_6[m] + "vse.v v25, (a1)\n\t" + "vse.v v26, (a2)\n\t" "vse.v v27, (a3)\n\t" "vse.v v28, (a4)\n\t" "vse.v v29, (a5)\n\t" @@ -647,21 +650,20 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "addi t0, t0, -1\n\t" "bnez t0, 3b" - :"=r"(r0), // %0 "=r"(r0_tm), // %1 "=r"(tmp), // %2 - "=r"(ratio), // %3 + "=r"(ratio_ptr), // %3 "=r"(padded_in_w), // %4 "=r"(tiles) // %5 :"0"(r0), "1"(r0_tm), "2"(tmp), - "3"(ratio), + "3"(ratio_ptr), "4"(padded_in_w), "5"(tiles) :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", - "t0", "t1", "t2", "t5", "s0", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "t0", "t1", "t2", "t5", "s1", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7" ); @@ -697,42 +699,67 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, tm1 += (r * tiles + t) * 8; - //----------------- - for (int q = 0; q < in_c / 8; q++) { - for (int l = 0; l < 8; l++) { - tm2[0] = tm1[l]; - tm2[1] = tm1[l + 8 * 1]; - tm2[2] = tm1[l + 8 * 2]; - tm2[3] = tm1[l + 8 * 3]; - tm2[4] = tm1[l + 8 * 4]; - tm2[5] = tm1[l + 8 * 5]; - tm2[6] = tm1[l + 8 * 6]; - tm2[7] = tm1[l + 8 * 7]; - tm2 += 8; - } - tm1 += 64 * tiles * 8; - } - - //------------------- - - - // asm volatile( - // "vsetvli zero, zero, e16, m1\n\t" - - // "1:\n\t" - - - // :"=r"(tm2), - // "=r"(tm1), - // "=r"(tiles) - - // :"0"(tm2), - // "1"(tm1), - // "2"(tiles) - - // :"cc", "memory", "v0" - - // ); + //---------------------------- + // for (int q = 0; q < in_c / 8; q++) { + // for (int l = 0; l < 8; l++) { + // tm2[0] = tm1[l]; + // tm2[1] = tm1[l + 8 * 1]; + // tm2[2] = tm1[l + 8 * 2]; + // tm2[3] = tm1[l + 8 * 3]; + // tm2[4] = tm1[l + 8 * 4]; + // tm2[5] = tm1[l + 8 * 5]; + // tm2[6] = tm1[l + 8 * 6]; + // tm2[7] = tm1[l + 8 * 7]; + // tm2 += 8; + // } + // tm1 += 64 * tiles * 8; + // } + + //----------------------------- + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" + "slli t1, %2, 10\n\t" // 64 * tiles * 8 * 2 bytes + "srai t2, %3, 3\n\t" // in_ch8 + + "1:\n\t" // in_ch loop8 + + "mv a0, %1\n\t" // updata tm1 addr + + "vle.v v0, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v1, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v2, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v3, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v4, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v5, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v6, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v7, (a0)\n\t" + + "vsseg8e.v v0, (%0)\n\t" + + "add %1, %1, t1\n\t" + "addi %0, %0, 128\n\t" + + "addi t2, t2, -1\n\t" + "bnez t2, 1b\n\t" + + :"=r"(tm2), // %0 + "=r"(tm1), // %1 + "=r"(tiles), // %2 + "=r"(in_c) // %3 + :"0"(tm2), + "1"(tm1), + "2"(tiles), + "3"(in_c) + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "a0", "t1", "t2" + ); } for (; t + 3 < tiles; t += 4) { __fp16 *tm2 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; // img_tm2 row data @@ -740,30 +767,101 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, tm1 += (r * tiles + t) * 8; - for (int q = 0; q < in_c / 8; q++) { - for (int l = 0; l < 8; l++) { - tm2[0] = tm1[l]; - tm2[1] = tm1[l + 8 * 1]; - tm2[2] = tm1[l + 8 * 2]; - tm2[3] = tm1[l + 8 * 3]; - tm2 += 4; - } - tm1 += 64 * tiles * 8; - } + // for (int q = 0; q < in_c / 8; q++) { + // for (int l = 0; l < 8; l++) { + // tm2[0] = tm1[l]; + // tm2[1] = tm1[l + 8 * 1]; + // tm2[2] = tm1[l + 8 * 2]; + // tm2[3] = tm1[l + 8 * 3]; + // tm2 += 4; + // } + // tm1 += 64 * tiles * 8; + // } + + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" + "slli t1, %2, 10\n\t" // 64 * tiles * 8 * 2 bytes + "srai t2, %3, 3\n\t" // in_ch8 + + "1:\n\t" // in_ch loop8 + + "mv a0, %1\n\t" // updata tm1 addr + + "vle.v v0, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v1, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v2, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v3, (a0)\n\t" + + "vsseg4e.v v0, (%0)\n\t" + + "add %1, %1, t1\n\t" + "addi %0, %0, 64\n\t" + + "addi t2, t2, -1\n\t" + "bnez t2, 1b\n\t" + + :"=r"(tm2), // %0 + "=r"(tm1), // %1 + "=r"(tiles), // %2 + "=r"(in_c) // %3 + :"0"(tm2), + "1"(tm1), + "2"(tiles), + "3"(in_c) + :"cc", "memory", "v0", "v1", "v2", "v3", + "a0", "t1", "t2" + ); + } for (; t + 1 < tiles; t += 2) { __fp16 *tm2 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; // img_tm2 row data __fp16 *tm1 = input_tm1_buf; tm1 += (r * tiles + t) * 8; - for (int q = 0; q < in_c / 8; q++) { - for (int l = 0; l < 8; l++) { - tm2[0] = tm1[l]; - tm2[1] = tm1[l + 8]; - tm2 += 2; - } - tm1 += 64 * tiles * 8; - } + // for (int q = 0; q < in_c / 8; q++) { + // for (int l = 0; l < 8; l++) { + // tm2[0] = tm1[l]; + // tm2[1] = tm1[l + 8]; + // tm2 += 2; + // } + // tm1 += 64 * tiles * 8; + // } + + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" + "slli t1, %2, 10\n\t" // 64 * tiles * 8 * 2 bytes + "srai t2, %3, 3\n\t" // in_ch8 + + "1:\n\t" // in_ch loop8 + + "mv a0, %1\n\t" // updata tm1 addr + + "vle.v v0, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vle.v v1, (a0)\n\t" + + "vsseg2e.v v0, (%0)\n\t" + + "add %1, %1, t1\n\t" + "addi %0, %0, 32\n\t" + + "addi t2, t2, -1\n\t" + "bnez t2, 1b\n\t" + + :"=r"(tm2), // %0 + "=r"(tm1), // %1 + "=r"(tiles), // %2 + "=r"(in_c) // %3 + :"0"(tm2), + "1"(tm1), + "2"(tiles), + "3"(in_c) + :"cc", "memory", "v0", "v1", + "a0", "t1", "t2" + ); } for (; t < tiles; t++) { @@ -771,13 +869,45 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, __fp16 *tm1 = input_tm1_buf; tm1 += (r * tiles + t) * 8; - for (int q = 0; q < in_c / 8; q++) { - for (int l = 0; l < 8; l++) { - tm2[0] = tm1[l]; - tm2++; - } - tm1 += 64 * tiles * 8; - } + // for (int q = 0; q < in_c / 8; q++) { + // for (int l = 0; l < 8; l++) { + // tm2[0] = tm1[l]; + // tm2++; + // } + // tm1 += 64 * tiles * 8; + // } + + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" + "slli t1, %2, 10\n\t" // 64 * tiles * 8 * 2 bytes + "srai t2, %3, 3\n\t" // in_ch8 + + "1:\n\t" // in_ch loop8 + + "mv a0, %1\n\t" // updata tm1 addr + + "vle.v v0, (a0)\n\t" + "addi a0, a0, 16\n\t" + + "vse.v v0, (%0)\n\t" + + "add %1, %1, t1\n\t" + "addi %0, %0, 16\n\t" + + "addi t2, t2, -1\n\t" + "bnez t2, 1b\n\t" + + :"=r"(tm2), // %0 + "=r"(tm1), // %1 + "=r"(tiles), // %2 + "=r"(in_c) // %3 + :"0"(tm2), + "1"(tm1), + "2"(tiles), + "3"(in_c) + :"cc", "memory", "v0", + "a0", "t1", "t2" + ); } } @@ -817,6 +947,9 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "1:\n\t" + "vle.v v8, (%1)\n\t" + "addi %1, %1, 16\n\t" + "flh fa0, (%0)\n\t" "flh fa1, 2(%0)\n\t" "flh fa2, 4(%0)\n\t" @@ -827,9 +960,6 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "flh fa7, 14(%0)\n\t" "addi %0, %0, 16\n\t" - "vle.v v8, (%1)\n\t" - "addi %1, %1, 16\n\t" - "vfmacc.vf v0, fa0, v8\n\t" "vfmacc.vf v1, fa1, v8\n\t" "vfmacc.vf v2, fa2, v8\n\t" @@ -887,15 +1017,15 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "1:\n\t" + "vle.v v4, (%1)\n\t" + "addi %1, %1, 16\n\t" + "flh fa0, (%0)\n\t" "flh fa1, 2(%0)\n\t" "flh fa2, 4(%0)\n\t" "flh fa3, 6(%0)\n\t" "addi %0, %0, 8\n\t" - "vle.v v4, (%1)\n\t" - "addi %1, %1, 16\n\t" - "vfmacc.vf v0, fa0, v4\n\t" "vfmacc.vf v1, fa1, v4\n\t" "vfmacc.vf v2, fa2, v4\n\t" @@ -936,13 +1066,13 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "1:\n\t" + "vle.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + "flh fa0, (%0)\n\t" "flh fa1, 2(%0)\n\t" "addi %0, %0, 4\n\t" - "vle.v v2, (%1)\n\t" - "addi %1, %1, 16\n\t" - "vfmacc.vf v0, fa0, v2\n\t" "vfmacc.vf v1, fa1, v2\n\t" @@ -977,12 +1107,12 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "1:\n\t" - "flh fa0, (%0)\n\t" - "addi %0, %0, 2\n\t" - "vle.v v1, (%1)\n\t" "addi %1, %1, 16\n\t" + "flh fa0, (%0)\n\t" + "addi %0, %0, 2\n\t" + "vfmacc.vf v0, fa0, v1\n\t" "addi t0, t0, -1\n\t" @@ -1053,8 +1183,8 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, __fp16 *output0 = out0 + (i * block_w * 6 * 6 + j * 6) * 8; // 输出 6*6 的起始地址 - // __fp16 ratio[8] = {2.0, 4.0, 8.0, 16.0, 32.0}; - __fp16 ratio[8] = {2.0, 4.0, 8.0, 16.0, 32.0}; + __fp16 ratio[] = {2.0, 4.0, 8.0, 16.0, 32.0}; + __fp16 *ratio_ptr = ratio; asm volatile( "vsetvli zero, zero, e16, m1\n\t" @@ -1069,7 +1199,7 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "flh fa3, 6(%3)\n\t" // fa3 = 16 "flh fa4, 8(%3)\n\t" // fa4 = 32 - "mv s0, %0\n\t" + "mv s1, %0\n\t" "1:\n\t" // shape : [6 * 8] * [8 * 8] = [6 * 8] @@ -1080,64 +1210,67 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, "addi a4, a3, 128\n\t" // tmp[4][m] "addi a5, a4, 128\n\t" // tmp[5][m] - "vle.v v0, (s0)\n\t" // r00 - "add s0, s0, t1\n\t" - "vle.v v1, (s0)\n\t" // r01 - "add s0, s0, t1\n\t" - "vle.v v2, (s0)\n\t" // r02 - "add s0, s0, t1\n\t" - "vle.v v3, (s0)\n\t" // r03 - "add s0, s0, t1\n\t" - "vle.v v4, (s0)\n\t" // r04 - "add s0, s0, t1\n\t" - "vle.v v5, (s0)\n\t" // r05 - "add s0, s0, t1\n\t" - "vle.v v6, (s0)\n\t" // r06 - "add s0, s0, t1\n\t" - "vle.v v7, (s0)\n\t" // r07 - "add s0, s0, t1\n\t" + "vle.v v0, (s1)\n\t" // r00 + "add s1, s1, t1\n\t" + "vle.v v1, (s1)\n\t" // r01 + "add s1, s1, t1\n\t" + "vle.v v2, (s1)\n\t" // r02 + "add s1, s1, t1\n\t" + "vle.v v3, (s1)\n\t" // r03 + "add s1, s1, t1\n\t" + "vle.v v4, (s1)\n\t" // r04 + "add s1, s1, t1\n\t" + "vle.v v5, (s1)\n\t" // r05 + "add s1, s1, t1\n\t" + "vle.v v6, (s1)\n\t" // r06 + "add s1, s1, t1\n\t" + "vle.v v7, (s1)\n\t" // r07 + "add s1, s1, t1\n\t" //--------------------------------------------- "vfadd.vv v8, v1, v2\n\t" // r01 + r02 = tmp024a "vfsub.vv v9, v1, v2\n\t" // r01 - r02 = tmp135a - "vmv.v.v v26, v8\n\t" // v26 = tmp024a - "vmv.v.v v28, v8\n\t" // v28 = tmp024a "vfadd.vv v10, v3, v4\n\t" // r03 + r04 = tmp024b "vfsub.vv v11, v3, v4\n\t" // r03 - r04 = tmp135b - "vmv.v.v v14, v10\n\t" // v14 = tmp024b "vfadd.vv v12, v5, v6\n\t" // r05 + r06 = tmp024c "vfsub.vv v13, v5, v6\n\t" // r05 - r06 = tmp135c - //--------------------------------------------- "vfadd.vv v0, v0, v8\n\t" // r00 + tmp024a + "vfadd.vv v7, v7, v9\n\t" // r07 + tmp135a + "vmv.v.v v14, v10\n\t" // v14 = tmp024b + + "vmv.v.v v26, v8\n\t" // v26 = tmp024a + "vmv.v.v v28, v8\n\t" // v28 = tmp024a + + "vfmacc.vf v26, fa1, v10\n\t" // tmp024a + tmp024b * 4 "vfmacc.vf v14, fa4, v12\n\t" // tmp024b + tmp024c * 32 + "vfmacc.vf v28, fa3, v10\n\t" // tmp024a + tmp024b * 16 + + "vmv.v.v v15, v13\n\t" // v15 = tmp135c + "vmv.v.v v25, v9\n\t" // v25 = tmp135a + "vmv.v.v v27, v9\n\t" // v27 = tmp135a "vfadd.vv v24, v0, v14\n\t" // r00 + tmp024a + tmp024b + tmp024c * 32 = tmp[0][m] - "vfmacc.vf v26, fa1, v10\n\t" // tmp024a + tmp024b * 4 - "vfmacc.vf v26, fa2, v12\n\t" // tmp024a + tmp024b * 4 + tmp024c * 8 = tmp[2][m] + "vfmacc.vf v25, fa0, v11\n\t" // tmp135a + tmp135b * 2 + "vfmacc.vf v27, fa2, v11\n\t" // tmp135a + tmp135b * 8 - "vfmacc.vf v28, fa3, v10\n\t" // tmp024a + tmp024b * 16 + //--------------------------------------------- + "vse.v v24, (a0)\n\t" + + "vfmacc.vf v26, fa2, v12\n\t" // tmp024a + tmp024b * 4 + tmp024c * 8 = tmp[2][m] "vfmacc.vf v28, fa0, v12\n\t" // tmp024a + tmp024b * 16 + tmp024c + tmp024c = tmp[4][m] + "vfmacc.vf v15, fa4, v11\n\t" // tmp135b * 32 + tmp135c - "vse.v v24, (a0)\n\t" "vse.v v26, (a2)\n\t" "vse.v v28, (a4)\n\t" //--------------------------------------------- - "vmv.v.v v15, v13\n\t" // v15 = tmp135c - "vmv.v.v v25, v9\n\t" // v25 = tmp135a - "vmv.v.v v27, v9\n\t" // v27 = tmp135a - "vfmacc.vf v25, fa0, v11\n\t" // tmp135a + tmp135b * 2 "vfmacc.vf v25, fa3, v13\n\t" // tmp135a + tmp135b * 2 + tmp135c * 16 = tmp[1][m] - - "vfmacc.vf v27, fa2, v11\n\t" // tmp135a + tmp135b * 8 "vfmacc.vf v27, fa1, v13\n\t" // tmp135a + tmp135b * 8 + tmp135c * 4 = tmp[3][m] - "vfadd.vv v7, v7, v9\n\t" // r07 + tmp135a - "vfmacc.vf v15, fa4, v11\n\t" // tmp135b * 32 + tmp135c "vfadd.vv v29, v7, v15\n\t" // r07 + tmp135a + tmp135b * 32 + tmp135c "vse.v v25, (a1)\n\t" @@ -1185,50 +1318,56 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, //--------------------------------------------- "vfadd.vv v8, v1, v2\n\t" // tmp[m][1] + tmp[m][2] = tmp024a "vfsub.vv v9, v1, v2\n\t" // tmp[m][1] - tmp[m][2] = tmp135a - "vmv.v.v v26, v8\n\t" // v26 = tmp024a - "vmv.v.v v28, v8\n\t" // v28 = tmp024a "vfadd.vv v10, v3, v4\n\t" // tmp[m][3] + tmp[m][4] = tmp024b "vfsub.vv v11, v3, v4\n\t" // tmp[m][3] - tmp[m][4] = tmp135b - "vmv.v.v v14, v10\n\t" // v14 = tmp024b "vfadd.vv v12, v5, v6\n\t" // tmp[m][5] + tmp[m][6] = tmp024c "vfsub.vv v13, v5, v6\n\t" // tmp[m][5] - tmp[m][6] = tmp135c - //--------------------------------------------- "vfadd.vv v0, v0, v8\n\t" // tmp[m][0] + tmp024a + "vfadd.vv v7, v7, v9\n\t" // tmp[m][7] + tmp135a + "vmv.v.v v14, v10\n\t" // v14 = tmp024b + + "vmv.v.v v26, v8\n\t" // v26 = tmp024a + "vmv.v.v v28, v8\n\t" // v28 = tmp024a + + "vfmacc.vf v26, fa1, v10\n\t" // tmp024a + tmp024b * 4 "vfmacc.vf v14, fa4, v12\n\t" // tmp024b + tmp024c * 32 + "vfmacc.vf v28, fa3, v10\n\t" // tmp024a + tmp024b * 16 + + "vmv.v.v v15, v13\n\t" // v15 = tmp135c + "vmv.v.v v25, v9\n\t" // v25 = tmp135a + "vmv.v.v v27, v9\n\t" // v27 = tmp135a "vfadd.vv v24, v0, v14\n\t" // tmp[m][0] + tmp024a + tmp024b + tmp024c * 32 = tmp[0][m] + + "vfmacc.vf v25, fa0, v11\n\t" // tmp135a + tmp135b * 2 + "vfmacc.vf v27, fa2, v11\n\t" // tmp135a + tmp135b * 8 + + //--------------------------------------------- "vfadd.vv v24, v24, v16\n\t" // + bias - "vfmacc.vf v26, fa1, v10\n\t" // tmp024a + tmp024b * 4 "vfmacc.vf v26, fa2, v12\n\t" // tmp024a + tmp024b * 4 + tmp024c * 8 = tmp[2][m] - "vfadd.vv v26, v26, v16\n\t" // + bias - - "vfmacc.vf v28, fa3, v10\n\t" // tmp024a + tmp024b * 16 "vfmacc.vf v28, fa0, v12\n\t" // tmp024a + tmp024b * 16 + tmp024c + tmp024c = tmp[4][m] - "vfadd.vv v28, v28, v16\n\t" // + bias + "vfmacc.vf v15, fa4, v11\n\t" // tmp135b * 32 + tmp135c "vse.v v24, (a0)\n\t" + + "vfmacc.vf v25, fa3, v13\n\t" // tmp135a + tmp135b * 2 + tmp135c * 16 = tmp[1][m] + "vfmacc.vf v27, fa1, v13\n\t" // tmp135a + tmp135b * 8 + tmp135c * 4 = tmp[3][m] + + "vfadd.vv v26, v26, v16\n\t" // + bias + "vfadd.vv v28, v28, v16\n\t" // + bias + + "vfadd.vv v29, v7, v15\n\t" // tmp[m][7] + tmp135a + tmp135b * 32 + tmp135c + "vse.v v26, (a2)\n\t" "vse.v v28, (a4)\n\t" //--------------------------------------------- - "vmv.v.v v15, v13\n\t" // v15 = tmp135c - "vmv.v.v v25, v9\n\t" // v25 = tmp135a - "vmv.v.v v27, v9\n\t" // v27 = tmp135a - "vfmacc.vf v25, fa0, v11\n\t" // tmp135a + tmp135b * 2 - "vfmacc.vf v25, fa3, v13\n\t" // tmp135a + tmp135b * 2 + tmp135c * 16 = tmp[1][m] "vfadd.vv v25, v25, v16\n\t" // + bias - - "vfmacc.vf v27, fa2, v11\n\t" // tmp135a + tmp135b * 8 - "vfmacc.vf v27, fa1, v13\n\t" // tmp135a + tmp135b * 8 + tmp135c * 4 = tmp[3][m] "vfadd.vv v27, v27, v16\n\t" // + bias - - "vfadd.vv v7, v7, v9\n\t" // tmp[m][7] + tmp135a - "vfmacc.vf v15, fa4, v11\n\t" // tmp135b * 32 + tmp135c - "vfadd.vv v29, v7, v15\n\t" // tmp[m][7] + tmp135a + tmp135b * 32 + tmp135c "vfadd.vv v29, v29, v16\n\t" // + bias "vse.v v25, (a1)\n\t" @@ -1243,20 +1382,20 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, :"=r"(output0_tm_0), // %0 "=r"(output0), // %1 "=r"(tmp1), // %2 - "=r"(ratio), // %3 + "=r"(ratio_ptr), // %3 "=r"(tiles), // %4 "=r"(out_w6), // %5 "=r"(bias_tmp) // %6 :"0"(output0_tm_0), "1"(output0), "2"(tmp1), - "3"(ratio), + "3"(ratio_ptr), "4"(tiles), "5"(out_w6), "6"(bias_tmp) :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v24", "v25", "v26", "v27", "v28", "v29", - "t0", "t1", "t2", "t5", "s0", "a0", "a1", "a2", "a3", "a4", "a5", + "t0", "t1", "t2", "t5", "s1", "a0", "a1", "a2", "a3", "a4", "a5", "fa0", "fa1", "fa2", "fa3", "fa4" ); } @@ -1270,29 +1409,26 @@ int csi_c906_conv3x3s1_winograd64_pack8_fp16(struct csi_tensor *input, output_data += output_size; csi_mem_free(output_tm1_buf); } - if (!flag_bias) { csi_mem_free(bias_data); bias_data = NULL; } - // end_time = csi_get_timespec(); - // printf("Run graph execution time: %.5fms, FPS=%.2f\n", ((float)(end_time-start_time))/1000000, - // 1000000000.0/((float)(end_time-start_time))); + return CSINN_TRUE; } -void csi_c906_conv3x3s1_winograd43_transform_kernel_fp16(struct csi_tensor *o_kernel, - struct csi_tensor *t_kernel) +void csi_c906_conv3x3s1_winograd43_transform_kernel_pack8_fp16(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel) { int32_t outch = o_kernel->dim[0]; int32_t inch = o_kernel->dim[1]; - float *kernel_data = (float *)o_kernel->data; + __fp16 *kernel_data = (__fp16 *)o_kernel->data; // for kernel transform buf, 3x3 --> 6x6 - float *kernel_tm = (float *)csi_mem_alloc(outch * inch * 6 * 6 * sizeof(float)); + __fp16 *kernel_tm = (__fp16 *)csi_mem_alloc(outch * inch * 6 * 6 * sizeof(__fp16)); // kernel transform matrix: G - const float ktm[6][3] = { + const __fp16 ktm[6][3] = { { 1.0f/4, 0.0f, 0.0f}, { -1.0f/6, -1.0f/6, -1.0f/6}, { -1.0f/6, 1.0f/6, -1.0f/6}, @@ -1302,21 +1438,20 @@ void csi_c906_conv3x3s1_winograd43_transform_kernel_fp16(struct csi_tensor *o_ke }; csi_tensor_copy(t_kernel, o_kernel); - t_kernel->data = kernel_tm; for (int p = 0; p < outch; p++) { for (int q = 0; q < inch; q++) { - const float* kernel0 = kernel_data + p * inch * 9 + q * 9; - float* kernel_tm0 = kernel_tm + p * inch * 36 + q * 36; + const __fp16* kernel0 = kernel_data + p * inch * 9 + q * 9; + __fp16* kernel_tm0 = kernel_tm + p * inch * 36 + q * 36; // transform kernel - const float *k0 = kernel0; - const float *k1 = kernel0 + 3; - const float *k2 = kernel0 + 6; + const __fp16 *k0 = kernel0; + const __fp16 *k1 = kernel0 + 3; + const __fp16 *k2 = kernel0 + 6; // h : first compute the transport matrix tmp = (g * GT)T - float tmp[6][3]; + __fp16 tmp[6][3]; for (int i = 0; i < 6; i++) { tmp[i][0] = k0[0] * ktm[i][0] + k0[1] * ktm[i][1] + k0[2] * ktm[i][2]; @@ -1326,40 +1461,90 @@ void csi_c906_conv3x3s1_winograd43_transform_kernel_fp16(struct csi_tensor *o_ke // U for (int j = 0; j < 6; j++) { - float* tmpp = &tmp[j][0]; + __fp16* tmpp = &tmp[j][0]; for (int i = 0; i < 6; i++) { - kernel_tm0[i * 6 + j] = tmpp[0] * ktm[i][0] + tmpp[1] * ktm[i][1] + tmpp[2] * ktm[i][2]; + kernel_tm0[j * 6 + i] = tmpp[0] * ktm[i][0] + tmpp[1] * ktm[i][1] + tmpp[2] * ktm[i][2]; } } } } -} + // [O, I, 6, 6] --> [O/4, 6*6, I, 4] + __fp16 *kernel_tm_pack4 = (__fp16 *)csi_mem_alloc(outch * inch * 6 * 6 * sizeof(__fp16)); + t_kernel->data = kernel_tm_pack4; -int csi_c906_conv3x3s1_winograd43_fp16(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params) -{ - float *input_data = (float *)input->data; - float *output_data = (float *)output->data; - float *kernel_data = (float *)kernel->data; - float *bias_data = (float *)bias->data; + for (int oc = 0; oc < outch / 8; oc++) { - // param - int kernel_h = kernel->dim[2]; - int kernel_w = kernel->dim[3]; - int stride_h = params->stride_height; - int stride_w = params->stride_width; - int dilation_h = params->dilation_height; - int dilation_w = params->dilation_width; - int pad_left = params->pad_left; - int pad_top = params->pad_top; + __fp16 *g0 = kernel_tm_pack4 + oc * 36 * inch * 8; - int batch = input->dim[0]; - int in_c = input->dim[1]; + const __fp16 *k0 = kernel_tm + oc * 36 * inch * 8; + const __fp16 *k1 = k0 + 36 * inch; + const __fp16 *k2 = k1 + 36 * inch; + const __fp16 *k3 = k2 + 36 * inch; + const __fp16 *k4 = k3 + 36 * inch; + const __fp16 *k5 = k4 + 36 * inch; + const __fp16 *k6 = k5 + 36 * inch; + const __fp16 *k7 = k6 + 36 * inch; + + for (int k = 0; k < 36; k++) { + + __fp16 *g00 = g0 + k * inch * 8; + + for (int ic = 0; ic < inch / 8; ic++) { + + for (int i = 0; i < 8; i++) { + + const __fp16 *k00 = k0 + (ic * 8 + i) * 36; + const __fp16 *k10 = k1 + (ic * 8 + i) * 36; + const __fp16 *k20 = k2 + (ic * 8 + i) * 36; + const __fp16 *k30 = k3 + (ic * 8 + i) * 36; + const __fp16 *k40 = k4 + (ic * 8 + i) * 36; + const __fp16 *k50 = k5 + (ic * 8 + i) * 36; + const __fp16 *k60 = k6 + (ic * 8 + i) * 36; + const __fp16 *k70 = k7 + (ic * 8 + i) * 36; + + g00[0] = k00[k]; + g00[1] = k10[k]; + g00[2] = k20[k]; + g00[3] = k30[k]; + g00[4] = k40[k]; + g00[5] = k50[k]; + g00[6] = k60[k]; + g00[7] = k70[k]; + + g00 += 8; + } + } + } + } + + csi_mem_free(kernel_tm); +} + +int csi_c906_conv3x3s1_winograd43_pack8_fp16(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *kernel, + struct csi_tensor *bias, + struct conv2d_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *kernel_data = (__fp16 *)params->conv_extra.kernel_tm->data; + __fp16 *bias_data = (__fp16 *)bias->data; + + // param + int kernel_h = kernel->dim[2]; + int kernel_w = kernel->dim[3]; + int stride_h = params->stride_height; + int stride_w = params->stride_width; + int dilation_h = params->dilation_height; + int dilation_w = params->dilation_width; + int pad_left = params->pad_left; + int pad_top = params->pad_top; + + int batch = input->dim[0]; + int in_c = input->dim[1]; int in_h = input->dim[2]; int in_w = input->dim[3]; int input_size = in_c * in_h * in_w; @@ -1374,22 +1559,31 @@ int csi_c906_conv3x3s1_winograd43_fp16(struct csi_tensor *input, int block_h = (out_h + 3) / 4; int block_w = (out_w + 3) / 4; - int padded_in_h = block_h * 4 + 2; // block * 4 for alignment with 4,kernel = 3 * 3 ,stride = 1,thus input_size + 2 + int padded_in_h = block_h * 4 + 2; // block * 4 for alignment with 4,kernel = 3 * 3, stride = 1,thus input_size + 2 int padded_in_w = block_w * 4 + 2; int padded_in_hw = padded_in_h * padded_in_w; // element size after padding per channel - // buffer addr - float *input_padd_buf = (float *)csi_mem_alloc(in_c * padded_in_hw * sizeof(float)); - float *input_trans_buf = (float *)csi_mem_alloc(in_c * block_h * block_w * 6 * 6 * sizeof(float)); - float *output_trans_buf = (float *)csi_mem_alloc(out_c * block_h * block_w * 4 * 4 * sizeof(float)); + /****************************** bias *****************************/ + bool flag_bias = 1; // default: conv2d layer include bias + if (bias_data == NULL) { + flag_bias = 0; + bias_data = (__fp16 *)csi_mem_alloc(out_c * sizeof(__fp16)); + } + for(int n = 0; n < batch; n++) { + // pad buffer: [in_c/4 h w 4] + __fp16 *input_padd_buf = (__fp16 *)csi_mem_alloc(in_c * padded_in_hw * sizeof(__fp16)); + // pad input - csi_c906_pad_input(input_data, input_padd_buf, in_c, in_h, in_w, padded_in_h, padded_in_w, pad_top, pad_left); + csi_c906_pad_input_pack1to8_fp16(input_data, input_padd_buf, in_c, in_h, in_w, padded_in_h, padded_in_w, pad_top, pad_left); input_data += input_size; - // transform input + // input transform buffer1: [in_ch/4, 36, blocks, 6] + __fp16 *input_tm1_buf = (__fp16 *)csi_mem_alloc(in_c * block_h * block_w * 6 * 6 * sizeof(__fp16)); + + /****************************** transform input *****************************/ /* BT = { { 4 0 -5 0 1 0 }; @@ -1400,193 +1594,706 @@ int csi_c906_conv3x3s1_winograd43_fp16(struct csi_tensor *input, { 0 4 0 -5 0 1 } }; */ - int in_h_tm = block_h * 6; // input height after transform - int in_w_tm = block_w * 6; - const int tiles = block_h * block_w; + int tiles = block_h * block_w; - for(int q = 0; q < in_c; q++) { + #pragma omp parallel for num_threads(1) + for(int q = 0; q < in_c / 4; q++) { - const float *img0 = input_padd_buf + q * padded_in_h * padded_in_w; - float *img0_tm = input_trans_buf + q * block_h * block_w * 6 * 6; + __fp16 *img0 = input_padd_buf + q * padded_in_h * padded_in_w * 8; // feature map after padding - q channel + __fp16 *img0_tm = input_tm1_buf + q * 36 * tiles * 8; // transform and interleave - q channel - float tmp[6][6]; + __fp16 *tmp = (__fp16 *)csi_mem_alloc(6 * 6 * 8 * sizeof(__fp16)); for(int i = 0; i < block_h; i++) { for(int j = 0; j < block_w; j++) { - const float *r0 = img0 + i * padded_in_w * 4 + j * 4; + __fp16 *r0 = img0 + (i * padded_in_w * 4 + j * 4) * 8; // feature map after padding 6*6 start addr + __fp16 *r0_tm = img0_tm + (i * block_w + j) * 8; // input_tm1 6*6 block start addr - for(int m = 0; m < 6; m++) { - tmp[0][m] = 4 * r0[0] - 5 * r0[2] + r0[4]; - tmp[1][m] = r0[3] + r0[4] - 4 * r0[1] - 4 * r0[2]; - tmp[2][m] = 4 * r0[1] + r0[4] - 4 * r0[2] - r0[3]; - tmp[3][m] = 2 * r0[3] + r0[4] - 2 * r0[1] - r0[2]; - tmp[4][m] = 2 * r0[1] + r0[4] - 2 * r0[3] - r0[2]; - tmp[5][m] = 4 * r0[1] - 5 * r0[3] + r0[5]; - r0 += padded_in_w; - } + __fp16 ratio[] = {4, -4, 2, -2, -5}; // note: in fact cannot be output constrain + __fp16 *ratio_ptr = ratio; + + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" + "li t0, 6\n\t" // m = 6 + "mv t5, %2\n\t" // t5 = tmp start addr + "slli t1, %4, 4\n\t" // t1 = padded_in_w * 8 * 2 bytes + + "flh fa0, 0(%3)\n\t" // fa0 = 4 + "flh fa1, 2(%3)\n\t" // fa1 = -4 + "flh fa2, 4(%3)\n\t" // fa2 = 2 + "flh fa3, 6(%3)\n\t" // fa3 = -2 + "flh fa4, 8(%3)\n\t" // fa4 = -5 + + "1:\n\t" + "mv s1, %0\n\t" // s1 = r00 addr + + "mv a0, t5\n\t" // tmp[0][m] + "addi a1, a0, 96\n\t" // tmp[1][m] + "addi a2, a1, 96\n\t" // tmp[2][m] + "addi a3, a2, 96\n\t" // tmp[3][m] + "addi a4, a3, 96\n\t" // tmp[4][m] + "addi a5, a4, 96\n\t" // tmp[5][m] + + "vle.v v0, (s1)\n\t" // r00 + "addi s1, s1, 16\n\t" + "vle.v v1, (s1)\n\t" // r01 + "addi s1, s1, 16\n\t" + "vle.v v2, (s1)\n\t" // r02 + "addi s1, s1, 16\n\t" + "vle.v v3, (s1)\n\t" // r03 + "addi s1, s1, 16\n\t" + "vle.v v4, (s1)\n\t" // r04 + "addi s1, s1, 16\n\t" + "vle.v v5, (s1)\n\t" // r05 + "addi s1, s1, 16\n\t" + + "vmv.v.v v24, v4\n\t" + "vmv.v.v v29, v5\n\t" + //--------------------------------------------- + "vfmacc.vf v24, fa0, v0\n\t" // r04 + 4 * r00 + "vfmacc.vf v24, fa4, v2\n\t" // r04 + 4 * r00 - 5 * r02 + + "vse.v v24, (a0)\n\t" + //--------------------------------------------- + "vfadd.vv v25, v3, v4\n\t" // r03 + r04 + "vfadd.vv v6, v1, v2\n\t" // r01 + r02 + "vfmacc.vf v25, fa1, v6\n\t" // r03 + r04 - 4 * (r01 - r02) + + "vse.v v25, (a1)\n\t" + //--------------------------------------------- + "vfsub.vv v26, v4, v3\n\t" // r04 - r03 + "vfsub.vv v7, v1, v2\n\t" // r01 - r02 + "vfmacc.vf v26, fa0, v7\n\t" // r04 - r03 + 4 * (r01 - r02) + + "vse.v v26, (a2)\n\t" + //--------------------------------------------- + "vfsub.vv v8, v1, v3\n\t" // r01 - r03 + "vfsub.vv v27, v4, v2\n\t" // r04 - r02 + "vfsub.vv v28, v4, v2\n\t" // r04 - r02 + + "vfmacc.vf v27, fa3, v8\n\t" // r04 - r02 - 2 * (r01 - r03) + "vse.v v27, (a3)\n\t" + + "vfmacc.vf v28, fa2, v8\n\t" // r04 - r02 + 2 * (r01 - r03) + "vse.v v28, (a4)\n\t" + //--------------------------------------------- + "vfmacc.vf v29, fa0, v1\n\t" // r05 + 4 * r01 + "vfmacc.vf v29, fa4, v3\n\t" // r05 + 4 * r01 - 5 * r03 + + "vse.v v29, (a5)\n\t" + //--------------------------------------------- + + "add %0, %0, t1\n\t" // padding feature map 6*6 next line addr + "addi t5, t5, 16\n\t" // tmp[0][0] --> tmp[0][1] + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "2:\n\t" + + "mv t5, %2\n\t" // tmp start addr + "li t0, 6\n\t" // m = 6 + + "slli t1, %5, 4\n\t" // t1 = tiles * 8 * 2 bytes + "mulw t2, t0, t1\n\t" // t2 = tiles * 6 blocks * 8 channels * 2 bytes + + "3:\n\t" + + "mv a0, %1\n\t" // r0_tm_0 + "add a1, a0, t1\n\t" // r0_tm_1 + "add a2, a1, t1\n\t" // r0_tm_2 + "add a3, a2, t1\n\t" // r0_tm_3 + "add a4, a3, t1\n\t" // r0_tm_4 + "add a5, a4, t1\n\t" // r0_tm_5 + + "vle.v v0, (t5)\n\t" // tmp[m][0] + "addi t5, t5, 16\n\t" + "vle.v v1, (t5)\n\t" // tmp[m][1] + "addi t5, t5, 16\n\t" + "vle.v v2, (t5)\n\t" // tmp[m][2] + "addi t5, t5, 16\n\t" + "vle.v v3, (t5)\n\t" // tmp[m][3] + "addi t5, t5, 16\n\t" + "vle.v v4, (t5)\n\t" // tmp[m][4] + "addi t5, t5, 16\n\t" + "vle.v v5, (t5)\n\t" // tmp[m][5] + "addi t5, t5, 16\n\t" + + "vmv.v.v v24, v4\n\t" + "vmv.v.v v29, v5\n\t" + //--------------------------------------------- + "vfmacc.vf v24, fa0, v0\n\t" // r04 + 4 * r00 + "vfmacc.vf v24, fa4, v2\n\t" // r04 * 4 * r00 - 5 * r02 + + "vse.v v24, (a0)\n\t" + //--------------------------------------------- + "vfadd.vv v25, v3, v4\n\t" // r03 + r04 + "vfadd.vv v6, v1, v2\n\t" // r01 + r02 + "vfmacc.vf v25, fa1, v6\n\t" // r03 + r04 - 4 * (r01 - r02) + + "vse.v v25, (a1)\n\t" + //--------------------------------------------- + "vfsub.vv v26, v4, v3\n\t" // r04 - r03 + "vfsub.vv v7, v1, v2\n\t" // r01 - r02 + "vfmacc.vf v26, fa0, v7\n\t" // r04 - r03 + 4 * (r01 - r02) + + "vse.v v26, (a2)\n\t" + //--------------------------------------------- + "vfsub.vv v8, v1, v3\n\t" // r01 - r03 + "vfsub.vv v27, v4, v2\n\t" // r04 - r02 + "vfsub.vv v28, v4, v2\n\t" // r04 - r02 + + "vfmacc.vf v27, fa3, v8\n\t" // r04 - r02 - 2 * (r01 - r03) + "vse.v v27, (a3)\n\t" + + "vfmacc.vf v28, fa2, v8\n\t" // r04 - r02 + 2 * (r01 - r03) + "vse.v v28, (a4)\n\t" + //--------------------------------------------- + "vfmacc.vf v29, fa0, v1\n\t" // r05 + 4 * r01 + "vfmacc.vf v29, fa4, v3\n\t" // r05 + 4 * r01 - 5 * r03 + + "vse.v v29, (a5)\n\t" + //--------------------------------------------- + + "add %1, %1, t2\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 3b" + + + :"=r"(r0), // %0 + "=r"(r0_tm), // %1 + "=r"(tmp), // %2 + "=r"(ratio_ptr), // %3 + "=r"(padded_in_w), // %4 + "=r"(tiles) // %5 + :"0"(r0), + "1"(r0_tm), + "2"(tmp), + "3"(ratio_ptr), + "4"(padded_in_w), + "5"(tiles) + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v24", "v25", "v26", "v27", "v28", "v29", + "t0", "t1", "t2", "t5", "s1", "a0", "a1", "a2", "a3", "a4", "a5", + "fa0", "fa1", "fa2", "fa3", "fa4", "fa5" + ); - float *r0_tm_0 = img0_tm + i * in_w_tm * 6 + j * 6; - float *r0_tm_1 = r0_tm_0 + in_w_tm; - float *r0_tm_2 = r0_tm_1 + in_w_tm; - float *r0_tm_3 = r0_tm_2 + in_w_tm; - float *r0_tm_4 = r0_tm_3 + in_w_tm; - float *r0_tm_5 = r0_tm_4 + in_w_tm; - - for(int m = 0; m < 6; m++) { - - const float *tmp0 = tmp[m]; - r0_tm_0[m] = 4 * tmp0[0] - 5 * tmp0[2] + tmp0[4]; - r0_tm_1[m] = tmp0[3] + tmp0[4] - 4 * tmp0[1] - 4 * tmp0[2]; - r0_tm_2[m] = 4 * tmp0[1] + tmp0[4] - 4 * tmp0[2] - tmp0[3]; - r0_tm_3[m] = 2 * tmp0[3] + tmp0[4] - 2 * tmp0[1] - tmp0[2]; - r0_tm_4[m] = 2 * tmp0[1] + tmp0[4] - 2 * tmp0[3] - tmp0[2]; - r0_tm_5[m] = 4 * tmp0[1] - 5 * tmp0[3] + tmp0[5]; - } } } + csi_mem_free(tmp); + } + csi_mem_free(input_padd_buf); + + /*********************************** dot ***************************************/ + // reorder input_tm1_buf + int size_input_tm2 = 0; + if (tiles >= 8) { + size_input_tm2 = 36 * (tiles / 8 + (tiles % 8) / 4 + (tiles % 4) / 2 + tiles % 2) * in_c * 8; + } else if (tiles >= 4) { + size_input_tm2 = 36 * (tiles / 4 + (tiles % 4) / 2 + tiles % 2) * in_c * 4; + } else if (tiles >= 2) { + size_input_tm2 = 36 * (tiles / 2 + tiles % 2) * in_c * 2; + } else { + size_input_tm2 = 36 * tiles * in_c; } + __fp16 *input_tm2_buf = (__fp16 *)csi_mem_alloc(size_input_tm2 * sizeof(__fp16)); + + #pragma omp parallel for num_threads(1) + for (int r = 0; r < 36; r++) { + + __fp16 *img_tm2 = input_tm2_buf + r * size_input_tm2 / 36; // input_tm2 r channel data + + int t = 0; + for (; t + 7 < tiles; t += 8) { + __fp16 *tm2 = img_tm2 + t * in_c; // img_tm2 row data + __fp16 *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * 8; + + //----------------- + for (int q = 0; q < in_c / 8; q++) { + for (int l = 0; l < 8; l++) { + tm2[0] = tm1[l]; + tm2[1] = tm1[l + 8 * 1]; + tm2[2] = tm1[l + 8 * 2]; + tm2[3] = tm1[l + 8 * 3]; + tm2[4] = tm1[l + 8 * 4]; + tm2[5] = tm1[l + 8 * 5]; + tm2[6] = tm1[l + 8 * 6]; + tm2[7] = tm1[l + 8 * 7]; + tm2 += 8; + } + tm1 += 36 * tiles * 8; + } + } + for (; t + 3 < tiles; t += 4) { + __fp16 *tm2 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; // img_tm2 row data + __fp16 *tm1 = input_tm1_buf; - // dot - float *output_dot_buf = (float *)csi_mem_alloc(out_c * block_h * block_w * 6 * 6 * sizeof(float)); - - for(int i = 0; i < out_c; i++) { - for(int j = 0; j < block_h; j++) { - for(int k = 0; k < block_w; k++) { - float *input_0 = input_trans_buf + j * 6 * 6 * block_w + k * 6; - float *input_1 = input_0 + block_w * 6; - float *input_2 = input_1 + block_w * 6; - float *input_3 = input_2 + block_w * 6; - float *input_4 = input_3 + block_w * 6; - float *input_5 = input_4 + block_w * 6; - - float *kernel_0 = kernel_data + i * in_c * 36; - float *kernel_1 = kernel_0 + 6; - float *kernel_2 = kernel_1 + 6; - float *kernel_3 = kernel_2 + 6; - float *kernel_4 = kernel_3 + 6; - float *kernel_5 = kernel_4 + 6; - - float *output_0 = output_dot_buf + i * block_h * block_w * 36 + j * 36 * block_w + k * 6; - float *output_1 = output_0 + block_w * 6; - float *output_2 = output_1 + block_w * 6; - float *output_3 = output_2 + block_w * 6; - float *output_4 = output_3 + block_w * 6; - float *output_5 = output_4 + block_w * 6; - - for(int a = 0; a < in_c; a++) { - output_0[0] += input_0[0] * kernel_0[0]; - output_0[1] += input_0[1] * kernel_0[1]; - output_0[2] += input_0[2] * kernel_0[2]; - output_0[3] += input_0[3] * kernel_0[3]; - output_0[4] += input_0[4] * kernel_0[4]; - output_0[5] += input_0[5] * kernel_0[5]; - - output_1[0] += input_1[0] * kernel_1[0]; - output_1[1] += input_1[1] * kernel_1[1]; - output_1[2] += input_1[2] * kernel_1[2]; - output_1[3] += input_1[3] * kernel_1[3]; - output_1[4] += input_1[4] * kernel_1[4]; - output_1[5] += input_1[5] * kernel_1[5]; - - output_2[0] += input_2[0] * kernel_2[0]; - output_2[1] += input_2[1] * kernel_2[1]; - output_2[2] += input_2[2] * kernel_2[2]; - output_2[3] += input_2[3] * kernel_2[3]; - output_2[4] += input_2[4] * kernel_2[4]; - output_2[5] += input_2[5] * kernel_2[5]; - - output_3[0] += input_3[0] * kernel_3[0]; - output_3[1] += input_3[1] * kernel_3[1]; - output_3[2] += input_3[2] * kernel_3[2]; - output_3[3] += input_3[3] * kernel_3[3]; - output_3[4] += input_3[4] * kernel_3[4]; - output_3[5] += input_3[5] * kernel_3[5]; - - output_4[0] += input_4[0] * kernel_4[0]; - output_4[1] += input_4[1] * kernel_4[1]; - output_4[2] += input_4[2] * kernel_4[2]; - output_4[3] += input_4[3] * kernel_4[3]; - output_4[4] += input_4[4] * kernel_4[4]; - output_4[5] += input_4[5] * kernel_4[5]; - - output_5[0] += input_5[0] * kernel_5[0]; - output_5[1] += input_5[1] * kernel_5[1]; - output_5[2] += input_5[2] * kernel_5[2]; - output_5[3] += input_5[3] * kernel_5[3]; - output_5[4] += input_5[4] * kernel_5[4]; - output_5[5] += input_5[5] * kernel_5[5]; - - input_0 += block_h * block_w * 36; - input_1 += block_h * block_w * 36; - input_2 += block_h * block_w * 36; - input_3 += block_h * block_w * 36; - input_4 += block_h * block_w * 36; - input_5 += block_h * block_w * 36; - - kernel_0 += 36; - kernel_1 += 36; - kernel_2 += 36; - kernel_3 += 36; - kernel_4 += 36; - kernel_5 += 36; + tm1 += (r * tiles + t) * 8; + + for (int q = 0; q < in_c / 8; q++) { + for (int l = 0; l < 8; l++) { + tm2[0] = tm1[l]; + tm2[1] = tm1[l + 8 * 1]; + tm2[2] = tm1[l + 8 * 2]; + tm2[3] = tm1[l + 8 * 3]; + tm2 += 4; } + tm1 += 36 * tiles * 8; } } - } + for (; t + 1 < tiles; t += 2) { + __fp16 *tm2 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; // img_tm2 row data + __fp16 *tm1 = input_tm1_buf; - // transform output - /* - AT = { - { 1 1 1 1 1 0 }, - { 0 1 -1 2 -2 0 }, - { 0 1 1 4 4 0 }, - { 0 1 -1 8 -8 1 } - }; - */ - for(int i = 0; i < out_c; i++) { - - const float bias = bias_data ? bias_data[i] : 0.f; - const float *img1 = output_dot_buf + i * block_h * block_w * 6 * 6; - float *img1_tm = output_trans_buf + i * block_h * block_w * 4 * 4; - - float tmp[4][6]; - for(int j = 0; j < block_h; j++) { - for(int k = 0; k < block_w; k++) { - const float *r1 = img1 + j * block_w * 6 * 6 + k * 6; - - for(int m = 0; m < 6; m++) { - tmp[0][m] = r1[0] + r1[1] + r1[2] + r1[3] + r1[4]; - tmp[1][m] = r1[1] - r1[2] + 2 * r1[3] - 2 * r1[4]; - tmp[2][m] = r1[1] + r1[2] + 4 * r1[3] + 4 * r1[4]; - tmp[3][m] = r1[1] - r1[2] + 8 * r1[3] - 8 * r1[4] + r1[5]; - r1 += block_w * 6; + tm1 += (r * tiles + t) * 8; + for (int q = 0; q < in_c / 8; q++) { + for (int l = 0; l < 8; l++) { + tm2[0] = tm1[l]; + tm2[1] = tm1[l + 8]; + tm2 += 2; } - float *r1_tm_0 = img1_tm + j * block_w * 4 * 4 + k * 4; - float *r1_tm_1 = r1_tm_0 + block_w * 4; - float *r1_tm_2 = r1_tm_1 + block_w * 4; - float *r1_tm_3 = r1_tm_2 + block_w * 4; - - for(int m = 0; m < 4; m++) { - const float *tmp1 = tmp[m]; - r1_tm_0[m] = tmp1[0] + tmp1[1] + tmp1[2] + tmp1[3] + tmp1[4] + bias; - r1_tm_1[m] = tmp1[1] - tmp1[2] + 2 * tmp1[3] - 2 * tmp1[4] + bias; - r1_tm_2[m] = tmp1[1] + tmp1[2] + 4 * tmp1[3] + 4 * tmp1[4] + bias; - r1_tm_3[m] = tmp1[1] - tmp1[2] + 8 * tmp1[3] - 8 * tmp1[4] + tmp1[5] + bias; + tm1 += 36 * tiles * 8; + } + + } + for (; t < tiles; t++) { + __fp16 *tm2 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2 + t % 2) * in_c * 8; // img_tm2 row data + __fp16 *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * 8; + for (int q = 0; q < in_c / 8; q++) { + for (int l = 0; l < 8; l++) { + tm2[0] = tm1[l]; + tm2++; } + tm1 += 36 * tiles * 8; } } } - csi_mem_free(output_dot_buf); - // crop the output after transform: cut extra part (right , bottom) - csi_c906_crop_output(output_trans_buf, output_data, out_c, out_h, out_w, block_h * 4, block_w * 4); - output_data += output_size; + + csi_mem_free(input_tm1_buf); + + // output_dot_buf: [out_c/4, 36, blocks, 4] + __fp16 *output_dot_buf = (__fp16 *)csi_mem_alloc(out_c * block_h * block_w * 6 * 6 * sizeof(__fp16)); + + #pragma omp parallel for num_threads(1) + for (int p = 0; p < out_c / 8; p++) { + + __fp16 *output0_tm = output_dot_buf + p * 36 * tiles * 8; // 8 channel dot output + __fp16 *kernel0_tm = kernel_data + p * 36 * in_c * 8; // 8 channel kernel + + for (int r = 0; r < 36; r++) { + + __fp16 *img_tm2 = input_tm2_buf + r * size_input_tm2 / 36; // img_tm2 第r个channel + + int t = 0; + for (; t + 7 < tiles; t += 8) { + + __fp16 *r0 = img_tm2 + t * in_c; + __fp16 *k0 = kernel0_tm + r * in_c * 8; + + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + + "vmv.v.x v0, zero\n\t" + "vmv.v.x v1, zero\n\t" + "vmv.v.x v2, zero\n\t" + "vmv.v.x v3, zero\n\t" + "vmv.v.x v4, zero\n\t" + "vmv.v.x v5, zero\n\t" + "vmv.v.x v6, zero\n\t" + "vmv.v.x v7, zero\n\t" // clear + + "1:\n\t" + + "flh fa0, (%0)\n\t" + "flh fa1, 2(%0)\n\t" + "flh fa2, 4(%0)\n\t" + "flh fa3, 6(%0)\n\t" + "flh fa4, 8(%0)\n\t" + "flh fa5, 10(%0)\n\t" + "flh fa6, 12(%0)\n\t" + "flh fa7, 14(%0)\n\t" + "addi %0, %0, 16\n\t" + + "vle.v v8, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v8\n\t" + "vfmacc.vf v1, fa1, v8\n\t" + "vfmacc.vf v2, fa2, v8\n\t" + "vfmacc.vf v3, fa3, v8\n\t" + "vfmacc.vf v4, fa4, v8\n\t" + "vfmacc.vf v5, fa5, v8\n\t" + "vfmacc.vf v6, fa6, v8\n\t" + "vfmacc.vf v7, fa7, v8\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v1, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v3, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v4, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v5, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v6, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v7, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "t0" + + ); + } + for (; t + 3 < tiles; t += 4) { + __fp16 *r0 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; + __fp16 *k0 = kernel0_tm + r * in_c * 8; + + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + "vmv.v.x v0, zero\n\t" + "vmv.v.x v1, zero\n\t" + "vmv.v.x v2, zero\n\t" + "vmv.v.x v3, zero\n\t" // clear + + "1:\n\t" + + "flh fa0, (%0)\n\t" + "flh fa1, 2(%0)\n\t" + "flh fa2, 4(%0)\n\t" + "flh fa3, 6(%0)\n\t" + "addi %0, %0, 8\n\t" + + "vle.v v4, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v4\n\t" + "vfmacc.vf v1, fa1, v4\n\t" + "vfmacc.vf v2, fa2, v4\n\t" + "vfmacc.vf v3, fa3, v4\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v1, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v3, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "fa0", "fa1", "fa2", "fa3", "t0" + ); + } + for (; t + 1 < tiles; t += 2) { + __fp16 *r0 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; + __fp16 *k0 = kernel0_tm + r * in_c * 8; + + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + "vmv.v.x v0, zero\n\t" + "vmv.v.x v1, zero\n\t" // clear + + "1:\n\t" + + "flh fa0, (%0)\n\t" + "flh fa1, 2(%0)\n\t" + "addi %0, %0, 4\n\t" + + "vle.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v2\n\t" + "vfmacc.vf v1, fa1, v2\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vse.v v1, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + :"cc", "memory", "v0", "v1", "v2", "fa0", "fa1", "t0" + ); + } + for (; t < tiles; t++) { + + __fp16 *r0 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2 + t % 2) * in_c * 8; + __fp16 *k0 = kernel0_tm + r * in_c * 8; + + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" + "mv t0, %3\n\t" // t0 = in_c + "vmv.v.x v0, zero\n\t" // clear + + "1:\n\t" + + "flw fa0, (%0)\n\t" + "addi %0, %0, 2\n\t" + + "vle.v v1, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "vfmacc.vf v0, fa0, v1\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "vse.v v0, (%2)\n\t" + "addi %2, %2, 16\n\t" + + :"=r"(r0), // %0 + "=r"(k0), // %1 + "=r"(output0_tm), // %2 + "=r"(in_c) // %3 + :"0"(r0), + "1"(k0), + "2"(output0_tm), + "3"(in_c) + :"cc", "memory", "v0", "v1", "fa0", "t0" + ); + + } + + } + + } + + csi_mem_free(input_tm2_buf); + /*************************** transform output ****************************/ + // output_tm1_buf: [out_c/4, out_h4, out_w4, 4] + __fp16 *output_tm1_buf = (__fp16 *)csi_mem_alloc(out_c * block_h * block_w * 4 * 4 * sizeof(__fp16)); + + /* + AT = { + { 1 1 1 1 1 0 }, + { 0 1 -1 2 -2 0 }, + { 0 1 1 4 4 0 }, + { 0 1 -1 8 -8 1 } + }; + */ + + #pragma omp parallel for num_threads(1) + for (int p = 0; p < out_c / 8; p++) + { + + __fp16 *bias_tmp = bias_data + p * 8; + + __fp16 *out0_tm = output_dot_buf + p * 36 * block_h * block_w * 8; // 输出转换前/dot后 第p个channel + __fp16 *out0 = output_tm1_buf + p * 4*block_h * 4*block_w * 8; // 转换后输出 第p个channel + + __fp16 *tmp1 = (__fp16 *)csi_mem_alloc(4 * 6 * 8 * sizeof(__fp16)); + int out_w4 = block_w * 4; + + for (int i = 0; i < block_h; i++) { + + for (int j = 0; j < block_w; j++) { + + __fp16 *output0_tm_0 = out0_tm + (i * block_w + j) * 8; // 6*6 起始地址 + + __fp16 *output0 = out0 + (i * block_w * 4 * 4 + j * 4) * 8; // 输出 4*4 的起始地址 + + __fp16 ratio[] = {2.0, 4.0, 8.0}; + __fp16 *ratio_ptr = ratio; + + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" + "li t0, 6\n\t" // m = 6 + "mv t5, %2\n\t" // t5 = tmp start addr + "slli t1, %4, 4\n\t" // t1 = tiles * 8 * 2 + "mulw t2, t0, t1\n\t" // t2 = tiles * 6 blocks * 8 channels * 2 bytes + + "flh fa0, 0(%3)\n\t" // fa0 = 2 + "flh fa1, 2(%3)\n\t" // fa1 = 4 + "flh fa2, 4(%3)\n\t" // fa2 = 8 + + "mv s1, %0\n\t" + + "1:\n\t" // shape : [4 * 6] * [6 * 6] = [4 * 6] + + "mv a0, t5\n\t" // tmp[0][m] + "addi a1, a0, 96\n\t" // tmp[1][m] + "addi a2, a1, 96\n\t" // tmp[2][m] + "addi a3, a2, 96\n\t" // tmp[3][m] + + "vle.v v0, (s1)\n\t" // r00 + "add s1, s1, t1\n\t" + "vle.v v1, (s1)\n\t" // r01 + "add s1, s1, t1\n\t" + "vle.v v2, (s1)\n\t" // r02 + "add s1, s1, t1\n\t" + "vle.v v3, (s1)\n\t" // r03 + "add s1, s1, t1\n\t" + "vle.v v4, (s1)\n\t" // r04 + "add s1, s1, t1\n\t" + "vle.v v5, (s1)\n\t" // r05 + "add s1, s1, t1\n\t" + + //--------------------------------------------- + "vfadd.vv v26, v1, v2\n\t" // r01 + r02 = tmp02a + "vfsub.vv v6, v1, v2\n\t" // r01 - r02 = tmp13a + + "vfadd.vv v7, v3, v4\n\t" // r03 + r04 = tmp02b + "vfsub.vv v8, v3, v4\n\t" // r03 - r04 = tmp13b + "vmv.v.v v25, v6\n\t" // v25 = tmp13a + //--------------------------------------------- + "vfadd.vv v24, v0, v26\n\t" // r00 + tmp02a + "vfadd.vv v24, v24, v7\n\t" // r00 + tmp02a + tmp02b + "vse.v v24, (a0)\n\t" + + "vfmacc.vf v25, fa0, v8\n\t" // tmp13a + 2 * tmp13b + "vse.v v25, (a1)\n\t" + + "vfmacc.vf v26, fa1, v7\n\t" // tmp02a + 4 * tmp02b + "vse.v v26, (a2)\n\t" + + "vfadd.vv v27, v5, v6\n\t" // r05 + tmp13a + "vfmacc.vf v27, fa2, v8\n\t" // r05 + tmp13a * 8 tmp13b + "vse.v v27, (a3)\n\t" + //--------------------------------------------- + + "addi t5, t5, 16\n\t" // tmp[0][0] --> tmp[0][1] + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "2:\n\t" + + "mv t5, %2\n\t" // tmp start addr + "li t0, 4\n\t" // m = 4 + "slli t1, %5, 4\n\t" // t1 = out_w4 * 8 * 2 bytes + "vle.v v16, (%6)\n\t" // load 8 channel bias data + + "3:\n\t" // shape : [4 * 6] * [6 * 4] = [4 * 4] + + "mv a0, %1\n\t" + "addi a1, a0, 16\n\t" + "addi a2, a1, 16\n\t" + "addi a3, a2, 16\n\t" + + "vle.v v0, (t5)\n\t" // tmp[m][0] + "addi t5, t5, 16\n\t" + "vle.v v1, (t5)\n\t" // tmp[m][1] + "addi t5, t5, 16\n\t" + "vle.v v2, (t5)\n\t" // tmp[m][2] + "addi t5, t5, 16\n\t" + "vle.v v3, (t5)\n\t" // tmp[m][3] + "addi t5, t5, 16\n\t" + "vle.v v4, (t5)\n\t" // tmp[m][4] + "addi t5, t5, 16\n\t" + "vle.v v5, (t5)\n\t" // tmp[m][5] + "addi t5, t5, 16\n\t" + + //--------------------------------------------- + "vfadd.vv v26, v1, v2\n\t" // r01 + r02 = tmp02a + "vfsub.vv v6, v1, v2\n\t" // r01 - r02 = tmp13a + + "vfadd.vv v7, v3, v4\n\t" // r03 + r04 = tmp02b + "vfsub.vv v8, v3, v4\n\t" // r03 - r04 = tmp13b + "vmv.v.v v25, v6\n\t" // v25 = tmp13a + //--------------------------------------------- + "vfadd.vv v24, v0, v26\n\t" // r00 + tmp02a + "vfadd.vv v24, v24, v7\n\t" // r00 + tmp02a + tmp02b + "vfadd.vv v24, v24, v16\n\t" // add bias + "vse.v v24, (a0)\n\t" + + "vfmacc.vf v25, fa0, v8\n\t" // tmp13a + 2 * tmp13b + "vfadd.vv v25, v25, v16\n\t" // add bias + "vse.v v25, (a1)\n\t" + + "vfmacc.vf v26, fa1, v7\n\t" // tmp02a + 4 * tmp02b + "vfadd.vv v26, v26, v16\n\t" // add bias + "vse.v v26, (a2)\n\t" + + "vfadd.vv v27, v5, v6\n\t" // r05 + tmp13a + "vfmacc.vf v27, fa2, v8\n\t" // r05 + tmp13a * 8 tmp13b + "vfadd.vv v27, v27, v16\n\t" // add bias + "vse.v v27, (a3)\n\t" + + "add %1, %1, t1\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 3b" + + :"=r"(output0_tm_0), // %0 + "=r"(output0), // %1 + "=r"(tmp1), // %2 + "=r"(ratio_ptr), // %3 + "=r"(tiles), // %4 + "=r"(out_w4), // %5 + "=r"(bias_tmp) // %6 + :"0"(output0_tm_0), + "1"(output0), + "2"(tmp1), + "3"(ratio_ptr), + "4"(tiles), + "5"(out_w4), + "6"(bias_tmp) + + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v16", "v24", "v25", "v26", "v27", + "t0", "t1", "t2", "t5", "s1", "a0", "a1", "a2", "a3", + "fa0", "fa1", "fa2" + ); + } + } + csi_mem_free(tmp1); + } + + csi_mem_free(output_dot_buf); + // crop the output after transform: cut extra part (right , bottom) + csi_c906_crop_output_pack8to1_fp16(output_tm1_buf, output_data, out_c, out_h, out_w, block_h * 4, block_w * 4); + output_data += output_size; + csi_mem_free(output_tm1_buf); + } + + if (!flag_bias) { + csi_mem_free(bias_data); + bias_data = NULL; } - csi_mem_free(input_padd_buf); - csi_mem_free(input_trans_buf); - csi_mem_free(output_trans_buf); return CSINN_TRUE; } diff --git a/source/c906_opt/convolution_gemm_fp16.c b/source/c906_opt/convolution_gemm_fp16.c index 77cd7bb5..41573054 100644 --- a/source/c906_opt/convolution_gemm_fp16.c +++ b/source/c906_opt/convolution_gemm_fp16.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -66,7 +66,7 @@ int csi_c906_conv_im2col_sgemm_fp16(struct csi_tensor *input, int32_t stride_w = params->stride_width; int32_t pad_left = params->pad_left; int32_t pad_top = params->pad_top; - + int32_t pad_if_zero = pad_left + pad_top; // im2col matrix_col = out_height * out_width // im2col matrix_row = channel_col int channel_col = in_ch / group * ksize_h * ksize_w; @@ -78,42 +78,104 @@ int csi_c906_conv_im2col_sgemm_fp16(struct csi_tensor *input, __fp16 *im2col_data = (__fp16 *)csi_mem_alloc(k * n * sizeof(__fp16)); __fp16* pb_reorder = (__fp16 *)csi_mem_alloc(k * n * sizeof(__fp16)); - for (int i = 0; i < batch; i++) { - for (int g = 0; g < group; g++) { - - // im2col - for(int c = 0; c < channel_col; ++c) { - int w_offset = c % ksize_w; - int h_offset = c / ksize_w % ksize_h; - int c_im = c / ksize_h / ksize_w; - for(int h = 0; h < out_height; ++h) { - for(int w = 0; w < out_width; ++w) { - int im_row = h_offset + h * stride_h; - int im_col = w_offset + w * stride_w; - int col_index = (c * out_height + h) * out_width + w; // [channel_col, out_h, out_w] - im_row = im_row - params->pad_top; - im_col = im_col - params->pad_left; - if(im_row < 0 || im_col < 0 || im_row >= in_height || im_col >= in_width) { - im2col_data[col_index] = 0.0f; - } else { - im2col_data[col_index] = input_data[(c_im * input->dim[2] + im_row) * input->dim[3] + im_col]; + if(pad_if_zero) + { + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + // im2col + for(int c = 0; c < channel_col; ++c) { + int w_offset = c % ksize_w; + int h_offset = c / ksize_w % ksize_h; + int c_im = c / ksize_h / ksize_w; + for(int h = 0; h < out_height; ++h) { + for(int w = 0; w < out_width; ++w) { + int im_row = h_offset + h * stride_h; + int im_col = w_offset + w * stride_w; + int col_index = (c * out_height + h) * out_width + w; // [channel_col, out_h, out_w] + im_row = im_row - params->pad_top; + im_col = im_col - params->pad_left; + if(im_row < 0 || im_col < 0 || im_row >= in_height || im_col >= in_width) { + im2col_data[col_index] = 0.0f; + } else { + im2col_data[col_index] = input_data[(c_im * input->dim[2] + im_row) * input->dim[3] + im_col]; + } } } } + + __fp16 *pa = kernel_data + g * m * k; + __fp16 *pb = pb_reorder; + __fp16 *pc = output_data; + + // pack + csi_c906_reorder_input_fp16_1(im2col_data, pb, k, n, n); + // GEMM + csi_c906_sgemm_kernel_fp16(pc, pa, pb, m, k, n, n, bias_data + g * m); + input_data += in_ch / group * in_height * in_width; + output_data += m * n; } + } + } + else{ + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + // im2col + for(int c = 0; c < channel_col; ++c) { + int w_offset = c % ksize_w; + int h_offset = c / ksize_w % ksize_h; + int c_im = c / ksize_h / ksize_w; + int input_h = c_im * in_height; + int im_row =h_offset; + int col_index_tmp = (c * out_height ) * out_width; - __fp16 *pa = kernel_data + g * m * k; - __fp16 *pb = pb_reorder; - __fp16 *pc = output_data; + for (int h = 0; h < out_height; ++h) { + int im_col = w_offset; - // pack - csi_c906_reorder_input_fp16(im2col_data, pb, k, n, n); - // GEMM - csi_c906_sgemm_kernel_fp16(pc, pa, pb, m, k, n, n, bias_data + g * m); - input_data += in_ch / group * in_height * in_width; - output_data += m * n; + int w = 0; + for (; w + 15 < out_width; w += 16) { + // printf("%d \n",out_width); + int col_index = col_index_tmp + w; + + vfloat16m2_t v_input_data = + vlse16_v_f16m2(input_data + (input_h + im_row) * in_width + im_col, + 2 * stride_w, 16); + + vse16_v_f16m2(im2col_data + col_index, v_input_data, 16); + + im_col += 16 * stride_w; + } + if (w != out_width) { + int vl = out_width - w; + + int col_index = col_index_tmp + w; + vfloat16m2_t v_input_data = + vlse16_v_f16m2(input_data + (input_h + im_row) * in_width + im_col, + 2 * stride_w, vl); + vse16_v_f16m2(im2col_data + col_index, v_input_data, vl); + im_col += vl * stride_w; + } + + im_row += stride_h; + col_index_tmp += out_width; + } + } + + __fp16 *pa = kernel_data + g * m * k; + __fp16 *pb = pb_reorder; + __fp16 *pc = output_data; + + // pack + csi_nn_rvv_reorder_input_z16_fp16(im2col_data, pb, k, n, n); + // csi_c906_reorder_input_fp16_1(im2col_data, pb, k, n, n); + // GEMM + csi_nn_rvv_gemm_8x16_fp16(pc, pa, pb, m, k, n, n, bias_data + g * m); + // csi_c906_sgemm_kernel_fp16(pc, pa, pb, m, k, n, n, bias_data + g * m); + input_data += in_ch / group * in_height * in_width; + output_data += m * n; + } } } + csi_mem_free(pb_reorder); csi_mem_free(im2col_data); return CSINN_TRUE; diff --git a/source/c906_opt/convolution_relu.c b/source/c906_opt/convolution_relu.c index ecb07cf4..5a2c1e0d 100644 --- a/source/c906_opt/convolution_relu.c +++ b/source/c906_opt/convolution_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/convolution_sgemm.c b/source/c906_opt/convolution_sgemm.c index 6f3d1c64..509bf595 100644 --- a/source/c906_opt/convolution_sgemm.c +++ b/source/c906_opt/convolution_sgemm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -109,7 +109,7 @@ static int csi_c906_conv_im2col_sgemm_base(struct csi_tensor *input, float *pc = output_data; // pack - csi_c906_reorder_input(im2col_data, pb, k, n, n); + csi_c906_reorder_input_1(im2col_data, pb, k, n, n); // GEMM csi_c906_sgemm_kernel_f32(pc, pa, pb, m, k, n, n, bias_data + g * m, fuse_relu); input_data += in_ch / group * in_height * in_width; diff --git a/source/c906_opt/depthwise_convolution_3x3.c b/source/c906_opt/depthwise_convolution_3x3.c index 3180351c..e7dcd292 100644 --- a/source/c906_opt/depthwise_convolution_3x3.c +++ b/source/c906_opt/depthwise_convolution_3x3.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -118,7 +118,7 @@ int DWCONV3X3S1(struct csi_tensor *input, in_h = in_h + params->pad_top + params->pad_down; in_w = in_w + params->pad_left + params->pad_right; -#pragma omp parallel for num_threads(8) +#pragma omp parallel for num_threads(1) for (int c = 0; c < in_c; c++) { float *out = output_data + c * out_h * out_w; float *outptr0 = out; @@ -770,7 +770,7 @@ int DWCONV3X3S2(struct csi_tensor *input, int tailstep = in_w - 2 * out_w + in_w; -#pragma omp parallel for num_threads(8) +#pragma omp parallel for num_threads(1) for (int c = 0; c < in_c; c++) { float *out = output_data + c * out_h * out_w; diff --git a/source/c906_opt/depthwise_convolution_3x3_fp16.c b/source/c906_opt/depthwise_convolution_3x3_fp16.c index 750d0134..4180ab9f 100644 --- a/source/c906_opt/depthwise_convolution_3x3_fp16.c +++ b/source/c906_opt/depthwise_convolution_3x3_fp16.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -82,7 +82,7 @@ int csi_c906_dwconv3x3s1_fp16(struct csi_tensor *input, in_h = in_h + params->pad_top + params->pad_down; in_w = in_w + params->pad_left + params->pad_right; -#pragma omp parallel for num_threads(8) +#pragma omp parallel for num_threads(1) for (int c = 0; c < in_c; c++) { __fp16 *out = output_data + c * out_h * out_w; __fp16 *outptr0 = out; @@ -616,7 +616,7 @@ int csi_c906_dwconv3x3s2_fp16(struct csi_tensor *input, int tailstep = in_w - 2 * out_w + in_w; -#pragma omp parallel for num_threads(8) +#pragma omp parallel for num_threads(1) for (int c = 0; c < in_c; c++) { __fp16 *out = output_data + c * out_h * out_w; diff --git a/source/c906_opt/depthwise_convolution_3x3_pack4.c b/source/c906_opt/depthwise_convolution_3x3_pack4.c index e0e66590..8977776c 100644 --- a/source/c906_opt/depthwise_convolution_3x3_pack4.c +++ b/source/c906_opt/depthwise_convolution_3x3_pack4.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/depthwise_convolution_3x3_pack8_fp16.c b/source/c906_opt/depthwise_convolution_3x3_pack8_fp16.c index c2d29bfc..a638ee78 100644 --- a/source/c906_opt/depthwise_convolution_3x3_pack8_fp16.c +++ b/source/c906_opt/depthwise_convolution_3x3_pack8_fp16.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/depthwise_convolution_5x5.c b/source/c906_opt/depthwise_convolution_5x5.c index ebd5bbf5..6805d967 100644 --- a/source/c906_opt/depthwise_convolution_5x5.c +++ b/source/c906_opt/depthwise_convolution_5x5.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -60,7 +60,7 @@ int DWCONV5X5S1(struct csi_tensor *input, in_h = in_h + params->pad_top + params->pad_down; in_w = in_w + params->pad_left + params->pad_right; -#pragma omp parallel for num_threads(8) +#pragma omp parallel for num_threads(1) for (int c = 0; c < in_c; c++) { float *out = output_data + c * out_h * out_w; float *outptr0 = out; @@ -204,7 +204,7 @@ int DWCONV5X5S2(struct csi_tensor *input, const int tailstep = in_w - 2 * out_w + in_w; -#pragma omp parallel for num_threads(8) +#pragma omp parallel for num_threads(1) for (int c = 0; c < in_c; c++) { float *out = output_data + c * out_h * out_w; float *outptr0 = out; diff --git a/source/c906_opt/depthwise_convolution_fp16.c b/source/c906_opt/depthwise_convolution_fp16.c new file mode 100644 index 00000000..c591db60 --- /dev/null +++ b/source/c906_opt/depthwise_convolution_fp16.c @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" + +int csi_c906_dwconv2d_s1_pad0_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *kernel_data = (__fp16 *)kernel->data; + __fp16 *bias_data = (__fp16 *)bias->data; + + const int32_t dilation_width_factor = params->dilation_width; + const int32_t dilation_height_factor = params->dilation_height; + const int32_t batches = input->dim[0]; + const int32_t input_depth = input->dim[1]; + const int32_t output_depth = output->dim[1]; + const int32_t input_height = input->dim[2]; + const int32_t input_width = input->dim[3]; + const int32_t filter_height = kernel->dim[2]; + const int32_t filter_width = kernel->dim[3]; + const int32_t output_height = output->dim[2]; + const int32_t output_width = output->dim[3]; // input_depth = output_depth; + + for (int32_t b = 0; b < batches; ++b) { + int output_dim_pos = 0; + for (int32_t ic = 0; ic < input_depth; ++ic) { + int kernel_dim_pos_tmp = (ic * kernel->dim[1]) * filter_height * filter_width; + int input_dim_pos_tmp = (b * input_depth + ic) * input_height * input_width; + for (int32_t out_y = 0; out_y < output_height; ++out_y) { + for (int32_t out_x = 0; out_x < output_width; ++out_x) { + __fp16 acc = 0; + vfloat16m1_t _acc = vfmv_v_f_f16m1(0.0f, 8); + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) { + int kernel_dim_pos_tmp2 = kernel_dim_pos_tmp + filter_y * filter_width; + int32_t filter_x = 0; + for (; filter_x + 7 < filter_width; filter_x += 8) { + const int32_t in_x = out_x + filter_x; + const int32_t in_y = out_y + filter_y; + int input_dim_pos = input_dim_pos_tmp + in_y * input_width + in_x; + int kernel_dim_pos = kernel_dim_pos_tmp2 + filter_x; + vfloat16m1_t _input_val = vle16_v_f16m1(input_data + input_dim_pos, 8); + vfloat16m1_t _kernel_data = + vle16_v_f16m1(kernel_data + kernel_dim_pos, 8); + _acc = vfmacc_vv_f16m1(_acc, _input_val, _kernel_data, 8); + } + + vfloat16m1_t _0_f = vfmv_v_f_f16m1(0.0f, 8); + vfloat16m1_t _sum2 = vfredosum_vs_f16m1_f16m1(_0_f, _acc, _0_f, 16); + acc = vfmv_f_s_f16m1_f16(_sum2); + for (; filter_x < filter_width; ++filter_x) { + const int32_t in_x = out_x + filter_x; + const int32_t in_y = out_y + filter_y; + int input_dim_pos = input_dim_pos_tmp + in_y * input_width + in_x; + int kernel_dim_pos = kernel_dim_pos_tmp2 + filter_x; + __fp16 input_val = input_data[input_dim_pos]; + __fp16 filter_val = kernel_data[kernel_dim_pos]; + acc += (filter_val) * (input_val); + } + } + acc += bias_data[ic]; + output_data[output_dim_pos] = acc; + output_dim_pos++; + } + } + } + } + return CSINN_TRUE; +} diff --git a/source/c906_opt/depthwise_convolution_relu_3x3.c b/source/c906_opt/depthwise_convolution_relu_3x3.c index de3e1627..8f7ba794 100644 --- a/source/c906_opt/depthwise_convolution_relu_3x3.c +++ b/source/c906_opt/depthwise_convolution_relu_3x3.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #define DWCONV3X3S1 csi_c906_dwconv3x3s1_fuse_relu #define DWCONV3X3S2 csi_c906_dwconv3x3s2_fuse_relu diff --git a/source/c906_opt/depthwise_convolution_relu_3x3_pack4.c b/source/c906_opt/depthwise_convolution_relu_3x3_pack4.c index 92ba9a09..64001ad2 100644 --- a/source/c906_opt/depthwise_convolution_relu_3x3_pack4.c +++ b/source/c906_opt/depthwise_convolution_relu_3x3_pack4.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #define DWCONV3X3S1_PACK4 csi_c906_dwconv3x3s1_pack4_fuse_relu #define DWCONV3X3S2_PACK4 csi_c906_dwconv3x3s2_pack4_fuse_relu diff --git a/source/c906_opt/depthwise_convolution_relu_5x5.c b/source/c906_opt/depthwise_convolution_relu_5x5.c index 973d27ef..106becd9 100644 --- a/source/c906_opt/depthwise_convolution_relu_5x5.c +++ b/source/c906_opt/depthwise_convolution_relu_5x5.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #define DWCONV5X5S1 csi_c906_dwconv5x5s1_fuse_relu #define DWCONV5X5S2 csi_c906_dwconv5x5s2_fuse_relu diff --git a/source/c906_opt/div.c b/source/c906_opt/div.c new file mode 100644 index 00000000..bbfd9fbe --- /dev/null +++ b/source/c906_opt/div.c @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" +int csi_c906_div_init(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) +{ + if (input1->dtype == CSINN_DTYPE_FLOAT32) { + float *ptr = input1->data; + size_t tensor_size = csi_tensor_size(input1); + for (size_t i = 0; i < tensor_size; i++) { + ptr[i] = 1.f / ptr[i]; + } + params->base.bc = csi_c906_mul_f32; + } else if (input1->dtype == CSINN_DTYPE_FLOAT16) { + __fp16 *ptr = input1->data; + size_t tensor_size = csi_tensor_size(input1); + for (size_t i = 0; i < tensor_size; i++) { + ptr[i] = 1.f / ptr[i]; + } + params->base.bc = csi_c906_mul_fp16; + } + return CSINN_TRUE; +} diff --git a/source/c906_opt/fullyconnected.c b/source/c906_opt/fullyconnected.c index 2d9b7f5f..51345b0a 100644 --- a/source/c906_opt/fullyconnected.c +++ b/source/c906_opt/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" /* - change memory layout for weight matrix [out_nodes * in_nodes] by N shape + change memory layout for weight matrix [out_nodes * in_nodes] by N(8) shape */ -void csi_c906_reorder_weight_fp16(__fp16 *src, __fp16 *dst, int m, int k, int ldx) +void csi_c906_reorder_weight_n8_fp16(__fp16 *src, __fp16 *dst, int m, int k, int ldx) { int i = 0; for (; i + 7 < m; i += 8) { @@ -47,6 +47,39 @@ void csi_c906_reorder_weight_fp16(__fp16 *src, __fp16 *dst, int m, int k, int ld } } +void csi_c906_reorder_weight_n16_fp16(__fp16 *src, __fp16 *dst, int m, int k, int ldx) +{ + int i = 0; + for (; i + 15 < m; i += 16) { + for (int j = 0; j < k; j++) { + dst[i * k + 16 * j + 0] = src[(i + 0) * k + j]; + dst[i * k + 16 * j + 1] = src[(i + 1) * k + j]; + dst[i * k + 16 * j + 2] = src[(i + 2) * k + j]; + dst[i * k + 16 * j + 3] = src[(i + 3) * k + j]; + dst[i * k + 16 * j + 4] = src[(i + 4) * k + j]; + dst[i * k + 16 * j + 5] = src[(i + 5) * k + j]; + dst[i * k + 16 * j + 6] = src[(i + 6) * k + j]; + dst[i * k + 16 * j + 7] = src[(i + 7) * k + j]; + dst[i * k + 16 * j + 8] = src[(i + 8) * k + j]; + dst[i * k + 16 * j + 9] = src[(i + 9) * k + j]; + dst[i * k + 16 * j + 10] = src[(i + 10) * k + j]; + dst[i * k + 16 * j + 11] = src[(i + 11) * k + j]; + dst[i * k + 16 * j + 12] = src[(i + 12) * k + j]; + dst[i * k + 16 * j + 13] = src[(i + 13) * k + j]; + dst[i * k + 16 * j + 14] = src[(i + 14) * k + j]; + dst[i * k + 16 * j + 15] = src[(i + 15) * k + j]; + } + } + // TODO: reorder N8 + dst += i * k; + src += i * k; + for (; i < m; i++) { + csi_c906_memcpy(dst, src, sizeof(__fp16) * ldx); + dst += k; + src += k; + } +} + void csi_c906_fc_gemv_transform_weight_fp16(struct csi_tensor *weights) { @@ -56,7 +89,7 @@ void csi_c906_fc_gemv_transform_weight_fp16(struct csi_tensor *weights) int k = weights->dim[1]; // in_nodes __fp16* pa_reorder = (__fp16 *)csi_mem_alloc(n * k * sizeof(__fp16)); - csi_c906_reorder_weight_fp16(weight_data, pa_reorder, n, k, k); + csi_c906_reorder_weight_n16_fp16(weight_data, pa_reorder, n, k, k); memcpy(weight_data, pa_reorder, n * k * sizeof(__fp16)); csi_mem_free(pa_reorder); } @@ -75,7 +108,11 @@ int csi_c906_fullyconnected_f32(struct csi_tensor *input, const int output_dims_count = output->dim_count; const int weights_dims_count = weights->dim_count; const int bias_dims_count = bias->dim_count; - const int batches = output->dim[0]; + int batches = 1; + /* compute the outer size */ + for (int i = 0; i < output_dims_count - 1; i++) { + batches *= output->dim[i]; + } const int output_depth = weights->dim[weights_dims_count - 2]; // output_nodes const int accum_depth = weights->dim[weights_dims_count - 1]; // input_nodes @@ -154,7 +191,11 @@ int csi_c906_fullyconnected_fp16(struct csi_tensor *input, const int output_dims_count = output->dim_count; const int weights_dims_count = weights->dim_count; const int bias_dims_count = bias->dim_count; - int batches = output->dim[0]; + int batches = 1; + /* compute the outer size */ + for (int i = 0; i < output_dims_count - 1; i++) { + batches *= output->dim[i]; + } int output_depth = weights->dim[weights_dims_count - 2]; // output_nodes int accum_depth = weights->dim[weights_dims_count - 1]; // input_nodes @@ -391,12 +432,15 @@ int csi_c906_fullyconnected_fp16(struct csi_tensor *input, return CSINN_TRUE; } -// best implementation from the software perspective -int csi_c906_fullyconnected_fp16_1(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, - struct fc_params *params) +/* + best implementation from the software perspective + loop unroll: k = 8 +*/ +int csi_c906_fullyconnected_pack8_fp16(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *weights, + struct csi_tensor *bias, + struct fc_params *params) { __fp16 *input_data = (__fp16 *)input->data; __fp16 *output_data = (__fp16 *)output->data; @@ -405,7 +449,11 @@ int csi_c906_fullyconnected_fp16_1(struct csi_tensor *input, const int output_dims_count = output->dim_count; const int weights_dims_count = weights->dim_count; const int bias_dims_count = bias->dim_count; - int batches = output->dim[0]; + int batches = 1; + /* compute the outer size */ + for (int i = 0; i < output_dims_count - 1; i++) { + batches *= output->dim[i]; + } int output_depth = weights->dim[weights_dims_count - 2]; // output_nodes int accum_depth = weights->dim[weights_dims_count - 1]; // input_nodes @@ -565,230 +613,207 @@ int csi_c906_fullyconnected_fp16_1(struct csi_tensor *input, "addi t0, t0, -1\n\t" "bnez t0, 1b\n\t" - // "7:\n\t" - "7:\n\t" // m1n4 + "7:\n\t" // m1n_tail + "andi t0, %4, 7\n\t" // n_tail + "beqz t0, 12f\n\t" // if n_tail = 0, jump to ending // prepare for n4 n2 n1 "andi t2, %5, 7\n\t" // t2 = k_tail "slli t3, t2, 1\n\t" // t3 = k_tail * 2 - "andi t0, %4, 7\n\t" // n & 7 - "srai t0, t0, 2\n\t" // (n & 7) >> 2 - "beqz t0, 11f\n\t" // jump to m1n2 - // start kernel_m1n4 + "8:\n\t" + "mv t6, %1\n\t" "vmv.v.x v24, zero\n\t" - "vmv.v.x v25, zero\n\t" - "vmv.v.x v26, zero\n\t" - "vmv.v.x v27, zero\n\t" // clear acc - - "flh fs0, 0(%3)\n\t" - "flh fs1, 2(%3)\n\t" - "flh fs2, 4(%3)\n\t" - "flh fs3, 6(%3)\n\t" - "addi %3, %3, 8\n\t" - - "vfmv.s.f v28, fs0\n\t" // v28[0] = bias[0] - "vfmv.s.f v29, fs1\n\t" // v29[0] = bias[1] - "vfmv.s.f v30, fs2\n\t" // v30[0] = bias[2] - "vfmv.s.f v31, fs3\n\t" // v31[0] = bias[3] - - // init addr for pa, pb and pc - "slli t0, %5, 1\n\t" // t_tmp = k * 2 - - "mv t6, %1\n\t" // t6 hold pa(input) 1 lines start addr - - "mv a4, %2\n\t" - "add a5, a4, t0\n\t" - "add a6, a5, t0\n\t" - "add a7, a6, t0\n\t" // a4-a7 hold pb(weight) 4 cols addr - - // %0 hold pc(output) addr + "flh fa0, 0(%3)\n\t" + "addi %3, %3, 2\n\t" + "vfmv.s.f v25, fa0\n\t" // v25[0] = bias "mv t5, t1\n\t" // t5 = k8 - "beqz t2, 9f\n\t" // if k_tail == 0, jump to subkernel_m1n4k8 + "beqz t2, 10f\n\t" // if k_tail = 0 - "8:\n\t" - // start subkernel_m1n4k_tail + "9:\n\t" + // m1n1k_tail "vsetvli zero, t2, e16, m1\n\t" "vle.v v1, (t6)\n\t" "add t6, t6, t3\n\t" - "vle.v v2, (a4)\n\t" - "add a4, a4, t3\n\t" - "vle.v v3, (a5)\n\t" - "add a5, a5, t3\n\t" - "vle.v v4, (a6)\n\t" - "add a6, a6, t3\n\t" - "vle.v v5, (a7)\n\t" - "add a7, a7, t3\n\t" + "vle.v v2, (%2)\n\t" + "add %2, %2, t3\n\t" "vfmacc.vv v24, v1, v2\n\t" - "vfmacc.vv v25, v1, v3\n\t" - "vfmacc.vv v26, v1, v4\n\t" - "vfmacc.vv v27, v1, v5\n\t" - "beqz t1, 10f\n\t" // if k8 == 0, jump to end kernel_m1n4 + "beqz t1, 11f\n\t" // if k8 == 0, jump to end m1n1 "vsetvli zero, zero, e16, m1\n\t" - "9:\n\t" - // start subkernel_m1n4k8 + "10:\n\t" + // m1n1k8 "vle.v v1, (t6)\n\t" "addi t6, t6, 16\n\t" - "vle.v v2, (a4)\n\t" - "addi a4, a4, 16\n\t" - "vle.v v3, (a5)\n\t" - "addi a5, a5, 16\n\t" - "vle.v v4, (a6)\n\t" - "addi a6, a6, 16\n\t" - "vle.v v5, (a7)\n\t" - "addi a7, a7, 16\n\t" + "vle.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" "vfmacc.vv v24, v1, v2\n\t" - "vfmacc.vv v25, v1, v3\n\t" - "vfmacc.vv v26, v1, v4\n\t" - "vfmacc.vv v27, v1, v5\n\t" "addi t5, t5, -1\n\t" - "bnez t5, 9b\n\t" - + "bnez t5, 10b\n\t" - "10:\n\t" // end kernel_m1n4 - - "vfredsum.vs v28, v24, v28\n\t" // v28[0] = v28[0](bias) + sum(v24[0..7]) - "vfredsum.vs v29, v25, v29\n\t" - "vfredsum.vs v30, v26, v30\n\t" - "vfredsum.vs v31, v27, v31\n\t" - "vfmv.f.s fa0, v28\n\t" - "vfmv.f.s fa1, v29\n\t" - "vfmv.f.s fa2, v30\n\t" - "vfmv.f.s fa3, v31\n\t" + "11:\n\t" // end m1n1 + "vfredsum.vs v25, v24, v25\n\t" // v25[0] = v25[0](bias) + sum(v24[0..7]) + "vfmv.f.s fa0, v25\n\t" "fsh fa0, 0(%0)\n\t" - "fsh fa1, 2(%0)\n\t" - "fsh fa2, 4(%0)\n\t" - "fsh fa3, 6(%0)\n\t" + "addi %0, %0, 2\n\t" - "addi %0, %0, 8\n\t" // updata output start addr ( +4 cols) - "slli t0, %5, 3\n\t" // t_tmp = k * 4 * 2 - "add %2, %2, t0\n\t" // updata pb start addr + "addi t0, t0, -1\n\t" + "bnez t0, 8b\n\t" - "11:\n\t" // m1n2 - "andi t0, %4, 3\n\t" // n & 3 - "srai t0, t0, 1\n\t" // (n & 3) >> 1 - "beqz t0, 15f\n\t" // jump to m1n1 - // start kernel_m1n2 + "12:\n\t" // ending - "vmv.v.x v24, zero\n\t" - "vmv.v.x v25, zero\n\t" // clear acc + :"=r"(init_output), // %0 + "=r"(init_input), // %1 + "=r"(init_weight), // %2 + "=r"(init_bias), // %3 + "=r"(output_depth), // %4 + "=r"(accum_depth) // %5 + :"0"(init_output), + "1"(init_input), + "2"(init_weight), + "3"(init_bias), + "4"(output_depth), + "5"(accum_depth) + :"v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v24", "v25", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "fa0", "ft0" - "flh fs0, 0(%3)\n\t" - "flh fs1, 2(%3)\n\t" - "addi %3, %3, 4\n\t" + ); + } - "vfmv.s.f v28, fs0\n\t" // v28[0] = bias[0] - "vfmv.s.f v29, fs1\n\t" // v29[0] = bias[1] + if (!flag_bias) { + csi_mem_free(bias_data); + bias_data = NULL; + } - // init addr for pa, pb and pc - "slli t0, %5, 1\n\t" // t_tmp = k * 2 + return CSINN_TRUE; +} - "mv t6, %1\n\t" // t6 hold pa(input) 1 lines start addr +/* + loop unroll: k = 1 +*/ +int csi_c906_fullyconnected_pack8_fp16_1(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *weights, + struct csi_tensor *bias, + struct fc_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *weights_data = (__fp16 *)weights->data; + __fp16 *bias_data = (__fp16 *)bias->data; + const int output_dims_count = output->dim_count; + const int weights_dims_count = weights->dim_count; + const int bias_dims_count = bias->dim_count; + int batches = 1; + /* compute the outer size */ + for (int i = 0; i < output_dims_count - 1; i++) { + batches *= output->dim[i]; + } + int output_depth = weights->dim[weights_dims_count - 2]; // output_nodes + int accum_depth = weights->dim[weights_dims_count - 1]; // input_nodes - "mv a4, %2\n\t" - "add a5, a4, t0\n\t" // a4-a5 hold pb(weight) 2 cols addr + bool flag_bias = 1; // default: fc layer include bias + if (bias_data == NULL) { + flag_bias = 0; + bias_data = (__fp16 *)csi_mem_alloc(output_depth * 2); + } - // %0 hold pc(output) addr + for (int b = 0; b < batches; b++) { - "mv t5, t1\n\t" // t5 = k8 - "beqz t2, 13f\n\t" // if k_tail == 0, jump to subkernel_m1n2k8 + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_input = input_data + b * accum_depth; + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; - "12:\n\t" - // start subkernel_m1n2k_tail - "vsetvli zero, t2, e16, m1\n\t" - "vle.v v1, (t6)\n\t" - "add t6, t6, t3\n\t" - "vle.v v2, (a4)\n\t" - "add a4, a4, t3\n\t" - "vle.v v3, (a5)\n\t" - "add a5, a5, t3\n\t" - "vfmacc.vv v24, v1, v2\n\t" - "vfmacc.vv v25, v1, v3\n\t" + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" // set vl = 8 - "beqz t1, 14f\n\t" // if k8 == 0, jump to end kernel_m1n2 - "vsetvli zero, zero, e16, m1\n\t" + "srai t4, %5, 3\n\t" // k8 + "srai t0, %4, 3\n\t" // out_node >> 3 (n8) + "beqz t0, 3f\n\t" - "13:\n\t" - // start subkernel_m1n2k8 - "vle.v v1, (t6)\n\t" - "addi t6, t6, 16\n\t" - "vle.v v2, (a4)\n\t" - "addi a4, a4, 16\n\t" - "vle.v v3, (a5)\n\t" - "addi a5, a5, 16\n\t" - "vfmacc.vv v24, v1, v2\n\t" - "vfmacc.vv v25, v1, v3\n\t" + "1:\n\t" // m1n8 + "vle.v v4, (%3)\n\t" // init out_tmp = bias_data + "addi %3, %3, 16\n\t" - "addi t5, t5, -1\n\t" - "bnez t5, 13b\n\t" + "mv t1, %5\n\t" // in_node (k) + "mv t6, %1\n\t" // init input_data addr - "14:\n\t" // end kernel_m1n2 + "2:\n\t" + // m1n8k1 + "vle.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" + "flh fa0, 0(t6)\n\t" + "vfmacc.vf v4, fa0, v2\n\t" + "addi t6, t6, 2\n\t" - "vfredsum.vs v28, v24, v28\n\t" // v28[0] = v28[0](bias) + sum(v24[0..7]) - "vfredsum.vs v29, v25, v29\n\t" - "vfmv.f.s fa0, v28\n\t" - "vfmv.f.s fa1, v29\n\t" - "fsh fa0, 0(%0)\n\t" - "fsh fa1, 2(%0)\n\t" + "addi t1, t1, -1\n\t" + "bnez t1, 2b\n\t" - "addi %0, %0, 4\n\t" // updata output start addr ( +2 cols) - "slli t0, %5, 2\n\t" // t_tmp = k * 2 * 2 - "add %2, %2, t0\n\t" // updata pb start addr + "vse.v v4, (%0)\n\t" + "addi %0, %0, 16\n\t" - "15:\n\t" // m1n1 - "andi t0, %4, 1\n\t" // n & 1 - "beqz t0, 19f\n\t" // jump to ending - // start kernel_m1n1 - "vmv.v.x v24, zero\n\t" // clear acc + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" - "flh fs0, 0(%3)\n\t" - "vfmv.s.f v28, fs0\n\t" // v28[0] = bias + "3:\n\t" // n_tail + "andi t0, %4, 7\n\t" // n_tail + "beqz t0, 8f\n\t" // if n_tail = 0, jump to ending - // init addr for pa, pb and pc - "mv t6, %1\n\t" // t6 hold pa(input) 8 lines start addr + // "mv a2, %2\n\t" // updata weight_data addr + "andi t2, %5, 7\n\t" // k_tail + "slli t3, t2, 1\n\t" // k_tail * 2 - "mv a4, %2\n\t" // a4 hold pb(weight) 1 cols addr + "4:\n\t" + "mv t6, %1\n\t" // init input_data addr - // %0 hold pc(output) addr + "vmv.v.x v4, zero\n\t" // clear acc + "flh fa0, 0(%3)\n\t" // load bias + "addi %3, %3, 2\n\t" + "vfmv.s.f v3, fa0\n\t" // v3[0] = bias - "mv t5, t1\n\t" // t5 = k8 - "beqz t2, 17f\n\t" // if k_tail == 0, jump to subkernel_m1n1k8 + "mv t5, t4\n\t" // t5 = k8 + "beqz t2, 6f\n\t" - "16:\n\t" - // start subkernel_m1n1k_tail + "5:\n\t" + // m1n1k_tail "vsetvli zero, t2, e16, m1\n\t" "vle.v v1, (t6)\n\t" "add t6, t6, t3\n\t" - "vle.v v2, (a4)\n\t" - "add a4, a4, t3\n\t" - "vfmacc.vv v24, v1, v2\n\t" + "vle.v v2, (%2)\n\t" + "add %2, %2, t3\n\t" + "vfmacc.vv v4, v1, v2\n\t" - "beqz t1, 18f\n\t" // if k8 == 0, jump to end kernel_m1n1 + "beqz t4, 7f\n\t" // if k8 == 0, jump to end m1n1 "vsetvli zero, zero, e16, m1\n\t" - "17:\n\t" - // start subkernel_m1n1k8 + "6:\n\t" + // m1n1k8 "vle.v v1, (t6)\n\t" "addi t6, t6, 16\n\t" - "vle.v v2, (a4)\n\t" - "addi a4, a4, 16\n\t" - "vfmacc.vv v24, v1, v2\n\t" + "vle.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vfmacc.vv v4, v1, v2\n\t" "addi t5, t5, -1\n\t" - "bnez t5, 17b\n\t" + "bnez t5, 6b\n\t" - "18:\n\t" // end kernel_m1n1 - "vfredsum.vs v28, v24, v28\n\t" // v28[0] = v28[0](bias) + sum(v24[0..7]) - "vfmv.f.s fa0, v28\n\t" + "7:\n\t" // end m1n1 + "vfredsum.vs v3, v4, v3\n\t" // v3[0] = v3[0](bias) + sum(v4[0..7]) + "vfmv.f.s fa0, v3\n\t" "fsh fa0, 0(%0)\n\t" "addi %0, %0, 2\n\t" - "19:\n\t" // ending + "addi t0, t0, -1\n\t" + "bnez t0, 4b\n\t" + + + "8:\n\t" // ending :"=r"(init_output), // %0 "=r"(init_input), // %1 @@ -802,13 +827,12 @@ int csi_c906_fullyconnected_fp16_1(struct csi_tensor *input, "3"(init_bias), "4"(output_depth), "5"(accum_depth) - :"v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", - "a2", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", - "fa0", "fa1", "fa2", "fa3", "ft0", "fs0", "fs1", "fs2", "fs3" - + : "v1", "v2", "v3", "v4", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "fa0" ); - } + } if (!flag_bias) { csi_mem_free(bias_data); bias_data = NULL; @@ -817,12 +841,16 @@ int csi_c906_fullyconnected_fp16_1(struct csi_tensor *input, return CSINN_TRUE; } -// best performance measured on D1 -int csi_c906_fullyconnected_fp16_2(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, - struct fc_params *params) + +/* + best performance measured on D1 + loop unroll: k = 1 && pack16 +*/ +int csi_c906_fullyconnected_pack16_fp16(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *weights, + struct csi_tensor *bias, + struct fc_params *params) { __fp16 *input_data = (__fp16 *)input->data; __fp16 *output_data = (__fp16 *)output->data; @@ -831,7 +859,11 @@ int csi_c906_fullyconnected_fp16_2(struct csi_tensor *input, const int output_dims_count = output->dim_count; const int weights_dims_count = weights->dim_count; const int bias_dims_count = bias->dim_count; - int batches = output->dim[0]; + int batches = 1; + /* compute the outer size */ + for (int i = 0; i < output_dims_count - 1; i++) { + batches *= output->dim[i]; + } int output_depth = weights->dim[weights_dims_count - 2]; // output_nodes int accum_depth = weights->dim[weights_dims_count - 1]; // input_nodes @@ -849,15 +881,15 @@ int csi_c906_fullyconnected_fp16_2(struct csi_tensor *input, __fp16 *init_bias = bias_data; asm volatile( - "vsetvli zero, zero, e16, m1\n\t" // set vl = 8 + "vsetvli zero, zero, e16, m2\n\t" // set vl = 16 - "srai t4, %5, 3\n\t" // k8 - "srai t0, %4, 3\n\t" // out_node >> 3 (n8) + "srai t4, %5, 4\n\t" // k16 + "srai t0, %4, 4\n\t" // out_node >> 4 (n16) "beqz t0, 3f\n\t" "1:\n\t" // m1n8 "vle.v v4, (%3)\n\t" // init out_tmp = bias_data - "addi %3, %3, 16\n\t" + "addi %3, %3, 32\n\t" "mv t1, %5\n\t" // in_node (k) "mv t6, %1\n\t" // init input_data addr @@ -865,7 +897,7 @@ int csi_c906_fullyconnected_fp16_2(struct csi_tensor *input, "2:\n\t" // m1n8k1 "vle.v v2, (%2)\n\t" - "addi %2, %2, 16\n\t" + "addi %2, %2, 32\n\t" "flh fa0, 0(t6)\n\t" "vfmacc.vf v4, fa0, v2\n\t" "addi t6, t6, 2\n\t" @@ -874,17 +906,17 @@ int csi_c906_fullyconnected_fp16_2(struct csi_tensor *input, "bnez t1, 2b\n\t" "vse.v v4, (%0)\n\t" - "addi %0, %0, 16\n\t" + "addi %0, %0, 32\n\t" "addi t0, t0, -1\n\t" "bnez t0, 1b\n\t" "3:\n\t" // n_tail - "andi t0, %4, 7\n\t" // n_tail - "beqz t0, 8f\n\t" // if n_tail = 0, jump to ending + "andi t0, %4, 15\n\t" // n_tail + "beqz t0, 8f\n\t" // if n_tail = 0, jump to ending // "mv a2, %2\n\t" // updata weight_data addr - "andi t2, %5, 7\n\t" // k_tail + "andi t2, %5, 15\n\t" // k_tail "slli t3, t2, 1\n\t" // k_tail * 2 "4:\n\t" @@ -893,44 +925,43 @@ int csi_c906_fullyconnected_fp16_2(struct csi_tensor *input, "vmv.v.x v4, zero\n\t" // clear acc "flh fa0, 0(%3)\n\t" // load bias "addi %3, %3, 2\n\t" - "vfmv.s.f v3, fa0\n\t" // v3[0] = bias + "vfmv.s.f v6, fa0\n\t" // v6[0] = bias "mv t5, t4\n\t" // t5 = k8 "beqz t2, 6f\n\t" "5:\n\t" // m1n1k_tail - "vsetvli zero, t2, e16, m1\n\t" - "vle.v v1, (t6)\n\t" + "vsetvli zero, t2, e16, m2\n\t" + "vle.v v0, (t6)\n\t" "add t6, t6, t3\n\t" "vle.v v2, (%2)\n\t" "add %2, %2, t3\n\t" - "vfmacc.vv v4, v1, v2\n\t" + "vfmacc.vv v4, v0, v2\n\t" "beqz t4, 7f\n\t" // if k8 == 0, jump to end m1n1 - "vsetvli zero, zero, e16, m1\n\t" + "vsetvli zero, zero, e16, m2\n\t" "6:\n\t" // m1n1k8 - "vle.v v1, (t6)\n\t" - "addi t6, t6, 16\n\t" + "vle.v v0, (t6)\n\t" + "addi t6, t6, 32\n\t" "vle.v v2, (%2)\n\t" - "addi %2, %2, 16\n\t" - "vfmacc.vv v4, v1, v2\n\t" + "addi %2, %2, 32\n\t" + "vfmacc.vv v4, v0, v2\n\t" "addi t5, t5, -1\n\t" "bnez t5, 6b\n\t" "7:\n\t" // end m1n1 - "vfredsum.vs v3, v4, v3\n\t" // v3[0] = v3[0](bias) + sum(v4[0..7]) - "vfmv.f.s fa0, v3\n\t" + "vfredsum.vs v6, v4, v6\n\t" // v6[0] = v6[0](bias) + sum(v4[0..7]) + "vfmv.f.s fa0, v6\n\t" "fsh fa0, 0(%0)\n\t" "addi %0, %0, 2\n\t" "addi t0, t0, -1\n\t" "bnez t0, 4b\n\t" - "8:\n\t" // ending :"=r"(init_output), // %0 @@ -945,7 +976,7 @@ int csi_c906_fullyconnected_fp16_2(struct csi_tensor *input, "3"(init_bias), "4"(output_depth), "5"(accum_depth) - : "v1", "v2", "v3", "v4", + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "fa0" ); @@ -955,10 +986,140 @@ int csi_c906_fullyconnected_fp16_2(struct csi_tensor *input, csi_mem_free(bias_data); bias_data = NULL; } - return CSINN_TRUE; } +int csi_c906_fullyconnected_pack16_output16_fp16(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *weights, + struct csi_tensor *bias, struct fc_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *weights_data = (__fp16 *)weights->data; + __fp16 *bias_data = (__fp16 *)bias->data; + const int output_dims_count = output->dim_count; + const int weights_dims_count = weights->dim_count; + const int bias_dims_count = bias->dim_count; + int batches = 1; + /* compute the outer size */ + for (int i = 0; i < output_dims_count - 1; i++) { + batches *= output->dim[i]; + } + int output_depth = weights->dim[weights_dims_count - 2]; // output_nodes + int accum_depth = weights->dim[weights_dims_count - 1]; // input_nodes + + int packn = 16; + int vl = 16; + int b = 0; + for (; b + 3 < batches; b += 4) { + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_output2 = init_output + output_depth; + __fp16 *init_output3 = init_output2 + output_depth; + __fp16 *init_output4 = init_output3 + output_depth; + __fp16 *init_input = input_data + b * accum_depth; + __fp16 *init_input2 = init_input + accum_depth; + __fp16 *init_input3 = init_input2 + accum_depth; + __fp16 *init_input4 = init_input3 + accum_depth; + + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; + int n = output_depth; + while (n > 0) { + __fp16 *in_ptr = init_input; + __fp16 *in_ptr2 = init_input2; + __fp16 *in_ptr3 = init_input3; + __fp16 *in_ptr4 = init_input4; + + vfloat16m2_t _acc = vle16_v_f16m2(init_bias, vl); + vfloat16m2_t _acc2 = vmv_v_v_f16m2(_acc, vl); + vfloat16m2_t _acc3 = vmv_v_v_f16m2(_acc, vl); + vfloat16m2_t _acc4 = vmv_v_v_f16m2(_acc, vl); + + init_bias += vl; + int k = accum_depth; + while (k > 0) { + vfloat16m2_t _weight = vle16_v_f16m2(init_weight, vl); + _acc = vfmacc_vf_f16m2(_acc, *in_ptr, _weight, vl); + _acc2 = vfmacc_vf_f16m2(_acc2, *in_ptr2, _weight, vl); + _acc3 = vfmacc_vf_f16m2(_acc3, *in_ptr3, _weight, vl); + _acc4 = vfmacc_vf_f16m2(_acc4, *in_ptr4, _weight, vl); + init_weight += vl; + in_ptr++; + in_ptr2++; + in_ptr3++; + in_ptr4++; + k--; + } + vse16_v_f16m2(init_output, _acc, vl); + vse16_v_f16m2(init_output2, _acc2, vl); + vse16_v_f16m2(init_output3, _acc3, vl); + vse16_v_f16m2(init_output4, _acc4, vl); + init_output += vl; + init_output2 += vl; + init_output3 += vl; + init_output4 += vl; + n -= vl; + } + } + for (; b + 1 < batches; b += 2) { + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_output2 = init_output + output_depth; + __fp16 *init_input = input_data + b * accum_depth; + __fp16 *init_input2 = init_input + accum_depth; + + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; + int n = output_depth; + while (n > 0) { + __fp16 *in_ptr = init_input; + __fp16 *in_ptr2 = init_input2; + vfloat16m2_t _acc = vle16_v_f16m2(init_bias, vl); + vfloat16m2_t _acc2 = vmv_v_v_f16m2(_acc, vl); + init_bias += vl; + int k = accum_depth; + while (k > 0) { + vfloat16m2_t _weight = vle16_v_f16m2(init_weight, vl); + _acc = vfmacc_vf_f16m2(_acc, *in_ptr, _weight, vl); + _acc2 = vfmacc_vf_f16m2(_acc2, *in_ptr2, _weight, vl); + init_weight += vl; + in_ptr++; + in_ptr2++; + k--; + } + vse16_v_f16m2(init_output, _acc, vl); + vse16_v_f16m2(init_output2, _acc2, vl); + init_output += vl; + init_output2 += vl; + n -= vl; + } + } + for (; b < batches; b++) { + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_input = input_data + b * accum_depth; + + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; + int n = output_depth; + while (n > 0) { + __fp16 *in_ptr = init_input; + vfloat16m2_t _acc = vle16_v_f16m2(init_bias, vl); + init_bias += vl; + int k = accum_depth; + while (k > 0) { + vfloat16m2_t _weight = vle16_v_f16m2(init_weight, vl); + _acc = vfmacc_vf_f16m2(_acc, *in_ptr, _weight, vl); + init_weight += vl; + in_ptr++; + k--; + } + vse16_v_f16m2(init_output, _acc, vl); + init_output += vl; + n -= vl; + } + } + return CSINN_TRUE; +} int csi_c906_fullyconnected_init(struct csi_tensor *input, struct csi_tensor *output, @@ -967,11 +1128,26 @@ int csi_c906_fullyconnected_init(struct csi_tensor *input, struct fc_params *params) { if (input->dtype == CSINN_DTYPE_FLOAT32) { - params->base.bc = csi_c906_fullyconnected_f32; + csi_nn_rvv_fc_gemv_transform_weight_fp32(weights); + params->base.bc = csi_nn_rvv_fullyconnected_packn_fp32; } else if (input->dtype == CSINN_DTYPE_FLOAT16) { csi_c906_fc_gemv_transform_weight_fp16(weights); - params->base.bc = csi_c906_fullyconnected_fp16_2; + int output_depth = weights->dim[weights->dim_count - 2]; + if (bias != NULL && output_depth % 16 == 0) { + params->base.bc = csi_c906_fullyconnected_pack16_output16_fp16; + } else { + params->base.bc = csi_c906_fullyconnected_pack16_fp16; + } // params->base.bc = csi_c906_fullyconnected_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + csi_nn_rvv_fc_gemv_transform_weight_int8(weights); + // support channel quantization + for (int i = 0; i < weights->quant_channel; i++) { + float real_scale = input->qinfo->scale * weights->qinfo[i].scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale, &(weights->qinfo[i].multiplier), + &(weights->qinfo[i].shift)); + } + params->base.bc = csi_nn_rvv_fullyconnected_packn_int8; } return CSINN_TRUE; } diff --git a/source/c906_opt/gather.c b/source/c906_opt/gather.c new file mode 100644 index 00000000..77791cf2 --- /dev/null +++ b/source/c906_opt/gather.c @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" + +int csi_c906_gather_fp16(struct csi_tensor *input, + struct csi_tensor *indices, + struct csi_tensor *output, + struct gather_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + int32_t *indices_data = (int32_t *)indices->data; + + int inner_size = 1; + for (int i = params->axis + 1; i < input->dim_count; i++) { + inner_size *= input->dim[i]; + } + int outer_size = 1; + for (int i = 0; i < params->axis; i++) { + outer_size *= input->dim[i]; + } + int indices_size = 1; + for (int i = 0; i < indices->dim_count; i++) { + indices_size *= indices->dim[i]; + } + + for (int i = 0; i < outer_size; i++) { + for (int j = 0; j < indices_size; j++) { + if (indices_data[j] < input->dim[params->axis]) { + csi_c906_memcpy(output_data, input_data + indices_data[j] * inner_size, + inner_size * sizeof(__fp16)); + } else { + memset(output_data, 0, inner_size * sizeof(__fp16)); + } + output_data += inner_size; + } + input_data += inner_size * input->dim[params->axis]; + } + return CSINN_TRUE; +} + diff --git a/source/c906_opt/gemm_fp16.c b/source/c906_opt/gemm_fp16.c index ebcc8153..1ae23b82 100644 --- a/source/c906_opt/gemm_fp16.c +++ b/source/c906_opt/gemm_fp16.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -553,6 +553,92 @@ void csi_c906_reorder_input_fp16(__fp16 *b, __fp16 *sb, int k, int n, int ldx) ); } + +void csi_c906_reorder_input_fp16_1(__fp16 *b, __fp16 *sb, int k, int n, int ldx) +{ + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" // set vl = 8 + + "slli t2, %4, 1\n\t" // t2 = ldx * 2 (line stride) + + "srai t0, %3, 3\n\t" // t0 = n8 + "beqz t0, 3f\n\t" // jump to packn_tail + + "1:\n\t" // n8 + "mv a0, %0\n\t" + "addi %0, %0, 16\n\t" + "mv t1, %2\n\t" // k + + "2:\n\t" + // start packn8k1 + "vle.v v2, (a0)\n\t" + "add a0, a0, t2\n\t" + "vse.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t1, t1, -1\n\t" + "bnez t1, 2b\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "3:\n\t" // n_tail + "andi t0, %3, 7\n\t" // n & 7u + "beqz t0, 8f\n\t" + + "srai t3, %2, 3\n\t" // k8 + "slli t5, %4, 4\n\t" // t5 = ldx * 8 * 2 (8 lines) + "andi t6, %2, 7\n\t" // k_tail + "slli t4, t6, 1\n\t" // k_tail * 2 + + "4:\n\t" + "mv a0, %0\n\t" + "addi %0, %0, 2\n\t" + "mv t1, t3\n\t" // t1 = k8 + "beqz t3, 6f\n\t" + + "5:\n\t" + "vsetvli zero, zero, e16, m1\n\t" + "vlse.v v2, (a0), t2\n\t" + "add a0, a0, t5\n\t" + "vse.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t1, t1, -1\n\t" + "bnez t1, 5b\n\t" + + "6:\n\t" + "vsetvli zero, t6, e16, m1\n\t" + "vlse.v v2, (a0), t2\n\t" + "vse.v v2, (%1)\n\t" + "add %1, %1, t4\n\t" + + "7:\n\t" + "addi t0, t0, -1\n\t" + "bnez t0, 4b\n\t" + + + "8:\n\t" // ending + + + :"=r"(b), // %0 + "=r"(sb), // %1 + "=r"(k), // %2 + "=r"(n), // %3 + "=r"(ldx) // %4 + :"0"(b), + "1"(sb), + "2"(k), + "3"(n), + "4"(ldx) + :"v0", "v2", "a0", + "t0", "t1", "t2", "t3", "t4", "t5", "t6" + ); + +} + + + /* (1) Algorithm works as follows: m1n8_loop: m1n8k8_loop --> m1n8k4 --> m1n8k2 --> m1n8k1 @@ -3351,6 +3437,259 @@ static void kernel_m8_fp16(__fp16* dst, __fp16* sa, __fp16* sb, int m, int k, in } +static void kernel_m8_fp16_1(__fp16* dst, __fp16* sa, __fp16* sb, int m, int k, int n, int ldc, __fp16* bias) +{ + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" // set vl = 8 + + "flh fs0, 0(%2)\n\t" + "flh fs1, 2(%2)\n\t" + "flh fs2, 4(%2)\n\t" + "flh fs3, 6(%2)\n\t" + "flh fs4, 8(%2)\n\t" + "flh fs5, 10(%2)\n\t" + "flh fs6, 12(%2)\n\t" + "flh fs7, 14(%2)\n\t" // load 8 bias_data for 8 out_channels + + // init output addr + "slli t5, %6, 1\n\t" // t5_tmp = ldx * 2 + "mv a0, %3\n\t" + "add a1, a0, t5\n\t" + "add a2, a1, t5\n\t" + "add a3, a2, t5\n\t" + "add a4, a3, t5\n\t" + "add a5, a4, t5\n\t" + "add a6, a5, t5\n\t" + "add a7, a6, t5\n\t" + + "srai t0, %5, 3\n\t" // t0 = n >> 3 (n8) + "beqz t0, 7f\n\t" // jump to m8n4 + + "1:\n\t" // m8n8 + // start kernel_m8n8 + "vfmv.v.f v24, fs0\n\t" + "vfmv.v.f v25, fs1\n\t" + "vfmv.v.f v26, fs2\n\t" + "vfmv.v.f v27, fs3\n\t" + "vfmv.v.f v28, fs4\n\t" + "vfmv.v.f v29, fs5\n\t" + "vfmv.v.f v30, fs6\n\t" + "vfmv.v.f v31, fs7\n\t" // init out_tmp = bias + + "mv t6, %0\n\t" // t6 hold kernel 8 lines start addr + "mv t5, %4\n\t" // t5 = k (k > 0) + + "2:\n\t" + // start subkernel_m8n8k1 + "vle.v v1, (%1)\n\t" + "addi %1, %1, 16\n\t" + "flh fa0, 0(t6)\n\t" + "flh fa1, 2(t6)\n\t" + "flh fa2, 4(t6)\n\t" + "flh fa3, 6(t6)\n\t" + "flh fa4, 8(t6)\n\t" + "flh fa5, 10(t6)\n\t" + "flh fa6, 12(t6)\n\t" + "flh fa7, 14(t6)\n\t" + "addi t6, t6, 16\n\t" + + "vfmacc.vf v24, fa0, v1\n\t" + "vfmacc.vf v25, fa1, v1\n\t" + "vfmacc.vf v26, fa2, v1\n\t" + "vfmacc.vf v27, fa3, v1\n\t" + "vfmacc.vf v28, fa4, v1\n\t" + "vfmacc.vf v29, fa5, v1\n\t" + "vfmacc.vf v30, fa6, v1\n\t" + "vfmacc.vf v31, fa7, v1\n\t" // 0 + + "addi t5, t5, -1\n\t" + "bnez t5, 2b\n\t" + + "6:\n\t" // end kernel_m8n8 + + "vse.v v24, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vse.v v25, (a1)\n\t" + "addi a1, a1, 16\n\t" + "vse.v v26, (a2)\n\t" + "addi a2, a2, 16\n\t" + "vse.v v27, (a3)\n\t" + "addi a3, a3, 16\n\t" + "vse.v v28, (a4)\n\t" + "addi a4, a4, 16\n\t" + "vse.v v29, (a5)\n\t" + "addi a5, a5, 16\n\t" + "vse.v v30, (a6)\n\t" + "addi a6, a6, 16\n\t" + "vse.v v31, (a7)\n\t" + "addi a7, a7, 16\n\t" // store output + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + + "7:\n\t" // m8n4 + "andi t0, %5, 7\n\t" // n & 7 + "srai t0, t0, 2\n\t" // (n & 7) >> 2 + "beqz t0, 13f\n\t" // jump to m8n2 + // start kernel_m8n4 + + "vle.v v28, (%2)\n\t" // v28[0..7] = bias_data[0..7] + "vle.v v29, (%2)\n\t" + "vle.v v30, (%2)\n\t" + "vle.v v31, (%2)\n\t" // init out_tmp = bias + + // init addr for pa, pb and pc + "slli t0, %4, 1\n\t" // t0_tmp = k * 2 + + "mv t6, %0\n\t" // t6 hold pa(kernel) 8 lines start addr + + "mv a4, %1\n\t" + "add a5, a4, t0\n\t" + "add a6, a5, t0\n\t" + "add a7, a6, t0\n\t" // a4-a7 hold pb(input) 4 cols addr + + "addi a1, a0, 2\n\t" + "addi a2, a1, 2\n\t" + "addi a3, a2, 2\n\t" // a0-a3 hold pc(output) addr + + "mv t5, %4\n\t" // t5 = k + + "8:\n\t" + // start subkernel_m8n4k1 + "vle.v v1, (t6)\n\t" // load pa for next + "addi t6, t6, 16\n\t" + "flh fa0, 0(a4)\n\t" + "vfmacc.vf v28, fa0, v1\n\t" + "flh fa1, 0(a5)\n\t" + "vfmacc.vf v29, fa1, v1\n\t" + "flh fa2, 0(a6)\n\t" + "vfmacc.vf v30, fa2, v1\n\t" + "flh fa3, 0(a7)\n\t" + "vfmacc.vf v31, fa3, v1\n\t" // 0 + + "addi a4, a4, 2\n\t" + "addi a5, a5, 2\n\t" + "addi a6, a6, 2\n\t" + "addi a7, a7, 2\n\t" + + "addi t5, t5, -1\n\t" + "bnez t5, 8b\n\t" + + "12:\n\t" // end kernel_m8n4 + "slli t0, %6, 1\n\t" // t0_tmp = ldx * 2 (store_stride) + + "vsse.v v28, (a0), t0\n\t" + "vsse.v v29, (a1), t0\n\t" + "vsse.v v30, (a2), t0\n\t" + "vsse.v v31, (a3), t0\n\t" + + "addi a0, a0, 8\n\t" // updata output start addr ( +4 cols) + "slli t0, %4, 3\n\t" // t_tmp = k * 4 * 2 + "add %1, %1, t0\n\t" // updata pb start addr + + "13:\n\t" // m8n2 + "andi t0, %5, 3\n\t" // n & 3 + "srai t0, t0, 1\n\t" // (n & 3) >> 1 + "beqz t0, 19f\n\t" // jump to m8n1 + // start kernel_m8n2 + + "vle.v v28, (%2)\n\t" // v28[0..7] = bias[0..7] + "vle.v v29, (%2)\n\t" // init out_tmp = bias + + // init addr for pa, pb and pc + "slli t0, %4, 1\n\t" // t_tmp = k * 2 + + "mv t6, %0\n\t" // t6 hold pa(kernel) 8 lines start addr + + "mv a4, %1\n\t" + "add a5, a4, t0\n\t" // a4-a5 hold pb(input) 2 cols addr + + "addi a1, a0, 2\n\t" // a0-a1 hold pc(output) addr + + "mv t5, %4\n\t" // t5 = k + + "14:\n\t" + // start subkernel_m8n2k8 + "vle.v v1, (t6)\n\t" + "addi t6, t6, 16\n\t" + "flh fa0, 0(a4)\n\t" + "vfmacc.vf v28, fa0, v1\n\t" + "flh fa1, 0(a5)\n\t" + "vfmacc.vf v29, fa1, v1\n\t" // 0 + + "addi a4, a4, 2\n\t" + "addi a5, a5, 2\n\t" + + "addi t5, t5, -1\n\t" + "bnez t5, 14b\n\t" + + "18:\n\t" // end kernel_m8n2 + "slli t0, %6, 1\n\t" // t0_tmp = ldx * 2 (store_stride) + + "vsse.v v28, (a0), t0\n\t" + "vsse.v v29, (a1), t0\n\t" + + "addi a0, a0, 4\n\t" // updata output start addr ( +2 cols) + "slli t0, %4, 2\n\t" // t_tmp = k * 2 * 2 + "add %1, %1, t0\n\t" // updata pb start addr (+2 cols) + + "19:\n\t" // m8n1 + "andi t0, %5, 1\n\t" // n & 1 + "beqz t0, 25f\n\t" // jump to ending + // start kernel_m8n1 + + "vle.v v28, (%2)\n\t" // init out_tmp = bias + + // init addr for pa, pb and pc + "mv t6, %0\n\t" // t6 hold pa(kernel) 8 lines start addr + "mv a4, %1\n\t" // a4 hold pb(input) 1 cols addr + // a0 hold pc(output) addr + + "mv t5, %4\n\t" // t5 = k + + "20:\n\t" + // start subkernel_m8n1k8 + "vle.v v1, (t6)\n\t" + "addi t6, t6, 16\n\t" + "flh fa0, 0(a4)\n\t" + "vfmacc.vf v28, fa0, v1\n\t" // 0 + + "addi a4, a4, 2\n\t" + + "addi t5, t5, -1\n\t" + "bnez t5, 20b\n\t" + + "24:\n\t" // end kernel_m8n1 + "slli t0, %6, 1\n\t" // t0_tmp = ldx * 2 (store_stride) + + "vsse.v v28, (a0), t0\n\t" + + "25:\n\t" // ending + + + :"=r"(sa), // %0 + "=r"(sb), // %1 + "=r"(bias), // %2 + "=r"(dst), // %3 + "=r"(k), // %4 + "=r"(n), // %5 + "=r"(ldc) // %6 + :"0"(sa), + "1"(sb), + "2"(bias), + "3"(dst), + "4"(k), + "5"(n), + "6"(ldc) + :"v1", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t5", "t6", + "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7" + ); + +} + + void csi_c906_sgemm_kernel_fp16(__fp16* dst, const __fp16* sa, const __fp16* sb, int m, int k, int n, int ldc, __fp16* bias) { __fp16* pa = (__fp16 *)sa; @@ -3367,7 +3706,7 @@ void csi_c906_sgemm_kernel_fp16(__fp16* dst, const __fp16* sa, const __fp16* sb, const int mm = (m >> 3) << 3; for (int i = 0; i < mm; i += 8) { - kernel_m8_fp16(pc + i * ldc, pa + i * k, pb, m, k, n, ldc, bias_tmp + i); + kernel_m8_fp16_1(pc + i * ldc, pa + i * k, pb, m, k, n, ldc, bias_tmp + i); } pa += mm * k; diff --git a/source/c906_opt/gemv_fp16.c b/source/c906_opt/gemv_fp16.c new file mode 100644 index 00000000..3e841756 --- /dev/null +++ b/source/c906_opt/gemv_fp16.c @@ -0,0 +1,415 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" + +/* + change memory layout for matrix [k * n] by Z shape + Z length: 8 +*/ +void csi_c906_reorder_matrix_z8_fp16(__fp16 *src, __fp16 *dst, int k, int n, int ldx) +{ + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" // set vl = 8 + + "slli t2, %4, 1\n\t" // t2 = ldx * 2 (line stride) + + "srai t0, %3, 3\n\t" // t0 = n8 + "beqz t0, 3f\n\t" // jump to packn_tail + + "1:\n\t" // n8 + "mv a0, %0\n\t" + "addi %0, %0, 16\n\t" + "mv t1, %2\n\t" // k + + "2:\n\t" + // start packn8k1 + "vle.v v2, (a0)\n\t" + "add a0, a0, t2\n\t" + "vse.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t1, t1, -1\n\t" + "bnez t1, 2b\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "3:\n\t" // n_tail + "andi t0, %3, 7\n\t" // n & 7u + "beqz t0, 8f\n\t" + + "srai t3, %2, 3\n\t" // k8 + "slli t5, %4, 4\n\t" // t5 = ldx * 8 * 2 (8 lines) + "andi t6, %2, 7\n\t" // k_tail + "slli t4, t6, 1\n\t" // k_tail * 2 + + "4:\n\t" + "mv a0, %0\n\t" + "addi %0, %0, 2\n\t" + "mv t1, t3\n\t" // t1 = k8 + "beqz t3, 6f\n\t" + + "5:\n\t" + "vsetvli zero, zero, e16, m1\n\t" + "vlse.v v2, (a0), t2\n\t" + "add a0, a0, t5\n\t" + "vse.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t1, t1, -1\n\t" + "bnez t1, 5b\n\t" + + "6:\n\t" + "vsetvli zero, t6, e16, m1\n\t" + "vlse.v v2, (a0), t2\n\t" + "vse.v v2, (%1)\n\t" + "add %1, %1, t4\n\t" + + "7:\n\t" + "addi t0, t0, -1\n\t" + "bnez t0, 4b\n\t" + + + "8:\n\t" // ending + + + :"=r"(src), // %0 + "=r"(dst), // %1 + "=r"(k), // %2 + "=r"(n), // %3 + "=r"(ldx) // %4 + :"0"(src), + "1"(dst), + "2"(k), + "3"(n), + "4"(ldx) + :"v0", "v2", "a0", + "t0", "t1", "t2", "t3", "t4", "t5", "t6" + ); +} + +void csi_c906_reorder_matrix_z16_fp16(__fp16 *src, __fp16 *dst, int k, int n, int ldx) +{ + asm volatile( + "vsetvli zero, zero, e16, m2\n\t" // set vl = 8 + + "slli t2, %4, 1\n\t" // t2 = ldx * 2 (line stride) + + "srai t0, %3, 4\n\t" // t0 = n16 + "beqz t0, 3f\n\t" // jump to packn_tail + + "1:\n\t" // n8 + "mv a0, %0\n\t" + "addi %0, %0, 32\n\t" + "mv t1, %2\n\t" // k + + "2:\n\t" + // start packn8k1 + "vle.v v2, (a0)\n\t" + "add a0, a0, t2\n\t" + "vse.v v2, (%1)\n\t" + "addi %1, %1, 32\n\t" + + "addi t1, t1, -1\n\t" + "bnez t1, 2b\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "3:\n\t" // n_tail + "andi t0, %3, 15\n\t" // n & 15u + "beqz t0, 8f\n\t" + + "srai t3, %2, 4\n\t" // k15 + "slli t5, %4, 5\n\t" // t5 = ldx * 16 * 2 (16 lines) + "andi t6, %2, 15\n\t" // k_tail + "slli t4, t6, 1\n\t" // k_tail * 2 + + "4:\n\t" + "mv a0, %0\n\t" + "addi %0, %0, 2\n\t" + "mv t1, t3\n\t" // t1 = k8 + "beqz t3, 6f\n\t" + + "5:\n\t" + "vsetvli zero, zero, e16, m2\n\t" + "vlse.v v2, (a0), t2\n\t" + "add a0, a0, t5\n\t" + "vse.v v2, (%1)\n\t" + "addi %1, %1, 32\n\t" + + "addi t1, t1, -1\n\t" + "bnez t1, 5b\n\t" + + "6:\n\t" + "vsetvli zero, t6, e16, m2\n\t" + "vlse.v v2, (a0), t2\n\t" + "vse.v v2, (%1)\n\t" + "add %1, %1, t4\n\t" + + "7:\n\t" + "addi t0, t0, -1\n\t" + "bnez t0, 4b\n\t" + + + "8:\n\t" // ending + + :"=r"(src), // %0 + "=r"(dst), // %1 + "=r"(k), // %2 + "=r"(n), // %3 + "=r"(ldx) // %4 + :"0"(src), + "1"(dst), + "2"(k), + "3"(n), + "4"(ldx) + :"v0", "v2", "v3", "a0", + "t0", "t1", "t2", "t3", "t4", "t5", "t6" + ); +} + +/* + vector: 1 x k + matrix: n x k +*/ +void csi_c906_gemv_pack8_fp16(__fp16* dst, const __fp16* sa, const __fp16* sb, int k, int n, int ldc, __fp16* bias) +{ + + +} + +void csi_c906_gemv_pack16_fp16(__fp16* dst, const __fp16* sa, const __fp16* sb, int k, int n, int ldc, __fp16* bias) +{ + + +} + + +/* + vector: 1 x k + matrix: k x n +*/ +void csi_c906_gemv_trans_pack8_fp16(__fp16* dst, const __fp16* sa, const __fp16* sb, int k, int n, int ldc, __fp16* bias) +{ + asm volatile( + "vsetvli zero, zero, e16, m1\n\t" // set vl = 8 + + "flh ft0, (%3)\n\t" // bias + + "srai t4, %4, 3\n\t" // k >> 3 (k8) + "srai t0, %5, 3\n\t" // n >> 3 (n8) + "beqz t0, 3f\n\t" + + "1:\n\t" // m1n8 + "vfmv.v.f v4, ft0\n\t" // v4[0..n] = bias + + "mv t1, %4\n\t" // (k) + "mv t6, %1\n\t" // vector start addr + + "2:\n\t" + // m1n8k1 + "vle.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" + "flh fa0, 0(t6)\n\t" + "vfmacc.vf v4, fa0, v2\n\t" + "addi t6, t6, 2\n\t" + + "addi t1, t1, -1\n\t" + "bnez t1, 2b\n\t" + + "vse.v v4, (%0)\n\t" + "addi %0, %0, 16\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "3:\n\t" // n_tail + "andi t0, %5, 7\n\t" // n_tail + "beqz t0, 8f\n\t" // if n_tail = 0, jump to ending + + "andi t2, %4, 7\n\t" // k_tail + "slli t3, t2, 1\n\t" // k_tail * 2 + + "4:\n\t" + "mv t6, %1\n\t" // init input_data addr + + "vmv.v.x v4, zero\n\t" // clear acc + "vfmv.s.f v3, ft0\n\t" // v3[0] = bias + + "mv t5, t4\n\t" // t5 = k8 + "beqz t2, 6f\n\t" + + "5:\n\t" + // m1n1k_tail + "vsetvli zero, t2, e16, m1\n\t" + "vle.v v1, (t6)\n\t" + "add t6, t6, t3\n\t" + "vle.v v2, (%2)\n\t" + "add %2, %2, t3\n\t" + "vfmacc.vv v4, v1, v2\n\t" + + "beqz t4, 7f\n\t" // if k8 == 0, jump to end m1n1 + "vsetvli zero, zero, e16, m1\n\t" + + "6:\n\t" + // m1n1k8 + "vle.v v1, (t6)\n\t" + "addi t6, t6, 16\n\t" + "vle.v v2, (%2)\n\t" + "addi %2, %2, 16\n\t" + "vfmacc.vv v4, v1, v2\n\t" + + "addi t5, t5, -1\n\t" + "bnez t5, 6b\n\t" + + "7:\n\t" // end m1n1 + "vfredsum.vs v3, v4, v3\n\t" // v3[0] = v3[0](bias) + sum(v4[0..7]) + "vfmv.f.s fa0, v3\n\t" + "fsh fa0, 0(%0)\n\t" + "addi %0, %0, 2\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 4b\n\t" + + "8:\n\t" // ending + + :"=r"(dst), // %0 + "=r"(sa), // %1 + "=r"(sb), // %2 + "=r"(bias), // %3 + "=r"(k), // %4 + "=r"(n) // %5 + :"0"(dst), + "1"(sa), + "2"(sb), + "3"(bias), + "4"(k), + "5"(n) + : "v1", "v2", "v3", "v4", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "fa0", "ft0" + ); + +} + + +void csi_c906_gemv_trans_pack16_fp16(__fp16* dst, const __fp16* sa, const __fp16* sb, int k, int n, int ldc, __fp16* bias) +{ + asm volatile( + "vsetvli zero, zero, e16, m2\n\t" // set vl = 8 + + "flh ft0, (%3)\n\t" // bias + + "srai t4, %4, 4\n\t" // k >> 4 (k16) + "srai t0, %5, 4\n\t" // n >> 4 (n16) + "beqz t0, 3f\n\t" + + "1:\n\t" // m1n8 + "vfmv.v.f v4, ft0\n\t" // v4[0..n] = bias + + "mv t1, %4\n\t" // (k) + "mv t6, %1\n\t" // vector start addr + + "2:\n\t" + // m1n8k1 + "vle.v v2, (%2)\n\t" + "addi %2, %2, 32\n\t" + "flh fa0, 0(t6)\n\t" + "addi t6, t6, 2\n\t" + "vfmacc.vf v4, fa0, v2\n\t" + + "addi t1, t1, -1\n\t" + "bnez t1, 2b\n\t" + + "vse.v v4, (%0)\n\t" + "addi %0, %0, 32\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "3:\n\t" // n_tail + "andi t0, %5, 15\n\t" // n_tail + "beqz t0, 8f\n\t" // if n_tail = 0, jump to ending + + "andi t2, %4, 15\n\t" // k_tail + "slli t3, t2, 1\n\t" // k_tail * 2 + + "4:\n\t" + "mv t6, %1\n\t" // init input_data addr + + "vmv.v.x v4, zero\n\t" // clear acc + "vfmv.s.f v8, ft0\n\t" // v8[0] = bias + + "mv t5, t4\n\t" // t5 = k16 + "beqz t2, 6f\n\t" + + "5:\n\t" + // m1n1k_tail + "vsetvli zero, t2, e16, m2\n\t" + "vle.v v6, (t6)\n\t" + "add t6, t6, t3\n\t" + "vle.v v2, (%2)\n\t" + "add %2, %2, t3\n\t" + "vfmacc.vv v4, v2, v6\n\t" + + "beqz t4, 7f\n\t" // if k16 == 0, jump to end m1n1 + "vsetvli zero, zero, e16, m2\n\t" + + "6:\n\t" + // m1n1k16 + "vle.v v6, (t6)\n\t" + "addi t6, t6, 32\n\t" + "vle.v v2, (%2)\n\t" + "addi %2, %2, 32\n\t" + "vfmacc.vv v4, v2, v6\n\t" + + "addi t5, t5, -1\n\t" + "bnez t5, 6b\n\t" + + "7:\n\t" // end m1n1 + "vfredsum.vs v8, v4, v8\n\t" // v3[0] = v3[0](bias) + sum(v4[0..7]) + "vfmv.f.s fa0, v8\n\t" + "fsh fa0, 0(%0)\n\t" + "addi %0, %0, 2\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 4b\n\t" + + "8:\n\t" // ending + + :"=r"(dst), // %0 + "=r"(sa), // %1 + "=r"(sb), // %2 + "=r"(bias), // %3 + "=r"(k), // %4 + "=r"(n) // %5 + :"0"(dst), + "1"(sa), + "2"(sb), + "3"(bias), + "4"(k), + "5"(n) + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", + "fa0", "ft0" + ); +} diff --git a/source/c906_opt/global_avgpool.c b/source/c906_opt/global_avgpool.c index e1e4796e..3ebd93c9 100644 --- a/source/c906_opt/global_avgpool.c +++ b/source/c906_opt/global_avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/global_maxpool.c b/source/c906_opt/global_maxpool.c index 4d15cd91..cba7d763 100644 --- a/source/c906_opt/global_maxpool.c +++ b/source/c906_opt/global_maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/hpm.c b/source/c906_opt/hpm.c new file mode 100644 index 00000000..5d4df358 --- /dev/null +++ b/source/c906_opt/hpm.c @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" + +/* + hpm: hardware performance monitor + note: Refer to the hpm sample program in the c906 user manual, Enable related status first. +*/ +struct csi_c906_hpm csi_c906_get_hw_perf() +{ + struct csi_c906_hpm tmp; + asm volatile( + "csrr %0, instret\n\t" + "csrr %1, cycle\n\t" + "csrr %2, hpmcounter3\n\t" + "csrr %3, hpmcounter4\n\t" + "csrr %4, hpmcounter13\n\t" + "csrr %5, hpmcounter14\n\t" + "csrr %6, hpmcounter15\n\t" + "csrr %7, hpmcounter16\n\t" + "csrr %8, hpmcounter17\n\t" + + : "=r"(tmp.inst), + "=r"(tmp.cycle), + "=r"(tmp.l1_icache_access), + "=r"(tmp.l1_icache_miss), + "=r"(tmp.store_inst), + "=r"(tmp.l1_dcache_raccess), + "=r"(tmp.l1_dcache_rmiss), + "=r"(tmp.l1_dcache_waccess), + "=r"(tmp.l1_dcache_wmiss) + : + : "memory"); + return tmp; +} + + +uint64_t csi_c906_get_inst() +{ + uint64_t inst = 0; + asm volatile("csrr %0, instret" + : "=r"(inst) + : + : "memory"); + // asm volatile("csrr %[inst], minstret" + // : [inst]"=r"(inst) + // : + // : "memory"); + return inst; +} + +uint64_t csi_c906_get_cycle() +{ + uint64_t a = 0; + asm volatile("csrr %0, cycle" + : "=r"(a) + : + : "memory"); + return a; +} + + +/* + index event counter + 0x1 L1 ICache Access Counter mhpmcounter3 + 0x2 L1 ICache Miss Counter mhpmcounter4 + 0x3 I-uTLB Miss Counter mhpmcounter5 + 0x4 D-uTLB Miss Counter mhpmcounter6 + 0x5 jTLB Miss Counter mhpmcounter7 + 0x6 Conditional Branch Mispredict Counter mhpmcounter8 + 0x7 Conditional Branch instruction counter mhpmcounter9 + 0x9 undefine mhpmcounter10-12 + 0xb Store Instruction Counter mhpmcounter13 + 0xc L1 DCache read access Counter mhpmcounter14 + 0xd L1 DCache read miss Counter mhpmcounter15 + 0xe L1 DCache write access Counter mhpmcounter16 + 0xf L1 DCache write miss Counter mhpmcounter17 + >=0x10 Reserve mhpmcounter18-31 +*/ + +uint64_t csi_c906_get_l1_icache_access() +{ + uint64_t a = 0; + asm volatile("csrr %0, hpmcounter3" + : "=r"(a) + : + : "memory"); + return a; +} + +uint64_t csi_c906_get_l1_icache_miss() +{ + uint64_t a = 0; + asm volatile("csrr %0, hpmcounter4" + : "=r"(a) + : + : "memory"); + return a; +} + +uint64_t csi_c906_get_cb_miss() +{ + uint64_t a = 0; + asm volatile("csrr %0, hpmcounter8" + : "=r"(a) + : + : "memory"); + return a; +} + +uint64_t csi_c906_get_cb_inst() +{ + uint64_t a = 0; + asm volatile("csrr %0, hpmcounter9" + : "=r"(a) + : + : "memory"); + return a; +} + +uint64_t csi_c906_get_store_inst() +{ + uint64_t a = 0; + asm volatile("csrr %0, hpmcounter13" + : "=r"(a) + : + : "memory"); + return a; +} + +uint64_t csi_c906_get_l1_dcache_raccess() +{ + uint64_t a = 0; + asm volatile("csrr %0, hpmcounter14" + : "=r"(a) + : + : "memory"); + return a; +} + +uint64_t csi_c906_get_l1_dcache_rmiss() +{ + uint64_t a = 0; + asm volatile("csrr %0, hpmcounter15" + : "=r"(a) + : + : "memory"); + return a; +} + +uint64_t csi_c906_get_l1_dcache_waccess() +{ + uint64_t a = 0; + asm volatile("csrr %0, hpmcounter16" + : "=r"(a) + : + : "memory"); + return a; +} + +uint64_t csi_c906_get_l1_dcache_wmiss() +{ + uint64_t a = 0; + asm volatile("csrr %0, hpmcounter17" + : "=r"(a) + : + : "memory"); + return a; +} diff --git a/source/c906_opt/layer_norm.c b/source/c906_opt/layer_norm.c new file mode 100644 index 00000000..81ef9b9d --- /dev/null +++ b/source/c906_opt/layer_norm.c @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include + +#include "csi_c906.h" +#include "csi_utils.h" + +int csi_c906_layer_norm_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *gamma, struct csi_tensor *beta, + struct layer_norm_params *params) +{ + int flatten_size = 0; + flatten_size *= input->dim[0] * input->dim[1] * input->dim[2]; + + __fp16 *sum = (__fp16 *)csi_mem_alloc(input->dim[1] * sizeof(__fp16)); + __fp16 *sum2 = (__fp16 *)csi_mem_alloc(input->dim[1] * sizeof(__fp16)); + __fp16 *input_data = input->data; + __fp16 *output_data = output->data; + __fp16 *gamma_data = gamma->data; + __fp16 *beta_data = beta->data; + + __fp16 *p_input_data = input_data; + __fp16 *p_output_data = output_data; + + size_t batch = input->dim[1]; + size_t output_depth = input->dim[2]; + for (int i = 0; i < batch; i++) { + vfloat16m2_t _sum = vfmv_v_f_f16m2(0.0f, 16); + for (int j = 0; j + 15 < output_depth; j += 16) { + vfloat16m2_t _input_data = vle16_v_f16m2(p_input_data, 16); + + _sum = vfadd_vv_f16m2(_sum, _input_data, 16); + p_input_data += 16; + } + + vfloat16m1_t _0_f = vfmv_v_f_f16m1(0.0f, 8); + vfloat16m1_t _sum2 = vfredosum_vs_f16m2_f16m1(_0_f, _sum, _0_f, 16); + __fp16 tmp = vfmv_f_s_f16m1_f16(_sum2); + tmp = tmp / output_depth; + sum[i] = tmp; + } + + p_input_data = input_data; + p_output_data = output_data; + for (int i = 0; i < batch; i++) { + vfloat32m4_t _sum_f32 = vfmv_v_f_f32m4(0.0f, 16); + vfloat16m2_t _sum = vfmv_v_f_f16m2(sum[i], 16); + + for (int j = 0; j + 15 < output_depth; j += 16) { + vfloat16m2_t _input_data = vle16_v_f16m2(p_input_data, 16); + _input_data = vfsub_vv_f16m2(_input_data, _sum, 16); + vse16_v_f16m2(p_output_data, _input_data, 16); + vfloat32m4_t _input_data_f32 = vfwmul_vv_f32m4(_input_data, _input_data, 16); + _sum_f32 = vfadd_vv_f32m4(_input_data_f32, _sum_f32, 16); + p_input_data += 16; + p_output_data += 16; + } + vfloat32m1_t _0_f = vfmv_v_f_f32m1(0.0f, 4); + vfloat32m1_t _sum2 = vfmv_v_f_f32m1(0.0f, 4); + _sum2 = vfredosum_vs_f32m4_f32m1(_0_f, _sum_f32, _0_f, 16); + float tmp = vfmv_f_s_f32m1_f32(_sum2); + tmp /= output_depth; + tmp = sqrtf(tmp); + sum2[i] = tmp; + } + + p_output_data = output_data; + for (int i = 0; i < batch; i++) { + __fp16 mul = 1.0f / sum2[i]; + vfloat16m2_t _sum = vfmv_v_f_f16m2(mul, 16); + for (int j = 0; j + 15 < output_depth; j += 16) { + vfloat16m2_t _output_data = vle16_v_f16m2(p_output_data, 16); + _output_data = vfmul_vv_f16m2(_output_data, _sum, 16); + vfloat16m2_t _gamma_data = vle16_v_f16m2(gamma_data + j, 16); + _output_data = vfmul_vv_f16m2(_output_data, _gamma_data, 16); + vfloat16m2_t _beta_data = vle16_v_f16m2(beta_data + j, 16); + _output_data = vfadd_vv_f16m2(_output_data, _beta_data, 16); + vse16_v_f16m2(p_output_data, _output_data, 16); + p_output_data += 16; + } + } + + csi_mem_free(sum); + csi_mem_free(sum2); + + return CSINN_TRUE; +} diff --git a/source/c906_opt/leaky_relu.c b/source/c906_opt/leaky_relu.c index 56611a25..e70f5e2f 100644 --- a/source/c906_opt/leaky_relu.c +++ b/source/c906_opt/leaky_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/lrn.c b/source/c906_opt/lrn.c new file mode 100644 index 00000000..92a062ce --- /dev/null +++ b/source/c906_opt/lrn.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "csi_c906.h" + +int csi_c906_lrn_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct lrn_params *params) +{ + __fp16 *input_data = input->data; + __fp16 *output_data = output->data; + int inner_size = 1; + const int depth = input->dim[1]; + int half_range = params->range / 2; + + /* inner_size = H * W */ + inner_size = input->dim[2] * input->dim[3]; + + for (int j = 0; j < input->dim[0]; j++) { + for (int c = 0; c < depth; ++c) { + const int begin_input_c = csi_ref_max_internal_s32(0, c - half_range); + const int end_input_c = csi_ref_min_internal_s32(depth, c + half_range + 1); + for (int i = 0; i < inner_size; ++i) { + float accum = 0.f; + for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) { + const float input_val = + input_data[j * depth * inner_size + input_c * inner_size + i]; + accum += input_val * input_val; + } + const float multiplier = + pow(params->bias + params->alpha * accum / params->range, -params->beta); + output_data[j * depth * inner_size + c * inner_size + i] = + input_data[j * depth * inner_size + c * inner_size + i] * multiplier; + } + } + } + return CSINN_TRUE; +} diff --git a/source/c906_opt/matmul.c b/source/c906_opt/matmul.c new file mode 100644 index 00000000..78b83ad4 --- /dev/null +++ b/source/c906_opt/matmul.c @@ -0,0 +1,215 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" + +static void reorder_matrixa_n8_fp16(__fp16 *src, __fp16 *dst, int row, int col) +{ + int vl = vsetvl_e16m1(8); + int i = 0; + for (; i + 7 < row; i += 8) { + __fp16 *in_ptr = src + i * col; + for (int j = 0; j < col; j++) { + vfloat16m1_t _input = vlse16_v_f16m1(in_ptr, col * sizeof(__fp16), vl); + in_ptr++; + vse16_v_f16m1(dst, _input, vl); + dst += vl; + } + } + for (; i + 3 < row; i += 4) { + __fp16 *in_ptr0 = src + i * col; + __fp16 *in_ptr1 = in_ptr0 + col; + __fp16 *in_ptr2 = in_ptr1 + col; + __fp16 *in_ptr3 = in_ptr2 + col; + + int j = 0; + for (; j + 7 < col; j += 8) { + vfloat16m1_t _input0 = vle16_v_f16m1(in_ptr0, vl); + in_ptr0 += vl; + vfloat16m1_t _input1 = vle16_v_f16m1(in_ptr1, vl); + in_ptr1 += vl; + vfloat16m1_t _input2 = vle16_v_f16m1(in_ptr2, vl); + in_ptr2 += vl; + vfloat16m1_t _input3 = vle16_v_f16m1(in_ptr3, vl); + in_ptr3 += vl; + + vsse16_v_f16m1(dst, 4 * sizeof(__fp16), _input0, vl); + dst++; + vsse16_v_f16m1(dst, 4 * sizeof(__fp16), _input1, vl); + dst++; + vsse16_v_f16m1(dst, 4 * sizeof(__fp16), _input2, vl); + dst++; + vsse16_v_f16m1(dst, 4 * sizeof(__fp16), _input3, vl); + dst -= 3; + dst += 32; + } + // col tail + if (j < col) { + int col_tail = col & 7; + vl = vsetvl_e16m1(col_tail); + vfloat16m1_t _input0 = vle16_v_f16m1(in_ptr0, vl); + vfloat16m1_t _input1 = vle16_v_f16m1(in_ptr1, vl); + vfloat16m1_t _input2 = vle16_v_f16m1(in_ptr2, vl); + vfloat16m1_t _input3 = vle16_v_f16m1(in_ptr3, vl); + + vsse16_v_f16m1(dst, 4 * sizeof(__fp16), _input0, vl); + dst++; + vsse16_v_f16m1(dst, 4 * sizeof(__fp16), _input1, vl); + dst++; + vsse16_v_f16m1(dst, 4 * sizeof(__fp16), _input2, vl); + dst++; + vsse16_v_f16m1(dst, 4 * sizeof(__fp16), _input3, vl); + dst -= 3; + dst += 4 * col_tail; + } + } + for (; i + 1 < row; i += 2) { + __fp16 *in_ptr0 = src + i * col; + __fp16 *in_ptr1 = in_ptr0 + col; + vl = vsetvl_e16m1(8); + int j = 0; + for (; j + 7 < col; j += 8) { + vfloat16m1_t _input0 = vle16_v_f16m1(in_ptr0, vl); + in_ptr0 += vl; + vfloat16m1_t _input1 = vle16_v_f16m1(in_ptr1, vl); + in_ptr1 += vl; + + vsse16_v_f16m1(dst, 2 * sizeof(__fp16), _input0, vl); + dst++; + vsse16_v_f16m1(dst, 2 * sizeof(__fp16), _input1, vl); + dst--; + dst += 16; + } + // col tail + if (j < col) { + int col_tail = col & 7; + vl = vsetvl_e16m1(col_tail); + vfloat16m1_t _input0 = vle16_v_f16m1(in_ptr0, vl); + vfloat16m1_t _input1 = vle16_v_f16m1(in_ptr1, vl); + + vsse16_v_f16m1(dst, 2 * sizeof(__fp16), _input0, vl); + dst++; + vsse16_v_f16m1(dst, 2 * sizeof(__fp16), _input1, vl); + dst--; + dst += 2 * col_tail; + } + } + for (; i < row; i++) { + __fp16 *in_ptr0 = src + i * col; + vl = vsetvl_e16m1(8); + int j = 0; + for (; j + 7 < col; j += 8) { + vfloat16m1_t _input0 = vle16_v_f16m1(in_ptr0, vl); + in_ptr0 += vl; + vse16_v_f16m1(dst, _input0, vl); + dst += vl; + } + // col tail + if (j < col) { + int col_tail = col & 7; + vl = vsetvl_e16m1(col_tail); + vfloat16m1_t _input0 = vle16_v_f16m1(in_ptr0, vl); + in_ptr0 += vl; + vse16_v_f16m1(dst, _input0, vl); + dst += vl; + } + } +} + +static void reorder_matrixb_z8_fp16(__fp16 *src, __fp16 *dst, int row, int col) +{ + int vl = vsetvl_e16m1(8); + int i = 0; + for (; i + 7 < col; i += 8) { + __fp16 *in_ptr = src + i; + for (int j = 0; j < row; j++) { + vfloat16m1_t _input = vle16_v_f16m1(in_ptr, vl); + in_ptr += col; + vse16_v_f16m1(dst, _input, vl); + dst += vl; + } + } + for (; i < col; i++) { + __fp16 *in_ptr = src + i; + vl = vsetvl_e16m1(8); + int j = 0; + for (; j + 7 < row; j += 8) { + vfloat16m1_t _input0 = vlse16_v_f16m1(in_ptr, col * sizeof(__fp16), vl); + in_ptr += 8 * col; + vse16_v_f16m1(dst, _input0, vl); + dst += vl; + } + // col tail + if (j < row) { + vl = vsetvl_e16m1(row & 7); + vfloat16m1_t _input0 = vlse16_v_f16m1(in_ptr, col * sizeof(__fp16), vl); + vse16_v_f16m1(dst, _input0, vl); + dst += vl; + } + } +} + +int csi_c906_matmul_fp32(struct csi_tensor *mat0, struct csi_tensor *mat1, + struct csi_tensor *output, struct matmul_params *params) +{ + return CSINN_TRUE; +} + +int csi_c906_matmul_fp16(struct csi_tensor *mat0, struct csi_tensor *mat1, + struct csi_tensor *output, struct matmul_params *params) +{ + __fp16 *mat0_data = (__fp16 *)mat0->data; + __fp16 *mat1_data = (__fp16 *)mat1->data; + __fp16 *output_data = (__fp16 *)output->data; + + const int dims_count = mat0->dim_count; + int batches = 1; + + /* compute the outer size */ + for (int i = 0; i < dims_count - 2; i++) { + batches *= mat0->dim[i]; + } + + const int dim_m = mat0->dim[dims_count - (params->trans_a ? 1 : 2)]; + const int dim_k = mat0->dim[dims_count - (params->trans_a ? 2 : 1)]; + const int dim_n = mat1->dim[dims_count - (params->trans_b ? 2 : 1)]; + + if (!params->trans_a && !params->trans_b) { + __fp16 *in0 = (__fp16 *)csi_mem_alloc(dim_m * dim_k * sizeof(__fp16)); + __fp16 *in1 = (__fp16 *)csi_mem_alloc(dim_k * dim_n * sizeof(__fp16)); + + for (int b = 0; b < batches; b++) { + reorder_matrixa_n8_fp16(mat0_data, in0, dim_m, dim_k); + reorder_matrixb_z8_fp16(mat1_data, in1, dim_k, dim_n); + + csi_c906_sgemm_kernel_fp16(output_data, in0, in1, dim_m, dim_k, dim_n, dim_n, NULL); + + mat0_data += dim_m * dim_k; + mat1_data += dim_n * dim_k; + output_data += dim_m * dim_n; + } + csi_mem_free(in0); + csi_mem_free(in1); + } else { + csi_debug_error("Unsupport matrix transpose on C906\n"); + return CSINN_FALSE; + } + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/c906_opt/maxpool.c b/source/c906_opt/maxpool.c index 39c92379..c0445a75 100644 --- a/source/c906_opt/maxpool.c +++ b/source/c906_opt/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/minimum.c b/source/c906_opt/minimum.c index 9203e484..0c44ea8d 100644 --- a/source/c906_opt/minimum.c +++ b/source/c906_opt/minimum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/mul.c b/source/c906_opt/mul.c index b4302739..921fd545 100644 --- a/source/c906_opt/mul.c +++ b/source/c906_opt/mul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -62,6 +62,39 @@ int csi_c906_mul_f32(struct csi_tensor *input0, int in_size1 = csi_tensor_size(input1); int out_size = csi_tensor_size(output); + // HACK: special case: tensorflow densenet121 + // example: [1, 64, 55, 55] + [1, 64, 1, 1] = [1, 64, 55, 55] + if ((input1->dim[2] == 1) && (input1->dim[3] == 1) && (input1->dim[1] == input0->dim[1])) { + int inner_size = input0->dim[2] * input0->dim[3]; + int outer_size = input0->dim[1]; + asm volatile( + "1:\n\t" + "flw ft0, 0(%2)\n\t" + "mv t1, %4\n\t" + "2:\n\t" + "vsetvli t0, t1, e32, m2\n\t" + "vle.v v8, (%1)\n\t" + "sub t1, t1, t0\n\t" + "slli t0, t0, 2\n\t" // element: 4 bytes + "add %1, %1, t0\n\t" + "vfmul.vf v16, v8, ft0\n\t" + "vse.v v16, (%0)\n\t" + "add %0, %0, t0\n\t" + "bnez t1, 2b\n\t" + "addi %3, %3, -1\n\t" + "addi %2, %2, 4\n\t" + "bnez %3, 1b\n\t" + + : "=r"(output_data), // %0 + "=r"(input0_data), // %1 + "=r"(input1_data), // %2 + "=r"(outer_size), // %3 + "=r"(inner_size) // %4 + : "0"(output_data), "1"(input0_data), "2"(input1_data), "3"(outer_size), "4"(inner_size) + : "v8", "v9", "v16", "v17", "t0", "t1", "ft0"); + return CSINN_TRUE; + } + // example: [1, 3, 224, 224] + [1] = [1, 3, 224, 224] if (in_size1 == 1) { asm volatile( @@ -139,33 +172,28 @@ int csi_c906_mul_f32(struct csi_tensor *input0, return CSINN_TRUE; } - static void element_mul_fp16(__fp16 *input0, __fp16 *input1, __fp16 *output, int size) { asm volatile( - "1:\n\t" - "vsetvli t0, %3, e16, m2\n\t" - "vle.v v8, (%1)\n\t" - "sub %3, %3, t0\n\t" - "slli t0, t0, 1\n\t" // element: 2 bytes - "add %1, %1, t0\n\t" - "vle.v v12, (%2)\n\t" - "add %2, %2, t0\n\t" - "vfmul.vv v16, v8, v12\n\t" - "vse.v v16, (%0)\n\t" - "add %0, %0, t0\n\t" - "bnez %3, 1b\n\t" + "1:\n\t" + "vsetvli t0, %3, e16, m2\n\t" + "vle.v v8, (%1)\n\t" + "sub %3, %3, t0\n\t" + "slli t0, t0, 1\n\t" // element: 2 bytes + "add %1, %1, t0\n\t" + "vle.v v12, (%2)\n\t" + "add %2, %2, t0\n\t" + "vfmul.vv v16, v8, v12\n\t" + "vse.v v16, (%0)\n\t" + "add %0, %0, t0\n\t" + "bnez %3, 1b\n\t" - :"=r"(output), // %0 - "=r"(input0), // %1 - "=r"(input1), // %2 - "=r"(size) // %3 - :"0"(output), - "1"(input0), - "2"(input1), - "3"(size) - : "v8", "v9", "v12", "v13", "v16", "v17", "t0" - ); + : "=r"(output), // %0 + "=r"(input0), // %1 + "=r"(input1), // %2 + "=r"(size) // %3 + : "0"(output), "1"(input0), "2"(input1), "3"(size) + : "v8", "v9", "v12", "v13", "v16", "v17", "t0"); } int csi_c906_mul_fp16(struct csi_tensor *input0, @@ -181,6 +209,37 @@ int csi_c906_mul_fp16(struct csi_tensor *input0, int in_size1 = csi_tensor_size(input1); int out_size = csi_tensor_size(output); + if ((input1->dim[2] == 1) && (input1->dim[3] == 1) && (input1->dim[1] == input0->dim[1])) { + int inner_size = input0->dim[2] * input0->dim[3]; + int outer_size = input0->dim[1]; + asm volatile( + "1:\n\t" + "flh ft0, 0(%2)\n\t" + "mv t1, %4\n\t" + "2:\n\t" + "vsetvli t0, t1, e16, m2\n\t" + "vle.v v8, (%1)\n\t" + "sub t1, t1, t0\n\t" + "slli t0, t0, 1\n\t" + "add %1, %1, t0\n\t" + "vfmul.vf v16, v8, ft0\n\t" + "vse.v v16, (%0)\n\t" + "add %0, %0, t0\n\t" + "bnez t1, 2b\n\t" + "addi %3, %3, -1\n\t" + "addi %2, %2, 2\n\t" + "bnez %3, 1b\n\t" + + : "=r"(output_data), // %0 + "=r"(input0_data), // %1 + "=r"(input1_data), // %2 + "=r"(outer_size), // %3 + "=r"(inner_size) // %4 + : "0"(output_data), "1"(input0_data), "2"(input1_data), "3"(outer_size), "4"(inner_size) + : "v8", "v9", "v16", "v17", "t0", "t1", "ft0"); + return CSINN_TRUE; + } + if (in_size1 == 1) { asm volatile( "flh ft0, 0(%2)\n\t" diff --git a/source/c906_opt/pad.c b/source/c906_opt/pad.c index d11850d3..11a42ca3 100644 --- a/source/c906_opt/pad.c +++ b/source/c906_opt/pad.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/prelu.c b/source/c906_opt/prelu.c index 9b88630c..c36cefb0 100644 --- a/source/c906_opt/prelu.c +++ b/source/c906_opt/prelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/relu.c b/source/c906_opt/relu.c index e3a9b5c9..34a26ca2 100644 --- a/source/c906_opt/relu.c +++ b/source/c906_opt/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/relu1.c b/source/c906_opt/relu1.c index 75f2d06d..7dcacd64 100644 --- a/source/c906_opt/relu1.c +++ b/source/c906_opt/relu1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/relu6.c b/source/c906_opt/relu6.c index 44b82fcc..8c12e2e4 100644 --- a/source/c906_opt/relu6.c +++ b/source/c906_opt/relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" diff --git a/source/c906_opt/reshape.c b/source/c906_opt/reshape.c new file mode 100644 index 00000000..10d63d50 --- /dev/null +++ b/source/c906_opt/reshape.c @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" + +int csi_c906_reshape_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct reshape_params *params) +{ + float *input_data = input->data; + float *output_data = output->data; + int size = csi_tensor_byte_size(input); + if (input_data != output_data) { + csi_c906_memcpy(output_data, input_data, size); + } + return CSINN_TRUE; +} diff --git a/source/c906_opt/setup.c b/source/c906_opt/setup.c index 34f99790..ed964abf 100644 --- a/source/c906_opt/setup.c +++ b/source/c906_opt/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,361 +16,438 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" -void *csi_init_map_c906(int op, int dtype) -{ - if (op == CSINN_OP_CONV2D || op == CSINN_OP_GROUP_CONV2D) { - return csi_c906_conv2d_init; - } else if (op == CSINN_OP_MAXPOOL2D) { - return csi_c906_maxpool2d_init; - } else if(op == CSINN_OP_AVGPOOL2D) { - return csi_c906_avgpool2d_init; - } else if (op == CSINN_OP_DEPTHWISE_CONV2D) { - return csi_c906_depthwise_conv2d_init; - } else if (op == CSINN_OP_CONV2D_RELU) { - return csi_c906_conv2d_relu_init; - } else if (op == CSINN_OP_DEPTHWISE_CONV2D_RELU) { - return csi_c906_depthwise_conv2d_relu_init; - } else if (op == CSINN_OP_FULLYCONNECTED) { - return csi_c906_fullyconnected_init; - } +static struct csi_bc_op_list csi_nn_c906_init_bc_op_list; +static struct csi_bc_op_list csi_nn_c906_func_bc_op_list; - return NULL; +int csi_nn_c906_register_op_init(enum csinn_dtype_enum dtype, enum csinn_op_enum op_name, void *bc) +{ + struct csi_bc_op_list *list_end = csi_bc_list_end(&csi_nn_c906_init_bc_op_list); + struct csi_bc_op_list *next = csi_mem_alloc(sizeof(struct csi_bc_op_list)); + next->bc = bc; + next->dtype = dtype; + next->op_name = op_name; + list_end->next = next; + return CSINN_TRUE; } -static void *setup_bc_map() +int csi_nn_c906_register_op(enum csinn_dtype_enum dtype, enum csinn_op_enum op_name, void *bc) { - static void* bc_map[CSINN_OP_AND_UTILS_SIZE][2]; - - bc_map[CSINN_OP_ABS][0] = csi_c906_abs_fp16; - bc_map[CSINN_OP_ACOS][0] = csi_ref_acos_quant; - bc_map[CSINN_OP_ACOSH][0] = csi_ref_acosh_quant; - bc_map[CSINN_OP_ADD][0] = csi_c906_add_fp16; - bc_map[CSINN_OP_AND][0] = csi_ref_and_i8; - bc_map[CSINN_OP_ARANGE][0] = csi_ref_arange_quant; - bc_map[CSINN_OP_ARGMAX][0] = csi_ref_argmax_stride_quant; - bc_map[CSINN_OP_ARGMIN][0] = csi_ref_argmin_stride_quant; - bc_map[CSINN_OP_ASIN][0] = csi_ref_asin_quant; - bc_map[CSINN_OP_ASINH][0] = csi_ref_asinh_quant; - bc_map[CSINN_OP_ATAN][0] = csi_ref_atan_quant; - bc_map[CSINN_OP_ATANH][0] = csi_ref_atanh_quant; - bc_map[CSINN_OP_AVGPOOL2D][0] = csi_ref_avgpool2d_quant; - bc_map[CSINN_OP_AVGPOOL3D][0] = csi_ref_avgpool3d_quant; - bc_map[CSINN_OP_BN][0] = csi_ref_batch_normalization_quant; - bc_map[CSINN_OP_BATCH_TO_SPACE][0] = csi_ref_batch_to_space_quant; - bc_map[CSINN_OP_BROADCOST][0] = csi_ref_broadcast_to_quant; - bc_map[CSINN_OP_CEIL][0] = csi_ref_ceil_quant; - bc_map[CSINN_OP_CLIP][0] = csi_c906_clip_fp16; - bc_map[CSINN_OP_CONCAT][0] = csi_c906_concat_fp16; - bc_map[CSINN_OP_CONV2D][0] = csi_ref_conv2d_quant; - bc_map[CSINN_OP_CONV2D_RELU][0] = csi_ref_conv2d_relu_quant; - bc_map[CSINN_OP_CONV2D_RELU6][0] = csi_ref_conv2d_relu6_quant; - bc_map[CSINN_OP_DEPTHWISE_CONV2D][0] = csi_ref_depthwise_conv2d_quant; - bc_map[CSINN_OP_DEPTHWISE_CONV2D_RELU][0] = csi_ref_depthwise_conv2d_relu_quant; - bc_map[CSINN_OP_DEPTHWISE_CONV2D_RELU6][0] = csi_ref_depthwise_conv2d_relu6_quant; - bc_map[CSINN_OP_GROUP_CONV2D][0] = csi_ref_group_conv2d_quant; - bc_map[CSINN_OP_CONV3D][0] = csi_ref_conv3d_quant; - bc_map[CSINN_OP_DECONV2D][0] = csi_ref_deconv2d_quant; - bc_map[CSINN_OP_DEPTHWISE_DECONV2D][0] = csi_ref_depthwise_deconv2d_quant; - bc_map[CSINN_OP_DECONV3D][0] = csi_ref_deconv3d_quant; - bc_map[CSINN_OP_COS][0] = csi_ref_cos_quant; - bc_map[CSINN_OP_COSH][0] = csi_ref_cosh_quant; - bc_map[CSINN_OP_CUMPROD][0] = csi_ref_cumprod_quant; - bc_map[CSINN_OP_CUMSUM][0] = csi_ref_cumsum_quant; - bc_map[CSINN_OP_DEPTH_TO_SPACE][0] = csi_ref_depth_to_space_quant; - bc_map[CSINN_OP_DIV][0] = csi_ref_div_quant; - bc_map[CSINN_OP_ELU][0] = csi_ref_elu_quant; - bc_map[CSINN_OP_EQUANL][0] = csi_ref_equal_quant; - bc_map[CSINN_OP_ERF][0] = csi_ref_erf_quant; - bc_map[CSINN_OP_EXP][0] = csi_ref_exp_quant; - bc_map[CSINN_OP_EXPAND_DIMS][0] = csi_ref_expand_dims_quant; - bc_map[CSINN_OP_EXPM1][0] = csi_ref_expm1_quant; - bc_map[CSINN_OP_FLATTEN][0] = csi_ref_flatten; - bc_map[CSINN_OP_FLOOR_DIVIDE][0] = csi_ref_floor_divide_quant; - bc_map[CSINN_OP_FLOOR_MOD][0] = csi_ref_floor_mod_quant; - bc_map[CSINN_OP_FLOOR][0] = csi_ref_floor_quant; - bc_map[CSINN_OP_FSMN][0] = csi_ref_fsmn_quant; - bc_map[CSINN_OP_FULLYCONNECTED][0] = csi_c906_fullyconnected_fp16; - bc_map[CSINN_OP_GATHER_ND][0] = csi_ref_gather_nd_quant; - bc_map[CSINN_OP_GATHER][0] = csi_ref_gather_quant; - bc_map[CSINN_OP_GLOBAL_AVGPOOL2D][0] = csi_ref_global_avgpool2d_quant; - bc_map[CSINN_OP_GLOBAL_MAXPOOL2D][0] = csi_ref_global_maxpool2d_quant; - bc_map[CSINN_OP_GREATHER_EQUAL][0] = csi_ref_greater_equal_quant; - bc_map[CSINN_OP_GREATHER][0] = csi_ref_greater_quant; - bc_map[CSINN_OP_HARD_SIGMOID][0] = csi_ref_hard_sigmoid_quant; - bc_map[CSINN_OP_IM2COL][0] = csi_ref_im2col_quant; - bc_map[CSINN_OP_L2N][0] = csi_ref_l2_normalization_quant; - bc_map[CSINN_OP_LEAKY_RELU][0] = csi_c906_leaky_relu_fp16; - bc_map[CSINN_OP_LESS_EQUAL][0] = csi_ref_less_equal_quant; - bc_map[CSINN_OP_LESS][0] = csi_ref_less_quant; - bc_map[CSINN_OP_LOG_SOFTMAX][0] = csi_ref_log_softmax_quant; - bc_map[CSINN_OP_LOG][0] = csi_ref_log_quant; - bc_map[CSINN_OP_LOG1P][0] = csi_ref_log1p_quant; - bc_map[CSINN_OP_LOGICAL_AND][0] = csi_ref_logical_and_quant; - bc_map[CSINN_OP_LOGICAL_NOT][0] = csi_ref_logical_not_quant; - bc_map[CSINN_OP_LOGICAL_OR][0] = csi_ref_logical_or_quant; - bc_map[CSINN_OP_LOGICAL_XOR][0] = csi_ref_logical_xor_quant; - bc_map[CSINN_OP_LRN][0] = csi_ref_lrn_quant; - bc_map[CSINN_OP_MATMUL][0] = csi_ref_matmul_quant; - bc_map[CSINN_OP_MAX][0] = csi_ref_max_stride_quant; - bc_map[CSINN_OP_MAXIMUM][0] = csi_ref_maximum_quant; - bc_map[CSINN_OP_MAXPOOL2D][0] = csi_ref_maxpool2d_quant; - bc_map[CSINN_OP_MAXPOOL2D_LOCAT][0] = csi_ref_maxpool2d_locat_quant; - bc_map[CSINN_OP_MAXPOOL3D][0] = csi_ref_maxpool3d_quant; - bc_map[CSINN_OP_MEAN][0] = csi_ref_mean_stride_quant; - bc_map[CSINN_OP_MEAN_STRIDE][0] = csi_ref_mean_stride_quant; - bc_map[CSINN_OP_MIN][0] = csi_ref_min_stride_quant; - bc_map[CSINN_OP_MINIMUM][0] = csi_c906_minimum_fp16; - bc_map[CSINN_OP_MOD][0] = csi_ref_mod_quant; - bc_map[CSINN_OP_MUL][0] = csi_c906_mul_fp16; - bc_map[CSINN_OP_NDARRAY_SIZE][0] = csi_ref_ndarray_size_i8; - bc_map[CSINN_OP_NEGATIIVE][0] = csi_ref_negative_quant; - bc_map[CSINN_OP_NOT_EQUAL][0] = csi_ref_not_equal_quant; - bc_map[CSINN_OP_NOT][0] = csi_ref_not_i8; - bc_map[CSINN_OP_OR][0] = csi_ref_or_i8; - bc_map[CSINN_OP_PAD][0] = csi_ref_pad_quant; - bc_map[CSINN_OP_POWER][0] = csi_ref_power_quant; - bc_map[CSINN_OP_PRELU][0] = csi_c906_prelu_fp16; - bc_map[CSINN_OP_PROD][0] = csi_ref_prod_stride_quant; - bc_map[CSINN_OP_PROPOSAL][0] = csi_ref_proposal_quant; - bc_map[CSINN_OP_PSROIPOOLING][0] = csi_ref_psroipooling_quant; - bc_map[CSINN_OP_REDUCE_LOGSUMEXP][0] = csi_ref_reduce_logsumexp_quant; - bc_map[CSINN_OP_REDUCE_MAX][0] = csi_ref_reduce_max_quant; - bc_map[CSINN_OP_REDUCE_MEAN][0] = csi_ref_reduce_mean_quant; - bc_map[CSINN_OP_REDUCE_MIN][0] = csi_ref_reduce_min_quant; - bc_map[CSINN_OP_REDUCE_PROD][0] = csi_ref_reduce_prod_quant; - bc_map[CSINN_OP_REDUCE_SUM][0] = csi_ref_reduce_sum_quant; - bc_map[CSINN_OP_RELU][0] = csi_c906_relu_fp16; - bc_map[CSINN_OP_RELU1][0] = csi_c906_relu1_fp16; - bc_map[CSINN_OP_RELU6][0] = csi_c906_relu6_fp16; - bc_map[CSINN_OP_RELUN][0] = csi_ref_relun_quant; - bc_map[CSINN_OP_RESHAPE][0] = csi_ref_reshape; - bc_map[CSINN_OP_RESIZE][0] = csi_ref_resize_quant; - bc_map[CSINN_OP_REVERSE][0] = csi_ref_reverse_quant; - bc_map[CSINN_OP_ROIPOOL][0] = csi_ref_roipool_quant; - bc_map[CSINN_OP_ROUND][0] = csi_ref_round_quant; - bc_map[CSINN_OP_RSQRT][0] = csi_ref_rsqrt_quant; - bc_map[CSINN_OP_SCATTER_ND][0] = csi_ref_scatter_nd_quant; - bc_map[CSINN_OP_SEGMENT_MAX][0] = csi_ref_segment_max_quant; - bc_map[CSINN_OP_UNSORTED_SEGMENT_MAX][0] = csi_ref_unsorted_segment_max_quant; - bc_map[CSINN_OP_SEGMENT_MEAN][0] = csi_ref_segment_mean_quant; - bc_map[CSINN_OP_UNSORTED_SEGMENT_MEAN][0] = csi_ref_unsorted_segment_mean_quant; - bc_map[CSINN_OP_SEGMENT_MIN][0] = csi_ref_segment_min_quant; - bc_map[CSINN_OP_UNSORTED_SEGMENT_MIN][0] = csi_ref_unsorted_segment_min_quant; - bc_map[CSINN_OP_SEGMENT_PROD][0] = csi_ref_segment_prod_quant; - bc_map[CSINN_OP_UNSORTED_SEGMENT_PROD][0] = csi_ref_unsorted_segment_prod_quant; - bc_map[CSINN_OP_SEGMENT_SUM][0] = csi_ref_segment_sum_quant; - bc_map[CSINN_OP_UNSORTED_SEGMENT_SUM][0] = csi_ref_unsorted_segment_sum_quant; - bc_map[CSINN_OP_SELECT][0] = csi_ref_select_i8; - bc_map[CSINN_OP_SHAPE][0] = csi_ref_shape_i8; - bc_map[CSINN_OP_SHUFFLE_CHANNEL][0] = csi_ref_shuffle_channel_quant; - bc_map[CSINN_OP_SIGMOID][0] = csi_ref_sigmoid_quant; - bc_map[CSINN_OP_SIGN][0] = csi_ref_sign_quant; - bc_map[CSINN_OP_SIN][0] = csi_ref_sin_quant; - bc_map[CSINN_OP_SINH][0] = csi_ref_sinh_quant; - bc_map[CSINN_OP_SLICE][0] = csi_ref_slice_quant; - bc_map[CSINN_OP_SOFTMAX][0] = csi_ref_softmax_quant; - bc_map[CSINN_OP_SOFTPLUS][0] = csi_ref_softplus_quant; - bc_map[CSINN_OP_SOFTRELU][0] = csi_ref_softrelu_quant; - bc_map[CSINN_OP_SOFTSIGN][0] = csi_ref_softsign_quant; - bc_map[CSINN_OP_SPACE_TO_BATCH][0] = csi_ref_space_to_batch_quant; - bc_map[CSINN_OP_SPACE_TO_DEPTH][0] = csi_ref_space_to_depth_quant; - bc_map[CSINN_OP_SPLIT][0] = csi_c906_split_fp16; - bc_map[CSINN_OP_SQRT][0] = csi_ref_sqrt_quant; - bc_map[CSINN_OP_SQUEEZE][0] = csi_ref_squeeze; - bc_map[CSINN_OP_STACK][0] = csi_ref_stack_quant; - bc_map[CSINN_OP_STRIDED_SLICE][0] = csi_ref_strided_slice_quant; - bc_map[CSINN_OP_SUB][0] = csi_c906_sub_fp16; - bc_map[CSINN_OP_SUM][0] = csi_ref_sum_stride_quant; - bc_map[CSINN_OP_TAN][0] = csi_ref_tan_quant; - bc_map[CSINN_OP_TANH][0] = csi_ref_tanh_quant; - bc_map[CSINN_OP_THRESHOLD_RELU][0] = csi_ref_threshold_relu_quant; - bc_map[CSINN_OP_TILE][0] = csi_ref_tile_quant; - bc_map[CSINN_OP_TOPK][0] = csi_ref_topk_quant; - bc_map[CSINN_OP_TRUNC][0] = csi_ref_trunc_quant; - bc_map[CSINN_OP_TRANSPOSE][0] = csi_ref_transpose_requant; - bc_map[CSINN_OP_TRUNC][0] = csi_ref_trunc_quant; - bc_map[CSINN_OP_UNPOOLING][0] = csi_ref_unpooling_quant; - bc_map[CSINN_OP_UNSTACK][0] = csi_ref_unstack_qunat; - bc_map[CSINN_OP_XOR][0] = csi_ref_xor_i8; - bc_map[CSINN_OP_YUV_RGB_SCALE][0] = csi_ref_yuv_rgb_scale_quant; + struct csi_bc_op_list *list_end = csi_bc_list_end(&csi_nn_c906_func_bc_op_list); + struct csi_bc_op_list *next = csi_mem_alloc(sizeof(struct csi_bc_op_list)); + next->bc = bc; + next->dtype = dtype; + next->op_name = op_name; + list_end->next = next; + return CSINN_TRUE; +} - bc_map[CSINN_OP_ABS][1] = csi_c906_abs_f32; - bc_map[CSINN_OP_ACOS][1] = csi_ref_acos_f32; - bc_map[CSINN_OP_ACOSH][1] = csi_ref_acosh_f32; - bc_map[CSINN_OP_ADD][1] = csi_c906_add_f32; - bc_map[CSINN_OP_ARANGE][1] = csi_ref_arange_f32; - bc_map[CSINN_OP_ARGMAX][1] = csi_ref_argmax_stride_i32_f32; - bc_map[CSINN_OP_ARGMIN][1] = csi_ref_argmin_stride_i32_f32; - bc_map[CSINN_OP_ASIN][1] = csi_ref_asin_f32; - bc_map[CSINN_OP_ASINH][1] = csi_ref_asinh_f32; - bc_map[CSINN_OP_ATAN][1] = csi_ref_atan_f32; - bc_map[CSINN_OP_ATANH][1] = csi_ref_atanh_f32; - bc_map[CSINN_OP_AVGPOOL2D][1] = csi_ref_avgpool2d_f32; - bc_map[CSINN_OP_AVGPOOL3D][1] = csi_ref_avgpool3d_f32; - bc_map[CSINN_OP_BN][1] = csi_ref_batch_normalization_f32; - bc_map[CSINN_OP_BATCH_TO_SPACE][1] = csi_ref_batch_to_space_f32; - bc_map[CSINN_OP_BROADCOST][1] = csi_ref_broadcast_to_f32; - bc_map[CSINN_OP_CEIL][1] = csi_ref_ceil_f32; - bc_map[CSINN_OP_CLIP][1] = csi_c906_clip_f32; - bc_map[CSINN_OP_COL2IM][1] = csi_ref_col2im_f32; - bc_map[CSINN_OP_CONCAT][1] = csi_c906_concat_f32; - bc_map[CSINN_OP_CONV2D][1] = csi_ref_conv2d_f32; - bc_map[CSINN_OP_CONV2D_RELU][1] = csi_ref_conv2d_relu_f32; - bc_map[CSINN_OP_DEPTHWISE_CONV2D][1] = csi_ref_depthwise_conv2d_f32; - bc_map[CSINN_OP_DEPTHWISE_CONV2D_RELU][1] = csi_ref_depthwise_conv2d_relu_f32; - bc_map[CSINN_OP_GROUP_CONV2D][1] = csi_ref_group_conv2d_f32; - bc_map[CSINN_OP_CONV3D][1] = csi_ref_conv3d_f32; - bc_map[CSINN_OP_DECONV2D][1] = csi_ref_deconv2d_f32; - bc_map[CSINN_OP_DEPTHWISE_DECONV2D][1] = csi_ref_depthwise_deconv2d_f32; - bc_map[CSINN_OP_DECONV3D][1] = csi_ref_deconv3d_f32; - bc_map[CSINN_OP_COS][1] = csi_ref_cos_f32; - bc_map[CSINN_OP_COSH][1] = csi_ref_cosh_f32; - bc_map[CSINN_OP_CUMPROD][1] = csi_ref_cumprod_f32; - bc_map[CSINN_OP_CUMSUM][1] = csi_ref_cumsum_f32; - bc_map[CSINN_OP_DEPTH_TO_SPACE][1] = csi_ref_depth_to_space_f32; - bc_map[CSINN_OP_DIV][1] = csi_ref_div_f32; - bc_map[CSINN_OP_ELU][1] = csi_ref_elu_f32; - bc_map[CSINN_OP_EQUANL][1] = csi_ref_equal_f32; - bc_map[CSINN_OP_ERF][1] = csi_ref_erf_f32; - bc_map[CSINN_OP_EXP][1] = csi_ref_exp_f32; - bc_map[CSINN_OP_EXPAND_DIMS][1] = csi_ref_expand_dims_f32; - bc_map[CSINN_OP_EXPM1][1] = csi_ref_expm1_f32; - bc_map[CSINN_OP_FLATTEN][1] = csi_ref_flatten; - bc_map[CSINN_OP_FLOOR_DIVIDE][1] = csi_ref_floor_divide_f32; - bc_map[CSINN_OP_FLOOR_MOD][1] = csi_ref_floor_mod_f32; - bc_map[CSINN_OP_FLOOR][1] = csi_ref_floor_f32; - bc_map[CSINN_OP_FSMN][1] = csi_ref_fsmn_f32; - bc_map[CSINN_OP_FULLYCONNECTED][1] = csi_c906_fullyconnected_f32; - bc_map[CSINN_OP_GATHER_ND][1] = csi_ref_gather_nd_f32; - bc_map[CSINN_OP_GATHER][1] = csi_ref_gather_f32; - bc_map[CSINN_OP_GLOBAL_AVGPOOL2D][1] = csi_c906_global_avgpool2d_f32; - bc_map[CSINN_OP_GLOBAL_MAXPOOL2D][1] = csi_c906_global_maxpool2d_f32; - bc_map[CSINN_OP_GREATHER_EQUAL][1] = csi_ref_greater_equal_f32; - bc_map[CSINN_OP_GREATHER][1] = csi_ref_greater_f32; - bc_map[CSINN_OP_HARD_SIGMOID][1] = csi_ref_hard_sigmoid_f32; - bc_map[CSINN_OP_IM2COL][1] = csi_ref_im2col_f32; - bc_map[CSINN_OP_ISNAN][1] = csi_ref_isnan_bool_f32; - bc_map[CSINN_OP_L2N][1] = csi_ref_l2_normalization_f32; - bc_map[CSINN_OP_L2POOL2D][1] = csi_ref_l2pool_f32; - bc_map[CSINN_OP_LEAKY_RELU][1] = csi_c906_leaky_relu_f32; - bc_map[CSINN_OP_LESS_EQUAL][1] = csi_ref_less_equal_f32; - bc_map[CSINN_OP_LESS][1] = csi_ref_less_f32; - bc_map[CSINN_OP_LOG_SOFTMAX][1] = csi_ref_log_softmax_f32; - bc_map[CSINN_OP_LOG][1] = csi_ref_log_f32; - bc_map[CSINN_OP_LOG1P][1] = csi_ref_log1p_f32; - bc_map[CSINN_OP_LOGICAL_AND][1] = csi_ref_logical_and_f32; - bc_map[CSINN_OP_LOGICAL_NOT][1] = csi_ref_logical_not_f32; - bc_map[CSINN_OP_LOGICAL_OR][1] = csi_ref_logical_or_f32; - bc_map[CSINN_OP_LOGICAL_XOR][1] = csi_ref_logical_xor_f32; - bc_map[CSINN_OP_LRN][1] = csi_ref_lrn_f32; - bc_map[CSINN_OP_MATMUL][1] = csi_ref_matmul_f32; - bc_map[CSINN_OP_MAX][1] = csi_ref_max_stride_f32; - bc_map[CSINN_OP_MAXIMUM][1] = csi_ref_maximum_f32; - bc_map[CSINN_OP_MAXPOOL2D][1] = csi_ref_maxpool2d_f32; - bc_map[CSINN_OP_MAXPOOL2D_LOCAT][1] = csi_ref_maxpool2d_locat_f32; - bc_map[CSINN_OP_MAXPOOL3D][1] = csi_ref_maxpool3d_f32; - bc_map[CSINN_OP_MEAN][1] = csi_ref_mean_stride_f32; - bc_map[CSINN_OP_MEAN_STRIDE][1] = csi_ref_mean_stride_f32; - bc_map[CSINN_OP_MINIMUM][1] = csi_c906_minimum_f32; - bc_map[CSINN_OP_MOD][1] = csi_ref_mod_f32; - bc_map[CSINN_OP_MUL][1] = csi_c906_mul_f32; - bc_map[CSINN_OP_NDARRAY_SIZE][1] = csi_ref_ndarray_size_f32; - bc_map[CSINN_OP_NEGATIIVE][1] = csi_ref_negative_f32; - bc_map[CSINN_OP_NOT_EQUAL][1] = csi_ref_not_equal_f32; - bc_map[CSINN_OP_PAD][1] = csi_ref_pad_f32; - bc_map[CSINN_OP_POWER][1] = csi_ref_power_f32; - bc_map[CSINN_OP_PRELU][1] = csi_c906_prelu_f32; - bc_map[CSINN_OP_PROD][1] = csi_ref_prod_stride_f32; - bc_map[CSINN_OP_PROPOSAL][1] = csi_ref_proposal_f32; - bc_map[CSINN_OP_PSROIPOOLING][1] = csi_ref_psroipooling_f32; - bc_map[CSINN_OP_REDUCE_LOGSUMEXP][1] = csi_ref_reduce_logsumexp_f32; - bc_map[CSINN_OP_REDUCE_MAX][1] = csi_ref_reduce_max_f32; - bc_map[CSINN_OP_REDUCE_MEAN][1] = csi_ref_reduce_mean_f32; - bc_map[CSINN_OP_REDUCE_MIN][1] = csi_ref_reduce_min_f32; - bc_map[CSINN_OP_REDUCE_PROD][1] = csi_ref_reduce_prod_f32; - bc_map[CSINN_OP_REDUCE_SUM][1] = csi_ref_reduce_sum_f32; - bc_map[CSINN_OP_RELU][1] = csi_c906_relu_f32; - bc_map[CSINN_OP_RELU1][1] = csi_c906_relu1_f32; - bc_map[CSINN_OP_RELU6][1] = csi_c906_relu6_f32; - bc_map[CSINN_OP_RELUN][1] = csi_ref_relun_f32; - bc_map[CSINN_OP_RESHAPE][1] = csi_ref_reshape; - bc_map[CSINN_OP_RESIZE][1] = csi_ref_resize_f32; - bc_map[CSINN_OP_REVERSE][1] = csi_ref_reverse_f32; - bc_map[CSINN_OP_ROIALIGN][1] = csi_ref_roi_align_f32; - bc_map[CSINN_OP_ROIPOOL][1] = csi_ref_roipool_f32; - bc_map[CSINN_OP_ROUND][1] = csi_ref_round_f32; - bc_map[CSINN_OP_RSQRT][1] = csi_ref_rsqrt_f32; - bc_map[CSINN_OP_SCATTER_ND][1] = csi_ref_scatter_nd_f32; - bc_map[CSINN_OP_SEGMENT_MAX][1] = csi_ref_segment_max_f32; - bc_map[CSINN_OP_UNSORTED_SEGMENT_MAX][1] = csi_ref_unsorted_segment_max_f32; - bc_map[CSINN_OP_SEGMENT_MEAN][1] = csi_ref_segment_mean_f32; - bc_map[CSINN_OP_UNSORTED_SEGMENT_MEAN][1] = csi_ref_unsorted_segment_mean_f32; - bc_map[CSINN_OP_SEGMENT_MIN][1] = csi_ref_segment_min_f32; - bc_map[CSINN_OP_UNSORTED_SEGMENT_MIN][1] = csi_ref_unsorted_segment_min_f32; - bc_map[CSINN_OP_SEGMENT_PROD][1] = csi_ref_segment_prod_f32; - bc_map[CSINN_OP_UNSORTED_SEGMENT_PROD][1] = csi_ref_unsorted_segment_prod_f32; - bc_map[CSINN_OP_SEGMENT_SUM][1] = csi_ref_segment_sum_f32; - bc_map[CSINN_OP_UNSORTED_SEGMENT_SUM][1] = csi_ref_unsorted_segment_sum_f32; - bc_map[CSINN_OP_SELECT][1] = csi_ref_select_f32; - bc_map[CSINN_OP_SHUFFLE_CHANNEL][1] = csi_ref_shuffle_channel_f32; - bc_map[CSINN_OP_SIGMOID][1] = csi_ref_sigmoid_f32; - bc_map[CSINN_OP_SIGN][1] = csi_ref_sign_f32; - bc_map[CSINN_OP_SIN][1] = csi_ref_sin_f32; - bc_map[CSINN_OP_SINH][1] = csi_ref_sinh_f32; - bc_map[CSINN_OP_SLICE][1] = csi_ref_slice_f32; - bc_map[CSINN_OP_SOFTMAX][1] = csi_ref_softmax_f32; - bc_map[CSINN_OP_SOFTPLUS][1] = csi_ref_softplus_f32; - bc_map[CSINN_OP_SOFTRELU][1] = csi_ref_softrelu_f32; - bc_map[CSINN_OP_SOFTSIGN][1] = csi_ref_softsign_f32; - bc_map[CSINN_OP_SPACE_TO_BATCH][1] = csi_ref_space_to_batch_f32; - bc_map[CSINN_OP_SPACE_TO_DEPTH][1] = csi_ref_space_to_depth_f32; - bc_map[CSINN_OP_SPLIT][1] = csi_c906_split_f32; - bc_map[CSINN_OP_SQRT][1] = csi_ref_sqrt_f32; - bc_map[CSINN_OP_SQUARE][1] = csi_ref_square_f32; - bc_map[CSINN_OP_SQUEEZE][1] = csi_ref_squeeze; - bc_map[CSINN_OP_STACK][1] = csi_ref_stack_f32; - bc_map[CSINN_OP_STRIDED_SLICE][1] = csi_ref_strided_slice_f32; - bc_map[CSINN_OP_SUB][1] = csi_c906_sub_f32; - bc_map[CSINN_OP_SUM][1] = csi_ref_sum_stride_f32; - bc_map[CSINN_OP_TAN][1] = csi_ref_tan_f32; - bc_map[CSINN_OP_TANH][1] = csi_ref_tanh_f32; - bc_map[CSINN_OP_THRESHOLD_RELU][1] = csi_ref_threshold_relu_f32; - bc_map[CSINN_OP_TILE][1] = csi_ref_tile_f32; - bc_map[CSINN_OP_TOPK][1] = csi_ref_topk_f32; - bc_map[CSINN_OP_TRUNC][1] = csi_ref_trunc_f32; - bc_map[CSINN_OP_TRANSPOSE][1] = csi_ref_transpose; - bc_map[CSINN_OP_TRUNC][1] = csi_ref_trunc_f32; - bc_map[CSINN_OP_UNPOOLING][1] = csi_ref_unpooling_f32; - bc_map[CSINN_OP_UNSTACK][1] = csi_ref_unstack_f32; - bc_map[CSINN_OP_YUV_RGB_SCALE][1] = csi_ref_yuv_rgb_scale_f32; +static inline void register_op_init_all(enum csinn_op_enum op_name, void *bc) +{ + csi_nn_c906_register_op_init(CSINN_DTYPE_FLOAT16, op_name, bc); + csi_nn_c906_register_op_init(CSINN_DTYPE_FLOAT32, op_name, bc); +} - return bc_map; +void __attribute__((weak)) csi_nn_c906_bc_init_reg() +{ + register_op_init_all(CSINN_OP_CONV2D, csi_c906_conv2d_init); + register_op_init_all(CSINN_OP_GROUP_CONV2D, csi_c906_conv2d_init); + register_op_init_all(CSINN_OP_CONV1D, csi_c906_conv1d_init); + register_op_init_all(CSINN_OP_MAXPOOL2D, csi_c906_maxpool2d_init); + register_op_init_all(CSINN_OP_AVGPOOL2D, csi_c906_avgpool2d_init); + register_op_init_all(CSINN_OP_DEPTHWISE_CONV2D, csi_c906_depthwise_conv2d_init); + register_op_init_all(CSINN_OP_FULLYCONNECTED, csi_c906_fullyconnected_init); + register_op_init_all(CSINN_OP_CACHE_MATMUL, csi_c906_cache_matmul_init); + register_op_init_all(CSINN_OP_DIV, csi_c906_div_init); + register_op_init_all(CSINN_OP_CACHE_CONV1D, csi_c906_cache_conv1d_init); } -static int get_bc_map_index(int op, int dtype) +void *csi_init_map_c906(int op, int dtype) { - switch (dtype) { - case CSINN_DTYPE_FLOAT16: - return op * 2; - break; - case CSINN_DTYPE_FLOAT32: - return op * 2 + 1; - break; - default: - return CSINN_UNSUPPORT_DTYPE; + static int has_reg; + if (has_reg == 0) { + csi_nn_c906_bc_init_reg(); + has_reg = 1; + } + void *ret = csi_bc_list_match(&csi_nn_c906_init_bc_op_list, dtype, op); + if (ret == NULL) { + csi_debug_info("no c906 init\n"); } + return ret; +} + +void __attribute__((weak)) csi_nn_c906_bc_reg() +{ + /* float16 */ + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ABS, csi_c906_abs_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ACOS, csi_ref_acos_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ACOSH, csi_ref_acosh_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ADD, csi_c906_add_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_AND, csi_ref_and_i8); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ARANGE, csi_ref_arange_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ARGMAX, csi_ref_argmax_stride_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ARGMIN, csi_ref_argmin_stride_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ASIN, csi_ref_asin_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ASINH, csi_ref_asinh_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ATAN, csi_ref_atan_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ATANH, csi_ref_atanh_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_AVGPOOL2D, csi_ref_avgpool2d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_AVGPOOL3D, csi_ref_avgpool3d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_BN, csi_ref_batch_normalization_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_BATCH_TO_SPACE, + csi_ref_batch_to_space_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_BROADCOST, csi_ref_broadcast_to_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CACHE_MATMUL, csi_c906_cache_matmul_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CACHE_CONV1D, csi_c906_cache_conv1d_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CEIL, csi_ref_ceil_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CLIP, csi_c906_clip_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CONCAT, csi_c906_concat_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CONV1D, csi_ref_conv1d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CONV2D, csi_ref_conv2d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CONV2D_RELU, csi_ref_conv2d_relu_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CONV2D_RELU6, csi_ref_conv2d_relu6_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_DEPTHWISE_CONV2D, + csi_ref_depthwise_conv2d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_DEPTHWISE_CONV2D_RELU, + csi_ref_depthwise_conv2d_relu_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_DEPTHWISE_CONV2D_RELU6, + csi_ref_depthwise_conv2d_relu6_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_GROUP_CONV2D, csi_ref_group_conv2d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CONV3D, csi_ref_conv3d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_DECONV2D, csi_ref_deconv2d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_DEPTHWISE_DECONV2D, + csi_ref_depthwise_deconv2d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_DECONV3D, csi_ref_deconv3d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_COS, csi_ref_cos_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_COSH, csi_ref_cosh_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CUMPROD, csi_ref_cumprod_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_CUMSUM, csi_ref_cumsum_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_DEPTH_TO_SPACE, + csi_ref_depth_to_space_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_DIV, csi_ref_div_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ELU, csi_ref_elu_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_EQUANL, csi_ref_equal_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ERF, csi_ref_erf_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_EXP, csi_ref_exp_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_EXPAND_DIMS, csi_ref_expand_dims_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_EXPM1, csi_ref_expm1_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_FLATTEN, csi_ref_flatten); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_FLOOR_DIVIDE, csi_ref_floor_divide_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_FLOOR_MOD, csi_ref_floor_mod_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_FLOOR, csi_ref_floor_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_FSMN, csi_ref_fsmn_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_FULLYCONNECTED, + csi_c906_fullyconnected_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_GATHER_ND, csi_ref_gather_nd_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_GATHER, csi_c906_gather_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_GLOBAL_AVGPOOL2D, + csi_ref_global_avgpool2d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_GLOBAL_MAXPOOL2D, + csi_ref_global_maxpool2d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_GREATHER_EQUAL, + csi_ref_greater_equal_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_GREATHER, csi_ref_greater_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_HARD_SIGMOID, csi_ref_hard_sigmoid_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_IM2COL, csi_ref_im2col_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_L2N, csi_ref_l2_normalization_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LAYER_NORM, csi_c906_layer_norm_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LEAKY_RELU, csi_c906_leaky_relu_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LESS_EQUAL, csi_ref_less_equal_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LESS, csi_ref_less_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LOG_SOFTMAX, csi_ref_log_softmax_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LOG, csi_ref_log_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LOG1P, csi_ref_log1p_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LOGICAL_AND, csi_ref_logical_and_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LOGICAL_NOT, csi_ref_logical_not_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LOGICAL_OR, csi_ref_logical_or_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LOGICAL_XOR, csi_ref_logical_xor_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_LRN, csi_c906_lrn_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MATMUL, csi_c906_matmul_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MAX, csi_ref_max_stride_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MAXIMUM, csi_ref_maximum_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MAXPOOL2D, csi_ref_maxpool2d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MAXPOOL2D_LOCAT, + csi_ref_maxpool2d_locat_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MAXPOOL3D, csi_ref_maxpool3d_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MEAN, csi_ref_mean_stride_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MEAN_STRIDE, csi_ref_mean_stride_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MIN, csi_ref_min_stride_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MINIMUM, csi_c906_minimum_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MOD, csi_ref_mod_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_MUL, csi_c906_mul_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_NDARRAY_SIZE, csi_ref_ndarray_size_i8); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_NEGATIIVE, csi_ref_negative_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_NOT_EQUAL, csi_ref_not_equal_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_NOT, csi_ref_not_i8); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_OR, csi_ref_or_i8); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_PAD, csi_ref_pad_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_POWER, csi_ref_power_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_PRELU, csi_c906_prelu_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_PROD, csi_ref_prod_stride_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_PROPOSAL, csi_ref_proposal_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_PSROIPOOLING, csi_ref_psroipooling_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_REDUCE_LOGSUMEXP, + csi_ref_reduce_logsumexp_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_REDUCE_MAX, csi_ref_reduce_max_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_REDUCE_MEAN, csi_ref_reduce_mean_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_REDUCE_MIN, csi_ref_reduce_min_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_REDUCE_PROD, csi_ref_reduce_prod_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_REDUCE_SUM, csi_ref_reduce_sum_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_RELU, csi_c906_relu_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_RELU1, csi_c906_relu1_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_RELU6, csi_c906_relu6_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_RELUN, csi_ref_relun_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_RESHAPE, csi_c906_reshape_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_RESIZE, csi_ref_resize_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_REVERSE, csi_ref_reverse_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ROIPOOL, csi_ref_roipool_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_ROUND, csi_ref_round_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_RSQRT, csi_ref_rsqrt_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SCATTER_ND, csi_ref_scatter_nd_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SEGMENT_MAX, csi_ref_segment_max_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_UNSORTED_SEGMENT_MAX, + csi_ref_unsorted_segment_max_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SEGMENT_MEAN, csi_ref_segment_mean_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_UNSORTED_SEGMENT_MEAN, + csi_ref_unsorted_segment_mean_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SEGMENT_MIN, csi_ref_segment_min_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_UNSORTED_SEGMENT_MIN, + csi_ref_unsorted_segment_min_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SEGMENT_PROD, csi_ref_segment_prod_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_UNSORTED_SEGMENT_PROD, + csi_ref_unsorted_segment_prod_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SEGMENT_SUM, csi_ref_segment_sum_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_UNSORTED_SEGMENT_SUM, + csi_ref_unsorted_segment_sum_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SELECT, csi_ref_select_i8); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SHAPE, csi_ref_shape_i8); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SHUFFLE_CHANNEL, + csi_ref_shuffle_channel_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SIGMOID, csi_nn_rvv_sigmoid_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SIGN, csi_ref_sign_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SIN, csi_ref_sin_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SINH, csi_ref_sinh_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SLICE, csi_ref_slice_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SOFTMAX, csi_nn_rvv_softmax_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SOFTPLUS, csi_ref_softplus_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SOFTRELU, csi_ref_softrelu_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SOFTSIGN, csi_ref_softsign_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SPACE_TO_BATCH, + csi_ref_space_to_batch_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SPACE_TO_DEPTH, + csi_ref_space_to_depth_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SPLIT, csi_c906_split_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SQRT, csi_ref_sqrt_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SQUEEZE, csi_ref_squeeze); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_STACK, csi_ref_stack_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_STRIDED_SLICE, + csi_ref_strided_slice_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SUB, csi_c906_sub_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_SUM, csi_c906_sum_stride_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_TAN, csi_ref_tan_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_TANH, csi_ref_tanh_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_THRESHOLD_RELU, + csi_ref_threshold_relu_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_TILE, csi_ref_tile_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_TOPK, csi_ref_topk_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_TRUNC, csi_ref_trunc_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_TRANSPOSE, csi_c906_transpose_fp16); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_UNPOOLING, csi_ref_unpooling_quant); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_UNSTACK, csi_ref_unstack_qunat); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_XOR, csi_ref_xor_i8); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT16, CSINN_OP_YUV_RGB_SCALE, + csi_ref_yuv_rgb_scale_quant); + + /* float32 */ + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ABS, csi_c906_abs_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ACOS, csi_ref_acos_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ACOSH, csi_ref_acosh_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ADD, csi_c906_add_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ARANGE, csi_ref_arange_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ARGMAX, csi_ref_argmax_stride_i32_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ARGMIN, csi_ref_argmin_stride_i32_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ASIN, csi_ref_asin_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ASINH, csi_ref_asinh_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ATAN, csi_ref_atan_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ATANH, csi_ref_atanh_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_AVGPOOL2D, csi_ref_avgpool2d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_AVGPOOL3D, csi_ref_avgpool3d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_BN, csi_ref_batch_normalization_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_BATCH_TO_SPACE, + csi_ref_batch_to_space_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_BROADCOST, csi_ref_broadcast_to_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CACHE_MATMUL, csi_ref_cache_matmul_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CACHE_CONV1D, csi_ref_cache_conv1d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CEIL, csi_ref_ceil_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CLIP, csi_c906_clip_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CONCAT, csi_c906_concat_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CONV1D, csi_ref_conv1d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CONV2D, csi_ref_conv2d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CONV2D_RELU, csi_ref_conv2d_relu_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_DEPTHWISE_CONV2D, + csi_ref_depthwise_conv2d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_GROUP_CONV2D, csi_ref_group_conv2d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CONV3D, csi_ref_conv3d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_DECONV2D, csi_ref_deconv2d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_DEPTHWISE_DECONV2D, + csi_ref_depthwise_deconv2d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_DECONV3D, csi_ref_deconv3d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_COS, csi_ref_cos_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_COSH, csi_ref_cosh_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CUMPROD, csi_ref_cumprod_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_CUMSUM, csi_ref_cumsum_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_DEPTH_TO_SPACE, + csi_ref_depth_to_space_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_DIV, csi_ref_div_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ELU, csi_ref_elu_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_EQUANL, csi_ref_equal_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ERF, csi_ref_erf_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_EXP, csi_ref_exp_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_EXPAND_DIMS, csi_ref_expand_dims_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_EXPM1, csi_ref_expm1_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_FLATTEN, csi_ref_flatten); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_FLOOR_DIVIDE, csi_ref_floor_divide_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_FLOOR_MOD, csi_ref_floor_mod_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_FLOOR, csi_ref_floor_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_FSMN, csi_ref_fsmn_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_FULLYCONNECTED, + csi_c906_fullyconnected_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_GATHER_ND, csi_ref_gather_nd_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_GATHER, csi_ref_gather_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_GLOBAL_AVGPOOL2D, + csi_c906_global_avgpool2d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_GLOBAL_MAXPOOL2D, + csi_c906_global_maxpool2d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_GREATHER_EQUAL, + csi_ref_greater_equal_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_GREATHER, csi_ref_greater_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_HARD_SIGMOID, csi_ref_hard_sigmoid_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_IM2COL, csi_ref_im2col_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_L2N, csi_ref_l2_normalization_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_L2POOL2D, csi_ref_l2pool_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LAYER_NORM, csi_ref_layer_norm_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LEAKY_RELU, csi_c906_leaky_relu_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LESS_EQUAL, csi_ref_less_equal_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LESS, csi_ref_less_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LOG_SOFTMAX, csi_ref_log_softmax_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LOG, csi_ref_log_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LOG1P, csi_ref_log1p_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LOGICAL_AND, csi_ref_logical_and_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LOGICAL_NOT, csi_ref_logical_not_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LOGICAL_OR, csi_ref_logical_or_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LOGICAL_XOR, csi_ref_logical_xor_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_LRN, csi_ref_lrn_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MATMUL, csi_ref_matmul_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MAX, csi_ref_max_stride_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MAXIMUM, csi_ref_maximum_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MAXPOOL2D, csi_ref_maxpool2d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MAXPOOL2D_LOCAT, + csi_ref_maxpool2d_locat_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MAXPOOL3D, csi_ref_maxpool3d_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MEAN, csi_ref_mean_stride_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MEAN_STRIDE, csi_ref_mean_stride_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MINIMUM, csi_c906_minimum_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MOD, csi_ref_mod_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_MUL, csi_c906_mul_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_NDARRAY_SIZE, csi_ref_ndarray_size_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_NEGATIIVE, csi_ref_negative_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_NOT_EQUAL, csi_ref_not_equal_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_PAD, csi_ref_pad_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_POWER, csi_ref_power_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_PRELU, csi_c906_prelu_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_PROD, csi_ref_prod_stride_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_PROPOSAL, csi_ref_proposal_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_PSROIPOOLING, csi_ref_psroipooling_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_REDUCE_LOGSUMEXP, + csi_ref_reduce_logsumexp_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_REDUCE_MAX, csi_ref_reduce_max_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_REDUCE_MEAN, csi_ref_reduce_mean_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_REDUCE_MIN, csi_ref_reduce_min_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_REDUCE_PROD, csi_ref_reduce_prod_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_REDUCE_SUM, csi_ref_reduce_sum_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_RELU, csi_c906_relu_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_RELU1, csi_c906_relu1_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_RELU6, csi_c906_relu6_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_RELUN, csi_ref_relun_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_RESHAPE, csi_ref_reshape); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_RESIZE, csi_ref_resize_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_REVERSE, csi_ref_reverse_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ROIALIGN, csi_ref_roi_align_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ROIPOOL, csi_ref_roipool_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_ROUND, csi_ref_round_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_RSQRT, csi_ref_rsqrt_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SCATTER_ND, csi_ref_scatter_nd_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SEGMENT_MAX, csi_ref_segment_max_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_UNSORTED_SEGMENT_MAX, + csi_ref_unsorted_segment_max_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SEGMENT_MEAN, csi_ref_segment_mean_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_UNSORTED_SEGMENT_MEAN, + csi_ref_unsorted_segment_mean_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SEGMENT_MIN, csi_ref_segment_min_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_UNSORTED_SEGMENT_MIN, + csi_ref_unsorted_segment_min_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SEGMENT_PROD, csi_ref_segment_prod_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_UNSORTED_SEGMENT_PROD, + csi_ref_unsorted_segment_prod_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SEGMENT_SUM, csi_ref_segment_sum_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_UNSORTED_SEGMENT_SUM, + csi_ref_unsorted_segment_sum_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SELECT, csi_ref_select_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SHUFFLE_CHANNEL, + csi_ref_shuffle_channel_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SIGMOID, csi_ref_sigmoid_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SIGN, csi_ref_sign_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SIN, csi_ref_sin_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SINH, csi_ref_sinh_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SLICE, csi_ref_slice_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SOFTMAX, csi_ref_softmax_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SOFTPLUS, csi_ref_softplus_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SOFTRELU, csi_ref_softrelu_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SOFTSIGN, csi_ref_softsign_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SPACE_TO_BATCH, + csi_ref_space_to_batch_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SPACE_TO_DEPTH, + csi_ref_space_to_depth_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SPLIT, csi_c906_split_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SQRT, csi_ref_sqrt_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SQUEEZE, csi_ref_square_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_STACK, csi_ref_stack_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_STRIDED_SLICE, csi_ref_strided_slice_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SUB, csi_c906_sub_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_SUM, csi_ref_sum_stride_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_TAN, csi_ref_tan_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_TANH, csi_ref_tanh_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_THRESHOLD_RELU, + csi_ref_threshold_relu_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_TILE, csi_ref_tile_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_TOPK, csi_ref_topk_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_TRUNC, csi_ref_trunc_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_TRANSPOSE, csi_ref_transpose); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_UNPOOLING, csi_ref_unpooling_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_UNSTACK, csi_ref_unstack_f32); + csi_nn_c906_register_op(CSINN_DTYPE_FLOAT32, CSINN_OP_YUV_RGB_SCALE, csi_ref_yuv_rgb_scale_f32); + + /* int8 */ + csi_nn_c906_register_op(CSINN_DTYPE_INT8, CSINN_OP_CONCAT, csi_nn_rvv_concat_int8); + csi_nn_c906_register_op(CSINN_DTYPE_INT8, CSINN_OP_MUL, csi_nn_rvv_mul_int8); + csi_nn_c906_register_op(CSINN_DTYPE_INT8, CSINN_OP_RELU, csi_nn_rvv_relu_int8); + csi_nn_c906_register_op(CSINN_DTYPE_INT8, CSINN_OP_RESHAPE, csi_ref_reshape); + csi_nn_c906_register_op(CSINN_DTYPE_INT8, CSINN_OP_SUM, csi_nn_rvv_sum_stride_int8); + csi_nn_c906_register_op(CSINN_DTYPE_INT8, CSINN_OP_SOFTMAX, csi_ref_softmax_quant); } void *csi_bc_map_c906(int op, int dtype) { - static int has_init; - static void **bc_map_table; - if (has_init == 0) { - bc_map_table = setup_bc_map(); - has_init = 1; + static int has_reg; + if (has_reg == 0) { + csi_nn_c906_bc_reg(); + has_reg = 1; + } + void *ret = csi_bc_list_match(&csi_nn_c906_func_bc_op_list, dtype, op); + if (ret == NULL) { + csi_debug_info("cannot find c906 func\n"); } - return bc_map_table[get_bc_map_index(op, dtype)]; + return ret; } diff --git a/source/c906_opt/sgemm.c b/source/c906_opt/sgemm.c index ed8e993a..492ab65f 100644 --- a/source/c906_opt/sgemm.c +++ b/source/c906_opt/sgemm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -675,6 +675,88 @@ void csi_c906_reorder_input(float *b, float *sb, int k, int n, int ldx) } +void csi_c906_reorder_input_1(float *b, float *sb, int k, int n, int ldx) +{ + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" // set vl = 8 + + "slli t2, %4, 2\n\t" // t2 = ldx * 4 (line stride) + + "srai t0, %3, 2\n\t" // t0 = n4 + "beqz t0, 3f\n\t" // jump to packn_tail + + "1:\n\t" // n4 + "mv a0, %0\n\t" + "addi %0, %0, 16\n\t" + "mv t1, %2\n\t" // k + + "2:\n\t" + // start packn8k1 + "vle.v v2, (a0)\n\t" + "add a0, a0, t2\n\t" + "vse.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t1, t1, -1\n\t" + "bnez t1, 2b\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "3:\n\t" // n_tail + "andi t0, %3, 3\n\t" // n & 3u + "beqz t0, 8f\n\t" + + "srai t3, %2, 2\n\t" // k4 + "slli t5, %4, 4\n\t" // t5 = ldx * 4 * 4 (4 lines) + "andi t6, %2, 3\n\t" // k_tail + "slli t4, t6, 2\n\t" // k_tail * 4 + + "4:\n\t" + "mv a0, %0\n\t" + "addi %0, %0, 4\n\t" + "mv t1, t3\n\t" // t1 = k4 + "beqz t3, 6f\n\t" + + "5:\n\t" + "vsetvli zero, zero, e32, m1\n\t" + "vlse.v v2, (a0), t2\n\t" + "add a0, a0, t5\n\t" + "vse.v v2, (%1)\n\t" + "addi %1, %1, 16\n\t" + + "addi t1, t1, -1\n\t" + "bnez t1, 5b\n\t" + + "6:\n\t" + "vsetvli zero, t6, e32, m1\n\t" + "vlse.v v2, (a0), t2\n\t" + "vse.v v2, (%1)\n\t" + "add %1, %1, t4\n\t" + + "7:\n\t" + "addi t0, t0, -1\n\t" + "bnez t0, 4b\n\t" + + + "8:\n\t" // ending + + + :"=r"(b), // %0 + "=r"(sb), // %1 + "=r"(k), // %2 + "=r"(n), // %3 + "=r"(ldx) // %4 + :"0"(b), + "1"(sb), + "2"(k), + "3"(n), + "4"(ldx) + :"v0", "v2", "a0", + "t0", "t1", "t2", "t3", "t4", "t5", "t6" + ); +} + static inline void kernel_m1_f32(float* dst, float* sa, float* sb, int m, int k, int n, int ldc, float* bias, bool fuse_relu) { float *pa = sa; @@ -2869,6 +2951,171 @@ static inline void kernel_m4_f32(float* dst, float* sa, float* sb, int m, int k, #endif // __riscv_vector } + +static inline void kernel_m4_f32_1(float* dst, float* sa, float* sb, int m, int k, int n, int ldc, float* bias, bool fuse_relu) +{ + asm volatile( + "vsetvli zero, zero, e32, m1\n\t" // set vl = 4 + + "flw fs0, 0(%2)\n\t" + "flw fs1, 4(%2)\n\t" + "flw fs2, 8(%2)\n\t" + "flw fs3, 12(%2)\n\t" + + // init output addr + "slli t5, %6, 2\n\t" // t5_tmp = ldx * 4 + "mv a0, %3\n\t" + "add a1, a0, t5\n\t" + "add a2, a1, t5\n\t" + "add a3, a2, t5\n\t" + + "srai t0, %5, 2\n\t" // t0 = n >> 2 (n4) + "beqz t0, 4f\n\t" + + "1:\n\t" // m4n4 + // start kernel_m4n4 + "vfmv.v.f v24, fs0\n\t" + "vfmv.v.f v25, fs1\n\t" + "vfmv.v.f v26, fs2\n\t" + "vfmv.v.f v27, fs3\n\t" // init acc = bias + + "mv t6, %0\n\t" // t6 hold kernel 4 lines start addr + "mv t5, %4\n\t" // t5 = k (k > 0) + + "2:\n\t" + // start subkernel_m4n4k1 + "vle.v v1, (%1)\n\t" + "addi %1, %1, 16\n\t" + "flw fa0, 0(t6)\n\t" + "flw fa1, 4(t6)\n\t" + "flw fa2, 8(t6)\n\t" + "flw fa3, 12(t6)\n\t" + "addi t6, t6, 16\n\t" + + "vfmacc.vf v24, fa0, v1\n\t" + "vfmacc.vf v25, fa1, v1\n\t" + "vfmacc.vf v26, fa2, v1\n\t" + "vfmacc.vf v27, fa3, v1\n\t" + + "addi t5, t5, -1\n\t" + "bnez t5, 2b\n\t" + + "3:\n\t" // end kernel_m4n4 + + "vse.v v24, (a0)\n\t" + "addi a0, a0, 16\n\t" + "vse.v v25, (a1)\n\t" + "addi a1, a1, 16\n\t" + "vse.v v26, (a2)\n\t" + "addi a2, a2, 16\n\t" + "vse.v v27, (a3)\n\t" + "addi a3, a3, 16\n\t" + + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + + "4:\n\t" // m4n2 + "andi t0, %5, 3\n\t" // n & 3 + "srai t0, t0, 1\n\t" // (n & 3) >> 2 + "beqz t0, 7f\n\t" // jump to m4n1 + // start kernel_m4n2 + "vle.v v24, (%2)\n\t" + "vle.v v25, (%2)\n\t" // init acc = bias + + // init addr for pa, pb and pc + "slli t0, %4, 2\n\t" // t0_tmp = k * 4 + + "mv t6, %0\n\t" // t6 hold pa(kernel) 2 lines start addr + + "mv a4, %1\n\t" + "add a5, a4, t0\n\t" // a4-a5 hold pb(input) 2 cols addr + + "addi a1, a0, 4\n\t" // a0-a1 hold pc(output) addr + + "mv t5, %4\n\t" // t5 = k + + "5:\n\t" + // start subkernel_m4n2k1 + "vle.v v1, (t6)\n\t" + "addi t6, t6, 16\n\t" + "flw fa0, 0(a4)\n\t" + "vfmacc.vf v24, fa0, v1\n\t" + "flw fa1, 0(a5)\n\t" + "vfmacc.vf v25, fa1, v1\n\t" + + "addi a4, a4, 4\n\t" + "addi a5, a5, 4\n\t" + + "addi t5, t5, -1\n\t" + "bnez t5, 5b\n\t" + + "6:\n\t" // end kernel_m4n2 + "slli t0, %6, 2\n\t" // t0_tmp = ldx * 4 (store_stride) + + "vsse.v v24, (a0), t0\n\t" + "vsse.v v25, (a1), t0\n\t" + + "addi a0, a0, 8\n\t" // updata output start addr ( +2 cols) + "slli t0, %4, 3\n\t" // t_tmp = k * 2 * 4 + "add %1, %1, t0\n\t" // updata pb start addr + + + "7:\n\t" // m4n1 + "andi t0, %5, 1\n\t" // n & 1 + "beqz t0, 10f\n\t" // jump to ending + // start kernel_m8n1 + + "vle.v v24, (%2)\n\t" // init out_tmp = bias + + // init addr for pa, pb and pc + "mv t6, %0\n\t" // t6 hold pa(kernel) 8 lines start addr + "mv a4, %1\n\t" // a4 hold pb(input) 1 cols addr + // a0 hold pc(output) addr + + "mv t5, %4\n\t" // t5 = k + + "8:\n\t" + // start subkernel_m8n1k8 + "vle.v v1, (t6)\n\t" + "addi t6, t6, 16\n\t" + "flw fa0, 0(a4)\n\t" + "vfmacc.vf v24, fa0, v1\n\t" // 0 + + "addi a4, a4, 4\n\t" + + "addi t5, t5, -1\n\t" + "bnez t5, 8b\n\t" + + "9:\n\t" // end kernel_m8n1 + "slli t0, %6, 2\n\t" // t0_tmp = ldx * 4 (store_stride) + + "vsse.v v24, (a0), t0\n\t" + + "10:\n\t" // ending + + + :"=r"(sa), // %0 + "=r"(sb), // %1 + "=r"(bias), // %2 + "=r"(dst), // %3 + "=r"(k), // %4 + "=r"(n), // %5 + "=r"(ldc) // %6 + :"0"(sa), + "1"(sb), + "2"(bias), + "3"(dst), + "4"(k), + "5"(n), + "6"(ldc) + :"v1", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t5", "t6", + "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7" + ); + +} + + void csi_c906_sgemm_kernel_f32(float* dst, const float* sa, const float* sb, int m, int k, int n, int ldc, float* bias, bool fuse_relu) { float* pa = (float *)sa; @@ -2885,7 +3132,7 @@ void csi_c906_sgemm_kernel_f32(float* dst, const float* sa, const float* sb, int const int mm = (m >> 2) << 2; for (int i = 0; i < mm; i += 4) { - kernel_m4_f32(pc + i * ldc, pa + i * k, pb, m, k, n, ldc, bias_tmp + i, fuse_relu); + kernel_m4_f32_1(pc + i * ldc, pa + i * k, pb, m, k, n, ldc, bias_tmp + i, fuse_relu); } pa += mm * k; diff --git a/include/include_xt800/dsp_include/utils.h b/source/c906_opt/sgemv.c similarity index 69% rename from include/include_xt800/dsp_include/utils.h rename to source/c906_opt/sgemv.c index 19a0a2c7..58ce5258 100644 --- a/include/include_xt800/dsp_include/utils.h +++ b/source/c906_opt/sgemv.c @@ -1,5 +1,5 @@ /* -* Copyright (C) 2016-2021 PingTouGe Semiconductor Co., Ltd Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,7 @@ * limitations under the License. */ -#ifndef _UTILS_H +/* CSI-NN2 version 1.12.x */ -#define weak_alias(name, aliasname) extern __typeof(name) aliasname __attribute__((weak, alias(#name))) +#include "csi_c906.h" -#endif /* _UTILS_H */ - -/** - * - * End of file. - */ diff --git a/source/c906_opt/split.c b/source/c906_opt/split.c index 447e384b..d8b72298 100644 --- a/source/c906_opt/split.c +++ b/source/c906_opt/split.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/c906_opt/sub.c b/source/c906_opt/sub.c index 136fa3c1..f091f48f 100644 --- a/source/c906_opt/sub.c +++ b/source/c906_opt/sub.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -62,6 +62,39 @@ int csi_c906_sub_f32(struct csi_tensor *input0, int in_size1 = csi_tensor_size(input1); int out_size = csi_tensor_size(output); + // HACK: special case + // example: [1, 64, 55, 55] + [1, 64, 1, 1] = [1, 64, 55, 55] + if ((input1->dim[2] == 1) && (input1->dim[3] == 1) && (input1->dim[1] == input0->dim[1])) { + int inner_size = input0->dim[2] * input0->dim[3]; + int outer_size = input0->dim[1]; + asm volatile( + "1:\n\t" + "flw ft0, 0(%2)\n\t" + "mv t1, %4\n\t" + "2:\n\t" + "vsetvli t0, t1, e32, m2\n\t" + "vle.v v8, (%1)\n\t" + "sub t1, t1, t0\n\t" + "slli t0, t0, 2\n\t" // element: 4 bytes + "add %1, %1, t0\n\t" + "vfsub.vf v16, v8, ft0\n\t" + "vse.v v16, (%0)\n\t" + "add %0, %0, t0\n\t" + "bnez t1, 2b\n\t" + "addi %3, %3, -1\n\t" + "addi %2, %2, 4\n\t" + "bnez %3, 1b\n\t" + + : "=r"(output_data), // %0 + "=r"(input0_data), // %1 + "=r"(input1_data), // %2 + "=r"(outer_size), // %3 + "=r"(inner_size) // %4 + : "0"(output_data), "1"(input0_data), "2"(input1_data), "3"(outer_size), "4"(inner_size) + : "v8", "v9", "v16", "v17", "t0", "t1", "ft0"); + return CSINN_TRUE; + } + // example: [1, 3, 224, 224] + [1] = [1, 3, 224, 224] if (in_size1 == 1) { asm volatile( @@ -183,6 +216,37 @@ int csi_c906_sub_fp16(struct csi_tensor *input0, int in_size1 = csi_tensor_size(input1); int out_size = csi_tensor_size(output); + if ((input1->dim[2] == 1) && (input1->dim[3] == 1) && (input1->dim[1] == input0->dim[1])) { + int inner_size = input0->dim[2] * input0->dim[3]; + int outer_size = input0->dim[1]; + asm volatile( + "1:\n\t" + "flh ft0, 0(%2)\n\t" + "mv t1, %4\n\t" + "2:\n\t" + "vsetvli t0, t1, e16, m2\n\t" + "vle.v v8, (%1)\n\t" + "sub t1, t1, t0\n\t" + "slli t0, t0, 1\n\t" + "add %1, %1, t0\n\t" + "vfsub.vf v16, v8, ft0\n\t" + "vse.v v16, (%0)\n\t" + "add %0, %0, t0\n\t" + "bnez t1, 2b\n\t" + "addi %3, %3, -1\n\t" + "addi %2, %2, 2\n\t" + "bnez %3, 1b\n\t" + + : "=r"(output_data), // %0 + "=r"(input0_data), // %1 + "=r"(input1_data), // %2 + "=r"(outer_size), // %3 + "=r"(inner_size) // %4 + : "0"(output_data), "1"(input0_data), "2"(input1_data), "3"(outer_size), "4"(inner_size) + : "v8", "v9", "v16", "v17", "t0", "t1", "ft0"); + return CSINN_TRUE; + } + if (in_size1 == 1) { asm volatile( "flh ft0, 0(%2)\n\t" diff --git a/source/c906_opt/sum.c b/source/c906_opt/sum.c new file mode 100644 index 00000000..9514bd96 --- /dev/null +++ b/source/c906_opt/sum.c @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" + +// reduce_sum +int csi_c906_sum_stride_fp16(struct csi_tensor *input, + struct csi_tensor *output, + struct reduce_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + //axis=none + if(*(params->axis) == -1) { + int size = 1; + for(int i = 0; i < input->dim_count; i++) { + size = size * input->dim[i]; + } + + asm volatile( + "vsetvli zero, zero, e16, m2\n\t" + "fmv.h.x ft0, zero\n\t" // clear + "vfmv.s.f v0, ft0\n\t" // v6[0] = bias + "srai t0, %2, 4\n\t" // t0 = size_16 + "andi t1, %2, 15\n\t" // size tail + "vmv.v.x v2, zero\n\t" // clear + "beqz t0, 2f\n\t" + "1:\n\t" + "vle.v v4, (%0)\n\t" + "addi %0, %0, 32\n\t" + "vfadd.vv v2, v2, v4\n\t" + "addi t0, t0, -1\n\t" + "bnez t0, 1b\n\t" + "2:\n\t" + "vsetvli zero, t1, e16, m2\n\t" + "vle.v v4, (%0)\n\t" + "vfadd.vv v2, v2, v4\n\t" + + "3:\n\t" + "vfredsum.vs v0, v2, v0\n\t" // v0[0] = v0[0] + sum(v2[0..7]) + "vfmv.f.s ft0, v0\n\t" + "fsh ft0, 0(%1)\n\t" + + :"=r"(input_data), // %0 + "=r"(output_data), // %1 + "=r"(size) // %2 + :"0"(input_data), + "1"(output_data), + "2"(size) + :"cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "t0", "t1", "ft0" + ); + + } else { + int axis = *(params->axis); + int64_t outer_size = 1; + for(int i = 0; i < axis; i++) { + outer_size *= input->dim[i]; + } + int64_t inner_size = 1; + for(int i = axis + 1; i < input->dim_count; i++) { + inner_size *= input->dim[i]; + } + int cnt = input->dim[axis]; + + asm volatile( + "vsetvli zero, zero, e16, m2\n\t" + "mulw t0, %3, %4\n\t" + "slli t0, t0, 1\n\t" // inner_size * cnt * 2 + "slli t1, %3, 1\n\t" // inner_size * 2 + + "0:\n\t" // outer_size loop + "srai t2, %3, 4\n\t" // inner_size 16 + "andi t3, %3, 15\n\t" // inner_size tail + "mv a0, %0\n\t" + "beqz t2, 3f\n\t" + + "1:\n\t" // inner_size_16 loop + "mv a1, a0\n\t" + "vmv.v.x v2, zero\n\t" + "mv t4, %4\n\t" // t4 = cnt + + "2:\n\t" // cnt loop + "vle.v v0, (a1)\n\t" + "add a1, a1, t1\n\t" + "vfadd.vv v2, v2, v0\n\t" + + "addi t4, t4, -1\n\t" + "bnez t4, 2b\n\t" + + "vse.v v2, (%1)\n\t" + "addi %1, %1, 32\n\t" + "addi a0, a0, 32\n\t" + + "addi t2, t2, -1\n\t" + "bnez t2, 1b\n\t" + + "3:\n\t" // inner_size tail + "vsetvli zero, t3, e16, m2\n\t" + "vmv.v.x v2, zero\n\t" + "mv t4, %4\n\t" // t4 = cnt + + "4:\n\t" // cnt loop + "vle.v v0, (a0)\n\t" + "add a0, a0, t1\n\t" + "vfadd.vv v2, v2, v0\n\t" + + "addi t4, t4, -1\n\t" + "bnez t4, 4b\n\t" + + "vse.v v2, (%1)\n\t" + "add %1, %1, t3\n\t" + "add %1, %1, t3\n\t" + + "add %0, %0, t0\n\t" + "addi %2, %2, -1\n\t" + "bnez %2, 0b\n\t" + + :"=r"(input_data), // %0 + "=r"(output_data), // %1 + "=r"(outer_size), // %2 + "=r"(inner_size), // %3 + "=r"(cnt) // %4 + :"0"(input_data), + "1"(output_data), + "2"(outer_size), + "3"(inner_size), + "4"(cnt) + :"cc", "memory", "v0", "v1", "v2", "v3", "a0", "a1", + "t0", "t1", "t2", "t3", "t4", "t5" + ); + + } + return CSINN_TRUE; +} diff --git a/source/c906_opt/transpose.c b/source/c906_opt/transpose.c new file mode 100644 index 00000000..93d39de9 --- /dev/null +++ b/source/c906_opt/transpose.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_c906.h" + +int csi_c906_transpose_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct transpose_params *params) +{ + if (params->permute_num == 4 && params->permute[0] == 0 && params->permute[1] == 2 && + params->permute[2] == 1 && params->permute[3] == 3) { + __fp16 *p_input = input->data; + __fp16 *p_output = output->data; + int *shape = output->dim; + int batch = shape[2]; + int shape3 = shape[3]; + int flatten_shape = shape[1] * shape[3]; + + if (flatten_shape % 16 == 0) { + for (int i = 0; i < batch; i++) { + for (int j = 0; j < flatten_shape; j += 16) { + int out_pos = i * shape3 + j % shape3 + batch * shape3 * (j / shape3); + vfloat16m2_t _output_from_buffer; + _output_from_buffer = vle16_v_f16m2(p_input + i * flatten_shape + j, 16); + vse16_v_f16m2(p_output + out_pos, _output_from_buffer, 16); + } + } + + } else { + for (int i = 0; i < batch; i++) { + for (int j = 0; j < flatten_shape; j++) { + int out_pos = i * shape3 + j % shape3 + batch * shape3 * (j / shape3); + p_output[out_pos] = p_input[i * flatten_shape + j]; + } + } + } + return CSINN_TRUE; + } + if (params->permute_num == 3 && params->permute[0] == 0 && params->permute[1] == 2 && + params->permute[2] == 1) { + int *shape = output->dim; + __fp16 *p_input = input->data; + __fp16 *p_output = output->data; + for (int i = 0; i < shape[2]; i++) // 256 + { + int j = 0; + for (; j + 15 < shape[1]; j += 16) // 6 + { + int out_pos = j * shape[2] + i; + vfloat16m2_t _output_from_buffer; + _output_from_buffer = vle16_v_f16m2(p_input + i * shape[1] + j, 16); + vsse16_v_f16m2(p_output + out_pos, 2 * shape[2], _output_from_buffer, 16); + } + if (j != shape[1]) { + int vl = shape[1] - j; + int out_pos = j * shape[2] + i; + vfloat16m2_t _output_from_buffer; + _output_from_buffer = vle16_v_f16m2(p_input + i * shape[1] + j, vl); + vsse16_v_f16m2(p_output + out_pos, 2 * shape[2], _output_from_buffer, vl); + } + } + return CSINN_TRUE; + } + return csi_ref_siso_callback_base(input, output, params, csi_ref_transpose); +} diff --git a/source/c906_opt/utils.c b/source/c906_opt/utils.c index dfd00c38..6a352b79 100644 --- a/source/c906_opt/utils.c +++ b/source/c906_opt/utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.11.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_c906.h" @@ -350,3 +350,45 @@ void csi_c906_crop_output_fp16(__fp16 *output_trans, __fp16 *output, int out_c, } } } + + +/* + fcsr: float control status register + bit region: + 9-10: Vxrm - 矢量舍入模式 + 8: Vxsat - 矢量溢出标志位 + 5-7: RM - 舍入模式: + • 当RM=0 时, RNE 舍入模式, 向最近偶数舍入; + • 当RM=1 时, RTZ 舍入模式, 向 0 舍入; + • 当RM=2 时, RDN 舍入模式, 向负无穷舍入; + • 当RM=3 时, RUP 舍入模式, 向正无穷舍入; + • 当RM=4 时, RMM 舍入模式, 向最近舍入 + 4: NV - 无效操作数异常 + 3: DZ - 除0异常 + 2: OF - 上溢异常 + 1: UF - 下溢异常 + 0: NX - 非精确异常 +*/ + +void csi_c906_reset_fcsr() +{ + asm volatile( + "csrrw x0, fcsr, zero\n\t" + : + : + :"memory" + ); +} + +int csi_c906_get_fcsr() +{ + int f_flag = 0; + asm volatile( + "csrrs %0, fcsr, zero\n\t" + + :"=r"(f_flag) + : + :"memory" + ); + return f_flag; +} diff --git a/source/e804_opt/activation/csi_xt800p_nn_activations_q15.S b/source/e804_opt/activation/csi_xt800p_nn_activations_q15.S index b02ca697..38c5caa7 100644 --- a/source/e804_opt/activation/csi_xt800p_nn_activations_q15.S +++ b/source/e804_opt/activation/csi_xt800p_nn_activations_q15.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/activation/csi_xt800p_nn_activations_q7.S b/source/e804_opt/activation/csi_xt800p_nn_activations_q7.S index 8366ee37..1522096e 100644 --- a/source/e804_opt/activation/csi_xt800p_nn_activations_q7.S +++ b/source/e804_opt/activation/csi_xt800p_nn_activations_q7.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/activation/csi_xt800p_relu_q15.S b/source/e804_opt/activation/csi_xt800p_relu_q15.S index f405cdb8..cd1b07d0 100644 --- a/source/e804_opt/activation/csi_xt800p_relu_q15.S +++ b/source/e804_opt/activation/csi_xt800p_relu_q15.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/activation/csi_xt800p_relu_q7.S b/source/e804_opt/activation/csi_xt800p_relu_q7.S index 1598b04a..c597b7f1 100644 --- a/source/e804_opt/activation/csi_xt800p_relu_q7.S +++ b/source/e804_opt/activation/csi_xt800p_relu_q7.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/avgpool.c b/source/e804_opt/avgpool.c index be5bff6d..8b6b7793 100644 --- a/source/e804_opt/avgpool.c +++ b/source/e804_opt/avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_e804.h" diff --git a/source/e804_opt/convolution.c b/source/e804_opt/convolution.c index 25916022..bcb28b0d 100644 --- a/source/e804_opt/convolution.c +++ b/source/e804_opt/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_e804.h" diff --git a/source/e804_opt/convolution/csi_xt800p_convolve_1x1_HWC_q7_fast.S b/source/e804_opt/convolution/csi_xt800p_convolve_1x1_HWC_q7_fast.S index ab07c70f..6b28899e 100644 --- a/source/e804_opt/convolution/csi_xt800p_convolve_1x1_HWC_q7_fast.S +++ b/source/e804_opt/convolution/csi_xt800p_convolve_1x1_HWC_q7_fast.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q15_basic.S b/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q15_basic.S index dbfe68b6..a2477870 100644 --- a/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q15_basic.S +++ b/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q15_basic.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q7_RGB.S b/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q7_RGB.S index d8d4e2b9..f0df9751 100644 --- a/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q7_RGB.S +++ b/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q7_RGB.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q7_basic.S b/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q7_basic.S index 66472ca1..a26d2e56 100644 --- a/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q7_basic.S +++ b/source/e804_opt/convolution/csi_xt800p_convolve_HWC_q7_basic.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/convolution/csi_xt800p_depthwise_separable_conv_HWC_q7.S b/source/e804_opt/convolution/csi_xt800p_depthwise_separable_conv_HWC_q7.S index 5a81fc2f..ca56ba09 100644 --- a/source/e804_opt/convolution/csi_xt800p_depthwise_separable_conv_HWC_q7.S +++ b/source/e804_opt/convolution/csi_xt800p_depthwise_separable_conv_HWC_q7.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/fully-connect/csi_xt800p_fully_connected_mat_q7_vec_q15.S b/source/e804_opt/fully-connect/csi_xt800p_fully_connected_mat_q7_vec_q15.S index 158877ef..0498f87c 100644 --- a/source/e804_opt/fully-connect/csi_xt800p_fully_connected_mat_q7_vec_q15.S +++ b/source/e804_opt/fully-connect/csi_xt800p_fully_connected_mat_q7_vec_q15.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/fully-connect/csi_xt800p_fully_connected_q15.S b/source/e804_opt/fully-connect/csi_xt800p_fully_connected_q15.S index 1d8c86bc..5919bbff 100644 --- a/source/e804_opt/fully-connect/csi_xt800p_fully_connected_q15.S +++ b/source/e804_opt/fully-connect/csi_xt800p_fully_connected_q15.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/fully-connect/csi_xt800p_fully_connected_q7.S b/source/e804_opt/fully-connect/csi_xt800p_fully_connected_q7.S index 4da617e0..8c755e31 100644 --- a/source/e804_opt/fully-connect/csi_xt800p_fully_connected_q7.S +++ b/source/e804_opt/fully-connect/csi_xt800p_fully_connected_q7.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/fullyconnected.c b/source/e804_opt/fullyconnected.c index 73bbcd83..46d0228f 100644 --- a/source/e804_opt/fullyconnected.c +++ b/source/e804_opt/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_e804.h" diff --git a/source/e804_opt/maxpool.c b/source/e804_opt/maxpool.c index 4875ee87..8aaba6f1 100644 --- a/source/e804_opt/maxpool.c +++ b/source/e804_opt/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_e804.h" diff --git a/source/e804_opt/nn-support/csi_xt800p_nntables.c b/source/e804_opt/nn-support/csi_xt800p_nntables.c index d05eb63f..b25db41f 100644 --- a/source/e804_opt/nn-support/csi_xt800p_nntables.c +++ b/source/e804_opt/nn-support/csi_xt800p_nntables.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/pooling/csi_xt800p_pool_q7_HWC.S b/source/e804_opt/pooling/csi_xt800p_pool_q7_HWC.S index fefe3d23..6de52ba4 100644 --- a/source/e804_opt/pooling/csi_xt800p_pool_q7_HWC.S +++ b/source/e804_opt/pooling/csi_xt800p_pool_q7_HWC.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/relu.c b/source/e804_opt/relu.c index 7862fa9b..5f5015a8 100644 --- a/source/e804_opt/relu.c +++ b/source/e804_opt/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_e804.h" diff --git a/source/e804_opt/setup.c b/source/e804_opt/setup.c index f8ed4c48..b78e83e5 100644 --- a/source/e804_opt/setup.c +++ b/source/e804_opt/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_e804.h" diff --git a/source/e804_opt/sigmoid.c b/source/e804_opt/sigmoid.c index 09b1a0da..f5eeb581 100644 --- a/source/e804_opt/sigmoid.c +++ b/source/e804_opt/sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_e804.h" diff --git a/source/e804_opt/softmax.c b/source/e804_opt/softmax.c index 1bb19cf6..79a033f5 100644 --- a/source/e804_opt/softmax.c +++ b/source/e804_opt/softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_e804.h" diff --git a/source/e804_opt/softmax/csi_xt800p_softmax_q15.S b/source/e804_opt/softmax/csi_xt800p_softmax_q15.S index c9426594..f57f9410 100644 --- a/source/e804_opt/softmax/csi_xt800p_softmax_q15.S +++ b/source/e804_opt/softmax/csi_xt800p_softmax_q15.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/softmax/csi_xt800p_softmax_q7.S b/source/e804_opt/softmax/csi_xt800p_softmax_q7.S index 0c57f377..04df43e6 100644 --- a/source/e804_opt/softmax/csi_xt800p_softmax_q7.S +++ b/source/e804_opt/softmax/csi_xt800p_softmax_q7.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/e804_opt/tanh.c b/source/e804_opt/tanh.c index d7d80ac1..a9343c3e 100644 --- a/source/e804_opt/tanh.c +++ b/source/e804_opt/tanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_e804.h" diff --git a/source/graph_ref/abs.c b/source/graph_ref/abs.c index 539b3cf3..2d7d2476 100644 --- a/source/graph_ref/abs.c +++ b/source/graph_ref/abs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/acos.c b/source/graph_ref/acos.c index 88ff0054..f8ff2efe 100644 --- a/source/graph_ref/acos.c +++ b/source/graph_ref/acos.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/acosh.c b/source/graph_ref/acosh.c index 4d19d511..9969d232 100644 --- a/source/graph_ref/acosh.c +++ b/source/graph_ref/acosh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/add.c b/source/graph_ref/add.c index 7e9b9ce8..a90f17fd 100644 --- a/source/graph_ref/add.c +++ b/source/graph_ref/add.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/all.c b/source/graph_ref/all.c index dfbd54b4..3ab40d26 100644 --- a/source/graph_ref/all.c +++ b/source/graph_ref/all.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/and.c b/source/graph_ref/and.c index 7d13337b..f883ae75 100644 --- a/source/graph_ref/and.c +++ b/source/graph_ref/and.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/any.c b/source/graph_ref/any.c index 63feb323..49d6b5db 100644 --- a/source/graph_ref/any.c +++ b/source/graph_ref/any.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/arange.c b/source/graph_ref/arange.c index fb78ca38..5d044223 100644 --- a/source/graph_ref/arange.c +++ b/source/graph_ref/arange.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/argmax.c b/source/graph_ref/argmax.c index aecc6492..81c72e2c 100644 --- a/source/graph_ref/argmax.c +++ b/source/graph_ref/argmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/argmin.c b/source/graph_ref/argmin.c index 7a9262b0..1e2abc0d 100644 --- a/source/graph_ref/argmin.c +++ b/source/graph_ref/argmin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/asin.c b/source/graph_ref/asin.c index b389de9e..21d6f356 100644 --- a/source/graph_ref/asin.c +++ b/source/graph_ref/asin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/asinh.c b/source/graph_ref/asinh.c index 9fcdfd6c..c9c817e2 100644 --- a/source/graph_ref/asinh.c +++ b/source/graph_ref/asinh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/atan.c b/source/graph_ref/atan.c index e55b7b3c..68a82797 100644 --- a/source/graph_ref/atan.c +++ b/source/graph_ref/atan.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/atanh.c b/source/graph_ref/atanh.c index c7100b9e..f8ce38e4 100644 --- a/source/graph_ref/atanh.c +++ b/source/graph_ref/atanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/avgpool.c b/source/graph_ref/avgpool.c index 79462b46..6fa6f630 100644 --- a/source/graph_ref/avgpool.c +++ b/source/graph_ref/avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/avgpool3d.c b/source/graph_ref/avgpool3d.c index 1affc899..d08775aa 100644 --- a/source/graph_ref/avgpool3d.c +++ b/source/graph_ref/avgpool3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/batch_to_space.c b/source/graph_ref/batch_to_space.c index 9115752a..6c6d8976 100644 --- a/source/graph_ref/batch_to_space.c +++ b/source/graph_ref/batch_to_space.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/batch_to_space_nd.c b/source/graph_ref/batch_to_space_nd.c index a75ea52c..54568832 100644 --- a/source/graph_ref/batch_to_space_nd.c +++ b/source/graph_ref/batch_to_space_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/bn.c b/source/graph_ref/bn.c index 376f4e44..bb2d186c 100644 --- a/source/graph_ref/bn.c +++ b/source/graph_ref/bn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/broadcast_to.c b/source/graph_ref/broadcast_to.c index f00cea4d..ed00208c 100644 --- a/source/graph_ref/broadcast_to.c +++ b/source/graph_ref/broadcast_to.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/cache_conv1d.c b/source/graph_ref/cache_conv1d.c new file mode 100644 index 00000000..0aaa7630 --- /dev/null +++ b/source/graph_ref/cache_conv1d.c @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_gref.h" + +int csi_gref_cache_conv1d(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params) +{ + csi_gref_sidcso_op(input, output, weight, bias, CSINN_OP_CACHE_CONV1D, params); + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/graph_ref/cache_matmul.c b/source/graph_ref/cache_matmul.c new file mode 100644 index 00000000..8d5ca4f5 --- /dev/null +++ b/source/graph_ref/cache_matmul.c @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_gref.h" + +int csi_gref_cache_matmul(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params) +{ + csi_gref_sidcso_op(input, output, weight, bias, CSINN_OP_CACHE_MATMUL, params); + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/graph_ref/ceil.c b/source/graph_ref/ceil.c index 9742e332..1a23c4ba 100644 --- a/source/graph_ref/ceil.c +++ b/source/graph_ref/ceil.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/clip.c b/source/graph_ref/clip.c index 0185cda3..9cce441c 100644 --- a/source/graph_ref/clip.c +++ b/source/graph_ref/clip.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/col2im.c b/source/graph_ref/col2im.c index ed32e474..7956da71 100644 --- a/source/graph_ref/col2im.c +++ b/source/graph_ref/col2im.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/concat.c b/source/graph_ref/concat.c index 9996e6e8..aa376940 100644 --- a/source/graph_ref/concat.c +++ b/source/graph_ref/concat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/convolution.c b/source/graph_ref/convolution.c index 6f0ee3b0..9e7b383e 100644 --- a/source/graph_ref/convolution.c +++ b/source/graph_ref/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/convolution1d.c b/source/graph_ref/convolution1d.c new file mode 100644 index 00000000..5413235c --- /dev/null +++ b/source/graph_ref/convolution1d.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_gref.h" + +int csi_gref_conv1d(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *kernel, + struct csi_tensor *bias, + struct conv2d_params *params) +{ + csi_gref_sidcso_op(input, output, kernel, bias, CSINN_OP_CONV1D, params); + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/graph_ref/convolution3d.c b/source/graph_ref/convolution3d.c index ce5efa51..47e3033e 100644 --- a/source/graph_ref/convolution3d.c +++ b/source/graph_ref/convolution3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/cos.c b/source/graph_ref/cos.c index 3cb515d1..b365c62f 100644 --- a/source/graph_ref/cos.c +++ b/source/graph_ref/cos.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/cosh.c b/source/graph_ref/cosh.c index 8e741947..55ead117 100644 --- a/source/graph_ref/cosh.c +++ b/source/graph_ref/cosh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/crop.c b/source/graph_ref/crop.c index 60850ad7..8d623cbc 100644 --- a/source/graph_ref/crop.c +++ b/source/graph_ref/crop.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/cumprod.c b/source/graph_ref/cumprod.c index 26a0f4da..ea208ddb 100644 --- a/source/graph_ref/cumprod.c +++ b/source/graph_ref/cumprod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/cumsum.c b/source/graph_ref/cumsum.c index 415f9b4c..08493115 100644 --- a/source/graph_ref/cumsum.c +++ b/source/graph_ref/cumsum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/deconvolution.c b/source/graph_ref/deconvolution.c index 9bf396a5..695dc48c 100644 --- a/source/graph_ref/deconvolution.c +++ b/source/graph_ref/deconvolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/deconvolution3d.c b/source/graph_ref/deconvolution3d.c index e829da69..eeaae97e 100644 --- a/source/graph_ref/deconvolution3d.c +++ b/source/graph_ref/deconvolution3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/depth_to_space.c b/source/graph_ref/depth_to_space.c index b3954625..bc07d936 100644 --- a/source/graph_ref/depth_to_space.c +++ b/source/graph_ref/depth_to_space.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/div.c b/source/graph_ref/div.c index 24c605ea..86790bb7 100644 --- a/source/graph_ref/div.c +++ b/source/graph_ref/div.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/elu.c b/source/graph_ref/elu.c index 65d6457e..dabcbc84 100644 --- a/source/graph_ref/elu.c +++ b/source/graph_ref/elu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/equal.c b/source/graph_ref/equal.c index ff6fc6f1..b92af4c9 100644 --- a/source/graph_ref/equal.c +++ b/source/graph_ref/equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/erf.c b/source/graph_ref/erf.c index 64a127eb..01889d04 100644 --- a/source/graph_ref/erf.c +++ b/source/graph_ref/erf.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/exp.c b/source/graph_ref/exp.c index 90a84bfc..d31b4b34 100644 --- a/source/graph_ref/exp.c +++ b/source/graph_ref/exp.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/expand_dims.c b/source/graph_ref/expand_dims.c index 9a379868..05537189 100644 --- a/source/graph_ref/expand_dims.c +++ b/source/graph_ref/expand_dims.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/expm1.c b/source/graph_ref/expm1.c index bd45911c..7a79aebb 100644 --- a/source/graph_ref/expm1.c +++ b/source/graph_ref/expm1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/flatten.c b/source/graph_ref/flatten.c index 1074f2d9..bf2fed9b 100644 --- a/source/graph_ref/flatten.c +++ b/source/graph_ref/flatten.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/floor.c b/source/graph_ref/floor.c index 65711f56..617a5a6f 100644 --- a/source/graph_ref/floor.c +++ b/source/graph_ref/floor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/floor_divide.c b/source/graph_ref/floor_divide.c index 86ca10f0..25dc7ab8 100644 --- a/source/graph_ref/floor_divide.c +++ b/source/graph_ref/floor_divide.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/floor_mod.c b/source/graph_ref/floor_mod.c index e8287d77..bc1c2c51 100644 --- a/source/graph_ref/floor_mod.c +++ b/source/graph_ref/floor_mod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/fsmn.c b/source/graph_ref/fsmn.c index 48fa1776..61ae482d 100644 --- a/source/graph_ref/fsmn.c +++ b/source/graph_ref/fsmn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/fullyconnected.c b/source/graph_ref/fullyconnected.c index 50589717..3e2fd8da 100644 --- a/source/graph_ref/fullyconnected.c +++ b/source/graph_ref/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/gather.c b/source/graph_ref/gather.c index 07a734d6..f584e039 100644 --- a/source/graph_ref/gather.c +++ b/source/graph_ref/gather.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/gather_nd.c b/source/graph_ref/gather_nd.c index 2a8c92fa..7e6fbf99 100644 --- a/source/graph_ref/gather_nd.c +++ b/source/graph_ref/gather_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/global_averagepool.c b/source/graph_ref/global_averagepool.c index 9fede8e6..7ad41eea 100644 --- a/source/graph_ref/global_averagepool.c +++ b/source/graph_ref/global_averagepool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/global_maxpool.c b/source/graph_ref/global_maxpool.c index 6dc62633..1b8112b9 100644 --- a/source/graph_ref/global_maxpool.c +++ b/source/graph_ref/global_maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/greater.c b/source/graph_ref/greater.c index 483d69ee..d32ad682 100644 --- a/source/graph_ref/greater.c +++ b/source/graph_ref/greater.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/greater_equal.c b/source/graph_ref/greater_equal.c index b13f658d..2c4095d5 100644 --- a/source/graph_ref/greater_equal.c +++ b/source/graph_ref/greater_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/hard_sigmoid.c b/source/graph_ref/hard_sigmoid.c index 76651033..4745b233 100644 --- a/source/graph_ref/hard_sigmoid.c +++ b/source/graph_ref/hard_sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/im2col.c b/source/graph_ref/im2col.c index e5a75c4b..cdffbb2b 100644 --- a/source/graph_ref/im2col.c +++ b/source/graph_ref/im2col.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/isnan.c b/source/graph_ref/isnan.c index 8e6a319c..b2c4906a 100644 --- a/source/graph_ref/isnan.c +++ b/source/graph_ref/isnan.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/l2_normalization.c b/source/graph_ref/l2_normalization.c index c3daa07a..c3dc96f3 100644 --- a/source/graph_ref/l2_normalization.c +++ b/source/graph_ref/l2_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/l2pool.c b/source/graph_ref/l2pool.c index 143dd6fc..8553d73b 100644 --- a/source/graph_ref/l2pool.c +++ b/source/graph_ref/l2pool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/layer_norm.c b/source/graph_ref/layer_norm.c new file mode 100644 index 00000000..4914d346 --- /dev/null +++ b/source/graph_ref/layer_norm.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_gref.h" + +int csi_gref_layer_norm(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *gamma, + struct csi_tensor *beta, + struct layer_norm_params *params) +{ + csi_gref_sidcso_op(input, output, gamma, beta, CSINN_OP_LAYER_NORM, params); + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/graph_ref/leaky_relu.c b/source/graph_ref/leaky_relu.c index eaeacfb2..add038ea 100644 --- a/source/graph_ref/leaky_relu.c +++ b/source/graph_ref/leaky_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/less.c b/source/graph_ref/less.c index c01f269d..e51c83df 100644 --- a/source/graph_ref/less.c +++ b/source/graph_ref/less.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/less_equal.c b/source/graph_ref/less_equal.c index a66e405e..ae93f5f1 100644 --- a/source/graph_ref/less_equal.c +++ b/source/graph_ref/less_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/log.c b/source/graph_ref/log.c index 50c86c10..87e2a07f 100644 --- a/source/graph_ref/log.c +++ b/source/graph_ref/log.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/log1p.c b/source/graph_ref/log1p.c index 5f8a79db..7f225cde 100644 --- a/source/graph_ref/log1p.c +++ b/source/graph_ref/log1p.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/log_softmax.c b/source/graph_ref/log_softmax.c index 1635aa35..a283f6a3 100644 --- a/source/graph_ref/log_softmax.c +++ b/source/graph_ref/log_softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/logical_and.c b/source/graph_ref/logical_and.c index c3d9bcc9..2f32519b 100644 --- a/source/graph_ref/logical_and.c +++ b/source/graph_ref/logical_and.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/logical_not.c b/source/graph_ref/logical_not.c index 9672a14d..9646cdaa 100644 --- a/source/graph_ref/logical_not.c +++ b/source/graph_ref/logical_not.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/logical_or.c b/source/graph_ref/logical_or.c index eda50d51..0a75f7a6 100644 --- a/source/graph_ref/logical_or.c +++ b/source/graph_ref/logical_or.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/logical_xor.c b/source/graph_ref/logical_xor.c index 24b6c511..f506b489 100644 --- a/source/graph_ref/logical_xor.c +++ b/source/graph_ref/logical_xor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/lrn.c b/source/graph_ref/lrn.c index 15b7687f..0df8e8ae 100644 --- a/source/graph_ref/lrn.c +++ b/source/graph_ref/lrn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/matmul.c b/source/graph_ref/matmul.c index eb4853c7..8cfab3d1 100644 --- a/source/graph_ref/matmul.c +++ b/source/graph_ref/matmul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/max.c b/source/graph_ref/max.c index dfadf643..1422ddf6 100644 --- a/source/graph_ref/max.c +++ b/source/graph_ref/max.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/maximum.c b/source/graph_ref/maximum.c index f18d653e..0ad0f028 100644 --- a/source/graph_ref/maximum.c +++ b/source/graph_ref/maximum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/maxpool.c b/source/graph_ref/maxpool.c index 2157e830..46cce60a 100644 --- a/source/graph_ref/maxpool.c +++ b/source/graph_ref/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/maxpool2d_locat.c b/source/graph_ref/maxpool2d_locat.c index 01e00d1b..7263fa8b 100644 --- a/source/graph_ref/maxpool2d_locat.c +++ b/source/graph_ref/maxpool2d_locat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/maxpool3d.c b/source/graph_ref/maxpool3d.c index ff6dadcd..95860e2f 100644 --- a/source/graph_ref/maxpool3d.c +++ b/source/graph_ref/maxpool3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/mean.c b/source/graph_ref/mean.c index 1b530380..aa1b469e 100644 --- a/source/graph_ref/mean.c +++ b/source/graph_ref/mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/min.c b/source/graph_ref/min.c index 7959a212..6e79bf54 100644 --- a/source/graph_ref/min.c +++ b/source/graph_ref/min.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/minimum.c b/source/graph_ref/minimum.c index 48ca5e9c..af6a711d 100644 --- a/source/graph_ref/minimum.c +++ b/source/graph_ref/minimum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/mod.c b/source/graph_ref/mod.c index eafa8190..f7f26d3b 100644 --- a/source/graph_ref/mod.c +++ b/source/graph_ref/mod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/mul.c b/source/graph_ref/mul.c index 3259675a..7ea7a30b 100644 --- a/source/graph_ref/mul.c +++ b/source/graph_ref/mul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/ndarray_size.c b/source/graph_ref/ndarray_size.c index ac39f62e..4fde9d24 100644 --- a/source/graph_ref/ndarray_size.c +++ b/source/graph_ref/ndarray_size.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/negative.c b/source/graph_ref/negative.c index 1a67f902..a2280dad 100644 --- a/source/graph_ref/negative.c +++ b/source/graph_ref/negative.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/non_max_suppression.c b/source/graph_ref/non_max_suppression.c index 5e6e6c2e..d6a4bbbc 100644 --- a/source/graph_ref/non_max_suppression.c +++ b/source/graph_ref/non_max_suppression.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/not.c b/source/graph_ref/not.c index 94143df1..39441206 100644 --- a/source/graph_ref/not.c +++ b/source/graph_ref/not.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/not_equal.c b/source/graph_ref/not_equal.c index afca8daa..aec880be 100644 --- a/source/graph_ref/not_equal.c +++ b/source/graph_ref/not_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/or.c b/source/graph_ref/or.c index 3ca0f372..556e8e11 100644 --- a/source/graph_ref/or.c +++ b/source/graph_ref/or.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/pad.c b/source/graph_ref/pad.c index 86cb73a2..a026b757 100644 --- a/source/graph_ref/pad.c +++ b/source/graph_ref/pad.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/power.c b/source/graph_ref/power.c index 2c11eb27..652fb26e 100644 --- a/source/graph_ref/power.c +++ b/source/graph_ref/power.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/prelu.c b/source/graph_ref/prelu.c index 16e87efa..b0f4cf4e 100644 --- a/source/graph_ref/prelu.c +++ b/source/graph_ref/prelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/prod.c b/source/graph_ref/prod.c index f12e2b7d..c19a5eb3 100644 --- a/source/graph_ref/prod.c +++ b/source/graph_ref/prod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/proposal.c b/source/graph_ref/proposal.c index a8c3ed72..a3e496df 100644 --- a/source/graph_ref/proposal.c +++ b/source/graph_ref/proposal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/psroipooling.c b/source/graph_ref/psroipooling.c index 92aa5504..6b043c4b 100644 --- a/source/graph_ref/psroipooling.c +++ b/source/graph_ref/psroipooling.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/reduce_logsumexp.c b/source/graph_ref/reduce_logsumexp.c index e2d98715..db47597f 100644 --- a/source/graph_ref/reduce_logsumexp.c +++ b/source/graph_ref/reduce_logsumexp.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/reduce_max.c b/source/graph_ref/reduce_max.c index 8732a5a6..3e018c99 100644 --- a/source/graph_ref/reduce_max.c +++ b/source/graph_ref/reduce_max.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/reduce_mean.c b/source/graph_ref/reduce_mean.c index 09c96632..55b63a31 100644 --- a/source/graph_ref/reduce_mean.c +++ b/source/graph_ref/reduce_mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/reduce_min.c b/source/graph_ref/reduce_min.c index bba7fbb2..952cd293 100644 --- a/source/graph_ref/reduce_min.c +++ b/source/graph_ref/reduce_min.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/reduce_prod.c b/source/graph_ref/reduce_prod.c index fced452f..7c91c5c8 100644 --- a/source/graph_ref/reduce_prod.c +++ b/source/graph_ref/reduce_prod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/reduce_sum.c b/source/graph_ref/reduce_sum.c index 49e6ad9e..13e00e65 100644 --- a/source/graph_ref/reduce_sum.c +++ b/source/graph_ref/reduce_sum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/relu.c b/source/graph_ref/relu.c index c51c975c..3bf0a5a6 100644 --- a/source/graph_ref/relu.c +++ b/source/graph_ref/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/relu1.c b/source/graph_ref/relu1.c index 6d750dae..809716cf 100644 --- a/source/graph_ref/relu1.c +++ b/source/graph_ref/relu1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/relu6.c b/source/graph_ref/relu6.c index dcded1f2..1b830603 100644 --- a/source/graph_ref/relu6.c +++ b/source/graph_ref/relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/relun.c b/source/graph_ref/relun.c index 1ec3d839..69ca4b79 100644 --- a/source/graph_ref/relun.c +++ b/source/graph_ref/relun.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/reorg.c b/source/graph_ref/reorg.c index ccf1b07b..b945fd71 100644 --- a/source/graph_ref/reorg.c +++ b/source/graph_ref/reorg.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/reshape.c b/source/graph_ref/reshape.c index 45af1dd3..54106616 100644 --- a/source/graph_ref/reshape.c +++ b/source/graph_ref/reshape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/resize.c b/source/graph_ref/resize.c index fe311600..7b8fa1d1 100644 --- a/source/graph_ref/resize.c +++ b/source/graph_ref/resize.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/reverse.c b/source/graph_ref/reverse.c index 48aba220..52f18f9d 100644 --- a/source/graph_ref/reverse.c +++ b/source/graph_ref/reverse.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/roialign.c b/source/graph_ref/roialign.c index 5203bef9..e9e26127 100644 --- a/source/graph_ref/roialign.c +++ b/source/graph_ref/roialign.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/roipool.c b/source/graph_ref/roipool.c index ee040840..cbdae26b 100644 --- a/source/graph_ref/roipool.c +++ b/source/graph_ref/roipool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/round.c b/source/graph_ref/round.c index 7a3c2a8c..63e4da48 100644 --- a/source/graph_ref/round.c +++ b/source/graph_ref/round.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/rsqrt.c b/source/graph_ref/rsqrt.c index 1503ff35..74f1ce8a 100644 --- a/source/graph_ref/rsqrt.c +++ b/source/graph_ref/rsqrt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/scatter.c b/source/graph_ref/scatter.c index 7865988e..5fc59448 100644 --- a/source/graph_ref/scatter.c +++ b/source/graph_ref/scatter.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/segment_max.c b/source/graph_ref/segment_max.c index 00657d42..891b5e51 100644 --- a/source/graph_ref/segment_max.c +++ b/source/graph_ref/segment_max.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/segment_mean.c b/source/graph_ref/segment_mean.c index b74bebd7..70d9304b 100644 --- a/source/graph_ref/segment_mean.c +++ b/source/graph_ref/segment_mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/segment_min.c b/source/graph_ref/segment_min.c index 2fdb1841..788207c8 100644 --- a/source/graph_ref/segment_min.c +++ b/source/graph_ref/segment_min.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/segment_prod.c b/source/graph_ref/segment_prod.c index 2ed48854..d57f0277 100644 --- a/source/graph_ref/segment_prod.c +++ b/source/graph_ref/segment_prod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/segment_sum.c b/source/graph_ref/segment_sum.c index 63bea6c3..2e94c56c 100644 --- a/source/graph_ref/segment_sum.c +++ b/source/graph_ref/segment_sum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/select.c b/source/graph_ref/select.c index 5a31021a..18651d96 100644 --- a/source/graph_ref/select.c +++ b/source/graph_ref/select.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/sequence_mask.c b/source/graph_ref/sequence_mask.c index 66875f8e..ba30de23 100644 --- a/source/graph_ref/sequence_mask.c +++ b/source/graph_ref/sequence_mask.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/setup.c b/source/graph_ref/setup.c index 7e3edd4c..59a3e654 100644 --- a/source/graph_ref/setup.c +++ b/source/graph_ref/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" #include "csi_utils.h" @@ -82,240 +82,261 @@ static int call_layer_func(void *fn, struct csi_node *node) struct csi_tensor **inputs; struct csi_tensor **outputs; - switch (node->type) - { - case CSINN_OP_ABS: - case CSINN_OP_ACOS: - case CSINN_OP_ACOSH: - case CSINN_OP_ANY: - case CSINN_OP_ARGMAX: - case CSINN_OP_ARGMIN: - case CSINN_OP_ASIN: - case CSINN_OP_ASINH: - case CSINN_OP_ATAN: - case CSINN_OP_ATANH: - case CSINN_OP_AVGPOOL2D: - case CSINN_OP_AVGPOOL3D: - case CSINN_OP_BATCH_TO_SPACE: - case CSINN_OP_BATCH_TO_SPACE_ND: - case CSINN_OP_BROADCOST: - case CSINN_OP_CEIL: - case CSINN_OP_CLIP: - case CSINN_OP_COL2IM: - case CSINN_OP_COS: - case CSINN_OP_COSH: - case CSINN_OP_CROP: - case CSINN_OP_CUMPROD: - case CSINN_OP_CUMSUM: - case CSINN_OP_DEPTH_TO_SPACE: - case CSINN_OP_ELU: - case CSINN_OP_ERF: - case CSINN_OP_EXP: - case CSINN_OP_EXPAND_DIMS: - case CSINN_OP_EXPM1: - case CSINN_OP_FLATTEN: - case CSINN_OP_FLOOR: - case CSINN_OP_GLOBAL_AVGPOOL2D: - case CSINN_OP_GLOBAL_MAXPOOL2D: - case CSINN_OP_HARD_SIGMOID: - case CSINN_OP_IM2COL: - case CSINN_OP_ISNAN: - case CSINN_OP_L2N: - case CSINN_OP_L2POOL2D: - case CSINN_OP_LEAKY_RELU: - case CSINN_OP_LOG_SOFTMAX: - case CSINN_OP_LOG: - case CSINN_OP_LOG1P: - case CSINN_OP_LOGICAL_NOT: - case CSINN_OP_LRN: - case CSINN_OP_MAX: - case CSINN_OP_MAXPOOL2D: - case CSINN_OP_MAXPOOL2D_LOCAT: - case CSINN_OP_MAXPOOL3D: - case CSINN_OP_MEAN: - case CSINN_OP_MEAN_STRIDE: - case CSINN_OP_MIN: - case CSINN_OP_NDARRAY_SIZE: - case CSINN_OP_NEGATIIVE: - case CSINN_OP_NOT: - case CSINN_OP_PAD: - case CSINN_OP_PROD: - case CSINN_OP_REDUCE_LOGSUMEXP: - case CSINN_OP_REDUCE_MAX: - case CSINN_OP_REDUCE_MEAN: - case CSINN_OP_REDUCE_MIN: - case CSINN_OP_REDUCE_PROD: - case CSINN_OP_REDUCE_SUM: - case CSINN_OP_RELU: - case CSINN_OP_RELU1: - case CSINN_OP_RELU6: - case CSINN_OP_RELUN: - case CSINN_OP_REORG: - case CSINN_OP_RESHAPE: - case CSINN_OP_RESIZE: - case CSINN_OP_REVERSE: - case CSINN_OP_ROUND: - case CSINN_OP_RSQRT: - case CSINN_OP_SHAPE: - case CSINN_OP_SHUFFLE_CHANNEL: - case CSINN_OP_SIGMOID: - case CSINN_OP_SIGN: - case CSINN_OP_SIN: - case CSINN_OP_SINH: - case CSINN_OP_SLICE: - case CSINN_OP_SOFTMAX: - case CSINN_OP_SOFTPLUS: - case CSINN_OP_SOFTRELU: - case CSINN_OP_SOFTSIGN: - case CSINN_OP_SPACE_TO_BATCH: - case CSINN_OP_SPACE_TO_BATCH_ND: - case CSINN_OP_SPACE_TO_DEPTH: - case CSINN_OP_SQRT: - case CSINN_OP_SQUARE: - case CSINN_OP_SQUEEZE: - case CSINN_OP_STACK: - case CSINN_OP_STRIDED_SLICE: - case CSINN_OP_SUM: - case CSINN_OP_TAN: - case CSINN_OP_TANH: - case CSINN_OP_THRESHOLD_RELU: - case CSINN_OP_TILE: - case CSINN_OP_TRANSPOSE: - case CSINN_OP_TRUNC: - case CSINN_OP_UNPOOLING: - case CSINN_OP_UNSTACK: - case CSINN_OP_YUV_RGB_SCALE: - ret = func(node->in[0]->data, node->out[0]->data, params); - break; - case CSINN_OP_ADD: - case CSINN_OP_AND: - case CSINN_OP_DIV: - case CSINN_OP_EQUANL: - case CSINN_OP_FLOOR_DIVIDE: - case CSINN_OP_FLOOR_MOD: - case CSINN_OP_GATHER_ND: - case CSINN_OP_GATHER: - case CSINN_OP_GREATHER_EQUAL: - case CSINN_OP_GREATHER: - case CSINN_OP_LESS_EQUAL: - case CSINN_OP_LESS: - case CSINN_OP_LOGICAL_AND: - case CSINN_OP_LOGICAL_OR: - case CSINN_OP_LOGICAL_XOR: - case CSINN_OP_MATMUL: - case CSINN_OP_MAXIMUM: - case CSINN_OP_MINIMUM: - case CSINN_OP_MOD: - case CSINN_OP_MUL: - case CSINN_OP_NON_MAX_SUPPRESSION: - case CSINN_OP_NOT_EQUAL: - case CSINN_OP_OR: - case CSINN_OP_POWER: - case CSINN_OP_PRELU: - case CSINN_OP_SEQUENCE_MASK: - case CSINN_OP_SEGMENT_MAX: - case CSINN_OP_UNSORTED_SEGMENT_MAX: - case CSINN_OP_SEGMENT_MEAN: - case CSINN_OP_UNSORTED_SEGMENT_MEAN: - case CSINN_OP_SEGMENT_MIN: - case CSINN_OP_UNSORTED_SEGMENT_MIN: - case CSINN_OP_SEGMENT_PROD: - case CSINN_OP_UNSORTED_SEGMENT_PROD: - case CSINN_OP_SEGMENT_SUM: - case CSINN_OP_UNSORTED_SEGMENT_SUM: - case CSINN_OP_SUB: - case CSINN_OP_XOR: - ret = func(node->in[0]->data, node->in[1]->data, node->out[0]->data, params); - break; - case CSINN_OP_CONV2D: - case CSINN_OP_CONV2D_RELU: - case CSINN_OP_CONV2D_RELU6: - case CSINN_OP_CONV2D_CHANNEL: - case CSINN_OP_CONV2D_CHANNEL_RELU: - case CSINN_OP_CONV2D_CHANNEL_RELU6: - case CSINN_OP_DEPTHWISE_CONV2D: - case CSINN_OP_DEPTHWISE_CONV2D_RELU: - case CSINN_OP_DEPTHWISE_CONV2D_RELU6: - case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL: - case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU: - case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU6: - case CSINN_OP_GROUP_CONV2D: - case CSINN_OP_GROUP_CONV2D_RELU: - case CSINN_OP_GROUP_CONV2D_RELU6: - case CSINN_OP_GROUP_CONV2D_CHANNEL: - case CSINN_OP_GROUP_CONV2D_CHANNEL_RELU: - case CSINN_OP_CONV3D: - case CSINN_OP_DECONV2D: - case CSINN_OP_DEPTHWISE_DECONV2D: - case CSINN_OP_DECONV3D: - case CSINN_OP_FULLYCONNECTED: - ret = func(node->in[0]->data, node->out[0]->data, node->in[1]->data, node->in[2]->data, params); - break; - case CSINN_OP_FSMN: - ret = func(node->in[0]->data, node->in[1]->data, node->in[2]->data, node->in[3]->data, node->in[4]->data, node->out[0]->data, params); - break; - case CSINN_OP_CONCAT: - inputs = csi_mem_alloc(sizeof(struct csi_tensor *) * ((struct concat_params *)params)->inputs_count); - for (int i = 0; i < ((struct concat_params *)params)->inputs_count; i++){ - inputs[i] = node->in[i]->data; - } - ret = func(inputs, node->out[0]->data, params); - csi_mem_free(inputs); - break; - case CSINN_OP_SPLIT: - outputs = csi_mem_alloc(sizeof(struct csi_tensor *) * ((struct split_params *)params)->output_num); - for (int i = 0; i < ((struct split_params *)params)->output_num; i++){ - outputs[i] = node->out[i]->data; - } - ret = func(node->in[0]->data, outputs, params); - csi_mem_free(outputs); - break; - case CSINN_OP_ALL: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_ALL\n")); - break; - case CSINN_OP_ARANGE: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_ARANGE\n")); - break; - case CSINN_OP_BN: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_BN\n")); - break; - case CSINN_OP_MIN_STRIDE: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_MIN_STRIDE\n")); - break; - case CSINN_OP_ONE_HOT: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_ONE_HOT\n")); - break; - case CSINN_OP_PROPOSAL: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_PROPOSAL\n")); - break; - case CSINN_OP_PSROIPOOLING: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_PSROIPOOLING\n")); - break; - case CSINN_OP_ROIALIGN: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_ROIALIGN\n")); - break; - case CSINN_OP_ROIPOOL: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_ROIPOOL\n")); - break; - case CSINN_OP_SCATTER_ND: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_SCATTER_ND\n")); - break; - case CSINN_OP_SELECT: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_SELECT\n")); - break; - case CSINN_OP_TOPK: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_TOPK\n")); - break; - case CSINN_OP_WHERE: - CSI_DEBUG_CALL(printf("unsupported CSINN_OP_WHERE\n")); - break; - default: - CSI_DEBUG_CALL(printf("unknown op\n")); - return CSINN_FALSE; + switch (node->type) { + case CSINN_OP_ABS: + case CSINN_OP_ACOS: + case CSINN_OP_ACOSH: + case CSINN_OP_ANY: + case CSINN_OP_ARGMAX: + case CSINN_OP_ARGMIN: + case CSINN_OP_ASIN: + case CSINN_OP_ASINH: + case CSINN_OP_ATAN: + case CSINN_OP_ATANH: + case CSINN_OP_AVGPOOL2D: + case CSINN_OP_AVGPOOL3D: + case CSINN_OP_BATCH_TO_SPACE: + case CSINN_OP_BATCH_TO_SPACE_ND: + case CSINN_OP_BROADCOST: + case CSINN_OP_CEIL: + case CSINN_OP_CLIP: + case CSINN_OP_COL2IM: + case CSINN_OP_COS: + case CSINN_OP_COSH: + case CSINN_OP_CROP: + case CSINN_OP_CUMPROD: + case CSINN_OP_CUMSUM: + case CSINN_OP_DEPTH_TO_SPACE: + case CSINN_OP_ELU: + case CSINN_OP_ERF: + case CSINN_OP_EXP: + case CSINN_OP_EXPAND_DIMS: + case CSINN_OP_EXPM1: + case CSINN_OP_FLATTEN: + case CSINN_OP_FLOOR: + case CSINN_OP_GLOBAL_AVGPOOL2D: + case CSINN_OP_GLOBAL_MAXPOOL2D: + case CSINN_OP_HARD_SIGMOID: + case CSINN_OP_IM2COL: + case CSINN_OP_ISNAN: + case CSINN_OP_L2N: + case CSINN_OP_L2POOL2D: + case CSINN_OP_LEAKY_RELU: + case CSINN_OP_LOG_SOFTMAX: + case CSINN_OP_LOG: + case CSINN_OP_LOG1P: + case CSINN_OP_LOGICAL_NOT: + case CSINN_OP_LRN: + case CSINN_OP_MAX: + case CSINN_OP_MAXPOOL2D: + case CSINN_OP_MAXPOOL2D_LOCAT: + case CSINN_OP_MAXPOOL3D: + case CSINN_OP_MEAN: + case CSINN_OP_MEAN_STRIDE: + case CSINN_OP_MIN: + case CSINN_OP_NDARRAY_SIZE: + case CSINN_OP_NEGATIIVE: + case CSINN_OP_NOT: + case CSINN_OP_PAD: + case CSINN_OP_PROD: + case CSINN_OP_REDUCE_LOGSUMEXP: + case CSINN_OP_REDUCE_MAX: + case CSINN_OP_REDUCE_MEAN: + case CSINN_OP_REDUCE_MIN: + case CSINN_OP_REDUCE_PROD: + case CSINN_OP_REDUCE_SUM: + case CSINN_OP_RELU: + case CSINN_OP_RELU1: + case CSINN_OP_RELU6: + case CSINN_OP_RELUN: + case CSINN_OP_REORG: + case CSINN_OP_RESHAPE: + case CSINN_OP_RESIZE: + case CSINN_OP_REVERSE: + case CSINN_OP_ROUND: + case CSINN_OP_RSQRT: + case CSINN_OP_SHAPE: + case CSINN_OP_SHUFFLE_CHANNEL: + case CSINN_OP_SIGMOID: + case CSINN_OP_SIGN: + case CSINN_OP_SIN: + case CSINN_OP_SINH: + case CSINN_OP_SLICE: + case CSINN_OP_SOFTMAX: + case CSINN_OP_SOFTPLUS: + case CSINN_OP_SOFTRELU: + case CSINN_OP_SOFTSIGN: + case CSINN_OP_SPACE_TO_BATCH: + case CSINN_OP_SPACE_TO_BATCH_ND: + case CSINN_OP_SPACE_TO_DEPTH: + case CSINN_OP_SQRT: + case CSINN_OP_SQUARE: + case CSINN_OP_SQUEEZE: + case CSINN_OP_STACK: + case CSINN_OP_STRIDED_SLICE: + case CSINN_OP_SUM: + case CSINN_OP_TAN: + case CSINN_OP_TANH: + case CSINN_OP_THRESHOLD_RELU: + case CSINN_OP_TILE: + case CSINN_OP_TRANSPOSE: + case CSINN_OP_TRUNC: + case CSINN_OP_UNPOOLING: + case CSINN_OP_UNSTACK: + case CSINN_OP_YUV_RGB_SCALE: + ret = func(node->in[0]->data, node->out[0]->data, params); + break; + case CSINN_OP_ADD: + case CSINN_OP_AND: + case CSINN_OP_DIV: + case CSINN_OP_EQUANL: + case CSINN_OP_FLOOR_DIVIDE: + case CSINN_OP_FLOOR_MOD: + case CSINN_OP_GATHER_ND: + case CSINN_OP_GATHER: + case CSINN_OP_GREATHER_EQUAL: + case CSINN_OP_GREATHER: + case CSINN_OP_LESS_EQUAL: + case CSINN_OP_LESS: + case CSINN_OP_LOGICAL_AND: + case CSINN_OP_LOGICAL_OR: + case CSINN_OP_LOGICAL_XOR: + case CSINN_OP_MATMUL: + case CSINN_OP_MAXIMUM: + case CSINN_OP_MINIMUM: + case CSINN_OP_MOD: + case CSINN_OP_MUL: + case CSINN_OP_NON_MAX_SUPPRESSION: + case CSINN_OP_NOT_EQUAL: + case CSINN_OP_OR: + case CSINN_OP_POWER: + case CSINN_OP_PRELU: + case CSINN_OP_SEQUENCE_MASK: + case CSINN_OP_SEGMENT_MAX: + case CSINN_OP_UNSORTED_SEGMENT_MAX: + case CSINN_OP_SEGMENT_MEAN: + case CSINN_OP_UNSORTED_SEGMENT_MEAN: + case CSINN_OP_SEGMENT_MIN: + case CSINN_OP_UNSORTED_SEGMENT_MIN: + case CSINN_OP_SEGMENT_PROD: + case CSINN_OP_UNSORTED_SEGMENT_PROD: + case CSINN_OP_SEGMENT_SUM: + case CSINN_OP_UNSORTED_SEGMENT_SUM: + case CSINN_OP_SUB: + case CSINN_OP_XOR: + ret = func(node->in[0]->data, node->in[1]->data, node->out[0]->data, params); + break; + case CSINN_OP_CONV1D: + case CSINN_OP_CONV2D: + case CSINN_OP_CONV2D_RELU: + case CSINN_OP_CONV2D_RELU6: + case CSINN_OP_CONV2D_CHANNEL: + case CSINN_OP_CONV2D_CHANNEL_RELU: + case CSINN_OP_CONV2D_CHANNEL_RELU6: + case CSINN_OP_DEPTHWISE_CONV2D: + case CSINN_OP_DEPTHWISE_CONV2D_RELU: + case CSINN_OP_DEPTHWISE_CONV2D_RELU6: + case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL: + case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU: + case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU6: + case CSINN_OP_GROUP_CONV2D: + case CSINN_OP_GROUP_CONV2D_RELU: + case CSINN_OP_GROUP_CONV2D_RELU6: + case CSINN_OP_GROUP_CONV2D_CHANNEL: + case CSINN_OP_GROUP_CONV2D_CHANNEL_RELU: + case CSINN_OP_CONV3D: + case CSINN_OP_DECONV2D: + case CSINN_OP_DEPTHWISE_DECONV2D: + case CSINN_OP_DECONV3D: + case CSINN_OP_FULLYCONNECTED: + case CSINN_OP_LAYER_NORM: + case CSINN_OP_CACHE_MATMUL: + case CSINN_OP_CACHE_CONV1D: + ret = func(node->in[0]->data, node->out[0]->data, node->in[1]->data, node->in[2]->data, + params); + break; + case CSINN_OP_FSMN: + ret = func(node->in[0]->data, node->in[1]->data, node->in[2]->data, node->in[3]->data, + node->in[4]->data, node->out[0]->data, params); + break; + case CSINN_OP_CONCAT: + inputs = csi_mem_alloc(sizeof(struct csi_tensor *) * + ((struct concat_params *)params)->inputs_count); + for (int i = 0; i < ((struct concat_params *)params)->inputs_count; i++) { + inputs[i] = node->in[i]->data; + } + ret = func(inputs, node->out[0]->data, params); + csi_mem_free(inputs); + break; + case CSINN_OP_SPLIT: + outputs = csi_mem_alloc(sizeof(struct csi_tensor *) * + ((struct split_params *)params)->output_num); + for (int i = 0; i < ((struct split_params *)params)->output_num; i++) { + outputs[i] = node->out[i]->data; + } + ret = func(node->in[0]->data, outputs, params); + csi_mem_free(outputs); + break; + case CSINN_OP_ALL: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_ALL\n")); + break; + case CSINN_OP_ARANGE: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_ARANGE\n")); + break; + case CSINN_OP_BN: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_BN\n")); + break; + case CSINN_OP_MIN_STRIDE: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_MIN_STRIDE\n")); + break; + case CSINN_OP_ONE_HOT: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_ONE_HOT\n")); + break; + case CSINN_OP_PROPOSAL: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_PROPOSAL\n")); + break; + case CSINN_OP_PSROIPOOLING: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_PSROIPOOLING\n")); + break; + case CSINN_OP_ROIALIGN: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_ROIALIGN\n")); + break; + case CSINN_OP_ROIPOOL: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_ROIPOOL\n")); + break; + case CSINN_OP_SCATTER_ND: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_SCATTER_ND\n")); + break; + case CSINN_OP_SELECT: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_SELECT\n")); + break; + case CSINN_OP_TOPK: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_TOPK\n")); + break; + case CSINN_OP_WHERE: + CSI_DEBUG_CALL(printf("unsupported CSINN_OP_WHERE\n")); + break; + default: + CSI_DEBUG_CALL(printf("unknown op\n")); + return CSINN_FALSE; } return ret; } +void csi_gref_reset_graph_visit(struct csi_ref_graph *graph) +{ + for (int i = 0; i < graph->layer_index; i++) { + if (graph->layer[i]->type == CSINN_SUBGRAPH) { + graph->layer[i]->visited = 0; + struct csi_ref_graph *s_subgraph = graph->layer[i]->data; + for (int j = 0; j < s_subgraph->layer_index; j++) { + s_subgraph->layer[j]->visited = 0; + } + } else { + graph->layer[i]->visited = 0; + } + } +} /* * transform graph as gloal graph and sub graph @@ -364,6 +385,78 @@ static int init_op(struct csi_node *node) return CSINN_FALSE; } +void csi_subgraph_fvisit_create(struct csi_ref_graph *graph, struct csi_node *node) +{ + csi_gref_graph_insert(node, graph); +} + +/* + * transform graph as gloal graph and sub graph + */ +static struct csi_ref_graph *convert_graph(struct csi_ref_graph *ograph) +{ + if (csi_debug_get_level() <= CSI_DEBUG_LEVEL_INFO) { + csi_debug_info("\nOriginal graph:\n"); + csi_gref_post_dfs(ograph, csi_subgraph_fvisit_print); + csi_gref_reset_graph_visit(ograph); + } + + struct csi_ref_graph *subgraph = csi_subgraph_generate(ograph); + csi_gref_reset_graph_visit(subgraph); + + csi_debug_info("\nGenerated subgraph:\n"); + for (int i = 0; i < subgraph->layer_index; i++) { + if (subgraph->layer[i]->type == CSINN_SUBGRAPH) { + struct csi_ref_graph *s_subgraph = subgraph->layer[i]->data; + if (s_subgraph->layer_size == 0) continue; + csi_gref_update_input_output(subgraph, i); + if (csi_debug_get_level() <= CSI_DEBUG_LEVEL_INFO) { + csi_debug_info("---- subgraph_%d: ----\n", i); + csi_gref_reset_graph_visit(s_subgraph); + csi_gref_post_dfs(s_subgraph, csi_subgraph_fvisit_print); + csi_gref_reset_graph_visit(s_subgraph); + csi_debug_info("----subgraph_%d end.----\n", i); + } + + struct csi_ref_graph *new_sgraph = csi_mem_alloc(sizeof(struct csi_ref_graph)); + new_sgraph->input = s_subgraph->input; + new_sgraph->output = s_subgraph->output; + new_sgraph->input_num = s_subgraph->input_num; + new_sgraph->output_num = s_subgraph->output_num; + csi_gref_post_dfs(new_sgraph, csi_subgraph_fvisit_create); + subgraph->layer[i]->data = new_sgraph; + + csi_gref_reset_graph_visit(s_subgraph); + } else { + csi_debug_info("%s\n", subgraph->layer[i]->name); + } + } + + csi_gref_reset_graph_visit(subgraph); + struct csi_ref_graph *ggraph = csi_subgraph_rebuild(subgraph); + + struct csi_ref_graph *sorted_graph = csi_subgraph_topology_sort(ggraph); + csi_debug_info("\nsorted subgraph:\n"); + for (int i = 0; i < sorted_graph->layer_index; i++) { + if (sorted_graph->layer[i]->type == CSINN_SUBGRAPH) { + struct csi_ref_graph *s_subgraph = sorted_graph->layer[i]->data; + if (s_subgraph->layer_size == 0) continue; + if (csi_debug_get_level() <= CSI_DEBUG_LEVEL_INFO) { + csi_debug_info("---- subgraph_%d: ----\n", i); + csi_gref_reset_graph_visit(s_subgraph); + csi_gref_post_dfs(s_subgraph, csi_subgraph_fvisit_print); + csi_gref_reset_graph_visit(s_subgraph); + csi_debug_info("----subgraph_%d end.----\n", i); + } + csi_gref_reset_graph_visit(s_subgraph); + } else { + csi_debug_info("%s\n", sorted_graph->layer[i]->name); + } + } + + return sorted_graph; +} + void csi_gref_session_setup(struct csi_session *sess) { struct csi_ref_graph *graph = csi_gref_get_graph(sess); @@ -381,11 +474,11 @@ void csi_gref_session_setup(struct csi_session *sess) } } - for (int i = 0; i< graph->output_num; i++){ + for (int i = 0; i < graph->output_num; i++) { graph->output[i]->ref_count_init++; } - struct csi_ref_graph *ggraph = transform_graph(graph); + struct csi_ref_graph *ggraph = convert_graph(graph); for (int i = 0; i < ggraph->layer_index; i++) { struct csi_node *n = ggraph->layer[i]; @@ -410,7 +503,9 @@ static void node_ref_reset(struct csi_session *sess) for (int i = 0; i < graph->layer_index; i++) { n = graph->layer[i]; for (int k = 0; k < n->out_num; k++) { - n->out[k]->ref_count = n->out[k]->ref_count_init; + if (n->out[k] != NULL) { + n->out[k]->ref_count = n->out[k]->ref_count_init; + } } } } @@ -454,7 +549,7 @@ static int op_run(struct csi_node *node) int csi_gref_session_run(struct csi_session *sess) { struct csi_ref_graph *g = csi_gref_get_graph(sess); - + uint64_t time_acc = 0; node_ref_reset(sess); for (int i = 0; i < g->layer_index; i++) { struct csi_node *n = g->layer[i]; @@ -464,13 +559,23 @@ int csi_gref_session_run(struct csi_session *sess) csi_subgraph_run_deinit(n); } else if (n->type >= 0 && n->type < CSINN_SESSION_INIT) { op_run_init(n); +#ifdef CSINN_LAYER_BENCHMARK + uint64_t start_time = csi_get_timespec(); op_run(n); + uint64_t end_time = csi_get_timespec(); + csi_benchmark_layer(n, start_time, end_time, i); + time_acc += end_time - start_time; +#else + op_run(n); +#endif op_run_deinit(n); } else { return CSINN_FALSE; } } - +#ifdef CSINN_LAYER_BENCHMARK + csi_debug_info("[layer-benchmark]: network exec time = %f\n", time_acc / 1000000.0f); +#endif return CSINN_TRUE; } @@ -519,9 +624,177 @@ struct csi_ref_graph *csi_gref_get_graph(struct csi_session *sess) return td->graph; } +int csi_gref_is_root_node(struct csi_ref_graph *graph, struct csi_node *node) +{ + int is_root = 1; + for (int i = 0; i < node->in_num; i++) { + struct csi_tensor *in_tensor = node->in[i]->data; + if (in_tensor->is_const) continue; + int find_res = 0; + for (int j = 0; j < graph->input_num; j++) { + if (node->in[i] == graph->input[j]) { + find_res = 1; + break; + } + } + if (find_res == 0) { + is_root = 0; + break; + } + } + return is_root; +} + +void csi_gref_post_dfs(struct csi_ref_graph *graph, + void (*fvisit)(struct csi_ref_graph *, struct csi_node *)) +{ + int stack_size = 32; + struct csi_node **node_stack = csi_mem_alloc(sizeof(struct csi_node *) * stack_size); + int *input_idx_stack = csi_mem_alloc(sizeof(int) * stack_size); + int stack_top = -1; + + struct csi_node *curr_node; + for (int i = 0; i < graph->output_num; i++) { + struct csi_tensor *ot = graph->output[i]->data; + if (ot->is_const) continue; + curr_node = graph->output[i]->in[0]; + if (curr_node->visited == 0) { + ++stack_top; + if (stack_top >= stack_size) { + stack_size += 32; + node_stack = csi_mem_realloc(node_stack, sizeof(struct csi_node *) * stack_size); + input_idx_stack = csi_mem_realloc(input_idx_stack, sizeof(int) * stack_size); + } + node_stack[stack_top] = curr_node; + input_idx_stack[stack_top] = 0; + curr_node->visited = 1; + } + while (stack_top != -1) { + curr_node = node_stack[stack_top]; + if (input_idx_stack[stack_top] == csi_node_get_non_const_in_number(curr_node)) { + fvisit(graph, curr_node); + --stack_top; + } else { + struct csi_node *next_node = NULL; + if (csi_node_find(graph->input, graph->input_num, + curr_node->in[input_idx_stack[stack_top]]) == -1) { + next_node = curr_node->in[input_idx_stack[stack_top]]->in[0]; + if (next_node && next_node->type == CSINN_SUBGRAPH_RETURN) { + next_node = graph->layer[next_node->subgraph_idx]; + } + } + input_idx_stack[stack_top] += 1; + if (next_node && next_node->visited == 0) { + ++stack_top; + if (stack_top >= stack_size) { + stack_size += 32; + node_stack = + csi_mem_realloc(node_stack, sizeof(struct csi_node *) * stack_size); + input_idx_stack = + csi_mem_realloc(input_idx_stack, sizeof(int) * stack_size); + } + node_stack[stack_top] = next_node; + input_idx_stack[stack_top] = 0; + next_node->visited = 1; + } + } + } + } + + csi_mem_free(node_stack); + csi_mem_free(input_idx_stack); +} + +void csi_gref_update_input_output(struct csi_ref_graph *ograph, int index) +{ + if (ograph->layer[index]->type != CSINN_SUBGRAPH) { + return; + } + struct csi_ref_graph *graph = ograph->layer[index]->data; + if (graph->layer_size == 0) return; + + /* update inputs */ + graph->input = NULL; + graph->input_num = 0; + struct csi_node **tensor_node_set = NULL; + int set_num = 0; + for (int i = 0; i < graph->layer_index; i++) { + for (int j = 0; j < csi_node_get_non_const_in_number(graph->layer[i]); j++) { + struct csi_node *in_tensor_node = graph->layer[i]->in[j]; + if (csi_node_find(graph->layer, graph->layer_index, in_tensor_node->in[0]) == -1 && + csi_node_find(tensor_node_set, set_num, in_tensor_node) == -1) { + graph->input = csi_mem_realloc(graph->input, + sizeof(struct csi_node *) * (graph->input_num + 1)); + graph->input[graph->input_num] = in_tensor_node; + graph->input_num++; + + // tensor_node_set[set_num] = in_tensor_node; + tensor_node_set = + csi_mem_realloc(tensor_node_set, sizeof(struct csi_node *) * (set_num + 1)); + tensor_node_set[set_num] = in_tensor_node; + set_num++; + } + } + } + csi_mem_free(tensor_node_set); + + /* update outputs */ + graph->output = NULL; + graph->output_num = 0; + for (int i = 0; i < graph->layer_index; i++) { + for (int j = 0; j < graph->layer[i]->out_num; j++) { + struct csi_node *out_tensor_node = graph->layer[i]->out[j]; + + int find_res_inside = 0; + for (int k = 0; k < graph->layer_index; k++) { + if (k == i) continue; + if (csi_node_find(graph->layer[k]->in, graph->layer[k]->in_num, out_tensor_node) > + -1) { + find_res_inside = 1; + break; + } + } + + int find_res_outside = 0; + for (int s_idx = 0; s_idx < ograph->layer_index; s_idx++) { + if (s_idx == index) continue; + if (ograph->layer[s_idx]->type != CSINN_SUBGRAPH) { + if (csi_node_find(ograph->layer[s_idx]->in, ograph->layer[s_idx]->in_num, + out_tensor_node) > -1) { + find_res_outside = 1; + break; + } + } else { + struct csi_ref_graph *outside_sgraph = ograph->layer[s_idx]->data; + if (outside_sgraph->layer_size == 0) continue; + + for (int inner_idx = 0; inner_idx < outside_sgraph->layer_index; inner_idx++) { + if (csi_node_find(outside_sgraph->layer[inner_idx]->in, + outside_sgraph->layer[inner_idx]->in_num, + out_tensor_node) > -1) { + find_res_outside = 1; + break; + } + } + if (find_res_outside) { + break; + } + } + } + + if (!find_res_inside || find_res_outside) { + graph->output = csi_mem_realloc( + graph->output, sizeof(struct csi_node *) * (graph->output_num + 1)); + graph->output[graph->output_num] = out_tensor_node; + graph->output_num++; + } + } + } +} + static void *setup_bc_map() { - static void* bc_map[CSINN_OP_AND_UTILS_SIZE]; + static void *bc_map[CSINN_OP_AND_UTILS_SIZE]; bc_map[CSINN_OP_ABS] = csi_gref_abs; bc_map[CSINN_OP_ACOS] = csi_gref_acos; @@ -543,10 +816,13 @@ static void *setup_bc_map() bc_map[CSINN_OP_BATCH_TO_SPACE] = csi_gref_batch_to_space; bc_map[CSINN_OP_BATCH_TO_SPACE_ND] = csi_gref_batch_to_space_nd; bc_map[CSINN_OP_BROADCOST] = csi_gref_broadcast_to; + bc_map[CSINN_OP_CACHE_MATMUL] = csi_gref_cache_matmul; + bc_map[CSINN_OP_CACHE_CONV1D] = csi_gref_cache_conv1d; bc_map[CSINN_OP_CEIL] = csi_gref_ceil; bc_map[CSINN_OP_CLIP] = csi_gref_clip; bc_map[CSINN_OP_COL2IM] = csi_gref_col2im; bc_map[CSINN_OP_CONCAT] = csi_gref_concat; + bc_map[CSINN_OP_CONV1D] = csi_gref_conv1d; bc_map[CSINN_OP_CONV2D] = csi_gref_conv2d; bc_map[CSINN_OP_CONV2D_RELU] = csi_gref_conv2d_relu; bc_map[CSINN_OP_CONV2D_RELU6] = csi_gref_conv2d_relu6; @@ -585,6 +861,7 @@ static void *setup_bc_map() bc_map[CSINN_OP_HARD_SIGMOID] = csi_gref_hard_sigmoid; bc_map[CSINN_OP_IM2COL] = csi_gref_im2col; bc_map[CSINN_OP_ISNAN] = csi_gref_isnan_bool; + bc_map[CSINN_OP_LAYER_NORM] = csi_gref_layer_norm; bc_map[CSINN_OP_L2N] = csi_gref_l2_normalization; bc_map[CSINN_OP_L2POOL2D] = csi_gref_l2pool; bc_map[CSINN_OP_LEAKY_RELU] = csi_gref_leaky_relu; @@ -681,7 +958,6 @@ static void *setup_bc_map() bc_map[CSINN_OP_TOPK] = csi_gref_topk; bc_map[CSINN_OP_TRUNC] = csi_gref_trunc; bc_map[CSINN_OP_TRANSPOSE] = csi_gref_transpose; - bc_map[CSINN_OP_TRUNC] = csi_gref_trunc; bc_map[CSINN_OP_UNPOOLING] = csi_gref_unpooling; bc_map[CSINN_OP_UNSTACK] = csi_gref_unstack; bc_map[CSINN_OP_WHERE] = csi_gref_where; @@ -705,12 +981,10 @@ static void *setup_bc_map() return bc_map; } -static int get_bc_map_index(int op, int dtype) -{ - return op; -} +static int get_bc_map_index(int op, int dtype) { return op; } -void *csi_bc_map_gref(int op, int dtype) { +void *csi_bc_map_gref(int op, int dtype) +{ static int has_init; static void **bc_map_table; if (has_init == 0) { diff --git a/source/graph_ref/shape.c b/source/graph_ref/shape.c index 56dd9bb6..b06cb619 100644 --- a/source/graph_ref/shape.c +++ b/source/graph_ref/shape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/shuffle_channel.c b/source/graph_ref/shuffle_channel.c index e92ea020..eb419830 100644 --- a/source/graph_ref/shuffle_channel.c +++ b/source/graph_ref/shuffle_channel.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/sigmoid.c b/source/graph_ref/sigmoid.c index d753f8eb..0758aa11 100644 --- a/source/graph_ref/sigmoid.c +++ b/source/graph_ref/sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/sign.c b/source/graph_ref/sign.c index 34d506e5..75bd150e 100644 --- a/source/graph_ref/sign.c +++ b/source/graph_ref/sign.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/sin.c b/source/graph_ref/sin.c index 366630f4..8ac236aa 100644 --- a/source/graph_ref/sin.c +++ b/source/graph_ref/sin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/sinh.c b/source/graph_ref/sinh.c index b7fec753..dc3fbf0f 100644 --- a/source/graph_ref/sinh.c +++ b/source/graph_ref/sinh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/slice.c b/source/graph_ref/slice.c index 5ed61894..252f0834 100644 --- a/source/graph_ref/slice.c +++ b/source/graph_ref/slice.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/softmax.c b/source/graph_ref/softmax.c index e0f081c2..1ab06362 100644 --- a/source/graph_ref/softmax.c +++ b/source/graph_ref/softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/softplus.c b/source/graph_ref/softplus.c index 43f1111e..b4ec8933 100644 --- a/source/graph_ref/softplus.c +++ b/source/graph_ref/softplus.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/softrelu.c b/source/graph_ref/softrelu.c index a44e5a82..0a9972e6 100644 --- a/source/graph_ref/softrelu.c +++ b/source/graph_ref/softrelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/softsign.c b/source/graph_ref/softsign.c index 928fd766..023ad975 100644 --- a/source/graph_ref/softsign.c +++ b/source/graph_ref/softsign.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/space_to_batch.c b/source/graph_ref/space_to_batch.c index fdadaf63..3d6a7679 100644 --- a/source/graph_ref/space_to_batch.c +++ b/source/graph_ref/space_to_batch.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/space_to_batch_nd.c b/source/graph_ref/space_to_batch_nd.c index a641c8c9..7cdf00aa 100644 --- a/source/graph_ref/space_to_batch_nd.c +++ b/source/graph_ref/space_to_batch_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/space_to_depth.c b/source/graph_ref/space_to_depth.c index b3b6dd56..d4da69da 100644 --- a/source/graph_ref/space_to_depth.c +++ b/source/graph_ref/space_to_depth.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/split.c b/source/graph_ref/split.c index 110deec6..2c675495 100644 --- a/source/graph_ref/split.c +++ b/source/graph_ref/split.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/sqrt.c b/source/graph_ref/sqrt.c index 273f9c1c..649941d3 100644 --- a/source/graph_ref/sqrt.c +++ b/source/graph_ref/sqrt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/square.c b/source/graph_ref/square.c index 4fd01565..d68bacc8 100644 --- a/source/graph_ref/square.c +++ b/source/graph_ref/square.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/squeeze.c b/source/graph_ref/squeeze.c index 6b761f77..8d4dbe2f 100644 --- a/source/graph_ref/squeeze.c +++ b/source/graph_ref/squeeze.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/stack.c b/source/graph_ref/stack.c index 062fbfbf..e0185f4b 100644 --- a/source/graph_ref/stack.c +++ b/source/graph_ref/stack.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/strided_slice.c b/source/graph_ref/strided_slice.c index 4ca6c10c..e29f899b 100644 --- a/source/graph_ref/strided_slice.c +++ b/source/graph_ref/strided_slice.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/sub.c b/source/graph_ref/sub.c index 202b4236..33e7f6f7 100644 --- a/source/graph_ref/sub.c +++ b/source/graph_ref/sub.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/subgraph.c b/source/graph_ref/subgraph.c index c34e1510..f721fc77 100644 --- a/source/graph_ref/subgraph.c +++ b/source/graph_ref/subgraph.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,19 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" #include "csi_utils.h" -void csi_subgraph_alloc(struct csi_node *node, struct csi_ref_graph *ograph, struct csi_ref_graph *ggraph) +void csi_subgraph_alloc(struct csi_node *node, struct csi_ref_graph *ograph, + struct csi_ref_graph *ggraph) { - struct csi_ref_graph *sgraph = csi_mem_alloc(sizeof(struct csi_ref_graph)); - sgraph->input_num = 1; - sgraph->output_num = 1; - sgraph->input = csi_mem_alloc(sgraph->input_num * sizeof(struct csi_node *)); - sgraph->output = csi_mem_alloc(sgraph->output_num * sizeof(struct csi_node *)); - csi_gref_graph_insert(node, sgraph); int node_input_num = 0; for (int i = 0; i < node->in_num; i++) { struct csi_tensor *node_in = node->in[i]->data; @@ -36,200 +31,295 @@ void csi_subgraph_alloc(struct csi_node *node, struct csi_ref_graph *ograph, str node_input_num++; } } + struct csi_ref_graph *sgraph = csi_mem_alloc(sizeof(struct csi_ref_graph)); + sgraph->input_num = node_input_num; + sgraph->output_num = node->out_num; + sgraph->input = csi_mem_alloc(sgraph->input_num * sizeof(struct csi_node *)); + sgraph->output = csi_mem_alloc(sgraph->output_num * sizeof(struct csi_node *)); + csi_gref_graph_insert(node, sgraph); - struct csi_node *sg_in = csi_node_alloc(CSINN_SUBGRAPH, "graph_in", node_input_num, node_input_num, sgraph); + struct csi_node *sg_in = + csi_node_alloc(CSINN_SUBGRAPH, "graph_in", node_input_num, node_input_num, sgraph); + csi_gref_graph_insert(sg_in, ggraph); + sg_in->subgraph_idx = ggraph->layer_index - 1; + node->subgraph_idx = ggraph->layer_index - 1; for (int i = 0; i < node_input_num; i++) { sg_in->in[i] = node->in[i]; struct csi_tensor *sg_in_tensor = csi_alloc_tensor(NULL); csi_tensor_copy(sg_in_tensor, node->in[i]->data); struct csi_node *sg_in_node = csi_node_var_alloc("graph_in_tensor", sg_in_tensor); + sg_in_node->subgraph_idx = ggraph->layer_index - 1; node->in[i] = sg_in_node; - csi_gref_graph_insert(sg_in, ggraph); + sg_in_node->out[0] = node; + + sgraph->input[i] = sg_in_node; } - sgraph->input[0] = node->in[0]; - sgraph->output[0] = node->out[0]; - struct csi_node *sg_out = csi_node_alloc(CSINN_SUBGRAPH_RETURN, "graph_out", node->out_num, node->out_num, ggraph->layer[ggraph->layer_index]); + + // sgraph->input[0] = node->in[0]; + // sgraph->output[0] = node->out[0]; + + struct csi_node *sg_out = csi_node_alloc(CSINN_SUBGRAPH_RETURN, "graph_out", node->out_num, + node->out_num, ggraph->layer[ggraph->layer_index]); + csi_gref_graph_insert(sg_out, sgraph); + sg_out->subgraph_idx = ggraph->layer_index - 1; for (int i = 0; i < node->out_num; i++) { sg_out->out[i] = node->out[i]; + node->out[i]->in[0] = sg_out; struct csi_tensor *sg_out_tensor = csi_alloc_tensor(NULL); csi_tensor_copy(sg_out_tensor, node->out[i]->data); struct csi_node *sg_out_node = csi_node_var_alloc("graph_out_tensor", sg_out_tensor); + sg_out_node->subgraph_idx = ggraph->layer_index - 1; node->out[i] = sg_out_node; + sg_out_node->in[0] = node; sg_out->in[i] = sg_out_node; - csi_gref_graph_insert(sg_out, sgraph); + + sgraph->output[i] = sg_out->out[i]; + } +} + +static void set_sub_session(struct csi_session *sub_sess, struct csi_params_base *params, + struct csi_ref_graph *graph) +{ + struct csi_session *base_sess = params->sess; + sub_sess->base_api = params->api; + if (params->api == CSINN_LIGHT) { + sub_sess->base_dtype = base_sess->base_dtype; + sub_sess->debug_level = base_sess->debug_level; + sub_sess->base_run_mode = CSINN_RM_NPU_GRAPH; + sub_sess->base_quant_type = base_sess->base_quant_type; + } else if (params->api = CSINN_ASP) { + sub_sess->base_dtype = base_sess->base_dtype; + sub_sess->debug_level = base_sess->debug_level; + sub_sess->base_quant_type = base_sess->base_quant_type; + sub_sess->td = csi_mem_alloc(sizeof(struct csi_gref_target_data)); + /* ASP: reuse gref graph */ + struct csi_gref_target_data *td = sub_sess->td; + td->graph = graph; + } else { + csi_debug_error("sub session api unsupport\n"); } } int csi_subgraph_init(struct csi_node *n) { struct csi_ref_graph *sgraph = n->data; - struct csi_node *node = sgraph->layer[0]; - struct csi_params_base *params = node->data; + struct csi_node *init_node = sgraph->layer[0]; + struct csi_params_base *init_params = init_node->data; struct csi_session *sub_sess = csi_alloc_session(); - sub_sess->base_api = CSINN_LIGHT; - sub_sess->base_dtype = CSINN_DTYPE_FLOAT32; - sub_sess->debug_level = CSI_DEBUG_LEVEL_INFO; + set_sub_session(sub_sess, init_params, sgraph); csi_session_init(sub_sess); - params->sess = sub_sess; - int (*func)(); - struct csi_tensor *input0, *output, *kernel, *bias; - input0 = node->in[0]->data; - input0->sess = sub_sess; - func = csi_bc_map(params->api, CSINN_RM_LAYER, node->type, input0->dtype); + csi_set_input_number(sgraph->input_num, sub_sess); + csi_set_output_number(sgraph->output_num, sub_sess); + + /* set input tensor */ + for (int i = 0; i < sgraph->input_num; i++) { + struct csi_tensor *input_t; + input_t = sgraph->input[i]->data; + input_t->sess = sub_sess; + csi_set_tensor_entry(input_t, sub_sess); + csi_set_input(i, input_t, sub_sess); + } int ret = CSINN_TRUE; + for (int idx = 0; idx < sgraph->layer_index; idx++) { + struct csi_node *node = sgraph->layer[idx]; + if (node->type == CSINN_SUBGRAPH_RETURN) continue; - switch (node->type) - { - case CSINN_OP_ABS: - case CSINN_OP_ACOS: - case CSINN_OP_ACOSH: - case CSINN_OP_ANY: - case CSINN_OP_ARGMAX: - case CSINN_OP_ARGMIN: - case CSINN_OP_ASIN: - case CSINN_OP_ASINH: - case CSINN_OP_ATAN: - case CSINN_OP_ATANH: - case CSINN_OP_AVGPOOL2D: - case CSINN_OP_AVGPOOL3D: - case CSINN_OP_BATCH_TO_SPACE: - case CSINN_OP_BATCH_TO_SPACE_ND: - case CSINN_OP_BROADCOST: - case CSINN_OP_CEIL: - case CSINN_OP_CLIP: - case CSINN_OP_COL2IM: - case CSINN_OP_COS: - case CSINN_OP_COSH: - case CSINN_OP_CROP: - case CSINN_OP_CUMPROD: - case CSINN_OP_CUMSUM: - case CSINN_OP_DEPTH_TO_SPACE: - case CSINN_OP_ELU: - case CSINN_OP_ERF: - case CSINN_OP_EXP: - case CSINN_OP_EXPAND_DIMS: - case CSINN_OP_EXPM1: - case CSINN_OP_FLATTEN: - case CSINN_OP_FLOOR: - case CSINN_OP_GLOBAL_AVGPOOL2D: - case CSINN_OP_GLOBAL_MAXPOOL2D: - case CSINN_OP_HARD_SIGMOID: - case CSINN_OP_IM2COL: - case CSINN_OP_ISNAN: - case CSINN_OP_L2N: - case CSINN_OP_L2POOL2D: - case CSINN_OP_LEAKY_RELU: - case CSINN_OP_LOG_SOFTMAX: - case CSINN_OP_LOG: - case CSINN_OP_LOG1P: - case CSINN_OP_LOGICAL_NOT: - case CSINN_OP_LRN: - case CSINN_OP_MAX: - case CSINN_OP_MAXPOOL2D: - case CSINN_OP_MAXPOOL2D_LOCAT: - case CSINN_OP_MAXPOOL3D: - case CSINN_OP_MEAN: - case CSINN_OP_MIN: - case CSINN_OP_NDARRAY_SIZE: - case CSINN_OP_NEGATIIVE: - case CSINN_OP_NOT: - case CSINN_OP_PAD: - case CSINN_OP_PROD: - case CSINN_OP_REDUCE_LOGSUMEXP: - case CSINN_OP_REDUCE_MAX: - case CSINN_OP_REDUCE_MEAN: - case CSINN_OP_REDUCE_MIN: - case CSINN_OP_REDUCE_PROD: - case CSINN_OP_REDUCE_SUM: - case CSINN_OP_RELU: - case CSINN_OP_RELU1: - case CSINN_OP_RELU6: - case CSINN_OP_RELUN: - case CSINN_OP_REORG: - case CSINN_OP_RESHAPE: - case CSINN_OP_RESIZE: - case CSINN_OP_REVERSE: - case CSINN_OP_ROUND: - case CSINN_OP_RSQRT: - case CSINN_OP_SHAPE: - case CSINN_OP_SHUFFLE_CHANNEL: - case CSINN_OP_SIGMOID: - case CSINN_OP_SIGN: - case CSINN_OP_SIN: - case CSINN_OP_SINH: - case CSINN_OP_SLICE: - case CSINN_OP_SOFTMAX: - case CSINN_OP_SOFTPLUS: - case CSINN_OP_SOFTRELU: - case CSINN_OP_SOFTSIGN: - case CSINN_OP_SPACE_TO_BATCH: - case CSINN_OP_SPACE_TO_BATCH_ND: - case CSINN_OP_SPACE_TO_DEPTH: - case CSINN_OP_SQRT: - case CSINN_OP_SQUARE: - case CSINN_OP_SQUEEZE: - case CSINN_OP_STACK: - case CSINN_OP_STRIDED_SLICE: - case CSINN_OP_SUM: - case CSINN_OP_TAN: - case CSINN_OP_TANH: - case CSINN_OP_THRESHOLD_RELU: - case CSINN_OP_TILE: - case CSINN_OP_TRANSPOSE: - case CSINN_OP_TRUNC: - case CSINN_OP_UNPOOLING: - case CSINN_OP_UNSTACK: - case CSINN_OP_YUV_RGB_SCALE: - csi_set_input_number(1, sub_sess); - csi_set_output_number(1, sub_sess); - csi_set_tensor_entry(input0, sub_sess); - csi_set_input(0, input0, sub_sess); - output = node->out[0]->data; - output->sess = sub_sess; - ret = func(input0, output, params); - csi_set_output(0, output, sub_sess); - break; - case CSINN_OP_CONV2D: - case CSINN_OP_CONV2D_RELU: - case CSINN_OP_CONV2D_RELU6: - case CSINN_OP_CONV2D_CHANNEL: - case CSINN_OP_CONV2D_CHANNEL_RELU: - case CSINN_OP_CONV2D_CHANNEL_RELU6: - case CSINN_OP_DEPTHWISE_CONV2D: - case CSINN_OP_DEPTHWISE_CONV2D_RELU: - case CSINN_OP_DEPTHWISE_CONV2D_RELU6: - case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL: - case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU: - case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU6: - case CSINN_OP_GROUP_CONV2D: - case CSINN_OP_GROUP_CONV2D_RELU: - case CSINN_OP_GROUP_CONV2D_RELU6: - case CSINN_OP_GROUP_CONV2D_CHANNEL: - case CSINN_OP_GROUP_CONV2D_CHANNEL_RELU: - case CSINN_OP_CONV3D: - case CSINN_OP_DECONV2D: - case CSINN_OP_DEPTHWISE_DECONV2D: - case CSINN_OP_DECONV3D: - case CSINN_OP_FULLYCONNECTED: - csi_set_input_number(1, sub_sess); - csi_set_output_number(1, sub_sess); - csi_set_tensor_entry(input0, sub_sess); - csi_set_input(0, input0, sub_sess); - output = node->out[0]->data; - output->sess = sub_sess; - kernel = node->in[1]->data; - kernel->sess = sub_sess; - bias = node->in[2]->data; - bias->sess = sub_sess; - ret = func(input0, output, kernel, bias, params); - csi_set_output(0, output, sub_sess); - break; - default: - CSI_DEBUG_CALL(printf("unknown op1\n")); - return CSINN_FALSE; + struct csi_params_base *params = node->data; + params->sess = sub_sess; + int (*func)(); + struct csi_tensor *input0, *output, *kernel, *bias; + input0 = node->in[0]->data; + input0->sess = sub_sess; + func = csi_bc_map(params->api, CSINN_RM_LAYER, node->type, input0->dtype); + + switch (node->type) { + case CSINN_OP_ABS: + case CSINN_OP_ACOS: + case CSINN_OP_ACOSH: + case CSINN_OP_ANY: + case CSINN_OP_ARGMAX: + case CSINN_OP_ARGMIN: + case CSINN_OP_ASIN: + case CSINN_OP_ASINH: + case CSINN_OP_ATAN: + case CSINN_OP_ATANH: + case CSINN_OP_AVGPOOL2D: + case CSINN_OP_AVGPOOL3D: + case CSINN_OP_BATCH_TO_SPACE: + case CSINN_OP_BATCH_TO_SPACE_ND: + case CSINN_OP_BROADCOST: + case CSINN_OP_CEIL: + case CSINN_OP_CLIP: + case CSINN_OP_COL2IM: + case CSINN_OP_COS: + case CSINN_OP_COSH: + case CSINN_OP_CROP: + case CSINN_OP_CUMPROD: + case CSINN_OP_CUMSUM: + case CSINN_OP_DEPTH_TO_SPACE: + case CSINN_OP_ELU: + case CSINN_OP_ERF: + case CSINN_OP_EXP: + case CSINN_OP_EXPAND_DIMS: + case CSINN_OP_EXPM1: + case CSINN_OP_FLATTEN: + case CSINN_OP_FLOOR: + case CSINN_OP_GLOBAL_AVGPOOL2D: + case CSINN_OP_GLOBAL_MAXPOOL2D: + case CSINN_OP_HARD_SIGMOID: + case CSINN_OP_IM2COL: + case CSINN_OP_ISNAN: + case CSINN_OP_L2N: + case CSINN_OP_L2POOL2D: + case CSINN_OP_LEAKY_RELU: + case CSINN_OP_LOG_SOFTMAX: + case CSINN_OP_LOG: + case CSINN_OP_LOG1P: + case CSINN_OP_LOGICAL_NOT: + case CSINN_OP_LRN: + case CSINN_OP_MAX: + case CSINN_OP_MAXPOOL2D: + case CSINN_OP_MAXPOOL2D_LOCAT: + case CSINN_OP_MAXPOOL3D: + case CSINN_OP_MEAN: + case CSINN_OP_MIN: + case CSINN_OP_NDARRAY_SIZE: + case CSINN_OP_NEGATIIVE: + case CSINN_OP_NOT: + case CSINN_OP_PAD: + case CSINN_OP_PROD: + case CSINN_OP_REDUCE_LOGSUMEXP: + case CSINN_OP_REDUCE_MAX: + case CSINN_OP_REDUCE_MEAN: + case CSINN_OP_REDUCE_MIN: + case CSINN_OP_REDUCE_PROD: + case CSINN_OP_REDUCE_SUM: + case CSINN_OP_RELU: + case CSINN_OP_RELU1: + case CSINN_OP_RELU6: + case CSINN_OP_RELUN: + case CSINN_OP_REORG: + case CSINN_OP_RESHAPE: + case CSINN_OP_RESIZE: + case CSINN_OP_REVERSE: + case CSINN_OP_ROUND: + case CSINN_OP_RSQRT: + case CSINN_OP_SHAPE: + case CSINN_OP_SHUFFLE_CHANNEL: + case CSINN_OP_SIGMOID: + case CSINN_OP_SIGN: + case CSINN_OP_SIN: + case CSINN_OP_SINH: + case CSINN_OP_SLICE: + case CSINN_OP_SOFTMAX: + case CSINN_OP_SOFTPLUS: + case CSINN_OP_SOFTRELU: + case CSINN_OP_SOFTSIGN: + case CSINN_OP_SPACE_TO_BATCH: + case CSINN_OP_SPACE_TO_BATCH_ND: + case CSINN_OP_SPACE_TO_DEPTH: + case CSINN_OP_SQRT: + case CSINN_OP_SQUARE: + case CSINN_OP_SQUEEZE: + case CSINN_OP_STACK: + case CSINN_OP_STRIDED_SLICE: + case CSINN_OP_SUM: + case CSINN_OP_TAN: + case CSINN_OP_TANH: + case CSINN_OP_THRESHOLD_RELU: + case CSINN_OP_TILE: + case CSINN_OP_TRANSPOSE: + case CSINN_OP_TRUNC: + case CSINN_OP_UNPOOLING: + case CSINN_OP_UNSTACK: + case CSINN_OP_YUV_RGB_SCALE: + output = node->out[0]->data; + output->sess = sub_sess; + ret = func(input0, output, params); + break; + case CSINN_OP_ADD: + case CSINN_OP_MUL: { + output = node->out[0]->data; + output->sess = sub_sess; + struct csi_tensor *rhs = node->in[1]->data; + rhs->sess = sub_sess; + ret = func(input0, rhs, output, params); + break; + } + case CSINN_OP_CONV2D: + case CSINN_OP_CONV2D_RELU: + case CSINN_OP_CONV2D_RELU6: + case CSINN_OP_CONV2D_CHANNEL: + case CSINN_OP_CONV2D_CHANNEL_RELU: + case CSINN_OP_CONV2D_CHANNEL_RELU6: + case CSINN_OP_DEPTHWISE_CONV2D: + case CSINN_OP_DEPTHWISE_CONV2D_RELU: + case CSINN_OP_DEPTHWISE_CONV2D_RELU6: + case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL: + case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU: + case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU6: + case CSINN_OP_GROUP_CONV2D: + case CSINN_OP_GROUP_CONV2D_RELU: + case CSINN_OP_GROUP_CONV2D_RELU6: + case CSINN_OP_GROUP_CONV2D_CHANNEL: + case CSINN_OP_GROUP_CONV2D_CHANNEL_RELU: + case CSINN_OP_CONV3D: + case CSINN_OP_DECONV2D: + case CSINN_OP_DEPTHWISE_DECONV2D: + case CSINN_OP_DECONV3D: + case CSINN_OP_FULLYCONNECTED: + output = node->out[0]->data; + output->sess = sub_sess; + kernel = node->in[1]->data; + kernel->sess = sub_sess; + bias = node->in[2]->data; + bias->sess = sub_sess; + ret = func(input0, output, kernel, bias, params); + break; + case CSINN_OP_SPLIT: { + struct csi_tensor **split_output = + csi_mem_alloc(sizeof(struct csi_tensor *) * node->out_num); + for (int i = 0; i < node->out_num; i++) { + split_output[i] = node->out[i]->data; + split_output[i]->sess = sub_sess; + } + ret = func(input0, split_output, params); + break; + } + case CSINN_OP_CONCAT: { + struct csi_tensor **concat_input = + csi_mem_alloc(sizeof(struct csi_tensor *) * node->in_num); + for (int i = 0; i < node->in_num; i++) { + concat_input[i] = node->in[i]->data; + concat_input[i]->sess = sub_sess; + } + output = node->out[0]->data; + output->sess = sub_sess; + ret = func(concat_input, output, params); + break; + } + default: + CSI_DEBUG_CALL(printf("unknown op1\n")); + return CSINN_FALSE; + } + } + /* set output tensor */ + int i = 0; + for (i = 0; i < sgraph->layer_index; i++) { + if (sgraph->layer[i]->type == CSINN_SUBGRAPH_RETURN) { + break; + } + } + struct csi_node *return_node = sgraph->layer[i]; + for (int i = 0; i < return_node->in_num; i++) { + struct csi_tensor *output_t; + output_t = return_node->in[i]->data; + output_t->sess = sub_sess; + csi_set_output(i, output_t, sub_sess); } + csi_session_setup(sub_sess); return ret; @@ -252,7 +342,7 @@ static int csi_subgraph_entry(struct csi_node *n) struct csi_tensor *tsrc = n->in[i]->data; struct csi_tensor *tdst = sgraph->input[i]->data; // if (tdst->data == NULL) { - tdst->data = tsrc->data; + tdst->data = tsrc->data; // } else if (tdst->data != tsrc->data) { // memcpy(tdst->data, tsrc->data, csi_tensor_byte_size(tsrc)); // } @@ -267,10 +357,10 @@ static int csi_subgraph_entry(struct csi_node *n) static int csi_subgraph_return(struct csi_ref_graph *graph, struct csi_node *ret_node) { for (int i = 0; i < graph->output_num; i++) { - struct csi_tensor *tsrc = ret_node->out[i]->data; + struct csi_tensor *tsrc = ret_node->in[i]->data; struct csi_tensor *tdst = graph->output[i]->data; // if (tdst->data == NULL) { - tdst->data = tsrc->data; + tdst->data = tsrc->data; // } else if (tdst->data != tsrc->data) { // memcpy(tdst->data, tsrc->data, csi_tensor_byte_size(tsrc)); // } @@ -278,15 +368,9 @@ static int csi_subgraph_return(struct csi_ref_graph *graph, struct csi_node *ret return CSINN_TRUE; } -int csi_subgraph_run_init(struct csi_node *n) -{ - csi_subgraph_entry(n); -} +int csi_subgraph_run_init(struct csi_node *n) { csi_subgraph_entry(n); } -int csi_subgraph_run_deinit(struct csi_node *n) -{ - -} +int csi_subgraph_run_deinit(struct csi_node *n) {} int csi_subgraph_run(struct csi_node *n) { @@ -297,140 +381,394 @@ int csi_subgraph_run(struct csi_node *n) struct csi_tensor **inputs; struct csi_tensor **outputs; - switch (node->type) - { - case CSINN_OP_ABS: - case CSINN_OP_ACOS: - case CSINN_OP_ACOSH: - case CSINN_OP_ANY: - case CSINN_OP_ARGMAX: - case CSINN_OP_ARGMIN: - case CSINN_OP_ASIN: - case CSINN_OP_ASINH: - case CSINN_OP_ATAN: - case CSINN_OP_ATANH: - case CSINN_OP_AVGPOOL2D: - case CSINN_OP_AVGPOOL3D: - case CSINN_OP_BATCH_TO_SPACE: - case CSINN_OP_BATCH_TO_SPACE_ND: - case CSINN_OP_BROADCOST: - case CSINN_OP_CEIL: - case CSINN_OP_CLIP: - case CSINN_OP_COL2IM: - case CSINN_OP_COS: - case CSINN_OP_COSH: - case CSINN_OP_CROP: - case CSINN_OP_CUMPROD: - case CSINN_OP_CUMSUM: - case CSINN_OP_DEPTH_TO_SPACE: - case CSINN_OP_ELU: - case CSINN_OP_ERF: - case CSINN_OP_EXP: - case CSINN_OP_EXPAND_DIMS: - case CSINN_OP_EXPM1: - case CSINN_OP_FLATTEN: - case CSINN_OP_FLOOR: - case CSINN_OP_GLOBAL_AVGPOOL2D: - case CSINN_OP_GLOBAL_MAXPOOL2D: - case CSINN_OP_HARD_SIGMOID: - case CSINN_OP_IM2COL: - case CSINN_OP_ISNAN: - case CSINN_OP_L2N: - case CSINN_OP_L2POOL2D: - case CSINN_OP_LEAKY_RELU: - case CSINN_OP_LOG_SOFTMAX: - case CSINN_OP_LOG: - case CSINN_OP_LOG1P: - case CSINN_OP_LOGICAL_NOT: - case CSINN_OP_LRN: - case CSINN_OP_MAX: - case CSINN_OP_MAXPOOL2D: - case CSINN_OP_MAXPOOL2D_LOCAT: - case CSINN_OP_MAXPOOL3D: - case CSINN_OP_MEAN: - case CSINN_OP_MIN: - case CSINN_OP_NDARRAY_SIZE: - case CSINN_OP_NEGATIIVE: - case CSINN_OP_NOT: - case CSINN_OP_PAD: - case CSINN_OP_PROD: - case CSINN_OP_REDUCE_LOGSUMEXP: - case CSINN_OP_REDUCE_MAX: - case CSINN_OP_REDUCE_MEAN: - case CSINN_OP_REDUCE_MIN: - case CSINN_OP_REDUCE_PROD: - case CSINN_OP_REDUCE_SUM: - case CSINN_OP_RELU: - case CSINN_OP_RELU1: - case CSINN_OP_RELU6: - case CSINN_OP_RELUN: - case CSINN_OP_REORG: - case CSINN_OP_RESHAPE: - case CSINN_OP_RESIZE: - case CSINN_OP_REVERSE: - case CSINN_OP_ROUND: - case CSINN_OP_RSQRT: - case CSINN_OP_SHAPE: - case CSINN_OP_SHUFFLE_CHANNEL: - case CSINN_OP_SIGMOID: - case CSINN_OP_SIGN: - case CSINN_OP_SIN: - case CSINN_OP_SINH: - case CSINN_OP_SLICE: - case CSINN_OP_SOFTMAX: - case CSINN_OP_SOFTPLUS: - case CSINN_OP_SOFTRELU: - case CSINN_OP_SOFTSIGN: - case CSINN_OP_SPACE_TO_BATCH: - case CSINN_OP_SPACE_TO_BATCH_ND: - case CSINN_OP_SPACE_TO_DEPTH: - case CSINN_OP_SQRT: - case CSINN_OP_SQUARE: - case CSINN_OP_SQUEEZE: - case CSINN_OP_STACK: - case CSINN_OP_STRIDED_SLICE: - case CSINN_OP_SUM: - case CSINN_OP_TAN: - case CSINN_OP_TANH: - case CSINN_OP_THRESHOLD_RELU: - case CSINN_OP_TILE: - case CSINN_OP_TRANSPOSE: - case CSINN_OP_TRUNC: - case CSINN_OP_UNPOOLING: - case CSINN_OP_UNSTACK: - case CSINN_OP_YUV_RGB_SCALE: - case CSINN_OP_CONV2D: - case CSINN_OP_CONV2D_RELU: - case CSINN_OP_CONV2D_RELU6: - case CSINN_OP_CONV2D_CHANNEL: - case CSINN_OP_CONV2D_CHANNEL_RELU: - case CSINN_OP_CONV2D_CHANNEL_RELU6: - case CSINN_OP_DEPTHWISE_CONV2D: - case CSINN_OP_DEPTHWISE_CONV2D_RELU: - case CSINN_OP_DEPTHWISE_CONV2D_RELU6: - case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL: - case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU: - case CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU6: - case CSINN_OP_GROUP_CONV2D: - case CSINN_OP_GROUP_CONV2D_RELU: - case CSINN_OP_GROUP_CONV2D_RELU6: - case CSINN_OP_GROUP_CONV2D_CHANNEL: - case CSINN_OP_GROUP_CONV2D_CHANNEL_RELU: - case CSINN_OP_CONV3D: - case CSINN_OP_DECONV2D: - case CSINN_OP_DEPTHWISE_DECONV2D: - case CSINN_OP_DECONV3D: - case CSINN_OP_FULLYCONNECTED: - csi_update_input(0, node->in[0]->data, params->sess); - csi_session_run(params->sess); - csi_get_output(0, node->out[0]->data, params->sess); - break; - default: - CSI_DEBUG_CALL(printf("unknown op2\n")); - return CSINN_FALSE; + for (int i = 0; i < sgraph->input_num; i++) { + csi_update_input(i, sgraph->input[i]->data, params->sess); + } + csi_session_run(params->sess); + + int i; + for (i = 0; i < sgraph->layer_index; i++) { + if (sgraph->layer[i]->type == CSINN_SUBGRAPH_RETURN) { + break; + } + } + struct csi_node *return_node = sgraph->layer[i]; + + for (int i = 0; i < return_node->in_num; i++) { + csi_get_output(i, return_node->in[i]->data, params->sess); } /* CSINN_SUBGRAPH_RETURN */ - csi_subgraph_return(sgraph, node); + csi_subgraph_return(sgraph, return_node); return ret; } + +struct csi_node *csi_gref_get_input_subgraph(struct csi_ref_graph *graph, struct csi_node *node, + int index) +{ + struct csi_node *next_node = node->in[index]->in[0]; + if (next_node && next_node->type == CSINN_SUBGRAPH_RETURN) { + next_node = graph->layer[next_node->subgraph_idx]; + } + return next_node; +} + +int csi_subgraph_get_device(struct csi_node *node) +{ + int device = -1; + struct csi_params_base *params; + if (node->type == CSINN_SUBGRAPH) { + struct csi_ref_graph *sgraph = node->data; + params = sgraph->layer[0]->data; + device = params->api; + } else if (node->type >= 0 && node->type < CSINN_SESSION_INIT) { + params = node->data; + device = params->api; + } else { + CSI_DEBUG_CALL(printf("unknown node type.\n")); + } + return device; +} + +void csi_subgraph_fvisit_print(struct csi_ref_graph *graph, struct csi_node *node) +{ + printf("%s\n", node->name); +} + +void csi_subgraph_fvisit_fuse(struct csi_ref_graph *graph, struct csi_node *node) +{ + /* CPU nodes needn't be added into subgraph. */ + struct csi_params_base *params = node->data; + if (params->api == params->sess->base_api) { + node->subgraph_idx = graph->layer_index; + csi_gref_graph_insert(node, graph); + + for (int m = 0; m < csi_node_get_non_const_in_number(node); m++) { + struct csi_node *m_node = csi_gref_get_input_subgraph(graph, node, m); + if (m_node) { + csi_node_restrict_map_insert(m_node->subgraph_idx, + graph->layer[node->subgraph_idx]); + } + } + return; + } + if (csi_gref_is_root_node(graph, node)) { + /* create subgraph node */ + struct csi_ref_graph *sgraph = csi_mem_alloc(sizeof(struct csi_ref_graph)); + struct csi_node *sg_in = csi_node_alloc(CSINN_SUBGRAPH, "graph_in", 0, 0, sgraph); + node->subgraph_idx = graph->layer_index; + sg_in->subgraph_idx = graph->layer_index; + csi_gref_graph_insert(node, sgraph); + csi_gref_graph_insert(sg_in, graph); + return; + } + int i; + int can_fuse = 0; + for (i = 0; i < csi_node_get_non_const_in_number(node); i++) { + struct csi_node *i_node = csi_gref_get_input_subgraph(graph, node, i); + if (!i_node) continue; + + int i_device = csi_subgraph_get_device(i_node); + int curr_device = csi_subgraph_get_device(node); + if (i_device == curr_device) { + int is_restrict = 0; + /* determine whether the i-th input subgraph is restricted by other input subgraph. */ + for (int j = 0; j < csi_node_get_non_const_in_number(node); j++) { + if (i == j) continue; + struct csi_node *j_node = csi_gref_get_input_subgraph(graph, node, j); + if (!j_node) continue; + int find_flag = 0; + + struct csi_node *j_subgraph = graph->layer[j_node->subgraph_idx]; + // if (j_subgraph->restricted_map_num == 0) break; + + for (int k = 0; k < j_subgraph->restricted_map_num; k++) { + if (i_node->subgraph_idx == j_subgraph->restricted_map[k]) { + find_flag = 1; + break; + } + } + if (find_flag) { + is_restrict = 1; + break; + } + } + if (!is_restrict) { + /* add current node into its i-th input subgraph. */ + node->subgraph_idx = i_node->subgraph_idx; + struct csi_ref_graph *sgraph = graph->layer[i_node->subgraph_idx]->data; + csi_gref_graph_insert(node, sgraph); + can_fuse = 1; + break; + } + } + } + + if (can_fuse) { + /* Try to fuse input subgraph into current subgraph. */ + for (int m = 0; m < csi_node_get_non_const_in_number(node); m++) { + if (m == i) continue; + struct csi_node *m_node = csi_gref_get_input_subgraph(graph, node, m); + if (!m_node) continue; + if (m_node->subgraph_idx == node->subgraph_idx) continue; + int curr_device = csi_subgraph_get_device(node); + int m_device = csi_subgraph_get_device(m_node); + + if (curr_device == m_device) { + /* fusing subgraphs. */ + struct csi_node *m_subgraph = graph->layer[m_node->subgraph_idx]; + struct csi_ref_graph *sgraph = m_subgraph->data; + csi_gref_update_input_output(graph, m_node->subgraph_idx); + + int is_restrict = 0; + for (int n = 0; n < sgraph->input_num; n++) { + if (sgraph->input[n]->in[0] == NULL) { + // m_node has no subgraph input. + continue; + } + int in_m_subgraph_index = sgraph->input[n]->in[0]->subgraph_idx; + int find_flag = 0; + for (int nr = 0; nr < graph->layer[in_m_subgraph_index]->restricted_map_num; + nr++) { + if (node->subgraph_idx == + graph->layer[in_m_subgraph_index]->restricted_map[nr]) { + find_flag = 1; + break; + } + } + if (find_flag) { + is_restrict = 1; + break; + } + } + + struct csi_ref_graph *curr_sgraph = graph->layer[node->subgraph_idx]->data; + csi_gref_update_input_output(graph, node->subgraph_idx); + + int is_restrict2 = 0; + for (int n = 0; n < curr_sgraph->input_num; n++) { + if (curr_sgraph->input[n]->in[0] == NULL) { + // curr_node has no subgraph input. + continue; + } + int in_m_subgraph_index = curr_sgraph->input[n]->in[0]->subgraph_idx; + int find_flag = 0; + for (int nr = 0; nr < graph->layer[in_m_subgraph_index]->restricted_map_num; + nr++) { + if (m_node->subgraph_idx == + graph->layer[in_m_subgraph_index]->restricted_map[nr]) { + find_flag = 1; + break; + } + } + if (find_flag) { + is_restrict2 = 1; + break; + } + } + + if (!is_restrict && !is_restrict2) { + /* can fuse subgraph into current subgraph. */ + for (int n = 0; n < sgraph->layer_index; n++) { + struct csi_node *subgraph_node = sgraph->layer[n]; + subgraph_node->subgraph_idx = node->subgraph_idx; + csi_gref_graph_insert(subgraph_node, curr_sgraph); + } + for (int n = 0; n < m_subgraph->restricted_map_num; n++) { + csi_node_restrict_map_insert(m_subgraph->restricted_map[n], + graph->layer[node->subgraph_idx]); + } + sgraph->layer_index = 0; + sgraph->layer_size = 0; + } else { + csi_node_restrict_map_insert(node->subgraph_idx, m_subgraph); + } + } else { + csi_node_restrict_map_insert(m_node->subgraph_idx, + graph->layer[node->subgraph_idx]); + } + } + } else { + /* current node is restricted from being fused into input subgraph by other subgraph. + * so create new subgraph and update its restricted_map. + */ + struct csi_ref_graph *sgraph = csi_mem_alloc(sizeof(struct csi_ref_graph)); + struct csi_node *sg_in = csi_node_alloc(CSINN_SUBGRAPH, "graph_in", 1, 1, sgraph); + node->subgraph_idx = graph->layer_index; + sg_in->subgraph_idx = graph->layer_index; + csi_gref_graph_insert(node, sgraph); + csi_gref_graph_insert(sg_in, graph); + + for (int m = 0; m < csi_node_get_non_const_in_number(node); m++) { + struct csi_node *m_node = csi_gref_get_input_subgraph(graph, node, m); + if (m_node) { + csi_node_restrict_map_insert(m_node->subgraph_idx, + graph->layer[node->subgraph_idx]); + } + } + } + return; +} + +struct csi_ref_graph *csi_subgraph_generate(struct csi_ref_graph *ograph) +{ + struct csi_ref_graph *ggraph = csi_mem_alloc(sizeof(struct csi_ref_graph)); + ggraph->input = ograph->input; + ggraph->output = ograph->output; + ggraph->input_num = ograph->input_num; + ggraph->output_num = ograph->output_num; + + csi_gref_post_dfs(ggraph, csi_subgraph_fvisit_fuse); + + return ggraph; +} + +void csi_subgraph_topology_sort_internal(struct csi_ref_graph *new_graph, + struct csi_ref_graph *old_graph) +{ + int stack_size = 32; + struct csi_node **node_stack = csi_mem_alloc(sizeof(struct csi_node *) * stack_size); + int *input_idx_stack = csi_mem_alloc(sizeof(int) * stack_size); + int stack_top = -1; + + struct csi_node *curr_node; + for (int i = 0; i < new_graph->output_num; i++) { + struct csi_tensor *ot = new_graph->output[i]->data; + if (ot->is_const) continue; + curr_node = new_graph->output[i]->in[0]; + if (curr_node->subgraph_idx != -1 && + old_graph->layer[curr_node->subgraph_idx]->type == CSINN_SUBGRAPH) { + // curr_node is subgraph node. + curr_node = old_graph->layer[curr_node->subgraph_idx]; + } + if (curr_node->visited == 0) { + ++stack_top; + if (stack_top >= stack_size) { + stack_size += 32; + node_stack = csi_mem_realloc(node_stack, sizeof(struct csi_node *) * stack_size); + input_idx_stack = csi_mem_realloc(input_idx_stack, sizeof(int) * stack_size); + } + node_stack[stack_top] = curr_node; + input_idx_stack[stack_top] = 0; + curr_node->visited = 1; + } + while (stack_top != -1) { + curr_node = node_stack[stack_top]; + if (input_idx_stack[stack_top] == csi_node_get_non_const_in_number(curr_node) || + csi_gref_is_root_node(new_graph, curr_node)) { + csi_gref_graph_insert(curr_node, new_graph); + + --stack_top; + } else { + struct csi_node *next_node = curr_node->in[input_idx_stack[stack_top]]->in[0]; + if (next_node && next_node->subgraph_idx != -1 && + old_graph->layer[next_node->subgraph_idx]->type == CSINN_SUBGRAPH) { + next_node = old_graph->layer[next_node->subgraph_idx]; + } + input_idx_stack[stack_top] += 1; + if (next_node && next_node->visited == 0) { + ++stack_top; + if (stack_top >= stack_size) { + stack_size += 32; + node_stack = + csi_mem_realloc(node_stack, sizeof(struct csi_node *) * stack_size); + input_idx_stack = + csi_mem_realloc(input_idx_stack, sizeof(int) * stack_size); + } + node_stack[stack_top] = next_node; + input_idx_stack[stack_top] = 0; + next_node->visited = 1; + } + } + } + } + + csi_mem_free(node_stack); + csi_mem_free(input_idx_stack); +} + +struct csi_ref_graph *csi_subgraph_topology_sort(struct csi_ref_graph *graph) +{ + struct csi_ref_graph *sorted_graph = csi_mem_alloc(sizeof(struct csi_ref_graph)); + sorted_graph->input = graph->input; + sorted_graph->output = graph->output; + sorted_graph->input_num = graph->input_num; + sorted_graph->output_num = graph->output_num; + + csi_subgraph_topology_sort_internal(sorted_graph, graph); + csi_gref_reset_graph_visit(sorted_graph); + + return sorted_graph; +} + +struct csi_ref_graph *csi_subgraph_rebuild(struct csi_ref_graph *subgraph) +{ + struct csi_ref_graph *splited_graph = csi_mem_alloc(sizeof(struct csi_ref_graph)); + splited_graph->input = subgraph->input; + splited_graph->output = subgraph->output; + splited_graph->input_num = subgraph->input_num; + splited_graph->output_num = subgraph->output_num; + for (int i = 0; i < subgraph->layer_index; i++) { + struct csi_node *node = subgraph->layer[i]; + if (node->type == CSINN_SUBGRAPH) { + struct csi_ref_graph *sgraph = node->data; + if (sgraph->layer_size == 0) continue; + + /* split graph */ + /* for input formal parameters */ + node->in = csi_mem_realloc(node->in, sgraph->input_num * sizeof(struct csi_node *)); + node->in_num = sgraph->input_num; + for (int in_idx = 0; in_idx < sgraph->input_num; in_idx++) { + struct csi_node *in_tensor_node = sgraph->input[in_idx]; + node->in[in_idx] = in_tensor_node; + + struct csi_tensor *sg_in_tensor = csi_alloc_tensor(NULL); + csi_tensor_copy(sg_in_tensor, in_tensor_node->data); + struct csi_node *sg_in_node = csi_node_var_alloc("graph_in_tensor", sg_in_tensor); + sgraph->input[in_idx] = sg_in_node; + + for (int l_idx = 0; l_idx < sgraph->layer_index; l_idx++) { + struct csi_node *curr_node = sgraph->layer[l_idx]; + int index = csi_node_find(curr_node->in, curr_node->in_num, in_tensor_node); + if (index > -1) { + curr_node->in[index] = sg_in_node; + } + } + } + /* for output formal parameters */ + struct csi_node *sg_out = csi_node_alloc(CSINN_SUBGRAPH_RETURN, "graph_out", + sgraph->output_num, sgraph->output_num, NULL); + for (int out_idx = 0; out_idx < sgraph->output_num; out_idx++) { + struct csi_node *out_tensor_node = sgraph->output[out_idx]; + sg_out->in[out_idx] = out_tensor_node; + + for (int l_idx = 0; l_idx < sgraph->layer_index; l_idx++) { + struct csi_node *curr_node = sgraph->layer[l_idx]; + int index = csi_node_find(curr_node->out, curr_node->out_num, out_tensor_node); + if (index > -1) { + struct csi_tensor *sg_out_tensor = csi_alloc_tensor(NULL); + csi_tensor_copy(sg_out_tensor, curr_node->out[index]->data); + struct csi_node *sg_out_node = + csi_node_var_alloc("graph_out_tensor", sg_out_tensor); + + sg_out->out[out_idx] = sg_out_node; + } + } + } + csi_gref_graph_insert(sg_out, sgraph); + + /* update subgraph_idx */ + int curr_subgraph_idx = splited_graph->layer_index; + for (int idx = 0; idx < sgraph->layer_index; idx++) { + sgraph->layer[idx]->subgraph_idx = curr_subgraph_idx; + } + node->subgraph_idx = curr_subgraph_idx; + csi_gref_graph_insert(node, splited_graph); + } else { + /* update subgraph_idx */ + node->subgraph_idx = splited_graph->layer_index; + csi_gref_graph_insert(node, splited_graph); + } + } + return splited_graph; +} diff --git a/source/graph_ref/sum.c b/source/graph_ref/sum.c index 468c951e..fcbaaf08 100644 --- a/source/graph_ref/sum.c +++ b/source/graph_ref/sum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/tan.c b/source/graph_ref/tan.c index 3ac7690d..b5693260 100644 --- a/source/graph_ref/tan.c +++ b/source/graph_ref/tan.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/tanh.c b/source/graph_ref/tanh.c index 1196a77d..b3fc2406 100644 --- a/source/graph_ref/tanh.c +++ b/source/graph_ref/tanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/threshold_relu.c b/source/graph_ref/threshold_relu.c index 169ca015..381ca44b 100644 --- a/source/graph_ref/threshold_relu.c +++ b/source/graph_ref/threshold_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/tile.c b/source/graph_ref/tile.c index f4a2737d..0d276b47 100644 --- a/source/graph_ref/tile.c +++ b/source/graph_ref/tile.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/topk.c b/source/graph_ref/topk.c index ee78f86f..3cae010c 100644 --- a/source/graph_ref/topk.c +++ b/source/graph_ref/topk.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/transpose.c b/source/graph_ref/transpose.c index 763f0e25..daf0f6ee 100644 --- a/source/graph_ref/transpose.c +++ b/source/graph_ref/transpose.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/trunc.c b/source/graph_ref/trunc.c index 385b135c..474c6800 100644 --- a/source/graph_ref/trunc.c +++ b/source/graph_ref/trunc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/unpooling.c b/source/graph_ref/unpooling.c index 90dfedbc..9fcd33dc 100644 --- a/source/graph_ref/unpooling.c +++ b/source/graph_ref/unpooling.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/unstack.c b/source/graph_ref/unstack.c index 2231855b..a7569080 100644 --- a/source/graph_ref/unstack.c +++ b/source/graph_ref/unstack.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/utils.c b/source/graph_ref/utils.c index 5320a9ee..f0452aef 100644 --- a/source/graph_ref/utils.c +++ b/source/graph_ref/utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" @@ -24,7 +24,7 @@ int csi_gref_graph_insert(struct csi_node *node, struct csi_ref_graph *graph) { if (graph->layer_size == 0 || graph->layer_index == graph->layer_size - 1) { graph->layer_size += 128; - graph->layer = realloc(graph->layer, graph->layer_size * sizeof(struct csi_node *)); + graph->layer = csi_mem_realloc(graph->layer, graph->layer_size * sizeof(struct csi_node *)); } graph->layer[graph->layer_index] = node; graph->layer_index++; diff --git a/source/graph_ref/where.c b/source/graph_ref/where.c index 1904af38..fcf824b9 100644 --- a/source/graph_ref/where.c +++ b/source/graph_ref/where.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/xor.c b/source/graph_ref/xor.c index 94603ec1..9f6157a1 100644 --- a/source/graph_ref/xor.c +++ b/source/graph_ref/xor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/graph_ref/yuv_rgb_scale.c b/source/graph_ref/yuv_rgb_scale.c index 0e282859..6ad42e13 100644 --- a/source/graph_ref/yuv_rgb_scale.c +++ b/source/graph_ref/yuv_rgb_scale.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_gref.h" diff --git a/source/i805_opt/activation/csi_i805_clip_8.S b/source/i805_opt/activation/csi_i805_clip_8.S index db3c62df..722b1b6e 100644 --- a/source/i805_opt/activation/csi_i805_clip_8.S +++ b/source/i805_opt/activation/csi_i805_clip_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** diff --git a/source/i805_opt/activation/csi_i805_relu6_8.S b/source/i805_opt/activation/csi_i805_relu6_8.S index 3273dc92..5960f022 100644 --- a/source/i805_opt/activation/csi_i805_relu6_8.S +++ b/source/i805_opt/activation/csi_i805_relu6_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** diff --git a/source/i805_opt/activation/csi_i805_relu_8.S b/source/i805_opt/activation/csi_i805_relu_8.S index 2c45b32c..0e4cc276 100644 --- a/source/i805_opt/activation/csi_i805_relu_8.S +++ b/source/i805_opt/activation/csi_i805_relu_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** diff --git a/source/i805_opt/activation/csi_xt800v_nn_activations_q15.S b/source/i805_opt/activation/csi_xt800v_nn_activations_q15.S index e2dba93d..3399f31c 100644 --- a/source/i805_opt/activation/csi_xt800v_nn_activations_q15.S +++ b/source/i805_opt/activation/csi_xt800v_nn_activations_q15.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/activation/csi_xt800v_nn_activations_q15_fast.S b/source/i805_opt/activation/csi_xt800v_nn_activations_q15_fast.S index 08d5424a..9588c03b 100644 --- a/source/i805_opt/activation/csi_xt800v_nn_activations_q15_fast.S +++ b/source/i805_opt/activation/csi_xt800v_nn_activations_q15_fast.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/activation/csi_xt800v_nn_activations_q7.S b/source/i805_opt/activation/csi_xt800v_nn_activations_q7.S index e2f70a0b..2309a326 100644 --- a/source/i805_opt/activation/csi_xt800v_nn_activations_q7.S +++ b/source/i805_opt/activation/csi_xt800v_nn_activations_q7.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/activation/csi_xt800v_nn_activations_q7_fast.S b/source/i805_opt/activation/csi_xt800v_nn_activations_q7_fast.S index 928801bc..404f05ea 100644 --- a/source/i805_opt/activation/csi_xt800v_nn_activations_q7_fast.S +++ b/source/i805_opt/activation/csi_xt800v_nn_activations_q7_fast.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/activation/csi_xt800v_relu_q15.S b/source/i805_opt/activation/csi_xt800v_relu_q15.S index 290b4122..bd7869e1 100644 --- a/source/i805_opt/activation/csi_xt800v_relu_q15.S +++ b/source/i805_opt/activation/csi_xt800v_relu_q15.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/activation/csi_xt800v_relu_q7.S b/source/i805_opt/activation/csi_xt800v_relu_q7.S index a6a83f01..81cebfb7 100644 --- a/source/i805_opt/activation/csi_xt800v_relu_q7.S +++ b/source/i805_opt/activation/csi_xt800v_relu_q7.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/add.c b/source/i805_opt/add.c index be95d559..75eddf7f 100644 --- a/source/i805_opt/add.c +++ b/source/i805_opt/add.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/avgpool.c b/source/i805_opt/avgpool.c index 8f391a52..a9d302fc 100644 --- a/source/i805_opt/avgpool.c +++ b/source/i805_opt/avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/basic_math/csi_i805_elementwise_add_8.S b/source/i805_opt/basic_math/csi_i805_elementwise_add_8.S index 1bdbfdd2..b69212f3 100644 --- a/source/i805_opt/basic_math/csi_i805_elementwise_add_8.S +++ b/source/i805_opt/basic_math/csi_i805_elementwise_add_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** diff --git a/source/i805_opt/basic_math/csi_i805_elementwise_mul_8.S b/source/i805_opt/basic_math/csi_i805_elementwise_mul_8.S index 4b107915..1a539407 100644 --- a/source/i805_opt/basic_math/csi_i805_elementwise_mul_8.S +++ b/source/i805_opt/basic_math/csi_i805_elementwise_mul_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** diff --git a/source/i805_opt/clip.c b/source/i805_opt/clip.c index 3e98965f..f68cde01 100644 --- a/source/i805_opt/clip.c +++ b/source/i805_opt/clip.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/convolution.c b/source/i805_opt/convolution.c index d772c8bd..4cbe32aa 100644 --- a/source/i805_opt/convolution.c +++ b/source/i805_opt/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/convolution/csi_i805_convolution_1x1_8.S b/source/i805_opt/convolution/csi_i805_convolution_1x1_8.S index dfbc0f36..e3d7c1bd 100644 --- a/source/i805_opt/convolution/csi_i805_convolution_1x1_8.S +++ b/source/i805_opt/convolution/csi_i805_convolution_1x1_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** * @file csi_i805_depthwise_convolution_3x3_8.S diff --git a/source/i805_opt/convolution/csi_i805_convolution_8.S b/source/i805_opt/convolution/csi_i805_convolution_8.S index f1582ce6..4e61cb9e 100644 --- a/source/i805_opt/convolution/csi_i805_convolution_8.S +++ b/source/i805_opt/convolution/csi_i805_convolution_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** * @file csi_i805_convolution_8.S diff --git a/source/i805_opt/convolution/csi_i805_depthwise_convolution_8.S b/source/i805_opt/convolution/csi_i805_depthwise_convolution_8.S index 4a354b4a..6116347b 100644 --- a/source/i805_opt/convolution/csi_i805_depthwise_convolution_8.S +++ b/source/i805_opt/convolution/csi_i805_depthwise_convolution_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** * @file csi_i805_depthwise_convolution_8.S diff --git a/source/i805_opt/convolution/csi_xt800v_convolve_1x1_HWC_q7_fast.S b/source/i805_opt/convolution/csi_xt800v_convolve_1x1_HWC_q7_fast.S index 1eb521ef..37fd1bd5 100644 --- a/source/i805_opt/convolution/csi_xt800v_convolve_1x1_HWC_q7_fast.S +++ b/source/i805_opt/convolution/csi_xt800v_convolve_1x1_HWC_q7_fast.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/convolution/csi_xt800v_convolve_1x1_HWC_q7_fast_copy.S b/source/i805_opt/convolution/csi_xt800v_convolve_1x1_HWC_q7_fast_copy.S deleted file mode 100644 index 708b19c5..00000000 --- a/source/i805_opt/convolution/csi_xt800v_convolve_1x1_HWC_q7_fast_copy.S +++ /dev/null @@ -1,337 +0,0 @@ -/* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/****************************************************************************** - * @file csi_xt800v_convolve_1x1_HWC_q7_fast.S - * @brief Fast Q7 vresion of 1x1 convolution (non-square shape). - * @version V1.0 - * @date 05. June 2018 - ******************************************************************************/ - -/* - * csi_xt800v_status csi_xt800v_convolve_1x1_HWC_q7_fast(const q7_t * Im_in, - * const uint16_t dim_im_in_x, - * const uint16_t dim_im_in_y, - * const uint16_t ch_im_in, - * const q7_t * wt, - * const uint16_t ch_im_out, - * const q7_t * bias, - * const uint16_t bias_shift, - * const uint16_t out_shift, - * q7_t * Im_out, - * const uint16_t dim_im_out_x, - * const uint16_t dim_im_out_y, - * q15_t * bufferA) - * - */ - - .file "csi_xt800v_convolve_1x1_HWC_q7_fast.S" - .section .text.csi_xt800v_convolve_HWC_q7_fast,"ax",@progbits - .align 2 - .global csi_xt800v_convolve_1x1_HWC_q7_fast - .type csi_xt800v_convolve_1x1_HWC_q7_fast, @function - -csi_xt800v_convolve_1x1_HWC_q7_fast: - push l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, lr - subi sp, sp, 64 - vstm.8 vr8-vr11, (sp) - subi sp, sp, 64 - vstm.8 vr12-vr15, (sp) - ld.w l0, (sp, 0xac) // wt - ld.hs l1, (sp, 0xb0) // ch_im_out - ld.hs l3, (sp, 0xb8) // bias_shift - vdupg.32 vr15, l3 - ld.hs l3, (sp, 0xbc) // out_shift - movi l5, 1 - subi l6, l3, 1 - lsl l5, l5, l6 - vdupg.32 vr14, l5 // NN_ROUND - ld.w l4, (sp, 0xc0) // *im_out - ld.hs l5, (sp, 0xc4) // dim_im_out_x - ld.hs l6, (sp, 0xc8) // dim_im_out_y - ld.w l7, (sp, 0xcc) // *bufferA - mov l9, l7 - lsli t9, a3, 1 - addu l8, l7, t9 // bufferA + 2*a3 - - movi t0, 0 // i_out_y - -.L0: - cmplt t0, l6 // i_out_y < dim_im_out_y - bf .L18 - - movi t1, 0 // i_out_x - -.L1: - cmplt t1, l5 // i_out_x < dim_im_out_x - bf .L17 - - mult t6, t0, a1 // (i_out_y * dim_im_in_x + i_out_x)*ch_im_in - addu t6, t6, t1 - mult t6, t6, a3 - addu t6, t6, a0 // pSrc - - lsri t2, a3, 4 // ch_im_in >> 4u - bez t2, .L3 - -.L2: - vldmu.8 vr0-vr0, (t6) - vstmu.8 vr0-vr0, (l7) - - bnezad t2, .L2 - -.L3: - andi t2, a3, 15 // ch_im_in & 15u - bez t2, .L4 - - vldx.8 vr0, (t6), t2 - vstx.8 vr0, (l7), t2 - addu l7, l7, t2 - -.L4: - cmpne l7, l8 - bt .L16 - - ld.w l2, (sp, 0xb4) // bias - mov t9, l0 - addu t2, l4, l1 // pOut2 - lsri t6, l1, 2 // rowCnt = ch_im_out >> 2u - bez t6, .L10 - -.L5: - mov l7, l9 - addu lr, l7, a3 // *pB2 = pB + numCol_A - - vldu.8.4 vr0, (l2) - vmov.s8.e vr0, vr0 - vmov.s16.e vr12, vr0 - vshl.s32.s vr12, vr12, vr15 // sum0, ... sum3 + bias - vadd.s32.s vr12, vr12, vr14 // + NN_ROUND - - vor.32 vr13, vr12, vr12 // 4 diff kernels a time, sum4, ... sum7 - vmovi.8 vr4, 0 - vmovi.8 vr5, 0 - vmovi.8 vr6, 0 - vmovi.8 vr7, 0 - vmovi.8 vr8, 0 - vmovi.8 vr9, 0 - vmovi.8 vr10, 0 - vmovi.8 vr11, 0 - - lsri t7, a3, 4 // colCnt = numCol_A >> 4u - bez t7, .L7 - -.L6: - mov t8, t9 - vldmu.8 vr0-vr0, (l7) // load 16 data from col1 - vldmu.8 vr1-vr1, (lr) // load 16 data from col2 - vldmru.8 vr2-vr3, (t8), a3 // load 16 data from kernel 1 and 2 - vmulacaa.s8 vr4, vr2, vr0 - vmulacaa.s8 vr5, vr3, vr0 - vmulacaa.s8 vr8, vr2, vr1 - vmulacaa.s8 vr9, vr3, vr1 - vldmru.8 vr2-vr3, (t8), a3 // load 16 data form kernel 3 and 4 - vmulacaa.s8 vr6, vr2, vr0 - vmulacaa.s8 vr7, vr3, vr0 - vmulacaa.s8 vr10, vr2, vr1 - vmulacaa.s8 vr11, vr3, vr1 - - addi t9, t9, 16 - bnezad t7, .L6 - -.L7: - andi t7, a3, 15 // colCnt = numCol_A & 15u - bez t7, .L9 - -.L8: - mov t8, t9 - vldx.8 vr0, (l7), t7 // load x data from col1 - vldx.8 vr1, (lr), t7 // load x data from col2 - vldx.8 vr2, (t8), t7 // load x data from kernel 1 - addu t8, t8, a3 - vldx.8 vr3, (t8), t7 // load x data from kernel 2 - addu t8, t8, a3 - vmulacaa.s8 vr4, vr2, vr0 - vmulacaa.s8 vr5, vr3, vr0 - vmulacaa.s8 vr8, vr2, vr1 - vmulacaa.s8 vr9, vr3, vr1 - vldx.8 vr2, (t8), t7 - addu t8, t8, a3 - vldx.8 vr3, (t8), t7 - addu t8, t8, a3 - vmulacaa.s8 vr6, vr2, vr0 - vmulacaa.s8 vr7, vr3, vr0 - vmulacaa.s8 vr10, vr2, vr1 - vmulacaa.s8 vr11, vr3, vr1 - -.L9: - vdupg.32 vr0, l3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr5, vr6, vr7 - vpadd.s32.s vr4, vr4, vr5 - vadd.s32.s vr12, vr12, vr4 - vpadd.s32.s vr8, vr8, vr9 - vpadd.s32.s vr9, vr10, vr11 - vpadd.s32.s vr8, vr8, vr9 - vadd.s32.s vr13, vr13, vr8 - addu t9, t9, t7 - vshr.s32 vr12, vr12, vr0 - vshr.s32 vr13, vr13, vr0 - vclip.s32 vr0, vr12, 8 - vclip.s32 vr2, vr13, 8 - vmov.32.l vr0, vr0, vr0 - vmov.16.l vr0, vr0, vr0 - vstu.8.4 vr0, (l4) - vmov.32.l vr2, vr2, vr2 - vmov.16.l vr2, vr2, vr2 - vstu.8.4 vr2, (t2) - - lsli t8, a3, 2 - addu t9, t9, t8 - subu t9, t9, a3 - - bnezad t6, .L5 - -.L10: - andi t6, l1, 3 // ch_im_out % 0x4u - bez t6, .L15 - mov t8, t9 - -.L11: - mov l7, l9 - addu lr, l7, a3 // *pB2 = pB + numCol_A - - vldu.8.1 vr0, (l2) - vmov.s8.e vr0, vr0 - vmov.s16.e vr12, vr0 - vshl.s32.s vr12, vr12, vr15 // sum0, sum1 + bias - vor.32 vr13, vr12, vr12 - - lsri t7, a3, 4 // colCnt = numCol_A >> 4u - bez t7, .L13 - -.L12: - vldmu.8 vr0-vr0, (l7) // load 16 data from col1 - vldmu.8 vr1-vr1, (lr) // load 16 data from col2 - vldmu.8 vr2-vr2, (t8) // load 16 data from kernel 1 - vmulacaa.s8 vr12, vr0, vr2 - vmulacaa.s8 vr13, vr1, vr2 - - bnezad t7, .L12 - -.L13: - andi t7, a3, 15 // colCnt = numCol_A & 15u - bez t7, .L14 - - vldx.8 vr0, (l7), t7 // load x data from col1 - vldx.8 vr1, (lr), t7 // load x data from col2 - vldx.8 vr2, (t8), t7 // load x data from kernel 1 - addu t8, t8, t7 - vmulacaa.s8 vr12, vr0, vr2 - vmulacaa.s8 vr13, vr1, vr2 - -.L14: - vpadd.s32.s vr12, vr12, vr12 - vpadd.s32.s vr12, vr12, vr12 - vadd.s32.s vr12, vr12, vr14 // + NN_ROUND - vpadd.s32.s vr13, vr13, vr13 - vpadd.s32.s vr13, vr13, vr13 - vadd.s32.s vr13, vr13, vr14 // + NN_ROUND - vdupg.32 vr0, l3 - vshr.s32 vr12, vr12, vr0 - vshr.s32 vr13, vr13, vr0 - vclip.s32 vr12, vr12, 8 - vclip.s32 vr13, vr13, 8 - vstu.8.1 vr12, (l4) - vstu.8.1 vr13, (t2) - - bnezad t6, .L11 - -.L15: - addu l4, l4, l1 - mov l7, l9 // bufferA - -.L16: - addi t1, t1, 1 - br .L1 - -.L17: - addi t0, t0, 1 - br .L0 - - /* check for left-over */ -.L18: - cmpne l7, l8 - bf .L23 - - ld.w l2, (sp, 0xb4) // bias - vdupg.32 vr8, l3 - movi t1, 0 - mov t9, l0 - -.L19: - cmplt t1, l1 // i < ch_im_out - bf .L23 - - vldu.8.1 vr0, (l2) - vmov.s8.e vr0, vr0 - vmov.s16.e vr6, vr0 - vshl.s32.s vr6, vr6, vr15 - - mov l7, l9 // *pB = bufferA - lsri t4, a3, 4 // colCnt - bez t4, .L21 - -.L20: - vldmu.8 vr0-vr0, (l7) // col - vldmu.8 vr1-vr1, (t9) // kernel - vmulacaa.s8 vr6, vr0, vr1 - - bnezad t4, .L20 - -.L21: - andi t4, a3, 15 // colCnt - bez t4, .L22 - - vldx.8 vr0, (l7), t4 - vldx.8 vr1, (t9), t4 - vmulacaa.s8 vr6, vr0, vr1 - addu t9, t9, t4 - -.L22: - vpadd.s32.s vr0, vr6, vr6 - vpadd.s32.s vr0, vr0, vr0 - vadd.s32.s vr0, vr0, vr14 // sum - vshr.s32 vr0, vr0, vr8 - vclip.s32 vr0, vr0, 8 - vstu.8.1 vr0, (l4) - - addi t1, t1, 1 - br .L19 - -.L23: - movi a0, 0 - vldmu.8 vr12-vr15, (sp) - vldmu.8 vr8-vr11, (sp) - pop l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, lr - .size csi_xt800v_convolve_1x1_HWC_q7_fast, .-csi_xt800v_convolve_1x1_HWC_q7_fast - -.weak csi_convolve_1x1_HWC_q7_fast_copy -.set csi_convolve_1x1_HWC_q7_fast_copy csi_xt800v_convolve_1x1_HWC_q7_fast_copy -.weak csky_vdsp2_convolve_1x1_HWC_q7_fast_copy -.set csky_vdsp2_convolve_1x1_HWC_q7_fast_copy csi_xt800v_convolve_1x1_HWC_q7_fast_copy diff --git a/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q15_basic.S b/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q15_basic.S index 27b2a0c1..2d365f1a 100644 --- a/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q15_basic.S +++ b/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q15_basic.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_RGB.S b/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_RGB.S index e24c9d2b..2b055d0a 100644 --- a/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_RGB.S +++ b/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_RGB.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_basic.S b/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_basic.S index 3dac92de..c371f69f 100644 --- a/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_basic.S +++ b/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_basic.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_fast_nonsquare.S b/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_fast_nonsquare.S index 5c636e65..6d83ceec 100644 --- a/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_fast_nonsquare.S +++ b/source/i805_opt/convolution/csi_xt800v_convolve_HWC_q7_fast_nonsquare.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/convolution/csi_xt800v_depthwise_separable_conv_HWC_q7.S b/source/i805_opt/convolution/csi_xt800v_depthwise_separable_conv_HWC_q7.S index 69c33315..a819b757 100644 --- a/source/i805_opt/convolution/csi_xt800v_depthwise_separable_conv_HWC_q7.S +++ b/source/i805_opt/convolution/csi_xt800v_depthwise_separable_conv_HWC_q7.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/convolution/csi_xt800v_depthwise_separable_conv_HWC_q7_nonsquare.S b/source/i805_opt/convolution/csi_xt800v_depthwise_separable_conv_HWC_q7_nonsquare.S index 62549520..c5409ea4 100644 --- a/source/i805_opt/convolution/csi_xt800v_depthwise_separable_conv_HWC_q7_nonsquare.S +++ b/source/i805_opt/convolution/csi_xt800v_depthwise_separable_conv_HWC_q7_nonsquare.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/fully-connect/csi_i805_fullyconnected_8.S b/source/i805_opt/fully-connect/csi_i805_fullyconnected_8.S index 8c22de16..5711f92d 100644 --- a/source/i805_opt/fully-connect/csi_i805_fullyconnected_8.S +++ b/source/i805_opt/fully-connect/csi_i805_fullyconnected_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** * @file csi_i805_fullyconnected_8.S diff --git a/source/i805_opt/fully-connect/csi_xt800v_fully_connected_mat_q7_vec_q15.S b/source/i805_opt/fully-connect/csi_xt800v_fully_connected_mat_q7_vec_q15.S index 8e72239c..04ff897e 100644 --- a/source/i805_opt/fully-connect/csi_xt800v_fully_connected_mat_q7_vec_q15.S +++ b/source/i805_opt/fully-connect/csi_xt800v_fully_connected_mat_q7_vec_q15.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q15.S b/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q15.S index 66ebe375..ae54ba1b 100644 --- a/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q15.S +++ b/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q15.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q15_copy.S b/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q15_copy.S deleted file mode 100644 index 83741fbe..00000000 --- a/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q15_copy.S +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/****************************************************************************** - * @file csi_xt800v_fully_connected_q15.S - * @brief Q15 basic fully-connected layer function. - * @version V1.0 - * @date 31. May 2018 - ******************************************************************************/ - -/* - * csi_xt800v_status - * csi_xt800v_fully_connected_q15(const q15_t * pV, - * const q15_t * pM, - * const uint16_t dim_vec, - * const uint16_t num_of_rows, - * const uint16_t bias_shift, - * const uint16_t out_shift, - * const q15_t * bias, - * q15_t * pOut) - */ - - .file "csi_xt800v_fully_connected_q15.S" - .section .text.csi_xt800v_fully_connected_q15,"ax",@progbits - .align 2 - .global csi_xt800v_fully_connected_q15 - .type csi_xt800v_fully_connected_q15, @function - -csi_xt800v_fully_connected_q15: - push l0, l1, l2, l3, l4, l5, l6 - subi sp, sp, 64 - vstm.8 vr8-vr11, (sp) - subi sp, sp, 16 - vstm.8 vr12-vr12, (sp) - - ld.h l0, (sp, 0x6c) // bias_shift - vdupg.32 vr10, l0 - lsli l0, a2, 1 - ld.h l1, (sp, 0x70) // out_shift - subi l1, l1, 4 - vdupg.32 vr11, l1 - movi t0, 1 - subi l6, l1, 1 - lsl l6, t0, l6 // round value - vdupg.32 vr12, l6 - vshri.s32 vr12, vr12, 4 - ld.w l2, (sp, 0x74) // *bias - ld.w l3, (sp, 0x78) // *pOut - mov l1, a1 - - lsri t0, a3, 3 // rowCnt = num_of_rows >> 3u - bez t0, .L5 - -.L0: - vldmu.16 vr0-vr0, (l2) - vmov.s16.e vr8, vr0 - vshl.s32 vr8, vr8, vr10 // sum0, ... sum3 - vshl.s32 vr9, vr9, vr10 // sum4, ... sum7 - vshri.s32 vr8, vr7, 4 - vshri.s32 vr9, vr8, 4 - vadd.s32.s vr8, vr8, vr12 // round - vadd.s32.s vr9, vr9, vr12 - - mov l4, a0 // pA = pV - - lsri t1, a2, 3 // colCnt = dim_vec >> 3u - bez t1, .L2 - -.L1: - mov l5, a1 // pB = pM - vldmu.16 vr0-vr0, (l4) // x0, ..., x7 - vldmru.16 vr2-vr5, (l5), l0 // y00, ..., y07 - vrmulshr.s16.e vr6, vr0, vr2, 4 - vadd.s32.s vr2, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr3, 4 // y10, ..., y17 - vadd.s32.s vr3, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr4, 4 // y20, ..., y27 - vadd.s32.s vr4, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr5, 4 // y30, ..., y37 - vadd.s32.s vr5, vr6, vr7 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr3, vr4, vr5 - vpadd.s32.s vr2, vr2, vr3 - vadd.s32.s vr8, vr8, vr2 // sum0, ..., sum3 - - vldmru.16 vr2-vr5, (l5), l0 // y40, ..., y47 - vrmulshr.s16.e vr6, vr0, vr2, 4 - vadd.s32.s vr2, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr3, 4 // y50, ..., y57 - vadd.s32.s vr3, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr4, 4 // y60, ..., y67 - vadd.s32.s vr4, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr5, 4 // y70, ..., y77 - vadd.s32.s vr5, vr6, vr7 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr3, vr4, vr5 - vpadd.s32.s vr2, vr2, vr3 - vadd.s32.s vr9, vr9, vr2 // sum4, ..., sum7 - - addi a1, a1, 16 - bnezad t1, .L1 - -.L2: - andi t1, a2, 7 // colCnt = dim_vec % 8u - bez t1, .L4 - -.L3: - mov l5, a1 - vldx.16 vr0, (l4), t1 - vldx.16 vr2, (l5), t1 - addu l5, l5, l0 - vldx.16 vr3, (l5), t1 - addu l5, l5, l0 - vldx.16 vr4, (l5), t1 - addu l5, l5, l0 - vldx.16 vr5, (l5), t1 - addu l5, l5, l0 - vrmulshr.s16.e vr6, vr0, vr2, 4 - vadd.s32.s vr2, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr3, 4 // y10, ..., y17 - vadd.s32.s vr3, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr4, 4 // y20, ..., y27 - vadd.s32.s vr4, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr5, 4 // y30, ..., y37 - vadd.s32.s vr5, vr6, vr7 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr2, vr2, vr4 - vadd.s32.s vr8, vr8, vr2 // sum0, ..., sum3 - - - vldx.16 vr2, (l5), t1 - addu l5, l5, l0 - vldx.16 vr3, (l5), t1 - addu l5, l5, l0 - vldx.16 vr4, (l5), t1 - addu l5, l5, l0 - vldx.16 vr5, (l5), t1 - vrmulshr.s16.e vr6, vr0, vr2, 4 - vadd.s32.s vr2, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr3, 4 // y50, ..., y57 - vadd.s32.s vr3, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr4, 4 // y60, ..., y67 - vadd.s32.s vr4, vr6, vr7 - - vrmulshr.s16.e vr6, vr0, vr5, 4 // y70, ..., y77 - vadd.s32.s vr5, vr6, vr7 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr2, vr2, vr4 - vadd.s32.s vr9, vr9, vr2 // sum4, ..., sum7 - -.L4: - vshr.s32 vr8, vr8, vr11 - vshr.s32 vr9, vr9, vr11 - vclip.s32 vr8, vr8, 16 - vclip.s32 vr9, vr9, 16 - vmov.32.l vr0, vr8, vr9 - vstmu.16 vr0-vr0, (l3) - - lsli t1, a2, 4 - addu l1, l1, t1 - mov a1, l1 - bnezad t0, .L0 - -.L5: - andi t0, a3, 7 // rowCnt = num_of_rows % 8u - bez t0, .L10 - -.L12: - vldu.16.1 vr0, (l2) - vmov.s16.e vr0, vr0 - vshl.s32 vr8, vr0, vr10 - vshri.s32 vr8, vr8, 4 - vmovi.8 vr9, 0 - - mov l4, a0 // pA = pV - mov l5, a1 - - lsri t1, a2, 5 // colCnt = dim_vec >> 5u - bez t1, .L7 - -.L6: - vldmu.16 vr0-vr3, (l4) - vldmu.16 vr4-vr7, (l5) - vrmulsa.s16.e vr8, vr0, vr4, 4 - vrmulsa.s16.e vr8, vr1, vr5, 4 - vrmulsa.s16.e vr8, vr2, vr6, 4 - vrmulsa.s16.e vr8, vr3, vr7, 4 - - bnezad t1, .L6 - -.L7: - andi t2, a2, 31 // colCnt = dim_vec % 32u - lsri t1, t2, 3 - bez t1, .L8 - -.L11: - vldmu.16 vr0-vr0, (l4) - vldmu.16 vr1-vr1, (l5) - vrmulsa.s16.e vr8, vr0, vr1, 4 - - bnezad t1, .L11 - -.L8: - andi t1, t2, 7 - bez t1, .L9 - vldx.16 vr0, (l4), t1 - vldx.16 vr1, (l5), t1 - vrmulsa.s16.e vr8, vr0, vr1, 4 - -.L9: - vadd.s32.s vr8, vr8, vr9 - vpadd.s32.s vr0, vr8, vr8 - vpadd.s32.s vr0, vr0, vr0 - vadd.s32.s vr8, vr8, vr12 // sum - vshr.s32 vr0, vr0, vr11 - vclip.s32 vr0, vr0, 16 - vstu.16.1 vr0, (l3) - - mov a1, l5 - bnezad t0, .L12 - -.L10: - vldmu.8 vr12-vr12, (sp) - vldmu.8 vr8-vr11, (sp) - pop l0, l1, l2, l3, l4, l5, l6 - .size csi_xt800v_fully_connected_q15, .-csi_xt800v_fully_connected_q15 -.weak csi_fully_connected_q15_copy -.set csi_fully_connected_q15_copy csi_xt800v_fully_connected_q15_copy -.weak csky_vdsp2_fully_connected_q15_copy -.set csky_vdsp2_fully_connected_q15_copy csi_xt800v_fully_connected_q15_copy diff --git a/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q7x16.S b/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q7x16.S deleted file mode 100644 index ad8c474c..00000000 --- a/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q7x16.S +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/****************************************************************************** - * @file csi_xt800v_fully_connected_q7.S - * @brief Q7 basic fully-connected layer function. - * @version V1.0 - * @date 19. Mar 2018 - ******************************************************************************/ - -/* - * csi_xt800v_status - * csi_xt800v_fully_connected_q7(const q7_t * pV, - * const q7_t * pM, - * const uint16_t dim_vec, - * const uint16_t num_of_rows, - * const uint16_t bias_shift, - * const uint16_t out_shift, - * const q7_t * bias, - * q7_t * pOut) - */ - - .file "csi_xt800v_fully_connected_q7.S" - .section .text.csi_xt800v_fully_connected_q7,"ax",@progbits - .align 2 - .global csi_xt800v_fully_connected_q7 - .type csi_xt800v_fully_connected_q7, @function - -csi_xt800v_fully_connected_q7: - push l0, l1, l2, l3, l4, l5, l6 - subi sp, sp, 64 - vstm.8 vr8-vr11, (sp) - subi sp, sp, 16 - vstm.8 vr12-vr12, (sp) - - ld.h l0, (sp, 0x6c) // bias_shift - vdupg.32 vr10, l0 - ld.h l1, (sp, 0x70) // out_shift - vdupg.32 vr11, l1 - movi t0, 1 - subi l6, l1, 1 - lsl l6, t0, l6 // round value - vdupg.32 vr12, l6 - ld.w l2, (sp, 0x74) // *bias - ld.w l3, (sp, 0x78) // *pOut - mov l1, a1 - - lsri t0, a3, 4 // rowCnt = num_of_rows >> 4u - bez t0, .L5 - -.L0: - vldmu.8 vr0-vr0, (l2) - vmov.s8.e vr0, vr0 - vmov.s16.e vr6, vr0 - vmov.s16.e vr8, vr1 - vshl.s32 vr6, vr6, vr10 // sum0, ... sum3 - vshl.s32 vr7, vr7, vr10 // sum4, ... sum7 - vshl.s32 vr8, vr8, vr10 // sum8, ... sum11 - vshl.s32 vr9, vr9, vr10 // sum12, ... sum15 - vadd.s32.s vr6, vr6, vr12 // round - vadd.s32.s vr7, vr7, vr12 - vadd.s32.s vr8, vr8, vr12 - vadd.s32.s vr9, vr9, vr12 - - mov l4, a0 // pA = pV - - lsri t1, a2, 4 // colCnt = dim_vec >> 4u - bez t1, .L2 - -.L1: - mov l5, a1 // pB = pM - vldmu.8 vr0-vr0, (l4) // x0, ... x15 - vldmru.8 vr2-vr5, (l5), a2 // y0, ... y15 - vmulaca.s8 vr2, vr0, vr2 - vmulaca.s8 vr3, vr0, vr3 - vmulaca.s8 vr4, vr0, vr4 - vmulaca.s8 vr5, vr0, vr5 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr2, vr2, vr4 - vadd.s32.s vr6, vr6, vr2 // sum0, ...sum3 - - vldmru.8 vr2-vr5, (l5), a2 // y0, ... y15 - vmulaca.s8 vr2, vr0, vr2 - vmulaca.s8 vr3, vr0, vr3 - vmulaca.s8 vr4, vr0, vr4 - vmulaca.s8 vr5, vr0, vr5 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr2, vr2, vr4 - vadd.s32.s vr7, vr7, vr2 // sum4, ...sum7 - - vldmru.8 vr2-vr5, (l5), a2 // y0, ... y15 - vmulaca.s8 vr2, vr0, vr2 - vmulaca.s8 vr3, vr0, vr3 - vmulaca.s8 vr4, vr0, vr4 - vmulaca.s8 vr5, vr0, vr5 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr2, vr2, vr4 - vadd.s32.s vr8, vr8, vr2 // sum8, ...sum11 - - vldmru.8 vr2-vr5, (l5), a2 // y0, ... y15 - vmulaca.s8 vr2, vr0, vr2 - vmulaca.s8 vr3, vr0, vr3 - vmulaca.s8 vr4, vr0, vr4 - vmulaca.s8 vr5, vr0, vr5 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr2, vr2, vr4 - vadd.s32.s vr9, vr9, vr2 // sum12, ...sum15 - - addi a1, a1, 16 - bnezad t1, .L1 - -.L2: - andi t1, a2, 15 // colCnt = dim_vec % 15u - bez t1, .L4 - -.L3: - mov l5, a1 - vldx.8 vr0, (l4), t1 - vldx.8 vr2, (l5), t1 - addu l5, l5, a2 - vldx.8 vr3, (l5), t1 - addu l5, l5, a2 - vldx.8 vr4, (l5), t1 - addu l5, l5, a2 - vldx.8 vr5, (l5), t1 - addu l5, l5, a2 - vmulaca.s8 vr2, vr0, vr2 - vmulaca.s8 vr3, vr0, vr3 - vmulaca.s8 vr4, vr0, vr4 - vmulaca.s8 vr5, vr0, vr5 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr2, vr2, vr4 - vadd.s32.s vr6, vr6, vr2 // sum0, ...sum3 - - vldx.8 vr2, (l5), t1 - addu l5, l5, a2 - vldx.8 vr3, (l5), t1 - addu l5, l5, a2 - vldx.8 vr4, (l5), t1 - addu l5, l5, a2 - vldx.8 vr5, (l5), t1 - addu l5, l5, a2 - vmulaca.s8 vr2, vr0, vr2 - vmulaca.s8 vr3, vr0, vr3 - vmulaca.s8 vr4, vr0, vr4 - vmulaca.s8 vr5, vr0, vr5 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr2, vr2, vr4 - vadd.s32.s vr7, vr7, vr2 // sum4, ...sum7 - - vldx.8 vr2, (l5), t1 - addu l5, l5, a2 - vldx.8 vr3, (l5), t1 - addu l5, l5, a2 - vldx.8 vr4, (l5), t1 - addu l5, l5, a2 - vldx.8 vr5, (l5), t1 - addu l5, l5, a2 - vmulaca.s8 vr2, vr0, vr2 - vmulaca.s8 vr3, vr0, vr3 - vmulaca.s8 vr4, vr0, vr4 - vmulaca.s8 vr5, vr0, vr5 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr2, vr2, vr4 - vadd.s32.s vr8, vr8, vr2 // sum8, ...sum11 - - vldx.8 vr2, (l5), t1 - addu l5, l5, a2 - vldx.8 vr3, (l5), t1 - addu l5, l5, a2 - vldx.8 vr4, (l5), t1 - addu l5, l5, a2 - vldx.8 vr5, (l5), t1 - vmulaca.s8 vr2, vr0, vr2 - vmulaca.s8 vr3, vr0, vr3 - vmulaca.s8 vr4, vr0, vr4 - vmulaca.s8 vr5, vr0, vr5 - - vpadd.s32.s vr2, vr2, vr3 - vpadd.s32.s vr4, vr4, vr5 - vpadd.s32.s vr2, vr2, vr4 - vadd.s32.s vr9, vr9, vr2 // sum12, ...sum15 - -.L4: - vshr.s32 vr6, vr6, vr11 - vshr.s32 vr7, vr7, vr11 - vshr.s32 vr8, vr8, vr11 - vshr.s32 vr9, vr9, vr11 - vmov.s32.sl vr0, vr6, vr7 - vmov.s32.sl vr1, vr8, vr9 - vmov.s16.sl vr0, vr0, vr1 - vstmu.8 vr0-vr0, (l3) - - lsli t1, a2, 4 - addu l1, l1, t1 - mov a1, l1 - bnezad t0, .L0 - -.L5: - andi t0, a3, 15 // rowCnt = num_of_rows % 16u - bez t0, .L10 - -.L12: - vldu.8.1 vr0, (l2) - vmov.s8.e vr0, vr0 - vmov.s16.e vr0, vr0 - vshl.s32 vr6, vr0, vr10 - - mov l4, a0 // pA = pV - mov l5, a1 - - lsri t1, a2, 4 // colCnt = dim_vec >> 4u - bez t1, .L7 - -.L6: - vldmu.8 vr0-vr0, (l4) - vldmu.8 vr2-vr2, (l5) - vmulacaa.s8 vr6, vr0, vr2 - - bnezad t1, .L6 - -.L7: - andi t1, a2, 15 // colCnt = dim_vec % 32u - bez t1, .L9 - -.L8: - vldx.8 vr0, (l4), t1 - vldx.8 vr1, (l5), t1 - addu l5, l5, t1 - vmulacaa.s8 vr6, vr0, vr1 - -.L9: - vpadd.s32.s vr0, vr6, vr6 - vpadd.s32.s vr0, vr0, vr0 - vadd.s32.s vr0, vr0, vr12 // sum - vshr.s32 vr0, vr0, vr11 - vclip.s32 vr0, vr0, 8 - vstu.8.1 vr0, (l3) - - mov a1, l5 - bnezad t0, .L12 - -.L10: - vldmu.8 vr12-vr12, (sp) - vldmu.8 vr8-vr11, (sp) - pop l0, l1, l2, l3, l4, l5, l6 - .size csi_xt800v_fully_connected_q7, .-csi_xt800v_fully_connected_q7 -.weak csi_fully_connected_q7 -.set csi_fully_connected_q7 csi_xt800v_fully_connected_q7 -.weak csky_vdsp2_fully_connected_q7 -.set csky_vdsp2_fully_connected_q7 csi_xt800v_fully_connected_q7 diff --git a/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q7x4.S b/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q7x4.S index f0c49811..93af45f8 100644 --- a/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q7x4.S +++ b/source/i805_opt/fully-connect/csi_xt800v_fully_connected_q7x4.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/fullyconnected.c b/source/i805_opt/fullyconnected.c index 9d6f4c61..284ac4d5 100644 --- a/source/i805_opt/fullyconnected.c +++ b/source/i805_opt/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/gemm/csi_i805_mat_mult_nt_t_8.S b/source/i805_opt/gemm/csi_i805_mat_mult_nt_t_8.S index e249da39..d29b7282 100644 --- a/source/i805_opt/gemm/csi_i805_mat_mult_nt_t_8.S +++ b/source/i805_opt/gemm/csi_i805_mat_mult_nt_t_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** * @file csi_i805_mat_mult_nt_t_8.S diff --git a/source/i805_opt/gemm/csi_i805_vec_mat_mult_8.S b/source/i805_opt/gemm/csi_i805_vec_mat_mult_8.S index 71a3dd72..1fd7d64a 100644 --- a/source/i805_opt/gemm/csi_i805_vec_mat_mult_8.S +++ b/source/i805_opt/gemm/csi_i805_vec_mat_mult_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** * @file csi_i805_vec_mat_mult_8.S diff --git a/source/i805_opt/maxpool.c b/source/i805_opt/maxpool.c index 844def22..43b6eb76 100644 --- a/source/i805_opt/maxpool.c +++ b/source/i805_opt/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/mul.c b/source/i805_opt/mul.c index 7d261fc4..eaf1004a 100644 --- a/source/i805_opt/mul.c +++ b/source/i805_opt/mul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/nn-support/csi_xt800v_nntables.c b/source/i805_opt/nn-support/csi_xt800v_nntables.c index 2fd4eecc..1563f833 100644 --- a/source/i805_opt/nn-support/csi_xt800v_nntables.c +++ b/source/i805_opt/nn-support/csi_xt800v_nntables.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/pooling/csi_i805_maxpool_8.S b/source/i805_opt/pooling/csi_i805_maxpool_8.S index 99d40ef1..5b2a6eb0 100644 --- a/source/i805_opt/pooling/csi_i805_maxpool_8.S +++ b/source/i805_opt/pooling/csi_i805_maxpool_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** diff --git a/source/i805_opt/pooling/csi_xt800v_avepool_q7_HWC_nonsquare.S b/source/i805_opt/pooling/csi_xt800v_avepool_q7_HWC_nonsquare.S index 00c8d229..ebf3f516 100644 --- a/source/i805_opt/pooling/csi_xt800v_avepool_q7_HWC_nonsquare.S +++ b/source/i805_opt/pooling/csi_xt800v_avepool_q7_HWC_nonsquare.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/pooling/csi_xt800v_pool_q7_HWC.S b/source/i805_opt/pooling/csi_xt800v_pool_q7_HWC.S index 96c2548c..3945e91a 100644 --- a/source/i805_opt/pooling/csi_xt800v_pool_q7_HWC.S +++ b/source/i805_opt/pooling/csi_xt800v_pool_q7_HWC.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/relu.c b/source/i805_opt/relu.c index 8e21283a..0b7d1342 100644 --- a/source/i805_opt/relu.c +++ b/source/i805_opt/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/relu6.c b/source/i805_opt/relu6.c index 7a20f28b..d9f215bb 100644 --- a/source/i805_opt/relu6.c +++ b/source/i805_opt/relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/reshape.c b/source/i805_opt/reshape.c index b6619648..1dd23cdd 100644 --- a/source/i805_opt/reshape.c +++ b/source/i805_opt/reshape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/reshape/csi_i805_reshape_8.S b/source/i805_opt/reshape/csi_i805_reshape_8.S index 7c04eb0b..cc8d27e8 100644 --- a/source/i805_opt/reshape/csi_i805_reshape_8.S +++ b/source/i805_opt/reshape/csi_i805_reshape_8.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /****************************************************************************** * @file csi_i805_reshape_8.S diff --git a/source/i805_opt/setup.c b/source/i805_opt/setup.c index 30bbb3d6..4eea511b 100644 --- a/source/i805_opt/setup.c +++ b/source/i805_opt/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/sigmoid.c b/source/i805_opt/sigmoid.c index 925831f9..961b4c98 100644 --- a/source/i805_opt/sigmoid.c +++ b/source/i805_opt/sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/softmax.c b/source/i805_opt/softmax.c index 4fadaeb7..37041e95 100644 --- a/source/i805_opt/softmax.c +++ b/source/i805_opt/softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_opt/softmax/csi_xt800v_softmax_q15.S b/source/i805_opt/softmax/csi_xt800v_softmax_q15.S index e44b9c78..ac6e5e13 100644 --- a/source/i805_opt/softmax/csi_xt800v_softmax_q15.S +++ b/source/i805_opt/softmax/csi_xt800v_softmax_q15.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/softmax/csi_xt800v_softmax_q7.S b/source/i805_opt/softmax/csi_xt800v_softmax_q7.S index a4d05cdf..6e591d0b 100644 --- a/source/i805_opt/softmax/csi_xt800v_softmax_q7.S +++ b/source/i805_opt/softmax/csi_xt800v_softmax_q7.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_opt/tanh.c b/source/i805_opt/tanh.c index 3b9f6c33..e550b861 100644 --- a/source/i805_opt/tanh.c +++ b/source/i805_opt/tanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_i805.h" diff --git a/source/i805_ref/activation/csi_nn_activations_q15.c b/source/i805_ref/activation/csi_nn_activations_q15.c index fc33d2ec..f7da936f 100644 --- a/source/i805_ref/activation/csi_nn_activations_q15.c +++ b/source/i805_ref/activation/csi_nn_activations_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nn_tables.h" -// #include "csi_common_tables.h" #include "csi_nnfunctions.h" /** diff --git a/source/i805_ref/activation/csi_nn_activations_q7.c b/source/i805_ref/activation/csi_nn_activations_q7.c index 1b1ea469..919c4ea5 100644 --- a/source/i805_ref/activation/csi_nn_activations_q7.c +++ b/source/i805_ref/activation/csi_nn_activations_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nn_tables.h" -// #include "csi_common_tables.h" #include "csi_nnfunctions.h" /** diff --git a/source/i805_ref/activation/csi_relu_q15.c b/source/i805_ref/activation/csi_relu_q15.c index 5d683a85..bbe8ae45 100644 --- a/source/i805_ref/activation/csi_relu_q15.c +++ b/source/i805_ref/activation/csi_relu_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,7 +22,6 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" /** diff --git a/source/i805_ref/activation/csi_relu_q7.c b/source/i805_ref/activation/csi_relu_q7.c index 3a26924a..8e6f2a9c 100644 --- a/source/i805_ref/activation/csi_relu_q7.c +++ b/source/i805_ref/activation/csi_relu_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,7 +22,6 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" /** diff --git a/source/i805_ref/avgpool.c b/source/i805_ref/avgpool.c index 28c86384..10b1019d 100644 --- a/source/i805_ref/avgpool.c +++ b/source/i805_ref/avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref_i805.h" diff --git a/source/i805_ref/convolution.c b/source/i805_ref/convolution.c index a52ea952..de0b3ec2 100644 --- a/source/i805_ref/convolution.c +++ b/source/i805_ref/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref_i805.h" diff --git a/source/i805_ref/convolution/csi_convolve_1x1_HWC_q7_fast.c b/source/i805_ref/convolution/csi_convolve_1x1_HWC_q7_fast.c index bc7f6c20..f39ea334 100644 --- a/source/i805_ref/convolution/csi_convolve_1x1_HWC_q7_fast.c +++ b/source/i805_ref/convolution/csi_convolve_1x1_HWC_q7_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/convolution/csi_convolve_HWC_q15_basic.c b/source/i805_ref/convolution/csi_convolve_HWC_q15_basic.c index ff096114..60038362 100644 --- a/source/i805_ref/convolution/csi_convolve_HWC_q15_basic.c +++ b/source/i805_ref/convolution/csi_convolve_HWC_q15_basic.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/convolution/csi_convolve_HWC_q15_fast.c b/source/i805_ref/convolution/csi_convolve_HWC_q15_fast.c index 80d4f22a..835b4854 100644 --- a/source/i805_ref/convolution/csi_convolve_HWC_q15_fast.c +++ b/source/i805_ref/convolution/csi_convolve_HWC_q15_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/convolution/csi_convolve_HWC_q7_RGB.c b/source/i805_ref/convolution/csi_convolve_HWC_q7_RGB.c index b9b31a9b..c1e5f7aa 100644 --- a/source/i805_ref/convolution/csi_convolve_HWC_q7_RGB.c +++ b/source/i805_ref/convolution/csi_convolve_HWC_q7_RGB.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,9 +21,8 @@ * Description: Q7 version of convolution for RGB image * * -------------------------------------------------------------------- */ -#include "csi_math.h" + #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/convolution/csi_convolve_HWC_q7_basic.c b/source/i805_ref/convolution/csi_convolve_HWC_q7_basic.c index 12528a23..81f1d03b 100644 --- a/source/i805_ref/convolution/csi_convolve_HWC_q7_basic.c +++ b/source/i805_ref/convolution/csi_convolve_HWC_q7_basic.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,9 +21,8 @@ * Description: Q7 version of convolution * * -------------------------------------------------------------------- */ -#include "csi_math.h" + #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/convolution/csi_convolve_HWC_q7_fast.c b/source/i805_ref/convolution/csi_convolve_HWC_q7_fast.c index ac82f4e9..91c24c3d 100644 --- a/source/i805_ref/convolution/csi_convolve_HWC_q7_fast.c +++ b/source/i805_ref/convolution/csi_convolve_HWC_q7_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/convolution/csi_convolve_HWC_q7_fast_nonsquare.c b/source/i805_ref/convolution/csi_convolve_HWC_q7_fast_nonsquare.c index 210a76cf..a3b0a6b6 100644 --- a/source/i805_ref/convolution/csi_convolve_HWC_q7_fast_nonsquare.c +++ b/source/i805_ref/convolution/csi_convolve_HWC_q7_fast_nonsquare.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/convolution/csi_depthwise_separable_conv_HWC_q7.c b/source/i805_ref/convolution/csi_depthwise_separable_conv_HWC_q7.c index 2653b567..8df5e394 100644 --- a/source/i805_ref/convolution/csi_depthwise_separable_conv_HWC_q7.c +++ b/source/i805_ref/convolution/csi_depthwise_separable_conv_HWC_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/convolution/csi_depthwise_separable_conv_HWC_q7_nonsquare.c b/source/i805_ref/convolution/csi_depthwise_separable_conv_HWC_q7_nonsquare.c index 0d118983..4a491fa9 100644 --- a/source/i805_ref/convolution/csi_depthwise_separable_conv_HWC_q7_nonsquare.c +++ b/source/i805_ref/convolution/csi_depthwise_separable_conv_HWC_q7_nonsquare.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/convolution/csi_nn_mat_mult_kernel_q7_q15.c b/source/i805_ref/convolution/csi_nn_mat_mult_kernel_q7_q15.c index 38c84386..5e2df5ec 100644 --- a/source/i805_ref/convolution/csi_nn_mat_mult_kernel_q7_q15.c +++ b/source/i805_ref/convolution/csi_nn_mat_mult_kernel_q7_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -21,9 +21,7 @@ * Description: Matrix-multiplication function for convolution * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @brief Matrix-multiplication function for convolution diff --git a/source/i805_ref/convolution/csi_nn_mat_mult_kernel_q7_q15_reordered.c b/source/i805_ref/convolution/csi_nn_mat_mult_kernel_q7_q15_reordered.c index fcdbfc24..38a8090c 100644 --- a/source/i805_ref/convolution/csi_nn_mat_mult_kernel_q7_q15_reordered.c +++ b/source/i805_ref/convolution/csi_nn_mat_mult_kernel_q7_q15_reordered.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -23,8 +23,6 @@ * -------------------------------------------------------------------- */ #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" -#include "csi_math.h" /** * @brief Matrix-multiplication function for convolution with reordered columns diff --git a/source/i805_ref/fully-connect/csi_fully_connected_mat_q7_vec_q15.c b/source/i805_ref/fully-connect/csi_fully_connected_mat_q7_vec_q15.c index 8b71dd09..f2e9d508 100644 --- a/source/i805_ref/fully-connect/csi_fully_connected_mat_q7_vec_q15.c +++ b/source/i805_ref/fully-connect/csi_fully_connected_mat_q7_vec_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/fully-connect/csi_fully_connected_mat_q7_vec_q15_opt.c b/source/i805_ref/fully-connect/csi_fully_connected_mat_q7_vec_q15_opt.c index e531196e..2df9659b 100644 --- a/source/i805_ref/fully-connect/csi_fully_connected_mat_q7_vec_q15_opt.c +++ b/source/i805_ref/fully-connect/csi_fully_connected_mat_q7_vec_q15_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/fully-connect/csi_fully_connected_q15.c b/source/i805_ref/fully-connect/csi_fully_connected_q15.c index 43633947..64cd0d8b 100644 --- a/source/i805_ref/fully-connect/csi_fully_connected_q15.c +++ b/source/i805_ref/fully-connect/csi_fully_connected_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/fully-connect/csi_fully_connected_q15_opt.c b/source/i805_ref/fully-connect/csi_fully_connected_q15_opt.c index 64929513..cb0b24b6 100644 --- a/source/i805_ref/fully-connect/csi_fully_connected_q15_opt.c +++ b/source/i805_ref/fully-connect/csi_fully_connected_q15_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/fully-connect/csi_fully_connected_q7.c b/source/i805_ref/fully-connect/csi_fully_connected_q7.c index 14317467..60689c47 100644 --- a/source/i805_ref/fully-connect/csi_fully_connected_q7.c +++ b/source/i805_ref/fully-connect/csi_fully_connected_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/fully-connect/csi_fully_connected_q7_opt.c b/source/i805_ref/fully-connect/csi_fully_connected_q7_opt.c index 2ee270de..d712fe88 100644 --- a/source/i805_ref/fully-connect/csi_fully_connected_q7_opt.c +++ b/source/i805_ref/fully-connect/csi_fully_connected_q7_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" /** * @ingroup groupNN diff --git a/source/i805_ref/fullyconnected.c b/source/i805_ref/fullyconnected.c index bde99549..97d3b70b 100644 --- a/source/i805_ref/fullyconnected.c +++ b/source/i805_ref/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref_i805.h" diff --git a/source/i805_ref/maxpool.c b/source/i805_ref/maxpool.c index 37896025..e3a840b2 100644 --- a/source/i805_ref/maxpool.c +++ b/source/i805_ref/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref_i805.h" diff --git a/source/i805_ref/nn-support/csi_nntables.c b/source/i805_ref/nn-support/csi_nntables.c index 8125cc0a..b5a5ad64 100644 --- a/source/i805_ref/nn-support/csi_nntables.c +++ b/source/i805_ref/nn-support/csi_nntables.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_ref/nn-support/csi_q7_to_q15_no_shift.c b/source/i805_ref/nn-support/csi_q7_to_q15_no_shift.c index 39343f34..bae01450 100644 --- a/source/i805_ref/nn-support/csi_q7_to_q15_no_shift.c +++ b/source/i805_ref/nn-support/csi_q7_to_q15_no_shift.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_ref/nn-support/csi_q7_to_q15_reordered_no_shift.c b/source/i805_ref/nn-support/csi_q7_to_q15_reordered_no_shift.c index 3043c6bf..c79ddb46 100644 --- a/source/i805_ref/nn-support/csi_q7_to_q15_reordered_no_shift.c +++ b/source/i805_ref/nn-support/csi_q7_to_q15_reordered_no_shift.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/source/i805_ref/pooling/csi_avepool_q7_HWC_nonsquare.c b/source/i805_ref/pooling/csi_avepool_q7_HWC_nonsquare.c index 36591555..8187e1d7 100644 --- a/source/i805_ref/pooling/csi_avepool_q7_HWC_nonsquare.c +++ b/source/i805_ref/pooling/csi_avepool_q7_HWC_nonsquare.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,9 +16,7 @@ * limitations under the License. */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" void csi_avepool_q7_HWC_nonsquare( q7_t *Im_in, // input image diff --git a/source/i805_ref/pooling/csi_pool_q7_HWC.c b/source/i805_ref/pooling/csi_pool_q7_HWC.c index 76caad5b..c5ee5760 100644 --- a/source/i805_ref/pooling/csi_pool_q7_HWC.c +++ b/source/i805_ref/pooling/csi_pool_q7_HWC.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,9 +22,7 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" -#include "csi_nnsupportfunctions.h" #if defined (CSI_MATH_DSP) diff --git a/source/i805_ref/relu.c b/source/i805_ref/relu.c index f98d3cab..2b874cf6 100644 --- a/source/i805_ref/relu.c +++ b/source/i805_ref/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref_i805.h" diff --git a/source/i805_ref/setup.c b/source/i805_ref/setup.c index 6d2404e5..761281aa 100644 --- a/source/i805_ref/setup.c +++ b/source/i805_ref/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref_i805.h" diff --git a/source/i805_ref/sigmoid.c b/source/i805_ref/sigmoid.c index 95b874d6..d434c8f6 100644 --- a/source/i805_ref/sigmoid.c +++ b/source/i805_ref/sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref_i805.h" diff --git a/source/i805_ref/softmax.c b/source/i805_ref/softmax.c index 23c90f2c..951690bd 100644 --- a/source/i805_ref/softmax.c +++ b/source/i805_ref/softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref_i805.h" diff --git a/source/i805_ref/softmax/csi_softmax_q15.c b/source/i805_ref/softmax/csi_softmax_q15.c index a4246a32..c5379623 100644 --- a/source/i805_ref/softmax/csi_softmax_q15.c +++ b/source/i805_ref/softmax/csi_softmax_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,7 +22,6 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" /** diff --git a/source/i805_ref/softmax/csi_softmax_q7.c b/source/i805_ref/softmax/csi_softmax_q7.c index 24c8f5fe..d9b41723 100644 --- a/source/i805_ref/softmax/csi_softmax_q7.c +++ b/source/i805_ref/softmax/csi_softmax_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -22,7 +22,6 @@ * * -------------------------------------------------------------------- */ -#include "csi_math.h" #include "csi_nnfunctions.h" /** diff --git a/source/i805_ref/tanh.c b/source/i805_ref/tanh.c index ee4c308d..65f56b2b 100644 --- a/source/i805_ref/tanh.c +++ b/source/i805_ref/tanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref_i805.h" diff --git a/source/nn2/abs.c b/source/nn2/abs.c index d2d14d93..82648425 100644 --- a/source/nn2/abs.c +++ b/source/nn2/abs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/acos.c b/source/nn2/acos.c index e1922972..de390369 100644 --- a/source/nn2/acos.c +++ b/source/nn2/acos.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/acosh.c b/source/nn2/acosh.c index f65c9432..52b5d9d1 100644 --- a/source/nn2/acosh.c +++ b/source/nn2/acosh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/add.c b/source/nn2/add.c index 7c9e2bd5..3c5a94f1 100644 --- a/source/nn2/add.c +++ b/source/nn2/add.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/all.c b/source/nn2/all.c index 6ab93bff..f02a20de 100644 --- a/source/nn2/all.c +++ b/source/nn2/all.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/and.c b/source/nn2/and.c index 14c342d3..c4bc2399 100644 --- a/source/nn2/and.c +++ b/source/nn2/and.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/any.c b/source/nn2/any.c index 3113f392..c94247d9 100644 --- a/source/nn2/any.c +++ b/source/nn2/any.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/arange.c b/source/nn2/arange.c index 16294132..9bd56831 100644 --- a/source/nn2/arange.c +++ b/source/nn2/arange.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/argmax.c b/source/nn2/argmax.c index f679aa67..b2ae3eb5 100644 --- a/source/nn2/argmax.c +++ b/source/nn2/argmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/argmin.c b/source/nn2/argmin.c index 43593fe6..14deb401 100644 --- a/source/nn2/argmin.c +++ b/source/nn2/argmin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/asin.c b/source/nn2/asin.c index f46bd1e3..2c96a2a0 100644 --- a/source/nn2/asin.c +++ b/source/nn2/asin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/asinh.c b/source/nn2/asinh.c index 4b801092..6faa97fa 100644 --- a/source/nn2/asinh.c +++ b/source/nn2/asinh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/atan.c b/source/nn2/atan.c index c633ee96..6350def2 100644 --- a/source/nn2/atan.c +++ b/source/nn2/atan.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/atanh.c b/source/nn2/atanh.c index 813e8c2e..453874d5 100644 --- a/source/nn2/atanh.c +++ b/source/nn2/atanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/averagepool.c b/source/nn2/averagepool.c index a9f2caf5..96a07c2c 100644 --- a/source/nn2/averagepool.c +++ b/source/nn2/averagepool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/averagepool3d.c b/source/nn2/averagepool3d.c index cd7fcff3..f07590cd 100644 --- a/source/nn2/averagepool3d.c +++ b/source/nn2/averagepool3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/batch_normalization.c b/source/nn2/batch_normalization.c index e75ad5f9..7de2849a 100644 --- a/source/nn2/batch_normalization.c +++ b/source/nn2/batch_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/batch_to_space.c b/source/nn2/batch_to_space.c index faa77a60..24c2a388 100644 --- a/source/nn2/batch_to_space.c +++ b/source/nn2/batch_to_space.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/batch_to_space_nd.c b/source/nn2/batch_to_space_nd.c index ac12cdf0..4b199497 100644 --- a/source/nn2/batch_to_space_nd.c +++ b/source/nn2/batch_to_space_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/broadcast_to.c b/source/nn2/broadcast_to.c index db56309a..4bfc6315 100644 --- a/source/nn2/broadcast_to.c +++ b/source/nn2/broadcast_to.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/cache_conv1d.c b/source/nn2/cache_conv1d.c new file mode 100644 index 00000000..90608de5 --- /dev/null +++ b/source/nn2/cache_conv1d.c @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" + +int csi_cache_conv1d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params) +{ + params->base.bc = + csi_bc_map(params->base.api, params->base.run_mode, CSINN_OP_CACHE_CONV1D, input->dtype); + if (params->base.bc == NULL) { + return CSINN_UNSUPPORT_DTYPE; + } + return CSINN_TRUE; +} + +int csi_cache_conv1d(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *weight, + struct csi_tensor *bias, struct cache_conv1d_params *params) +{ + CSI_DEBUG_CALL(csi_cache_conv1d_debug_info(input, output, weight, bias, params, __func__)); + if (params->base.bc != NULL) { + params->base.bc(input, output, weight, bias, params); + } else { + return CSINN_CALLBACK_UNSET; + } + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/nn2/cache_matmul.c b/source/nn2/cache_matmul.c new file mode 100644 index 00000000..7648b0d6 --- /dev/null +++ b/source/nn2/cache_matmul.c @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" + +int csi_cache_matmul_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params) +{ + params->base.bc = + csi_bc_map(params->base.api, params->base.run_mode, CSINN_OP_CACHE_MATMUL, input->dtype); + if (params->base.bc == NULL) { + return CSINN_UNSUPPORT_DTYPE; + } + return CSINN_TRUE; +} + +int csi_cache_matmul(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *weight, + struct csi_tensor *bias, struct cache_matmul_params *params) +{ + CSI_DEBUG_CALL(csi_cache_matmul_debug_info(input, output, weight, bias, params, __func__)); + if (params->base.bc != NULL) { + params->base.bc(input, output, weight, bias, params); + } else { + return CSINN_CALLBACK_UNSET; + } + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/nn2/ceil.c b/source/nn2/ceil.c index 03b73e0a..bd792a37 100644 --- a/source/nn2/ceil.c +++ b/source/nn2/ceil.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/clip.c b/source/nn2/clip.c index 3b794d20..5dbe4e56 100644 --- a/source/nn2/clip.c +++ b/source/nn2/clip.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/col2im.c b/source/nn2/col2im.c index 0279fec5..a979943d 100644 --- a/source/nn2/col2im.c +++ b/source/nn2/col2im.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/concat.c b/source/nn2/concat.c index 36034c74..31bdaca4 100644 --- a/source/nn2/concat.c +++ b/source/nn2/concat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/convolution.c b/source/nn2/convolution.c index e07bae0e..5ad95e99 100644 --- a/source/nn2/convolution.c +++ b/source/nn2/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" @@ -44,6 +44,8 @@ int csi_conv2d_init(struct csi_tensor *input, } else { init_func = csi_init_map(params->base.api, CSINN_OP_GROUP_CONV2D, input->dtype); } + } else { + init_func = NULL; } if (init_func != NULL) { return init_func(input, output, kernel, bias, params); @@ -90,7 +92,7 @@ int csi_conv2d(struct csi_tensor *input, if (params->conv_extra.kernel_tm != NULL && params->conv_extra.conv_mode == CSINN_WINOGRAD) { params->base.bc(input, output, params->conv_extra.kernel_tm, bias, params); csi_mem_free(params->conv_extra.kernel_tm->data); - csi_mem_free(params->conv_extra.kernel_tm); + csi_free_tensor(params->conv_extra.kernel_tm); } else { params->base.bc(input, output, kernel, bias, params); } diff --git a/source/nn2/convolution1d.c b/source/nn2/convolution1d.c new file mode 100644 index 00000000..d1de4cbb --- /dev/null +++ b/source/nn2/convolution1d.c @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" + +int csi_conv1d_init(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *kernel, + struct csi_tensor *bias, + struct conv1d_params *params) +{ + params->base.bc = csi_bc_map(params->base.api, params->base.run_mode, CSINN_OP_CONV1D, input->dtype); + if (params->base.bc == NULL) + { + return CSINN_UNSUPPORT_DTYPE; + } + return CSINN_TRUE; +} + +int csi_conv1d(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *kernel, + struct csi_tensor *bias, + struct conv1d_params *params) +{ + CSI_DEBUG_CALL(csi_conv1d_debug_info(input, output, kernel, bias, params, __func__)); + if (params->base.bc != NULL) + { + params->base.bc(input, output, kernel, bias, params); + } + else + { + return CSINN_CALLBACK_UNSET; + } + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/nn2/convolution3d.c b/source/nn2/convolution3d.c index 0501c7d9..09ffaa92 100644 --- a/source/nn2/convolution3d.c +++ b/source/nn2/convolution3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/convolution_relu.c b/source/nn2/convolution_relu.c index c922a376..672e25cd 100644 --- a/source/nn2/convolution_relu.c +++ b/source/nn2/convolution_relu.c @@ -1,6 +1,6 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -17,7 +17,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" @@ -45,6 +45,8 @@ int csi_conv2d_relu_init(struct csi_tensor *input, } else { init_func = csi_init_map(params->base.api, CSINN_OP_GROUP_CONV2D_RELU, input->dtype); } + } else { + init_func = NULL; } if (init_func != NULL) { return init_func(input, output, kernel, bias, params); diff --git a/source/nn2/convolution_relu6.c b/source/nn2/convolution_relu6.c index c759124a..d6efec99 100644 --- a/source/nn2/convolution_relu6.c +++ b/source/nn2/convolution_relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/cos.c b/source/nn2/cos.c index cb22049e..3c788dd0 100644 --- a/source/nn2/cos.c +++ b/source/nn2/cos.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/cosh.c b/source/nn2/cosh.c index e62c66a2..a788393c 100644 --- a/source/nn2/cosh.c +++ b/source/nn2/cosh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/crop.c b/source/nn2/crop.c index 48f32d6c..246f19c1 100644 --- a/source/nn2/crop.c +++ b/source/nn2/crop.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/cumprod.c b/source/nn2/cumprod.c index 6b32bbc0..ed971d75 100644 --- a/source/nn2/cumprod.c +++ b/source/nn2/cumprod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/cumsum.c b/source/nn2/cumsum.c index 58a9cf60..820522cf 100644 --- a/source/nn2/cumsum.c +++ b/source/nn2/cumsum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/data_convert.c b/source/nn2/data_convert.c new file mode 100644 index 00000000..e4043122 --- /dev/null +++ b/source/nn2/data_convert.c @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.11.x */ + +#include "csi_nn.h" + +int csi_data_convert_init(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params) +{ + params->base.bc = + csi_bc_map(params->base.api, params->base.run_mode, CSINN_OP_DATA_CONVERT, input->dtype); + if (params->base.bc == NULL) { + return CSINN_UNSUPPORT_DTYPE; + } + return CSINN_TRUE; +} + +int csi_data_convert(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params) +{ + CSI_DEBUG_CALL(csi_siso_debug_info(input, output, params, __func__)); + if (params->base.bc != NULL) { + params->base.bc(input, output, params); + } else { + return CSINN_CALLBACK_UNSET; + } + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/nn2/deconvolution.c b/source/nn2/deconvolution.c index b771a3b5..74c4223d 100644 --- a/source/nn2/deconvolution.c +++ b/source/nn2/deconvolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/deconvolution3d.c b/source/nn2/deconvolution3d.c index a20a2f8d..def29799 100644 --- a/source/nn2/deconvolution3d.c +++ b/source/nn2/deconvolution3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/depth_to_space.c b/source/nn2/depth_to_space.c index 655708b0..63c831fb 100644 --- a/source/nn2/depth_to_space.c +++ b/source/nn2/depth_to_space.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/div.c b/source/nn2/div.c index 2fbf7e8c..3ca7ab73 100644 --- a/source/nn2/div.c +++ b/source/nn2/div.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/elu.c b/source/nn2/elu.c index d892918c..51698d45 100644 --- a/source/nn2/elu.c +++ b/source/nn2/elu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/equal.c b/source/nn2/equal.c index 77cf158a..ea039284 100644 --- a/source/nn2/equal.c +++ b/source/nn2/equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/erf.c b/source/nn2/erf.c index cf10962c..47e9b638 100644 --- a/source/nn2/erf.c +++ b/source/nn2/erf.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/exp.c b/source/nn2/exp.c index d7f8df3d..07ba2aac 100644 --- a/source/nn2/exp.c +++ b/source/nn2/exp.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/expand_dims.c b/source/nn2/expand_dims.c index 006c0aa0..a6b17d92 100644 --- a/source/nn2/expand_dims.c +++ b/source/nn2/expand_dims.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/expm1.c b/source/nn2/expm1.c index 646d9e1d..8080ea98 100644 --- a/source/nn2/expm1.c +++ b/source/nn2/expm1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/flatten.c b/source/nn2/flatten.c index 2b0760f3..a668049b 100644 --- a/source/nn2/flatten.c +++ b/source/nn2/flatten.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/floor.c b/source/nn2/floor.c index a20ad122..44fbaf88 100644 --- a/source/nn2/floor.c +++ b/source/nn2/floor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/floor_divide.c b/source/nn2/floor_divide.c index 72cf1d60..4b7d01f0 100644 --- a/source/nn2/floor_divide.c +++ b/source/nn2/floor_divide.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/floor_mod.c b/source/nn2/floor_mod.c index ea650c7a..4bab78a6 100644 --- a/source/nn2/floor_mod.c +++ b/source/nn2/floor_mod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/fsmn.c b/source/nn2/fsmn.c index 28ba34cc..46583837 100644 --- a/source/nn2/fsmn.c +++ b/source/nn2/fsmn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/fullyconnected.c b/source/nn2/fullyconnected.c index a254a016..64c5e370 100644 --- a/source/nn2/fullyconnected.c +++ b/source/nn2/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/gather.c b/source/nn2/gather.c index 00374dd6..7e62edee 100644 --- a/source/nn2/gather.c +++ b/source/nn2/gather.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/gather_nd.c b/source/nn2/gather_nd.c index 9723c6ba..b8641413 100644 --- a/source/nn2/gather_nd.c +++ b/source/nn2/gather_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/global_averagepool.c b/source/nn2/global_averagepool.c index baded74a..ffbcbef9 100644 --- a/source/nn2/global_averagepool.c +++ b/source/nn2/global_averagepool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/global_maxpool.c b/source/nn2/global_maxpool.c index 85ceb2a3..53d62354 100644 --- a/source/nn2/global_maxpool.c +++ b/source/nn2/global_maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/greater.c b/source/nn2/greater.c index 18a9089a..cada7e57 100644 --- a/source/nn2/greater.c +++ b/source/nn2/greater.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/greater_equal.c b/source/nn2/greater_equal.c index fadc6257..c1e1a794 100644 --- a/source/nn2/greater_equal.c +++ b/source/nn2/greater_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/hard_sigmoid.c b/source/nn2/hard_sigmoid.c index 6a5267ce..4fee974d 100644 --- a/source/nn2/hard_sigmoid.c +++ b/source/nn2/hard_sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/im2col.c b/source/nn2/im2col.c index 312f2e12..d2e50aa1 100644 --- a/source/nn2/im2col.c +++ b/source/nn2/im2col.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/isnan.c b/source/nn2/isnan.c index 515a3889..7d077ef3 100644 --- a/source/nn2/isnan.c +++ b/source/nn2/isnan.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/l2_normalization.c b/source/nn2/l2_normalization.c index 9ef7be2f..9cb4d49b 100644 --- a/source/nn2/l2_normalization.c +++ b/source/nn2/l2_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/l2pool.c b/source/nn2/l2pool.c index c4995980..f0db727f 100644 --- a/source/nn2/l2pool.c +++ b/source/nn2/l2pool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/layer_norm.c b/source/nn2/layer_norm.c new file mode 100644 index 00000000..a7b2c37a --- /dev/null +++ b/source/nn2/layer_norm.c @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" + +int csi_layer_norm_init(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *gamma, + struct csi_tensor *beta, + struct layer_norm_params *params) +{ + params->base.bc = csi_bc_map(params->base.api, params->base.run_mode, CSINN_OP_LAYER_NORM, input->dtype); + if (params->base.bc == NULL) + { + return CSINN_UNSUPPORT_DTYPE; + } + return CSINN_TRUE; +} + +int csi_layer_norm(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *gamma, + struct csi_tensor *beta, + struct layer_norm_params *params) +{ + CSI_DEBUG_CALL(csi_layer_norm_debug_info(input, output, gamma, beta, params, __func__)); + if (params->base.bc != NULL) + { + params->base.bc(input, output, gamma, beta, params); + } + else + { + return CSINN_CALLBACK_UNSET; + } + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/nn2/leaky_relu.c b/source/nn2/leaky_relu.c index 476729f0..689d4846 100644 --- a/source/nn2/leaky_relu.c +++ b/source/nn2/leaky_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/less.c b/source/nn2/less.c index 420cc0cf..b1ccef84 100644 --- a/source/nn2/less.c +++ b/source/nn2/less.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/less_equal.c b/source/nn2/less_equal.c index 9a82ea85..9c2f8176 100644 --- a/source/nn2/less_equal.c +++ b/source/nn2/less_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/log.c b/source/nn2/log.c index 4f9fb960..1575ee69 100644 --- a/source/nn2/log.c +++ b/source/nn2/log.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/log1p.c b/source/nn2/log1p.c index 24230a90..1bdcad25 100644 --- a/source/nn2/log1p.c +++ b/source/nn2/log1p.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/log_softmax.c b/source/nn2/log_softmax.c index 0d7f9364..6d60f0be 100644 --- a/source/nn2/log_softmax.c +++ b/source/nn2/log_softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/logical_and.c b/source/nn2/logical_and.c index 6f88e830..507e6023 100644 --- a/source/nn2/logical_and.c +++ b/source/nn2/logical_and.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/logical_not.c b/source/nn2/logical_not.c index 1a325e4c..907933a5 100644 --- a/source/nn2/logical_not.c +++ b/source/nn2/logical_not.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/logical_or.c b/source/nn2/logical_or.c index bd1a3c79..7c4cad30 100644 --- a/source/nn2/logical_or.c +++ b/source/nn2/logical_or.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/logical_xor.c b/source/nn2/logical_xor.c index 01796c01..5454e266 100644 --- a/source/nn2/logical_xor.c +++ b/source/nn2/logical_xor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/lrn.c b/source/nn2/lrn.c index bc5318c7..9e8d24ba 100644 --- a/source/nn2/lrn.c +++ b/source/nn2/lrn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/matmul.c b/source/nn2/matmul.c index 26a71e9d..a862fad2 100644 --- a/source/nn2/matmul.c +++ b/source/nn2/matmul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/max.c b/source/nn2/max.c index 14426c2f..54211abc 100644 --- a/source/nn2/max.c +++ b/source/nn2/max.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/maximum.c b/source/nn2/maximum.c index 563480e8..4b03df6d 100644 --- a/source/nn2/maximum.c +++ b/source/nn2/maximum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/maxpool.c b/source/nn2/maxpool.c index ce3e7a6b..1edb1371 100644 --- a/source/nn2/maxpool.c +++ b/source/nn2/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/maxpool2d_locat.c b/source/nn2/maxpool2d_locat.c index 1555eb10..2cdaaf12 100644 --- a/source/nn2/maxpool2d_locat.c +++ b/source/nn2/maxpool2d_locat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/maxpool3d.c b/source/nn2/maxpool3d.c index 07d78746..0070f756 100644 --- a/source/nn2/maxpool3d.c +++ b/source/nn2/maxpool3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/mean.c b/source/nn2/mean.c index ebc86fa7..1022c686 100644 --- a/source/nn2/mean.c +++ b/source/nn2/mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/min.c b/source/nn2/min.c index db4ffdc7..118028c7 100644 --- a/source/nn2/min.c +++ b/source/nn2/min.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/minimum.c b/source/nn2/minimum.c index 03c03e3a..f648fe5e 100644 --- a/source/nn2/minimum.c +++ b/source/nn2/minimum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/mod.c b/source/nn2/mod.c index 45bb3c84..91dea742 100644 --- a/source/nn2/mod.c +++ b/source/nn2/mod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/mul.c b/source/nn2/mul.c index 190cbe17..89c50cb2 100644 --- a/source/nn2/mul.c +++ b/source/nn2/mul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/ndarray_size.c b/source/nn2/ndarray_size.c index 375b14c5..2b6c6f48 100644 --- a/source/nn2/ndarray_size.c +++ b/source/nn2/ndarray_size.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/negative.c b/source/nn2/negative.c index e5808a73..daacccc1 100644 --- a/source/nn2/negative.c +++ b/source/nn2/negative.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/node.c b/source/nn2/node.c index e35ceeb0..5819ed5b 100644 --- a/source/nn2/node.c +++ b/source/nn2/node.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,8 +16,9 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ +#include "csi_nn.h" #include "csi_node.h" struct csi_node *csi_node_alloc(int node_type, char *name, int in_num, int out_num, void *data) @@ -29,8 +30,13 @@ struct csi_node *csi_node_alloc(int node_type, char *name, int in_num, int out_n ret->data = data; ret->in_num = in_num; ret->out_num = out_num; - ret->in = csi_mem_alloc(in_num * sizeof(struct csi_node*)); - ret->out = csi_mem_alloc(out_num * sizeof(struct csi_node*)); + if (in_num != 0) { + ret->in = csi_mem_alloc(in_num * sizeof(struct csi_node *)); + } + if (out_num != 0) { + ret->out = csi_mem_alloc(out_num * sizeof(struct csi_node *)); + } + ret->subgraph_idx = -1; return ret; } @@ -62,6 +68,10 @@ int csi_node_add_in(struct csi_node *node, struct csi_node *in, int index) int csi_node_add_out(struct csi_node *node, struct csi_node *out, int index) { node->out[index] = out; + + if (out->type == CSINN_TENSOR && out->in_num == 1) { + out->in[0] = node; + } return CSINN_TRUE; } @@ -75,6 +85,19 @@ int csi_node_get_out_number(struct csi_node *node) return node->out_num; } +int csi_node_get_non_const_in_number(struct csi_node *node) +{ + int in_num = csi_node_get_in_number(node); + int const_in_num = 0; + for (int i = 0; i < in_num; i++) { + struct csi_tensor *data = node->in[i]->data; + if (data->is_const) { + const_in_num ++; + } + } + return (in_num - const_in_num); +} + struct csi_node *csi_node_get_in(struct csi_node *node, int index) { return node->in[index]; @@ -84,3 +107,27 @@ struct csi_node *csi_node_get_out(struct csi_node *node, int index) { return node->out[index]; } + +int csi_node_restrict_map_insert(int value, struct csi_node *node) +{ + node->restricted_map = + csi_mem_realloc(node->restricted_map, (node->restricted_map_num + 1) * sizeof(int)); + node->restricted_map[node->restricted_map_num] = value; + node->restricted_map_num++; + return CSINN_TRUE; +} + +int csi_node_find(struct csi_node **list, int len, struct csi_node *node) +{ + int res = -1; + if (!list || len < 1) { + return res; + } + for (int i = 0; i < len; i++) { + if (list[i] == node) { + res = i; + break; + } + } + return res; +} diff --git a/source/nn2/non_max_suppression.c b/source/nn2/non_max_suppression.c index 323b1e91..19e2ffc4 100644 --- a/source/nn2/non_max_suppression.c +++ b/source/nn2/non_max_suppression.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/not.c b/source/nn2/not.c index 672d7ece..57fca4b6 100644 --- a/source/nn2/not.c +++ b/source/nn2/not.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/not_equal.c b/source/nn2/not_equal.c index 5b90c731..19898e64 100644 --- a/source/nn2/not_equal.c +++ b/source/nn2/not_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/one_hot.c b/source/nn2/one_hot.c index f977cd48..82b4ca23 100644 --- a/source/nn2/one_hot.c +++ b/source/nn2/one_hot.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/or.c b/source/nn2/or.c index 8f283a6e..17f152d2 100644 --- a/source/nn2/or.c +++ b/source/nn2/or.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/pad.c b/source/nn2/pad.c index 9853b862..22608c3a 100644 --- a/source/nn2/pad.c +++ b/source/nn2/pad.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/power.c b/source/nn2/power.c index 12133c72..5e3ea54c 100644 --- a/source/nn2/power.c +++ b/source/nn2/power.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/prelu.c b/source/nn2/prelu.c index 0ab88188..23e03295 100644 --- a/source/nn2/prelu.c +++ b/source/nn2/prelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/prod.c b/source/nn2/prod.c index 18540a54..122ab882 100644 --- a/source/nn2/prod.c +++ b/source/nn2/prod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/proposal.c b/source/nn2/proposal.c index 87445df3..fe16b026 100644 --- a/source/nn2/proposal.c +++ b/source/nn2/proposal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/psroipooling.c b/source/nn2/psroipooling.c index de7ed7f3..abd81074 100644 --- a/source/nn2/psroipooling.c +++ b/source/nn2/psroipooling.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/reduce_logsumexp.c b/source/nn2/reduce_logsumexp.c index 7b2673b3..8208f911 100644 --- a/source/nn2/reduce_logsumexp.c +++ b/source/nn2/reduce_logsumexp.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/reduce_max.c b/source/nn2/reduce_max.c index cd6e187c..4beda5a8 100644 --- a/source/nn2/reduce_max.c +++ b/source/nn2/reduce_max.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/reduce_mean.c b/source/nn2/reduce_mean.c index ebe7be7a..71d30fb9 100644 --- a/source/nn2/reduce_mean.c +++ b/source/nn2/reduce_mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/reduce_min.c b/source/nn2/reduce_min.c index 8de9b8fa..a7587be0 100644 --- a/source/nn2/reduce_min.c +++ b/source/nn2/reduce_min.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/reduce_prod.c b/source/nn2/reduce_prod.c index d2711b5f..11f2e241 100644 --- a/source/nn2/reduce_prod.c +++ b/source/nn2/reduce_prod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/reduce_sum.c b/source/nn2/reduce_sum.c index ff4bb1ba..0be101b9 100644 --- a/source/nn2/reduce_sum.c +++ b/source/nn2/reduce_sum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/relu.c b/source/nn2/relu.c index 9db9a068..4ecbe064 100644 --- a/source/nn2/relu.c +++ b/source/nn2/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/relu1.c b/source/nn2/relu1.c index ee2014f3..60616b97 100644 --- a/source/nn2/relu1.c +++ b/source/nn2/relu1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/relu6.c b/source/nn2/relu6.c index 8fed5d4d..ed04a7b2 100644 --- a/source/nn2/relu6.c +++ b/source/nn2/relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/relun.c b/source/nn2/relun.c index 00ab9435..df4191e4 100644 --- a/source/nn2/relun.c +++ b/source/nn2/relun.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/reorg.c b/source/nn2/reorg.c index e47d18ab..34c10ed1 100644 --- a/source/nn2/reorg.c +++ b/source/nn2/reorg.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/reshape.c b/source/nn2/reshape.c index 2f5b8e1f..35135f66 100644 --- a/source/nn2/reshape.c +++ b/source/nn2/reshape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/resize.c b/source/nn2/resize.c index fc2c22c2..d9ae5ab7 100644 --- a/source/nn2/resize.c +++ b/source/nn2/resize.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/reverse.c b/source/nn2/reverse.c index 03f7fda4..7663f2cf 100644 --- a/source/nn2/reverse.c +++ b/source/nn2/reverse.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/roialign.c b/source/nn2/roialign.c index 9d14c0e4..b5d0694d 100644 --- a/source/nn2/roialign.c +++ b/source/nn2/roialign.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/roipool.c b/source/nn2/roipool.c index 7ee9b5a0..6e36b70a 100644 --- a/source/nn2/roipool.c +++ b/source/nn2/roipool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/round.c b/source/nn2/round.c index ddb3800b..c01c7f84 100644 --- a/source/nn2/round.c +++ b/source/nn2/round.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/rsqrt.c b/source/nn2/rsqrt.c index ef57d759..3aa35526 100644 --- a/source/nn2/rsqrt.c +++ b/source/nn2/rsqrt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/scatter.c b/source/nn2/scatter.c index 5af802dc..2eab72f3 100644 --- a/source/nn2/scatter.c +++ b/source/nn2/scatter.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/segment_max.c b/source/nn2/segment_max.c index 8506e0fa..46091951 100644 --- a/source/nn2/segment_max.c +++ b/source/nn2/segment_max.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/segment_mean.c b/source/nn2/segment_mean.c index 96dc68e5..2f2262d5 100644 --- a/source/nn2/segment_mean.c +++ b/source/nn2/segment_mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/segment_min.c b/source/nn2/segment_min.c index 3f6594c0..9acc72cc 100644 --- a/source/nn2/segment_min.c +++ b/source/nn2/segment_min.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/segment_prod.c b/source/nn2/segment_prod.c index 376c56c5..6453e7e4 100644 --- a/source/nn2/segment_prod.c +++ b/source/nn2/segment_prod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/segment_sum.c b/source/nn2/segment_sum.c index 893ba804..84a08bf9 100644 --- a/source/nn2/segment_sum.c +++ b/source/nn2/segment_sum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/select.c b/source/nn2/select.c index 868681fc..66f72318 100644 --- a/source/nn2/select.c +++ b/source/nn2/select.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/sequence_mask.c b/source/nn2/sequence_mask.c index 3c1821de..f4d4e691 100644 --- a/source/nn2/sequence_mask.c +++ b/source/nn2/sequence_mask.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/setup.c b/source/nn2/setup.c index 5755f416..399dd05b 100644 --- a/source/nn2/setup.c +++ b/source/nn2/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,20 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "csi_utils.h" -struct csi_session *csi_alloc_session() -{ - return csi_mem_alloc(sizeof(struct csi_session)); -} +struct csi_session *csi_alloc_session() { return csi_mem_alloc(sizeof(struct csi_session)); } -void csi_free_session(struct csi_session *sess) -{ - csi_mem_free(sess); -} +void csi_free_session(struct csi_session *sess) { csi_mem_free(sess); } void *csi_bc_map_ref(int op, int dtype); void *csi_bc_map_gref(int op, int dtype); @@ -41,6 +35,9 @@ void *csi_bc_map_ch8601(int op, int dtype); void *csi_bc_map_i805(int op, int dtype); void *csi_bc_map_e804(int op, int dtype); void *csi_bc_map_ref_i805(int op, int dtype); +void *csi_bc_map_c908(int op, int dtype); +void *csi_bc_map_asp(int op, int dtype); +void *csi_bc_map_rvv(int op, int dtype); void *csi_bc_func_table[CSINN_API_SIZE] = { #ifdef CSI_BUILD_REF csi_bc_map_ref, @@ -93,13 +90,28 @@ void *csi_bc_func_table[CSINN_API_SIZE] = { csi_bc_map_ref_i805, #else NULL, +#endif +#ifdef CSI_BUILD_C908 + csi_bc_map_c908, +#else + NULL, /* c908 */ #endif NULL, /* tvmgen */ +#ifdef CSI_BUILD_ASP + csi_bc_map_asp, +#else + NULL, /* asp */ +#endif +#ifdef CSI_BUILD_RVV + csi_bc_map_rvv, +#else + NULL, /* rvv */ +#endif }; void *csi_bc_map(int api, int rmode, int op, int dtype) { - void* (*func)(); + void *(*func)(); if (rmode == CSINN_RM_CPU_GRAPH) { func = csi_bc_func_table[CSINN_GREF]; } else { @@ -113,9 +125,11 @@ void *csi_init_map_ref(int op, int dtype); void *csi_init_map_i805(int op, int dtype); void *csi_init_map_e804(int op, int dtype); void *csi_init_map_ref_i805(int op, int dtype); +void *csi_init_map_c908(int op, int dtype); +void *csi_init_map_rvv(int op, int dtype); void *csi_init_func_table[CSINN_API_SIZE] = { #ifdef CSI_BUILD_REF - csi_init_map_ref,/* c code */ + csi_init_map_ref, /* c code */ #else NULL, /* c code */ #endif @@ -145,13 +159,24 @@ void *csi_init_func_table[CSINN_API_SIZE] = { csi_init_map_ref_i805, #else NULL, +#endif +#ifdef CSI_BUILD_C908 + csi_init_map_c908, +#else + NULL, /* c908 */ #endif NULL, /* tvmgen */ + NULL, /* asp */ +#ifdef CSI_BUILD_RVV + csi_init_map_rvv, +#else + NULL, /* rvv */ +#endif }; void *csi_init_map(int api, int op, int dtype) { - void* (*func)() = csi_init_func_table[api]; + void *(*func)() = csi_init_func_table[api]; if (func != NULL) { return func(op, dtype); } else { @@ -163,7 +188,7 @@ void csi_session_init(struct csi_session *sess) { csi_debug_set_level(sess->debug_level); - void* (*func)(); + void *(*func)(); func = csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_SESSION_INIT, sess->base_dtype); if (func != NULL) { func(sess); @@ -172,7 +197,7 @@ void csi_session_init(struct csi_session *sess) void csi_session_deinit(struct csi_session *sess) { - void* (*func)(); + void *(*func)(); func = csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_SESSION_DEINIT, sess->base_dtype); if (func != NULL) { func(sess); @@ -184,7 +209,8 @@ void csi_set_output_number(int number, struct csi_session *sess) sess->output_num = number; sess->output = csi_mem_alloc(sess->output_num * sizeof(struct csi_tensor *)); void (*func)(); - func = csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_SET_OUTPUT_NUMBER, sess->base_dtype); + func = + csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_SET_OUTPUT_NUMBER, sess->base_dtype); if (func != NULL) { func(number, sess); } @@ -195,7 +221,8 @@ void csi_set_input_number(int number, struct csi_session *sess) sess->input_num = number; sess->input = csi_mem_alloc(sess->input_num * sizeof(struct csi_tensor *)); void (*func)(); - func = csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_SET_INPUT_NUMBER, sess->base_dtype); + func = + csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_SET_INPUT_NUMBER, sess->base_dtype); if (func != NULL) { func(number, sess); } @@ -204,7 +231,8 @@ void csi_set_input_number(int number, struct csi_session *sess) int csi_get_output_number(struct csi_session *sess) { int (*func)(); - func = csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_GET_OUTPUT_NUMBER, sess->base_dtype); + func = + csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_GET_OUTPUT_NUMBER, sess->base_dtype); if (func != NULL) { return func(sess); } else { @@ -215,7 +243,8 @@ int csi_get_output_number(struct csi_session *sess) int csi_get_input_number(struct csi_session *sess) { int (*func)(); - func = csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_GET_INPUT_NUMBER, sess->base_dtype); + func = + csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_GET_INPUT_NUMBER, sess->base_dtype); if (func != NULL) { return func(sess); } else { @@ -273,7 +302,16 @@ int csi_update_input(int index, struct csi_tensor *input, struct csi_session *se int (*func)(); func = csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_UPDATE_INPUT, sess->base_dtype); if (func != NULL) { - return func(index, input, sess); + int ret = CSINN_FALSE; + if (sess->profiler_level == CSI_PROFILER_LEVEL_TIMER) { + uint64_t start = csi_get_timespec(); + ret = func(index, input, sess); + uint64_t end = csi_get_timespec(); + csi_print_time_interval(start, end, __func__); + } else { + ret = func(index, input, sess); + } + return ret; } return CSINN_TRUE; } @@ -294,7 +332,16 @@ int csi_session_setup(struct csi_session *sess) int (*func)(); func = csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_SESSION_SETUP, sess->base_dtype); if (func != NULL) { - return func(sess); + int ret = CSINN_FALSE; + if (sess->profiler_level == CSI_PROFILER_LEVEL_TIMER) { + uint64_t start = csi_get_timespec(); + ret = func(sess); + uint64_t end = csi_get_timespec(); + csi_print_time_interval(start, end, __func__); + } else { + ret = func(sess); + } + return ret; } return CSINN_FALSE; } @@ -304,7 +351,16 @@ int csi_session_run(struct csi_session *sess) int (*func)(); func = csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_SESSION_RUN, sess->base_dtype); if (func != NULL) { - return func(sess); + int ret = CSINN_FALSE; + if (sess->profiler_level == CSI_PROFILER_LEVEL_TIMER) { + uint64_t start = csi_get_timespec(); + ret = func(sess); + uint64_t end = csi_get_timespec(); + csi_print_time_interval(start, end, __func__); + } else { + ret = func(sess); + } + return ret; } return CSINN_FALSE; } @@ -319,12 +375,45 @@ int csi_set_tensor_entry(struct csi_tensor *t, struct csi_session *sess) return CSINN_FALSE; } +struct csi_bc_op_list *csi_bc_list_end(struct csi_bc_op_list *list) +{ + struct csi_bc_op_list *l = list; + while (l->next) { + l = l->next; + } + return l; +} + +void *csi_bc_list_match(struct csi_bc_op_list *list, enum csinn_dtype_enum dtype, + enum csinn_op_enum op_name) +{ + void *ret = NULL; + struct csi_bc_op_list *l = list; + while (l) { + if (l->dtype == dtype && l->op_name == op_name) { + ret = l->bc; + break; + } + l = l->next; + } + return ret; +} + int csi_load_binary_model(char *path, struct csi_session *sess) { int (*func)(); func = csi_bc_map(sess->base_api, sess->base_run_mode, CSINN_LOAD_BG, sess->base_dtype); if (func != NULL) { - return func(path, sess); + int ret = CSINN_FALSE; + if (sess->profiler_level == CSI_PROFILER_LEVEL_TIMER) { + uint64_t start = csi_get_timespec(); + ret = func(path, sess); + uint64_t end = csi_get_timespec(); + csi_print_time_interval(start, end, __func__); + } else { + ret = func(path, sess); + } + return ret; } return CSINN_FALSE; } diff --git a/source/nn2/shape.c b/source/nn2/shape.c index 75cea450..b5f5ceaf 100644 --- a/source/nn2/shape.c +++ b/source/nn2/shape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/shuffle_channel.c b/source/nn2/shuffle_channel.c index 9915e830..1a624af1 100644 --- a/source/nn2/shuffle_channel.c +++ b/source/nn2/shuffle_channel.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/sigmoid.c b/source/nn2/sigmoid.c index f7b07619..0f482b89 100644 --- a/source/nn2/sigmoid.c +++ b/source/nn2/sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/sign.c b/source/nn2/sign.c index e2301ff0..c8749bf3 100644 --- a/source/nn2/sign.c +++ b/source/nn2/sign.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/sin.c b/source/nn2/sin.c index c6c3a4a5..29a19ae0 100644 --- a/source/nn2/sin.c +++ b/source/nn2/sin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/sinh.c b/source/nn2/sinh.c index ec347c35..2a4dc620 100644 --- a/source/nn2/sinh.c +++ b/source/nn2/sinh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/slice.c b/source/nn2/slice.c index 06b148aa..fb75a496 100644 --- a/source/nn2/slice.c +++ b/source/nn2/slice.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/softmax.c b/source/nn2/softmax.c index 275be0a1..684b589f 100644 --- a/source/nn2/softmax.c +++ b/source/nn2/softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/softplus.c b/source/nn2/softplus.c index a417ba18..0d979527 100644 --- a/source/nn2/softplus.c +++ b/source/nn2/softplus.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/softrelu.c b/source/nn2/softrelu.c index 53a96eaa..b34a8b0b 100644 --- a/source/nn2/softrelu.c +++ b/source/nn2/softrelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/softsign.c b/source/nn2/softsign.c index 7eabdd70..537098c0 100644 --- a/source/nn2/softsign.c +++ b/source/nn2/softsign.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/space_to_batch.c b/source/nn2/space_to_batch.c index d0d2968d..e9f791f7 100644 --- a/source/nn2/space_to_batch.c +++ b/source/nn2/space_to_batch.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/space_to_batch_nd.c b/source/nn2/space_to_batch_nd.c index 95e90696..ea23b9d8 100644 --- a/source/nn2/space_to_batch_nd.c +++ b/source/nn2/space_to_batch_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/space_to_depth.c b/source/nn2/space_to_depth.c index 78ec91f9..a8725cc0 100644 --- a/source/nn2/space_to_depth.c +++ b/source/nn2/space_to_depth.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/split.c b/source/nn2/split.c index 9849fcaf..20f8eb00 100644 --- a/source/nn2/split.c +++ b/source/nn2/split.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/sqrt.c b/source/nn2/sqrt.c index da64f917..c7916298 100644 --- a/source/nn2/sqrt.c +++ b/source/nn2/sqrt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/square.c b/source/nn2/square.c index 5c4361a1..eecfb4e2 100644 --- a/source/nn2/square.c +++ b/source/nn2/square.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/squeeze.c b/source/nn2/squeeze.c index 2beda4bb..1271f091 100644 --- a/source/nn2/squeeze.c +++ b/source/nn2/squeeze.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/stack.c b/source/nn2/stack.c index 3b4ae120..fd4e588e 100644 --- a/source/nn2/stack.c +++ b/source/nn2/stack.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/strided_slice.c b/source/nn2/strided_slice.c index 511a9567..53a20a22 100644 --- a/source/nn2/strided_slice.c +++ b/source/nn2/strided_slice.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/sub.c b/source/nn2/sub.c index ca87f6ef..e7a81e55 100644 --- a/source/nn2/sub.c +++ b/source/nn2/sub.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/sum.c b/source/nn2/sum.c index 1bff088b..c7d27bc8 100644 --- a/source/nn2/sum.c +++ b/source/nn2/sum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/tan.c b/source/nn2/tan.c index 5e32d2ca..2a5fafb3 100644 --- a/source/nn2/tan.c +++ b/source/nn2/tan.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/tanh.c b/source/nn2/tanh.c index d314f1d3..d2267479 100644 --- a/source/nn2/tanh.c +++ b/source/nn2/tanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/threshold_relu.c b/source/nn2/threshold_relu.c index 23ec8ba4..534162ea 100644 --- a/source/nn2/threshold_relu.c +++ b/source/nn2/threshold_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/tile.c b/source/nn2/tile.c index 5855908d..fcf52fdc 100644 --- a/source/nn2/tile.c +++ b/source/nn2/tile.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/topk.c b/source/nn2/topk.c index 768c1b21..f932f8f3 100644 --- a/source/nn2/topk.c +++ b/source/nn2/topk.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/transpose.c b/source/nn2/transpose.c index 2bf01d1c..0d1cddb7 100644 --- a/source/nn2/transpose.c +++ b/source/nn2/transpose.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/trunc.c b/source/nn2/trunc.c index ca1cf4bf..0b3e8a6c 100644 --- a/source/nn2/trunc.c +++ b/source/nn2/trunc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/unpooling.c b/source/nn2/unpooling.c index dd6a5458..49effac4 100644 --- a/source/nn2/unpooling.c +++ b/source/nn2/unpooling.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.9.x */ #include "csi_nn.h" diff --git a/source/nn2/unstack.c b/source/nn2/unstack.c index 8a244678..67b8ce79 100644 --- a/source/nn2/unstack.c +++ b/source/nn2/unstack.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/utils.c b/source/nn2/utils.c index 3fadd288..479c1d09 100644 --- a/source/nn2/utils.c +++ b/source/nn2/utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,19 +16,21 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ + +#include #include "csi_nn.h" #include "csi_ref.h" -#include -/* https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/internal/quantization_util.cc */ -static int64_t integer_from_exp(double input, int* shift) +/* https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/internal/quantization_util.cc + */ +static int64_t integer_from_exp(double input, int *shift) { uint64_t kSignMask = 0x8000000000000000LL; uint64_t kExponentMask = 0x7ff0000000000000LL; - int32_t kExponentShift = 52; - int32_t kExponentBias = 1023; + int32_t kExponentShift = 52; + int32_t kExponentBias = 1023; uint32_t kExponentIsBadNum = 0x7ff; uint64_t kFractionMask = 0x000fffffffc00000LL; uint32_t kFractionShift = 22; @@ -59,10 +61,10 @@ static int64_t integer_from_exp(double input, int* shift) if (exponent_part == kExponentIsBadNum) { *shift = 0x7fffffff; if (u & kFractionMask) { - // NaN, so just return zero (with the exponent set to INT_MAX). + // NaN, so just return zero (with the exponent set to INT_MAX). return 0; } else { - // Infinity, so return +/- INT_MAX. + // Infinity, so return +/- INT_MAX. if (u & kSignMask) { return 0x8000000000000000; } else { @@ -98,7 +100,7 @@ static int64_t integer_from_exp(double input, int* shift) return fraction; } -void csi_quantize_multiplier(double double_multiplier, int32_t* quantized_multiplier, int* shift) +void csi_quantize_multiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift) { if (double_multiplier == 0.) { *quantized_multiplier = 0; @@ -203,15 +205,15 @@ void csi_show_top5(struct csi_tensor *output, struct csi_session *sess) size *= output->dim[i]; } -// #ifdef CSI_DEBUG + // #ifdef CSI_DEBUG csi_statistical_mean_std(output->data, size); -// #endif + // #endif csi_get_top5(output->data, size, prob, class); printf(" ============ top5: ===========\n"); - size = size > 5? 5:size; - for(i = 0; i< size; i++) { + size = size > 5 ? 5 : size; + for (i = 0; i < size; i++) { printf("%3d: %8.6f\n", class[i], prob[i]); } } @@ -231,23 +233,27 @@ int csi_tensor_size(struct csi_tensor *tensor) int csi_tensor_byte_size(struct csi_tensor *tensor) { int size = csi_tensor_size(tensor); - switch (tensor->dtype) - { - case CSINN_DTYPE_INT16: - case CSINN_DTYPE_UINT16: - case CSINN_DTYPE_FLOAT16: - size *= 2; - break; - case CSINN_DTYPE_INT32: - case CSINN_DTYPE_UINT32: - case CSINN_DTYPE_FLOAT32: - size *= 4; - break; - case CSINN_DTYPE_FLOAT64: - size *= 8; - break; - default: - break; + switch (tensor->dtype) { + case CSINN_DTYPE_INT4: + /* FIXME: round to byte */ + size = (size + 1) / 2; + break; + case CSINN_DTYPE_INT16: + case CSINN_DTYPE_UINT16: + case CSINN_DTYPE_FLOAT16: + case CSINN_DTYPE_BFLOAT16: + size *= 2; + break; + case CSINN_DTYPE_INT32: + case CSINN_DTYPE_UINT32: + case CSINN_DTYPE_FLOAT32: + size *= 4; + break; + case CSINN_DTYPE_FLOAT64: + size *= 8; + break; + default: + break; } return size; } @@ -268,7 +274,7 @@ struct csi_tensor *csi_alloc_tensor(struct csi_session *session) void csi_realloc_quant_info(struct csi_tensor *tensor, int quant_info_num) { tensor->quant_channel = quant_info_num; - tensor->qinfo = realloc(tensor->qinfo, quant_info_num * sizeof(struct csi_quant_info)); + tensor->qinfo = csi_mem_realloc(tensor->qinfo, quant_info_num * sizeof(struct csi_quant_info)); } void csi_tensor_copy(struct csi_tensor *dest, struct csi_tensor *src) @@ -289,7 +295,7 @@ void csi_tensor_copy(struct csi_tensor *dest, struct csi_tensor *src) void csi_free_tensor(struct csi_tensor *tensor) { - if (tensor->qinfo != NULL){ + if (tensor->qinfo != NULL) { csi_mem_free(tensor->qinfo); } csi_mem_free(tensor); @@ -307,9 +313,11 @@ void *csi_alloc_params(int params_size, struct csi_session *session) return params; } -void csi_free_params(void *params) +void csi_free_params(void *params) { csi_mem_free(params); } + +static float csi_int4_to_float_base(int8_t i, struct csi_tensor *t, int index) { - csi_mem_free(params); + return ((float)i - t->qinfo[index].zero_point) * t->qinfo[index].scale; } static float csi_uint8_to_float_base(uint8_t i, struct csi_tensor *t, int index) @@ -329,7 +337,19 @@ static float csi_int16_to_float_base(int16_t i, struct csi_tensor *t, int index) static float csi_int32_to_float_base(int32_t i, struct csi_tensor *t, int index) { - return (float)i * t->qinfo[index].scale; + return (float)i * t->qinfo[index].scale; +} + +static int8_t csi_float_to_int4_base(float i, struct csi_tensor *t, int index) +{ + float ret = round(i / t->qinfo[index].scale) + t->qinfo[index].zero_point; + if (ret > 7) { + return 7; + } else if (ret < -8) { + return -8; + } else { + return ret; + } } static uint8_t csi_float_to_uint8_base(float i, struct csi_tensor *t, int index) @@ -368,8 +388,150 @@ static int16_t csi_float_to_int16_base(float i, struct csi_tensor *t, int index) } } -static void csi_nchw_uint8_to_float(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +/* Only for CSINN_LAYOUT_OHWI, HWI's size align */ +static void csi_axis0_int4_to_float_alignHWI(struct csi_tensor *dest, struct csi_tensor *src, + int inner_size) +{ + int8_t *src_data = src->data; + float *dest_data = dest->data; + int32_t q_size = src->quant_channel; + for (int i = 0; i < q_size; i++) { + for (int j = 0; j < inner_size; j++) { + int index = i * inner_size + j; + int in_index = i * ((inner_size + 1) / 2) + j / 2; + float ret = 0; + int8_t src_tmp = 0; + /* int4 little endian */ + if (j % 2) { + src_tmp = src_data[in_index] & 0xf0; + ret = csi_int4_to_float_base(src_tmp >> 4, src, i); + } else { + src_tmp = (src_data[in_index] & 0xf) << 4; + ret = csi_int4_to_float_base(src_tmp >> 4, src, i); + } + dest_data[index] = ret; + } + } +} + +/* Only for CSINN_LAYOUT_OHWI, HWI's size align */ +static void csi_axis0_float_to_int4_alignHWI(struct csi_tensor *dest, struct csi_tensor *src, + int inner_size) +{ + float *src_data = src->data; + int8_t *dest_data = dest->data; + int32_t q_size = dest->quant_channel; + for (int i = 0; i < q_size; i++) { + for (int j = 0; j < inner_size; j++) { + int index = i * inner_size + j; + int input_val = csi_float_to_int4_base(src_data[index], dest, i); + int out_index = i * ((inner_size + 1) / 2) + j / 2; + /* int4 little endian */ + if (j % 2) { + dest_data[out_index] = (dest_data[out_index] & 0xf) | (input_val << 4); + } else { + /* init as 0 at first access half of byte */ + dest_data[out_index] = 0; + dest_data[out_index] = (dest_data[out_index] & 0xf0) | (input_val & 0xf); + } + } + } +} + +static void csi_nchw_int4_to_float(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) +{ + int8_t *src_data = src->data; + float *dest_data = dest->data; + int32_t q_size = src->quant_channel; + for (int i = 0; i < q_size; i++) { + for (int j = 0; j < inner_size; j++) { + int index = n * q_size * inner_size + i * inner_size + j; + int in_index = index / 2; + float ret = 0; + int8_t src_tmp = 0; + /* int4 little endian */ + if (index % 2) { + src_tmp = src_data[in_index] & 0xf0; + ret = csi_int4_to_float_base(src_tmp >> 4, src, i); + } else { + src_tmp = (src_data[in_index] & 0xf) << 4; + ret = csi_int4_to_float_base(src_tmp >> 4, src, i); + } + dest_data[index] = ret; + } + } +} + +static void csi_nhwc_int4_to_float(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) +{ + int8_t *src_data = src->data; + float *dest_data = dest->data; + int32_t q_size = src->quant_channel; + for (int j = 0; j < inner_size; j++) { + for (int i = 0; i < q_size; i++) { + int index = n * q_size * inner_size + j * q_size + i; + int in_index = index / 2; + float ret = 0; + int8_t src_tmp = 0; + /* int4 little endian */ + if (index % 2) { + src_tmp = src_data[in_index] & 0xf0; + ret = csi_int4_to_float_base(src_tmp >> 4, src, i); + } else { + src_tmp = (src_data[in_index] & 0xf) << 4; + ret = csi_int4_to_float_base(src_tmp >> 4, src, i); + } + dest_data[index] = ret; + } + } +} + +static void csi_nchw_float_to_int4(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) +{ + float *src_data = src->data; + int8_t *dest_data = dest->data; + int32_t q_size = dest->quant_channel; + for (int i = 0; i < q_size; i++) { + for (int j = 0; j < inner_size; j++) { + int index = n * q_size * inner_size + i * inner_size + j; + int input_val = csi_float_to_int4_base(src_data[index], dest, i); + int out_index = index / 2; + /* int4 little endian */ + if (index % 2) { + dest_data[out_index] = (dest_data[out_index] & 0xf) | (input_val << 4); + } else { + dest_data[out_index] = (dest_data[out_index] & 0xf0) | (input_val & 0xf); + } + } + } +} + +static void csi_nhwc_float_to_int4(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) +{ + float *src_data = src->data; + int8_t *dest_data = dest->data; + int32_t q_size = dest->quant_channel; + for (int j = 0; j < inner_size; j++) { + for (int i = 0; i < q_size; i++) { + int index = n * q_size * inner_size + j * q_size + i; + int input_val = csi_float_to_int4_base(src_data[index], dest, i); + int out_index = index / 2; + /* int4 little endian */ + if (index % 2) { + dest_data[out_index] = (dest_data[out_index] & 0xf) | (input_val << 4); + } else { + dest_data[out_index] = (dest_data[out_index] & 0xf0) | input_val; + } + } + } +} + +static void csi_nchw_uint8_to_float(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { uint8_t *src_data = src->data; float *dest_data = dest->data; @@ -382,8 +544,8 @@ static void csi_nchw_uint8_to_float(struct csi_tensor *dest, struct csi_tensor * } } -static void csi_nhwc_uint8_to_float(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nhwc_uint8_to_float(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { uint8_t *src_data = src->data; float *dest_data = dest->data; @@ -396,8 +558,8 @@ static void csi_nhwc_uint8_to_float(struct csi_tensor *dest, struct csi_tensor * } } -static void csi_nchw_float_to_uint8(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nchw_float_to_uint8(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { float *src_data = src->data; uint8_t *dest_data = dest->data; @@ -409,8 +571,8 @@ static void csi_nchw_float_to_uint8(struct csi_tensor *dest, struct csi_tensor * } } } -static void csi_nhwc_float_to_uint8(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nhwc_float_to_uint8(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { float *src_data = src->data; uint8_t *dest_data = dest->data; @@ -423,8 +585,8 @@ static void csi_nhwc_float_to_uint8(struct csi_tensor *dest, struct csi_tensor * } } -static void csi_nchw_int8_to_float(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nchw_int8_to_float(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { int8_t *src_data = src->data; float *dest_data = dest->data; @@ -436,8 +598,8 @@ static void csi_nchw_int8_to_float(struct csi_tensor *dest, struct csi_tensor *s } } } -static void csi_nhwc_int8_to_float(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nhwc_int8_to_float(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { int8_t *src_data = src->data; float *dest_data = dest->data; @@ -450,8 +612,8 @@ static void csi_nhwc_int8_to_float(struct csi_tensor *dest, struct csi_tensor *s } } -static void csi_nchw_float_to_int8(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nchw_float_to_int8(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { float *src_data = src->data; int8_t *dest_data = dest->data; @@ -464,8 +626,8 @@ static void csi_nchw_float_to_int8(struct csi_tensor *dest, struct csi_tensor *s } } -static void csi_nhwc_float_to_int8(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nhwc_float_to_int8(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { float *src_data = src->data; int8_t *dest_data = dest->data; @@ -478,8 +640,8 @@ static void csi_nhwc_float_to_int8(struct csi_tensor *dest, struct csi_tensor *s } } -static void csi_nchw_int16_to_float(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nchw_int16_to_float(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { int16_t *src_data = src->data; float *dest_data = dest->data; @@ -491,8 +653,9 @@ static void csi_nchw_int16_to_float(struct csi_tensor *dest, struct csi_tensor * } } } -static void csi_nhwc_int16_to_float(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) + +static void csi_nhwc_int16_to_float(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { int16_t *src_data = src->data; float *dest_data = dest->data; @@ -505,8 +668,8 @@ static void csi_nhwc_int16_to_float(struct csi_tensor *dest, struct csi_tensor * } } -static void csi_nchw_float_to_int16(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nchw_float_to_int16(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { float *src_data = src->data; int16_t *dest_data = dest->data; @@ -519,8 +682,8 @@ static void csi_nchw_float_to_int16(struct csi_tensor *dest, struct csi_tensor * } } -static void csi_nhwc_float_to_int16(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nhwc_float_to_int16(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { float *src_data = src->data; int16_t *dest_data = dest->data; @@ -533,8 +696,8 @@ static void csi_nhwc_float_to_int16(struct csi_tensor *dest, struct csi_tensor * } } -static void csi_nchw_int32_to_float(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nchw_int32_to_float(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { int32_t *src_data = src->data; float *dest_data = dest->data; @@ -547,8 +710,8 @@ static void csi_nchw_int32_to_float(struct csi_tensor *dest, struct csi_tensor * } } -static void csi_nhwc_int32_to_float(struct csi_tensor *dest, struct csi_tensor *src, - int n, int inner_size) +static void csi_nhwc_int32_to_float(struct csi_tensor *dest, struct csi_tensor *src, int n, + int inner_size) { int32_t *src_data = src->data; float *dest_data = dest->data; @@ -581,44 +744,213 @@ static void csi_float_to_f16(struct csi_tensor *dest, struct csi_tensor *src) } } -int csi_tensor_data_convert_weight(struct csi_tensor *dest, struct csi_tensor *src){ +static void csi_bf16_to_float(struct csi_tensor *dest, struct csi_tensor *src) +{ + int16_t *src_data = src->data; + float *dest_data = dest->data; + int32_t size = csi_tensor_size(src); + for (int j = 0; j < size; j++) { + dest_data[j] = csi_ref_bfloat16_to_float32(src_data[j]); + } +} + +static void csi_float_to_bf16(struct csi_tensor *dest, struct csi_tensor *src) +{ + float *src_data = src->data; + int16_t *dest_data = dest->data; + int32_t size = csi_tensor_size(src); + for (int i = 0; i < size; i++) { + dest_data[i] = csi_ref_float32_to_bfloat16(src_data[i]); + } +} + +int csi_tensor_data_convert_weight(struct csi_tensor *dest, struct csi_tensor *src) +{ int size = csi_tensor_size(src); int inner_size = src->quant_channel == 0 ? size : size / src->quant_channel; - if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_UINT8) { - if (src->layout >= CSINN_LAYOUT_O && src->layout <= CSINN_LAYOUT_OIDHW){ - csi_nchw_uint8_to_float(dest, src, 0, inner_size); - }else if (src->layout >= CSINN_LAYOUT_OWI && src->layout <= CSINN_LAYOUT_ODHWI){ - csi_nhwc_uint8_to_float(dest, src, 0, inner_size); + if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_INT4) { + switch (src->layout) { + case CSINN_LAYOUT_O: + case CSINN_LAYOUT_OI: + case CSINN_LAYOUT_OIW: + case CSINN_LAYOUT_OIHW: + case CSINN_LAYOUT_OIDHW: + case CSINN_LAYOUT_O1HW: + case CSINN_LAYOUT_OWI: + case CSINN_LAYOUT_ODHWI: + csi_nchw_int4_to_float(dest, src, 0, inner_size); + break; + case CSINN_LAYOUT_OHWI: + csi_axis0_int4_to_float_alignHWI(dest, src, inner_size); + break; + case CSINN_LAYOUT_1HWO: + csi_nhwc_int4_to_float(dest, src, 0, inner_size); + break; + default: + break; + } + } else if (dest->dtype == CSINN_DTYPE_INT4 && src->dtype == CSINN_DTYPE_FLOAT32) { + switch (src->layout) { + case CSINN_LAYOUT_O: + case CSINN_LAYOUT_OI: + case CSINN_LAYOUT_OIW: + case CSINN_LAYOUT_OIHW: + case CSINN_LAYOUT_OIDHW: + case CSINN_LAYOUT_O1HW: + case CSINN_LAYOUT_OWI: + case CSINN_LAYOUT_ODHWI: + csi_nchw_float_to_int4(dest, src, 0, inner_size); + break; + case CSINN_LAYOUT_OHWI: + csi_axis0_float_to_int4_alignHWI(dest, src, inner_size); + case CSINN_LAYOUT_1HWO: + csi_nhwc_float_to_int4(dest, src, 0, inner_size); + break; + default: + break; + } + } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_UINT8) { + switch (src->layout) { + case CSINN_LAYOUT_O: + case CSINN_LAYOUT_OI: + case CSINN_LAYOUT_OIW: + case CSINN_LAYOUT_OIHW: + case CSINN_LAYOUT_OIDHW: + case CSINN_LAYOUT_O1HW: + case CSINN_LAYOUT_OWI: + case CSINN_LAYOUT_OHWI: + case CSINN_LAYOUT_ODHWI: + csi_nchw_uint8_to_float(dest, src, 0, inner_size); + break; + case CSINN_LAYOUT_1HWO: + csi_nhwc_uint8_to_float(dest, src, 0, inner_size); + break; + default: + break; } } else if (dest->dtype == CSINN_DTYPE_UINT8 && src->dtype == CSINN_DTYPE_FLOAT32) { - if (src->layout >= CSINN_LAYOUT_O && src->layout <= CSINN_LAYOUT_OIDHW){ - csi_nchw_float_to_uint8(dest, src, 0, inner_size); - }else if (src->layout >= CSINN_LAYOUT_OWI && src->layout <= CSINN_LAYOUT_ODHWI){ - csi_nhwc_float_to_uint8(dest, src, 0, inner_size); + switch (src->layout) { + case CSINN_LAYOUT_O: + case CSINN_LAYOUT_OI: + case CSINN_LAYOUT_OIW: + case CSINN_LAYOUT_OIHW: + case CSINN_LAYOUT_OIDHW: + case CSINN_LAYOUT_O1HW: + case CSINN_LAYOUT_OWI: + case CSINN_LAYOUT_OHWI: + case CSINN_LAYOUT_ODHWI: + csi_nchw_float_to_uint8(dest, src, 0, inner_size); + break; + case CSINN_LAYOUT_1HWO: + csi_nhwc_float_to_uint8(dest, src, 0, inner_size); + break; + default: + break; } } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_INT8) { - if (src->layout >= CSINN_LAYOUT_O && src->layout <= CSINN_LAYOUT_OIDHW){ - csi_nchw_int8_to_float(dest, src, 0, inner_size); - }else if (src->layout >= CSINN_LAYOUT_OWI && src->layout <= CSINN_LAYOUT_ODHWI){ - csi_nhwc_int8_to_float(dest, src, 0, inner_size); + switch (src->layout) { + case CSINN_LAYOUT_O: + case CSINN_LAYOUT_OI: + case CSINN_LAYOUT_OIW: + case CSINN_LAYOUT_OIHW: + case CSINN_LAYOUT_OIDHW: + case CSINN_LAYOUT_O1HW: + case CSINN_LAYOUT_OWI: + case CSINN_LAYOUT_OHWI: + case CSINN_LAYOUT_ODHWI: + csi_nchw_int8_to_float(dest, src, 0, inner_size); + break; + case CSINN_LAYOUT_1HWO: + csi_nhwc_int8_to_float(dest, src, 0, inner_size); + break; + default: + break; } } else if (dest->dtype == CSINN_DTYPE_INT8 && src->dtype == CSINN_DTYPE_FLOAT32) { - if (src->layout >= CSINN_LAYOUT_O && src->layout <= CSINN_LAYOUT_OIDHW){ - csi_nchw_float_to_int8(dest, src, 0, inner_size); - }else if (src->layout >= CSINN_LAYOUT_OWI && src->layout <= CSINN_LAYOUT_ODHWI){ - csi_nhwc_float_to_int8(dest, src, 0, inner_size); + switch (src->layout) { + case CSINN_LAYOUT_O: + case CSINN_LAYOUT_OI: + case CSINN_LAYOUT_OIW: + case CSINN_LAYOUT_OIHW: + case CSINN_LAYOUT_OIDHW: + case CSINN_LAYOUT_O1HW: + case CSINN_LAYOUT_OWI: + case CSINN_LAYOUT_OHWI: + case CSINN_LAYOUT_ODHWI: + csi_nchw_float_to_int8(dest, src, 0, inner_size); + break; + case CSINN_LAYOUT_1HWO: + csi_nhwc_float_to_int8(dest, src, 0, inner_size); + break; + default: + break; + } + } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_INT16) { + switch (src->layout) { + case CSINN_LAYOUT_O: + case CSINN_LAYOUT_OI: + case CSINN_LAYOUT_OIW: + case CSINN_LAYOUT_OIHW: + case CSINN_LAYOUT_OIDHW: + case CSINN_LAYOUT_O1HW: + case CSINN_LAYOUT_OWI: + case CSINN_LAYOUT_OHWI: + case CSINN_LAYOUT_ODHWI: + csi_nchw_int16_to_float(dest, src, 0, inner_size); + break; + case CSINN_LAYOUT_1HWO: + csi_nhwc_int16_to_float(dest, src, 0, inner_size); + break; + default: + break; + } + } else if (dest->dtype == CSINN_DTYPE_INT16 && src->dtype == CSINN_DTYPE_FLOAT32) { + switch (src->layout) { + case CSINN_LAYOUT_O: + case CSINN_LAYOUT_OI: + case CSINN_LAYOUT_OIW: + case CSINN_LAYOUT_OIHW: + case CSINN_LAYOUT_OIDHW: + case CSINN_LAYOUT_O1HW: + case CSINN_LAYOUT_OWI: + case CSINN_LAYOUT_OHWI: + case CSINN_LAYOUT_ODHWI: + csi_nchw_float_to_int16(dest, src, 0, inner_size); + break; + case CSINN_LAYOUT_1HWO: + csi_nhwc_float_to_int16(dest, src, 0, inner_size); + break; + default: + break; } } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_INT32) { - if (src->layout >= CSINN_LAYOUT_O && src->layout <= CSINN_LAYOUT_OIDHW){ - csi_nchw_int32_to_float(dest, src, 0, inner_size); - }else if (src->layout >= CSINN_LAYOUT_OWI && src->layout <= CSINN_LAYOUT_ODHWI){ - csi_nhwc_int32_to_float(dest, src, 0, inner_size); + switch (src->layout) { + case CSINN_LAYOUT_O: + case CSINN_LAYOUT_OI: + case CSINN_LAYOUT_OIW: + case CSINN_LAYOUT_OIHW: + case CSINN_LAYOUT_OIDHW: + case CSINN_LAYOUT_O1HW: + case CSINN_LAYOUT_OWI: + case CSINN_LAYOUT_OHWI: + case CSINN_LAYOUT_ODHWI: + csi_nchw_int32_to_float(dest, src, 0, inner_size); + break; + case CSINN_LAYOUT_1HWO: + csi_nhwc_int32_to_float(dest, src, 0, inner_size); + break; + default: + break; } } else if (dest->dtype == CSINN_DTYPE_FLOAT16 && src->dtype == CSINN_DTYPE_FLOAT32) { csi_float_to_f16(dest, src); } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_FLOAT16) { csi_f16_to_float(dest, src); - } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_FLOAT32) { + } else if (dest->dtype == CSINN_DTYPE_BFLOAT16 && src->dtype == CSINN_DTYPE_FLOAT32) { + csi_float_to_bf16(dest, src); + } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_BFLOAT16) { + csi_bf16_to_float(dest, src); + } else if (dest->dtype == src->dtype) { memcpy(dest->data, src->data, csi_tensor_byte_size(src)); } else { return CSINN_FALSE; @@ -626,66 +958,83 @@ int csi_tensor_data_convert_weight(struct csi_tensor *dest, struct csi_tensor *s return CSINN_TRUE; } -int csi_tensor_data_convert_activation(struct csi_tensor *dest, struct csi_tensor *src){ +int csi_tensor_data_convert_activation(struct csi_tensor *dest, struct csi_tensor *src) +{ int size = csi_tensor_size(src); - int32_t q_size = src->quant_channel !=0 ? src->quant_channel : dest->quant_channel; + int32_t q_size = src->quant_channel != 0 ? src->quant_channel : dest->quant_channel; if (q_size == 0) { q_size = 1; } int inner_size = size / q_size / src->dim[0]; - if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_UINT8) { - for (int n = 0; n < src->dim[0]; n++){ - if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW){ + if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_INT4) { + for (int n = 0; n < src->dim[0]; n++) { + if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW) { + csi_nchw_int4_to_float(dest, src, n, inner_size); + } else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC) { + csi_nhwc_int4_to_float(dest, src, n, inner_size); + } + } + } else if (dest->dtype == CSINN_DTYPE_INT4 && src->dtype == CSINN_DTYPE_FLOAT32) { + for (int n = 0; n < src->dim[0]; n++) { + if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW) { + csi_nchw_float_to_int4(dest, src, n, inner_size); + } else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC) { + csi_nhwc_float_to_int4(dest, src, n, inner_size); + } + } + } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_UINT8) { + for (int n = 0; n < src->dim[0]; n++) { + if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW) { csi_nchw_uint8_to_float(dest, src, n, inner_size); - }else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC){ + } else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC) { csi_nhwc_uint8_to_float(dest, src, n, inner_size); } } } else if (dest->dtype == CSINN_DTYPE_UINT8 && src->dtype == CSINN_DTYPE_FLOAT32) { - for (int n = 0; n < src->dim[0]; n++){ - if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW){ + for (int n = 0; n < src->dim[0]; n++) { + if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW) { csi_nchw_float_to_uint8(dest, src, n, inner_size); - }else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC){ + } else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC) { csi_nhwc_float_to_uint8(dest, src, n, inner_size); } } } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_INT8) { - for (int n = 0; n < src->dim[0]; n++){ - if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW){ + for (int n = 0; n < src->dim[0]; n++) { + if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW) { csi_nchw_int8_to_float(dest, src, n, inner_size); - }else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC){ + } else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC) { csi_nhwc_int8_to_float(dest, src, n, inner_size); } } } else if (dest->dtype == CSINN_DTYPE_INT8 && src->dtype == CSINN_DTYPE_FLOAT32) { - for (int n = 0; n < src->dim[0]; n++){ - if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW){ + for (int n = 0; n < src->dim[0]; n++) { + if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW) { csi_nchw_float_to_int8(dest, src, n, inner_size); - }else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC){ + } else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC) { csi_nhwc_float_to_int8(dest, src, n, inner_size); } } } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_INT16) { - for (int n = 0; n < src->dim[0]; n++){ - if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW){ + for (int n = 0; n < src->dim[0]; n++) { + if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW) { csi_nchw_int16_to_float(dest, src, n, inner_size); - }else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC){ + } else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC) { csi_nhwc_int16_to_float(dest, src, n, inner_size); } } } else if (dest->dtype == CSINN_DTYPE_INT16 && src->dtype == CSINN_DTYPE_FLOAT32) { - for (int n = 0; n < src->dim[0]; n++){ - if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW){ + for (int n = 0; n < src->dim[0]; n++) { + if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW) { csi_nchw_float_to_int16(dest, src, n, inner_size); - }else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC){ + } else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC) { csi_nhwc_float_to_int16(dest, src, n, inner_size); } } } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_INT32) { - for (int n = 0; n < src->dim[0]; n++){ - if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW){ + for (int n = 0; n < src->dim[0]; n++) { + if (src->layout >= CSINN_LAYOUT_N && src->layout <= CSINN_LAYOUT_NCDHW) { csi_nchw_int32_to_float(dest, src, n, inner_size); - }else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC){ + } else if (src->layout >= CSINN_LAYOUT_NWC && src->layout <= CSINN_LAYOUT_NDHWC) { csi_nhwc_int32_to_float(dest, src, n, inner_size); } } @@ -693,53 +1042,66 @@ int csi_tensor_data_convert_activation(struct csi_tensor *dest, struct csi_tenso csi_float_to_f16(dest, src); } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_FLOAT16) { csi_f16_to_float(dest, src); - } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_FLOAT32) { - memcpy(dest->data, src->data, csi_tensor_size(src) * 4); + } else if (dest->dtype == CSINN_DTYPE_BFLOAT16 && src->dtype == CSINN_DTYPE_FLOAT32) { + csi_float_to_bf16(dest, src); + } else if (dest->dtype == CSINN_DTYPE_FLOAT32 && src->dtype == CSINN_DTYPE_BFLOAT16) { + csi_bf16_to_float(dest, src); + } else if (dest->dtype == src->dtype) { + memcpy(dest->data, src->data, csi_tensor_byte_size(src)); } else { return CSINN_FALSE; } return CSINN_TRUE; } - int csi_tensor_data_convert(struct csi_tensor *dest, struct csi_tensor *src) { if (src->layout != dest->layout) return CSINN_FALSE; - switch (src->layout) - { - case CSINN_LAYOUT_NULL: - return CSINN_TRUE; - case CSINN_LAYOUT_N: - case CSINN_LAYOUT_NC: - case CSINN_LAYOUT_NCW: - case CSINN_LAYOUT_NCHW: - case CSINN_LAYOUT_NHWC: - case CSINN_LAYOUT_NWC: - case CSINN_LAYOUT_NCDHW: - case CSINN_LAYOUT_NDHWC: - return csi_tensor_data_convert_activation(dest, src); - case CSINN_LAYOUT_O: - case CSINN_LAYOUT_OI: - case CSINN_LAYOUT_OIW: - case CSINN_LAYOUT_OWI: - case CSINN_LAYOUT_OIHW: - case CSINN_LAYOUT_OHWI: - case CSINN_LAYOUT_OIDHW: - case CSINN_LAYOUT_ODHWI: - return csi_tensor_data_convert_weight(dest, src); - default: - return CSINN_FALSE; + switch (src->layout) { + case CSINN_LAYOUT_NULL: + return CSINN_TRUE; + case CSINN_LAYOUT_N: + case CSINN_LAYOUT_NC: + case CSINN_LAYOUT_NCW: + case CSINN_LAYOUT_NCHW: + case CSINN_LAYOUT_NHWC: + case CSINN_LAYOUT_NWC: + case CSINN_LAYOUT_NCDHW: + case CSINN_LAYOUT_NDHWC: + return csi_tensor_data_convert_activation(dest, src); + case CSINN_LAYOUT_O: + case CSINN_LAYOUT_OI: + case CSINN_LAYOUT_OIW: + case CSINN_LAYOUT_OWI: + case CSINN_LAYOUT_OIHW: + case CSINN_LAYOUT_OHWI: + case CSINN_LAYOUT_OIDHW: + case CSINN_LAYOUT_ODHWI: + case CSINN_LAYOUT_O1HW: + case CSINN_LAYOUT_1HWO: + return csi_tensor_data_convert_weight(dest, src); + default: + return CSINN_FALSE; } } -#if ((!defined CSI_BUILD_I805) && (!defined CSI_BUILD_E804) && (!defined CSI_BUILD_REF_I805)) -#define BILLION 1000000000 +#ifdef CSI_BUILD_RTOS +uint64_t csi_get_timespec() { return 0; } + +void csi_print_time_interval(uint64_t start, uint64_t end, const char *msg) { return; } +#else +#define BILLION 1000000000 uint64_t csi_get_timespec() { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (uint64_t)((uint64_t)ts.tv_nsec + (uint64_t)ts.tv_sec * BILLION); } -#endif +void csi_print_time_interval(uint64_t start, uint64_t end, const char *msg) +{ + printf("Run %s time: %.5fms, FPS=%.2f\n", msg, ((double)(end - start)) / 1000000, + 1000000000.0 / ((double)(end - start))); +} +#endif diff --git a/source/nn2/where.c b/source/nn2/where.c index 58f4af8b..8d234a54 100644 --- a/source/nn2/where.c +++ b/source/nn2/where.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/xor.c b/source/nn2/xor.c index f8503531..4a2a4e5c 100644 --- a/source/nn2/xor.c +++ b/source/nn2/xor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/nn2/yuv_rgb_scale.c b/source/nn2/yuv_rgb_scale.c index 2ed3efdb..fb5180ce 100644 --- a/source/nn2/yuv_rgb_scale.c +++ b/source/nn2/yuv_rgb_scale.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" diff --git a/source/reference/abs.c b/source/reference/abs.c index ec049e3a..15924b4d 100644 --- a/source/reference/abs.c +++ b/source/reference/abs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_abs_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_abs_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -34,8 +32,7 @@ int csi_ref_abs_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_abs_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_abs_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_abs_f32); diff --git a/source/reference/acos.c b/source/reference/acos.c index b54ca51f..1267fb6f 100644 --- a/source/reference/acos.c +++ b/source/reference/acos.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_acos_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_acos_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -34,8 +33,7 @@ int csi_ref_acos_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_acos_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_acos_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_acos_f32); diff --git a/source/reference/acosh.c b/source/reference/acosh.c index fdc6af9b..2d77e373 100644 --- a/source/reference/acosh.c +++ b/source/reference/acosh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_acosh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_acosh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -34,8 +33,7 @@ int csi_ref_acosh_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_acosh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_acosh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_acosh_f32); diff --git a/source/reference/add.c b/source/reference/add.c index d521767a..6c6d7ac3 100644 --- a/source/reference/add.c +++ b/source/reference/add.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,19 +16,16 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -static void element_add_f32(float *src0, float *src1, float *dest, - int input_idx, int output_idx) +static void element_add_f32(float *src0, float *src1, float *dest, int input_idx, int output_idx) { dest[output_idx] = src0[output_idx] + src1[input_idx]; } -int csi_ref_add_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_add_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { struct csi_ref_diso_callback cb; @@ -38,10 +35,8 @@ int csi_ref_add_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_add_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_add_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_add_f32); } diff --git a/source/reference/and.c b/source/reference/and.c index 7cfa84d7..afa942d2 100644 --- a/source/reference/and.c +++ b/source/reference/and.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_and_u32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_and_u32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { uint32_t *input0_data = input0->data; @@ -36,9 +34,7 @@ int csi_ref_and_u32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_and_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_and_u8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { uint8_t *input0_data = input0->data; @@ -52,9 +48,7 @@ int csi_ref_and_u8(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_and_i8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_and_i8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { int8_t *input0_data = input0->data; diff --git a/source/reference/arange.c b/source/reference/arange.c index 728ac44e..c26c5ca7 100644 --- a/source/reference/arange.c +++ b/source/reference/arange.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,22 +16,21 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_arange_f32(struct csi_tensor *output, - struct arange_params *params) +int csi_ref_arange_f32(struct csi_tensor *output, struct arange_params *params) { - float_t * data = output->data; + float *data = output->data; int j = 0; float i = params->start; while (1) { if (params->step > FLT_EPSILON) { - if (i - params->stop > FLT_EPSILON) //i > stop + if (i - params->stop > FLT_EPSILON) // i > stop break; } else { - if (i - params->stop < FLT_EPSILON) //i < stop + if (i - params->stop < FLT_EPSILON) // i < stop break; } @@ -42,8 +41,7 @@ int csi_ref_arange_f32(struct csi_tensor *output, return CSINN_TRUE; } -int csi_ref_arange_quant(struct csi_tensor *output, - struct arange_params *params) +int csi_ref_arange_quant(struct csi_tensor *output, struct arange_params *params) { struct csi_quant_info qinfo; qinfo.zero_point = 0; diff --git a/source/reference/argmax.c b/source/reference/argmax.c index c27fc67e..978f701e 100644 --- a/source/reference/argmax.c +++ b/source/reference/argmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" @@ -25,16 +25,15 @@ struct ArgPos { int32_t index; }; -static struct ArgPos fargmax_stride(struct ArgPos lhs, struct ArgPos rhs) { - +static struct ArgPos fargmax_stride(struct ArgPos lhs, struct ArgPos rhs) +{ if (lhs.value < rhs.value) { return rhs; } return lhs; } -int csi_ref_argmax_stride_i32_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_argmax_stride_i32_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { float *input_data = input->data; @@ -53,10 +52,12 @@ int csi_ref_argmax_stride_i32_f32(struct csi_tensor *input, for (int32_t out = 0; out < out_size; out++) { struct ArgPos result = {-FLT_MAX, -1}; - int32_t out_index = csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); + int32_t out_index = + csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); for (int32_t inner = 0; inner < inner_size; inner++) { - int32_t index = out_index + csi_ref_get_reduction_index(inner, params->inner_strides, - params->inner_extents, params->m); + int32_t index = + out_index + csi_ref_get_reduction_index(inner, params->inner_strides, + params->inner_extents, params->m); float val = input_data[index]; struct ArgPos pos = {val, inner}; result = fargmax_stride(result, pos); @@ -67,8 +68,7 @@ int csi_ref_argmax_stride_i32_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_argmax_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_argmax_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { int ret; @@ -76,6 +76,4 @@ int csi_ref_argmax_stride_quant(struct csi_tensor *input, ret = csi_ref_argmax_stride_i32_f32(finput, output, params); csi_ref_tensor_transform_free_f32(finput); return ret; - } - diff --git a/source/reference/argmin.c b/source/reference/argmin.c index 236d6122..57158877 100644 --- a/source/reference/argmin.c +++ b/source/reference/argmin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" @@ -25,15 +25,15 @@ struct ArgPos { int32_t index; }; -static struct ArgPos fargmin_stride(struct ArgPos lhs, struct ArgPos rhs) { +static struct ArgPos fargmin_stride(struct ArgPos lhs, struct ArgPos rhs) +{ if (lhs.value > rhs.value) { return rhs; } return lhs; } -int csi_ref_argmin_stride_i32_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_argmin_stride_i32_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { float *input_data = input->data; @@ -52,10 +52,12 @@ int csi_ref_argmin_stride_i32_f32(struct csi_tensor *input, for (int32_t out = 0; out < out_size; out++) { struct ArgPos result = {FLT_MAX, -1}; - int32_t out_index = csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); + int32_t out_index = + csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); for (int32_t inner = 0; inner < inner_size; inner++) { - int32_t index = out_index + csi_ref_get_reduction_index(inner, params->inner_strides, - params->inner_extents, params->m); + int32_t index = + out_index + csi_ref_get_reduction_index(inner, params->inner_strides, + params->inner_extents, params->m); float val = input_data[index]; struct ArgPos pos = {val, inner}; result = fargmin_stride(result, pos); @@ -66,8 +68,7 @@ int csi_ref_argmin_stride_i32_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_argmin_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_argmin_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { int ret; diff --git a/source/reference/asin.c b/source/reference/asin.c index 9d2f7046..bbb5dfd0 100644 --- a/source/reference/asin.c +++ b/source/reference/asin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_asin_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_asin_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -34,8 +33,7 @@ int csi_ref_asin_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_asin_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_asin_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_asin_f32); diff --git a/source/reference/asinh.c b/source/reference/asinh.c index 9f5d50b1..58216b35 100644 --- a/source/reference/asinh.c +++ b/source/reference/asinh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_asinh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_asinh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -34,8 +33,7 @@ int csi_ref_asinh_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_asinh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_asinh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_asinh_f32); diff --git a/source/reference/atan.c b/source/reference/atan.c index 2b6f4713..020aacef 100644 --- a/source/reference/atan.c +++ b/source/reference/atan.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_atan_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_atan_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -34,8 +33,7 @@ int csi_ref_atan_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_atan_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_atan_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_atan_f32); diff --git a/source/reference/atanh.c b/source/reference/atanh.c index 8b71cbfe..0935afc4 100644 --- a/source/reference/atanh.c +++ b/source/reference/atanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_atanh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_atanh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -34,10 +33,8 @@ int csi_ref_atanh_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_atanh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_atanh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_atanh_f32); } - diff --git a/source/reference/averagepool.c b/source/reference/averagepool.c index 10e4305f..cbcd8bc7 100644 --- a/source/reference/averagepool.c +++ b/source/reference/averagepool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_avgpool2d_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_avgpool2d_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -53,8 +52,8 @@ int csi_ref_avgpool2d_nhwc_f32(struct csi_tensor *input, for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) { const int in_x = in_x_origin + filter_x; const int in_y = in_y_origin + filter_y; - total += - input_data[csi_ref_get_index(input->dim, batch, in_y, in_x, channel)]; + total += input_data[csi_ref_get_index(input->dim, batch, in_y, in_x, + channel)]; filter_count++; } } @@ -71,9 +70,8 @@ int csi_ref_avgpool2d_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } -static int csi_ref_avgpool2d_nchw_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +static int csi_ref_avgpool2d_nchw_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -104,8 +102,8 @@ static int csi_ref_avgpool2d_nchw_f32(struct csi_tensor *input, for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) { const int in_x = in_x_origin + filter_x; const int in_y = in_y_origin + filter_y; - total += - input_data[csi_ref_get_index(input->dim, batch, channel, in_y, in_x)]; + total += input_data[csi_ref_get_index(input->dim, batch, channel, in_y, + in_x)]; filter_count++; } } @@ -122,9 +120,8 @@ static int csi_ref_avgpool2d_nchw_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_avgpool2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_avgpool2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { if (params->base.layout == CSINN_LAYOUT_NCHW) { csi_ref_avgpool2d_nchw_f32(input, output, params); @@ -135,10 +132,8 @@ int csi_ref_avgpool2d_f32(struct csi_tensor *input, } } -int csi_ref_avgpool2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_avgpool2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_avgpool2d_f32); } - diff --git a/source/reference/averagepool3d.c b/source/reference/averagepool3d.c index 7e23d26f..e7d879d9 100644 --- a/source/reference/averagepool3d.c +++ b/source/reference/averagepool3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_avgpool3d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_avgpool3d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; @@ -36,42 +35,49 @@ int csi_ref_avgpool3d_f32(struct csi_tensor *input, const int out_height = output->dim[3]; const int out_width = output->dim[4]; - for(int in_ch=0; in_chstride_depth) - params->pad_front; const int in_h_origin = (out_h * params->stride_height) - params->pad_top; const int in_w_origin = (out_w * params->stride_width) - params->pad_left; const int filter_d_begin = csi_ref_max_internal_s32(0, -in_d_origin); - const int filter_d_end = csi_ref_min_internal_s32(params->filter_depth, in_depth - in_d_origin); + const int filter_d_end = + csi_ref_min_internal_s32(params->filter_depth, in_depth - in_d_origin); const int filter_h_begin = csi_ref_max_internal_s32(0, -in_h_origin); - const int filter_h_end = csi_ref_min_internal_s32(params->filter_height, in_height - in_h_origin); + const int filter_h_end = csi_ref_min_internal_s32(params->filter_height, + in_height - in_h_origin); const int filter_w_begin = csi_ref_max_internal_s32(0, -in_w_origin); - const int filter_w_end = csi_ref_min_internal_s32(params->filter_width, in_width - in_w_origin); + const int filter_w_end = + csi_ref_min_internal_s32(params->filter_width, in_width - in_w_origin); float total = 0.0f; int filter_cnt = 0; - for(int filter_d=filter_d_begin; filter_ddim, in_ch, out_ch, in_d, in_h, in_w)]; + total += input_data[csi_ref_get_index_5( + input->dim, in_ch, out_ch, in_d, in_h, in_w)]; filter_cnt++; } } } - if(params->count_include_pad) { - filter_cnt = (params->filter_depth) * (params->filter_height) * (params->filter_width); + if (params->count_include_pad) { + filter_cnt = (params->filter_depth) * (params->filter_height) * + (params->filter_width); } // float average = filter_cnt==0 ? total : total/filter_cnt; - float average = total/filter_cnt; - output_data[csi_ref_get_index_5(output->dim, in_ch, out_ch, out_d, out_h, out_w)] = average; + float average = total / filter_cnt; + output_data[csi_ref_get_index_5(output->dim, in_ch, out_ch, out_d, out_h, + out_w)] = average; } } } @@ -81,9 +87,8 @@ int csi_ref_avgpool3d_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_avgpool3d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_avgpool3d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_avgpool3d_f32); } diff --git a/source/reference/batch_normalization.c b/source/reference/batch_normalization.c index 95bae686..7fcd63ed 100644 --- a/source/reference/batch_normalization.c +++ b/source/reference/batch_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,29 +16,27 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -/* https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/ops/nn_impl.py#L1474-L1542 */ -int csi_ref_batch_normalization_f32(struct csi_tensor *input, - struct csi_tensor *mean, - struct csi_tensor *variance, - struct csi_tensor *gamma, - struct csi_tensor *beta, - struct csi_tensor *output, +/* https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/ops/nn_impl.py#L1474-L1542 + */ +int csi_ref_batch_normalization_f32(struct csi_tensor *input, struct csi_tensor *mean, + struct csi_tensor *variance, struct csi_tensor *gamma, + struct csi_tensor *beta, struct csi_tensor *output, struct bn_params *params) { float *input_data = input->data; - float *mean_data = mean->data; - float *var_data = variance->data; - float *beta_data = beta->data; + float *mean_data = mean->data; + float *var_data = variance->data; + float *beta_data = beta->data; float *output_data = output->data; const int dims_count = input->dim_count; int batches = 1; /* compute the outer size */ - for(int i = 0; i < dims_count - 1; i++ ){ + for (int i = 0; i < dims_count - 1; i++) { batches *= input->dim[i]; } @@ -47,14 +45,14 @@ int csi_ref_batch_normalization_f32(struct csi_tensor *input, for (int b = 0; b < batches; ++b) { for (int c = 0; c < input->dim[dims_count - 1]; ++c) { float intput_val = input_data[b * batch_offset + c]; - float mean_val = mean_data[c]; - float var_val = var_data[c]; - float beta_val = beta_data[c]; - float result = 1/sqrt(var_val + params->epsilon); + float mean_val = mean_data[c]; + float var_val = var_data[c]; + float beta_val = beta_data[c]; + float result = 1 / sqrt(var_val + params->epsilon); result *= (intput_val - mean_val); if (gamma != NULL) { - float *gamma_data = gamma->data; - result *= gamma_data[c]; + float *gamma_data = gamma->data; + result *= gamma_data[c]; } result += beta_val; output_data[b * batch_offset + c] = result; @@ -64,13 +62,9 @@ int csi_ref_batch_normalization_f32(struct csi_tensor *input, return CSINN_TRUE; } - -int csi_ref_batch_normalization_quant(struct csi_tensor *input, - struct csi_tensor *mean, - struct csi_tensor *variance, - struct csi_tensor *gamma, - struct csi_tensor *beta, - struct csi_tensor *output, +int csi_ref_batch_normalization_quant(struct csi_tensor *input, struct csi_tensor *mean, + struct csi_tensor *variance, struct csi_tensor *gamma, + struct csi_tensor *beta, struct csi_tensor *output, struct bn_params *params) { int ret; diff --git a/source/reference/batch_to_space.c b/source/reference/batch_to_space.c index b169b64b..aded7dcf 100644 --- a/source/reference/batch_to_space.c +++ b/source/reference/batch_to_space.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -//the input->data is a 4-D Tensor with shape [batch, depth, height, width]. -int csi_ref_batch_to_space_f32(struct csi_tensor *input, - struct csi_tensor *output, +// the input->data is a 4-D Tensor with shape [batch, depth, height, width]. +int csi_ref_batch_to_space_f32(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_params *params) { float *input_data = (float *)input->data; @@ -36,28 +35,32 @@ int csi_ref_batch_to_space_f32(struct csi_tensor *input, int block_size = params->block_size; int block_size2 = block_size * block_size; - int out_batch = output->dim[0]; //out_batch = batch / block_size2; - int out_channel = output->dim[1]; //out_channel = in_channel; - int out_height = output->dim[2]; //out_height = in_height * block_size - params->crop_top - params->crop_bottom; - int out_width = output->dim[3]; //out_width = in_width * block_size - params->crop_left - params->crop_right; - - for(int out_b = 0; out_b < out_batch; ++out_b) { - for(int in_h = 0; in_h< in_height; ++in_h) { - for(int in_w = 0; in_w < in_width; ++in_w) { - for(int out_c=0; out_cdim[0]; // out_batch = batch / block_size2; + int out_channel = output->dim[1]; // out_channel = in_channel; + int out_height = output->dim[2]; // out_height = in_height * block_size - params->crop_top - + // params->crop_bottom; + int out_width = output->dim[3]; // out_width = in_width * block_size - params->crop_left - + // params->crop_right; + for (int out_b = 0; out_b < out_batch; ++out_b) { + for (int in_h = 0; in_h < in_height; ++in_h) { + for (int in_w = 0; in_w < in_width; ++in_w) { + for (int out_c = 0; out_c < out_channel; ++out_c) { float *temp = (float *)csi_mem_alloc(block_size2 * sizeof(float)); int in_start_addr = csi_ref_get_index(input->dim, out_b, out_c, in_h, in_w); - for(int i = 0; i < block_size2; ++i) { - temp[i] = input_data[in_start_addr + i * out_batch * out_channel * in_height * in_width]; + for (int i = 0; i < block_size2; ++i) { + temp[i] = input_data[in_start_addr + + i * out_batch * out_channel * in_height * in_width]; } - for(int h = 0; h < block_size; ++h) { - for(int w = 0; w < block_size; ++w) { + for (int h = 0; h < block_size; ++h) { + for (int w = 0; w < block_size; ++w) { int h_now = in_h * block_size + h - params->crop_top; int w_now = in_w * block_size + w - params->crop_left; - if(h_now >= 0 && h_now < out_height && w_now >= 0 && w_now < out_width) { - int out_addr = csi_ref_get_index(output->dim, out_b, out_c, h_now, w_now); + if (h_now >= 0 && h_now < out_height && w_now >= 0 && + w_now < out_width) { + int out_addr = + csi_ref_get_index(output->dim, out_b, out_c, h_now, w_now); output_data[out_addr] = temp[h * block_size + w]; } } @@ -70,10 +73,8 @@ int csi_ref_batch_to_space_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_batch_to_space_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_batch_to_space_quant(struct csi_tensor *input, struct csi_tensor *output, struct batch_to_space_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_batch_to_space_f32); } - diff --git a/source/reference/broadcast_to.c b/source/reference/broadcast_to.c index 9bc1244e..d0d29fa6 100644 --- a/source/reference/broadcast_to.c +++ b/source/reference/broadcast_to.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" diff --git a/source/reference/cache_conv1d.c b/source/reference/cache_conv1d.c new file mode 100644 index 00000000..d1062676 --- /dev/null +++ b/source/reference/cache_conv1d.c @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_internal.h" +#include "csi_ref.h" + +int csi_ref_cache_conv1d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params) +{ + size_t data_size = + output->dim[0] * output->dim[1] * output->dim[2] * sizeof(float); // 512*13*2 + asr_buffer_init(¶ms->asr_buffer, 2 * data_size, data_size); + + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_ref_cache_conv1d_f32; + } else { + params->base.bc = csi_ref_cache_conv1d_quant; + } + + return CSINN_TRUE; +} + +int csi_ref_cache_conv1d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params) +{ + float *input_data = input->data; + float *output_data = output->data; + float *weights_data = weight->data; + float *bias_data = bias->data; + const int weights_dims_count = weight->dim_count; + const int output_depth = weight->dim[weights_dims_count - 3]; + const int accum_depth = weight->dim[weights_dims_count - 2]; + const int batches = input->dim[1]; + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + float total = 0.f; + for (int d = 0; d < accum_depth; ++d) { + total += input_data[b * accum_depth + d] * weights_data[out_c * accum_depth + d]; + } + float bias_value = 0.0f; + if (bias->dim_count != 0) { + bias_value = bias_data[out_c]; + } + output_data[out_c + output_depth * b] = total + bias_value; + } + } + size_t insert_lenth = output->dim[1] * input->dim[1]; + float *output_from_buffer; + output_from_buffer = + asr_buffer_insert_back(¶ms->asr_buffer, output_data, insert_lenth * sizeof(float)); + size_t output_lenth = output->dim[0] * output->dim[1] * output->dim[2]; + int32_t *shape = output->dim; + for (int i = 0; i < shape[2]; i++) { + int j = 0; + for (; j < shape[1]; j++) { + int out_pos = j * shape[2] + i; + output_data[out_pos] = output_from_buffer[i * shape[1] + j]; + } + } +} + +int csi_ref_cache_conv1d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params) +{ + struct csi_tensor *float_input = csi_ref_tensor_transform_f32(input); + struct csi_tensor *float_output = csi_ref_tensor_transform_f32(output); + struct csi_tensor *float_weight = csi_ref_tensor_transform_f32(weight); + struct csi_tensor *float_bias = csi_ref_tensor_transform_f32(bias); + + int ret = csi_ref_cache_conv1d_f32(float_input, float_output, float_weight, float_bias, params); + + csi_tensor_data_convert(output, float_output); + + csi_ref_tensor_transform_free_f32(float_input); + csi_ref_tensor_transform_free_f32(float_output); + csi_ref_tensor_transform_free_f32(float_weight); + csi_ref_tensor_transform_free_f32(float_bias); + + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/reference/cache_matmul.c b/source/reference/cache_matmul.c new file mode 100644 index 00000000..1189f9af --- /dev/null +++ b/source/reference/cache_matmul.c @@ -0,0 +1,207 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_internal.h" +#include "csi_ref.h" + +// asr data buffer +void asr_buffer_init(struct asr_buffer_t *buffer, size_t buffer_size, size_t data_lenth) +{ + buffer->buffer = csi_mem_alloc(buffer_size); + buffer->buffer_lenth = buffer_size; + buffer->data_lenth = data_lenth; + buffer->writer_index = buffer_size - data_lenth; + buffer->flag = 0; //用来记录有没有经过位置0.有的话置为1. +} + +// insert front +void *asr_buffer_insert_front(struct asr_buffer_t *buffer, void *input, size_t len) +{ + int start_position = buffer->writer_index - len; + uint8_t *p = NULL; + if (buffer->flag == 0) { + if (start_position < 0) { + buffer->flag = 1; + } + } + if (start_position >= 0) { + p = &buffer->buffer[start_position]; + memcpy(p, input, len); + buffer->writer_index = start_position; + if (buffer->flag == 0) { + return (void *)&buffer->buffer[0]; + } else { + return (void *)p; + } + } else { + start_position = buffer->buffer_lenth - buffer->data_lenth; + p = &buffer->buffer[start_position]; + memcpy(p, input, len); + memcpy(p + len, &buffer->buffer[buffer->writer_index], buffer->data_lenth - len); + buffer->writer_index = start_position; + return (void *)p; + } +} + +void *asr_buffer_insert_back(struct asr_buffer_t *buffer, void *input, size_t len) +{ + int end_position = buffer->writer_index + len; + uint8_t *p = NULL; + if (end_position <= buffer->buffer_lenth) { + p = &buffer->buffer[buffer->writer_index]; + memcpy(p, input, len); + buffer->writer_index += len; + p -= (buffer->data_lenth - len); + } else { + p = &buffer->buffer[buffer->writer_index + len - buffer->data_lenth]; + memcpy(&buffer->buffer[0], p, buffer->data_lenth - len); + buffer->writer_index = buffer->data_lenth; + memcpy(&buffer->buffer[buffer->data_lenth - len], input, len); + p = &buffer->buffer[0]; + } + return (void *)p; +} + +// get buffer +void *asr_buffer_get_buffer(struct asr_buffer_t *buffer) +{ + return asr_buffer_insert_back(buffer, NULL, 0); +} + +// reset buffer +void asr_buffer_reset(struct asr_buffer_t *buffer) +{ + free(buffer->buffer); + buffer->writer_index = 0; + buffer->buffer = NULL; + buffer->buffer_lenth = 0; + buffer->data_lenth = 0; + buffer->flag = 0; +} + +int csi_ref_cache_matmul_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params) +{ + size_t data_size = + params->shape[0] * params->shape[1] * params->shape[2] * params->shape[3] * sizeof(float); + asr_buffer_init(¶ms->asr_buffer, 2 * data_size, data_size); + + int accum_depth = weight->dim[0]; + int output_depth = weight->dim[1]; + + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_ref_cache_matmul_f32; + } else { + params->base.bc = csi_ref_cache_matmul_quant; + } + + return CSINN_TRUE; +} + +int csi_ref_cache_matmul_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params) +{ + int accum_depth = weight->dim[0]; + int output_depth = weight->dim[1]; + int batches = input->dim[1]; + float *input_data = input->data; + float *output_data = output->data; + float *weight_data = weight->data; + float *bias_data = bias->data; + + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + float total = 0.f; + for (int d = 0; d < accum_depth; ++d) { + total += input_data[b * accum_depth + d] * weight_data[out_c * accum_depth + d]; + } + float bias_value = 0.0f; + + bias_value = bias_data[out_c]; + + int out_pos = out_c + b * output_depth; //如果无transpose + output_data[out_pos] = total + bias_value; + } + } + + float judge = + bias_data[0] + bias_data[1] + bias_data[2] + bias_data[3] + bias_data[4] + bias_data[5]; + size_t insert_lenth = output_depth * batches; + float *output_from_buffer; + if (fabs(judge) < 0.01) { + output_from_buffer = + asr_buffer_insert_front(¶ms->asr_buffer, output_data, insert_lenth * sizeof(float)); + } else { + output_from_buffer = + asr_buffer_insert_back(¶ms->asr_buffer, output_data, insert_lenth * sizeof(float)); + } + // deal with reshape & transpose + int32_t *shape = output->dim; + + // transpose can only be 0,2,3,1 or 0,2,1,3 + if (params->axes[2] == 3) // 0,2,3,1 + { + int batch = shape[3]; + int shape3 = shape[2]; + int flatten_shape = shape[1] * shape[2]; + for (int i = 0; i < batch; i++) { + for (int j = 0; j < flatten_shape; j++) { + int out_pos = j * batch + i; + output_data[out_pos] = output_from_buffer[i * flatten_shape + j]; + } + } + } else // 0,2,1,3 + { + int batch = shape[2]; + int shape3 = shape[3]; + int flatten_shape = shape[1] * shape[3]; + for (int i = 0; i < batch; i++) { + for (int j = 0; j < flatten_shape; j++) { + int out_pos = i * shape3 + j % shape3 + batch * shape3 * (j / shape3); + output_data[out_pos] = output_from_buffer[i * flatten_shape + j]; + } + } + } + + return CSINN_TRUE; +} + +int csi_ref_cache_matmul_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params) +{ + struct csi_tensor *float_input = csi_ref_tensor_transform_f32(input); + struct csi_tensor *float_output = csi_ref_tensor_transform_f32(output); + struct csi_tensor *float_weight = csi_ref_tensor_transform_f32(weight); + struct csi_tensor *float_bias = csi_ref_tensor_transform_f32(bias); + + int ret = csi_ref_cache_matmul_f32(float_input, float_output, float_weight, float_bias, params); + + csi_tensor_data_convert(output, float_output); + + csi_ref_tensor_transform_free_f32(float_input); + csi_ref_tensor_transform_free_f32(float_output); + csi_ref_tensor_transform_free_f32(float_weight); + csi_ref_tensor_transform_free_f32(float_bias); + + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/reference/ceil.c b/source/reference/ceil.c index dc93a086..d77f4950 100644 --- a/source/reference/ceil.c +++ b/source/reference/ceil.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_ceil_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ceil_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; @@ -38,8 +37,7 @@ int csi_ref_ceil_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_ceil_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ceil_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_ceil_f32); diff --git a/source/reference/clip.c b/source/reference/clip.c index 74d1ffbd..2db4c3aa 100644 --- a/source/reference/clip.c +++ b/source/reference/clip.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_clip_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_clip_f32(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params) { float *input_data = (float *)input->data; @@ -32,9 +31,9 @@ int csi_ref_clip_f32(struct csi_tensor *input, } for (int i = 0; i < size; i++) { - if(input_data[i] < params->min_value) { + if (input_data[i] < params->min_value) { output_data[i] = params->min_value; - } else if(input_data[i] > params->max_value) { + } else if (input_data[i] > params->max_value) { output_data[i] = params->max_value; } else { output_data[i] = input_data[i]; @@ -43,8 +42,7 @@ int csi_ref_clip_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_clip_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_clip_quant(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_clip_f32); diff --git a/source/reference/col2im.c b/source/reference/col2im.c index 12835e68..7a394509 100644 --- a/source/reference/col2im.c +++ b/source/reference/col2im.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_col2im_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct col2im_params *params) +int csi_ref_col2im_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct col2im_params *params) { int32_t height = input->dim[1]; int32_t width = input->dim[2]; diff --git a/source/reference/concat.c b/source/reference/concat.c index 1bbea41d..619b32a0 100644 --- a/source/reference/concat.c +++ b/source/reference/concat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_concat_f32(struct csi_tensor **input, - struct csi_tensor *output, +int csi_ref_concat_f32(struct csi_tensor **input, struct csi_tensor *output, struct concat_params *params) { int64_t outer_size = 1; @@ -48,11 +47,10 @@ int csi_ref_concat_f32(struct csi_tensor **input, return CSINN_TRUE; } -int csi_ref_concat_quant(struct csi_tensor **input, - struct csi_tensor *output, +int csi_ref_concat_quant(struct csi_tensor **input, struct csi_tensor *output, struct concat_params *params) { - if (params->axis == -1){ + if (params->axis == -1) { params->axis = input[0]->dim_count - 1; } diff --git a/source/reference/conv_avx.c b/source/reference/conv_avx.h similarity index 51% rename from source/reference/conv_avx.c rename to source/reference/conv_avx.h index ceb8145d..622a16b8 100644 --- a/source/reference/conv_avx.c +++ b/source/reference/conv_avx.h @@ -1,13 +1,11 @@ #include -static float *channel(struct csi_tensor *t, int64_t c) +static float* channel(struct csi_tensor* t, int64_t c) { - return (float *)t->data + c * t->dim[2] * t->dim[3]; + return (float*)t->data + c * t->dim[2] * t->dim[3]; } -static void conv_trans_kernel_avx(struct csi_tensor *o_kernel, - struct csi_tensor *t_kernel) +static void conv_trans_kernel_avx(struct csi_tensor* o_kernel, struct csi_tensor* t_kernel) { - float* kernel = o_kernel->data; float* ret; @@ -17,12 +15,12 @@ static void conv_trans_kernel_avx(struct csi_tensor *o_kernel, int64_t inch = o_kernel->dim[1]; int64_t kernel_size = o_kernel->dim[2] * o_kernel->dim[3]; t_kernel->dim[0] = 0; - t_kernel->dim[1] = outch/8 + (outch%8)/4 + outch%4; + t_kernel->dim[1] = outch / 8 + (outch % 8) / 4 + outch % 4; t_kernel->dim[2] = o_kernel->dim[1]; t_kernel->dim[3] = o_kernel->dim[2] * o_kernel->dim[3] * 8; - ret = csi_mem_alloc(8 * kernel_size * inch * (outch/8 + (outch%8)/4 + outch%4) * - sizeof(float)); + ret = csi_mem_alloc(8 * kernel_size * inch * (outch / 8 + (outch % 8) / 4 + outch % 4) * + sizeof(float)); t_kernel->data = ret; int64_t nn_outch = 0; @@ -31,23 +29,21 @@ static void conv_trans_kernel_avx(struct csi_tensor *o_kernel, nn_outch = outch >> 3; remain_outch_start = nn_outch << 3; - for (int64_t pp=0; pp> 2; - for (int64_t pp=0; ppdim[3]; int64_t inch = input->dim[1]; @@ -132,30 +124,25 @@ static void conv_im2col_sgemm_avx(struct csi_tensor *input, struct csi_tensor *o } // im2col - struct csi_tensor *bottom_im2col = csi_alloc_tensor(NULL); + struct csi_tensor* bottom_im2col = csi_alloc_tensor(NULL); csi_tensor_copy(bottom_im2col, input); - bottom_im2col->data = csi_mem_alloc(outw*outh * kernel_h*kernel_w*inch * sizeof(float)); + bottom_im2col->data = csi_mem_alloc(outw * outh * kernel_h * kernel_w * inch * sizeof(float)); bottom_im2col->dim[0] = 0; bottom_im2col->dim[1] = 0; - bottom_im2col->dim[2] = kernel_h*kernel_w*inch; - bottom_im2col->dim[3] = outw*outh; + bottom_im2col->dim[2] = kernel_h * kernel_w * inch; + bottom_im2col->dim[3] = outw * outh; { - const int64_t stride = kernel_h*kernel_w*outw*outh; + const int64_t stride = kernel_h * kernel_w * outw * outh; float* ret = (float*)bottom_im2col->data; - #pragma omp parallel for num_threads(8) - for (int64_t p=0; pdata = csi_mem_alloc(8*kernel_size * inch * - (out_size/8 + out_size%8) * 4); + bottom_tm->data = csi_mem_alloc(8 * kernel_size * inch * (out_size / 8 + out_size % 8) * 4); bottom_tm->dim[0] = 0; - bottom_tm->dim[1] = out_size/8 + out_size%8; + bottom_tm->dim[1] = out_size / 8 + out_size % 8; bottom_tm->dim[2] = inch; - bottom_tm->dim[3] = 8*kernel_size; + bottom_tm->dim[3] = 8 * kernel_size; { int64_t nn_size = out_size >> 3; int64_t remain_size_start = nn_size << 3; - #pragma omp parallel for num_threads(8) - for (int64_t ii=0; ii> 3; remain_outch_start = nn_outch << 3; - #pragma omp parallel for num_threads(8) - for (int64_t pp=0; pp> 2; - #pragma omp parallel for num_threads(8) - for (int64_t pp=0; ppdata); csi_mem_free(bottom_im2col); } - diff --git a/source/reference/convolution.c b/source/reference/convolution.c index f7399c1b..03f0471e 100644 --- a/source/reference/convolution.c +++ b/source/reference/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,25 +16,25 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #ifdef CSI_AVX_OPT -#include "conv_avx.c" +#include "conv_avx.h" #endif -/* reference https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/internal/reference/conv.h */ +/* reference + * https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/internal/reference/conv.h + */ -static int csi_ref_conv2d_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +static int csi_ref_conv2d_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { - float *input_data = input->data; + float *input_data = input->data; float *output_data = output->data; float *kernel_data = kernel->data; - float *bias_data = bias->data; + float *bias_data = bias->data; const int32_t dilation_width_factor = params->dilation_width; const int32_t dilation_height_factor = params->dilation_height; @@ -59,13 +59,14 @@ static int csi_ref_conv2d_nhwc_f32(struct csi_tensor *input, for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) { for (int32_t in_channel = 0; in_channel < input_depth; ++in_channel) { const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; - const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_y = + in_y_origin + dilation_height_factor * filter_y; // If the location is outside the bounds of the input image, // use zero as a default value. if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - int32_t input_index = - csi_ref_get_index(input->dim, batch, in_y, in_x, in_channel); + int32_t input_index = csi_ref_get_index(input->dim, batch, in_y, + in_x, in_channel); float input_val = input_data[input_index]; int32_t filter_index = csi_ref_get_index( kernel->dim, out_channel, filter_y, filter_x, in_channel); @@ -79,7 +80,8 @@ static int csi_ref_conv2d_nhwc_f32(struct csi_tensor *input, if (bias_data && bias->dim_count != 0) { bias_value = bias_data[out_channel]; } - output_data[csi_ref_get_index(output->dim, batch, out_y, out_x, out_channel)] = acc + bias_value; + output_data[csi_ref_get_index(output->dim, batch, out_y, out_x, out_channel)] = + acc + bias_value; } } } @@ -88,10 +90,8 @@ static int csi_ref_conv2d_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } -static int csi_ref_conv2d_nchw_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +static int csi_ref_conv2d_nchw_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { #ifdef CSI_AVX_OPT @@ -101,8 +101,8 @@ static int csi_ref_conv2d_nchw_f32(struct csi_tensor *input, int32_t pad_a[4] = {0, 0, params->pad_down, params->pad_right}; t_input->dim[2] = input->dim[2] + params->pad_top + params->pad_down; t_input->dim[3] = input->dim[3] + params->pad_left + params->pad_right; - t_input->data = csi_mem_alloc(t_input->dim[0] * t_input->dim[1] * - t_input->dim[2] * t_input->dim[3] * 4); + t_input->data = + csi_mem_alloc(t_input->dim[0] * t_input->dim[1] * t_input->dim[2] * t_input->dim[3] * 4); struct pad_params pparams; pparams.base.layout = CSINN_LAYOUT_NCHW; pparams.base.api = CSINN_REF; @@ -118,18 +118,17 @@ static int csi_ref_conv2d_nchw_f32(struct csi_tensor *input, struct csi_tensor *t_kernel = csi_alloc_tensor(NULL); conv_trans_kernel_avx(kernel, t_kernel); - conv_im2col_sgemm_avx(t_input, output, t_kernel, bias, - kernel->dim[3], kernel->dim[2], + conv_im2col_sgemm_avx(t_input, output, t_kernel, bias, kernel->dim[3], kernel->dim[2], params->stride_width, params->stride_height); csi_mem_free(t_input->data); csi_mem_free(t_kernel->data); #else - struct csi_tensor* t_input; - struct csi_tensor* t_output; - struct csi_tensor* t_kernel; - struct csi_tensor* t_bias = bias; - t_input = csi_ref_nchw_to_nhwc_f32(input); + struct csi_tensor *t_input; + struct csi_tensor *t_output; + struct csi_tensor *t_kernel; + struct csi_tensor *t_bias = bias; + t_input = csi_ref_nchw_to_nhwc_f32(input); t_kernel = csi_ref_nchw_to_nhwc_f32(kernel); t_output = csi_ref_nchw_to_nhwc_f32(output); csi_ref_conv2d_nhwc_f32(t_input, t_output, t_kernel, t_bias, params); @@ -143,10 +142,8 @@ static int csi_ref_conv2d_nchw_f32(struct csi_tensor *input, return CSINN_TRUE; } -static int csi_ref_depthwise_conv2d_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +static int csi_ref_depthwise_conv2d_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { float *input_data = input->data; @@ -166,7 +163,8 @@ static int csi_ref_depthwise_conv2d_nhwc_f32(struct csi_tensor *input, const int32_t output_width = output->dim[2]; const int32_t depth_multiplier = output_depth / input_depth; - assert(input_depth * depth_multiplier == output_depth); // The input and output channels are equal for dw convolution + assert(input_depth * depth_multiplier == + output_depth); // The input and output channels are equal for dw convolution for (int32_t b = 0; b < batches; ++b) { for (int32_t out_y = 0; out_y < output_height; ++out_y) { @@ -174,19 +172,22 @@ static int csi_ref_depthwise_conv2d_nhwc_f32(struct csi_tensor *input, for (int32_t ic = 0; ic < input_depth; ++ic) { for (int32_t m = 0; m < depth_multiplier; m++) { const int32_t oc = m + ic * depth_multiplier; - const int32_t in_x_origin = (out_x * params->stride_width) - params->pad_left; - const int32_t in_y_origin = (out_y * params->stride_height) - params->pad_top; + const int32_t in_x_origin = + (out_x * params->stride_width) - params->pad_left; + const int32_t in_y_origin = + (out_y * params->stride_height) - params->pad_top; float acc = 0; for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) { for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) { const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; - const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_y = + in_y_origin + dilation_height_factor * filter_y; // If the location is outside the bounds of the input image, // use zero as a default value. if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - float input_val = - input_data[csi_ref_get_index(input->dim, b, in_y, in_x, ic)]; + float input_val = input_data[csi_ref_get_index(input->dim, b, + in_y, in_x, ic)]; float filter_val = kernel_data[csi_ref_get_index( kernel->dim, 0, filter_y, filter_x, oc)]; acc += (filter_val) * (input_val); @@ -205,10 +206,8 @@ static int csi_ref_depthwise_conv2d_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } -static int csi_ref_depthwise_conv2d_nchw_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +static int csi_ref_depthwise_conv2d_nchw_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { float *input_data = (float *)input->data; @@ -228,7 +227,8 @@ static int csi_ref_depthwise_conv2d_nchw_f32(struct csi_tensor *input, const int32_t output_height = output->dim[2]; const int32_t output_width = output->dim[3]; const int32_t depth_multiplier = output_depth / input_depth; - assert(input_depth * depth_multiplier == output_depth); // The input and output channels are equal for dw convolution + assert(input_depth * depth_multiplier == + output_depth); // The input and output channels are equal for dw convolution for (int32_t b = 0; b < batches; ++b) { for (int32_t ic = 0; ic < input_depth; ++ic) { @@ -236,19 +236,22 @@ static int csi_ref_depthwise_conv2d_nchw_f32(struct csi_tensor *input, for (int32_t out_x = 0; out_x < output_width; ++out_x) { for (int32_t m = 0; m < depth_multiplier; m++) { const int32_t oc = m + ic * depth_multiplier; - const int32_t in_x_origin = (out_x * params->stride_width) - params->pad_left; - const int32_t in_y_origin = (out_y * params->stride_height) - params->pad_top; + const int32_t in_x_origin = + (out_x * params->stride_width) - params->pad_left; + const int32_t in_y_origin = + (out_y * params->stride_height) - params->pad_top; float acc = 0; for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) { for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) { const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; - const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_y = + in_y_origin + dilation_height_factor * filter_y; // If the location is outside the bounds of the input image, // use zero as a default value. if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - float input_val = - input_data[csi_ref_get_index(input->dim, b, ic, in_y, in_x)]; + float input_val = input_data[csi_ref_get_index(input->dim, b, + ic, in_y, in_x)]; float filter_val = kernel_data[csi_ref_get_index( kernel->dim, oc, 0, filter_y, filter_x)]; acc += (filter_val) * (input_val); @@ -264,13 +267,10 @@ static int csi_ref_depthwise_conv2d_nchw_f32(struct csi_tensor *input, } } } - } -static int csi_ref_group_conv2d_nhwc_f32(struct csi_tensor *o_input, - struct csi_tensor *o_output, - struct csi_tensor *o_kernel, - struct csi_tensor *o_bias, +static int csi_ref_group_conv2d_nhwc_f32(struct csi_tensor *o_input, struct csi_tensor *o_output, + struct csi_tensor *o_kernel, struct csi_tensor *o_bias, struct conv2d_params *params) { struct csi_tensor *input = csi_alloc_tensor(NULL); @@ -307,10 +307,8 @@ static int csi_ref_group_conv2d_nhwc_f32(struct csi_tensor *o_input, return CSINN_TRUE; } -static int csi_ref_group_conv2d_nchw_f32(struct csi_tensor *o_input, - struct csi_tensor *o_output, - struct csi_tensor *o_kernel, - struct csi_tensor *o_bias, +static int csi_ref_group_conv2d_nchw_f32(struct csi_tensor *o_input, struct csi_tensor *o_output, + struct csi_tensor *o_kernel, struct csi_tensor *o_bias, struct conv2d_params *params) { struct csi_tensor *input = csi_alloc_tensor(NULL); @@ -347,10 +345,8 @@ static int csi_ref_group_conv2d_nchw_f32(struct csi_tensor *o_input, return CSINN_TRUE; } -int csi_ref_conv2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { if (params->base.layout == CSINN_LAYOUT_NHWC) { @@ -362,19 +358,40 @@ int csi_ref_conv2d_f32(struct csi_tensor *input, } } -int csi_ref_conv2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { - return csi_ref_conv_callback_base(input, output, kernel, bias, params, csi_ref_conv2d_f32); + int ret; + if (params->conv_extra.fuse_zp2bias) { + struct csi_tensor *tmp_bias = csi_ref_tensor_transform_f32(bias); + struct csi_tensor *tmp_kernel = csi_ref_tensor_transform_f32(kernel); + float *tmp_bias_data = tmp_bias->data; + float *tmp_kernel_data = tmp_kernel->data; + + int k_len = kernel->dim[0]; + int k_inner = csi_tensor_size(kernel) / k_len; + float sp = input->qinfo->scale * input->qinfo->zero_point; + for (int i = 0; i < k_len; i++) { + float t_k = 0; + for (int j = 0; j < k_inner; j++) { + int k_idx = i * k_inner + j; + t_k += tmp_kernel_data[k_idx] * sp; + } + tmp_bias_data[i] += t_k; + } + csi_ref_tensor_transform_free_f32(tmp_kernel); + ret = + csi_ref_conv_callback_base(input, output, kernel, tmp_bias, params, csi_ref_conv2d_f32); + csi_ref_tensor_transform_free_f32(tmp_bias); + } else { + ret = csi_ref_conv_callback_base(input, output, kernel, bias, params, csi_ref_conv2d_f32); + } + return ret; } -int csi_ref_depthwise_conv2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_conv2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { if (params->base.layout == CSINN_LAYOUT_NHWC) { @@ -386,19 +403,54 @@ int csi_ref_depthwise_conv2d_f32(struct csi_tensor *input, } } -int csi_ref_depthwise_conv2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_conv2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { - return csi_ref_conv_callback_base(input, output, kernel, bias, params, csi_ref_depthwise_conv2d_f32); + int ret; + if (params->conv_extra.fuse_zp2bias) { + struct csi_tensor *tmp_bias = csi_ref_tensor_transform_f32(bias); + struct csi_tensor *tmp_kernel = csi_ref_tensor_transform_f32(kernel); + float *tmp_bias_data = tmp_bias->data; + float *tmp_kernel_data = tmp_kernel->data; + if (params->base.layout == CSINN_LAYOUT_NCHW) { + int k_len = kernel->dim[0]; + int k_inner = csi_tensor_size(kernel) / k_len; + float sp = input->qinfo->scale * input->qinfo->zero_point; + for (int i = 0; i < k_len; i++) { + float t_k = tmp_bias_data[i]; + for (int j = 0; j < k_inner; j++) { + int k_idx = i * k_inner + j; + t_k += tmp_kernel_data[k_idx] * sp; + } + tmp_bias_data[i] = t_k; + } + } else { + int k_len = kernel->dim[3]; + int k_outer = csi_tensor_size(kernel) / k_len; + float sp = input->qinfo->scale * input->qinfo->zero_point; + for (int i = 0; i < k_len; i++) { + float t_k = tmp_bias_data[i]; + for (int j = 0; j < k_outer; j++) { + int k_idx = j * k_len + i; + t_k += tmp_kernel_data[k_idx] * sp; + } + tmp_bias_data[i] = t_k; + } + } + csi_ref_tensor_transform_free_f32(tmp_kernel); + ret = csi_ref_conv_callback_base(input, output, kernel, tmp_bias, params, + csi_ref_depthwise_conv2d_f32); + csi_ref_tensor_transform_free_f32(tmp_bias); + } else { + ret = csi_ref_conv_callback_base(input, output, kernel, bias, params, + csi_ref_depthwise_conv2d_f32); + } + return ret; } -int csi_ref_group_conv2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_group_conv2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { if (params->base.layout == CSINN_LAYOUT_NHWC) { @@ -410,12 +462,36 @@ int csi_ref_group_conv2d_f32(struct csi_tensor *input, } } -int csi_ref_group_conv2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_group_conv2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { - return csi_ref_conv_callback_base(input, output, kernel, bias, params, csi_ref_group_conv2d_f32); -} + int ret; + if (params->conv_extra.fuse_zp2bias) { + struct csi_tensor *tmp_bias = csi_ref_tensor_transform_f32(bias); + struct csi_tensor *tmp_kernel = csi_ref_tensor_transform_f32(kernel); + float *tmp_bias_data = tmp_bias->data; + float *tmp_kernel_data = tmp_kernel->data; + + int k_len = kernel->dim[0]; + int k_inner = csi_tensor_size(kernel) / k_len; + float sp = input->qinfo->scale * input->qinfo->zero_point; + for (int i = 0; i < k_len; i++) { + float t_k = 0; + for (int j = 0; j < k_inner; j++) { + int k_idx = i * k_inner + j; + t_k += tmp_kernel_data[k_idx] * sp; + } + tmp_bias_data[i] += t_k; + } + csi_ref_tensor_transform_free_f32(tmp_kernel); + ret = csi_ref_conv_callback_base(input, output, kernel, tmp_bias, params, + csi_ref_group_conv2d_f32); + csi_ref_tensor_transform_free_f32(tmp_bias); + } else { + ret = csi_ref_conv_callback_base(input, output, kernel, bias, params, + csi_ref_group_conv2d_f32); + } + return ret; +} diff --git a/source/reference/convolution1d.c b/source/reference/convolution1d.c new file mode 100644 index 00000000..af1a7e40 --- /dev/null +++ b/source/reference/convolution1d.c @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_ref.h" + +int csi_ref_conv1d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv1d_params *params) +{ + struct conv2d_params params_conv2d; + params_conv2d.base = params->base; + params_conv2d.group = params->group; + params_conv2d.stride_height = 1; + params_conv2d.stride_width = params->stride_width; + params_conv2d.pad_top = 0; + params_conv2d.pad_left = params->pad_left; + params_conv2d.pad_right = params->pad_right; + params_conv2d.dilation_height = 1; + params_conv2d.dilation_width = params->dilation_width; + params_conv2d.conv_extra.kernel_tm = NULL; + params_conv2d.conv_extra.conv_mode = 0; + params_conv2d.conv_extra.fuse_zp2bias = 0; + kernel->dim_count = 4; + kernel->dim[3] = 1; + input->dim_count = 4; + input->dim[3] = 1; + output->dim_count = 4; + output->dim[3] = 1; + csi_ref_conv2d_f32(input, output, kernel, bias, ¶ms_conv2d); + + return CSINN_TRUE; +} + +int csi_ref_conv1d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv1d_params *params) +{ + struct conv2d_params params_conv2d; + params_conv2d.base = params->base; + params_conv2d.group = params->group; + params_conv2d.stride_height = 1; + params_conv2d.stride_width = params->stride_width; + params_conv2d.pad_top = 0; + params_conv2d.pad_left = params->pad_left; + params_conv2d.pad_right = params->pad_right; + params_conv2d.dilation_height = 1; + params_conv2d.dilation_width = params->dilation_width; + params_conv2d.conv_extra.kernel_tm = NULL; + params_conv2d.conv_extra.conv_mode = 0; + params_conv2d.conv_extra.fuse_zp2bias = 0; + kernel->dim_count = 4; + kernel->dim[3] = 1; + input->dim_count = 4; + input->dim[3] = 1; + output->dim_count = 4; + output->dim[3] = 1; + csi_ref_conv2d_quant(input, output, kernel, bias, ¶ms_conv2d); + + return CSINN_TRUE; +} diff --git a/source/reference/convolution3d.c b/source/reference/convolution3d.c index b53a1d3b..49d742f8 100644 --- a/source/reference/convolution3d.c +++ b/source/reference/convolution3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_conv3d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv3d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv3d_params *params) { float *input_data = (float *)input->data; @@ -37,13 +35,13 @@ int csi_ref_conv3d_f32(struct csi_tensor *input, const int32_t in_height = input->dim[3]; const int32_t in_width = input->dim[4]; - //const int filter_outchannel = kernel->dim[0]; - //const int filter_inchannel = kernel->dim[1]; + // const int filter_outchannel = kernel->dim[0]; + // const int filter_inchannel = kernel->dim[1]; const int32_t filter_depth = kernel->dim[2]; const int32_t filter_height = kernel->dim[3]; const int32_t filter_width = kernel->dim[4]; - //int output_batch = output->dim[0]; + // int output_batch = output->dim[0]; const int32_t output_channel = output->dim[1]; const int32_t output_depth = output->dim[2]; const int32_t output_height = output->dim[3]; @@ -53,31 +51,38 @@ int csi_ref_conv3d_f32(struct csi_tensor *input, const int32_t dilation_height = params->dilation_height; const int32_t dilation_width = params->dilation_width; - for(int32_t out_b=0; out_bstride_depth) - params->pad_front; - const int32_t in_h_origin = (out_h * params->stride_height) - params->pad_top; - const int32_t in_w_origin = (out_w * params->stride_width) - params->pad_left; + for (int32_t out_b = 0; out_b < batch; ++out_b) { + for (int32_t out_ch = 0; out_ch < output_channel; ++out_ch) { + for (int32_t out_d = 0; out_d < output_depth; ++out_d) { + for (int32_t out_h = 0; out_h < output_height; ++out_h) { + for (int32_t out_w = 0; out_w < output_width; ++out_w) { + const int32_t in_d_origin = + (out_d * params->stride_depth) - params->pad_front; + const int32_t in_h_origin = + (out_h * params->stride_height) - params->pad_top; + const int32_t in_w_origin = + (out_w * params->stride_width) - params->pad_left; float acc = 0.0f; - for(int32_t in_ch=0; in_ch=0)&&(in_d=0)&&(in_h=0)&&(in_wdim, out_b, in_ch, in_d, in_h, in_w); + if ((in_d >= 0) && (in_d < in_depth) && (in_h >= 0) && + (in_h < in_height) && (in_w >= 0) && + (in_w < in_width)) { + int32_t input_idx = csi_ref_get_index_5( + input->dim, out_b, in_ch, in_d, in_h, in_w); float input_val = input_data[input_idx]; - int32_t filter_idx = csi_ref_get_index_5(kernel->dim, out_ch, in_ch, filter_d, filter_h, filter_w); + int32_t filter_idx = + csi_ref_get_index_5(kernel->dim, out_ch, in_ch, + filter_d, filter_h, filter_w); float filter_val = kernel_data[filter_idx]; acc += input_val * filter_val; } @@ -86,10 +91,11 @@ int csi_ref_conv3d_f32(struct csi_tensor *input, } } float bias_val = 0.0f; - if(bias_data!=NULL && bias->dim_count != 0) { + if (bias_data != NULL && bias->dim_count != 0) { bias_val = bias_data[out_ch]; } - int32_t output_idx = csi_ref_get_index_5(output->dim, out_b, out_ch, out_d, out_h, out_w); + int32_t output_idx = + csi_ref_get_index_5(output->dim, out_b, out_ch, out_d, out_h, out_w); output_data[output_idx] = acc + bias_val; } } @@ -99,10 +105,8 @@ int csi_ref_conv3d_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_conv3d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv3d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv3d_params *params) { return csi_ref_conv_callback_base(input, output, kernel, bias, params, csi_ref_conv3d_f32); diff --git a/source/reference/convolution_channel.c b/source/reference/convolution_channel.c index 6adf4b5f..92fa3912 100644 --- a/source/reference/convolution_channel.c +++ b/source/reference/convolution_channel.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" diff --git a/source/reference/convolution_relu.c b/source/reference/convolution_relu.c index 08357ed8..34d6880c 100644 --- a/source/reference/convolution_relu.c +++ b/source/reference/convolution_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_conv2d_relu_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv2d_relu_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { csi_ref_conv2d_f32(input, output, kernel, bias, params); @@ -35,10 +33,8 @@ int csi_ref_conv2d_relu_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_conv2d_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv2d_relu_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { csi_ref_conv2d_quant(input, output, kernel, bias, params); @@ -49,11 +45,8 @@ int csi_ref_conv2d_relu_quant(struct csi_tensor *input, return CSINN_TRUE; } - -int csi_ref_depthwise_conv2d_relu_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_conv2d_relu_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { csi_ref_depthwise_conv2d_f32(input, output, kernel, bias, params); @@ -65,10 +58,8 @@ int csi_ref_depthwise_conv2d_relu_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_depthwise_conv2d_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_conv2d_relu_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { csi_ref_depthwise_conv2d_quant(input, output, kernel, bias, params); @@ -79,16 +70,14 @@ int csi_ref_depthwise_conv2d_relu_quant(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_group_conv2d_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_group_conv2d_relu_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { csi_ref_group_conv2d_quant(input, output, kernel, bias, params); struct relu_params *rp = csi_mem_alloc(sizeof(struct relu_params)); memcpy(&(rp->base), &(params->base), sizeof(struct csi_params_base)); - csi_relu_init(output,output, rp); + csi_relu_init(output, output, rp); csi_relu(output, output, rp); return CSINN_TRUE; diff --git a/source/reference/convolution_relu6.c b/source/reference/convolution_relu6.c index cebec29e..9a5f2447 100644 --- a/source/reference/convolution_relu6.c +++ b/source/reference/convolution_relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_conv2d_relu6_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_conv2d_relu6_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { csi_ref_conv2d_quant(input, output, kernel, bias, params); @@ -34,10 +32,8 @@ int csi_ref_conv2d_relu6_quant(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_depthwise_conv2d_relu6_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_conv2d_relu6_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { csi_ref_depthwise_conv2d_quant(input, output, kernel, bias, params); @@ -48,16 +44,14 @@ int csi_ref_depthwise_conv2d_relu6_quant(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_group_conv2d_relu6_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_group_conv2d_relu6_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { csi_ref_group_conv2d_quant(input, output, kernel, bias, params); struct relu_params *rp = csi_mem_alloc(sizeof(struct relu_params)); memcpy(&(rp->base), &(params->base), sizeof(struct csi_params_base)); - csi_relu6_init(output,output, rp); + csi_relu6_init(output, output, rp); csi_relu6(output, output, rp); return CSINN_TRUE; diff --git a/source/reference/cos.c b/source/reference/cos.c index 55c9f461..01aca588 100644 --- a/source/reference/cos.c +++ b/source/reference/cos.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_cos_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_cos_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -34,9 +32,8 @@ int csi_ref_cos_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_cos_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_cos_quant(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_cos_f32); } diff --git a/source/reference/cosh.c b/source/reference/cosh.c index 8d5c200a..5613cd13 100644 --- a/source/reference/cosh.c +++ b/source/reference/cosh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_cosh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cosh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -34,8 +33,7 @@ int csi_ref_cosh_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_cosh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cosh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_cosh_f32); diff --git a/source/reference/cumprod.c b/source/reference/cumprod.c index 465282fb..dade805a 100644 --- a/source/reference/cumprod.c +++ b/source/reference/cumprod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_cumprod_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cumprod_f32(struct csi_tensor *input, struct csi_tensor *output, struct cumprod_params *params) { float *input_data = (float *)input->data; @@ -32,21 +31,21 @@ int csi_ref_cumprod_f32(struct csi_tensor *input, // For all input arrays, // FlatSize() = outer_size * inner_size * cnt; int64_t outer_size = 1; - for(int i = 0; i < axis; i++) { + for (int i = 0; i < axis; i++) { outer_size *= input->dim[i]; } int64_t inner_size = 1; - for(int i = axis + 1; i < input->dim_count; i++) { + for (int i = axis + 1; i < input->dim_count; i++) { inner_size *= input->dim[i]; } int cnt = input->dim[axis]; - for(int i = 0; i < outer_size; i++) { - for(int k = 0; k < inner_size; k++) { + for (int i = 0; i < outer_size; i++) { + for (int k = 0; k < inner_size; k++) { float temp = 1.0f; - for(int j = 0; j < cnt; j++) { + for (int j = 0; j < cnt; j++) { temp *= *(input_data + j * inner_size + k); - if(!params->exclusive) { + if (!params->exclusive) { *(output_data + j * inner_size + k) = temp; } else { *(output_data + j * inner_size + k) = temp / *(input_data + j * inner_size + k); @@ -59,8 +58,7 @@ int csi_ref_cumprod_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_cumprod_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cumprod_quant(struct csi_tensor *input, struct csi_tensor *output, struct cumprod_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_cumprod_f32); diff --git a/source/reference/cumsum.c b/source/reference/cumsum.c index 5d53f9d5..5320fe55 100644 --- a/source/reference/cumsum.c +++ b/source/reference/cumsum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_cumsum_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cumsum_f32(struct csi_tensor *input, struct csi_tensor *output, struct cumsum_params *params) { float *input_data = (float *)input->data; @@ -32,21 +31,21 @@ int csi_ref_cumsum_f32(struct csi_tensor *input, // For all input arrays, // FlatSize() = outer_size * inner_size * cnt; int64_t outer_size = 1; - for(int i = 0; i < axis; i++) { + for (int i = 0; i < axis; i++) { outer_size *= input->dim[i]; } int64_t inner_size = 1; - for(int i = axis + 1; i < input->dim_count; i++) { + for (int i = axis + 1; i < input->dim_count; i++) { inner_size *= input->dim[i]; } int cnt = input->dim[axis]; - for(int i = 0; i < outer_size; i++) { - for(int k = 0; k < inner_size; k++) { + for (int i = 0; i < outer_size; i++) { + for (int k = 0; k < inner_size; k++) { float temp = 0.0f; - for(int j = 0; j < cnt; j++) { + for (int j = 0; j < cnt; j++) { temp += *(input_data + j * inner_size + k); - if(!params->exclusive) { + if (!params->exclusive) { *(output_data + j * inner_size + k) = temp; } else { *(output_data + j * inner_size + k) = temp - *(input_data + j * inner_size + k); @@ -59,8 +58,7 @@ int csi_ref_cumsum_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_cumsum_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_cumsum_quant(struct csi_tensor *input, struct csi_tensor *output, struct cumsum_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_cumsum_f32); diff --git a/source/reference/data_convert.c b/source/reference/data_convert.c new file mode 100644 index 00000000..50f091d7 --- /dev/null +++ b/source/reference/data_convert.c @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.11.x */ + +#include "csi_ref.h" + +int csi_ref_data_convert_f32(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params) +{ + float *input_data = input->data; + float *output_data = output->data; + int size_byte = csi_tensor_byte_size(input); + + memcpy(output_data, input_data, size_byte); + return CSINN_TRUE; +} + +int csi_ref_data_convert_quant(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params) +{ + return csi_ref_siso_callback_base(input, output, params, csi_ref_data_convert_f32); +} diff --git a/source/reference/deconvolution.c b/source/reference/deconvolution.c index d85a5983..23acf0d2 100644 --- a/source/reference/deconvolution.c +++ b/source/reference/deconvolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -static int csi_ref_deconv2d_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +static int csi_ref_deconv2d_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { float *input_data = input->data; @@ -66,7 +64,7 @@ static int csi_ref_deconv2d_nhwc_f32(struct csi_tensor *input, float filter_value = filter_data[csi_ref_get_index( kernel->dim, out_channel, filter_y, filter_x, in_channel)]; output_data[csi_ref_get_index(output->dim, batch, out_y, out_x, - out_channel)] += + out_channel)] += input_value * filter_value; } } @@ -77,13 +75,13 @@ static int csi_ref_deconv2d_nhwc_f32(struct csi_tensor *input, } } - if (bias->dim_count != 0){ + if (bias->dim_count != 0) { for (int batch = 0; batch < output_batch; batch++) { for (int o_y = 0; o_y < output_height; o_y++) { for (int o_x = 0; o_x < output_width; o_x++) { for (int o_channel = 0; o_channel < output_depth; ++o_channel) { - output_data[csi_ref_get_index(output->dim, batch, o_y, o_x, - o_channel)] += bias_data[o_channel]; + output_data[csi_ref_get_index(output->dim, batch, o_y, o_x, o_channel)] += + bias_data[o_channel]; } } } @@ -93,17 +91,15 @@ static int csi_ref_deconv2d_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } -static int csi_ref_deconv2d_nchw_f32(struct csi_tensor *o_input, - struct csi_tensor *o_output, - struct csi_tensor *o_kernel, - struct csi_tensor *o_bias, +static int csi_ref_deconv2d_nchw_f32(struct csi_tensor *o_input, struct csi_tensor *o_output, + struct csi_tensor *o_kernel, struct csi_tensor *o_bias, struct conv2d_params *params) { - struct csi_tensor* input = csi_ref_nchw_to_nhwc_f32(o_input); - struct csi_tensor* output = csi_ref_nchw_to_nhwc_f32(o_output); + struct csi_tensor *input = csi_ref_nchw_to_nhwc_f32(o_input); + struct csi_tensor *output = csi_ref_nchw_to_nhwc_f32(o_output); int32_t permute[4] = {1, 2, 3, 0}; - struct csi_tensor* kernel = csi_ref_deconv_kernel_nchw_to_nhwc_f32(o_kernel, permute); - struct csi_tensor* bias = o_bias; + struct csi_tensor *kernel = csi_ref_deconv_kernel_nchw_to_nhwc_f32(o_kernel, permute); + struct csi_tensor *bias = o_bias; csi_ref_deconv2d_nhwc_f32(input, output, kernel, bias, params); @@ -112,10 +108,8 @@ static int csi_ref_deconv2d_nchw_f32(struct csi_tensor *o_input, return CSINN_TRUE; } -int csi_ref_depthwise_deconv2d_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_deconv2d_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { float *input_data = input->data; @@ -146,33 +140,33 @@ int csi_ref_depthwise_deconv2d_nhwc_f32(struct csi_tensor *input, const int out_y_origin = (in_y * params->stride_height) - params->pad_top; for (int filter_y = 0; filter_y < filter_height; ++filter_y) { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - // Compute output element location. - const int out_x = out_x_origin + filter_x; - const int out_y = out_y_origin + filter_y; - // We cannot accumulate out of bounds. - if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && - (out_y < output_height)) { - float input_value = input_data[csi_ref_get_index( - input->dim, batch, in_y, in_x, in_channel)]; - float filter_value = filter_data[csi_ref_get_index( - kernel->dim, 0, filter_y, filter_x, in_channel)]; - output_data[csi_ref_get_index(output->dim, batch, out_y, out_x, - in_channel)] += - input_value * filter_value; - } + // Compute output element location. + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds. + if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) { + float input_value = input_data[csi_ref_get_index( + input->dim, batch, in_y, in_x, in_channel)]; + float filter_value = filter_data[csi_ref_get_index( + kernel->dim, 0, filter_y, filter_x, in_channel)]; + output_data[csi_ref_get_index(output->dim, batch, out_y, out_x, + in_channel)] += + input_value * filter_value; + } } } } } } } - if (bias->dim_count != 0){ + if (bias->dim_count != 0) { for (int batch = 0; batch < output_batch; batch++) { for (int o_y = 0; o_y < output_height; o_y++) { for (int o_x = 0; o_x < output_width; o_x++) { for (int o_channel = 0; o_channel < output_depth; ++o_channel) { - output_data[csi_ref_get_index(output->dim, batch, o_y, o_x, - o_channel)] += bias_data[o_channel]; + output_data[csi_ref_get_index(output->dim, batch, o_y, o_x, o_channel)] += + bias_data[o_channel]; } } } @@ -182,17 +176,15 @@ int csi_ref_depthwise_deconv2d_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_depthwise_deconv2d_nchw_f32(struct csi_tensor *o_input, - struct csi_tensor *o_output, - struct csi_tensor *o_kernel, - struct csi_tensor *o_bias, +int csi_ref_depthwise_deconv2d_nchw_f32(struct csi_tensor *o_input, struct csi_tensor *o_output, + struct csi_tensor *o_kernel, struct csi_tensor *o_bias, struct conv2d_params *params) { - struct csi_tensor* input = csi_ref_nchw_to_nhwc_f32(o_input); - struct csi_tensor* output = csi_ref_nchw_to_nhwc_f32(o_output); + struct csi_tensor *input = csi_ref_nchw_to_nhwc_f32(o_input); + struct csi_tensor *output = csi_ref_nchw_to_nhwc_f32(o_output); int32_t permute[4] = {1, 2, 3, 0}; - struct csi_tensor* kernel = csi_ref_deconv_kernel_nchw_to_nhwc_f32(o_kernel, permute); - struct csi_tensor* bias = o_bias; + struct csi_tensor *kernel = csi_ref_deconv_kernel_nchw_to_nhwc_f32(o_kernel, permute); + struct csi_tensor *bias = o_bias; csi_ref_depthwise_deconv2d_nhwc_f32(input, output, kernel, bias, params); csi_ref_nhwc_to_nchw_f32(o_output, output); @@ -200,10 +192,8 @@ int csi_ref_depthwise_deconv2d_nchw_f32(struct csi_tensor *o_input, return CSINN_TRUE; } -int csi_ref_depthwise_deconv2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_deconv2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { if (params->base.layout == CSINN_LAYOUT_NCHW) { @@ -215,20 +205,17 @@ int csi_ref_depthwise_deconv2d_f32(struct csi_tensor *input, } } -int csi_ref_depthwise_deconv2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_depthwise_deconv2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { - return csi_ref_conv_callback_base(input, output, kernel, bias, params, csi_ref_depthwise_deconv2d_f32); + return csi_ref_conv_callback_base(input, output, kernel, bias, params, + csi_ref_depthwise_deconv2d_f32); } -int csi_ref_deconv2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - struct conv2d_params *params) +int csi_ref_deconv2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) { if (params->base.layout == CSINN_LAYOUT_NCHW) { csi_ref_deconv2d_nchw_f32(input, output, kernel, bias, params); @@ -239,10 +226,8 @@ int csi_ref_deconv2d_f32(struct csi_tensor *input, } } -int csi_ref_deconv2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_deconv2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv2d_params *params) { return csi_ref_conv_callback_base(input, output, kernel, bias, params, csi_ref_deconv2d_f32); diff --git a/source/reference/deconvolution3d.c b/source/reference/deconvolution3d.c index 419ddce7..35e5899f 100644 --- a/source/reference/deconvolution3d.c +++ b/source/reference/deconvolution3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,17 +16,15 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" // input: NCDHW // kernel: IODHW // output: NODHW -int csi_ref_deconv3d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, +int csi_ref_deconv3d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, struct conv3d_params *params) { float *input_data = (float *)input->data; @@ -40,52 +38,57 @@ int csi_ref_deconv3d_f32(struct csi_tensor *input, const int32_t in_height = input->dim[3]; const int32_t in_width = input->dim[4]; - //const int32_t filter_inchannel = kernel->dim[0]; - //const int32_t filter_outchannel = kernel->dim[1]; + // const int32_t filter_inchannel = kernel->dim[0]; + // const int32_t filter_outchannel = kernel->dim[1]; const int32_t filter_depth = kernel->dim[2]; const int32_t filter_height = kernel->dim[3]; const int32_t filter_width = kernel->dim[4]; - //const int32_t output_batch = output->dim[0]; + // const int32_t output_batch = output->dim[0]; const int32_t output_channel = output->dim[1]; const int32_t output_depth = output->dim[2]; const int32_t output_height = output->dim[3]; const int32_t output_width = output->dim[4]; int num_elements = 1; - for(int i = 0; i < output->dim_count; ++i) { + for (int i = 0; i < output->dim_count; ++i) { num_elements *= output->dim[i]; } // We need to initialize scratch_buffer to all 0s float *scratch_buffer = csi_mem_alloc(num_elements * sizeof(float)); // Loop through input elements one at a time. - for(int out_b=0; out_bstride_depth) - params->pad_front; + const int out_d_origin = (in_d * params->stride_depth) - params->pad_front; const int out_h_origin = (in_h * params->stride_height) - params->pad_top; - const int out_w_origin = (in_w * params->stride_width) - params->pad_left; + const int out_w_origin = (in_w * params->stride_width) - params->pad_left; - for(int out_ch=0; out_ch=0)&&(out_d=0)&&(out_h=0)&&(out_wdim, out_b, in_ch, in_d, in_h, in_w); + if ((out_d >= 0) && (out_d < output_depth) && + (out_h >= 0) && (out_h < output_height) && + (out_w >= 0) && (out_w < output_width)) { + int32_t input_idx = csi_ref_get_index_5( + input->dim, out_b, in_ch, in_d, in_h, in_w); float input_val = input_data[input_idx]; - int32_t filter_idx = csi_ref_get_index_5(kernel->dim, in_ch, out_ch, filter_d, filter_h, filter_w); + int32_t filter_idx = + csi_ref_get_index_5(kernel->dim, in_ch, out_ch, + filter_d, filter_h, filter_w); float filter_val = kernel_data[filter_idx]; - int32_t output_idx = csi_ref_get_index_5(output->dim, out_b, out_ch, out_d, out_h, out_w); + int32_t output_idx = csi_ref_get_index_5( + output->dim, out_b, out_ch, out_d, out_h, out_w); scratch_buffer[output_idx] += input_val * filter_val; } } @@ -98,13 +101,14 @@ int csi_ref_deconv3d_f32(struct csi_tensor *input, } } - if(bias->dim_count != 0) { - for(int out_b=0; out_bdim, out_b, out_ch, out_d, out_h, out_w); + if (bias->dim_count != 0) { + for (int out_b = 0; out_b < batch; ++out_b) { + for (int out_ch = 0; out_ch < output_channel; ++out_ch) { + for (int out_d = 0; out_d < output_depth; ++out_d) { + for (int out_h = 0; out_h < output_height; ++out_h) { + for (int out_w = 0; out_w < output_width; ++out_w) { + int32_t out_idx = csi_ref_get_index_5(output->dim, out_b, out_ch, out_d, + out_h, out_w); scratch_buffer[out_idx] += bias_data[out_ch]; } } @@ -112,17 +116,15 @@ int csi_ref_deconv3d_f32(struct csi_tensor *input, } } } - for (int i=0; idata is a 4-D Tensor with shape [batch, depth, height, width]. -int csi_ref_depth_to_space_nchw_f32(struct csi_tensor *input, - struct csi_tensor *output, +// the input->data is a 4-D Tensor with shape [batch, depth, height, width]. +int csi_ref_depth_to_space_nchw_f32(struct csi_tensor *input, struct csi_tensor *output, struct depth_to_space_params *params) { if (params->mode == CSINN_DEPTHTOSPACE_CRD) return CSINN_FALSE; @@ -36,26 +35,28 @@ int csi_ref_depth_to_space_nchw_f32(struct csi_tensor *input, int block_size = params->block_size; int block_size2 = block_size * block_size; - assert(in_channel%block_size2 == 0); + assert(in_channel % block_size2 == 0); - int out_channel = output->dim[1]; //out_channel = in_channel/block_size2; - int out_height = output->dim[2]; //out_weight = in_weight*block_size; - int out_width = output->dim[3]; //out_width = in_width*block_size; - - for(int out_b=0; out_bdim[1]; // out_channel = in_channel/block_size2; + int out_height = output->dim[2]; // out_weight = in_weight*block_size; + int out_width = output->dim[3]; // out_width = in_width*block_size; + for (int out_b = 0; out_b < batch; ++out_b) { + for (int in_h = 0; in_h < in_height; ++in_h) { + for (int in_w = 0; in_w < in_width; ++in_w) { + for (int out_c = 0; out_c < out_channel; ++out_c) { float *temp = (float *)csi_mem_alloc(block_size2 * sizeof(float)); int in_start_addr = csi_ref_get_index(input->dim, out_b, out_c, in_h, in_w); - for(int i=0; idim, out_b, out_c, in_h*block_size, in_w*block_size); - for(int h=0; hdim, out_b, out_c, + in_h * block_size, in_w * block_size); + for (int h = 0; h < block_size; h++) { + for (int w = 0; w < block_size; w++) { + output_data[out_start_addr + h * out_width + w] = + temp[h * block_size + w]; } } csi_mem_free(temp); @@ -66,11 +67,9 @@ int csi_ref_depth_to_space_nchw_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_depth_to_space_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_depth_to_space_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, struct depth_to_space_params *params) { - struct csi_tensor *t_input = csi_alloc_tensor(NULL); csi_tensor_copy(t_input, input); t_input->layout = CSINN_LAYOUT_NCHW; @@ -113,22 +112,18 @@ int csi_ref_depth_to_space_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } - - -int csi_ref_depth_to_space_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_depth_to_space_f32(struct csi_tensor *input, struct csi_tensor *output, struct depth_to_space_params *params) { - if (input->layout == CSINN_LAYOUT_NCHW){ + if (input->layout == CSINN_LAYOUT_NCHW) { return csi_ref_depth_to_space_nchw_f32(input, output, params); - } else if (input->layout == CSINN_LAYOUT_NHWC){ + } else if (input->layout == CSINN_LAYOUT_NHWC) { return csi_ref_depth_to_space_nhwc_f32(input, output, params); } return CSINN_FALSE; } -int csi_ref_depth_to_space_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_depth_to_space_quant(struct csi_tensor *input, struct csi_tensor *output, struct depth_to_space_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_depth_to_space_f32); diff --git a/source/reference/div.c b/source/reference/div.c index babf79c7..cf7ac84b 100644 --- a/source/reference/div.c +++ b/source/reference/div.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,19 +16,16 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -static void element_div_f32(float *src0, float *src1, float *dest, - int input_idx, int output_idx) +static void element_div_f32(float *src0, float *src1, float *dest, int input_idx, int output_idx) { dest[output_idx] = src0[output_idx] / src1[input_idx]; } -int csi_ref_div_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_div_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { struct csi_ref_diso_callback cb; @@ -38,10 +35,8 @@ int csi_ref_div_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_div_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_div_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_div_f32); } diff --git a/source/reference/elu.c b/source/reference/elu.c index 10c9c5f8..4b1eadb0 100644 --- a/source/reference/elu.c +++ b/source/reference/elu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,17 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -static float elu(float x){ - return x < 0.0 ? exp(x) - 1 : x; -} +static float elu(float x) { return x < 0.0 ? exp(x) - 1 : x; } -int csi_ref_elu_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct relu_params *params) +int csi_ref_elu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -41,8 +37,7 @@ int csi_ref_elu_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_elu_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_elu_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_elu_f32); diff --git a/source/reference/equal.c b/source/reference/equal.c index 8aa9d527..40cfd579 100644 --- a/source/reference/equal.c +++ b/source/reference/equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_equal_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_equal_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -36,10 +34,8 @@ int csi_ref_equal_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_equal_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_equal_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { int ret; struct csi_tensor *finput0 = csi_ref_tensor_transform_f32(input0); @@ -47,4 +43,5 @@ int csi_ref_equal_quant(struct csi_tensor *input0, ret = csi_ref_equal_f32(finput0, finput1, output, params); csi_ref_tensor_transform_free_f32(finput0); csi_ref_tensor_transform_free_f32(finput1); - return ret;} + return ret; +} diff --git a/source/reference/erf.c b/source/reference/erf.c index c8505377..cdedfde4 100644 --- a/source/reference/erf.c +++ b/source/reference/erf.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_erf_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_erf_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; @@ -37,8 +35,7 @@ int csi_ref_erf_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_erf_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_erf_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_erf_f32); diff --git a/source/reference/exp.c b/source/reference/exp.c index 64a9a767..b75eb577 100644 --- a/source/reference/exp.c +++ b/source/reference/exp.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_exp_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_exp_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -37,8 +35,7 @@ int csi_ref_exp_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_exp_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_exp_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_exp_f32); diff --git a/source/reference/expand_dims.c b/source/reference/expand_dims.c index 7cf61392..18c72aa2 100644 --- a/source/reference/expand_dims.c +++ b/source/reference/expand_dims.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_expand_dims_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_expand_dims_f32(struct csi_tensor *input, struct csi_tensor *output, struct expand_dims_params *params) { float *input_data = (float *)input->data; @@ -36,8 +35,7 @@ int csi_ref_expand_dims_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_expand_dims_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_expand_dims_quant(struct csi_tensor *input, struct csi_tensor *output, struct expand_dims_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_expand_dims_f32); diff --git a/source/reference/expm1.c b/source/reference/expm1.c index b307a2b8..9ffeb4f3 100644 --- a/source/reference/expm1.c +++ b/source/reference/expm1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_expm1_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_expm1_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -37,8 +36,7 @@ int csi_ref_expm1_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_expm1_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_expm1_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_expm1_f32); diff --git a/source/reference/flatten.c b/source/reference/flatten.c index 37b2ba24..4df53721 100644 --- a/source/reference/flatten.c +++ b/source/reference/flatten.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,28 +16,26 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_flatten_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_flatten_init(struct csi_tensor *input, struct csi_tensor *output, struct reshape_params *params) { - if (input->quant_channel == output->quant_channel){ + if (input->quant_channel == output->quant_channel) { int quant_size = input->quant_channel * sizeof(struct csi_quant_info); int t = memcmp(input->qinfo, output->qinfo, quant_size); - if (t == 0){ + if (t == 0) { params->base.bc = csi_ref_flatten; return CSINN_TRUE; } } - params->base.bc = csi_ref_flatten_requant; + params->base.bc = csi_ref_flatten_quant; return CSINN_TRUE; } -int csi_ref_flatten(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_flatten(struct csi_tensor *input, struct csi_tensor *output, struct flatten_params *params) { uint8_t *input_data = input->data; @@ -53,9 +51,8 @@ int csi_ref_flatten(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_flatten_requant(struct csi_tensor *input, - struct csi_tensor *output, - struct flatten_params *params) +int csi_ref_flatten_quant(struct csi_tensor *input, struct csi_tensor *output, + struct flatten_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_flatten); } diff --git a/source/reference/floor.c b/source/reference/floor.c index 2b70f76c..736a3efb 100644 --- a/source/reference/floor.c +++ b/source/reference/floor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_floor_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_floor_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; @@ -37,8 +36,7 @@ int csi_ref_floor_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_floor_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_floor_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_floor_f32); diff --git a/source/reference/floor_divide.c b/source/reference/floor_divide.c index 61973487..b139b95e 100644 --- a/source/reference/floor_divide.c +++ b/source/reference/floor_divide.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_floor_divide_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_floor_divide_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -36,10 +34,8 @@ int csi_ref_floor_divide_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_floor_divide_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_floor_divide_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_floor_divide_f32); } diff --git a/source/reference/floor_mod.c b/source/reference/floor_mod.c index 8abaa84c..b8d9796e 100644 --- a/source/reference/floor_mod.c +++ b/source/reference/floor_mod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_floor_mod_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_floor_mod_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -41,10 +39,8 @@ int csi_ref_floor_mod_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_floor_mod_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_floor_mod_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_floor_mod_f32); } diff --git a/source/reference/fsmn.c b/source/reference/fsmn.c index a1ad47d8..52ffe84f 100644 --- a/source/reference/fsmn.c +++ b/source/reference/fsmn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,22 +16,16 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -static float fsmn(float x){ - return x > 0 ? x : 0; -} +static float fsmn(float x) { return x > 0 ? x : 0; } -int csi_ref_fsmn_f32(struct csi_tensor *frame, - struct csi_tensor *l_filter, - struct csi_tensor *r_filter, - struct csi_tensor *frame_sequence, - struct csi_tensor *frame_counter, - struct csi_tensor *output, +int csi_ref_fsmn_f32(struct csi_tensor *frame, struct csi_tensor *l_filter, + struct csi_tensor *r_filter, struct csi_tensor *frame_sequence, + struct csi_tensor *frame_counter, struct csi_tensor *output, struct fsmn_params *params) { float *last_frame = frame->data; @@ -42,64 +36,59 @@ int csi_ref_fsmn_f32(struct csi_tensor *frame, float *output_data = output->data; int len_order = frame_sequence->dim[0]; - int length = frame_sequence->dim[1]; + int length = frame_sequence->dim[1]; - for (int i = 0; i < length; i++) - output_data[i] = 0.0; + for (int i = 0; i < length; i++) output_data[i] = 0.0; frame_count[0]++; // set last frame to sequence tail. - if(frame_count[0] > params->unavailable_frames){ - for(int i = 0; i < len_order; i++){ - for (int j = 0; j < length; j++){ + if (frame_count[0] > params->unavailable_frames) { + for (int i = 0; i < len_order; i++) { + for (int j = 0; j < length; j++) { int new_index = i * length + j; - if(i == (len_order - 1)){ + if (i == (len_order - 1)) { sequence_frame[new_index] = last_frame[j]; - }else{ + } else { int original_index = (i + 1) * length + j; sequence_frame[new_index] = sequence_frame[original_index]; } } } - } // past frame - for (int k = 0; k < params->l_order; k++){ - for( int l = 0; l < length; l++){ + for (int k = 0; k < params->l_order; k++) { + for (int l = 0; l < length; l++) { int in_index = k * params->l_stride * length + l; int filter_index = (params->l_order - k - 1) * length + l; output_data[l] = past_filter[filter_index] * sequence_frame[in_index] + output_data[l]; - } - } // current frame - for (int m = 0; m < length; m++){ + for (int m = 0; m < length; m++) { int in_index = (params->l_order - 1) * length * params->l_stride + m; output_data[m] = sequence_frame[in_index] + output_data[m]; } // future frame - for(int m = 0; m < params->r_order; m++){ - for(int n = 0; n < length; n++){ - int in_index = m * params->r_stride * length + n + params->l_order * params->l_stride * length; + for (int m = 0; m < params->r_order; m++) { + for (int n = 0; n < length; n++) { + int in_index = + m * params->r_stride * length + n + params->l_order * params->l_stride * length; int filter_index = m * length + n; - output_data[n] = future_filter[filter_index] * sequence_frame[in_index] + output_data[n]; + output_data[n] = + future_filter[filter_index] * sequence_frame[in_index] + output_data[n]; } } return CSINN_TRUE; } -int csi_ref_fsmn_quant(struct csi_tensor *frame, - struct csi_tensor *l_filter, - struct csi_tensor *r_filter, - struct csi_tensor *frame_sequence, - struct csi_tensor *frame_count, - struct csi_tensor *output, - struct fsmn_params *params) +int csi_ref_fsmn_quant(struct csi_tensor *frame, struct csi_tensor *l_filter, + struct csi_tensor *r_filter, struct csi_tensor *frame_sequence, + struct csi_tensor *frame_count, struct csi_tensor *output, + struct fsmn_params *params) { struct csi_tensor *float_frame = csi_ref_tensor_transform_f32(frame); struct csi_tensor *float_l_filter = csi_ref_tensor_transform_f32(l_filter); @@ -107,7 +96,8 @@ int csi_ref_fsmn_quant(struct csi_tensor *frame, struct csi_tensor *float_frame_sequence = csi_ref_tensor_transform_f32(frame_sequence); struct csi_tensor *float_output = csi_ref_tensor_transform_f32(output); - int ret = csi_ref_fsmn_f32(float_frame, float_l_filter, float_r_filter, float_frame_sequence, frame_count, float_output, params); + int ret = csi_ref_fsmn_f32(float_frame, float_l_filter, float_r_filter, float_frame_sequence, + frame_count, float_output, params); csi_tensor_data_convert(output, float_output); csi_tensor_data_convert(frame_sequence, float_frame_sequence); csi_ref_tensor_transform_free_f32(float_frame); diff --git a/source/reference/fullyconnected.c b/source/reference/fullyconnected.c index 17b3666a..e2bec8af 100644 --- a/source/reference/fullyconnected.c +++ b/source/reference/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_fullyconnected_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_ref_fullyconnected_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params) { float *input_data = input->data; @@ -32,7 +30,11 @@ int csi_ref_fullyconnected_f32(struct csi_tensor *input, float *bias_data = bias->data; const int output_dims_count = output->dim_count; const int weights_dims_count = weights->dim_count; - const int batches = output->dim[0]; + int batches = 1; + /* compute the outer size */ + for (int i = 0; i < output_dims_count - 1; i++) { + batches *= output->dim[i]; + } const int output_depth = weights->dim[weights_dims_count - 2]; const int accum_depth = weights->dim[weights_dims_count - 1]; for (int b = 0; b < batches; ++b) { @@ -51,18 +53,33 @@ int csi_ref_fullyconnected_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_fullyconnected_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *weights, - struct csi_tensor *bias, +int csi_ref_fullyconnected_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, struct fc_params *params) { struct csi_tensor *float_input = csi_ref_tensor_transform_f32(input); struct csi_tensor *float_kernel = csi_ref_tensor_transform_f32(weights); struct csi_tensor *float_bias = csi_ref_tensor_transform_f32(bias); struct csi_tensor *float_output = csi_ref_tensor_transform_f32(output); + if (params->fc_extra.fuse_zp2bias) { + float *float_bias_data = float_bias->data; + float *float_kernel_data = float_kernel->data; + + int k_len = weights->dim[0]; + int k_inner = csi_tensor_size(weights) / k_len; + float sp = input->qinfo->scale * input->qinfo->zero_point; + for (int i = 0; i < k_len; i++) { + float t_k = 0; + for (int j = 0; j < k_inner; j++) { + int k_idx = i * k_inner + j; + t_k += float_kernel_data[k_idx] * sp; + } + float_bias_data[i] += t_k; + } + } - int ret = csi_ref_fullyconnected_f32(float_input, float_output, float_kernel, float_bias, params); + int ret = + csi_ref_fullyconnected_f32(float_input, float_output, float_kernel, float_bias, params); csi_tensor_data_convert(output, float_output); csi_ref_tensor_transform_free_f32(float_input); csi_ref_tensor_transform_free_f32(float_output); diff --git a/source/reference/gather.c b/source/reference/gather.c index 88e608a2..44a5c223 100644 --- a/source/reference/gather.c +++ b/source/reference/gather.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_gather_f32(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_params *params) +int csi_ref_gather_f32(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; @@ -43,10 +41,10 @@ int csi_ref_gather_f32(struct csi_tensor *input, } for (int i = 0; i < outer_size; i++) { - for (int j = 0; j < indices_size; j++) { if (indices_data[j] < input->dim[params->axis]) { - memcpy(output_data, input_data + indices_data[j] * inner_size, inner_size * sizeof(float)); + memcpy(output_data, input_data + indices_data[j] * inner_size, + inner_size * sizeof(float)); } else { memset(output_data, 0, inner_size * sizeof(float)); } @@ -57,10 +55,8 @@ int csi_ref_gather_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_gather_quant(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_params *params) +int csi_ref_gather_quant(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); diff --git a/source/reference/gather_nd.c b/source/reference/gather_nd.c index 5ae43a77..9632c807 100644 --- a/source/reference/gather_nd.c +++ b/source/reference/gather_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,33 +16,31 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -static int Multiplication(int *input, int s, int e) +static int Multiplication(int32_t *input, int s, int e) { int res = 1; - for(int i=s; i<=e; i++) { + for (int i = s; i <= e; i++) { res = res * input[i]; } return res; } -int csi_ref_gather_nd_f32(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_nd_params *params) +int csi_ref_gather_nd_f32(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_nd_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; uint32_t *indices_data = (uint32_t *)indices->data; int in_size = 1, indices_size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { in_size *= input->dim[i]; } - for(int i = 0; i < indices->dim_count; i++) { + for (int i = 0; i < indices->dim_count; i++) { indices_size *= indices->dim[i]; } @@ -54,45 +52,44 @@ int csi_ref_gather_nd_f32(struct csi_tensor *input, indices_outer_size = indices_size / indices_last_dim; int input_outer_size = 1; - for(int i = 0; i < axis; i++) { + for (int i = 0; i < axis; i++) { input_outer_size *= input->dim[i]; } int input_inner_size = 1; - for(int i = axis + 1; i < input->dim_count; i++) { + for (int i = axis + 1; i < input->dim_count; i++) { input_inner_size *= input->dim[i]; } float *in_copy_addr = NULL; int dim_over_flag = 0; - for(int i = 0; i < indices_outer_size; i++) { + for (int i = 0; i < indices_outer_size; i++) { int input_outer_idx = 0; - for(int j = 0; j < indices_last_dim; j++) { + for (int j = 0; j < indices_last_dim; j++) { int indices_val = indices_data[i * indices_last_dim + j]; - if(indices_val >= input->dim[j]) { + if (indices_val >= input->dim[j]) { dim_over_flag = 1; break; } else { - input_outer_idx += indices_val * Multiplication(input->dim, j + 1, indices_last_dim - 1); + input_outer_idx += + indices_val * Multiplication(input->dim, j + 1, indices_last_dim - 1); } } - if(dim_over_flag == 1) { + if (dim_over_flag == 1) { dim_over_flag = 0; - for(int n = 0; n < input_inner_size; n++) { + for (int n = 0; n < input_inner_size; n++) { *(output_data + n) = 0.0f; } } else { in_copy_addr = input_data + input_outer_idx * input_inner_size; - memcpy(output_data , in_copy_addr, input_inner_size * sizeof(float)); + memcpy(output_data, in_copy_addr, input_inner_size * sizeof(float)); } output_data += input_inner_size; } return CSINN_TRUE; } -int csi_ref_gather_nd_quant(struct csi_tensor *input, - struct csi_tensor *indices, - struct csi_tensor *output, - struct gather_nd_params *params) +int csi_ref_gather_nd_quant(struct csi_tensor *input, struct csi_tensor *indices, + struct csi_tensor *output, struct gather_nd_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); diff --git a/source/reference/global_averagepool.c b/source/reference/global_averagepool.c index 50093a5d..67138b6d 100644 --- a/source/reference/global_averagepool.c +++ b/source/reference/global_averagepool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_global_avgpool2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_global_avgpool2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { params->stride_height = 1; params->stride_width = 1; @@ -45,9 +44,8 @@ int csi_ref_global_avgpool2d_f32(struct csi_tensor *input, csi_ref_avgpool2d_f32(input, output, params); } -int csi_ref_global_avgpool2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_global_avgpool2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_global_avgpool2d_f32); } \ No newline at end of file diff --git a/source/reference/global_maxpool.c b/source/reference/global_maxpool.c index 073384cf..c681a213 100644 --- a/source/reference/global_maxpool.c +++ b/source/reference/global_maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -int csi_ref_global_maxpool2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_global_maxpool2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { params->stride_height = 1; params->stride_width = 1; @@ -45,9 +44,8 @@ int csi_ref_global_maxpool2d_f32(struct csi_tensor *input, csi_ref_maxpool2d_f32(input, output, params); } -int csi_ref_global_maxpool2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_global_maxpool2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_global_maxpool2d_f32); } \ No newline at end of file diff --git a/source/reference/greater.c b/source/reference/greater.c index e4fd2c41..99467882 100644 --- a/source/reference/greater.c +++ b/source/reference/greater.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_greater_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_greater_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -40,10 +38,8 @@ int csi_ref_greater_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_greater_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_greater_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_greater_f32); } diff --git a/source/reference/greater_equal.c b/source/reference/greater_equal.c index 5454016e..9e5eb9b3 100644 --- a/source/reference/greater_equal.c +++ b/source/reference/greater_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_greater_equal_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_greater_equal_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -40,10 +38,8 @@ int csi_ref_greater_equal_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_greater_equal_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_greater_equal_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_greater_equal_f32); } diff --git a/source/reference/hard_sigmoid.c b/source/reference/hard_sigmoid.c index a06832f7..653b55dc 100644 --- a/source/reference/hard_sigmoid.c +++ b/source/reference/hard_sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,25 +16,24 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_hard_sigmoid_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_hard_sigmoid_f32(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; int size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { size *= input->dim[i]; } - for(int i = 0; i < size; i++) { - if(input_data[i] < -2.5) { + for (int i = 0; i < size; i++) { + if (input_data[i] < -2.5) { output_data[i] = 0; - } else if(input_data[i] > 2.5) { + } else if (input_data[i] > 2.5) { output_data[i] = 1; } else { output_data[i] = 0.2 * input_data[i] + 0.5; @@ -43,8 +42,7 @@ int csi_ref_hard_sigmoid_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_hard_sigmoid_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_hard_sigmoid_quant(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_hard_sigmoid_f32); diff --git a/source/reference/im2col.c b/source/reference/im2col.c index fb95d867..33e5dec8 100644 --- a/source/reference/im2col.c +++ b/source/reference/im2col.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" @@ -24,9 +24,8 @@ // input_data layout:NCHW // https://github.com/pjreddie/darknet/blob/master/src/im2col.c // output_data: row = channels*ksize_h*ksize_w, col = batch*height_col*width_col -static int csi_ref_im2col_nchw_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct im2col_params *params) +static int csi_ref_im2col_nchw_f32(struct csi_tensor *input, struct csi_tensor *output, + struct im2col_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; @@ -39,26 +38,29 @@ static int csi_ref_im2col_nchw_f32(struct csi_tensor *input, int32_t stride_h = params->stride_h; int32_t stride_w = params->stride_w; - int height_col = (height + params->pad_top + params->pad_down - ksize_h) / stride_h + 1; // output_height - int width_col = (width + params->pad_left + params->pad_right - ksize_w) / stride_w + 1; // output_width, batch * output_height * output_width = matrix_col + int height_col = + (height + params->pad_top + params->pad_down - ksize_h) / stride_h + 1; // output_height + int width_col = (width + params->pad_left + params->pad_right - ksize_w) / stride_w + + 1; // output_width, batch * output_height * output_width = matrix_col int channel_col = channel * ksize_h * ksize_w; - for(int c = 0; c < channel_col; ++c) { + for (int c = 0; c < channel_col; ++c) { int w_offset = c % ksize_w; int h_offset = c / ksize_w % ksize_h; int c_im = c / ksize_h / ksize_w; - for(int b = 0; b < batch; ++b) { - for(int h = 0; h < height_col; ++h) { - for(int w = 0; w < width_col; ++w) { + for (int b = 0; b < batch; ++b) { + for (int h = 0; h < height_col; ++h) { + for (int w = 0; w < width_col; ++w) { int im_row = h_offset + h * stride_h; int im_col = w_offset + w * stride_w; int col_index = ((c * batch + b) * height_col + h) * width_col + w; im_row = im_row - params->pad_top; im_col = im_col - params->pad_left; - if(im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) { + if (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) { output_data[col_index] = 0.0f; } else { - output_data[col_index] = input_data[csi_ref_get_index(input->dim, b, c_im, im_row, im_col)]; + output_data[col_index] = + input_data[csi_ref_get_index(input->dim, b, c_im, im_row, im_col)]; } } } @@ -69,9 +71,8 @@ static int csi_ref_im2col_nchw_f32(struct csi_tensor *input, // input_data layout:NHWC // output_data: row = batch*height_col*width_col, col = channels*ksize_h*ksize_w -static int csi_ref_im2col_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct im2col_params *params) +static int csi_ref_im2col_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, + struct im2col_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; @@ -84,15 +85,16 @@ static int csi_ref_im2col_nhwc_f32(struct csi_tensor *input, int32_t stride_h = params->stride_h; int32_t stride_w = params->stride_w; - - int height_col = (height + params->pad_top + params->pad_down - ksize_h) / stride_h + 1; // output_height - int width_col = (width + params->pad_left + params->pad_right - ksize_w) / stride_w + 1; // output_width, output_height * output_width = matrix_ + int height_col = + (height + params->pad_top + params->pad_down - ksize_h) / stride_h + 1; // output_height + int width_col = (width + params->pad_left + params->pad_right - ksize_w) / stride_w + + 1; // output_width, output_height * output_width = matrix_ int channel_col = channel * ksize_h * ksize_w; - for(int b = 0; b < batch; ++b) { - for(int h = 0; h < height_col; ++h) { - for(int w = 0; w < width_col; ++w) { - for(int c = 0; c < channel_col; ++c) { + for (int b = 0; b < batch; ++b) { + for (int h = 0; h < height_col; ++h) { + for (int w = 0; w < width_col; ++w) { + for (int c = 0; c < channel_col; ++c) { int w_offset = c % ksize_w; int h_offset = c / ksize_w % ksize_h; int c_im = c / ksize_h / ksize_w; @@ -102,10 +104,11 @@ static int csi_ref_im2col_nhwc_f32(struct csi_tensor *input, int col_index = ((b * height_col + h) * width_col + w) * channel_col + c; im_row = im_row - params->pad_top; im_col = im_col - params->pad_left; - if(im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) { + if (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) { output_data[col_index] = 0.0f; } else { - output_data[col_index] = input_data[csi_ref_get_index(input->dim, b, im_row, im_col, c_im)]; + output_data[col_index] = + input_data[csi_ref_get_index(input->dim, b, im_row, im_col, c_im)]; } } } @@ -115,8 +118,7 @@ static int csi_ref_im2col_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_im2col_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_im2col_f32(struct csi_tensor *input, struct csi_tensor *output, struct im2col_params *params) { if (params->base.layout == CSINN_LAYOUT_NCHW) { @@ -129,8 +131,7 @@ int csi_ref_im2col_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_im2col_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_im2col_quant(struct csi_tensor *input, struct csi_tensor *output, struct im2col_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_im2col_f32); diff --git a/source/reference/isnan.c b/source/reference/isnan.c index a770aed5..51c4bc72 100644 --- a/source/reference/isnan.c +++ b/source/reference/isnan.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_isnan_bool_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_isnan_bool_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; diff --git a/source/reference/l2_normalization.c b/source/reference/l2_normalization.c index 19a9ae27..04e2bdc5 100644 --- a/source/reference/l2_normalization.c +++ b/source/reference/l2_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,15 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -/* https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/internal/reference/l2normalization.h */ +/* https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/internal/reference/l2normalization.h + */ -int csi_ref_l2_normalization_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_l2_normalization_f32(struct csi_tensor *input, struct csi_tensor *output, struct l2n_params *params) { float *input_data = input->data; @@ -51,8 +51,7 @@ int csi_ref_l2_normalization_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_l2_normalization_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_l2_normalization_quant(struct csi_tensor *input, struct csi_tensor *output, struct l2n_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_l2_normalization_f32); diff --git a/source/reference/l2pool.c b/source/reference/l2pool.c index cfde8a14..e328fb7a 100644 --- a/source/reference/l2pool.c +++ b/source/reference/l2pool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_l2pool_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_l2pool_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params) { float *input_data = input->data; @@ -53,14 +52,15 @@ int csi_ref_l2pool_f32(struct csi_tensor *input, for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) { const int in_x = in_x_origin + filter_x; const int in_y = in_y_origin + filter_y; - const float val = - input_data[csi_ref_get_index(input->dim, batch, in_y, in_x, channel)]; + const float val = input_data[csi_ref_get_index(input->dim, batch, in_y, + in_x, channel)]; sum_squares += val * val; filter_count++; } } const float l2pool_result = sqrt(sum_squares / filter_count); - output_data[csi_ref_get_index(output->dim, batch, out_y, out_x, channel)] = l2pool_result; + output_data[csi_ref_get_index(output->dim, batch, out_y, out_x, channel)] = + l2pool_result; } } } diff --git a/source/reference/layer_norm.c b/source/reference/layer_norm.c new file mode 100644 index 00000000..aa409b34 --- /dev/null +++ b/source/reference/layer_norm.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_ref.h" +#include "csi_utils.h" + +int csi_ref_layer_norm_f32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *gamma, struct csi_tensor *beta, + struct layer_norm_params *params) +{ + int flatten_size = 0; + flatten_size *= input->dim[0] * input->dim[1] * input->dim[2]; + + float *sum = (float *)calloc(input->dim[1], sizeof(float)); + float *sum2 = (float *)calloc(input->dim[1], sizeof(float)); + float *input_data = input->data; + float *output_data = output->data; + float *gamma_data = gamma->data; + float *beta_data = beta->data; + + for (int i = 0; i < input->dim[1]; i++) { + for (int j = 0; j < input->dim[2]; j++) { + sum[i] += input_data[j + i * input->dim[2]]; + } + sum[i] /= input->dim[2]; + } + + for (int i = 0; i < input->dim[1]; i++) { + for (int j = 0; j < input->dim[2]; j++) { + input_data[j + i * input->dim[2]] -= sum[i]; + output_data[j + i * input->dim[2]] = input_data[j + i * input->dim[2]]; + + input_data[j + i * input->dim[2]] = + input_data[j + i * input->dim[2]] * input_data[j + i * input->dim[2]]; + sum2[i] += input_data[j + i * input->dim[2]]; + } + sum2[i] /= input->dim[2]; + sum2[i] = sqrtf(sum2[i]); + } + + for (int i = 0; i < input->dim[1]; i++) { + for (int j = 0; j < input->dim[2]; j++) { + output_data[j + i * input->dim[2]] = + output_data[j + i * input->dim[2]] / sum2[i] * gamma_data[j] + beta_data[j]; + } + } + + free(sum); + free(sum2); + + return CSINN_TRUE; +} + +int csi_ref_layer_norm_quant(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *gamma, struct csi_tensor *beta, + struct layer_norm_params *params) +{ + struct csi_tensor *float_input = csi_ref_tensor_transform_f32(input); + struct csi_tensor *float_output = csi_ref_tensor_transform_f32(output); + struct csi_tensor *float_gamma = csi_ref_tensor_transform_f32(gamma); + struct csi_tensor *float_beta = csi_ref_tensor_transform_f32(beta); + + int ret = csi_ref_layer_norm_f32(float_input, float_output, float_gamma, float_beta, params); + + csi_tensor_data_convert(output, float_output); + + csi_ref_tensor_transform_free_f32(float_input); + csi_ref_tensor_transform_free_f32(float_output); + csi_ref_tensor_transform_free_f32(float_gamma); + csi_ref_tensor_transform_free_f32(float_beta); + + return CSINN_TRUE; +} diff --git a/source/reference/leaky_relu.c b/source/reference/leaky_relu.c index 0d76d176..6a089945 100644 --- a/source/reference/leaky_relu.c +++ b/source/reference/leaky_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_leaky_relu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_leaky_relu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { float *input_data = input->data; @@ -39,8 +38,7 @@ int csi_ref_leaky_relu_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_leaky_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_leaky_relu_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_leaky_relu_f32); diff --git a/source/reference/less.c b/source/reference/less.c index 020896ad..08914b7a 100644 --- a/source/reference/less.c +++ b/source/reference/less.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_less_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_less_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -40,10 +38,8 @@ int csi_ref_less_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_less_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_less_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_less_f32); } diff --git a/source/reference/less_equal.c b/source/reference/less_equal.c index 12b54d08..c1e70cbf 100644 --- a/source/reference/less_equal.c +++ b/source/reference/less_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_less_equal_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_less_equal_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -40,10 +38,8 @@ int csi_ref_less_equal_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_less_equal_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_less_equal_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_less_equal_f32); } diff --git a/source/reference/log.c b/source/reference/log.c index 158deff2..b77a95fc 100644 --- a/source/reference/log.c +++ b/source/reference/log.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_log_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_log_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -39,8 +36,7 @@ int csi_ref_log_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_log_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_log_f32); diff --git a/source/reference/log1p.c b/source/reference/log1p.c index c014a173..42cc5b89 100644 --- a/source/reference/log1p.c +++ b/source/reference/log1p.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_log1p_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log1p_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -34,13 +32,12 @@ int csi_ref_log1p_f32(struct csi_tensor *input, } for (int i = 0; i < size; i++) { - output_data[i] = log(1+input_data[i]); + output_data[i] = log(1 + input_data[i]); } return CSINN_TRUE; } -int csi_ref_log1p_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log1p_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_log1p_f32); diff --git a/source/reference/log_softmax.c b/source/reference/log_softmax.c index 29b4c070..9ef78be1 100644 --- a/source/reference/log_softmax.c +++ b/source/reference/log_softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" /* logsoftmax = logits - log(reduce_sum(exp(logits), axis)) */ -int csi_ref_log_softmax_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log_softmax_f32(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params) { // now only support 2D input @@ -32,31 +31,32 @@ int csi_ref_log_softmax_f32(struct csi_tensor *input, float *output_data = (float *)output->data; int in_size = 1, out_size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { in_size *= input->dim[i]; } out_size = in_size; int input_outer_size = 1; - for(int i = 0; i < params->axis; i++) { + for (int i = 0; i < params->axis; i++) { input_outer_size *= input->dim[i]; } int input_inner_size = 1; - for(int i = params->axis + 1; i < input->dim_count; i++) { + for (int i = params->axis + 1; i < input->dim_count; i++) { input_inner_size *= input->dim[i]; } int axis_dim = input->dim[params->axis]; - for(int i = 0; i < input_outer_size; i++) { - for(int k = 0; k < input_inner_size; k++) { + for (int i = 0; i < input_outer_size; i++) { + for (int k = 0; k < input_inner_size; k++) { float acc = 0.0f; float input_val = 0.0f; - for(int j = 0; j < axis_dim; j++) { + for (int j = 0; j < axis_dim; j++) { input_val = *(input_data + j * input_inner_size + k); acc += exp(input_val); } acc = log(acc); - for(int j = 0; j < axis_dim; j++) { - *(output_data + j * input_inner_size + k) = *(input_data + j * input_inner_size + k) - acc; + for (int j = 0; j < axis_dim; j++) { + *(output_data + j * input_inner_size + k) = + *(input_data + j * input_inner_size + k) - acc; } } input_data += input_inner_size * axis_dim; @@ -65,8 +65,7 @@ int csi_ref_log_softmax_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_log_softmax_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_log_softmax_quant(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_log_softmax_f32); diff --git a/source/reference/logical_and.c b/source/reference/logical_and.c index d279276a..152578bc 100644 --- a/source/reference/logical_and.c +++ b/source/reference/logical_and.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_logical_and_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_logical_and_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -40,10 +38,8 @@ int csi_ref_logical_and_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_logical_and_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_logical_and_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_logical_and_f32); } diff --git a/source/reference/logical_not.c b/source/reference/logical_not.c index 8e78dce0..6b81bb8d 100644 --- a/source/reference/logical_not.c +++ b/source/reference/logical_not.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_logical_not_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_logical_not_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -37,8 +36,7 @@ int csi_ref_logical_not_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_logical_not_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_logical_not_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_logical_not_f32); diff --git a/source/reference/logical_or.c b/source/reference/logical_or.c index 597e5847..8db0b883 100644 --- a/source/reference/logical_or.c +++ b/source/reference/logical_or.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_logical_or_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_logical_or_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -40,10 +38,8 @@ int csi_ref_logical_or_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_logical_or_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_logical_or_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_logical_or_f32); } diff --git a/source/reference/logical_xor.c b/source/reference/logical_xor.c index 174cd282..4297cb11 100644 --- a/source/reference/logical_xor.c +++ b/source/reference/logical_xor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_logical_xor_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_logical_xor_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = (float *)input0->data; float *input1_data = (float *)input1->data; @@ -40,10 +38,8 @@ int csi_ref_logical_xor_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_logical_xor_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_logical_xor_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_logical_xor_f32); } diff --git a/source/reference/lrn.c b/source/reference/lrn.c index 254aa06f..46e434fc 100644 --- a/source/reference/lrn.c +++ b/source/reference/lrn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static int csi_ref_lrn_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, +static int csi_ref_lrn_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, struct lrn_params *params) { float *input_data = input->data; @@ -45,54 +44,48 @@ static int csi_ref_lrn_nhwc_f32(struct csi_tensor *input, const float input_val = input_data[i * depth + input_c]; accum += input_val * input_val; } - const float multiplier = pow(params->bias + params->alpha * accum / params->range, -params->beta); + const float multiplier = + pow(params->bias + params->alpha * accum / params->range, -params->beta); output_data[i * depth + c] = input_data[i * depth + c] * multiplier; } } return CSINN_TRUE; } -static int csi_ref_lrn_nchw_f32(struct csi_tensor *o_input, - struct csi_tensor *o_output, +static int csi_ref_lrn_nchw_f32(struct csi_tensor *input, struct csi_tensor *output, struct lrn_params *params) { - struct csi_tensor* input; - struct csi_tensor* output; - input = csi_ref_nchw_to_nhwc_f32(o_input); - output = csi_ref_nchw_to_nhwc_f32(o_output); - float *input_data = input->data; float *output_data = output->data; - const int trailing_dim = input->dim_count - 1; - int outer_size = 1; - const int depth = input->dim[trailing_dim]; + int inner_size = 1; + const int depth = input->dim[1]; int half_range = params->range / 2; - for (int i = 0; i < trailing_dim; i++) { - outer_size *= input->dim[i]; - } + /* inner_size = H * W */ + inner_size = input->dim[2] * input->dim[3]; - for (int i = 0; i < outer_size; ++i) { + for (int j = 0; j < input->dim[0]; j++) { for (int c = 0; c < depth; ++c) { const int begin_input_c = csi_ref_max_internal_s32(0, c - half_range); const int end_input_c = csi_ref_min_internal_s32(depth, c + half_range + 1); - float accum = 0.f; - for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) { - const float input_val = input_data[i * depth + input_c]; - accum += input_val * input_val; + for (int i = 0; i < inner_size; ++i) { + float accum = 0.f; + for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) { + const float input_val = + input_data[j * depth * inner_size + input_c * inner_size + i]; + accum += input_val * input_val; + } + const float multiplier = + pow(params->bias + params->alpha * accum / params->range, -params->beta); + output_data[j * depth * inner_size + c * inner_size + i] = + input_data[j * depth * inner_size + c * inner_size + i] * multiplier; } - const float multiplier = pow(params->bias + params->alpha * accum / params->range, -params->beta); - output_data[i * depth + c] = input_data[i * depth + c] * multiplier; } } - csi_ref_nhwc_to_nchw_f32(o_output, output); - csi_ref_free_float_tensor(input); return CSINN_TRUE; } -int csi_ref_lrn_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct lrn_params *params) +int csi_ref_lrn_f32(struct csi_tensor *input, struct csi_tensor *output, struct lrn_params *params) { if (params->base.layout == CSINN_LAYOUT_NCHW) { csi_ref_lrn_nchw_f32(input, output, params); @@ -103,8 +96,7 @@ int csi_ref_lrn_f32(struct csi_tensor *input, } } -int csi_ref_lrn_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_lrn_quant(struct csi_tensor *input, struct csi_tensor *output, struct lrn_params *params) { double bias_f, alpha_f, beta_f; diff --git a/source/reference/matmul.c b/source/reference/matmul.c index a0241e2e..79f26002 100644 --- a/source/reference/matmul.c +++ b/source/reference/matmul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_matmul_f32(struct csi_tensor *mat0, - struct csi_tensor *mat1, - struct csi_tensor *output, +int csi_ref_matmul_f32(struct csi_tensor *mat0, struct csi_tensor *mat1, struct csi_tensor *output, struct matmul_params *params) { float *mat0_data = mat0->data; @@ -33,7 +31,7 @@ int csi_ref_matmul_f32(struct csi_tensor *mat0, int batches = 1; /* compute the outer size */ - for(int i = 0; i < dims_count - 2; i++ ){ + for (int i = 0; i < dims_count - 2; i++) { batches *= mat0->dim[i]; } @@ -42,9 +40,9 @@ int csi_ref_matmul_f32(struct csi_tensor *mat0, const int dim_j = mat1->dim[dims_count - (params->trans_b ? 2 : 1)]; const int mat0_offset = dim_i * dim_k; const int mat1_offset = dim_k * dim_j; - const int out_offset = dim_i * dim_j; + const int out_offset = dim_i * dim_j; - if ( !params->trans_a && !params->trans_b) { + if (!params->trans_a && !params->trans_b) { for (int b = 0; b < batches; ++b) { for (int i = 0; i < dim_i; ++i) { for (int j = 0; j < dim_j; ++j) { @@ -105,10 +103,8 @@ int csi_ref_matmul_f32(struct csi_tensor *mat0, return CSINN_TRUE; } -int csi_ref_matmul_quant(struct csi_tensor *mat0, - struct csi_tensor *mat1, - struct csi_tensor *output, - struct matmul_params *params) +int csi_ref_matmul_quant(struct csi_tensor *mat0, struct csi_tensor *mat1, + struct csi_tensor *output, struct matmul_params *params) { return csi_ref_diso_callback_base(mat0, mat1, output, params, csi_ref_matmul_f32); } diff --git a/source/reference/max.c b/source/reference/max.c index b67c8d07..36a6087f 100644 --- a/source/reference/max.c +++ b/source/reference/max.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,41 +16,36 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_max_stride_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_max_stride_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { - float *input_data = input->data; float *output_data = output->data; int32_t inner_size = 1; int32_t out_size = 1; - for (int32_t k = 0; k < params->n; k++) - { + for (int32_t k = 0; k < params->n; k++) { out_size *= params->out_extents[k]; } - for (int32_t k = 0; k < params->m; k++) - { + for (int32_t k = 0; k < params->m; k++) { inner_size *= params->inner_extents[k]; } - for (int32_t out = 0; out < out_size; out++) - { - + for (int32_t out = 0; out < out_size; out++) { float result = -FLT_MAX; - int32_t out_index = csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); - for (int32_t inner = 0; inner < inner_size; inner++) - { - int32_t index = out_index + csi_ref_get_reduction_index(inner, params->inner_strides, - params->inner_extents, params->m); + int32_t out_index = + csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); + for (int32_t inner = 0; inner < inner_size; inner++) { + int32_t index = + out_index + csi_ref_get_reduction_index(inner, params->inner_strides, + params->inner_extents, params->m); float val = input_data[index]; result = fmax(result, val); } @@ -60,8 +55,7 @@ int csi_ref_max_stride_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_max_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_max_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_max_stride_f32); diff --git a/source/reference/maximum.c b/source/reference/maximum.c index f43e59fe..56cdbcfb 100644 --- a/source/reference/maximum.c +++ b/source/reference/maximum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_maximum_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_maximum_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -40,10 +38,8 @@ int csi_ref_maximum_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_maximum_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_maximum_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_maximum_f32); } diff --git a/source/reference/maxpool.c b/source/reference/maxpool.c index 165636bf..085ba05b 100644 --- a/source/reference/maxpool.c +++ b/source/reference/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static int csi_ref_maxpool2d_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +static int csi_ref_maxpool2d_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -43,16 +42,19 @@ static int csi_ref_maxpool2d_nhwc_f32(struct csi_tensor *input, // Compute the boundaries of the filter region clamped so as to // ensure that the filter window fits in the input array. const int filter_x_start = csi_ref_max_internal_s32(0, -in_x_origin); - const int filter_x_end = csi_ref_min_internal_s32(params->filter_width, input_width - in_x_origin); + const int filter_x_end = + csi_ref_min_internal_s32(params->filter_width, input_width - in_x_origin); const int filter_y_start = csi_ref_max_internal_s32(0, -in_y_origin); - const int filter_y_end = csi_ref_min_internal_s32(params->filter_height, input_height - in_y_origin); + const int filter_y_end = + csi_ref_min_internal_s32(params->filter_height, input_height - in_y_origin); float max = -FLT_MAX; int filter_cnt = 0; for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) { for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) { const int in_x = in_x_origin + filter_x; const int in_y = in_y_origin + filter_y; - max = fmax(max, input_data[csi_ref_get_index(input->dim, batch, in_y, in_x, channel)]); + max = fmax(max, input_data[csi_ref_get_index(input->dim, batch, in_y, + in_x, channel)]); filter_cnt++; } } @@ -68,9 +70,8 @@ static int csi_ref_maxpool2d_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } -static int csi_ref_maxpool2d_nchw_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +static int csi_ref_maxpool2d_nchw_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -90,16 +91,19 @@ static int csi_ref_maxpool2d_nchw_f32(struct csi_tensor *input, // Compute the boundaries of the filter region clamped so as to // ensure that the filter window fits in the input array. const int filter_x_start = csi_ref_max_internal_s32(0, -in_x_origin); - const int filter_x_end = csi_ref_min_internal_s32(params->filter_width, input_width - in_x_origin); + const int filter_x_end = + csi_ref_min_internal_s32(params->filter_width, input_width - in_x_origin); const int filter_y_start = csi_ref_max_internal_s32(0, -in_y_origin); - const int filter_y_end = csi_ref_min_internal_s32(params->filter_height, input_height - in_y_origin); + const int filter_y_end = + csi_ref_min_internal_s32(params->filter_height, input_height - in_y_origin); float max = -FLT_MAX; int filter_cnt = 0; for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) { for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) { const int in_x = in_x_origin + filter_x; const int in_y = in_y_origin + filter_y; - max = fmax(max, input_data[csi_ref_get_index(input->dim, batch, channel, in_y, in_x)]); + max = fmax(max, input_data[csi_ref_get_index(input->dim, batch, channel, + in_y, in_x)]); filter_cnt++; } } @@ -115,9 +119,8 @@ static int csi_ref_maxpool2d_nchw_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_maxpool2d_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_maxpool2d_f32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { if (params->base.layout == CSINN_LAYOUT_NCHW) { csi_ref_maxpool2d_nchw_f32(input, output, params); @@ -128,9 +131,8 @@ int csi_ref_maxpool2d_f32(struct csi_tensor *input, } } -int csi_ref_maxpool2d_quant(struct csi_tensor *input, - struct csi_tensor *output, - struct pool_params *params) +int csi_ref_maxpool2d_quant(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_maxpool2d_f32); } diff --git a/source/reference/maxpool2d_locat.c b/source/reference/maxpool2d_locat.c index c4bcb9b8..f4645888 100644 --- a/source/reference/maxpool2d_locat.c +++ b/source/reference/maxpool2d_locat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static int csi_ref_maxpool2d_locat_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, +static int csi_ref_maxpool2d_locat_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params) { float *input_data = input->data; @@ -49,19 +48,22 @@ static int csi_ref_maxpool2d_locat_nhwc_f32(struct csi_tensor *input, const int filter_y_end = csi_ref_min_internal_s32(params->filter_height, input_height - in_y_origin); float max = FLT_MIN; - int locat = (in_y_origin + filter_y_start) * input->dim[2] + (in_x_origin + filter_x_start); + int locat = (in_y_origin + filter_y_start) * input->dim[2] + + (in_x_origin + filter_x_start); for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) { for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) { const int in_x = in_x_origin + filter_x; const int in_y = in_y_origin + filter_y; - int in_index = csi_ref_get_index(input->dim, batch, channel, in_y, in_x); - if (input_data[in_index] > max){ + int in_index = + csi_ref_get_index(input->dim, batch, channel, in_y, in_x); + if (input_data[in_index] > max) { max = input_data[in_index]; locat = in_y * input->dim[2] + in_x; } } } - output_data[csi_ref_get_index(output->dim, batch, out_y, out_x, channel)] = locat; + output_data[csi_ref_get_index(output->dim, batch, out_y, out_x, channel)] = + locat; } } } @@ -69,8 +71,7 @@ static int csi_ref_maxpool2d_locat_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } -static int csi_ref_maxpool2d_locat_nchw_f32(struct csi_tensor *input, - struct csi_tensor *output, +static int csi_ref_maxpool2d_locat_nchw_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params) { float *input_data = input->data; @@ -97,19 +98,22 @@ static int csi_ref_maxpool2d_locat_nchw_f32(struct csi_tensor *input, const int filter_y_end = csi_ref_min_internal_s32(params->filter_height, input_height - in_y_origin); float max = FLT_MIN; - int locat = (in_y_origin + filter_y_start) * input->dim[3] + (in_x_origin + filter_x_start); + int locat = (in_y_origin + filter_y_start) * input->dim[3] + + (in_x_origin + filter_x_start); for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) { for (int filter_x = filter_x_start; filter_x < filter_x_end; ++filter_x) { const int in_x = in_x_origin + filter_x; const int in_y = in_y_origin + filter_y; - int in_index = csi_ref_get_index(input->dim, batch, channel, in_y, in_x); - if (input_data[in_index] > max){ + int in_index = + csi_ref_get_index(input->dim, batch, channel, in_y, in_x); + if (input_data[in_index] > max) { max = input_data[in_index]; locat = in_y * input->dim[3] + in_x; } } } - output_data[csi_ref_get_index(output->dim, batch, channel, out_y, out_x)] = locat; + output_data[csi_ref_get_index(output->dim, batch, channel, out_y, out_x)] = + locat; } } } @@ -117,8 +121,7 @@ static int csi_ref_maxpool2d_locat_nchw_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_maxpool2d_locat_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_maxpool2d_locat_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params) { if (params->base.layout == CSINN_LAYOUT_NCHW) { @@ -131,8 +134,7 @@ int csi_ref_maxpool2d_locat_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_maxpool2d_locat_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_maxpool2d_locat_quant(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params) { struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); diff --git a/source/reference/maxpool3d.c b/source/reference/maxpool3d.c index 86adc635..66eb3587 100644 --- a/source/reference/maxpool3d.c +++ b/source/reference/maxpool3d.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,16 +16,15 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_maxpool3d_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_maxpool3d_f32(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params) { - float *input_data = (float *)input->data; + float *input_data = (float *)input->data; float *output_data = (float *)output->data; const int batch = input->dim[0]; @@ -37,41 +36,49 @@ int csi_ref_maxpool3d_f32(struct csi_tensor *input, const int out_height = output->dim[3]; const int out_width = output->dim[4]; - for(int in_ch=0; in_chstride_depth) - params->pad_front; const int in_h_origin = (out_h * params->stride_height) - params->pad_top; const int in_w_origin = (out_w * params->stride_width) - params->pad_left; // Compute the boundaries of the filter region clamped so as to // ensure that the filter window fits in the input array. const int filter_d_begin = csi_ref_max_internal_s32(0, -in_d_origin); - const int filter_d_end = csi_ref_min_internal_s32(params->filter_depth, in_depth - in_d_origin); + const int filter_d_end = + csi_ref_min_internal_s32(params->filter_depth, in_depth - in_d_origin); const int filter_h_begin = csi_ref_max_internal_s32(0, -in_h_origin); - const int filter_h_end = csi_ref_min_internal_s32(params->filter_height, in_height - in_h_origin); + const int filter_h_end = csi_ref_min_internal_s32(params->filter_height, + in_height - in_h_origin); const int filter_w_begin = csi_ref_max_internal_s32(0, -in_w_origin); - const int filter_w_end = csi_ref_min_internal_s32(params->filter_width, in_width - in_w_origin); + const int filter_w_end = + csi_ref_min_internal_s32(params->filter_width, in_width - in_w_origin); float max = -FLT_MAX; int filter_cnt = 0; - for(int filter_d=filter_d_begin; filter_ddim, in_ch, out_ch, in_d, in_h, in_w)]); + max = fmax(max, + input_data[csi_ref_get_index_5( + input->dim, in_ch, out_ch, in_d, in_h, in_w)]); filter_cnt++; } } } - if(filter_cnt != params->filter_depth * params->filter_height * params->filter_width) { + if (filter_cnt != + params->filter_depth * params->filter_height * params->filter_width) { max = fmax(max, 0); } - output_data[csi_ref_get_index_5(output->dim, in_ch, out_ch, out_d, out_h, out_w)] = max; + output_data[csi_ref_get_index_5(output->dim, in_ch, out_ch, out_d, out_h, + out_w)] = max; } } } @@ -80,8 +87,7 @@ int csi_ref_maxpool3d_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_maxpool3d_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_maxpool3d_quant(struct csi_tensor *input, struct csi_tensor *output, struct pool_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_maxpool3d_f32); diff --git a/source/reference/mean.c b/source/reference/mean.c index 95cbc3b9..19538b3d 100644 --- a/source/reference/mean.c +++ b/source/reference/mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,41 +16,36 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_mean_stride_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_mean_stride_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { - float *input_data = input->data; float *output_data = output->data; int32_t inner_size = 1; int32_t out_size = 1; - for (int32_t k = 0; k < params->n; k++) - { + for (int32_t k = 0; k < params->n; k++) { out_size *= params->out_extents[k]; } - for (int32_t k = 0; k < params->m; k++) - { + for (int32_t k = 0; k < params->m; k++) { inner_size *= params->inner_extents[k]; } - for (int32_t out = 0; out < out_size; out++) - { - + for (int32_t out = 0; out < out_size; out++) { float result = 0; - int32_t out_index = csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); - for (int32_t inner = 0; inner < inner_size; inner++) - { - int32_t index = out_index + csi_ref_get_reduction_index(inner, params->inner_strides, - params->inner_extents, params->m); + int32_t out_index = + csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); + for (int32_t inner = 0; inner < inner_size; inner++) { + int32_t index = + out_index + csi_ref_get_reduction_index(inner, params->inner_strides, + params->inner_extents, params->m); float val = input_data[index]; result += val; } @@ -60,15 +55,13 @@ int csi_ref_mean_stride_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_mean_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_mean_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_mean_stride_f32); } -int csi_ref_mean_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_mean_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { if (params->axis_count != 2 || params->axis[0] != 2 || params->axis[1] != 3 || @@ -82,4 +75,3 @@ int csi_ref_mean_quant(struct csi_tensor *input, csi_global_avgpool2d(input, output, &pparams); return CSINN_TRUE; } - diff --git a/source/reference/min.c b/source/reference/min.c index b13703a7..e9bf6201 100644 --- a/source/reference/min.c +++ b/source/reference/min.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,41 +16,36 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_min_stride_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_min_stride_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { - float *input_data = input->data; float *output_data = output->data; int32_t inner_size = 1; int32_t out_size = 1; - for (int32_t k = 0; k < params->n; k++) - { + for (int32_t k = 0; k < params->n; k++) { out_size *= params->out_extents[k]; } - for (int32_t k = 0; k < params->m; k++) - { + for (int32_t k = 0; k < params->m; k++) { inner_size *= params->inner_extents[k]; } - for (int32_t out = 0; out < out_size; out++) - { - + for (int32_t out = 0; out < out_size; out++) { float result = FLT_MAX; - int32_t out_index = csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); - for (int32_t inner = 0; inner < inner_size; inner++) - { - int32_t index = out_index + csi_ref_get_reduction_index(inner, params->inner_strides, - params->inner_extents, params->m); + int32_t out_index = + csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); + for (int32_t inner = 0; inner < inner_size; inner++) { + int32_t index = + out_index + csi_ref_get_reduction_index(inner, params->inner_strides, + params->inner_extents, params->m); float val = input_data[index]; result = fmin(result, val); } @@ -60,8 +55,7 @@ int csi_ref_min_stride_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_min_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_min_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_min_stride_f32); diff --git a/source/reference/minimum.c b/source/reference/minimum.c index 35336375..592d67c4 100644 --- a/source/reference/minimum.c +++ b/source/reference/minimum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_minimum_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_minimum_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -32,11 +30,11 @@ int csi_ref_minimum_f32(struct csi_tensor *input0, int size0 = csi_tensor_size(input0); int size1 = csi_tensor_size(input1); - if (size0 == size1){ + if (size0 == size1) { for (int i = 0; i < size0; i++) { output_data[i] = fmin(input0_data[i], input1_data[i]); } - }else{ + } else { if (size1 != 0 && size1 != 1) return CSINN_FALSE; for (int i = 0; i < size0; i++) { output_data[i] = fmin(input0_data[i], input1_data[0]); @@ -45,10 +43,8 @@ int csi_ref_minimum_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_minimum_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_minimum_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_minimum_f32); } diff --git a/source/reference/mod.c b/source/reference/mod.c index c10f97de..e3eeb58f 100644 --- a/source/reference/mod.c +++ b/source/reference/mod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,20 +16,18 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static void element_mod_f32(float *src0, float *src1, float *dest, - int input_idx, int output_idx) +static void element_mod_f32(float *src0, float *src1, float *dest, int input_idx, int output_idx) { - dest[output_idx] = src0[output_idx] - floor(src0[output_idx] / src1[output_idx]) * src1[input_idx]; + dest[output_idx] = + src0[output_idx] - floor(src0[output_idx] / src1[output_idx]) * src1[input_idx]; } -int csi_ref_mod_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_mod_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { struct csi_ref_diso_callback cb; @@ -39,10 +37,8 @@ int csi_ref_mod_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_mod_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_mod_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_mod_f32); } diff --git a/source/reference/mul.c b/source/reference/mul.c index 4a4f400f..e5d4424b 100644 --- a/source/reference/mul.c +++ b/source/reference/mul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,20 +16,17 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static void element_mul_f32(float *src0, float *src1, float *dest, - int input_idx, int output_idx) +static void element_mul_f32(float *src0, float *src1, float *dest, int input_idx, int output_idx) { dest[output_idx] = src0[output_idx] * src1[input_idx]; } -int csi_ref_mul_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_mul_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { struct csi_ref_diso_callback cb; @@ -39,10 +36,8 @@ int csi_ref_mul_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_mul_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_mul_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_mul_f32); } diff --git a/source/reference/ndarray_size.c b/source/reference/ndarray_size.c index f5e6c65c..a5fdba64 100644 --- a/source/reference/ndarray_size.c +++ b/source/reference/ndarray_size.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_ndarray_size_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ndarray_size_f32(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params) { float *output_data = output->data; @@ -30,8 +29,7 @@ int csi_ref_ndarray_size_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_ndarray_size_u8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ndarray_size_u8(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params) { uint8_t *output_data = output->data; @@ -39,8 +37,7 @@ int csi_ref_ndarray_size_u8(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_ndarray_size_i8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ndarray_size_i8(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params) { int8_t *output_data = output->data; @@ -48,8 +45,7 @@ int csi_ref_ndarray_size_i8(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_ndarray_size_i32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_ndarray_size_i32(struct csi_tensor *input, struct csi_tensor *output, struct ndarray_size_params *params) { int32_t *output_data = output->data; diff --git a/source/reference/negative.c b/source/reference/negative.c index 45610d15..8e69f492 100644 --- a/source/reference/negative.c +++ b/source/reference/negative.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_negative_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_negative_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; @@ -39,8 +37,7 @@ int csi_ref_negative_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_negative_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_negative_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_negative_f32); diff --git a/source/reference/non_max_suppression.c b/source/reference/non_max_suppression.c index ce99edbd..f6243272 100644 --- a/source/reference/non_max_suppression.c +++ b/source/reference/non_max_suppression.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,18 +16,17 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" - static int find_max_score_idx(const float *scores, int *flag, int len) { int res = 0; float max = FLT_MIN; - for(int i = 0; i < len; i++) { - if(scores[i] > max && !flag[i]) { + for (int i = 0; i < len; i++) { + if (scores[i] > max && !flag[i]) { max = scores[i]; res = i; } @@ -47,21 +46,21 @@ static float get_iou(const float *box1, const float *box2) float inter_area = fmax(0, x2 - x1) * fmax(0, y2 - y1); // compute the area of both the prediction and ground-truth rectangles float box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1]); - float box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1]);; - // compute the intersection over union by taking the intersection area and + float box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1]); + ; + // compute the intersection over union by taking the intersection area and // dividing it by the sum of prediction + ground-truth areas - the interesection area float iou = inter_area / (box1_area + box2_area - inter_area); return iou; } -int csi_ref_non_max_suppression_std(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct non_max_suppression_params *params) +int csi_ref_non_max_suppression_std(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, + struct non_max_suppression_params *params) { - float *boxes = (float *)input0->data; + float *boxes = (float *)input0->data; float *scores = (float *)input1->data; - int *indices = (int *)output->data; + int *indices = (int *)output->data; float iou_threshold = params->iou_threshold; int max_output_size = params->max_output_size; @@ -72,21 +71,21 @@ int csi_ref_non_max_suppression_std(struct csi_tensor *input0, int *flag = (int *)csi_mem_alloc(box_num * sizeof(int)); int box_cnt = 0; - while(box_num_exist) { + while (box_num_exist) { int max_box_idx = find_max_score_idx(scores, flag, box_num); flag[max_box_idx] = 1; box_num_exist--; *indices++ = max_box_idx; box_cnt++; - if(box_cnt == max_output_size) { + if (box_cnt == max_output_size) { break; } - for(int i = 0; i < box_num; i++) { - if(!flag[i]) { + for (int i = 0; i < box_num; i++) { + if (!flag[i]) { float *box1_addr = boxes + 4 * max_box_idx; float *box2_addr = boxes + 4 * i; float iou = get_iou(box1_addr, box2_addr); - if(iou > iou_threshold) { + if (iou > iou_threshold) { flag[i] = 1; box_num_exist--; } diff --git a/source/reference/not.c b/source/reference/not.c index 995dbabb..e2428d53 100644 --- a/source/reference/not.c +++ b/source/reference/not.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -#include -int csi_ref_not_u32(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_not_u32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { uint32_t *input_data = input->data; @@ -36,9 +33,7 @@ int csi_ref_not_u32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_not_u8(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_not_u8(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { uint8_t *input_data = input->data; uint8_t *output_data = output->data; @@ -50,9 +45,7 @@ int csi_ref_not_u8(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_not_i8(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_not_i8(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { int8_t *input_data = input->data; int8_t *output_data = output->data; diff --git a/source/reference/not_equal.c b/source/reference/not_equal.c index ff454e93..fdf6ac9a 100644 --- a/source/reference/not_equal.c +++ b/source/reference/not_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_not_equal_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_not_equal_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { float *input0_data = input0->data; float *input1_data = input1->data; @@ -37,10 +35,8 @@ int csi_ref_not_equal_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_not_equal_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_not_equal_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_not_equal_f32); } diff --git a/source/reference/or.c b/source/reference/or.c index c76afd11..ed692c4b 100644 --- a/source/reference/or.c +++ b/source/reference/or.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -#include -int csi_ref_or_u32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_or_u32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { uint32_t *input0_data = input0->data; @@ -37,9 +34,7 @@ int csi_ref_or_u32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_or_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_or_u8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { uint8_t *input0_data = input0->data; @@ -53,9 +48,7 @@ int csi_ref_or_u8(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_or_i8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_or_i8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { int8_t *input0_data = input0->data; diff --git a/source/reference/pad.c b/source/reference/pad.c index b2c6ba8a..1b732772 100644 --- a/source/reference/pad.c +++ b/source/reference/pad.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static int csi_ref_pad_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, +static int csi_ref_pad_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params) { const int output_batch = output->dim[0]; @@ -73,8 +72,7 @@ static int csi_ref_pad_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } -static int csi_ref_pad_nchw_f32(struct csi_tensor *input, - struct csi_tensor *output, +static int csi_ref_pad_nchw_f32(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params) { const int output_batch = output->dim[0]; @@ -125,9 +123,7 @@ static int csi_ref_pad_nchw_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_pad_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct pad_params *params) +int csi_ref_pad_f32(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params) { if (params->base.layout == CSINN_LAYOUT_NCHW) { csi_ref_pad_nchw_f32(input, output, params); @@ -138,8 +134,7 @@ int csi_ref_pad_f32(struct csi_tensor *input, } } -int csi_ref_pad_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_pad_quant(struct csi_tensor *input, struct csi_tensor *output, struct pad_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_pad_f32); diff --git a/source/reference/power.c b/source/reference/power.c index 8169c9da..da4692cc 100644 --- a/source/reference/power.c +++ b/source/reference/power.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,21 +16,18 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static void element_pow_f32(float *src0, float *src1, float *dest, - int input_idx, int output_idx) +static void element_pow_f32(float *src0, float *src1, float *dest, int input_idx, int output_idx) { dest[output_idx] = powf(src0[output_idx], src1[input_idx]); } -int csi_ref_power_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_power_f32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { struct csi_ref_diso_callback cb; @@ -39,10 +36,8 @@ int csi_ref_power_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_power_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_power_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_power_f32); } diff --git a/source/reference/prelu.c b/source/reference/prelu.c index d3712db5..8d417849 100644 --- a/source/reference/prelu.c +++ b/source/reference/prelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" @@ -56,10 +56,8 @@ int csi_ref_prelu_f32(struct csi_tensor *input, struct csi_tensor *alpha, struct return CSINN_TRUE; } -int csi_ref_prelu_quant(struct csi_tensor *input, - struct csi_tensor *alpha, - struct csi_tensor *output, - struct prelu_params *params) +int csi_ref_prelu_quant(struct csi_tensor *input, struct csi_tensor *alpha, + struct csi_tensor *output, struct prelu_params *params) { return csi_ref_diso_callback_base(input, alpha, output, params, csi_ref_prelu_f32); } diff --git a/source/reference/prod.c b/source/reference/prod.c index 2530b740..0a5e5efe 100644 --- a/source/reference/prod.c +++ b/source/reference/prod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,41 +16,36 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_prod_stride_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_prod_stride_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { - float *input_data = input->data; float *output_data = output->data; int32_t inner_size = 1; int32_t out_size = 1; - for (int32_t k = 0; k < params->n; k++) - { + for (int32_t k = 0; k < params->n; k++) { out_size *= params->out_extents[k]; } - for (int32_t k = 0; k < params->m; k++) - { + for (int32_t k = 0; k < params->m; k++) { inner_size *= params->inner_extents[k]; } - for (int32_t out = 0; out < out_size; out++) - { - + for (int32_t out = 0; out < out_size; out++) { float result = 1; - int32_t out_index = csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); - for (int32_t inner = 0; inner < inner_size; inner++) - { - int32_t index = out_index + csi_ref_get_reduction_index(inner, params->inner_strides, - params->inner_extents, params->m); + int32_t out_index = + csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); + for (int32_t inner = 0; inner < inner_size; inner++) { + int32_t index = + out_index + csi_ref_get_reduction_index(inner, params->inner_strides, + params->inner_extents, params->m); float val = input_data[index]; result *= val; } @@ -60,8 +55,7 @@ int csi_ref_prod_stride_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_prod_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_prod_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_prod_stride_f32); diff --git a/source/reference/proposal.c b/source/reference/proposal.c index 7d932601..01cdc8d5 100644 --- a/source/reference/proposal.c +++ b/source/reference/proposal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,324 +16,311 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ + +#include #include "csi_ref.h" #include "csi_utils.h" -#include #define MAX(a, b) (a > b ? a : b) #define MIN(a, b) (a > b ? b : a) struct bbox { - float x1; - float y1; - float x2; - float y2; + float x1; + float y1; + float x2; + float y2; }; -static struct bbox reg_iou(float x1, float y1, float x2, float y2, float dx1, - float dy1, float dx2, float dy2) +static struct bbox reg_iou(float x1, float y1, float x2, float y2, float dx1, float dy1, float dx2, + float dy2) { - struct bbox pred; - pred.x1 = x1 + dx1; - pred.y1 = y1 + dy1; - pred.x2 = x2 + dx2; - pred.y2 = y1 + dy2; - return pred; + struct bbox pred; + pred.x1 = x1 + dx1; + pred.y1 = y1 + dy1; + pred.x2 = x2 + dx2; + pred.y2 = y1 + dy2; + return pred; } -static struct bbox reg_bbox(float x1, float y1, float x2, float y2, float dx, float dy, - float dw, float dh) +static struct bbox reg_bbox(float x1, float y1, float x2, float y2, float dx, float dy, float dw, + float dh) { - float bbox_w = x2 - x1 + 1.0; - float bbox_h = y2 - y1 + 1.0; - float ctr_x = x1 + 0.5 * (bbox_w - 1.0); - float ctr_y = y1 + 0.5 * (bbox_h - 1.0); - - float pred_ctr_x = dx * bbox_w + ctr_x; - float pred_ctr_y = dy * bbox_h + ctr_y; - float pred_w = exp(dw) * bbox_w; - float pred_h = exp(dh) * bbox_h; - - struct bbox pred; - pred.x1 = pred_ctr_x - 0.5 * (pred_w - 1.0); - pred.y1 = pred_ctr_y - 0.5 * (pred_h - 1.0); - pred.x2 = pred_ctr_x + 0.5 * (pred_w - 1.0); - pred.y2 = pred_ctr_y + 0.5 * (pred_h - 1.0); - return pred; + float bbox_w = x2 - x1 + 1.0; + float bbox_h = y2 - y1 + 1.0; + float ctr_x = x1 + 0.5 * (bbox_w - 1.0); + float ctr_y = y1 + 0.5 * (bbox_h - 1.0); + + float pred_ctr_x = dx * bbox_w + ctr_x; + float pred_ctr_y = dy * bbox_h + ctr_y; + float pred_w = exp(dw) * bbox_w; + float pred_h = exp(dh) * bbox_h; + + struct bbox pred; + pred.x1 = pred_ctr_x - 0.5 * (pred_w - 1.0); + pred.y1 = pred_ctr_y - 0.5 * (pred_h - 1.0); + pred.x2 = pred_ctr_x + 0.5 * (pred_w - 1.0); + pred.y2 = pred_ctr_y + 0.5 * (pred_h - 1.0); + return pred; } static struct bbox generate_anchor(float ratio, float scale, int32_t base_size) { - float w, h; - w = h = (float)base_size; - float x_ctr = 0.5 * (w - 1.0); - float y_ctr = 0.5 * (h - 1.0); - float size = w * h; - int size_ratios = floor(size / ratio); - int new_w = floor(sqrt(size_ratios) + 0.5) * scale; - int new_h = floor((new_w / scale * ratio) + 0.5) * scale; - struct bbox _bbox; - _bbox.x1 = x_ctr - 0.5 * (new_w - 1.0); - _bbox.y1 = y_ctr - 0.5 * (new_h - 1.0); - _bbox.x2 = x_ctr + 0.5 * (new_w - 1.0); - _bbox.y2 = y_ctr + 0.5 * (new_h - 1.0); - - return _bbox; + float w, h; + w = h = (float)base_size; + float x_ctr = 0.5 * (w - 1.0); + float y_ctr = 0.5 * (h - 1.0); + float size = w * h; + int size_ratios = floor(size / ratio); + int new_w = floor(sqrt(size_ratios) + 0.5) * scale; + int new_h = floor((new_w / scale * ratio) + 0.5) * scale; + struct bbox _bbox; + _bbox.x1 = x_ctr - 0.5 * (new_w - 1.0); + _bbox.y1 = y_ctr - 0.5 * (new_h - 1.0); + _bbox.x2 = x_ctr + 0.5 * (new_w - 1.0); + _bbox.y2 = y_ctr + 0.5 * (new_h - 1.0); + + return _bbox; } -static float *predict_bbox(struct csi_tensor *cls_prob_tensor, - struct csi_tensor *bbox_pred_tensor, - struct csi_tensor *im_info_tensor, float *ratios, - int32_t ratios_num, float *scales, int32_t scales_num, - int32_t feature_stride, int32_t iou_loss, - int32_t rpn_min_size) +static float *predict_bbox(struct csi_tensor *cls_prob_tensor, struct csi_tensor *bbox_pred_tensor, + struct csi_tensor *im_info_tensor, float *ratios, int32_t ratios_num, + float *scales, int32_t scales_num, int32_t feature_stride, + int32_t iou_loss, int32_t rpn_min_size) { - int len_scales = scales_num; - int len_ratios = ratios_num; - int batch = cls_prob_tensor->dim[0]; - int num_anchors = cls_prob_tensor->dim[1]; - int height = cls_prob_tensor->dim[2]; - int width = cls_prob_tensor->dim[3]; - num_anchors = num_anchors / 2; - - float *cls_prob = cls_prob_tensor->data; - float *bbox_pred = bbox_pred_tensor->data; - float *im_info = im_info_tensor->data; - - float *output = - csi_mem_alloc(batch * height * width * num_anchors * 5 * sizeof(float)); - - for (int i = 0; i < batch * height * width; i++) { - int w = i % width; - int h = (i / width) % height; - int b = (i / width) / height; - - for (int k = 0; k < num_anchors; k++) { - int out_index = i * num_anchors + k; - float ratio = ratios[k / scales_num]; - float scale = scales[k % scales_num]; - struct bbox anchor = generate_anchor(ratio, scale, feature_stride); - int im_height = im_info[b * 3]; - int im_width = im_info[b * 3 + 1]; - int x1 = anchor.x1 + w * feature_stride; - int y1 = anchor.y1 + h * feature_stride; - int x2 = anchor.x2 + w * feature_stride; - int y2 = anchor.y2 + h * feature_stride; - - float *delta = csi_mem_alloc(4 * sizeof(float)); - for (int j = 0; j < 4; j++) { - delta[j] = - bbox_pred[(((b * num_anchors + k) * 4 + j) * height + h) * width + - w]; - } - struct bbox pred; - if (iou_loss) { - pred = reg_iou(x1, y1, x2, y2, delta[0], delta[1], delta[2], delta[3]); - } else { - pred = reg_bbox(x1, y1, x2, y2, delta[0], delta[1], delta[2], delta[3]); - } - pred.x1 = MAX(MIN(pred.x1, im_width - 1.0), 0.0); - pred.y1 = MAX(MIN(pred.y1, im_height - 1.0), 0.0); - pred.x2 = MAX(MIN(pred.x2, im_width - 1.0), 0.0); - pred.y2 = MAX(MIN(pred.y2, im_height - 1.0), 0.0); - - int real_height = im_height / feature_stride; - int real_width = im_width / feature_stride; - - float bbox_w = pred.x2 - pred.x1 + 1.0; - float bbox_h = pred.y2 - pred.y1 + 1.0; - int min_size = im_info[b * 3 + 2] * rpn_min_size; - - float pred_score = cls_prob[( - int)(((b * num_anchors * 2 + num_anchors + k) * height + h) * width + - w)]; - if ((h >= real_height) || (w >= real_width)) { - pred_score = -1; - } - output[out_index * 5 + 0] = pred.x1; - output[out_index * 5 + 1] = pred.y1; - output[out_index * 5 + 2] = pred.x2; - output[out_index * 5 + 3] = pred.y2; - output[out_index * 5 + 4] = pred_score; - if ((bbox_w < min_size) || (bbox_h < min_size)) { - output[out_index * 5 + 0] = output[out_index * 5 + 0] - min_size / 2.0; - output[out_index * 5 + 1] = output[out_index * 5 + 1] - min_size / 2.0; - output[out_index * 5 + 2] = output[out_index * 5 + 2] + min_size / 2.0; - output[out_index * 5 + 3] = output[out_index * 5 + 3] + min_size / 2.0; - output[out_index * 5 + 4] = -1.0; - } + int len_scales = scales_num; + int len_ratios = ratios_num; + int batch = cls_prob_tensor->dim[0]; + int num_anchors = cls_prob_tensor->dim[1]; + int height = cls_prob_tensor->dim[2]; + int width = cls_prob_tensor->dim[3]; + num_anchors = num_anchors / 2; + + float *cls_prob = cls_prob_tensor->data; + float *bbox_pred = bbox_pred_tensor->data; + float *im_info = im_info_tensor->data; + + float *output = csi_mem_alloc(batch * height * width * num_anchors * 5 * sizeof(float)); + + for (int i = 0; i < batch * height * width; i++) { + int w = i % width; + int h = (i / width) % height; + int b = (i / width) / height; + + for (int k = 0; k < num_anchors; k++) { + int out_index = i * num_anchors + k; + float ratio = ratios[k / scales_num]; + float scale = scales[k % scales_num]; + struct bbox anchor = generate_anchor(ratio, scale, feature_stride); + int im_height = im_info[b * 3]; + int im_width = im_info[b * 3 + 1]; + int x1 = anchor.x1 + w * feature_stride; + int y1 = anchor.y1 + h * feature_stride; + int x2 = anchor.x2 + w * feature_stride; + int y2 = anchor.y2 + h * feature_stride; + + float *delta = csi_mem_alloc(4 * sizeof(float)); + for (int j = 0; j < 4; j++) { + delta[j] = bbox_pred[(((b * num_anchors + k) * 4 + j) * height + h) * width + w]; + } + struct bbox pred; + if (iou_loss) { + pred = reg_iou(x1, y1, x2, y2, delta[0], delta[1], delta[2], delta[3]); + } else { + pred = reg_bbox(x1, y1, x2, y2, delta[0], delta[1], delta[2], delta[3]); + } + pred.x1 = MAX(MIN(pred.x1, im_width - 1.0), 0.0); + pred.y1 = MAX(MIN(pred.y1, im_height - 1.0), 0.0); + pred.x2 = MAX(MIN(pred.x2, im_width - 1.0), 0.0); + pred.y2 = MAX(MIN(pred.y2, im_height - 1.0), 0.0); + + int real_height = im_height / feature_stride; + int real_width = im_width / feature_stride; + + float bbox_w = pred.x2 - pred.x1 + 1.0; + float bbox_h = pred.y2 - pred.y1 + 1.0; + int min_size = im_info[b * 3 + 2] * rpn_min_size; + + float pred_score = + cls_prob[(int)(((b * num_anchors * 2 + num_anchors + k) * height + h) * width + w)]; + if ((h >= real_height) || (w >= real_width)) { + pred_score = -1; + } + output[out_index * 5 + 0] = pred.x1; + output[out_index * 5 + 1] = pred.y1; + output[out_index * 5 + 2] = pred.x2; + output[out_index * 5 + 3] = pred.y2; + output[out_index * 5 + 4] = pred_score; + if ((bbox_w < min_size) || (bbox_h < min_size)) { + output[out_index * 5 + 0] = output[out_index * 5 + 0] - min_size / 2.0; + output[out_index * 5 + 1] = output[out_index * 5 + 1] - min_size / 2.0; + output[out_index * 5 + 2] = output[out_index * 5 + 2] + min_size / 2.0; + output[out_index * 5 + 3] = output[out_index * 5 + 3] + min_size / 2.0; + output[out_index * 5 + 4] = -1.0; + } + } } - } - return output; + return output; } typedef struct { - int index; - float data; + int index; + float data; } index_value; static int argsort(const void *a, const void *b) { - return ((((index_value *)a)->data - ((index_value *)b)->data > 0) ? -1 : 1); + return ((((index_value *)a)->data - ((index_value *)b)->data > 0) ? -1 : 1); } static float calculate_overlap(float *out_tensor, int box_a_idx, int box_b_idx) { - float w = - MAX(0.0, MIN(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) - - MAX(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0); - float h = MAX( - 0.0, MIN(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) - - MAX(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0); - float i = w * h; - float u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx] + 1.0) * - (out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1] + 1.0) + - (out_tensor[box_b_idx + 2] - out_tensor[box_b_idx] + 1.0) * - (out_tensor[box_b_idx + 3] - out_tensor[box_b_idx + 1] + 1.0) - - i; - return i / u; + float w = MAX(0.0, MIN(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) - + MAX(out_tensor[box_a_idx], out_tensor[box_b_idx]) + 1.0); + float h = MAX(0.0, MIN(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) - + MAX(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1]) + 1.0); + float i = w * h; + float u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx] + 1.0) * + (out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1] + 1.0) + + (out_tensor[box_b_idx + 2] - out_tensor[box_b_idx] + 1.0) * + (out_tensor[box_b_idx + 3] - out_tensor[box_b_idx + 1] + 1.0) - + i; + return i / u; } -static float *compute_nms(int batch, int num_bbox, float *sorted_bbox, - float threshold) +static float *compute_nms(int batch, int num_bbox, float *sorted_bbox, float threshold) { - float *out = csi_mem_alloc(batch * num_bbox * sizeof(float)); - for (int b = 0; b < batch; b++) { - int base_idx = b * num_bbox; - for (int i = 0; i < num_bbox; i++) { - out[base_idx + i] = 0; - } - for (int l = 0; l < num_bbox - 1; l++) { - for (int i = 0; i < num_bbox; i++) { - if ((i < num_bbox) && (i > l)) { - if (out[base_idx + l] == 0) { - float iou = calculate_overlap(sorted_bbox, (base_idx + l) * 5, - (base_idx + i) * 5); - if (iou > threshold) { - out[base_idx + i] = 1; + float *out = csi_mem_alloc(batch * num_bbox * sizeof(float)); + for (int b = 0; b < batch; b++) { + int base_idx = b * num_bbox; + for (int i = 0; i < num_bbox; i++) { + out[base_idx + i] = 0; + } + for (int l = 0; l < num_bbox - 1; l++) { + for (int i = 0; i < num_bbox; i++) { + if ((i < num_bbox) && (i > l)) { + if (out[base_idx + l] == 0) { + float iou = + calculate_overlap(sorted_bbox, (base_idx + l) * 5, (base_idx + i) * 5); + if (iou > threshold) { + out[base_idx + i] = 1; + } + } + } } - } } - } } - } - return out; + return out; } -static float *prepare_output(float *sorted_bbox, float *remove_mask, int batch, - int num_bbox, int rpn_post_nms_top_n) +static float *prepare_output(float *sorted_bbox, float *remove_mask, int batch, int num_bbox, + int rpn_post_nms_top_n) { - int *i = csi_mem_alloc(batch * sizeof(int)); - int *nkeep = csi_mem_alloc(batch * sizeof(int)); - float *output = csi_mem_alloc(batch * rpn_post_nms_top_n * 5 * sizeof(int)); - - for (int b = 0; b < batch; b++) { - nkeep[b] = 0; - i[b] = 0; - } - for (int j = 0; j < num_bbox; j++) { + int *i = csi_mem_alloc(batch * sizeof(int)); + int *nkeep = csi_mem_alloc(batch * sizeof(int)); + float *output = csi_mem_alloc(batch * rpn_post_nms_top_n * 5 * sizeof(int)); + for (int b = 0; b < batch; b++) { - if (remove_mask[b * num_bbox + j] == 0) { - nkeep[b] = nkeep[b] + 1; - } + nkeep[b] = 0; + i[b] = 0; } - } - for (int b = 0; b < batch; b++) { - if (nkeep[b] > 0) { - int ceil_idx = ceil((float)rpn_post_nms_top_n / nkeep[b]); - for (int m = 0; m < ceil_idx; m++) { - for (int j = 0; j < num_bbox; j++) { - int offset_j = (b * num_bbox + j) * 5; - int offset_i = (b * rpn_post_nms_top_n + i[b]) * 5; - if ((i[b] < rpn_post_nms_top_n) && - (remove_mask[(b * num_bbox + j)] == 0)) { - output[offset_i] = b; - for (int k = 0; k < 4; k++) { - output[offset_i + k + 1] = sorted_bbox[offset_j + k]; + for (int j = 0; j < num_bbox; j++) { + for (int b = 0; b < batch; b++) { + if (remove_mask[b * num_bbox + j] == 0) { + nkeep[b] = nkeep[b] + 1; } - i[b] = i[b] + 1; - } } - } } - } - return output; + for (int b = 0; b < batch; b++) { + if (nkeep[b] > 0) { + int ceil_idx = ceil((float)rpn_post_nms_top_n / nkeep[b]); + for (int m = 0; m < ceil_idx; m++) { + for (int j = 0; j < num_bbox; j++) { + int offset_j = (b * num_bbox + j) * 5; + int offset_i = (b * rpn_post_nms_top_n + i[b]) * 5; + if ((i[b] < rpn_post_nms_top_n) && (remove_mask[(b * num_bbox + j)] == 0)) { + output[offset_i] = b; + for (int k = 0; k < 4; k++) { + output[offset_i + k + 1] = sorted_bbox[offset_j + k]; + } + i[b] = i[b] + 1; + } + } + } + } + } + return output; } -int csi_ref_proposal_f32(struct csi_tensor *cls_prob, - struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, - struct csi_tensor *output, +int csi_ref_proposal_f32(struct csi_tensor *cls_prob, struct csi_tensor *bbox_pred, + struct csi_tensor *im_info, struct csi_tensor *output, struct proposal_params *params) { - float *output_data = output->data; + float *output_data = output->data; - int num_anchors = params->scales_num * params->ratios_num; + int num_anchors = params->scales_num * params->ratios_num; - int batch = cls_prob->dim[0]; - int height = cls_prob->dim[2]; - int width = cls_prob->dim[3]; + int batch = cls_prob->dim[0]; + int height = cls_prob->dim[2]; + int width = cls_prob->dim[3]; - int num_bbox = height * width * num_anchors; - params->rpn_pre_nms_top_n = - params->rpn_pre_nms_top_n > 0 ? MIN(params->rpn_pre_nms_top_n, num_bbox) : num_bbox; + int num_bbox = height * width * num_anchors; + params->rpn_pre_nms_top_n = + params->rpn_pre_nms_top_n > 0 ? MIN(params->rpn_pre_nms_top_n, num_bbox) : num_bbox; - float *bbox = - predict_bbox(cls_prob, bbox_pred, im_info, params->ratios, params->ratios_num, params->scales, - params->scales_num, params->feature_stride, params->iou_loss, params->rpn_min_size); - index_value *score = csi_mem_alloc(batch * num_bbox * sizeof(index_value)); - for (int i = 0; i < batch; i++) { - for (int j = 0; j < num_bbox; j++) { - int id = j + i * num_bbox; - int lid = j * 5 + 4 + i * num_bbox; - score[id].index = id; - score[id].data = bbox[lid]; + float *bbox = predict_bbox(cls_prob, bbox_pred, im_info, params->ratios, params->ratios_num, + params->scales, params->scales_num, params->feature_stride, + params->iou_loss, params->rpn_min_size); + index_value *score = csi_mem_alloc(batch * num_bbox * sizeof(index_value)); + for (int i = 0; i < batch; i++) { + for (int j = 0; j < num_bbox; j++) { + int id = j + i * num_bbox; + int lid = j * 5 + 4 + i * num_bbox; + score[id].index = id; + score[id].data = bbox[lid]; + } } - } - - qsort(score, batch * num_bbox, sizeof(index_value), argsort); - - float *sorted_bbox = csi_mem_alloc(batch * params->rpn_pre_nms_top_n * 5 * sizeof(float)); - for (int b = 0; b < batch; b++) { - for (int i = 0; i < params->rpn_pre_nms_top_n; i++) { - int sorted_index = score[b * params->rpn_pre_nms_top_n + i].index; - for (int j = 0; j < 5; j++) { - int bbox_index = b * params->rpn_pre_nms_top_n + sorted_index * 5 + j; - int id = b * params->rpn_pre_nms_top_n + i * 5 + j; - sorted_bbox[id] = bbox[bbox_index]; - } + + qsort(score, batch * num_bbox, sizeof(index_value), argsort); + + float *sorted_bbox = csi_mem_alloc(batch * params->rpn_pre_nms_top_n * 5 * sizeof(float)); + for (int b = 0; b < batch; b++) { + for (int i = 0; i < params->rpn_pre_nms_top_n; i++) { + int sorted_index = score[b * params->rpn_pre_nms_top_n + i].index; + for (int j = 0; j < 5; j++) { + int bbox_index = b * params->rpn_pre_nms_top_n + sorted_index * 5 + j; + int id = b * params->rpn_pre_nms_top_n + i * 5 + j; + sorted_bbox[id] = bbox[bbox_index]; + } + } } - } - float *nms_remove_mask = - compute_nms(batch, params->rpn_pre_nms_top_n, sorted_bbox, params->threshold); - float *nms_out = prepare_output(sorted_bbox, nms_remove_mask, batch, - params->rpn_pre_nms_top_n, params->rpn_post_nms_top_n); + float *nms_remove_mask = + compute_nms(batch, params->rpn_pre_nms_top_n, sorted_bbox, params->threshold); + float *nms_out = prepare_output(sorted_bbox, nms_remove_mask, batch, params->rpn_pre_nms_top_n, + params->rpn_post_nms_top_n); - for (int i = 0; i < batch * params->rpn_post_nms_top_n * 5; i++) { - output_data[i] = nms_out[i]; - } + for (int i = 0; i < batch * params->rpn_post_nms_top_n * 5; i++) { + output_data[i] = nms_out[i]; + } - return CSINN_TRUE; + return CSINN_TRUE; } -int csi_ref_proposal_quant(struct csi_tensor *cls_prob, - struct csi_tensor *bbox_pred, - struct csi_tensor *im_info, - struct csi_tensor *output, +int csi_ref_proposal_quant(struct csi_tensor *cls_prob, struct csi_tensor *bbox_pred, + struct csi_tensor *im_info, struct csi_tensor *output, struct proposal_params *params) { float *scales = (float *)csi_mem_alloc(params->scales_num * sizeof(float)); - for(int i = 0; i < params->scales_num; i++){ - scales[i] = csi_ref_get_scale(params->scale_multipliers[i],params->scale_shifts[i]); + for (int i = 0; i < params->scales_num; i++) { + scales[i] = csi_ref_get_scale(params->scale_multipliers[i], params->scale_shifts[i]); } float *ratios = (float *)csi_mem_alloc(params->scales_num * sizeof(float)); - for(int i = 0; i < params->ratios_num; i++){ - ratios[i] = csi_ref_get_scale(params->ratio_multipliers[i],params->ratio_shifts[i]); + for (int i = 0; i < params->ratios_num; i++) { + ratios[i] = csi_ref_get_scale(params->ratio_multipliers[i], params->ratio_shifts[i]); } - float threshold = csi_ref_get_scale(params->threshold_multiplier,params->threshold_shift); + float threshold = csi_ref_get_scale(params->threshold_multiplier, params->threshold_shift); params->ratios = ratios; params->scales = scales; @@ -348,5 +335,4 @@ int csi_ref_proposal_quant(struct csi_tensor *cls_prob, csi_ref_tensor_transform_free_f32(fbbox); csi_ref_tensor_transform_free_f32(foutput); return CSINN_TRUE; - } diff --git a/source/reference/psroipooling.c b/source/reference/psroipooling.c index 5bac9279..90edea23 100644 --- a/source/reference/psroipooling.c +++ b/source/reference/psroipooling.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,16 +16,15 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ + +#include #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_psroipooling_f32(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct psroipooling_params *params) +int csi_ref_psroipooling_f32(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct psroipooling_params *params) { float *output_data = output->data; float *bottom_data = data->data; @@ -35,57 +34,51 @@ int csi_ref_psroipooling_f32(struct csi_tensor *data, int height = data->dim[2]; int num_rois = rois->dim[0]; - for(int n = 0; n < num_rois; n++){ - int roi_add = n * 5; + for (int n = 0; n < num_rois; n++) { + int roi_add = n * 5; float roi_start_w = (float)(round(bottom_rois[roi_add + 1]) * params->spatial_scale); float roi_start_h = (float)(round(bottom_rois[roi_add + 2]) * params->spatial_scale); - float roi_end_w = (float)(round(bottom_rois[roi_add + 3] + 1.0) * params->spatial_scale); - float roi_end_h = (float)(round(bottom_rois[roi_add + 4] + 1.0) * params->spatial_scale); + float roi_end_w = (float)(round(bottom_rois[roi_add + 3] + 1.0) * params->spatial_scale); + float roi_end_h = (float)(round(bottom_rois[roi_add + 4] + 1.0) * params->spatial_scale); float roi_height = fmaxf(roi_end_h - roi_start_h, 0.1); - float roi_width = fmaxf(roi_end_w - roi_start_w, 0.1); + float roi_width = fmaxf(roi_end_w - roi_start_w, 0.1); float bin_size_h = (float)(roi_height) / (float)(params->group_size); float bin_size_w = (float)(roi_width) / (float)(params->group_size); int ctop, ph, pw, h, w; - for (ctop = 0; ctop < params->output_dim; ++ctop) - { - for (ph = 0; ph < params->group_size; ++ph) - { - for (pw = 0; pw < params->group_size; ++pw) - { - int index = n * params->output_dim * params->group_size * params->group_size + \ - ctop * params->group_size * params->group_size + ph * params->group_size + pw; + for (ctop = 0; ctop < params->output_dim; ++ctop) { + for (ph = 0; ph < params->group_size; ++ph) { + for (pw = 0; pw < params->group_size; ++pw) { + int index = n * params->output_dim * params->group_size * params->group_size + + ctop * params->group_size * params->group_size + + ph * params->group_size + pw; - int hstart = (int)(floor((float)(ph) * bin_size_h + roi_start_h)); - int wstart = (int)(floor((float)(pw) * bin_size_w + roi_start_w)); - int hend = (int)(ceil((float)(ph + 1) * bin_size_h + roi_start_h)); - int wend = (int)(ceil((float)(pw + 1) * bin_size_w + roi_start_w)); + int hstart = (int)(floor((float)(ph)*bin_size_h + roi_start_h)); + int wstart = (int)(floor((float)(pw)*bin_size_w + roi_start_w)); + int hend = (int)(ceil((float)(ph + 1) * bin_size_h + roi_start_h)); + int wend = (int)(ceil((float)(pw + 1) * bin_size_w + roi_start_w)); hstart = fminf(fmaxf(hstart, 0), height); - hend = fminf(fmaxf(hend , 0), height); + hend = fminf(fmaxf(hend, 0), height); wstart = fminf(fmaxf(wstart, 0), width); - wend = fminf(fmaxf(wend , 0), width); + wend = fminf(fmaxf(wend, 0), width); int is_empty = (hend <= hstart) || (wend <= wstart); int gw = pw; int gh = ph; - int c = (ctop*params->group_size + gh)*params->group_size + gw; + int c = (ctop * params->group_size + gh) * params->group_size + gw; float out_sum = 0; - for (h = hstart; h < hend; ++h) - { - for (w = wstart; w < wend; ++w) - { + for (h = hstart; h < hend; ++h) { + for (w = wstart; w < wend; ++w) { int bottom_index = h * width + w; out_sum += bottom_data[c * height * width + bottom_index]; } } float bin_area = (hend - hstart) * (wend - wstart); - if (is_empty) - { + if (is_empty) { output_data[index] = 0; - }else - { - output_data[index] = out_sum/bin_area; + } else { + output_data[index] = out_sum / bin_area; } } } @@ -95,10 +88,8 @@ int csi_ref_psroipooling_f32(struct csi_tensor *data, return CSINN_TRUE; } -int csi_ref_psroipooling_quant(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct psroipooling_params *params) +int csi_ref_psroipooling_quant(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct psroipooling_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(data); diff --git a/source/reference/reduce_logsumexp.c b/source/reference/reduce_logsumexp.c index f23442d7..7cfa73af 100644 --- a/source/reference/reduce_logsumexp.c +++ b/source/reference/reduce_logsumexp.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,45 +16,44 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_reduce_logsumexp_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_logsumexp_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; - assert(params->axis_count==1); //the Function realization assumption axis_count=1 - //axis=none - if(*(params->axis) == -1) { + assert(params->axis_count == 1); // the Function realization assumption axis_count=1 + // axis=none + if (*(params->axis) == -1) { int size = 1; - for(int i=0; idim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { size = size * input->dim[i]; } float res = exp(*input_data); - for(int j = 1; j < size; j++) { + for (int j = 1; j < size; j++) { res = res + exp(input_data[j]); } *output_data = log(res); } else { int axis = *(params->axis); int64_t outer_size = 1; - for(int i = 0; i < axis; i++) { + for (int i = 0; i < axis; i++) { outer_size *= input->dim[i]; } int64_t inner_size = 1; - for(int i = axis + 1; i < input->dim_count; i++) { + for (int i = axis + 1; i < input->dim_count; i++) { inner_size *= input->dim[i]; } int cnt = input->dim[axis]; - for(int i = 0; i < outer_size; i++) { - for(int k = 0; k < inner_size; k++) { + for (int i = 0; i < outer_size; i++) { + for (int k = 0; k < inner_size; k++) { float temp = 0.0f; - for(int j = 0; j < cnt; j++) { + for (int j = 0; j < cnt; j++) { float input_val = *(input_data + j * inner_size + k); temp += exp(input_val); } @@ -67,8 +66,7 @@ int csi_ref_reduce_logsumexp_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_reduce_logsumexp_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_logsumexp_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_reduce_logsumexp_f32); diff --git a/source/reference/reduce_max.c b/source/reference/reduce_max.c index 23ef31f5..8ff1af2c 100644 --- a/source/reference/reduce_max.c +++ b/source/reference/reduce_max.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,45 +16,44 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_reduce_max_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_max_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; - assert(params->axis_count==1); //the Function realization assumption axis_count=1 - //axis=none - if(*(params->axis) == -1) { + assert(params->axis_count == 1); // the Function realization assumption axis_count=1 + // axis=none + if (*(params->axis) == -1) { int size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { size = size * input->dim[i]; } float res = *input_data; - for(int j = 1; j < size; j++) { + for (int j = 1; j < size; j++) { res = fmax(res, input_data[j]); } *output_data = res; } else { int axis = *(params->axis); int64_t outer_size = 1; - for(int i = 0; i < axis; i++) { + for (int i = 0; i < axis; i++) { outer_size *= input->dim[i]; } int64_t inner_size = 1; - for(int i = axis + 1; i < input->dim_count; i++) { + for (int i = axis + 1; i < input->dim_count; i++) { inner_size *= input->dim[i]; } int cnt = input->dim[axis]; - for(int i = 0; i < outer_size; i++) { - for(int k = 0; k < inner_size; k++) { + for (int i = 0; i < outer_size; i++) { + for (int k = 0; k < inner_size; k++) { float temp = *(input_data + k); - for(int j = 1; j < cnt; j++) { + for (int j = 1; j < cnt; j++) { temp = fmax(temp, *(input_data + j * inner_size + k)); } *(output_data + k) = temp; @@ -66,8 +65,7 @@ int csi_ref_reduce_max_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_reduce_max_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_max_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_reduce_max_f32); diff --git a/source/reference/reduce_mean.c b/source/reference/reduce_mean.c index 61fc3d2f..2c3be614 100644 --- a/source/reference/reduce_mean.c +++ b/source/reference/reduce_mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,45 +16,44 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_reduce_mean_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_mean_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; - assert(params->axis_count==1); //the Function realization assumption axis_count=1 - //axis=none - if(*(params->axis) == -1) { + assert(params->axis_count == 1); // the Function realization assumption axis_count=1 + // axis=none + if (*(params->axis) == -1) { int size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { size = size * input->dim[i]; } float res = 0.0f; - for(int j = 0; j < size; j++) { + for (int j = 0; j < size; j++) { res = res + input_data[j]; } *output_data = res / size; } else { int axis = *(params->axis); int64_t outer_size = 1; - for(int i = 0; i < axis; i++) { + for (int i = 0; i < axis; i++) { outer_size *= input->dim[i]; } int64_t inner_size = 1; - for(int i = axis + 1; i < input->dim_count; i++) { + for (int i = axis + 1; i < input->dim_count; i++) { inner_size *= input->dim[i]; } int cnt = input->dim[axis]; - for(int i = 0; i < outer_size; i++) { - for(int k = 0; k < inner_size; k++) { + for (int i = 0; i < outer_size; i++) { + for (int k = 0; k < inner_size; k++) { float temp = 0.0f; - for(int j = 0; j < cnt; j++) { + for (int j = 0; j < cnt; j++) { temp += *(input_data + j * inner_size + k); } *(output_data + k) = temp / cnt; @@ -66,8 +65,7 @@ int csi_ref_reduce_mean_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_reduce_mean_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_mean_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_reduce_mean_f32); diff --git a/source/reference/reduce_min.c b/source/reference/reduce_min.c index 4be86a54..2fef1a2a 100644 --- a/source/reference/reduce_min.c +++ b/source/reference/reduce_min.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,45 +16,44 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_reduce_min_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_min_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; - assert(params->axis_count==1); //the Function realization assumption axis_count=1 - //axis=none - if(*(params->axis) == -1) { + assert(params->axis_count == 1); // the Function realization assumption axis_count=1 + // axis=none + if (*(params->axis) == -1) { int size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { size = size * input->dim[i]; } float res = *input_data; - for(int j = 1; j < size; j++) { + for (int j = 1; j < size; j++) { res = fmin(res, input_data[j]); } *output_data = res; } else { int axis = *(params->axis); int64_t outer_size = 1; - for(int i = 0; i < axis; i++) { + for (int i = 0; i < axis; i++) { outer_size *= input->dim[i]; } int64_t inner_size = 1; - for(int i = axis + 1; i < input->dim_count; i++) { + for (int i = axis + 1; i < input->dim_count; i++) { inner_size *= input->dim[i]; } int cnt = input->dim[axis]; - for(int i = 0; i < outer_size; i++) { - for(int k = 0; k < inner_size; k++) { + for (int i = 0; i < outer_size; i++) { + for (int k = 0; k < inner_size; k++) { float temp = *(input_data + k); - for(int j = 1; j < cnt; j++) { + for (int j = 1; j < cnt; j++) { temp = fmin(temp, *(input_data + j * inner_size + k)); } *(output_data + k) = temp; @@ -66,8 +65,7 @@ int csi_ref_reduce_min_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_reduce_min_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_min_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_reduce_min_f32); diff --git a/source/reference/reduce_prod.c b/source/reference/reduce_prod.c index 1a6ac21e..a3f0f3f1 100644 --- a/source/reference/reduce_prod.c +++ b/source/reference/reduce_prod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,45 +16,44 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_reduce_prod_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_prod_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; - assert(params->axis_count==1); //the Function realization assumption axis_count=1 - //axis=none - if(*(params->axis) == -1) { + assert(params->axis_count == 1); // the Function realization assumption axis_count=1 + // axis=none + if (*(params->axis) == -1) { int size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { size = size * input->dim[i]; } float res = 1.0f; - for(int j = 0; j < size; j++) { + for (int j = 0; j < size; j++) { res = res * input_data[j]; } *output_data = res; } else { int axis = *(params->axis); int64_t outer_size = 1; - for(int i = 0; i < axis; i++) { + for (int i = 0; i < axis; i++) { outer_size *= input->dim[i]; } int64_t inner_size = 1; - for(int i = axis + 1; i < input->dim_count; i++) { + for (int i = axis + 1; i < input->dim_count; i++) { inner_size *= input->dim[i]; } int cnt = input->dim[axis]; - for(int i = 0; i < outer_size; i++) { - for(int k = 0; k < inner_size; k++) { + for (int i = 0; i < outer_size; i++) { + for (int k = 0; k < inner_size; k++) { float temp = 1.0f; - for(int j = 0; j < cnt; j++) { + for (int j = 0; j < cnt; j++) { temp *= *(input_data + j * inner_size + k); } *(output_data + k) = temp; @@ -66,8 +65,7 @@ int csi_ref_reduce_prod_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_reduce_prod_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_prod_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_reduce_prod_f32); diff --git a/source/reference/reduce_sum.c b/source/reference/reduce_sum.c index 2b2cb01e..e4380715 100644 --- a/source/reference/reduce_sum.c +++ b/source/reference/reduce_sum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,45 +16,44 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_reduce_sum_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_sum_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; - assert(params->axis_count==1); //the Function realization assumption axis_count=1 - //axis=none - if(*(params->axis) == -1) { + assert(params->axis_count == 1); // the Function realization assumption axis_count=1 + // axis=none + if (*(params->axis) == -1) { int size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { size = size * input->dim[i]; } float res = 0.0f; - for(int j = 0; j < size; j++) { + for (int j = 0; j < size; j++) { res = res + input_data[j]; } *output_data = res; } else { int axis = *(params->axis); int64_t outer_size = 1; - for(int i = 0; i < axis; i++) { + for (int i = 0; i < axis; i++) { outer_size *= input->dim[i]; } int64_t inner_size = 1; - for(int i = axis + 1; i < input->dim_count; i++) { + for (int i = axis + 1; i < input->dim_count; i++) { inner_size *= input->dim[i]; } int cnt = input->dim[axis]; - for(int i = 0; i < outer_size; i++) { - for(int k = 0; k < inner_size; k++) { + for (int i = 0; i < outer_size; i++) { + for (int k = 0; k < inner_size; k++) { float temp = 0.0f; - for(int j = 0; j < cnt; j++) { + for (int j = 0; j < cnt; j++) { temp += *(input_data + j * inner_size + k); } *(output_data + k) = temp; @@ -66,8 +65,7 @@ int csi_ref_reduce_sum_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_reduce_sum_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reduce_sum_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_reduce_sum_f32); diff --git a/source/reference/relu.c b/source/reference/relu.c index bc9a6fe0..0a6712a4 100644 --- a/source/reference/relu.c +++ b/source/reference/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,18 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -static float relu(float x){ - return x > 0 ? x : 0; -} +static float relu(float x) { return x > 0 ? x : 0; } -int csi_ref_relu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { float *input_data = input->data; @@ -36,15 +32,14 @@ int csi_ref_relu_f32(struct csi_tensor *input, for (int i = 0; i < input->dim_count; i++) { size = size * input->dim[i]; } - #pragma omp parallel for num_threads(8) +#pragma omp parallel for num_threads(8) for (int i = 0; i < size; i++) { output_data[i] = relu(input_data[i]); } return CSINN_TRUE; } -int csi_ref_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_relu_f32); diff --git a/source/reference/relu1.c b/source/reference/relu1.c index 247769a0..87f1985c 100644 --- a/source/reference/relu1.c +++ b/source/reference/relu1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,18 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -static float relu1(float x){ - return fmin(x > 0 ? x : 0, 1); -} +static float relu1(float x) { return fmin(x > 0 ? x : 0, 1); } -int csi_ref_relu1_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu1_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { float *input_data = input->data; @@ -43,8 +39,7 @@ int csi_ref_relu1_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_relu1_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu1_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_relu1_f32); diff --git a/source/reference/relu6.c b/source/reference/relu6.c index 7fb647af..08343ac9 100644 --- a/source/reference/relu6.c +++ b/source/reference/relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,18 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -static float relu6(float x){ - return fmin(x > 0 ? x : 0, 6); -} +static float relu6(float x) { return fmin(x > 0 ? x : 0, 6); } -int csi_ref_relu6_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu6_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { float *input_data = input->data; @@ -43,8 +39,7 @@ int csi_ref_relu6_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_relu6_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relu6_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_relu6_f32); diff --git a/source/reference/relun.c b/source/reference/relun.c index 0570e9de..c4d0f715 100644 --- a/source/reference/relun.c +++ b/source/reference/relun.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,17 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static float relun(float x, float y){ - return fmin(x > 0.0 ? x : 0.0, y); -} +static float relun(float x, float y) { return fmin(x > 0.0 ? x : 0.0, y); } -int csi_ref_relun_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relun_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { float *input_data = input->data; @@ -42,8 +39,7 @@ int csi_ref_relun_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_relun_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_relun_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_relun_f32); diff --git a/source/reference/reshape.c b/source/reference/reshape.c index 9540932f..3f7c1ad2 100644 --- a/source/reference/reshape.c +++ b/source/reference/reshape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,29 +16,27 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_reshape_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reshape_init(struct csi_tensor *input, struct csi_tensor *output, struct reshape_params *params) { - if (input->quant_channel == output->quant_channel){ + if (input->quant_channel == output->quant_channel) { int quant_size = input->quant_channel * sizeof(struct csi_quant_info); int t = memcmp(input->qinfo, output->qinfo, quant_size); - if (t == 0){ + if (t == 0) { params->base.bc = csi_ref_reshape; return CSINN_TRUE; } } - params->base.bc = csi_ref_reshape_requant; + params->base.bc = csi_ref_reshape_quant; return CSINN_TRUE; } -int csi_ref_reshape(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reshape(struct csi_tensor *input, struct csi_tensor *output, struct reshape_params *params) { float *input_data = input->data; @@ -50,9 +48,8 @@ int csi_ref_reshape(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_reshape_requant(struct csi_tensor *input, - struct csi_tensor *output, - struct reshape_params *params) +int csi_ref_reshape_quant(struct csi_tensor *input, struct csi_tensor *output, + struct reshape_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_reshape); } diff --git a/source/reference/resize.c b/source/reference/resize.c index 204ad07e..2d23be1b 100644 --- a/source/reference/resize.c +++ b/source/reference/resize.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" @@ -57,13 +57,13 @@ static void csi_ref_resize_bilinear_nhwc_f32(struct csi_tensor *input, struct cs for (int c = 0; c < depth; ++c) { float interpolation = (float)(input_data[csi_ref_get_index(input->dim, b, y0, x0, c)] * - (1 - (input_y - y0)) * (1 - (input_x - x0)) + - input_data[csi_ref_get_index(input->dim, b, y1, x0, c)] * - (input_y - y0) * (1 - (input_x - x0)) + - input_data[csi_ref_get_index(input->dim, b, y0, x1, c)] * - (1 - (input_y - y0)) * (input_x - x0) + - input_data[csi_ref_get_index(input->dim, b, y1, x1, c)] * - (input_y - y0) * (input_x - x0)); + (1 - (input_y - y0)) * (1 - (input_x - x0)) + + input_data[csi_ref_get_index(input->dim, b, y1, x0, c)] * + (input_y - y0) * (1 - (input_x - x0)) + + input_data[csi_ref_get_index(input->dim, b, y0, x1, c)] * + (1 - (input_y - y0)) * (input_x - x0) + + input_data[csi_ref_get_index(input->dim, b, y1, x1, c)] * + (input_y - y0) * (input_x - x0)); output_data[csi_ref_get_index(output->dim, b, y, x, c)] = interpolation; } } @@ -71,8 +71,9 @@ static void csi_ref_resize_bilinear_nhwc_f32(struct csi_tensor *input, struct cs } } - -/*reference https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h */ +/*reference + * https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h + */ static void csi_ref_resize_nearest_neighbor_f32(struct csi_tensor *input, struct csi_tensor *output, bool align_corners) { @@ -105,12 +106,16 @@ static void csi_ref_resize_nearest_neighbor_f32(struct csi_tensor *input, struct float *output_ptr = output_data; for (int b = 0; b < batches; ++b) { for (int y = 0; y < output_height; ++y) { - int32_t in_y = csi_ref_min_internal_s32(align_corners ? (int32_t)(round(y * height_scale)) - :(int32_t)(floor(y * height_scale)), input_height - 1); + int32_t in_y = + csi_ref_min_internal_s32(align_corners ? (int32_t)(round(y * height_scale)) + : (int32_t)(floor(y * height_scale)), + input_height - 1); const float *y_input_ptr = input_ptr + in_y * row_offset; for (int x = 0; x < output_width; ++x) { - int32_t in_x = csi_ref_min_internal_s32(align_corners?(int32_t)(round(x * width_scale)) - :(int32_t)(floor(x * width_scale)), input_width - 1); + int32_t in_x = + csi_ref_min_internal_s32(align_corners ? (int32_t)(round(x * width_scale)) + : (int32_t)(floor(x * width_scale)), + input_width - 1); const float *x_input_ptr = y_input_ptr + in_x * col_offset; memcpy(output_ptr, x_input_ptr, depth * sizeof(float)); output_ptr += depth; @@ -120,18 +125,19 @@ static void csi_ref_resize_nearest_neighbor_f32(struct csi_tensor *input, struct } } -static void csi_ref_resize_nearest_neighbor_nchw_f32(struct csi_tensor *o_input, struct csi_tensor *o_output, +static void csi_ref_resize_nearest_neighbor_nchw_f32(struct csi_tensor *o_input, + struct csi_tensor *o_output, bool align_corners) { - struct csi_tensor* input = csi_ref_nchw_to_nhwc_f32(o_input); - struct csi_tensor* output = csi_ref_nchw_to_nhwc_f32(o_output); + struct csi_tensor *input = csi_ref_nchw_to_nhwc_f32(o_input); + struct csi_tensor *output = csi_ref_nchw_to_nhwc_f32(o_output); csi_ref_resize_nearest_neighbor_f32(input, output, align_corners); csi_ref_nhwc_to_nchw_f32(o_output, output); csi_ref_free_float_tensor(input); } -static void csi_ref_resize_bilinear_nchw_f32(struct csi_tensor *o_input, struct csi_tensor *o_output, - bool align_corners) +static void csi_ref_resize_bilinear_nchw_f32(struct csi_tensor *o_input, + struct csi_tensor *o_output, bool align_corners) { struct csi_tensor *input = csi_ref_nchw_to_nhwc_f32(o_input); struct csi_tensor *output = csi_ref_nchw_to_nhwc_f32(o_output); @@ -140,20 +146,19 @@ static void csi_ref_resize_bilinear_nchw_f32(struct csi_tensor *o_input, struct csi_ref_free_float_tensor(input); } -int csi_ref_resize_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_resize_f32(struct csi_tensor *input, struct csi_tensor *output, struct resize_params *params) { if (params->resize_mode == CSINN_RESIZE_BILINEAR) { - if (params->base.layout == CSINN_LAYOUT_NCHW){ + if (params->base.layout == CSINN_LAYOUT_NCHW) { csi_ref_resize_bilinear_nchw_f32(input, output, params->align_corners); - }else{ + } else { csi_ref_resize_bilinear_nhwc_f32(input, output, params->align_corners); } } else if (params->resize_mode == CSINN_RESIZE_NEAREST_NEIGHBOR) { - if (params->base.layout == CSINN_LAYOUT_NCHW){ + if (params->base.layout == CSINN_LAYOUT_NCHW) { csi_ref_resize_nearest_neighbor_nchw_f32(input, output, params->align_corners); - }else{ + } else { csi_ref_resize_nearest_neighbor_f32(input, output, params->align_corners); } } else { @@ -162,8 +167,7 @@ int csi_ref_resize_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_resize_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_resize_quant(struct csi_tensor *input, struct csi_tensor *output, struct resize_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_resize_f32); diff --git a/source/reference/reverse.c b/source/reference/reverse.c index 5e128996..0130e865 100644 --- a/source/reference/reverse.c +++ b/source/reference/reverse.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" @@ -24,14 +24,13 @@ static int Multiplication(struct csi_tensor *input, int s, int e) { int res = 1; - for(int i=s; i<=e; i++) { + for (int i = s; i <= e; i++) { res = res * input->dim[i]; } return res; } -int csi_ref_reverse_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reverse_f32(struct csi_tensor *input, struct csi_tensor *output, struct reverse_params *params) { float *input_data = (float *)input->data; @@ -43,19 +42,19 @@ int csi_ref_reverse_f32(struct csi_tensor *input, } int axis = params->axis; int num = Multiplication(input, 0, axis) / (input->dim[axis]); - int step = Multiplication(input, axis, input->dim_count-1) / (input->dim[axis]); - int cnt = (input->dim[axis])/2; + int step = Multiplication(input, axis, input->dim_count - 1) / (input->dim[axis]); + int cnt = (input->dim[axis]) / 2; memcpy(output_data, input_data, size * sizeof(float)); - - for(int i=0; idim[axis]); - float *end_addr = start_addr + step*(input->dim[axis]) - 1; - for(int j=0; jdim[axis]); + float *end_addr = start_addr + step * (input->dim[axis]) - 1; + for (int j = 0; j < cnt; j++) { float *temp = (float *)csi_mem_alloc(step * sizeof(float)); - memcpy(temp, start_addr, step*sizeof(float)); - memcpy(start_addr, end_addr-step+1, step*sizeof(float)); - memcpy(end_addr-step+1, temp, step*sizeof(float)); + memcpy(temp, start_addr, step * sizeof(float)); + memcpy(start_addr, end_addr - step + 1, step * sizeof(float)); + memcpy(end_addr - step + 1, temp, step * sizeof(float)); start_addr += step; end_addr -= step; csi_mem_free(temp); @@ -64,8 +63,7 @@ int csi_ref_reverse_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_reverse_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_reverse_quant(struct csi_tensor *input, struct csi_tensor *output, struct reverse_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_reverse_f32); diff --git a/source/reference/roialign.c b/source/reference/roialign.c index 72096f2f..6f1783d8 100644 --- a/source/reference/roialign.c +++ b/source/reference/roialign.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,11 +16,9 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -#include -#include // https://github.com/AceCoooool/RoIAlign-RoIPool-pytorch/blob/master/roialign/roi_align_cpu.cpp @@ -30,15 +28,19 @@ struct PreCalc { float w1, w2, w3, w4; }; -static void pre_calc_for_bilinear(const int h, const int w, const int pool_h, const int pool_w, int b_grid_h, int b_grid_w, - float start_y, float start_x, float b_size_h, float b_size_w, struct PreCalc *pre_calc) { +static void pre_calc_for_bilinear(const int h, const int w, const int pool_h, const int pool_w, + int b_grid_h, int b_grid_w, float start_y, float start_x, + float b_size_h, float b_size_w, struct PreCalc *pre_calc) +{ int idx = 0; for (int ph = 0; ph < pool_h; ++ph) { for (int pw = 0; pw < pool_w; ++pw) { for (int iy = 0; iy < b_grid_h; ++iy) { - float yy = start_y + ph * b_size_h + (float)(iy + 0.5f) * b_size_h / (float)(b_grid_h); + float yy = + start_y + ph * b_size_h + (float)(iy + 0.5f) * b_size_h / (float)(b_grid_h); for (int ix = 0; ix < b_grid_w; ++ix) { - float xx = start_x + pw * b_size_w + (float)(ix + 0.5f) * b_size_w / (float)(b_grid_w); + float xx = + start_x + pw * b_size_w + (float)(ix + 0.5f) * b_size_w / (float)(b_grid_w); float x = xx, y = yy; // situation 1: out of range if (y < -1.0 || y > h || x < -1.0 || x > w) { @@ -50,8 +52,8 @@ static void pre_calc_for_bilinear(const int h, const int w, const int pool_h, co // not exceed 1.0 y = y <= 0 ? 0 : (y >= h - 1 ? h - 1 : y); x = x <= 0 ? 0 : (x >= w - 1 ? w - 1 : x); - int y_low = (int) y; - int x_low = (int) x; + int y_low = (int)y; + int x_low = (int)x; int y_high = y_low >= h - 1 ? y_low : y_low + 1; int x_high = x_low >= w - 1 ? x_low : x_low + 1; float ly = y - y_low, lx = x - x_low; @@ -72,10 +74,8 @@ static void pre_calc_for_bilinear(const int h, const int w, const int pool_h, co } } -int csi_ref_roi_align_f32(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct roi_align_params *params) +int csi_ref_roi_align_f32(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct roi_align_params *params) { float *bottom_rois = (float *)rois->data; float *input_data = (float *)data->data; @@ -86,8 +86,8 @@ int csi_ref_roi_align_f32(struct csi_tensor *data, int w = data->dim[3]; int n_rois = rois->dim[0]; - int pool_h = params->pooled_size_h; // output->dim[2] - int pool_w = params->pooled_size_w; // output->dim[3] + int pool_h = params->pooled_size_h; // output->dim[2] + int pool_w = params->pooled_size_w; // output->dim[3] int ratio = params->sample_ratio; for (int n = 0; n < n_rois; ++n) { @@ -112,7 +112,7 @@ int csi_ref_roi_align_f32(struct csi_tensor *data, // get each bin's corresponding position and weights struct PreCalc pre_calc[count * pool_h * pool_w]; pre_calc_for_bilinear(h, w, pool_h, pool_w, bin_grid_h, bin_grid_w, start_y, start_x, - bin_size_h, bin_size_w, pre_calc); + bin_size_h, bin_size_w, pre_calc); // map to feature map for (int c = 0; c < channel; ++c) { @@ -126,8 +126,9 @@ int csi_ref_roi_align_f32(struct csi_tensor *data, for (int iy = 0; iy < bin_grid_h; ++iy) { for (int ix = 0; ix < bin_grid_w; ++ix) { struct PreCalc pc = pre_calc[pre_calc_idx]; - output_val += pc.w1 * offset_feat[pc.pos1] + pc.w2 * offset_feat[pc.pos2] + - pc.w3 * offset_feat[pc.pos3] + pc.w4 * offset_feat[pc.pos4]; + output_val += + pc.w1 * offset_feat[pc.pos1] + pc.w2 * offset_feat[pc.pos2] + + pc.w3 * offset_feat[pc.pos3] + pc.w4 * offset_feat[pc.pos4]; pre_calc_idx += 1; } } diff --git a/source/reference/roipool.c b/source/reference/roipool.c index 5bfb889c..0047475b 100644 --- a/source/reference/roipool.c +++ b/source/reference/roipool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,18 +16,16 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#include "csi_ref.h" -#include "csi_utils.h" #include +#include "csi_ref.h" +#include "csi_utils.h" // https://github.com/pytorch/pytorch/blob/master/caffe2/operators/roi_pool_op.cc // defalut input layout: NCHW -int csi_ref_roipool_f32(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, +int csi_ref_roipool_f32(struct csi_tensor *data, struct csi_tensor *rois, struct csi_tensor *output, struct roi_pool_params *params) { float *output_data = (float *)output->data; @@ -37,23 +35,23 @@ int csi_ref_roipool_f32(struct csi_tensor *data, int batch = data->dim[0]; int channel = data->dim[1]; int height = data->dim[2]; - int width = data->dim[3]; + int width = data->dim[3]; int num_rois = rois->dim[0]; int pooled_height = params->pooled_size_h; - int pooled_width = params->pooled_size_w; + int pooled_width = params->pooled_size_w; - for(int n = 0; n < num_rois; n++) { + for (int n = 0; n < num_rois; n++) { int roi_add = n * 5; int roi_batch_idx = bottom_rois[roi_add]; assert(roi_batch_idx < batch); float roi_start_w = (float)round(bottom_rois[roi_add + 1] * params->spatial_scale); float roi_start_h = (float)round(bottom_rois[roi_add + 2] * params->spatial_scale); - float roi_end_w = (float)round(bottom_rois[roi_add + 3] * params->spatial_scale); - float roi_end_h = (float)round(bottom_rois[roi_add + 4] * params->spatial_scale); + float roi_end_w = (float)round(bottom_rois[roi_add + 3] * params->spatial_scale); + float roi_end_h = (float)round(bottom_rois[roi_add + 4] * params->spatial_scale); float roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); - float roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); + float roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); float bin_size_h = (float)(roi_height) / (float)(pooled_height); float bin_size_w = (float)(roi_width) / (float)(pooled_width); @@ -65,14 +63,14 @@ int csi_ref_roipool_f32(struct csi_tensor *data, // Compute pooling region for this output unit: // start (included) = floor(ph * roi_height / pooled_height_) // end (excluded) = ceil((ph + 1) * roi_height / pooled_height_) - int hstart = (int)(floor((float)(ph) * bin_size_h + roi_start_h)); - int wstart = (int)(floor((float)(pw) * bin_size_w + roi_start_w)); - int hend = (int)(ceil((float)(ph + 1) * bin_size_h + roi_start_h)); - int wend = (int)(ceil((float)(pw + 1) * bin_size_w + roi_start_w)); + int hstart = (int)(floor((float)(ph)*bin_size_h + roi_start_h)); + int wstart = (int)(floor((float)(pw)*bin_size_w + roi_start_w)); + int hend = (int)(ceil((float)(ph + 1) * bin_size_h + roi_start_h)); + int wend = (int)(ceil((float)(pw + 1) * bin_size_w + roi_start_w)); hstart = fminf(fmaxf(hstart, 0), height); - hend = fminf(fmaxf(hend , 0), height); + hend = fminf(fmaxf(hend, 0), height); wstart = fminf(fmaxf(wstart, 0), width); - wend = fminf(fmaxf(wend , 0), width); + wend = fminf(fmaxf(wend, 0), width); const int pool_index = ph * pooled_width + pw; int is_empty = (hend <= hstart) || (wend <= wstart); @@ -82,7 +80,7 @@ int csi_ref_roipool_f32(struct csi_tensor *data, for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { int index = h * width + w; - if(*(batch_data + index) > *(output_data + pool_index)) { + if (*(batch_data + index) > *(output_data + pool_index)) { *(output_data + pool_index) = *(batch_data + index); } } @@ -97,10 +95,8 @@ int csi_ref_roipool_f32(struct csi_tensor *data, return CSINN_TRUE; } -int csi_ref_roipool_quant(struct csi_tensor *data, - struct csi_tensor *rois, - struct csi_tensor *output, - struct roi_pool_params *params) +int csi_ref_roipool_quant(struct csi_tensor *data, struct csi_tensor *rois, + struct csi_tensor *output, struct roi_pool_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(data); diff --git a/source/reference/round.c b/source/reference/round.c index 04d53de9..92c715d0 100644 --- a/source/reference/round.c +++ b/source/reference/round.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_round_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_round_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; @@ -39,8 +37,7 @@ int csi_ref_round_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_round_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_round_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_round_f32); diff --git a/source/reference/rsqrt.c b/source/reference/rsqrt.c index 79417258..b9e05475 100644 --- a/source/reference/rsqrt.c +++ b/source/reference/rsqrt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_rsqrt_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_rsqrt_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; @@ -31,13 +29,12 @@ int csi_ref_rsqrt_f32(struct csi_tensor *input, int size = csi_tensor_size(input); for (int i = 0; i < size; i++) { - output_data[i] = 1.0/sqrt(input_data[i]); + output_data[i] = 1.0 / sqrt(input_data[i]); } return CSINN_TRUE; } -int csi_ref_rsqrt_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_rsqrt_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_rsqrt_f32); diff --git a/source/reference/scatter.c b/source/reference/scatter.c index 5d25b025..a207a3db 100644 --- a/source/reference/scatter.c +++ b/source/reference/scatter.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" @@ -25,45 +25,48 @@ int csi_ref_scatter_nd_f32(struct csi_tensor *input, struct csi_tensor *indices, struct csi_tensor *updates, struct csi_tensor *output, struct scatter_nd_params *params) { - if (input->dim_count != 5 && indices->dim[indices->dim_count - 1] != 5) { - return CSINN_FALSE; - } - float* input_data = (float*)input->data; - int32_t* indices_data = (int32_t*)indices->data; - float* updates_data = (float*)updates->data; - float* output_data = (float*)output->data; + if (input->dim_count != 5 && indices->dim[indices->dim_count - 1] != 5) { + return CSINN_FALSE; + } + float *input_data = (float *)input->data; + int32_t *indices_data = (int32_t *)indices->data; + float *updates_data = (float *)updates->data; + float *output_data = (float *)output->data; - int size = 1; - for (int i = 0; i < input->dim_count; i++) { - size = size * input->dim[i]; - } - for (int i = 0; i < size; i++) { - output_data[i] = input_data[i]; - } + int size = 1; + for (int i = 0; i < input->dim_count; i++) { + size = size * input->dim[i]; + } + for (int i = 0; i < size; i++) { + output_data[i] = input_data[i]; + } - for (int i = 0; i < indices->dim[0]; i++) { - for (int j = 0; j < indices->dim[1]; j++) { - for (int k = 0; k < indices->dim[2]; k++) { - for (int l = 0; l < indices->dim[3]; l++) { - for (int m = 0; m < indices->dim[4]; m++) { - int indices_base = - ((((i * indices->dim[1] + j) * indices->dim[2] + k) * indices->dim[3] + l) * - indices->dim[4] + m) * indices->dim[5]; + for (int i = 0; i < indices->dim[0]; i++) { + for (int j = 0; j < indices->dim[1]; j++) { + for (int k = 0; k < indices->dim[2]; k++) { + for (int l = 0; l < indices->dim[3]; l++) { + for (int m = 0; m < indices->dim[4]; m++) { + int indices_base = + ((((i * indices->dim[1] + j) * indices->dim[2] + k) * indices->dim[3] + + l) * + indices->dim[4] + + m) * + indices->dim[5]; - int output_index = - csi_ref_get_index_5(input->dim, indices_data[indices_base], - indices_data[indices_base + 1], indices_data[indices_base + 2], - indices_data[indices_base + 3], indices_data[indices_base + 4]); + int output_index = csi_ref_get_index_5( + input->dim, indices_data[indices_base], indices_data[indices_base + 1], + indices_data[indices_base + 2], indices_data[indices_base + 3], + indices_data[indices_base + 4]); - int updates_index = csi_ref_get_index_5(updates->dim, i, j, k, l, m); - output_data[output_index] = updates_data[updates_index]; - } + int updates_index = csi_ref_get_index_5(updates->dim, i, j, k, l, m); + output_data[output_index] = updates_data[updates_index]; + } + } + } } - } } - } - return CSINN_TRUE; + return CSINN_TRUE; } int csi_ref_scatter_nd_quant(struct csi_tensor *input, struct csi_tensor *indices, diff --git a/source/reference/segment_max.c b/source/reference/segment_max.c index 1dedf947..44598441 100644 --- a/source/reference/segment_max.c +++ b/source/reference/segment_max.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,114 +16,108 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_unsorted_segment_max_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_unsorted_segment_max_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { - float *input_data = input->data; - int *segment_data = segment_ids->data; + float *input_data = input->data; + int *segment_data = segment_ids->data; float *output_data = output->data; - int input_dim = input->dim_count; + int input_dim = input->dim_count; int num_segments = params->num_segments; - for(int n = 0; n < num_segments; n++) { + for (int n = 0; n < num_segments; n++) { /* init the outputdata data */ - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] = -FLT_MAX; } } } int flag = 0; - for(int i = 0; i < input->dim[0]; i++) { + for (int i = 0; i < input->dim[0]; i++) { if (segment_data[i] == n) { flag = 1; } if (flag) { - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t input_index = csi_ref_get_index(input->dim, i, h, w, c); int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); - output_data[output_index] - = input_data[input_index] > output_data[output_index] - ? input_data[input_index] : output_data[output_index]; + output_data[output_index] = + input_data[input_index] > output_data[output_index] + ? input_data[input_index] + : output_data[output_index]; } } } flag = 0; } } - } return CSINN_TRUE; } -int csi_ref_segment_max_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_segment_max_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { - float *input_data = input->data; - int *segment_data = segment_ids->data; + float *input_data = input->data; + int *segment_data = segment_ids->data; float *output_data = output->data; - int input_dim = input->dim_count; + int input_dim = input->dim_count; int num_segments = params->num_segments; int i = 0; - for(int n = 0; n < num_segments; n++) { + for (int n = 0; n < num_segments; n++) { /* init the outputdata data */ - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] = -FLT_MAX; } } } int flag = 0; - for(; i < input->dim[0]; i++) { + for (; i < input->dim[0]; i++) { if (segment_data[i] == n) { flag = 1; - } else { + } else { break; } if (flag) { - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t input_index = csi_ref_get_index(input->dim, i, h, w, c); int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); - output_data[output_index] - = input_data[input_index] > output_data[output_index] - ? input_data[input_index] : output_data[output_index]; + output_data[output_index] = + input_data[input_index] > output_data[output_index] + ? input_data[input_index] + : output_data[output_index]; } } } flag = 0; } } - } return CSINN_TRUE; } -int csi_ref_unsorted_segment_max_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_unsorted_segment_max_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); @@ -135,10 +129,8 @@ int csi_ref_unsorted_segment_max_quant(struct csi_tensor *input, return ret; } -int csi_ref_segment_max_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_segment_max_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); diff --git a/source/reference/segment_mean.c b/source/reference/segment_mean.c index 7e63edb4..def9a277 100644 --- a/source/reference/segment_mean.c +++ b/source/reference/segment_mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,29 +16,27 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_unsorted_segment_mean_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_unsorted_segment_mean_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { - float *input_data = input->data; - int *segment_data = segment_ids->data; + float *input_data = input->data; + int *segment_data = segment_ids->data; float *output_data = output->data; - int input_dim = input->dim_count; + int input_dim = input->dim_count; int num_segments = params->num_segments; int index[input->dim[0]]; - for(int n = 0; n < num_segments; n++) { + for (int n = 0; n < num_segments; n++) { /* init the outputdata data */ - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] = 0; } @@ -46,21 +44,21 @@ int csi_ref_unsorted_segment_mean_f32(struct csi_tensor *input, } int num = 0; - for(int i = 0; i < input->dim[0]; i++) { + for (int i = 0; i < input->dim[0]; i++) { if (segment_data[i] == n) { index[num] = i; num++; } } int mean_n = num; - if(num > 0) { - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + if (num > 0) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); - for(int k = 0; k < num; k++) { + for (int k = 0; k < num; k++) { int32_t input_index = csi_ref_get_index(input->dim, index[k], h, w, c); - output_data[output_index] += input_data[input_index]; + output_data[output_index] += input_data[input_index]; } output_data[output_index] /= mean_n; } @@ -72,32 +70,30 @@ int csi_ref_unsorted_segment_mean_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_segment_mean_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_segment_mean_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { - float *input_data = input->data; - int *segment_data = segment_ids->data; + float *input_data = input->data; + int *segment_data = segment_ids->data; float *output_data = output->data; - int input_dim = input->dim_count; + int input_dim = input->dim_count; int num_segments = params->num_segments; int index[input->dim[0]]; int i = 0; - for(int n = 0; n < num_segments; n++) { + for (int n = 0; n < num_segments; n++) { /* init the outputdata data */ - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] = 0; } } } int num = 0; - for(; i < input->dim[0]; i++) { + for (; i < input->dim[0]; i++) { if (segment_data[i] == n) { index[num] = i; num++; @@ -106,14 +102,14 @@ int csi_ref_segment_mean_f32(struct csi_tensor *input, } } int mean_n = num; - if(num > 0) { - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + if (num > 0) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); - for(int k = 0; k < num; k++) { + for (int k = 0; k < num; k++) { int32_t input_index = csi_ref_get_index(input->dim, index[k], h, w, c); - output_data[output_index] += input_data[input_index]; + output_data[output_index] += input_data[input_index]; } output_data[output_index] /= mean_n; } @@ -125,10 +121,8 @@ int csi_ref_segment_mean_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_unsorted_segment_mean_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_unsorted_segment_mean_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); @@ -140,10 +134,8 @@ int csi_ref_unsorted_segment_mean_quant(struct csi_tensor *input, return ret; } -int csi_ref_segment_mean_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_segment_mean_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); diff --git a/source/reference/segment_min.c b/source/reference/segment_min.c index 0fdfcfe0..8bdf984e 100644 --- a/source/reference/segment_min.c +++ b/source/reference/segment_min.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,114 +16,108 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_unsorted_segment_min_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_unsorted_segment_min_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { - float *input_data = input->data; - int *segment_data = segment_ids->data; + float *input_data = input->data; + int *segment_data = segment_ids->data; float *output_data = output->data; - int input_dim = input->dim_count; + int input_dim = input->dim_count; int num_segments = params->num_segments; - for(int n = 0; n < num_segments; n++) { + for (int n = 0; n < num_segments; n++) { /* init the outputdata data */ - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] = FLT_MAX; } } } int flag = 0; - for(int i = 0; i < input->dim[0]; i++) { + for (int i = 0; i < input->dim[0]; i++) { if (segment_data[i] == n) { flag = 1; } if (flag) { - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t input_index = csi_ref_get_index(input->dim, i, h, w, c); int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); - output_data[output_index] - = input_data[input_index] < output_data[output_index] - ? input_data[input_index] : output_data[output_index]; + output_data[output_index] = + input_data[input_index] < output_data[output_index] + ? input_data[input_index] + : output_data[output_index]; } } } flag = 0; } } - } return CSINN_TRUE; } -int csi_ref_segment_min_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_segment_min_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { - float *input_data = input->data; - int *segment_data = segment_ids->data; + float *input_data = input->data; + int *segment_data = segment_ids->data; float *output_data = output->data; - int input_dim = input->dim_count; + int input_dim = input->dim_count; int num_segments = params->num_segments; int i = 0; - for(int n = 0; n < num_segments; n++) { + for (int n = 0; n < num_segments; n++) { /* init the outputdata data */ - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] = FLT_MAX; } } } int flag = 0; - for(; i < input->dim[0]; i++) { + for (; i < input->dim[0]; i++) { if (segment_data[i] == n) { flag = 1; - } else { + } else { break; } if (flag) { - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t input_index = csi_ref_get_index(input->dim, i, h, w, c); int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); - output_data[output_index] - = input_data[input_index] < output_data[output_index] - ? input_data[input_index] : output_data[output_index]; + output_data[output_index] = + input_data[input_index] < output_data[output_index] + ? input_data[input_index] + : output_data[output_index]; } } } flag = 0; } } - } return CSINN_TRUE; } -int csi_ref_unsorted_segment_min_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_unsorted_segment_min_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); @@ -135,10 +129,8 @@ int csi_ref_unsorted_segment_min_quant(struct csi_tensor *input, return ret; } -int csi_ref_segment_min_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_segment_min_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); diff --git a/source/reference/segment_prod.c b/source/reference/segment_prod.c index 3af6c6d0..849cba97 100644 --- a/source/reference/segment_prod.c +++ b/source/reference/segment_prod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,42 +16,40 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_unsorted_segment_prod_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_unsorted_segment_prod_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { - float *input_data = input->data; - int *segment_data = segment_ids->data; + float *input_data = input->data; + int *segment_data = segment_ids->data; float *output_data = output->data; - int input_dim = input->dim_count; + int input_dim = input->dim_count; int num_segments = params->num_segments; - for(int n = 0; n < num_segments; n++) { + for (int n = 0; n < num_segments; n++) { /* init the outputdata data */ - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] = 1; } } } int flag = 0; - for(int i = 0; i < input->dim[0]; i++) { + for (int i = 0; i < input->dim[0]; i++) { if (segment_data[i] == n) { flag = 1; } if (flag) { - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t input_index = csi_ref_get_index(input->dim, i, h, w, c); int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] *= input_data[input_index]; @@ -61,46 +59,43 @@ int csi_ref_unsorted_segment_prod_f32(struct csi_tensor *input, flag = 0; } } - } return CSINN_TRUE; } -int csi_ref_segment_prod_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_segment_prod_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { - float *input_data = input->data; - int *segment_data = segment_ids->data; + float *input_data = input->data; + int *segment_data = segment_ids->data; float *output_data = output->data; - int input_dim = input->dim_count; + int input_dim = input->dim_count; int num_segments = params->num_segments; int i = 0; - for(int n = 0; n < num_segments; n++) { + for (int n = 0; n < num_segments; n++) { /* init the outputdata data */ - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] = 1; } } } int flag = 0; - for(; i < input->dim[0]; i++) { + for (; i < input->dim[0]; i++) { if (segment_data[i] == n) { flag = 1; - } else { + } else { break; } if (flag) { - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t input_index = csi_ref_get_index(input->dim, i, h, w, c); int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] *= input_data[input_index]; @@ -110,16 +105,13 @@ int csi_ref_segment_prod_f32(struct csi_tensor *input, flag = 0; } } - } return CSINN_TRUE; } -int csi_ref_unsorted_segment_prod_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_unsorted_segment_prod_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); @@ -131,10 +123,8 @@ int csi_ref_unsorted_segment_prod_quant(struct csi_tensor *input, return ret; } -int csi_ref_segment_prod_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_segment_prod_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); diff --git a/source/reference/segment_sum.c b/source/reference/segment_sum.c index 5fab1331..fe88dc7a 100644 --- a/source/reference/segment_sum.c +++ b/source/reference/segment_sum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,29 +16,27 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_unsorted_segment_sum_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_unsorted_segment_sum_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { - float *input_data = input->data; - int *segment_data = segment_ids->data; + float *input_data = input->data; + int *segment_data = segment_ids->data; float *output_data = output->data; - int input_dim = input->dim_count; + int input_dim = input->dim_count; int num_segments = params->num_segments; int index[input->dim[0]]; - for(int n = 0; n < num_segments; n++) { + for (int n = 0; n < num_segments; n++) { /* init the outputdata data */ - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] = 0; } @@ -46,19 +44,19 @@ int csi_ref_unsorted_segment_sum_f32(struct csi_tensor *input, } int num = 0; - for(int i = 0; i < input->dim[0]; i++) { + for (int i = 0; i < input->dim[0]; i++) { if (segment_data[i] == n) { index[num] = i; num++; } } while (num--) { - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t input_index = csi_ref_get_index(input->dim, index[num], h, w, c); int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); - output_data[output_index] += input_data[input_index]; + output_data[output_index] += input_data[input_index]; } } } @@ -68,32 +66,30 @@ int csi_ref_unsorted_segment_sum_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_segment_sum_f32(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_segment_sum_f32(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { - float *input_data = input->data; - int *segment_data = segment_ids->data; + float *input_data = input->data; + int *segment_data = segment_ids->data; float *output_data = output->data; - int input_dim = input->dim_count; + int input_dim = input->dim_count; int num_segments = params->num_segments; int index[input->dim[0]]; int i = 0; - for(int n = 0; n < num_segments; n++) { + for (int n = 0; n < num_segments; n++) { /* init the outputdata data */ - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); output_data[output_index] = 0; } } } int num = 0; - for(; i < input->dim[0]; i++) { + for (; i < input->dim[0]; i++) { if (segment_data[i] == n) { index[num] = i; num++; @@ -102,12 +98,12 @@ int csi_ref_segment_sum_f32(struct csi_tensor *input, } } while (num--) { - for(int h = 0; h < input->dim[1]; h++) { - for(int w = 0; w < input->dim[2]; w++) { - for(int c = 0; c < input->dim[3]; c++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { + for (int c = 0; c < input->dim[3]; c++) { int32_t input_index = csi_ref_get_index(input->dim, index[num], h, w, c); int32_t output_index = csi_ref_get_index(input->dim, n, h, w, c); - output_data[output_index] += input_data[input_index]; + output_data[output_index] += input_data[input_index]; } } } @@ -117,10 +113,8 @@ int csi_ref_segment_sum_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_unsorted_segment_sum_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_unsorted_segment_sum_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); @@ -132,10 +126,8 @@ int csi_ref_unsorted_segment_sum_quant(struct csi_tensor *input, return ret; } -int csi_ref_segment_sum_quant(struct csi_tensor *input, - struct csi_tensor *segment_ids, - struct csi_tensor *output, - struct segment_params *params) +int csi_ref_segment_sum_quant(struct csi_tensor *input, struct csi_tensor *segment_ids, + struct csi_tensor *output, struct segment_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); diff --git a/source/reference/select.c b/source/reference/select.c index 004f6209..cce83c71 100644 --- a/source/reference/select.c +++ b/source/reference/select.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,61 +16,55 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_select_f32(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_select_f32(struct csi_tensor *condition, struct csi_tensor *input0, + struct csi_tensor *input1, struct csi_tensor *output, struct select_params *params) { - float *input_data0 = input0->data; - float *input_data1 = input1->data; + float *input_data0 = input0->data; + float *input_data1 = input1->data; float *conlist_data = condition->data; - float *output_data = output->data; + float *output_data = output->data; int size = csi_tensor_size(input0); for (int i = 0; i < size; i++) { - output_data[i] = conlist_data[i] ? input_data0[i]:input_data1[i]; + output_data[i] = conlist_data[i] ? input_data0[i] : input_data1[i]; } return CSINN_TRUE; } -int csi_ref_select_u8(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_select_u8(struct csi_tensor *condition, struct csi_tensor *input0, + struct csi_tensor *input1, struct csi_tensor *output, struct select_params *params) { - uint8_t *input_data0 = input0->data; - uint8_t *input_data1 = input1->data; + uint8_t *input_data0 = input0->data; + uint8_t *input_data1 = input1->data; uint8_t *conlist_data = condition->data; - uint8_t *output_data = output->data; + uint8_t *output_data = output->data; int size = csi_tensor_size(input0); for (int i = 0; i < size; i++) { - output_data[i] = conlist_data[i] ? input_data0[i]:input_data1[i]; + output_data[i] = conlist_data[i] ? input_data0[i] : input_data1[i]; } return CSINN_TRUE; } -int csi_ref_select_i8(struct csi_tensor *condition, - struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_select_i8(struct csi_tensor *condition, struct csi_tensor *input0, + struct csi_tensor *input1, struct csi_tensor *output, struct select_params *params) { - int8_t *input_data0 = input0->data; - int8_t *input_data1 = input1->data; + int8_t *input_data0 = input0->data; + int8_t *input_data1 = input1->data; int8_t *conlist_data = condition->data; - int8_t *output_data = output->data; + int8_t *output_data = output->data; int size = csi_tensor_size(input0); for (int i = 0; i < size; i++) { - output_data[i] = conlist_data[i] ? input_data0[i]:input_data1[i]; + output_data[i] = conlist_data[i] ? input_data0[i] : input_data1[i]; } return CSINN_TRUE; } diff --git a/source/reference/setup.c b/source/reference/setup.c index e186820f..9ad91e4b 100644 --- a/source/reference/setup.c +++ b/source/reference/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" @@ -26,15 +26,18 @@ void *csi_init_map_ref(int op, int dtype) return csi_ref_flatten_init; } else if (op == CSINN_OP_RESHAPE) { return csi_ref_reshape_init; - } else if(op == CSINN_OP_TRANSPOSE) { + } else if (op == CSINN_OP_TRANSPOSE) { return csi_ref_transpose_init; + } else if (op == CSINN_OP_CACHE_MATMUL) { + return csi_ref_cache_matmul_init; + } else if (op == CSINN_OP_CACHE_CONV1D) { + return csi_ref_cache_conv1d_init; } return NULL; } -void csi_ref_nn_init(struct csi_tensor *input, - struct csi_tensor *output) +void csi_ref_nn_init(struct csi_tensor *input, struct csi_tensor *output) { int size = 1; for (int i = 0; i < input->dim_count; i++) { @@ -42,13 +45,36 @@ void csi_ref_nn_init(struct csi_tensor *input, } int q_size = output->quant_channel; int inner_size = size / q_size; - if (output->dtype == CSINN_DTYPE_UINT8){ + if (output->dtype == CSINN_DTYPE_INT4) { + float *input_data = input->data; + int8_t *output_data = output->data; + for (int i = 0; i < q_size; i++) { + for (int j = 0; j < inner_size; j++) { + int index = i * inner_size + j; + int32_t input_val = + round(input_data[index] / output->qinfo[i].scale) + output->qinfo[i].zero_point; + if (input_val < -8) { + input_val = -8; + } else if (input_val > 7) { + input_val = 7; + } + int out_index = index / 2; + /* int4 little endian */ + if (index % 2) { + output_data[out_index] = (output_data[out_index] & 0xf) | (input_val << 4); + } else { + output_data[out_index] = (output_data[out_index] & 0xf0) | (input_val & 0xf); + } + } + } + } else if (output->dtype == CSINN_DTYPE_UINT8) { float *input_data = input->data; uint8_t *output_data = output->data; - for (int i = 0; i < q_size; i++){ + for (int i = 0; i < q_size; i++) { for (int j = 0; j < inner_size; j++) { int index = i * inner_size + j; - int32_t input_val = round(input_data[index] / output->qinfo[i].scale) + output->qinfo[i].zero_point; + int32_t input_val = + round(input_data[index] / output->qinfo[i].scale) + output->qinfo[i].zero_point; if (input_val < 0) { input_val = 0; } else if (input_val > 255) { @@ -57,21 +83,38 @@ void csi_ref_nn_init(struct csi_tensor *input, output_data[index] = input_val; } } - } else if (output->dtype == CSINN_DTYPE_INT8){ + } else if (output->dtype == CSINN_DTYPE_INT8) { float *input_data = input->data; int8_t *output_data = output->data; for (int i = 0; i < q_size; i++) { for (int j = 0; j < inner_size; j++) { int index = i * inner_size + j; - int32_t input_val = round(input_data[index] / output->qinfo[i].scale) + output->qinfo[i].zero_point; + int32_t input_val = + round(input_data[index] / output->qinfo[i].scale) + output->qinfo[i].zero_point; if (input_val < -127) { - input_val = 0; + input_val = -127; } else if (input_val > 127) { input_val = 127; } output_data[index] = input_val; } } + } else if (output->dtype == CSINN_DTYPE_INT16) { + float *input_data = input->data; + int16_t *output_data = output->data; + for (int i = 0; i < q_size; i++) { + for (int j = 0; j < inner_size; j++) { + int index = i * inner_size + j; + int32_t input_val = + round(input_data[index] / output->qinfo[i].scale) + output->qinfo[i].zero_point; + if (input_val < -32768) { + input_val = -32768; + } else if (input_val > 32767) { + input_val = 32767; + } + output_data[index] = input_val; + } + } } else if (output->dtype == CSINN_DTYPE_FLOAT16) { float *input_data = input->data; int16_t *output_data = output->data; @@ -81,13 +124,21 @@ void csi_ref_nn_init(struct csi_tensor *input, output_data[index] = csi_ref_float32_to_float16(input_data[index]); } } + } else if (output->dtype == CSINN_DTYPE_BFLOAT16) { + float *input_data = input->data; + int16_t *output_data = output->data; + for (int i = 0; i < q_size; i++) { + for (int j = 0; j < inner_size; j++) { + int index = i * inner_size + j; + output_data[index] = csi_ref_float32_to_bfloat16(input_data[index]); + } + } } else { csi_debug_error("csi_ref_nn_init: unsupport dtype\n"); } } -void csi_ref_nn_deinit(struct csi_tensor *input, - struct csi_tensor *output) +void csi_ref_nn_deinit(struct csi_tensor *input, struct csi_tensor *output) { int size = 1; for (int i = 0; i < input->dim_count; i++) { @@ -95,7 +146,28 @@ void csi_ref_nn_deinit(struct csi_tensor *input, } int q_size = input->quant_channel; int inner_size = size / q_size; - if (input->dtype == CSINN_DTYPE_UINT8){ + if (input->dtype == CSINN_DTYPE_INT4) { + int8_t *input_data = input->data; + float *output_data = output->data; + for (int i = 0; i < q_size; i++) { + for (int j = 0; j < inner_size; j++) { + int index = i * inner_size + j; + int in_index = index / 2; + float x; + int8_t tmp_in = 0; + /* int4 little endian */ + if (index % 2) { + tmp_in = input_data[in_index] & 0xf0; + x = tmp_in >> 4; + } else { + tmp_in = (input_data[in_index] & 0xf) << 4; + x = tmp_in >> 4; + } + x -= input->qinfo[i].zero_point; + output_data[index] = x * input->qinfo[i].scale; + } + } + } else if (input->dtype == CSINN_DTYPE_UINT8) { uint8_t *input_data = input->data; float *output_data = output->data; for (int i = 0; i < q_size; i++) { @@ -106,7 +178,7 @@ void csi_ref_nn_deinit(struct csi_tensor *input, output_data[index] = x * input->qinfo[i].scale; } } - } else if (input->dtype == CSINN_DTYPE_INT8){ + } else if (input->dtype == CSINN_DTYPE_INT8) { int8_t *input_data = input->data; float *output_data = output->data; for (int i = 0; i < q_size; i++) { @@ -117,9 +189,9 @@ void csi_ref_nn_deinit(struct csi_tensor *input, output_data[index] = x * input->qinfo[i].scale; } } - } else if (input->dtype == CSINN_DTYPE_INT32){ + } else if (input->dtype == CSINN_DTYPE_INT32) { int size = csi_tensor_size(input); - memcpy(output->data, input->data, size*4); + memcpy(output->data, input->data, size * 4); } else if (input->dtype == CSINN_DTYPE_FLOAT16) { int16_t *input_data = input->data; float *output_data = output->data; @@ -129,6 +201,15 @@ void csi_ref_nn_deinit(struct csi_tensor *input, output_data[index] = csi_ref_float16_to_float32(input_data[index]); } } + } else if (input->dtype == CSINN_DTYPE_BFLOAT16) { + int16_t *input_data = input->data; + float *output_data = output->data; + for (int i = 0; i < q_size; i++) { + for (int j = 0; j < inner_size; j++) { + int index = i * inner_size + j; + output_data[index] = csi_ref_bfloat16_to_float32(input_data[index]); + } + } } else if (input->dtype == CSINN_DTYPE_BOOL) { int size = csi_tensor_size(input); memcpy(output->data, input->data, size); @@ -139,8 +220,8 @@ void csi_ref_nn_deinit(struct csi_tensor *input, static void *setup_bc_map() { - static void* bc_map[CSINN_OP_AND_UTILS_SIZE][CSINN_DTYPE_SIZE]; - for (int i = CSINN_DTYPE_UINT8; i <= CSINN_DTYPE_FLOAT16; i++) { + static void *bc_map[CSINN_OP_AND_UTILS_SIZE][CSINN_DTYPE_SIZE]; + for (int i = CSINN_DTYPE_INT4; i <= CSINN_DTYPE_BFLOAT16; i++) { bc_map[CSINN_OP_ABS][i] = csi_ref_abs_quant; bc_map[CSINN_OP_ACOS][i] = csi_ref_acos_quant; bc_map[CSINN_OP_ACOSH][i] = csi_ref_acosh_quant; @@ -157,12 +238,15 @@ static void *setup_bc_map() bc_map[CSINN_OP_BN][i] = csi_ref_batch_normalization_quant; bc_map[CSINN_OP_BATCH_TO_SPACE][i] = csi_ref_batch_to_space_quant; bc_map[CSINN_OP_BROADCOST][i] = csi_ref_broadcast_to_quant; + bc_map[CSINN_OP_CACHE_MATMUL][i] = csi_ref_cache_matmul_quant; + bc_map[CSINN_OP_CACHE_CONV1D][i] = csi_ref_cache_conv1d_quant; bc_map[CSINN_OP_CEIL][i] = csi_ref_ceil_quant; bc_map[CSINN_OP_CLIP][i] = csi_ref_clip_quant; bc_map[CSINN_OP_CONCAT][i] = csi_ref_concat_quant; bc_map[CSINN_OP_COS][i] = csi_ref_cos_quant; bc_map[CSINN_OP_COSH][i] = csi_ref_cosh_quant; bc_map[CSINN_OP_CUMPROD][i] = csi_ref_cumprod_quant; + bc_map[CSINN_OP_DATA_CONVERT][i] = csi_ref_data_convert_quant; bc_map[CSINN_OP_CUMSUM][i] = csi_ref_cumsum_quant; bc_map[CSINN_OP_DEPTH_TO_SPACE][i] = csi_ref_depth_to_space_quant; bc_map[CSINN_OP_DIV][i] = csi_ref_div_quant; @@ -279,8 +363,10 @@ static void *setup_bc_map() bc_map[CSINN_OP_DEPTHWISE_CONV2D_RELU][i] = csi_ref_depthwise_conv2d_relu_quant; bc_map[CSINN_OP_DEPTHWISE_CONV2D_RELU6][i] = csi_ref_depthwise_conv2d_relu6_quant; bc_map[CSINN_OP_DEPTHWISE_CONV2D_CHANNEL][i] = csi_ref_depthwise_conv2d_channel_quant; - bc_map[CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU][i] = csi_ref_depthwise_conv2d_channel_relu_quant; - bc_map[CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU6][i] = csi_ref_depthwise_conv2d_channel_relu6_quant; + bc_map[CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU][i] = + csi_ref_depthwise_conv2d_channel_relu_quant; + bc_map[CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU6][i] = + csi_ref_depthwise_conv2d_channel_relu6_quant; bc_map[CSINN_OP_GROUP_CONV2D][i] = csi_ref_group_conv2d_quant; bc_map[CSINN_OP_GROUP_CONV2D_RELU][i] = csi_ref_group_conv2d_relu_quant; bc_map[CSINN_OP_GROUP_CONV2D_RELU6][i] = csi_ref_group_conv2d_relu6_quant; @@ -338,6 +424,8 @@ static void *setup_bc_map() bc_map[CSINN_OP_BN][CSINN_DTYPE_FLOAT32] = csi_ref_batch_normalization_f32; bc_map[CSINN_OP_BATCH_TO_SPACE][CSINN_DTYPE_FLOAT32] = csi_ref_batch_to_space_f32; bc_map[CSINN_OP_BROADCOST][CSINN_DTYPE_FLOAT32] = csi_ref_broadcast_to_f32; + bc_map[CSINN_OP_CACHE_MATMUL][CSINN_DTYPE_FLOAT32] = csi_ref_cache_matmul_f32; + bc_map[CSINN_OP_CACHE_CONV1D][CSINN_DTYPE_FLOAT32] = csi_ref_cache_conv1d_f32; bc_map[CSINN_OP_CEIL][CSINN_DTYPE_FLOAT32] = csi_ref_ceil_f32; bc_map[CSINN_OP_CLIP][CSINN_DTYPE_FLOAT32] = csi_ref_clip_f32; bc_map[CSINN_OP_CONCAT][CSINN_DTYPE_FLOAT32] = csi_ref_concat_f32; @@ -470,10 +558,7 @@ static void *setup_bc_map() return bc_map; } -static int get_bc_map_index(int op, int dtype) -{ - return op * CSINN_DTYPE_SIZE + dtype; -} +static int get_bc_map_index(int op, int dtype) { return op * CSINN_DTYPE_SIZE + dtype; } void *csi_bc_map_ref(int op, int dtype) { diff --git a/source/reference/shape.c b/source/reference/shape.c index 8dc8009b..9c16cd56 100644 --- a/source/reference/shape.c +++ b/source/reference/shape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_shape_i32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_shape_i32(struct csi_tensor *input, struct csi_tensor *output, struct shape_params *params) { int32_t *data = output->data; @@ -32,22 +31,20 @@ int csi_ref_shape_i32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_shape_u8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_shape_u8(struct csi_tensor *input, struct csi_tensor *output, struct shape_params *params) { - uint8_t * data = output->data; + uint8_t *data = output->data; for (int i = 0; i < input->dim_count; i++) { data[i] = input->dim[i]; } return CSINN_TRUE; } -int csi_ref_shape_i8(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_shape_i8(struct csi_tensor *input, struct csi_tensor *output, struct shape_params *params) { - uint8_t * data = output->data; + uint8_t *data = output->data; for (int i = 0; i < input->dim_count; i++) { data[i] = input->dim[i]; } diff --git a/source/reference/shuffle_channel.c b/source/reference/shuffle_channel.c index e7ea469d..60d38381 100644 --- a/source/reference/shuffle_channel.c +++ b/source/reference/shuffle_channel.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,22 +16,20 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" - -static int csi_ref_shuffle_channel_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *output, +static int csi_ref_shuffle_channel_nhwc_f32(struct csi_tensor *input, struct csi_tensor *output, struct shuffle_channel_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; - int batch = input->dim[0]; - int height = input->dim[1]; - int width = input->dim[2]; + int batch = input->dim[0]; + int height = input->dim[1]; + int width = input->dim[2]; int channel = input->dim[3]; int group = params->group; int group_channel = channel / group; @@ -39,10 +37,11 @@ static int csi_ref_shuffle_channel_nhwc_f32(struct csi_tensor *input, int input_inner_size = 1; float *input_data_addr = input_data; - for(int i = 0; i < input_outer_size; i++) { - for(int j = 0; j < group_channel; j++) { - for(int k = 0; k < group; k++) { - float *input_data_addr1 = input_data_addr + (k * group_channel + j) * input_inner_size; + for (int i = 0; i < input_outer_size; i++) { + for (int j = 0; j < group_channel; j++) { + for (int k = 0; k < group; k++) { + float *input_data_addr1 = + input_data_addr + (k * group_channel + j) * input_inner_size; memcpy(output_data, input_data_addr1, input_inner_size * sizeof(float)); output_data += input_inner_size; } @@ -52,14 +51,12 @@ static int csi_ref_shuffle_channel_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } - -static int csi_ref_shuffle_channel_nchw_f32(struct csi_tensor *o_input, - struct csi_tensor *o_output, +static int csi_ref_shuffle_channel_nchw_f32(struct csi_tensor *o_input, struct csi_tensor *o_output, struct shuffle_channel_params *params) { struct csi_tensor *input; struct csi_tensor *output; - input = csi_ref_nchw_to_nhwc_f32(o_input); + input = csi_ref_nchw_to_nhwc_f32(o_input); output = csi_ref_nchw_to_nhwc_f32(o_output); csi_ref_shuffle_channel_nhwc_f32(input, output, params); csi_ref_nhwc_to_nchw_f32(o_output, output); @@ -67,8 +64,7 @@ static int csi_ref_shuffle_channel_nchw_f32(struct csi_tensor *o_input, return CSINN_TRUE; } -int csi_ref_shuffle_channel_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_shuffle_channel_f32(struct csi_tensor *input, struct csi_tensor *output, struct shuffle_channel_params *params) { if (params->base.layout == CSINN_LAYOUT_NCHW) { @@ -80,8 +76,7 @@ int csi_ref_shuffle_channel_f32(struct csi_tensor *input, } } -int csi_ref_shuffle_channel_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_shuffle_channel_quant(struct csi_tensor *input, struct csi_tensor *output, struct shuffle_channel_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_shuffle_channel_f32); diff --git a/source/reference/sigmoid.c b/source/reference/sigmoid.c index 6b6f2ca5..c2dd538d 100644 --- a/source/reference/sigmoid.c +++ b/source/reference/sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_sigmoid_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sigmoid_f32(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params) { float *input_data = input->data; @@ -39,8 +38,7 @@ int csi_ref_sigmoid_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_sigmoid_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sigmoid_quant(struct csi_tensor *input, struct csi_tensor *output, struct sigmoid_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_sigmoid_f32); diff --git a/source/reference/sign.c b/source/reference/sign.c index 72257079..2a009035 100644 --- a/source/reference/sign.c +++ b/source/reference/sign.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,23 +16,22 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -float sign(float v){ - if(v == 0) - return 0; - else if(v > 0) - return 1; +float sign(float v) +{ + if (v == 0) + return 0; + else if (v > 0) + return 1; else - return -1; + return -1; } -int csi_ref_sign_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sign_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; @@ -48,8 +47,7 @@ int csi_ref_sign_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_sign_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sign_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_sign_f32); diff --git a/source/reference/sin.c b/source/reference/sin.c index 1d22828b..eb52de1f 100644 --- a/source/reference/sin.c +++ b/source/reference/sin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_sin_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_sin_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -36,8 +33,7 @@ int csi_ref_sin_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_sin_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sin_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_sin_f32); diff --git a/source/reference/sinh.c b/source/reference/sinh.c index aa4e41c5..8faf61ee 100644 --- a/source/reference/sinh.c +++ b/source/reference/sinh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_sinh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sinh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; @@ -36,8 +34,7 @@ int csi_ref_sinh_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_sinh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sinh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_sinh_f32); diff --git a/source/reference/slice.c b/source/reference/slice.c index d6090c53..1fc048db 100644 --- a/source/reference/slice.c +++ b/source/reference/slice.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,39 +16,42 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_slice_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_slice_f32(struct csi_tensor *input, struct csi_tensor *output, struct slice_params *params) { float *input_data = input->data; float *output_data = output->data; if (input->dim_count == 4) { for (int b = params->begin[0]; b < params->end[0]; b++) { - for(int c = params->begin[1]; c < params->end[1]; c++){ - for(int h = params->begin[2]; h < params->end[2]; h++){ - for(int w = params->begin[3]; w < params->end[3]; w++){ + for (int c = params->begin[1]; c < params->end[1]; c++) { + for (int h = params->begin[2]; h < params->end[2]; h++) { + for (int w = params->begin[3]; w < params->end[3]; w++) { int32_t input_index = csi_ref_get_index(input->dim, b, c, h, w); float out_val = input_data[input_index]; - int32_t out_index = csi_ref_get_index(output->dim, b-params->begin[0], c-params->begin[1], h-params->begin[2], w-params->begin[3]); + int32_t out_index = csi_ref_get_index( + output->dim, b - params->begin[0], c - params->begin[1], + h - params->begin[2], w - params->begin[3]); output_data[out_index] = out_val; } } } } - } else if (input->dim_count == 5){ + } else if (input->dim_count == 5) { for (int i = params->begin[0]; i < params->end[0]; i++) { - for(int j = params->begin[1]; j < params->end[1]; j++){ - for(int k = params->begin[2]; k < params->end[2]; k++){ - for(int l = params->begin[3]; l < params->end[3]; l++){ - for(int m = params->begin[4]; m < params->end[4]; m++){ + for (int j = params->begin[1]; j < params->end[1]; j++) { + for (int k = params->begin[2]; k < params->end[2]; k++) { + for (int l = params->begin[3]; l < params->end[3]; l++) { + for (int m = params->begin[4]; m < params->end[4]; m++) { int32_t input_index = csi_ref_get_index_5(input->dim, i, j, k, l, m); float out_val = input_data[input_index]; - int32_t out_index = csi_ref_get_index_5(output->dim, i-params->begin[0], j-params->begin[1], k-params->begin[2], l-params->begin[3], m-params->begin[4]); + int32_t out_index = csi_ref_get_index_5( + output->dim, i - params->begin[0], j - params->begin[1], + k - params->begin[2], l - params->begin[3], m - params->begin[4]); output_data[out_index] = out_val; } } @@ -59,8 +62,7 @@ int csi_ref_slice_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_slice_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_slice_quant(struct csi_tensor *input, struct csi_tensor *output, struct slice_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_slice_f32); diff --git a/source/reference/softmax.c b/source/reference/softmax.c index c8f70e3f..7199fbd8 100644 --- a/source/reference/softmax.c +++ b/source/reference/softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,52 +16,51 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_softmax_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softmax_f32(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params) { - float *input_data = (float *)input->data; + float *input_data = (float *)input->data; float *output_data = (float *)output->data; int axis = params->axis; // FlatSize() = outer_size * inner_size * cnt; int64_t outer_size = 1; - for(int i = 0; i < axis; i++) { + for (int i = 0; i < axis; i++) { outer_size *= input->dim[i]; } int64_t inner_size = 1; - for(int i = axis + 1; i < input->dim_count; i++) { + for (int i = axis + 1; i < input->dim_count; i++) { inner_size *= input->dim[i]; } int cnt = input->dim[axis]; - for(int i = 0; i < outer_size; i++) { - for(int k = 0; k < inner_size; k++) { - + for (int i = 0; i < outer_size; i++) { + for (int k = 0; k < inner_size; k++) { float acc_exp = 0.0f; float max = -FLT_MAX; // Find max element value which we'll use to ensure numerical stability // taking advantage of the following equality: // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C)) - for(int j = 0; j < cnt; j++) { + for (int j = 0; j < cnt; j++) { max = fmax(max, *(input_data + j * inner_size + k)); } // compute sum - for(int j = 0; j < cnt; j++) { + for (int j = 0; j < cnt; j++) { acc_exp += exp(*(input_data + j * inner_size + k) - max); } // compute final result - for(int j = 0; j < cnt; j++) { - *(output_data + j * inner_size + k) = exp(*(input_data + j * inner_size + k) - max) / acc_exp; + for (int j = 0; j < cnt; j++) { + *(output_data + j * inner_size + k) = + exp(*(input_data + j * inner_size + k) - max) / acc_exp; } } input_data += inner_size * cnt; @@ -70,8 +69,7 @@ int csi_ref_softmax_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_softmax_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softmax_quant(struct csi_tensor *input, struct csi_tensor *output, struct softmax_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_softmax_f32); diff --git a/source/reference/softplus.c b/source/reference/softplus.c index 2bb5c049..7f57def9 100644 --- a/source/reference/softplus.c +++ b/source/reference/softplus.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,29 +16,27 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_softplus_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softplus_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; int size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { size = size * input->dim[i]; } - for(int i = 0; i < size; i++) { - output_data[i] = log(1+exp(input_data[i])); + for (int i = 0; i < size; i++) { + output_data[i] = log(1 + exp(input_data[i])); } return CSINN_TRUE; } -int csi_ref_softplus_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softplus_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_softplus_f32); diff --git a/source/reference/softrelu.c b/source/reference/softrelu.c index 2146f55c..eeee842c 100644 --- a/source/reference/softrelu.c +++ b/source/reference/softrelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,17 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static float softrelu(float x, float y){ - return log(1 + exp(fmax(fmin(x, y), y))); -} +static float softrelu(float x, float y) { return log(1 + exp(fmax(fmin(x, y), y))); } -int csi_ref_softrelu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softrelu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { float *input_data = input->data; @@ -42,8 +39,7 @@ int csi_ref_softrelu_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_softrelu_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softrelu_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_softrelu_f32); diff --git a/source/reference/softsign.c b/source/reference/softsign.c index c9e19e46..84ed6e55 100644 --- a/source/reference/softsign.c +++ b/source/reference/softsign.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,29 +16,27 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_softsign_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softsign_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; int size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { size = size * input->dim[i]; } - for(int i = 0; i < size; i++) { - output_data[i] = input_data[i] / (1+fabs(input_data[i])); + for (int i = 0; i < size; i++) { + output_data[i] = input_data[i] / (1 + fabs(input_data[i])); } return CSINN_TRUE; } -int csi_ref_softsign_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_softsign_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_softsign_f32); diff --git a/source/reference/space_to_batch.c b/source/reference/space_to_batch.c index 71a76222..d54b2a70 100644 --- a/source/reference/space_to_batch.c +++ b/source/reference/space_to_batch.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -//tf.nn.space_to_batch:the input mast a 4-D Tensor with shape [batch, height, width, depth]. +// tf.nn.space_to_batch:the input mast a 4-D Tensor with shape [batch, height, width, depth]. -int csi_ref_space_to_batch_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_space_to_batch_f32(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_params *params) { float *input_data = (float *)input->data; @@ -38,32 +37,34 @@ int csi_ref_space_to_batch_f32(struct csi_tensor *input, int block_size = params->block_size; int block_size2 = block_size * block_size; - int out_batch = output->dim[0]; //out_batch = in_batch * block_size * block_size; - int out_channel = output->dim[1]; //out_channel = in_channel; - int out_height = output->dim[2]; //out_height = (in_height) / block_size; - int out_width = output->dim[3]; //out_width = (in_width = params->) / block_size; - - for(int in_b = 0; in_b < batch; ++in_b) { - for(int out_h = 0; out_h < out_height * block_size; out_h = out_h + block_size) { - for(int out_w = 0; out_w < out_width * block_size; out_w = out_w + block_size) { - for(int out_c = 0; out_c < in_channel; ++out_c) { + int out_batch = output->dim[0]; // out_batch = in_batch * block_size * block_size; + int out_channel = output->dim[1]; // out_channel = in_channel; + int out_height = output->dim[2]; // out_height = (in_height) / block_size; + int out_width = output->dim[3]; // out_width = (in_width = params->) / block_size; + for (int in_b = 0; in_b < batch; ++in_b) { + for (int out_h = 0; out_h < out_height * block_size; out_h = out_h + block_size) { + for (int out_w = 0; out_w < out_width * block_size; out_w = out_w + block_size) { + for (int out_c = 0; out_c < in_channel; ++out_c) { float *temp = (float *)csi_mem_alloc(block_size2 * sizeof(float)); int h_origin = out_h - params->pad_top; int w_origin = out_w - params->pad_left; - for(int h = 0; h < block_size; ++h) { - for(int w = 0; w < block_size; ++w) { + for (int h = 0; h < block_size; ++h) { + for (int w = 0; w < block_size; ++w) { int h_now = h_origin + h; int w_now = w_origin + w; - if(h_now >= 0 && h_now < in_height && w_now >= 0 && w_now < in_width) { - int in_addr = csi_ref_get_index(input->dim, in_b, out_c, h_now, w_now); + if (h_now >= 0 && h_now < in_height && w_now >= 0 && w_now < in_width) { + int in_addr = + csi_ref_get_index(input->dim, in_b, out_c, h_now, w_now); temp[h * block_size + w] = input_data[in_addr]; } } } - int out_start_addr = csi_ref_get_index(output->dim, in_b, out_c, out_h / block_size, out_w / block_size); - for(int i = 0; i < block_size2; ++i) { - output_data[out_start_addr + i * batch * out_channel * out_height * out_width] = temp[i]; + int out_start_addr = csi_ref_get_index(output->dim, in_b, out_c, + out_h / block_size, out_w / block_size); + for (int i = 0; i < block_size2; ++i) { + output_data[out_start_addr + + i * batch * out_channel * out_height * out_width] = temp[i]; } csi_mem_free(temp); } @@ -73,8 +74,7 @@ int csi_ref_space_to_batch_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_space_to_batch_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_space_to_batch_quant(struct csi_tensor *input, struct csi_tensor *output, struct space_to_batch_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_space_to_batch_f32); diff --git a/source/reference/space_to_depth.c b/source/reference/space_to_depth.c index 81fb3f66..08889470 100644 --- a/source/reference/space_to_depth.c +++ b/source/reference/space_to_depth.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -//the input->data is a 4-D Tensor with shape [batch, depth, height, width]. -int csi_ref_space_to_depth_f32(struct csi_tensor *input, - struct csi_tensor *output, +// the input->data is a 4-D Tensor with shape [batch, depth, height, width]. +int csi_ref_space_to_depth_f32(struct csi_tensor *input, struct csi_tensor *output, struct space_to_depth_params *params) { float *input_data = (float *)input->data; @@ -36,27 +35,28 @@ int csi_ref_space_to_depth_f32(struct csi_tensor *input, int block_size = params->block_size; int block_size2 = block_size * block_size; - assert(in_height%block_size==0 && in_width%block_size==0); + assert(in_height % block_size == 0 && in_width % block_size == 0); - int out_channel = output->dim[1]; //out_channel = in_channel * block_size * block_size; - int out_height = output->dim[2]; //out_height = in_height / block_size; - int out_width = output->dim[3]; //out_width = in_width / block_size; - - for(int out_b=0; out_bdim[1]; // out_channel = in_channel * block_size * block_size; + int out_height = output->dim[2]; // out_height = in_height / block_size; + int out_width = output->dim[3]; // out_width = in_width / block_size; + for (int out_b = 0; out_b < batch; ++out_b) { + for (int out_h = 0; out_h < out_height; ++out_h) { + for (int out_w = 0; out_w < out_width; ++out_w) { + for (int in_c = 0; in_c < in_channel; ++in_c) { float *temp = (float *)csi_mem_alloc(block_size2 * sizeof(float)); - int in_start_addr = csi_ref_get_index(input->dim, out_b, in_c, out_h*block_size, out_w*block_size); - for(int h=0; hdim, out_b, in_c, + out_h * block_size, out_w * block_size); + for (int h = 0; h < block_size; h++) { + for (int w = 0; w < block_size; w++) { + temp[h * block_size + w] = input_data[in_start_addr + h * in_width + w]; } } int out_start_addr = csi_ref_get_index(output->dim, out_b, in_c, out_h, out_w); - for(int i=0; isplit_index[i - 1]; } - float* output_i_data = output[i]->data; + float *output_i_data = output[i]->data; for (int out = 0; out < out_size; out++) { int in_index = out * input->dim[params->axis] * inner_size + s_index * inner_size; @@ -59,25 +59,21 @@ int csi_ref_split_f32(struct csi_tensor *input, struct csi_tensor **output, } } - return CSINN_TRUE; } - - -int csi_ref_split_quant(struct csi_tensor *input, - struct csi_tensor **output, +int csi_ref_split_quant(struct csi_tensor *input, struct csi_tensor **output, struct split_params *params) { struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); struct csi_tensor *foutput[params->output_num]; - for (int i = 0; i < params->output_num; i++){ + for (int i = 0; i < params->output_num; i++) { foutput[i] = csi_ref_tensor_transform_f32(output[i]); } int ret = csi_ref_split_f32(finput, foutput, params); - for (int i = 0; i < params->output_num; i++){ + for (int i = 0; i < params->output_num; i++) { csi_tensor_data_convert(output[i], foutput[i]); csi_ref_tensor_transform_free_f32(foutput[i]); } diff --git a/source/reference/sqrt.c b/source/reference/sqrt.c index 38132171..4ee9fef8 100644 --- a/source/reference/sqrt.c +++ b/source/reference/sqrt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_sqrt_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sqrt_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; @@ -39,8 +37,7 @@ int csi_ref_sqrt_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_sqrt_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sqrt_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_sqrt_f32); diff --git a/source/reference/square.c b/source/reference/square.c index 4e66005b..e01eb739 100644 --- a/source/reference/square.c +++ b/source/reference/square.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_square_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_square_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; diff --git a/source/reference/squeeze.c b/source/reference/squeeze.c index ee046ded..e3346cba 100644 --- a/source/reference/squeeze.c +++ b/source/reference/squeeze.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_squeeze(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_squeeze(struct csi_tensor *input, struct csi_tensor *output, struct squeeze_params *params) { float *input_data = input->data; diff --git a/source/reference/stack.c b/source/reference/stack.c index 65a780ea..7f879d06 100644 --- a/source/reference/stack.c +++ b/source/reference/stack.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_stack_f32(struct csi_tensor **input, - struct csi_tensor *output, +int csi_ref_stack_f32(struct csi_tensor **input, struct csi_tensor *output, struct stack_params *params) { int input_count = params->inputs_count; @@ -31,18 +30,18 @@ int csi_ref_stack_f32(struct csi_tensor **input, // For all input arrays, // FlatSize() = outer_size * base_inner_size; int64_t outer_size = 1; - for(int i = 0; i < axis; ++i) { + for (int i = 0; i < axis; ++i) { outer_size *= output->dim[i]; } int64_t inner_size = 1; - for(int i = axis+1; i < output->dim_count; ++i) { + for (int i = axis + 1; i < output->dim_count; ++i) { inner_size *= output->dim[i]; } int copy_size = inner_size; float *output_data = (float *)output->data; - for(int i = 0; i < outer_size; ++i) { - for(int j = 0; j < input_count; ++j) { + for (int i = 0; i < outer_size; ++i) { + for (int j = 0; j < input_count; ++j) { struct csi_tensor *input_item = input[j]; float *input_item_data = (float *)input_item->data; const float *input_ptr = input_item_data + i * copy_size; @@ -53,12 +52,11 @@ int csi_ref_stack_f32(struct csi_tensor **input, return CSINN_TRUE; } -int csi_ref_stack_quant(struct csi_tensor **input, - struct csi_tensor *output, +int csi_ref_stack_quant(struct csi_tensor **input, struct csi_tensor *output, struct stack_params *params) { - if (params->axis == -1){ - params->axis = input[0]->dim_count -1; + if (params->axis == -1) { + params->axis = input[0]->dim_count - 1; } int input_count = params->inputs_count; int ret; diff --git a/source/reference/strided_slice.c b/source/reference/strided_slice.c index 6b21a019..4d20692a 100644 --- a/source/reference/strided_slice.c +++ b/source/reference/strided_slice.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_strided_slice_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_strided_slice_f32(struct csi_tensor *input, struct csi_tensor *output, struct strided_slice_params *params) { float *input_data = (float *)input->data; @@ -35,45 +34,46 @@ int csi_ref_strided_slice_f32(struct csi_tensor *input, // } int outer_size = 1; int inner_size = 1; - int inner_size_copy_num = 1; + int inner_size_copy_num = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { inner_size *= input->dim[i]; } - for(int slice_dim = 0; slice_dim < slice_dim_count; slice_dim++) { - + for (int slice_dim = 0; slice_dim < slice_dim_count; slice_dim++) { int begin = params->begin[slice_dim]; int end = params->end[slice_dim]; int stride = params->stride[slice_dim]; - if(begin >= end) { + if (begin >= end) { return CSINN_FALSE; } - if(end > input->dim[slice_dim]) { + if (end > input->dim[slice_dim]) { end = input->dim[slice_dim]; } inner_size /= input->dim[slice_dim]; outer_size *= inner_size_copy_num; - inner_size_copy_num = 1 + (end -1 - begin) / stride; + inner_size_copy_num = 1 + (end - 1 - begin) / stride; out_size *= inner_size_copy_num; - float *temp = (float *)csi_mem_alloc(outer_size * inner_size * inner_size_copy_num * sizeof(float)); + float *temp = + (float *)csi_mem_alloc(outer_size * inner_size * inner_size_copy_num * sizeof(float)); float *temp_copy = NULL; float *temp_addr = temp; - for(int n = 0; n < outer_size; n++) { - for(int i = begin; i < end; i = i + stride) { + for (int n = 0; n < outer_size; n++) { + for (int i = begin; i < end; i = i + stride) { memcpy(temp_addr, input_data + i * inner_size, inner_size * sizeof(float)); temp_addr += inner_size; } input_data += inner_size * input->dim[slice_dim]; } - if(temp != NULL) { + if (temp != NULL) { csi_mem_free(temp_copy); } - temp_copy = (float *)csi_mem_alloc(outer_size * inner_size * inner_size_copy_num * sizeof(float)); + temp_copy = + (float *)csi_mem_alloc(outer_size * inner_size * inner_size_copy_num * sizeof(float)); memcpy(temp_copy, temp, outer_size * inner_size * inner_size_copy_num * sizeof(float)); input_data = temp_copy; csi_mem_free(temp); @@ -85,8 +85,7 @@ int csi_ref_strided_slice_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_strided_slice_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_strided_slice_quant(struct csi_tensor *input, struct csi_tensor *output, struct strided_slice_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_strided_slice_f32); diff --git a/source/reference/sub.c b/source/reference/sub.c index c2af629d..119124c2 100644 --- a/source/reference/sub.c +++ b/source/reference/sub.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,20 +16,17 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static void element_sub_f32(float *src0, float *src1, float *dest, - int input_idx, int output_idx) +static void element_sub_f32(float *src0, float *src1, float *dest, int input_idx, int output_idx) { dest[output_idx] = src0[output_idx] - src1[input_idx]; } -int csi_ref_sub_f32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_sub_f32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { struct csi_ref_diso_callback cb; @@ -39,10 +36,8 @@ int csi_ref_sub_f32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_sub_quant(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params) +int csi_ref_sub_quant(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) { return csi_ref_diso_callback_base(input0, input1, output, params, csi_ref_sub_f32); } diff --git a/source/reference/sum.c b/source/reference/sum.c index adf14637..c68353f5 100644 --- a/source/reference/sum.c +++ b/source/reference/sum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,41 +16,36 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_sum_stride_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sum_stride_f32(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { - float *input_data = input->data; float *output_data = output->data; int32_t inner_size = 1; int32_t out_size = 1; - for (int32_t k = 0; k < params->n; k++) - { + for (int32_t k = 0; k < params->n; k++) { out_size *= params->out_extents[k]; } - for (int32_t k = 0; k < params->m; k++) - { + for (int32_t k = 0; k < params->m; k++) { inner_size *= params->inner_extents[k]; } - for (int32_t out = 0; out < out_size; out++) - { - + for (int32_t out = 0; out < out_size; out++) { float result = 0; - int32_t out_index = csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); - for (int32_t inner = 0; inner < inner_size; inner++) - { - int32_t index = out_index + csi_ref_get_reduction_index(inner, params->inner_strides, - params->inner_extents, params->m); + int32_t out_index = + csi_ref_get_reduction_index(out, params->out_strides, params->out_extents, params->n); + for (int32_t inner = 0; inner < inner_size; inner++) { + int32_t index = + out_index + csi_ref_get_reduction_index(inner, params->inner_strides, + params->inner_extents, params->m); float val = input_data[index]; result += val; } @@ -60,8 +55,7 @@ int csi_ref_sum_stride_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_sum_stride_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_sum_stride_quant(struct csi_tensor *input, struct csi_tensor *output, struct reduce_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_sum_stride_f32); diff --git a/source/reference/tan.c b/source/reference/tan.c index df1746b6..c3d78fd8 100644 --- a/source/reference/tan.c +++ b/source/reference/tan.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_tan_f32(struct csi_tensor *input, - struct csi_tensor *output, - struct siso_params *params) +int csi_ref_tan_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; float *output_data = output->data; @@ -39,8 +36,7 @@ int csi_ref_tan_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_tan_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tan_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_tan_f32); diff --git a/source/reference/tanh.c b/source/reference/tanh.c index 9a2d5e30..243ae068 100644 --- a/source/reference/tanh.c +++ b/source/reference/tanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_tanh_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tanh_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; @@ -36,8 +34,7 @@ int csi_ref_tanh_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_tanh_f64(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tanh_f64(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { double *input_data = input->data; @@ -50,8 +47,7 @@ int csi_ref_tanh_f64(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_tanh_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tanh_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_tanh_f32); diff --git a/source/reference/threshold_relu.c b/source/reference/threshold_relu.c index 05c6e520..a5234652 100644 --- a/source/reference/threshold_relu.c +++ b/source/reference/threshold_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,18 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -static float threshold_relu(float x, float theta){ - return x > theta ? x : 0; -} +static float threshold_relu(float x, float theta) { return x > theta ? x : 0; } -int csi_ref_threshold_relu_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_threshold_relu_f32(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { float *input_data = input->data; @@ -44,8 +40,7 @@ int csi_ref_threshold_relu_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_threshold_relu_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_threshold_relu_quant(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_threshold_relu_f32); diff --git a/source/reference/tile.c b/source/reference/tile.c index 98ac8094..650ce851 100644 --- a/source/reference/tile.c +++ b/source/reference/tile.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static int Multiplication(int *dim, int s, int e) +static int Multiplication(int32_t *dim, int s, int e) { int res = 1; for (int i = s; i <= e; i++) { @@ -30,8 +30,7 @@ static int Multiplication(int *dim, int s, int e) return res; } -int csi_ref_tile_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tile_f32(struct csi_tensor *input, struct csi_tensor *output, struct tile_params *params) { float *input_data = (float *)input->data; @@ -41,11 +40,11 @@ int csi_ref_tile_f32(struct csi_tensor *input, assert(reps_count == input->dim_count); int in_size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { in_size *= input->dim[i]; } int out_size = 1; - for(int i = 0; i < input->dim_count; i++) { + for (int i = 0; i < input->dim_count; i++) { out_size *= params->reps[i]; } out_size = out_size * in_size; @@ -53,7 +52,8 @@ int csi_ref_tile_f32(struct csi_tensor *input, for (int dim_idx = reps_count - 1; dim_idx >= 0; dim_idx--) { int reps_num = params->reps[dim_idx]; int num = Multiplication(input->dim, 0, dim_idx) / (input->dim[dim_idx]); - int step = Multiplication(input->dim, dim_idx, input->dim_count - 1) * Multiplication(params->reps, dim_idx, reps_count - 1) / (params->reps[dim_idx]); + int step = Multiplication(input->dim, dim_idx, input->dim_count - 1) * + Multiplication(params->reps, dim_idx, reps_count - 1) / (params->reps[dim_idx]); float *temp = (float *)csi_mem_alloc(reps_num * num * step * sizeof(float)); float *temp_cpy_addr = temp; for (int input_pre_i = 0; input_pre_i < num; input_pre_i++) { @@ -72,8 +72,7 @@ int csi_ref_tile_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_tile_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_tile_quant(struct csi_tensor *input, struct csi_tensor *output, struct tile_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_tile_f32); diff --git a/source/reference/topk.c b/source/reference/topk.c index 4c7f2482..1571dceb 100644 --- a/source/reference/topk.c +++ b/source/reference/topk.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,35 +16,32 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_topk_f32(struct csi_tensor *input, - struct csi_tensor *output1, - struct csi_tensor *output2, - struct topk_params *params) +int csi_ref_topk_f32(struct csi_tensor *input, struct csi_tensor *output1, + struct csi_tensor *output2, struct topk_params *params) { - float *input_data = (float *)input->data; + float *input_data = (float *)input->data; float *values_data = (float *)output1->data; - int *indices_data = (int *)output2->data; + int *indices_data = (int *)output2->data; int k = params->k; int last_dim = input->dim[input->dim_count - 1]; int inner_size = 1; - for(int i = 0; i < input->dim_count - 1; i++) - { + for (int i = 0; i < input->dim_count - 1; i++) { inner_size *= input->dim[i]; } float *input_sort_addr = input_data; - for(int n = 0; n < inner_size; n++) { + for (int n = 0; n < inner_size; n++) { int *flag = (int *)csi_mem_alloc(last_dim * sizeof(int)); - for(int i = 0; i < k; i++) { + for (int i = 0; i < k; i++) { values_data[i] = -FLT_MAX; - for(int j = 0; j < last_dim; j++) { - if(input_sort_addr[j] > values_data[i] && !flag[j]) { - values_data[i] = input_sort_addr[j]; + for (int j = 0; j < last_dim; j++) { + if (input_sort_addr[j] > values_data[i] && !flag[j]) { + values_data[i] = input_sort_addr[j]; indices_data[i] = j; } } @@ -59,10 +56,8 @@ int csi_ref_topk_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_topk_quant(struct csi_tensor *input, - struct csi_tensor *output0, - struct csi_tensor *output1, - struct topk_params *params) +int csi_ref_topk_quant(struct csi_tensor *input, struct csi_tensor *output0, + struct csi_tensor *output1, struct topk_params *params) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); diff --git a/source/reference/transpose.c b/source/reference/transpose.c index 54fd0a72..9eba342b 100644 --- a/source/reference/transpose.c +++ b/source/reference/transpose.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,32 +16,28 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include - -int csi_ref_transpose_init(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_transpose_init(struct csi_tensor *input, struct csi_tensor *output, struct transpose_params *params) { - if (input->quant_channel == output->quant_channel){ + if (input->quant_channel == output->quant_channel) { int quant_size = input->quant_channel * sizeof(struct csi_quant_info); int t = memcmp(input->qinfo, output->qinfo, quant_size); - if (t == 0){ + if (t == 0) { params->base.bc = csi_ref_transpose; return CSINN_TRUE; } } - params->base.bc = csi_ref_transpose_requant; + params->base.bc = csi_ref_transpose_quant; return CSINN_TRUE; } -static void copy_element(struct csi_tensor *input, - struct csi_tensor *output, - int input_idx, int output_idx) +static void copy_element(struct csi_tensor *input, struct csi_tensor *output, int input_idx, + int output_idx) { if (input->dtype == CSINN_DTYPE_FLOAT32) { float *src32 = input->data; @@ -51,13 +47,18 @@ static void copy_element(struct csi_tensor *input, int8_t *src8 = input->data; int8_t *dest8 = output->data; dest8[output_idx] = src8[input_idx]; + } else if (input->dtype == CSINN_DTYPE_INT16) { + int16_t *src16 = input->data; + int16_t *dest16 = output->data; + dest16[output_idx] = src16[input_idx]; } } static void swap(int32_t *out_idx, int32_t *in_idx, struct csi_tensor *input, struct csi_tensor *output, int32_t *perm, int iter_count) { - for (out_idx[iter_count] = 0; out_idx[iter_count] < output->dim[iter_count]; out_idx[iter_count]++) { + for (out_idx[iter_count] = 0; out_idx[iter_count] < output->dim[iter_count]; + out_idx[iter_count]++) { in_idx[perm[iter_count]] = out_idx[iter_count]; if (iter_count == 0) { int input_idx = csi_ref_get_index_iter(input->dim, input->dim_count - 1, in_idx); @@ -69,15 +70,14 @@ static void swap(int32_t *out_idx, int32_t *in_idx, struct csi_tensor *input, } } -int csi_ref_transpose(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_transpose(struct csi_tensor *input, struct csi_tensor *output, struct transpose_params *params) { - const int unextended_output_size = output->dim_count;; - int32_t o[unextended_output_size]; - int32_t i[unextended_output_size]; + const int unextended_output_size = output->dim_count; + int32_t *o = csi_mem_alloc(unextended_output_size * sizeof(int32_t)); + int32_t *i = csi_mem_alloc(unextended_output_size * sizeof(int32_t)); if (input->dtype != CSINN_DTYPE_FLOAT32 && input->qinfo->scale != output->qinfo->scale && - input->qinfo->zero_point != output->qinfo->zero_point){ + input->qinfo->zero_point != output->qinfo->zero_point) { int ret; struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); struct csi_tensor *foutput = csi_ref_tensor_transform_f32(output); @@ -85,16 +85,16 @@ int csi_ref_transpose(struct csi_tensor *input, csi_tensor_data_convert(output, foutput); csi_ref_tensor_transform_free_f32(finput); csi_ref_tensor_transform_free_f32(foutput); - }else{ + } else { swap(o, i, input, output, params->permute, unextended_output_size - 1); } - + csi_mem_free(o); + csi_mem_free(i); return CSINN_TRUE; } -int csi_ref_transpose_requant(struct csi_tensor *input, - struct csi_tensor *output, - struct transpose_params *params) +int csi_ref_transpose_quant(struct csi_tensor *input, struct csi_tensor *output, + struct transpose_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_transpose); } diff --git a/source/reference/trunc.c b/source/reference/trunc.c index 08f129a2..81f2694a 100644 --- a/source/reference/trunc.c +++ b/source/reference/trunc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -#include -int csi_ref_trunc_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_trunc_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; @@ -39,8 +37,7 @@ int csi_ref_trunc_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_trunc_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_trunc_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_trunc_f32); diff --git a/source/reference/unpooling.c b/source/reference/unpooling.c index 49465ba5..7f7eae61 100644 --- a/source/reference/unpooling.c +++ b/source/reference/unpooling.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,15 +16,13 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -static int csi_ref_unpooling_nhwc_f32(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, - struct unpooling_params *params) +static int csi_ref_unpooling_nhwc_f32(struct csi_tensor *input, struct csi_tensor *mask, + struct csi_tensor *output, struct unpooling_params *params) { float *input_data = input->data; int *mask_data = mask->data; @@ -38,17 +36,16 @@ static int csi_ref_unpooling_nhwc_f32(struct csi_tensor *input, const int output_height = output->dim[1]; const int output_width = output->dim[2]; - int size = csi_tensor_size(output); memset(output_data, 0, size * sizeof(float)); - for(int b = 0; b < batches; b++){ - for(int h = 0; h < input_height; h++){ - for(int w = 0; w < input_width; w++){ - for(int c = 0; c < depth; c++){ + for (int b = 0; b < batches; b++) { + for (int h = 0; h < input_height; h++) { + for (int w = 0; w < input_width; w++) { + for (int c = 0; c < depth; c++) { int index = csi_ref_get_index(input->dim, b, h, w, c); int id = mask_data[index]; - if(id < output_height * output_width){ + if (id < output_height * output_width) { int id_h = id / output_width; int id_w = id % output_width; int o_index = csi_ref_get_index(output->dim, b, id_h, id_w, c); @@ -61,10 +58,8 @@ static int csi_ref_unpooling_nhwc_f32(struct csi_tensor *input, return CSINN_TRUE; } -static int csi_ref_unpooling_nchw_f32(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, - struct unpooling_params *params) +static int csi_ref_unpooling_nchw_f32(struct csi_tensor *input, struct csi_tensor *mask, + struct csi_tensor *output, struct unpooling_params *params) { float *input_data = input->data; int *mask_data = mask->data; @@ -78,17 +73,16 @@ static int csi_ref_unpooling_nchw_f32(struct csi_tensor *input, const int output_height = output->dim[2]; const int output_width = output->dim[3]; - int size = csi_tensor_size(output); memset(output_data, 0, size * sizeof(float)); - for(int b = 0; b < batches; b++){ - for(int c = 0; c < depth; c++){ - for(int h = 0; h < input_height; h++){ - for(int w = 0; w < input_width; w++){ + for (int b = 0; b < batches; b++) { + for (int c = 0; c < depth; c++) { + for (int h = 0; h < input_height; h++) { + for (int w = 0; w < input_width; w++) { int index = csi_ref_get_index(input->dim, b, c, h, w); int id = mask_data[index]; - if(id < output_height * output_width){ + if (id < output_height * output_width) { int id_h = id / output_width; int id_w = id % output_width; int o_index = csi_ref_get_index(output->dim, b, c, id_h, id_w); @@ -101,10 +95,8 @@ static int csi_ref_unpooling_nchw_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_unpooling_f32(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, - struct unpooling_params *params) +int csi_ref_unpooling_f32(struct csi_tensor *input, struct csi_tensor *mask, + struct csi_tensor *output, struct unpooling_params *params) { if (params->base.layout == CSINN_LAYOUT_NCHW) { csi_ref_unpooling_nchw_f32(input, mask, output, params); @@ -116,10 +108,8 @@ int csi_ref_unpooling_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_unpooling_quant(struct csi_tensor *input, - struct csi_tensor *mask, - struct csi_tensor *output, - struct unpooling_params *params) +int csi_ref_unpooling_quant(struct csi_tensor *input, struct csi_tensor *mask, + struct csi_tensor *output, struct unpooling_params *params) { struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); struct csi_tensor *foutput = csi_ref_tensor_transform_f32(output); diff --git a/source/reference/unstack.c b/source/reference/unstack.c index f332a7d5..15dfa7df 100644 --- a/source/reference/unstack.c +++ b/source/reference/unstack.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,13 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -int csi_ref_unstack_f32(struct csi_tensor *input, - struct csi_tensor **output, +int csi_ref_unstack_f32(struct csi_tensor *input, struct csi_tensor **output, struct unstack_params *params) { int axis = params->axis; @@ -31,18 +30,18 @@ int csi_ref_unstack_f32(struct csi_tensor *input, // For all output arrays, // FlatSize() = outer_size * base_inner_size; int64_t outer_size = 1; - for(int i = 0; i < axis; ++i) { + for (int i = 0; i < axis; ++i) { outer_size *= input->dim[i]; } int64_t inner_size = 1; - for(int i = axis+1; i < input->dim_count; ++i) { + for (int i = axis + 1; i < input->dim_count; ++i) { inner_size *= input->dim[i]; } int copy_size = inner_size; float *input_data = (float *)input->data; - for(int i = 0; i < outer_size; i++) { - for(int j = 0; j < output_count; j++) { + for (int i = 0; i < outer_size; i++) { + for (int j = 0; j < output_count; j++) { struct csi_tensor *output_item = output[j]; float *output_item_data = (float *)output_item->data; float *output_ptr = output_item_data + i * copy_size; @@ -53,8 +52,7 @@ int csi_ref_unstack_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_unstack_qunat(struct csi_tensor *input, - struct csi_tensor **output, +int csi_ref_unstack_qunat(struct csi_tensor *input, struct csi_tensor **output, struct unstack_params *params) { int ret; diff --git a/source/reference/utils.c b/source/reference/utils.c index 61fe3d03..a010c2e7 100644 --- a/source/reference/utils.c +++ b/source/reference/utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,11 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ + +#include #include "csi_ref.h" #include "csi_utils.h" -#include int32_t csi_ref_max_internal_s32(int32_t a, int32_t b) { @@ -40,12 +41,14 @@ int32_t csi_ref_min_internal_s32(int32_t a, int32_t b) } } -int32_t csi_ref_get_index(int32_t *dim, int32_t index0, int32_t index1, int32_t index2, int32_t index3) +int32_t csi_ref_get_index(int32_t *dim, int32_t index0, int32_t index1, int32_t index2, + int32_t index3) { return ((index0 * dim[1] + index1) * dim[2] + index2) * dim[3] + index3; } -int32_t csi_ref_get_index_5(int32_t *dim, int32_t index0, int32_t index1, int32_t index2, int32_t index3, int32_t index4) +int32_t csi_ref_get_index_5(int32_t *dim, int32_t index0, int32_t index1, int32_t index2, + int32_t index3, int32_t index4) { return dim[4] * (dim[3] * (dim[2] * (dim[1] * index0 + index1) + index2) + index3) + index4; } @@ -63,7 +66,8 @@ int32_t csi_ref_get_index_iter(int32_t *dim, int dim_idx, int32_t *index) return ret; } -int32_t *csi_ref_get_input_dim(struct csi_tensor *input, int dim_count, int32_t *axis, int axis_size) +int32_t *csi_ref_get_input_dim(struct csi_tensor *input, int dim_count, int32_t *axis, + int axis_size) { int8_t alloc_size = dim_count * sizeof(int32_t *); int32_t *ret = csi_mem_alloc(alloc_size); @@ -83,17 +87,16 @@ int csi_check_rhs_shape(struct csi_tensor *input) { int axis = -1; int in_size = csi_tensor_size(input); - for (int i = 0; i < input->dim_count; i++) - { - if (input->dim[i] == in_size){axis = i;} + for (int i = 0; i < input->dim_count; i++) { + if (input->dim[i] == in_size) { + axis = i; + } } return axis; } -int csi_ref_diso_broadcast_base(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - struct diso_params *params, +int csi_ref_diso_broadcast_base(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params, struct csi_ref_diso_callback *cb) { float *input0_data = input0->data; @@ -113,13 +116,13 @@ int csi_ref_diso_broadcast_base(struct csi_tensor *input0, b_input0->data = in0_data_b; b_input1->data = in1_data_b; - if (csi_ref_broadcast_to_shape(input0, b_input0, output->dim, output->dim_count) == CSINN_FALSE) - { + if (csi_ref_broadcast_to_shape(input0, b_input0, output->dim, output->dim_count) == + CSINN_FALSE) { CSI_DEBUG_CALL(csi_debug_info("%s: broadcast input0 failed.", __func__)); return CSINN_FALSE; }; - if (csi_ref_broadcast_to_shape(input1, b_input1, output->dim, output->dim_count) == CSINN_FALSE) - { + if (csi_ref_broadcast_to_shape(input1, b_input1, output->dim, output->dim_count) == + CSINN_FALSE) { CSI_DEBUG_CALL(csi_debug_info("%s: broadcast input1 failed.", __func__)); return CSINN_FALSE; }; @@ -131,7 +134,7 @@ int csi_ref_diso_broadcast_base(struct csi_tensor *input0, for (int i = 0; i < size0; i++) { cb->bc(in0_data_b, in1_data_b, output_data, i, i); } - }else{ + } else { return CSINN_FALSE; } csi_mem_free(in0_data_b); @@ -175,13 +178,15 @@ static int32_t high_mul_sat_round_double(int32_t a, int32_t b) return overflow ? INT32_MAX : ab_x2_high32; } -uint8_t csi_ref_quantize_channel_u8(int32_t data, struct csi_tensor* input, struct csi_tensor* output, float wscale) +uint8_t csi_ref_quantize_channel_u8(int32_t data, struct csi_tensor *input, + struct csi_tensor *output, float wscale) { float out = data * input->qinfo->scale * wscale; return csi_ref_quantize_f32_to_u8(out, output->qinfo); } -int8_t csi_ref_quantize_channel_i8(int32_t data, struct csi_tensor* input, struct csi_tensor* output, float wscale) +int8_t csi_ref_quantize_channel_i8(int32_t data, struct csi_tensor *input, + struct csi_tensor *output, float wscale) { float out = data * input->qinfo->scale * wscale; return csi_ref_quantize_f32_to_i8(out, output->qinfo); @@ -370,15 +375,13 @@ void csi_ref_nhwc_to_nchw_f32(struct csi_tensor *nt, struct csi_tensor *t) csi_mem_free(t); } -int32_t csi_ref_get_reduction_index(int32_t k, const int32_t *strides, - const int32_t *extents, int32_t n) +int32_t csi_ref_get_reduction_index(int32_t k, const int32_t *strides, const int32_t *extents, + int32_t n) { int32_t index = 0; - for (int32_t i = 0; i < n; i++) - { + for (int32_t i = 0; i < n; i++) { int32_t div = 1; - for (int32_t j = i + 1; j < n; j++) - { + for (int32_t j = i + 1; j < n; j++) { div *= extents[j]; } int32_t mod = div * extents[i]; @@ -423,7 +426,7 @@ float csi_ref_float16_to_float32(int16_t value) { float ret; if (value == 0 || value == 0x8000) { - return 0; + return 0; } int32_t ret_format = 0; int32_t sign = (value & 0x8000) << 16; @@ -434,6 +437,23 @@ float csi_ref_float16_to_float32(int16_t value) return ret; } +int16_t csi_ref_float32_to_bfloat16(float value) +{ + int16_t ret; + int32_t org_format = *(int32_t *)&value; + ret = (org_format & 0xffff0000) >> 16; + return ret; +} + +float csi_ref_bfloat16_to_float32(int16_t value) +{ + float ret; + int32_t ret_format = value << 16; + ; + ret = *(float *)&ret_format; + return ret; +} + struct csi_tensor *csi_ref_alloc_float_tensor(struct csi_tensor *src) { struct csi_tensor *ret = csi_alloc_tensor(NULL); @@ -474,9 +494,8 @@ struct csi_tensor *csi_ref_convert_float_tensor(struct csi_tensor *src) return ret; } -void csi_ref_conv_free_float_tensor(struct csi_tensor *input, - struct csi_tensor *output, struct csi_tensor *kernel, - struct csi_tensor *bias) +void csi_ref_conv_free_float_tensor(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias) { csi_ref_free_float_tensor(input); csi_ref_free_float_tensor(output); @@ -511,9 +530,7 @@ int csi_ref_tensor_transform_free_f32(struct csi_tensor *input) return CSINN_TRUE; } -int csi_ref_siso_callback_base(struct csi_tensor *input, - struct csi_tensor *output, - void *params, +int csi_ref_siso_callback_base(struct csi_tensor *input, struct csi_tensor *output, void *params, void *cb) { int (*callback)() = cb; @@ -527,11 +544,8 @@ int csi_ref_siso_callback_base(struct csi_tensor *input, return ret; } -int csi_ref_diso_callback_base(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, - void *params, - void *cb) +int csi_ref_diso_callback_base(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, void *params, void *cb) { int (*callback)() = cb; int ret; @@ -546,11 +560,8 @@ int csi_ref_diso_callback_base(struct csi_tensor *input0, return ret; } -int csi_ref_conv_callback_base(struct csi_tensor *input, - struct csi_tensor *output, - struct csi_tensor *kernel, - struct csi_tensor *bias, - void *params, +int csi_ref_conv_callback_base(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, void *params, void *cb) { int (*callback)() = cb; @@ -583,13 +594,11 @@ uint8_t *csi_ref_f32_to_input_dtype(uint32_t index, float *data, struct csi_sess return ret_data; } -int csi_ref_broadcast_to_shape(struct csi_tensor *input, - struct csi_tensor *output, - int32_t *shape, +int csi_ref_broadcast_to_shape(struct csi_tensor *input, struct csi_tensor *output, int32_t *shape, int32_t shape_count) { int ret; - if (input->dtype < CSINN_DTYPE_FLOAT16){ + if (input->dtype != CSINN_DTYPE_FLOAT32) { ret = csi_ref_broadcast_to_shape_quant(input, output, shape, shape_count); } else { ret = csi_ref_broadcast_to_shape_f32(input, output, shape, shape_count); @@ -597,11 +606,8 @@ int csi_ref_broadcast_to_shape(struct csi_tensor *input, return ret; } - -int csi_ref_broadcast_to_shape_f32(struct csi_tensor *input, - struct csi_tensor *output, - int32_t *shape, - int32_t shape_count) +int csi_ref_broadcast_to_shape_f32(struct csi_tensor *input, struct csi_tensor *output, + int32_t *shape, int32_t shape_count) { float *input_data = (float *)input->data; float *output_data = (float *)output->data; @@ -611,22 +617,25 @@ int csi_ref_broadcast_to_shape_f32(struct csi_tensor *input, int32_t target_shape_rank = shape_count; // check for broadcast rule - if (target_shape_rank < in_shape_rank){return CSINN_FALSE;} - for (int i = 0; i < in_shape_rank; i++){ - if ((in_shape[in_shape_rank - i -1] != target_shape[target_shape_rank - i - 1]) && - (in_shape[in_shape_rank - i - 1] != 1)){ - return CSINN_FALSE; - } + if (target_shape_rank < in_shape_rank) { + return CSINN_FALSE; + } + for (int i = 0; i < in_shape_rank; i++) { + if ((in_shape[in_shape_rank - i - 1] != target_shape[target_shape_rank - i - 1]) && + (in_shape[in_shape_rank - i - 1] != 1)) { + csi_debug_error("The shapes of input and target do not meet the rules of broadcast!"); + return CSINN_FALSE; + } } // full in_shape int32_t new_shape[target_shape_rank]; - memcpy(new_shape, in_shape, in_shape_rank*4); - if (target_shape_rank > in_shape_rank){ - for (int i = 0; i < target_shape_rank - in_shape_rank; i++){ + memcpy(new_shape, in_shape, in_shape_rank * 4); + if (target_shape_rank > in_shape_rank) { + for (int i = 0; i < target_shape_rank - in_shape_rank; i++) { new_shape[i] = 1; } - for (int i = 0; i < in_shape_rank; i++){ + for (int i = 0; i < in_shape_rank; i++) { int index = target_shape_rank - in_shape_rank + i; new_shape[index] = in_shape[i]; } @@ -639,34 +648,34 @@ int csi_ref_broadcast_to_shape_f32(struct csi_tensor *input, memcpy(output_data_t, input_data, data_size * 4); memcpy(output_data, input_data, data_size * 4); - for(int i=0; i< target_shape_rank; i++){ - - int origin_dim = in_shape[target_shape_rank - i -1]; - int target_dim = target_shape[target_shape_rank - i -1]; + for (int i = 0; i < target_shape_rank; i++) { + int origin_dim = in_shape[target_shape_rank - i - 1]; + int target_dim = target_shape[target_shape_rank - i - 1]; - if (origin_dim != target_dim){ + if (origin_dim != target_dim) { data_size = 1; - for (int i=0; i< target_shape_rank; i++){ + for (int i = 0; i < target_shape_rank; i++) { data_size *= in_shape[i]; } int inner_size = 1; - for (int j = target_shape_rank - i - 1; j < target_shape_rank; j++){ + for (int j = target_shape_rank - i - 1; j < target_shape_rank; j++) { inner_size *= in_shape[j]; } int target_inner_size = 1; - for (int j = target_shape_rank - i - 1; j < target_shape_rank; j++){ + for (int j = target_shape_rank - i - 1; j < target_shape_rank; j++) { target_inner_size *= target_shape[j]; } float tmp_arr[inner_size]; - for (int idx = 0; idx < data_size; idx++){ + for (int idx = 0; idx < data_size; idx++) { // at first output equal to input, then tmp data be saved in output tmp_arr[idx % inner_size] = output_data_t[idx]; - if ((idx + 1) % inner_size == 0){ + if ((idx + 1) % inner_size == 0) { int out_index = ((idx + 1) / inner_size - 1) * target_inner_size; - for (int cp_num = 0; cp_num < target_dim; cp_num++){ - for (int elem_id =0; elem_id < inner_size; elem_id++){ - output_data[out_index + cp_num * inner_size + elem_id] = tmp_arr[elem_id]; + for (int cp_num = 0; cp_num < target_dim; cp_num++) { + for (int elem_id = 0; elem_id < inner_size; elem_id++) { + output_data[out_index + cp_num * inner_size + elem_id] = + tmp_arr[elem_id]; } } } @@ -679,10 +688,8 @@ int csi_ref_broadcast_to_shape_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_broadcast_to_shape_quant(struct csi_tensor *input, - struct csi_tensor *output, - int32_t *shape, - int32_t shape_count) +int csi_ref_broadcast_to_shape_quant(struct csi_tensor *input, struct csi_tensor *output, + int32_t *shape, int32_t shape_count) { struct csi_tensor *finput = csi_ref_tensor_transform_f32(input); struct csi_tensor *foutput = csi_ref_tensor_transform_f32(output); diff --git a/source/reference/xor.c b/source/reference/xor.c index 1c8dc6e9..86e4a749 100644 --- a/source/reference/xor.c +++ b/source/reference/xor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,11 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" -#include -int csi_ref_xor_u32(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_xor_u32(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { uint32_t *input0_data = input0->data; @@ -37,9 +34,7 @@ int csi_ref_xor_u32(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_xor_u8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_xor_u8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { uint8_t *input0_data = input0->data; @@ -53,9 +48,7 @@ int csi_ref_xor_u8(struct csi_tensor *input0, return CSINN_TRUE; } -int csi_ref_xor_i8(struct csi_tensor *input0, - struct csi_tensor *input1, - struct csi_tensor *output, +int csi_ref_xor_i8(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, struct diso_params *params) { int8_t *input0_data = input0->data; diff --git a/source/reference/yuv_rgb_scale.c b/source/reference/yuv_rgb_scale.c index ca0b1b23..f19df80d 100644 --- a/source/reference/yuv_rgb_scale.c +++ b/source/reference/yuv_rgb_scale.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,29 +16,29 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_ref.h" #include "csi_utils.h" -/* https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/ops/image_ops_impl.py#L3279-L3325 line 3279*/ +/* https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/ops/image_ops_impl.py#L3279-L3325 + * line 3279*/ -int csi_ref_yuv_rgb_scale_f32(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_yuv_rgb_scale_f32(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { float *input_data = input->data; float *output_data = output->data; - for(int n = 0; n < input->dim[0]; n++){ - for(int h = 0; h < input->dim[1]; h++){ - for(int w = 0; w < input->dim[2]; w++){ + for (int n = 0; n < input->dim[0]; n++) { + for (int h = 0; h < input->dim[1]; h++) { + for (int w = 0; w < input->dim[2]; w++) { float y = input_data[0]; float u = input_data[1]; float v = input_data[2]; float r = y + 1.13988303 * v; - float g = y -0.394642334 * u - 0.58062185 * v; + float g = y - 0.394642334 * u - 0.58062185 * v; float b = y + 2.03206185 * u; input_data += 3; @@ -53,8 +53,7 @@ int csi_ref_yuv_rgb_scale_f32(struct csi_tensor *input, return CSINN_TRUE; } -int csi_ref_yuv_rgb_scale_quant(struct csi_tensor *input, - struct csi_tensor *output, +int csi_ref_yuv_rgb_scale_quant(struct csi_tensor *input, struct csi_tensor *output, struct siso_params *params) { return csi_ref_siso_callback_base(input, output, params, csi_ref_yuv_rgb_scale_f32); diff --git a/source/thead_rvv/add.c b/source/thead_rvv/add.c new file mode 100644 index 00000000..0b10e9b0 --- /dev/null +++ b/source/thead_rvv/add.c @@ -0,0 +1,283 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +static void element_add_fp32(float *input0, float *input1, float *output, int size) +{ + while (size > 0) { + int vl = vsetvl_e32m2(size); + vfloat32m2_t _in0 = vle32_v_f32m2(input0, vl); + vfloat32m2_t _in1 = vle32_v_f32m2(input1, vl); + vfloat32m2_t _sum = vfadd_vv_f32m2(_in0, _in1, vl); + vse32_v_f32m2(output, _sum, vl); + input0 += vl; + input1 += vl; + output += vl; + size -= vl; + } +} + +int csi_nn_rvv_add_fp32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) +{ + float *input0_data = (float *)input0->data; + float *input1_data = (float *)input1->data; + float *output_data = (float *)output->data; + + int in_size0 = csi_tensor_size(input0); + int in_size1 = csi_tensor_size(input1); + int out_size = csi_tensor_size(output); + + // example: [1, 3, 224, 224] + [1] = [1, 3, 224, 224] + if (in_size1 == 1) { + int size = out_size; + while (size > 0) { + int vl = vsetvl_e32m2(size); + vfloat32m2_t _in0 = vle32_v_f32m2(input0_data, vl); + vfloat32m2_t _sum = vfadd_vf_f32m2(_in0, input1_data[0], vl); + vse32_v_f32m2(output_data, _sum, vl); + input0 += vl; + output += vl; + size -= vl; + } + } + // example: [1, 3, 224, 224] + [1, 3, 224, 224] = [1, 3, 224, 224] + else if (in_size0 == in_size1) { + element_add_fp32(input0_data, input1_data, output_data, out_size); + } else { + int flag = 1; + for (int i = input1->dim_count - 1, j = input0->dim_count - 1; i >= 0; i--, j--) { + if (input0->dim[j] != input1->dim[i]) { + flag = 0; + } + } + // example: [1, 3, 224, 224] + [3, 224, 1] or [1, 3, 224, 224] + [3, 1, 224] + if (!flag) { + float *in0_data_b = csi_mem_alloc(out_size * sizeof(float)); + float *in1_data_b = csi_mem_alloc(out_size * sizeof(float)); + + struct csi_tensor *b_input0 = csi_alloc_tensor(NULL); + struct csi_tensor *b_input1 = csi_alloc_tensor(NULL); + csi_tensor_copy(b_input0, output); + csi_tensor_copy(b_input1, output); + b_input0->data = in0_data_b; + b_input1->data = in1_data_b; + + csi_ref_broadcast_to_shape_f32(input0, b_input0, output->dim, output->dim_count); + csi_ref_broadcast_to_shape_f32(input1, b_input1, output->dim, output->dim_count); + + input0_data = b_input0->data; + input1_data = b_input1->data; + + element_add_fp32(input0_data, input1_data, output_data, out_size); + + csi_mem_free(in0_data_b); + csi_mem_free(in1_data_b); + csi_mem_free(b_input0); + csi_mem_free(b_input1); + } + // example: [1, 3, 224, 224] + [224] = [1, 3, 224, 224] or + // [1, 3, 224, 224] + [224, 224] = [1, 3, 224, 224] + else { + int inner_size = in_size1; + int outer_size = out_size / in_size1; + for (int i = 0; i < outer_size; i++) { + element_add_fp32(input0_data, input1_data, output_data, inner_size); + input0_data += inner_size; + output_data += inner_size; + } + } + } + return CSINN_TRUE; +} + +static void element_add_fp16(__fp16 *input0, __fp16 *input1, __fp16 *output, int size) +{ + while (size > 0) { + int vl = vsetvl_e16m2(size); + vfloat16m2_t _in0 = vle16_v_f16m2(input0, vl); + vfloat16m2_t _in1 = vle16_v_f16m2(input1, vl); + vfloat16m2_t _sum = vfadd_vv_f16m2(_in0, _in1, vl); + vse16_v_f16m2(output, _sum, vl); + input0 += vl; + input1 += vl; + output += vl; + size -= vl; + } +} + +int csi_nn_rvv_add_fp16(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) +{ + __fp16 *input0_data = (__fp16 *)input0->data; + __fp16 *input1_data = (__fp16 *)input1->data; + __fp16 *output_data = (__fp16 *)output->data; + + int in_size0 = csi_tensor_size(input0); + int in_size1 = csi_tensor_size(input1); + int out_size = csi_tensor_size(output); + + // example: [1, 3, 224, 224] + [1] = [1, 3, 224, 224] + if (in_size1 == 1) { + int size = out_size; + while (size > 0) { + int vl = vsetvl_e16m2(size); + vfloat16m2_t _in0 = vle16_v_f16m2(input0_data, vl); + vfloat16m2_t _sum = vfadd_vf_f16m2(_in0, input1_data[0], vl); + vse16_v_f16m2(output_data, _sum, vl); + input0 += vl; + output += vl; + size -= vl; + } + } + // example: [1, 3, 224, 224] + [1, 3, 224, 224] = [1, 3, 224, 224] + else if (in_size0 == in_size1) { + element_add_fp16(input0_data, input1_data, output_data, out_size); + } else { + int flag = 1; + for (int i = input1->dim_count - 1, j = input0->dim_count - 1; i >= 0; i--, j--) { + if (input0->dim[j] != input1->dim[i]) { + flag = 0; + } + } + // example: [1, 3, 224, 224] + [3, 224, 1] or [1, 3, 224, 224] + [3, 1, 224] + if (!flag) { + __fp16 *in0_data_b = csi_mem_alloc(out_size * sizeof(__fp16)); + __fp16 *in1_data_b = csi_mem_alloc(out_size * sizeof(__fp16)); + + struct csi_tensor *b_input0 = csi_alloc_tensor(NULL); + struct csi_tensor *b_input1 = csi_alloc_tensor(NULL); + csi_tensor_copy(b_input0, output); + csi_tensor_copy(b_input1, output); + b_input0->data = in0_data_b; + b_input1->data = in1_data_b; + + csi_ref_broadcast_to_shape_quant(input0, b_input0, output->dim, output->dim_count); + csi_ref_broadcast_to_shape_quant(input1, b_input1, output->dim, output->dim_count); + + input0_data = b_input0->data; + input1_data = b_input1->data; + + element_add_fp16(input0_data, input1_data, output_data, out_size); + + csi_mem_free(in0_data_b); + csi_mem_free(in1_data_b); + csi_mem_free(b_input0); + csi_mem_free(b_input1); + } + // example: [1, 3, 224, 224] + [224] = [1, 3, 224, 224] or + // [1, 3, 224, 224] + [224, 224] = [1, 3, 224, 224] + else { + int inner_size = in_size1; + int outer_size = out_size / in_size1; + for (int i = 0; i < outer_size; i++) { + element_add_fp16(input0_data, input1_data, output_data, inner_size); + input0_data += inner_size; + output_data += inner_size; + } + } + } + return CSINN_TRUE; +} + +// s2(q2-z2) = s0(q0-z0) + s1(q1-z1) +// q2 = s0/s2(q0-z0) + s1/s2(q1-z1) + z2 +static void element_add_int8(int8_t *input0, int8_t *input1, int8_t *output, int size, + int32_t mult0, int32_t shift0, int32_t mult1, int32_t shift1, + int32_t zero_point0, int32_t zero_point1, int32_t zero_point2) +{ + while (size > 0) { + int vl = vsetvl_e8m1(size); + vint8m1_t _in0 = vle8_v_i8m1(input0, vl); + vint8m1_t _in1 = vle8_v_i8m1(input1, vl); + vint16m2_t _in0_w = vwadd_vx_i16m2(_in0, 0, vl); + vint16m2_t _in1_w = vwadd_vx_i16m2(_in1, 0, vl); // widden 8 -> 16 + vint32m4_t _in0_ww = vwadd_vx_i32m4(_in0_w, 0, vl); + vint32m4_t _in1_ww = vwadd_vx_i32m4(_in1_w, 0, vl); // widden 16 -> 32 + + vint32m4_t _q0_z0 = vsub_vx_i32m4(_in0_ww, zero_point0, vl); + vint32m4_t _q1_z1 = vsub_vx_i32m4(_in1_ww, zero_point1, vl); + + int32_t shift_tmp0 = 0, shift_tmp1 = 0; + if (shift0 < 0) { + shift_tmp0 = -shift0 - 1; + } else { + _q0_z0 = vsll_vx_i32m4(_q0_z0, shift0 + 2, vl); + shift_tmp0 = 1; + } + + if (shift1 < 0) { + shift_tmp1 = -shift1 - 1; + } else { + _q1_z1 = vsll_vx_i32m4(_q1_z1, shift1 + 2, vl); + shift_tmp1 = 1; + } + + vint32m4_t _mulh0 = vmulh_vx_i32m4(_q0_z0, mult0, vl); + vint32m4_t _mulh1 = vmulh_vx_i32m4(_q1_z1, mult1, vl); + + _mulh0 = vssra_vx_i32m4(_mulh0, shift_tmp0, vl); + _mulh1 = vssra_vx_i32m4(_mulh1, shift_tmp1, vl); + + vint32m4_t _res0 = vadd_vv_i32m4(_mulh0, _mulh1, vl); + _res0 = vadd_vx_i32m4(_res0, zero_point2, vl); + vint16m2_t _res1 = vnclip_wx_i16m2(_res0, 0, vl); + vint8m1_t _res2 = vnclip_wx_i8m1(_res1, 0, vl); + vse8_v_i8m1(output, _res2, vl); + + input0 += vl; + input1 += vl; + output += vl; + size -= vl; + } +} + +int csi_nn_rvv_add_int8(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) +{ + int8_t *input0_data = (int8_t *)input0->data; + int8_t *input1_data = (int8_t *)input1->data; + int8_t *output_data = (int8_t *)output->data; + + int in_size0 = csi_tensor_size(input0); + int in_size1 = csi_tensor_size(input1); + int out_size = csi_tensor_size(output); + + // TODO: move to init api + float real_scale0 = input0->qinfo->scale / output->qinfo->scale; + float real_scale1 = input1->qinfo->scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale0, &input0->qinfo->multiplier, &input0->qinfo->shift); + csi_quantize_multiplier(real_scale1, &input1->qinfo->multiplier, &input1->qinfo->shift); + + if (in_size0 == in_size1) { + element_add_int8(input0_data, input1_data, output_data, in_size0, input0->qinfo->multiplier, + input0->qinfo->shift, input1->qinfo->multiplier, input1->qinfo->shift, + input0->qinfo->zero_point, input1->qinfo->zero_point, + output->qinfo->zero_point); + } else { + csi_debug_error("Only support elementwise add on RVV CPU\n"); + } + + return CSINN_TRUE; +} diff --git a/source/thead_rvv/avgpool.c b/source/thead_rvv/avgpool.c new file mode 100644 index 00000000..eeab563a --- /dev/null +++ b/source/thead_rvv/avgpool.c @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "csi_thead_rvv.h" + +int csi_nn_rvv_avgpool2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + int32_t input_h = input->dim[2]; + int32_t input_w = input->dim[3]; + + int32_t kernel_h = params->filter_height; + int32_t kernel_w = params->filter_width; + int32_t stride_h = params->stride_height; + int32_t stride_w = params->stride_width; + + int32_t pad_left = params->pad_left; + int32_t pad_right = params->pad_right; + int32_t pad_top = params->pad_top; + int32_t pad_down = params->pad_down; + + params->base.bc = NULL; + + // global avgpool2d + if (input_h == kernel_h && input_w == kernel_w) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_global_avgpool2d_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_global_avgpool2d_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + params->base.bc = csi_ref_avgpool2d_quant; + } + return CSINN_TRUE; + } + + if (stride_h == 2 && stride_w == 2) { + if (kernel_h == 2 && kernel_w == 2) { + if (pad_left == 0 && pad_top == 0) { + // adjust pad according to ceil_mode (ceil mode on caffe pytorch..) + if (input_h % 2 == 1 && params->ceil_mode == 1) { + if (params->pad_down) params->pad_down++; + } + if (input_w % 2 == 1 && params->ceil_mode == 1) { + if (params->pad_right) params->pad_right++; + } + // end consider ceil_mode 2x2s2p0 + + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_avgpool2x2s2_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_avgpool2x2s2_fp16; + } + } else if (pad_left == 1 && pad_top == 1) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_avgpool2x2s2_p1_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_avgpool2x2s2_p1_fp16; + } + } + } else if (kernel_h == 3 && kernel_w == 3) { + if (pad_left == 0 && pad_top == 0) { + // adjust pad according to ceil_mode (ceil mode on caffe pytorch..) + if (input_h % 2 == 0 && params->ceil_mode == 1) { + if (params->pad_down) + params->pad_down++; // origin pad_down mast be equal to zero ? + } + if (input_w % 2 == 0 && params->ceil_mode == 1) { + if (params->pad_right) params->pad_right++; + } + // end consider ceil_mode 3x3s2p0 + + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_avgpool3x3s2_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_avgpool3x3s2_fp16; + } + } else if (pad_left == 1 && pad_top == 1) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_avgpool3x3s2_p1_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_avgpool3x3s2_p1_fp16; + } + } + } + } else if (stride_h == 1 && stride_w == 1) { + if (kernel_h == 3 && kernel_w == 3) { + if (pad_left == 1 && pad_top == 1 && pad_right == 1 && pad_down == 1) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_avgpool3x3s1_p1_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_avgpool3x3s1_p1_fp16; + } + } + } + } + + if (params->base.bc == NULL) { + csi_debug_warning( + "avgpool is not optimized to achieve under this condition on RVV, call reference func " + "replaced.\n"); + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_ref_avgpool2d_f32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_ref_avgpool2d_quant; + } else if (input->dtype == CSINN_DTYPE_INT8) { + params->base.bc = csi_ref_avgpool2d_quant; + } + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/avgpool_2x2.c b/source/thead_rvv/avgpool_2x2.c new file mode 100644 index 00000000..6919a5bc --- /dev/null +++ b/source/thead_rvv/avgpool_2x2.c @@ -0,0 +1,277 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +/* + pad_left = pad_top = 0 + pad_right = 0 or 1 + pad_down = 0 or 1 +*/ +int csi_nn_rvv_avgpool2x2s2_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 1 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 1 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w; + float ratio = 0.25f; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const float *line0 = input_data + c * in_h * in_w; + const float *line1 = line0 + in_w; + float *outptr = output_data + c * out_hw; + + for (int h = 0; h < out_h; h++) { + ratio = 0.25f; + int w = out_w; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + + vfloat32m1_t _sum0 = vfadd_vv_f32m1(_line0_0_6, _line0_1_7, vl); + vfloat32m1_t _sum1 = vfadd_vv_f32m1(_line1_0_6, _line1_1_7, vl); + vfloat32m1_t _sum = vfadd_vv_f32m1(_sum0, _sum1, vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum, ratio, vl); + + vse32_v_f32m1(outptr, _avg, vl); + line0 += 2 * vl; + line1 += 2 * vl; + outptr += vl; + w -= vl; + } + if (extend_w) { + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + outptr[0] = (line0[0] + line1[0]) * ratio; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + } + if (extend_h) { + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + int w = out_w; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + + vfloat32m1_t _sum0 = vfadd_vv_f32m1(_line0_0_6, _line0_1_7, vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum0, ratio, vl); + + vse32_v_f32m1(outptr, _avg, vl); + line0 += 2 * vl; + outptr += vl; + w -= vl; + } + if (extend_w) { + ratio = (params->count_include_pad) ? 0.25f : 1.0f; + outptr[0] = line0[0] * ratio; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +/* + pad_left = pad_top = 1 + pad_right = 0 or 1 + pad_down = 0 or 1 +*/ +int csi_nn_rvv_avgpool2x2s2_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 0 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 0 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w + 1; + float ratio = 0.25f; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const float *line00 = input_data + c * in_h * in_w; + float *outptr = output_data + c * out_hw; + + // h top ---- w left + ratio = (params->count_include_pad) ? 0.25f : 1.0f; + outptr[0] = line00[0] * ratio; + outptr++; + line00++; + // h top ---- w mid + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line00, vl); + + vfloat32m1_t _sum0 = vfadd_vv_f32m1(_line0_0_6, _line0_1_7, vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum0, ratio, vl); + + vse32_v_f32m1(outptr, _avg, vl); + line00 += 2 * vl; + outptr += vl; + w -= vl; + } + + // h top ---- w right + ratio = (params->count_include_pad) ? 0.25f : 1.0f; + if (extend_w) { + outptr[0] = line00[0] * ratio; + outptr++; + } + line00 += remain_w; + + // h mid + const float *line0 = line00; + const float *line1 = line0 + in_w; + for (int h = 0; h < out_h - 1; h++) { + // h mid ---- w left + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + outptr[0] = (line0[0] + line1[0]) * ratio; + outptr++; + line0++; + line1++; + // h mid ---- w mid + float ratio = 0.25f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + + vfloat32m1_t _sum0 = vfadd_vv_f32m1(_line0_0_6, _line0_1_7, vl); + vfloat32m1_t _sum1 = vfadd_vv_f32m1(_line1_0_6, _line1_1_7, vl); + vfloat32m1_t _sum = vfadd_vv_f32m1(_sum0, _sum1, vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum, ratio, vl); + + vse32_v_f32m1(outptr, _avg, vl); + line0 += 2 * vl; + line1 += 2 * vl; + outptr += vl; + w -= vl; + } + // h mid ---- w right + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + if (extend_w) { + outptr[0] = (line0[0] + line1[0]) * ratio; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + } + // h bottom + if (extend_h) { + // h bottom ---- w left + ratio = (params->count_include_pad) ? 0.25f : 1.0f; + outptr[0] = line0[0] * ratio; + outptr++; + line0++; + // h bottom ---- w mid + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + + vfloat32m1_t _sum0 = vfadd_vv_f32m1(_line0_0_6, _line0_1_7, vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum0, ratio, vl); + + vse32_v_f32m1(outptr, _avg, vl); + line0 += 2 * vl; + outptr += vl; + w -= vl; + } + // h bottom ---- w right + ratio = (params->count_include_pad) ? 0.25f : 1.0f; + if (extend_w) { + outptr[0] = line0[0] * ratio; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/avgpool_2x2_fp16.c b/source/thead_rvv/avgpool_2x2_fp16.c new file mode 100644 index 00000000..f9d34264 --- /dev/null +++ b/source/thead_rvv/avgpool_2x2_fp16.c @@ -0,0 +1,267 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +int csi_nn_rvv_avgpool2x2s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 1 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 1 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w; + __fp16 ratio = 0.25f; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const __fp16 *line0 = input_data + c * in_h * in_w; + const __fp16 *line1 = line0 + in_w; + __fp16 *outptr = output_data + c * out_hw; + + for (int h = 0; h < out_h; h++) { + ratio = 0.25f; + int w = out_w; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + + vfloat16m1_t _sum0 = vfadd_vv_f16m1(_line0_0_6, _line0_1_7, vl); + vfloat16m1_t _sum1 = vfadd_vv_f16m1(_line1_0_6, _line1_1_7, vl); + vfloat16m1_t _sum = vfadd_vv_f16m1(_sum0, _sum1, vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum, ratio, vl); + + vse16_v_f16m1(outptr, _avg, vl); + line0 += 2 * vl; + line1 += 2 * vl; + outptr += vl; + w -= vl; + } + if (extend_w) { + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + outptr[0] = (line0[0] + line1[0]) * ratio; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + } + if (extend_h) { + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + int w = out_w; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + + vfloat16m1_t _sum0 = vfadd_vv_f16m1(_line0_0_6, _line0_1_7, vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum0, ratio, vl); + + vse16_v_f16m1(outptr, _avg, vl); + line0 += 2 * vl; + outptr += vl; + w -= vl; + } + if (extend_w) { + ratio = (params->count_include_pad) ? 0.25f : 1.0f; + outptr[0] = line0[0] * ratio; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_avgpool2x2s2_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 0 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 0 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w + 1; + __fp16 ratio = 0.25f; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const __fp16 *line00 = input_data + c * in_h * in_w; + __fp16 *outptr = output_data + c * out_hw; + + // h top ---- w left + ratio = (params->count_include_pad) ? 0.25f : 1.0f; + outptr[0] = line00[0] * ratio; + outptr++; + line00++; + // h top ---- w mid + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line00, vl); + + vfloat16m1_t _sum0 = vfadd_vv_f16m1(_line0_0_6, _line0_1_7, vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum0, ratio, vl); + + vse16_v_f16m1(outptr, _avg, vl); + line00 += 2 * vl; + outptr += vl; + w -= vl; + } + + // h top ---- w right + ratio = (params->count_include_pad) ? 0.25f : 1.0f; + if (extend_w) { + outptr[0] = line00[0] * ratio; + outptr++; + } + line00 += remain_w; + + // h mid + const __fp16 *line0 = line00; + const __fp16 *line1 = line0 + in_w; + for (int h = 0; h < out_h - 1; h++) { + // h mid ---- w left + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + outptr[0] = (line0[0] + line1[0]) * ratio; + outptr++; + line0++; + line1++; + // h mid ---- w mid + ratio = 0.25f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + + vfloat16m1_t _sum0 = vfadd_vv_f16m1(_line0_0_6, _line0_1_7, vl); + vfloat16m1_t _sum1 = vfadd_vv_f16m1(_line1_0_6, _line1_1_7, vl); + vfloat16m1_t _sum = vfadd_vv_f16m1(_sum0, _sum1, vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum, ratio, vl); + + vse16_v_f16m1(outptr, _avg, vl); + line0 += 2 * vl; + line1 += 2 * vl; + outptr += vl; + w -= vl; + } + // h mid ---- w right + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + if (extend_w) { + outptr[0] = (line0[0] + line1[0]) * ratio; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + } + // h bottom + if (extend_h) { + // h bottom ---- w left + ratio = (params->count_include_pad) ? 0.25f : 1.0f; + outptr[0] = line0[0] * ratio; + outptr++; + line0++; + // h bottom ---- w mid + ratio = (params->count_include_pad) ? 0.25f : 0.5f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + + vfloat16m1_t _sum0 = vfadd_vv_f16m1(_line0_0_6, _line0_1_7, vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum0, ratio, vl); + + vse16_v_f16m1(outptr, _avg, vl); + line0 += 2 * vl; + outptr += vl; + w -= vl; + } + // h bottom ---- w right + ratio = (params->count_include_pad) ? 0.25f : 1.0f; + if (extend_w) { + outptr[0] = line0[0] * ratio; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/avgpool_3x3.c b/source/thead_rvv/avgpool_3x3.c new file mode 100644 index 00000000..0dbf61d3 --- /dev/null +++ b/source/thead_rvv/avgpool_3x3.c @@ -0,0 +1,531 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +/* + pad_left = pad_top = 0 + pad_right = 0 or 1 + pad_down = 0 or 1 +*/ +int csi_nn_rvv_avgpool3x3s2_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 0 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 0 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w; + float ratio = 0.11111111f; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const float *line0 = input_data + c * in_h * in_w; + const float *line1 = line0 + in_w; + const float *line2 = line1 + in_w; + float *outptr = output_data + c * out_hw; + + for (int h = 0; h < out_h; h++) { + ratio = 0.11111111f; + int w = out_w; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + vfloat32m1_t _line2_0_6, _line2_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat32m1_t _line0_2_8 = vlse32_v_f32m1(line0, 2 * sizeof(float), vl); + line0 += (vl - 1) * 2; + vfloat32m1_t _sum0 = + vfadd_vv_f32m1(_line0_2_8, vfadd_vv_f32m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat32m1_t _line1_2_8 = vlse32_v_f32m1(line1, 2 * sizeof(float), vl); + line1 += (vl - 1) * 2; + vfloat32m1_t _sum1 = + vfadd_vv_f32m1(_line1_2_8, vfadd_vv_f32m1(_line1_0_6, _line1_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line2_0_6, &_line2_1_7, line2, vl); + line2 += 2; + vfloat32m1_t _line2_2_8 = vlse32_v_f32m1(line2, 2 * sizeof(float), vl); + line2 += (vl - 1) * 2; + vfloat32m1_t _sum2 = + vfadd_vv_f32m1(_line2_2_8, vfadd_vv_f32m1(_line2_0_6, _line2_1_7, vl), vl); + + vfloat32m1_t _sum = vfadd_vv_f32m1(_sum2, vfadd_vv_f32m1(_sum0, _sum1, vl), vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum, ratio, vl); + vse32_v_f32m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + + if (extend_w) { + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + outptr[0] = + (line0[0] + line0[1] + line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + line2 += remain_w + in_w; + } + if (extend_h) { + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + int w = out_w; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat32m1_t _line0_2_8 = vlse32_v_f32m1(line0, 2 * sizeof(float), vl); + line0 += (vl - 1) * 2; + vfloat32m1_t _sum0 = + vfadd_vv_f32m1(_line0_2_8, vfadd_vv_f32m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat32m1_t _line1_2_8 = vlse32_v_f32m1(line1, 2 * sizeof(float), vl); + line1 += (vl - 1) * 2; + vfloat32m1_t _sum1 = + vfadd_vv_f32m1(_line1_2_8, vfadd_vv_f32m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat32m1_t _sum = vfadd_vv_f32m1(_sum0, _sum1, vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum, ratio, vl); + vse32_v_f32m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + if (extend_w) { + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +/* + pad_left = pad_top = 1 + pad_right = 0 or 1 + pad_down = 0 or 1 +*/ +int csi_nn_rvv_avgpool3x3s2_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 1 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 1 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w + 1; + float ratio = 0.11111111f; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const float *line0 = input_data + c * in_h * in_w; + const float *line1 = line0 + in_w; + float *outptr = output_data + c * out_hw; + + // h top ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + line0++; + line1++; + // h top ---- w mid + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat32m1_t _line0_2_8 = vlse32_v_f32m1(line0, 2 * sizeof(float), vl); + line0 += (vl - 1) * 2; + vfloat32m1_t _sum0 = + vfadd_vv_f32m1(_line0_2_8, vfadd_vv_f32m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat32m1_t _line1_2_8 = vlse32_v_f32m1(line1, 2 * sizeof(float), vl); + line1 += (vl - 1) * 2; + vfloat32m1_t _sum1 = + vfadd_vv_f32m1(_line1_2_8, vfadd_vv_f32m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat32m1_t _sum = vfadd_vv_f32m1(_sum0, _sum1, vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum, ratio, vl); + vse32_v_f32m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + + // h top ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + if (extend_w) { + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + } + line0 += remain_w; + line1 += remain_w; + + // h mid + const float *line2 = line1 + in_w; + for (int h = 0; h < out_h - 1; h++) { + // h mid ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + outptr[0] = + (line0[0] + line0[1] + line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + line0++; + line1++; + line2++; + // h mid ---- w mid + ratio = 0.11111111f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + vfloat32m1_t _line2_0_6, _line2_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat32m1_t _line0_2_8 = vlse32_v_f32m1(line0, 2 * sizeof(float), vl); + line0 += (vl - 1) * 2; + vfloat32m1_t _sum0 = + vfadd_vv_f32m1(_line0_2_8, vfadd_vv_f32m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat32m1_t _line1_2_8 = vlse32_v_f32m1(line1, 2 * sizeof(float), vl); + line1 += (vl - 1) * 2; + vfloat32m1_t _sum1 = + vfadd_vv_f32m1(_line1_2_8, vfadd_vv_f32m1(_line1_0_6, _line1_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line2_0_6, &_line2_1_7, line2, vl); + line2 += 2; + vfloat32m1_t _line2_2_8 = vlse32_v_f32m1(line2, 2 * sizeof(float), vl); + line2 += (vl - 1) * 2; + vfloat32m1_t _sum2 = + vfadd_vv_f32m1(_line2_2_8, vfadd_vv_f32m1(_line2_0_6, _line2_1_7, vl), vl); + + vfloat32m1_t _sum = vfadd_vv_f32m1(_sum2, vfadd_vv_f32m1(_sum0, _sum1, vl), vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum, ratio, vl); + vse32_v_f32m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + + // h mid ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + if (extend_w) { + outptr[0] = + (line0[0] + line0[1] + line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + } + line0 += in_w + remain_w; + line1 += in_w + remain_w; + line2 += in_w + remain_w; + } + + // h bottom + if (extend_h) { + // h bottom ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + line0++; + line1++; + + // h bottom ---- w mid + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat32m1_t _line0_2_8 = vlse32_v_f32m1(line0, 2 * sizeof(float), vl); + line0 += (vl - 1) * 2; + vfloat32m1_t _sum0 = + vfadd_vv_f32m1(_line0_2_8, vfadd_vv_f32m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat32m1_t _line1_2_8 = vlse32_v_f32m1(line1, 2 * sizeof(float), vl); + line1 += (vl - 1) * 2; + vfloat32m1_t _sum1 = + vfadd_vv_f32m1(_line1_2_8, vfadd_vv_f32m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat32m1_t _sum = vfadd_vv_f32m1(_sum0, _sum1, vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum, ratio, vl); + vse32_v_f32m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + // h bottom ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + if (extend_w) { + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +/* + pad_left = pad_right = pad_top = pad_down = 1 + in_w = out_w in_h = out_h +*/ +int csi_nn_rvv_avgpool3x3s1_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int output_size = in_c * out_h * out_w; + + float ratio = 0.11111111f; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const float *line1 = input_data + c * in_h * in_w; + const float *line2 = line1 + in_w; + float *outptr = output_data + c * out_h * out_w; + // h top ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + // h top ---- w mid + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + int w = out_w - 2; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line1_0_3 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_1_4 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_2_5 = vle32_v_f32m1(line1, vl); + line1 += vl - 2; + vfloat32m1_t _sum1 = + vfadd_vv_f32m1(_line1_2_5, vfadd_vv_f32m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat32m1_t _line2_0_3 = vle32_v_f32m1(line2, vl); + line2++; + vfloat32m1_t _line2_1_4 = vle32_v_f32m1(line2, vl); + line2++; + vfloat32m1_t _line2_2_5 = vle32_v_f32m1(line2, vl); + line2 += vl - 2; + vfloat32m1_t _sum2 = + vfadd_vv_f32m1(_line2_2_5, vfadd_vv_f32m1(_line2_0_3, _line2_1_4, vl), vl); + + vfloat32m1_t _sum = vfadd_vv_f32m1(_sum1, _sum2, vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum, ratio, vl); + vse32_v_f32m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + // h top ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + line1 += 2; // bump next line: line1 --> line2 + line2 += 2; + + // h mid + const float *line0 = input_data + c * in_h * in_w; + for (int h = 0; h < out_h - 2; h++) { + // h mid ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + outptr[0] = + (line0[0] + line0[1] + line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + // h mid ---- w mid + ratio = 0.11111111f; + w = out_w - 2; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_3 = vle32_v_f32m1(line0, vl); + line0++; + vfloat32m1_t _line0_1_4 = vle32_v_f32m1(line0, vl); + line0++; + vfloat32m1_t _line0_2_5 = vle32_v_f32m1(line0, vl); + line0 += vl - 2; + vfloat32m1_t _sum0 = + vfadd_vv_f32m1(_line0_2_5, vfadd_vv_f32m1(_line0_0_3, _line0_1_4, vl), vl); + + vfloat32m1_t _line1_0_3 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_1_4 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_2_5 = vle32_v_f32m1(line1, vl); + line1 += vl - 2; + vfloat32m1_t _sum1 = + vfadd_vv_f32m1(_line1_2_5, vfadd_vv_f32m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat32m1_t _line2_0_3 = vle32_v_f32m1(line2, vl); + line2++; + vfloat32m1_t _line2_1_4 = vle32_v_f32m1(line2, vl); + line2++; + vfloat32m1_t _line2_2_5 = vle32_v_f32m1(line2, vl); + line2 += vl - 2; + vfloat32m1_t _sum2 = + vfadd_vv_f32m1(_line2_2_5, vfadd_vv_f32m1(_line2_0_3, _line2_1_4, vl), vl); + + vfloat32m1_t _sum = vfadd_vv_f32m1(_sum2, vfadd_vv_f32m1(_sum0, _sum1, vl), vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum, ratio, vl); + vse32_v_f32m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + // h mid ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + outptr[0] = + (line0[0] + line0[1] + line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + line0 += 2; + line1 += 2; + line2 += 2; + } + + // h bottom ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + // h bottom ---- w mid + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + w = out_w - 2; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_3 = vle32_v_f32m1(line0, vl); + line0++; + vfloat32m1_t _line0_1_4 = vle32_v_f32m1(line0, vl); + line0++; + vfloat32m1_t _line0_2_5 = vle32_v_f32m1(line0, vl); + line0 += vl - 2; + vfloat32m1_t _sum0 = + vfadd_vv_f32m1(_line0_2_5, vfadd_vv_f32m1(_line0_0_3, _line0_1_4, vl), vl); + + vfloat32m1_t _line1_0_3 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_1_4 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_2_5 = vle32_v_f32m1(line1, vl); + line1 += vl - 2; + vfloat32m1_t _sum1 = + vfadd_vv_f32m1(_line1_2_5, vfadd_vv_f32m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat32m1_t _sum = vfadd_vv_f32m1(_sum0, _sum1, vl); + vfloat32m1_t _avg = vfmul_vf_f32m1(_sum, ratio, vl); + vse32_v_f32m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + // h bottom ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/avgpool_3x3_fp16.c b/source/thead_rvv/avgpool_3x3_fp16.c new file mode 100644 index 00000000..bbe72fe0 --- /dev/null +++ b/source/thead_rvv/avgpool_3x3_fp16.c @@ -0,0 +1,517 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +int csi_nn_rvv_avgpool3x3s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 0 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 0 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w; + __fp16 ratio = 0.11111111f; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const __fp16 *line0 = input_data + c * in_h * in_w; + const __fp16 *line1 = line0 + in_w; + const __fp16 *line2 = line1 + in_w; + __fp16 *outptr = output_data + c * out_hw; + + for (int h = 0; h < out_h; h++) { + ratio = 0.11111111f; + int w = out_w; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + vfloat16m1_t _line2_0_6, _line2_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat16m1_t _line0_2_8 = vlse16_v_f16m1(line0, 2 * sizeof(__fp16), vl); + line0 += (vl - 1) * 2; + vfloat16m1_t _sum0 = + vfadd_vv_f16m1(_line0_2_8, vfadd_vv_f16m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat16m1_t _line1_2_8 = vlse16_v_f16m1(line1, 2 * sizeof(__fp16), vl); + line1 += (vl - 1) * 2; + vfloat16m1_t _sum1 = + vfadd_vv_f16m1(_line1_2_8, vfadd_vv_f16m1(_line1_0_6, _line1_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line2_0_6, &_line2_1_7, line2, vl); + line2 += 2; + vfloat16m1_t _line2_2_8 = vlse16_v_f16m1(line2, 2 * sizeof(__fp16), vl); + line2 += (vl - 1) * 2; + vfloat16m1_t _sum2 = + vfadd_vv_f16m1(_line2_2_8, vfadd_vv_f16m1(_line2_0_6, _line2_1_7, vl), vl); + + vfloat16m1_t _sum = vfadd_vv_f16m1(_sum2, vfadd_vv_f16m1(_sum0, _sum1, vl), vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum, ratio, vl); + vse16_v_f16m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + + if (extend_w) { + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + outptr[0] = + (line0[0] + line0[1] + line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + line2 += remain_w + in_w; + } + if (extend_h) { + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + int w = out_w; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat16m1_t _line0_2_8 = vlse16_v_f16m1(line0, 2 * sizeof(__fp16), vl); + line0 += (vl - 1) * 2; + vfloat16m1_t _sum0 = + vfadd_vv_f16m1(_line0_2_8, vfadd_vv_f16m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat16m1_t _line1_2_8 = vlse16_v_f16m1(line1, 2 * sizeof(__fp16), vl); + line1 += (vl - 1) * 2; + vfloat16m1_t _sum1 = + vfadd_vv_f16m1(_line1_2_8, vfadd_vv_f16m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat16m1_t _sum = vfadd_vv_f16m1(_sum0, _sum1, vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum, ratio, vl); + vse16_v_f16m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + if (extend_w) { + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_avgpool3x3s2_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 1 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 1 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w + 1; + __fp16 ratio = 0.11111111f; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const __fp16 *line0 = input_data + c * in_h * in_w; + const __fp16 *line1 = line0 + in_w; + __fp16 *outptr = output_data + c * out_hw; + + // h top ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + line0++; + line1++; + // h top ---- w mid + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat16m1_t _line0_2_8 = vlse16_v_f16m1(line0, 2 * sizeof(__fp16), vl); + line0 += (vl - 1) * 2; + vfloat16m1_t _sum0 = + vfadd_vv_f16m1(_line0_2_8, vfadd_vv_f16m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat16m1_t _line1_2_8 = vlse16_v_f16m1(line1, 2 * sizeof(__fp16), vl); + line1 += (vl - 1) * 2; + vfloat16m1_t _sum1 = + vfadd_vv_f16m1(_line1_2_8, vfadd_vv_f16m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat16m1_t _sum = vfadd_vv_f16m1(_sum0, _sum1, vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum, ratio, vl); + vse16_v_f16m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + + // h top ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + if (extend_w) { + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + } + line0 += remain_w; + line1 += remain_w; + + // h mid + const __fp16 *line2 = line1 + in_w; + for (int h = 0; h < out_h - 1; h++) { + // h mid ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + outptr[0] = + (line0[0] + line0[1] + line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + line0++; + line1++; + line2++; + // h mid ---- w mid + ratio = 0.11111111f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + vfloat16m1_t _line2_0_6, _line2_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat16m1_t _line0_2_8 = vlse16_v_f16m1(line0, 2 * sizeof(__fp16), vl); + line0 += (vl - 1) * 2; + vfloat16m1_t _sum0 = + vfadd_vv_f16m1(_line0_2_8, vfadd_vv_f16m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat16m1_t _line1_2_8 = vlse16_v_f16m1(line1, 2 * sizeof(__fp16), vl); + line1 += (vl - 1) * 2; + vfloat16m1_t _sum1 = + vfadd_vv_f16m1(_line1_2_8, vfadd_vv_f16m1(_line1_0_6, _line1_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line2_0_6, &_line2_1_7, line2, vl); + line2 += 2; + vfloat16m1_t _line2_2_8 = vlse16_v_f16m1(line2, 2 * sizeof(__fp16), vl); + line2 += (vl - 1) * 2; + vfloat16m1_t _sum2 = + vfadd_vv_f16m1(_line2_2_8, vfadd_vv_f16m1(_line2_0_6, _line2_1_7, vl), vl); + + vfloat16m1_t _sum = vfadd_vv_f16m1(_sum2, vfadd_vv_f16m1(_sum0, _sum1, vl), vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum, ratio, vl); + vse16_v_f16m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + + // h mid ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + if (extend_w) { + outptr[0] = + (line0[0] + line0[1] + line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + } + line0 += in_w + remain_w; + line1 += in_w + remain_w; + line2 += in_w + remain_w; + } + + // h bottom + if (extend_h) { + // h bottom ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + line0++; + line1++; + + // h bottom ---- w mid + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat16m1_t _line0_2_8 = vlse16_v_f16m1(line0, 2 * sizeof(__fp16), vl); + line0 += (vl - 1) * 2; + vfloat16m1_t _sum0 = + vfadd_vv_f16m1(_line0_2_8, vfadd_vv_f16m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat16m1_t _line1_2_8 = vlse16_v_f16m1(line1, 2 * sizeof(__fp16), vl); + line1 += (vl - 1) * 2; + vfloat16m1_t _sum1 = + vfadd_vv_f16m1(_line1_2_8, vfadd_vv_f16m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat16m1_t _sum = vfadd_vv_f16m1(_sum0, _sum1, vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum, ratio, vl); + vse16_v_f16m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + // h bottom ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + if (extend_w) { + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_avgpool3x3s1_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int output_size = in_c * out_h * out_w; + + __fp16 ratio = 0.11111111f; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const __fp16 *line1 = input_data + c * in_h * in_w; + const __fp16 *line2 = line1 + in_w; + __fp16 *outptr = output_data + c * out_h * out_w; + // h top ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + // h top ---- w mid + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + int w = out_w - 2; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line1_0_3 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_1_4 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_2_5 = vle16_v_f16m1(line1, vl); + line1 += vl - 2; + vfloat16m1_t _sum1 = + vfadd_vv_f16m1(_line1_2_5, vfadd_vv_f16m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat16m1_t _line2_0_3 = vle16_v_f16m1(line2, vl); + line2++; + vfloat16m1_t _line2_1_4 = vle16_v_f16m1(line2, vl); + line2++; + vfloat16m1_t _line2_2_5 = vle16_v_f16m1(line2, vl); + line2 += vl - 2; + vfloat16m1_t _sum2 = + vfadd_vv_f16m1(_line2_2_5, vfadd_vv_f16m1(_line2_0_3, _line2_1_4, vl), vl); + + vfloat16m1_t _sum = vfadd_vv_f16m1(_sum1, _sum2, vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum, ratio, vl); + vse16_v_f16m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + // h top ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + line1 += 2; // bump next line: line1 --> line2 + line2 += 2; + + // h mid + const __fp16 *line0 = input_data + c * in_h * in_w; + for (int h = 0; h < out_h - 2; h++) { + // h mid ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + outptr[0] = + (line0[0] + line0[1] + line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + // h mid ---- w mid + ratio = 0.11111111f; + w = out_w - 2; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_3 = vle16_v_f16m1(line0, vl); + line0++; + vfloat16m1_t _line0_1_4 = vle16_v_f16m1(line0, vl); + line0++; + vfloat16m1_t _line0_2_5 = vle16_v_f16m1(line0, vl); + line0 += vl - 2; + vfloat16m1_t _sum0 = + vfadd_vv_f16m1(_line0_2_5, vfadd_vv_f16m1(_line0_0_3, _line0_1_4, vl), vl); + + vfloat16m1_t _line1_0_3 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_1_4 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_2_5 = vle16_v_f16m1(line1, vl); + line1 += vl - 2; + vfloat16m1_t _sum1 = + vfadd_vv_f16m1(_line1_2_5, vfadd_vv_f16m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat16m1_t _line2_0_3 = vle16_v_f16m1(line2, vl); + line2++; + vfloat16m1_t _line2_1_4 = vle16_v_f16m1(line2, vl); + line2++; + vfloat16m1_t _line2_2_5 = vle16_v_f16m1(line2, vl); + line2 += vl - 2; + vfloat16m1_t _sum2 = + vfadd_vv_f16m1(_line2_2_5, vfadd_vv_f16m1(_line2_0_3, _line2_1_4, vl), vl); + + vfloat16m1_t _sum = vfadd_vv_f16m1(_sum2, vfadd_vv_f16m1(_sum0, _sum1, vl), vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum, ratio, vl); + vse16_v_f16m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + // h mid ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + outptr[0] = + (line0[0] + line0[1] + line1[0] + line1[1] + line2[0] + line2[1]) * ratio; + outptr++; + line0 += 2; + line1 += 2; + line2 += 2; + } + + // h bottom ---- w left + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + outptr++; + // h bottom ---- w mid + ratio = (params->count_include_pad) ? 0.11111111f : 0.16666667f; + w = out_w - 2; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_3 = vle16_v_f16m1(line0, vl); + line0++; + vfloat16m1_t _line0_1_4 = vle16_v_f16m1(line0, vl); + line0++; + vfloat16m1_t _line0_2_5 = vle16_v_f16m1(line0, vl); + line0 += vl - 2; + vfloat16m1_t _sum0 = + vfadd_vv_f16m1(_line0_2_5, vfadd_vv_f16m1(_line0_0_3, _line0_1_4, vl), vl); + + vfloat16m1_t _line1_0_3 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_1_4 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_2_5 = vle16_v_f16m1(line1, vl); + line1 += vl - 2; + vfloat16m1_t _sum1 = + vfadd_vv_f16m1(_line1_2_5, vfadd_vv_f16m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat16m1_t _sum = vfadd_vv_f16m1(_sum0, _sum1, vl); + vfloat16m1_t _avg = vfmul_vf_f16m1(_sum, ratio, vl); + vse16_v_f16m1(outptr, _avg, vl); + + outptr += vl; + w -= vl; + } + // h bottom ---- w right + ratio = (params->count_include_pad) ? 0.11111111f : 0.25f; + outptr[0] = (line0[0] + line0[1] + line1[0] + line1[1]) * ratio; + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/concat.c b/source/thead_rvv/concat.c new file mode 100644 index 00000000..5675d87d --- /dev/null +++ b/source/thead_rvv/concat.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ +#include "csi_thead_rvv.h" + +int csi_nn_rvv_concat_fp32(struct csi_tensor **input, struct csi_tensor *output, + struct concat_params *params) +{ + int64_t outer_size = 1; + for (int i = 0; i < params->axis; ++i) { + outer_size *= output->dim[i]; + } + + int64_t base_inner_size = 1; + for (int i = params->axis + 1; i < output->dim_count; ++i) { + base_inner_size *= output->dim[i]; + } + int vl; + float *output_ptr = output->data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < params->inputs_count; ++i) { + struct csi_tensor *input_item = input[i]; + float *input_item_data = input_item->data; + int copy_size = input_item->dim[params->axis] * base_inner_size; + const float *input_ptr = input_item_data + k * copy_size; + while (copy_size > 0) { + vl = vsetvl_e32m2(copy_size); + vfloat32m2_t _input = vle32_v_f32m2(input_ptr, vl); + input_ptr += vl; + vse32_v_f32m2(output_ptr, _input, vl); + output_ptr += vl; + copy_size -= vl; + } + } + } + return CSINN_TRUE; +} + +int csi_nn_rvv_concat_fp16(struct csi_tensor **input, struct csi_tensor *output, + struct concat_params *params) +{ + int64_t outer_size = 1; + for (int i = 0; i < params->axis; ++i) { + outer_size *= output->dim[i]; + } + + int64_t base_inner_size = 1; + for (int i = params->axis + 1; i < output->dim_count; ++i) { + base_inner_size *= output->dim[i]; + } + int vl; + __fp16 *output_ptr = output->data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < params->inputs_count; ++i) { + struct csi_tensor *input_item = input[i]; + __fp16 *input_item_data = input_item->data; + int copy_size = input_item->dim[params->axis] * base_inner_size; + const __fp16 *input_ptr = input_item_data + k * copy_size; + while (copy_size > 0) { + vl = vsetvl_e16m2(copy_size); + vfloat16m2_t _input = vle16_v_f16m2(input_ptr, vl); + input_ptr += vl; + vse16_v_f16m2(output_ptr, _input, vl); + output_ptr += vl; + copy_size -= vl; + } + } + } + return CSINN_TRUE; +} + +int csi_nn_rvv_concat_int8(struct csi_tensor **input, struct csi_tensor *output, + struct concat_params *params) +{ + int64_t outer_size = 1; + for (int i = 0; i < params->axis; ++i) { + outer_size *= output->dim[i]; + } + int64_t base_inner_size = 1; + for (int i = params->axis + 1; i < output->dim_count; ++i) { + base_inner_size *= output->dim[i]; + } + int vl; + int8_t *output_ptr = (int8_t *)output->data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < params->inputs_count; ++i) { + struct csi_tensor *input_item = input[i]; + int8_t *input_item_data = (int8_t *)input_item->data; + int copy_size = input_item->dim[params->axis] * base_inner_size; + const int8_t *input_ptr = input_item_data + k * copy_size; + while (copy_size > 0) { + vl = vsetvl_e8m2(copy_size); + vint8m2_t _input = vle8_v_i8m2(input_ptr, vl); + input_ptr += vl; + vse8_v_i8m2(output_ptr, _input, vl); + output_ptr += vl; + copy_size -= vl; + } + } + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/convolution.c b/source/thead_rvv/convolution.c new file mode 100644 index 00000000..098f88ce --- /dev/null +++ b/source/thead_rvv/convolution.c @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "csi_thead_rvv.h" + +/* + only support layout:NCHW + input layout: N C H W + kernel layout: O I h w + output layout: N O H W +*/ +int csi_nn_rvv_conv2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + int32_t out_c = kernel->dim[0]; + int32_t in_c = kernel->dim[1]; + int32_t in_h = input->dim[2]; + int32_t in_w = input->dim[3]; + int32_t kernel_h = kernel->dim[2]; + int32_t kernel_w = kernel->dim[3]; + int32_t stride_h = params->stride_height; + int32_t stride_w = params->stride_width; + int32_t dalition_h = params->dilation_height; + int32_t dalition_w = params->dilation_width; + + // check + int out_height = (in_h + params->pad_top + params->pad_down - kernel_h) / stride_h + 1; + int out_width = (in_w + params->pad_left + params->pad_right - kernel_w) / stride_w + 1; + if (out_height != output->dim[2] || out_width != output->dim[3]) { + printf("output dim don't match.\n"); + return CSINN_FALSE; + } + + if (kernel_h == 1 && kernel_w == 1 && stride_h == 1 && stride_w == 1 && dalition_h == 1 && + dalition_w == 1) { + params->conv_extra.conv_mode = CSINN_GEMM; + if (input->dtype == CSINN_DTYPE_FLOAT32) { + csi_nn_rvv_conv1x1s1_gemm_transform_kernel_fp32(kernel, params); + params->base.bc = csi_nn_rvv_conv1x1s1_gemm_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + csi_nn_rvv_conv1x1s1_gemm_transform_kernel_fp16(kernel, params); + params->base.bc = csi_nn_rvv_conv1x1s1_gemm_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { +#ifdef __riscv_xtheadv + params->conv_extra.kernel_tm = csi_alloc_tensor(NULL); + csi_nn_rvv_conv1x1s1_gemm_transform_kernel_int8(kernel, params); + // support channel quantization + for (int i = 0; i < kernel->quant_channel; i++) { + float real_scale = + input->qinfo->scale * kernel->qinfo[i].scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale, &(kernel->qinfo[i].multiplier), + &(kernel->qinfo[i].shift)); + } + params->base.bc = csi_nn_rvv_conv1x1s1_gemm_int8; +#endif + } + // winograd convolution condition: + } else if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && + dalition_h == 1 && dalition_w == 1) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + if (params->group > 1) { + params->conv_extra.conv_mode = CSINN_GEMM; + csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp32(kernel, params); + params->base.bc = csi_nn_rvv_conv_im2col_gemm_fp32; + return CSINN_TRUE; + } + + // pack4 for winograd convolution + if ((out_c % 4 == 0) && (in_c % 4 == 0)) { + params->conv_extra.conv_mode = CSINN_WINOGRAD; + struct csi_tensor *t_kernel = csi_alloc_tensor(NULL); + csi_nn_rvv_conv3x3s1_winograd64_transform_kernel_packn_fp32(kernel, t_kernel); + params->conv_extra.kernel_tm = t_kernel; + params->base.bc = csi_nn_rvv_conv3x3s1_winograd64_packn_fp32; + } else { + params->conv_extra.conv_mode = CSINN_GEMM; + csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp32(kernel, params); + params->base.bc = csi_nn_rvv_conv_im2col_gemm_fp32; + } + + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + if (params->group > 1) { + params->conv_extra.conv_mode = CSINN_GEMM; + csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp16(kernel, params); + params->base.bc = csi_nn_rvv_conv_im2col_gemm_fp16; + return CSINN_TRUE; + } + + // pack8 for winograd convolution + if ((out_c % 8 == 0) && (in_c % 8 == 0)) { + params->conv_extra.conv_mode = CSINN_WINOGRAD; + struct csi_tensor *t_kernel = csi_alloc_tensor(NULL); + csi_nn_rvv_conv3x3s1_winograd64_transform_kernel_packn_fp16(kernel, t_kernel); + params->conv_extra.kernel_tm = t_kernel; + params->base.bc = csi_nn_rvv_conv3x3s1_winograd64_packn_fp16; + } else { + params->conv_extra.conv_mode = CSINN_GEMM; + csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp16(kernel, params); + params->base.bc = csi_nn_rvv_conv_im2col_gemm_fp16; + } + } else if (input->dtype == CSINN_DTYPE_INT8) { +#ifdef __riscv_xtheadv + params->conv_extra.conv_mode = CSINN_GEMM; + params->conv_extra.kernel_tm = csi_alloc_tensor(NULL); + csi_nn_rvv_conv_im2col_sgemm_transform_kernel_int8(kernel, params); + // support channel quantization + for (int i = 0; i < kernel->quant_channel; i++) { + float real_scale = + input->qinfo->scale * kernel->qinfo[i].scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale, &(kernel->qinfo[i].multiplier), + &(kernel->qinfo[i].shift)); + } + params->base.bc = csi_nn_rvv_conv_im2col_gemm_int8; +#endif + } + + } else { + params->conv_extra.conv_mode = CSINN_GEMM; + if (input->dtype == CSINN_DTYPE_FLOAT32) { + csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp32(kernel, params); + params->base.bc = csi_nn_rvv_conv_im2col_gemm_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp16(kernel, params); + params->base.bc = csi_nn_rvv_conv_im2col_gemm_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { +#ifdef __riscv_xtheadv + params->conv_extra.conv_mode = CSINN_GEMM; + params->conv_extra.kernel_tm = csi_alloc_tensor(NULL); + csi_nn_rvv_conv_im2col_sgemm_transform_kernel_int8(kernel, params); + // support channel quantization + for (int i = 0; i < kernel->quant_channel; i++) { + float real_scale = + input->qinfo->scale * kernel->qinfo[i].scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale, &(kernel->qinfo[i].multiplier), + &(kernel->qinfo[i].shift)); + } + params->base.bc = csi_nn_rvv_conv_im2col_gemm_int8; +#endif + } + } + return CSINN_TRUE; +} + +int csi_nn_rvv_depthwise_conv2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + int32_t batch = input->dim[0]; + int32_t in_ch = input->dim[1]; + int32_t in_h = input->dim[2]; + int32_t in_w = input->dim[3]; + + int32_t out_ch = output->dim[1]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + int32_t kernel_h = kernel->dim[2]; + int32_t kernel_w = kernel->dim[3]; + int32_t stride_h = params->stride_height; + int32_t stride_w = params->stride_width; + + if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_dwconv3x3s1_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_dwconv3x3s1_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + // support channel quantization + for (int i = 0; i < kernel->quant_channel; i++) { + float real_scale = + input->qinfo->scale * kernel->qinfo[i].scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale, &(kernel->qinfo[i].multiplier), + &(kernel->qinfo[i].shift)); + } + params->base.bc = csi_nn_rvv_dwconv3x3s1_int8; + } + } else if (kernel_h == 3 && kernel_w == 3 && stride_h == 2 && stride_w == 2) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_dwconv3x3s2_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_dwconv3x3s2_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + // support channel quantization + for (int i = 0; i < kernel->quant_channel; i++) { + float real_scale = + input->qinfo->scale * kernel->qinfo[i].scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale, &(kernel->qinfo[i].multiplier), + &(kernel->qinfo[i].shift)); + } + params->base.bc = csi_nn_rvv_dwconv3x3s2_int8; + } + } else { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_ref_depthwise_conv2d_f32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_ref_depthwise_conv2d_quant; + } + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/convolution_1x1.c b/source/thead_rvv/convolution_1x1.c new file mode 100644 index 00000000..53d7408c --- /dev/null +++ b/source/thead_rvv/convolution_1x1.c @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +void csi_nn_rvv_conv1x1s1_gemm_transform_kernel_fp32(struct csi_tensor *kernel, + struct conv2d_params *params) +{ + float *kernel_data = (float *)kernel->data; + int group = params->group; + + int m = kernel->dim[0] / group; // out_ch / group + int k = kernel->dim[1]; // in_ch ( kernel->dim[2] = kernel->dim[3] = 1) + + float *pa_reorder = (float *)csi_mem_alloc(group * m * k * sizeof(float)); + for (int g = 0; g < group; g++) { + csi_nn_rvv_reorder_kernel_n8_fp32(kernel_data + g * m * k, pa_reorder + g * m * k, m, k, k); + } + memcpy(kernel_data, pa_reorder, group * m * k * sizeof(float)); + csi_mem_free(pa_reorder); +} + +int csi_nn_rvv_conv1x1s1_gemm_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + float *kernel_data = (float *)kernel->data; + float *bias_data = (float *)bias->data; + + int32_t group = params->group; + int32_t batch = input->dim[0]; // assert(batch == 1); + int32_t in_ch = input->dim[1]; + int32_t out_ch = kernel->dim[0]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + int32_t m = out_ch / group; + int32_t k = in_ch / group; + int32_t n = out_h * out_w; + + float *pb_reorder = (float *)csi_mem_alloc(k * n * sizeof(float)); + + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + float *pa = kernel_data + g * m * k; + float *pb = pb_reorder; + float *pc = output_data; + // pack + csi_nn_rvv_reorder_input_z8_fp32(input_data, pb, k, n, n); + // GEMM + csi_nn_rvv_gemm_8x8_fp32(pc, pa, pb, m, k, n, n, bias_data + g * m); + input_data += k * n; + output_data += m * n; + } + } + csi_mem_free(pb_reorder); + return CSINN_TRUE; +} diff --git a/source/thead_rvv/convolution_1x1_fp16.c b/source/thead_rvv/convolution_1x1_fp16.c new file mode 100644 index 00000000..aced0510 --- /dev/null +++ b/source/thead_rvv/convolution_1x1_fp16.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +void csi_nn_rvv_conv1x1s1_gemm_transform_kernel_fp16(struct csi_tensor *kernel, + struct conv2d_params *params) +{ + __fp16 *kernel_data = (__fp16 *)kernel->data; + int group = params->group; + + int m = kernel->dim[0] / group; // out_ch + int k = kernel->dim[1]; // in_ch ( kernel->dim[2] = kernel->dim[3] = 1) + + __fp16 *pa_reorder = (__fp16 *)csi_mem_alloc(group * m * k * sizeof(__fp16)); + for (int g = 0; g < group; g++) { + csi_nn_rvv_reorder_kernel_n8_fp16(kernel_data + g * m * k, pa_reorder + g * m * k, m, k, k); + } + memcpy(kernel_data, pa_reorder, group * m * k * sizeof(__fp16)); + csi_mem_free(pa_reorder); +} + +int csi_nn_rvv_conv1x1s1_gemm_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *kernel_data = (__fp16 *)kernel->data; + __fp16 *bias_data = (__fp16 *)bias->data; + + int32_t group = params->group; + int32_t batch = input->dim[0]; // assert(batch == 1); + int32_t in_ch = input->dim[1]; + int32_t out_ch = kernel->dim[0]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + int32_t m = out_ch / group; + int32_t k = in_ch / group; + int32_t n = out_h * out_w; + + __fp16 *pb_reorder = (__fp16 *)csi_mem_alloc(k * n * sizeof(__fp16)); + + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + __fp16 *pa = kernel_data + g * m * k; + __fp16 *pb = pb_reorder; + __fp16 *pc = output_data; + + // pack + csi_nn_rvv_reorder_input_z16_fp16(input_data, pb, k, n, n); + // GEMM + csi_nn_rvv_gemm_8x16_fp16(pc, pa, pb, m, k, n, n, bias_data + g * m); + + input_data += k * n; + output_data += m * n; + } + } + csi_mem_free(pb_reorder); + return CSINN_TRUE; +} diff --git a/source/thead_rvv/convolution_1x1_int4.c b/source/thead_rvv/convolution_1x1_int4.c new file mode 100644 index 00000000..293d9ef9 --- /dev/null +++ b/source/thead_rvv/convolution_1x1_int4.c @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +// kernel_layout: [o, h, w, i] +void csi_nn_rvv_conv1x1s1_gemm_transform_kernel_int4(struct csi_tensor *kernel, + struct conv2d_params *params) +{ + int8_t *kernel_data = (int8_t *)kernel->data; + int group = params->group; + + int n = kernel->dim[0] / group; // out_ch + int k = kernel->dim[3]; // in_ch ( kernel->dim[1] = kernel->dim[2] = 1) + int k_2 = (((k - 1) & -2) + 2) >> 1; // pair of int4, col of kernel_matrix + int k4 = ((k_2 - 1) & -4) + 4; // align of 4 for int8 + + params->conv_extra.kernel_tm->data = (int8_t *)csi_mem_alloc(group * n * k4 * sizeof(int8_t)); + int8_t *pa_reorder = (int8_t *)params->conv_extra.kernel_tm->data; + + for (int g = 0; g < group; g++) { + csi_nn_rvv_reorder_kernel_n8_int8(kernel_data + g * n * k_2, pa_reorder + g * n * k4, n, + k_2, k_2); + } + // FIXME: free params->conv_extra.kernel_tm->data + // memcpy(kernel_data, pa_reorder, group * m * k * sizeof(int8_t)); + // csi_mem_free(pa_reorder); +} + +int csi_nn_rvv_conv1x1s1_gemm_int4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + int8_t *kernel_data = (int8_t *)params->conv_extra.kernel_tm->data; + int32_t *bias_data = (int32_t *)bias->data; + + int32_t group = params->group; + int32_t batch = input->dim[0]; // assert(batch == 1); + int32_t in_ch = input->dim[3]; + int32_t out_ch = kernel->dim[0]; + int32_t out_h = output->dim[1]; + int32_t out_w = output->dim[2]; + + int32_t m = out_h * out_w; + int32_t k = in_ch / group; + int32_t n = out_ch / group; + + int32_t k_2 = (((k - 1) & -2) + 2) >> 1; + int32_t k4 = ((k_2 - 1) & -4) + 4; + + int8_t *pa_reorder = (int8_t *)csi_mem_alloc(m * k4 * sizeof(int8_t)); + int32_t *multiplier = (int32_t *)csi_mem_alloc(n * sizeof(int32_t)); + int32_t *shift = (int32_t *)csi_mem_alloc(n * sizeof(int32_t)); + + int j = 0; + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + int8_t *pa = pa_reorder; + int8_t *pb = kernel_data + g * n * k4; + int8_t *pc = output_data; + + if (kernel->quant_channel > 1) { + for (int c = 0; c < n; c++, j++) { + multiplier[c] = kernel->qinfo[j].multiplier; + shift[c] = kernel->qinfo[j].shift; + } + } else if (kernel->quant_channel == 1) { + for (int c = 0; c < n; c++) { + multiplier[c] = kernel->qinfo[0].multiplier; + shift[c] = kernel->qinfo[0].shift; + } + } + + // pack + csi_nn_rvv_reorder_input_n8_int4(input_data, pa, m, k_2, k_2); + // GEMM + csi_nn_rvv_gemm_8x8_int4(pc, pa, pb, m, k4, n, n / 2, bias_data + g * n, + output->qinfo->zero_point, multiplier, shift); + input_data += m * k_2; + output_data += m * n / 2; + } + } + csi_mem_free(pa_reorder); + csi_mem_free(multiplier); + csi_mem_free(shift); + return CSINN_TRUE; +} diff --git a/source/thead_rvv/convolution_1x1_int8.c b/source/thead_rvv/convolution_1x1_int8.c new file mode 100644 index 00000000..855cfe37 --- /dev/null +++ b/source/thead_rvv/convolution_1x1_int8.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +void csi_nn_rvv_conv1x1s1_gemm_transform_kernel_int8(struct csi_tensor *kernel, + struct conv2d_params *params) +{ + int8_t *kernel_data = (int8_t *)kernel->data; + int group = params->group; + + int m = kernel->dim[0] / group; // out_ch + int k = kernel->dim[1]; // in_ch ( kernel->dim[2] = kernel->dim[3] = 1) + int k4 = (k % 4 != 0) ? ((k / 4 + 1) * 4) : k; + + params->conv_extra.kernel_tm->data = (int8_t *)csi_mem_alloc(group * m * k4 * sizeof(int8_t)); + int8_t *pa_reorder = (int8_t *)params->conv_extra.kernel_tm->data; + + for (int g = 0; g < group; g++) { + csi_nn_rvv_reorder_kernel_n8_int8(kernel_data + g * m * k, pa_reorder + g * m * k4, m, k, + k); + } + // FIXME: free params->conv_extra.kernel_tm->data + // memcpy(kernel_data, pa_reorder, group * m * k * sizeof(int8_t)); + // csi_mem_free(pa_reorder); +} + +int csi_nn_rvv_conv1x1s1_gemm_int8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + int8_t *kernel_data = (int8_t *)params->conv_extra.kernel_tm->data; + int32_t *bias_data = (int32_t *)bias->data; + + int32_t group = params->group; + int32_t batch = input->dim[0]; // assert(batch == 1); + int32_t in_ch = input->dim[1]; + int32_t out_ch = kernel->dim[0]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + int32_t m = out_ch / group; + int32_t k = in_ch / group; + int32_t n = out_h * out_w; + int32_t k4 = (k % 4 != 0) ? ((k / 4 + 1) * 4) : k; + + int8_t *pb_reorder = (int8_t *)csi_mem_alloc(k4 * n * sizeof(int8_t)); + int32_t *multiplier = (int32_t *)csi_mem_alloc(m * sizeof(int32_t)); + int32_t *shift = (int32_t *)csi_mem_alloc(m * sizeof(int32_t)); + + int j = 0; + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + int8_t *pa = kernel_data + g * m * k4; + int8_t *pb = pb_reorder; + int8_t *pc = output_data; + + if (kernel->quant_channel > 1) { + for (int c = 0; c < m; c++, j++) { + multiplier[c] = kernel->qinfo[j].multiplier; + shift[c] = kernel->qinfo[j].shift; + } + } else if (kernel->quant_channel == 1) { + for (int c = 0; c < m; c++) { + multiplier[c] = kernel->qinfo[0].multiplier; + shift[c] = kernel->qinfo[0].shift; + } + } + + // pack + csi_nn_rvv_reorder_input_z8_int8(input_data, pb, k, n, n); + // GEMM + csi_nn_rvv_gemm_8x8_int8(pc, pa, pb, m, k4, n, n, bias_data + g * m, + output->qinfo->zero_point, multiplier, shift); + + input_data += k * n; + output_data += m * n; + } + } + csi_mem_free(pb_reorder); + csi_mem_free(multiplier); + csi_mem_free(shift); + return CSINN_TRUE; +} diff --git a/source/thead_rvv/convolution_3x3.c b/source/thead_rvv/convolution_3x3.c new file mode 100644 index 00000000..466d7675 --- /dev/null +++ b/source/thead_rvv/convolution_3x3.c @@ -0,0 +1,807 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 ... +*************************************************************/ +/* + padding input for winograd input transform , and change memory layout to [n c/4 h w 4] + input layout: [n c h w] + input_padded layout: [n c/packn h w packn] + constrain: input channel % packn = 0 +*/ + +static void winograd_pad_input_pack1ton_fp32(const float *input, float *input_padded, int inc, + int inh, int inw, int padded_h, int padded_w, + int pad_top, int pad_left) +{ + const int packn = csrr_vlenb() / sizeof(float); + const int vl = vsetvl_e32m1(packn); + + int padded_hw = padded_h * padded_w; + const int in_size = inh * inw; // per-channel size + + float *pad_ptr = input_padded; + float *inp_ptr = (float *)input; + int pad_down = padded_h - pad_top - inh; // remain to pad on h (pad_down) + int pad_right = padded_w - pad_left - inw; // remain to pad on w (pad_right) + + vfloat32m1_t _zero = vfmv_v_f_f32m1(0.0f, vl); + + int c = 0; + for (; c + packn - 1 < inc; c += packn) { + inp_ptr = (float *)input + c * in_size; + // pad h_top + for (int i = 0; i < pad_top * padded_w; i++) { + vse32_v_f32m1(pad_ptr, _zero, vl); + pad_ptr += packn; + } + // pad h_mid + for (int i = 0; i < inh; i++) { + // pad w_left + for (int j = 0; j < pad_left; j++) { + vse32_v_f32m1(pad_ptr, _zero, vl); + pad_ptr += packn; + } + // pad w_mid + for (int j = 0; j < inw; j++) { + vfloat32m1_t _tmp = vlse32_v_f32m1(inp_ptr, in_size * sizeof(float), vl); + inp_ptr++; + vse32_v_f32m1(pad_ptr, _tmp, vl); + pad_ptr += packn; + } + // pad w_end + for (int j = 0; j < pad_right; j++) { + vse32_v_f32m1(pad_ptr, _zero, vl); + pad_ptr += packn; + } + } + // pad h_bottom + for (int i = 0; i < pad_down * padded_w; i++) { + vse32_v_f32m1(pad_ptr, _zero, vl); + pad_ptr += packn; + } + } +} + +static void winograd_crop_output_packnto1_fp32(const float *output_trans, float *output, int out_c, + int out_h, int out_w, int wino_h, int wino_w) +{ + const int packn = csrr_vlenb() / sizeof(float); + const int vl = vsetvl_e32m1(packn); + + const int out_size = out_h * out_w; // per-channel size + const int crop_size = wino_h * wino_w; + + float *out_tm_ptr = (float *)output_trans; + float *out_ptr = output; + + int c = 0; + for (; c + packn - 1 < out_c; c += packn) { + out_tm_ptr = (float *)output_trans + c * crop_size; + out_ptr = output + c * out_size; + + for (int h = 0; h < out_h; h++) { + float *crop_ptr = out_tm_ptr + h * wino_w * packn; + for (int w = 0; w < out_w; w++) { + vfloat32m1_t _tmp = vle32_v_f32m1(crop_ptr, vl); + crop_ptr += packn; + vsse32_v_f32m1(out_ptr, out_size * sizeof(float), _tmp, vl); + out_ptr++; + } + } + } +} + +/* + packn = VLEN / 32 (128/32=4 or 256/32=8) + constrain: output channel % packn = 0 + input channel % packn = 0 + kernel before: [O I 3*3] + kernel after : [O/packn 8*8 I packn] +*/ +void csi_nn_rvv_conv3x3s1_winograd64_transform_kernel_packn_fp32(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel) +{ + int32_t outch = o_kernel->dim[0]; + int32_t inch = o_kernel->dim[1]; + + float *kernel_data = (float *)o_kernel->data; + // for kernel transform buf, 3x3 --> 8x8 + float *kernel_tm = (float *)csi_mem_alloc(outch * inch * 8 * 8 * sizeof(float)); + // kernel transform matrix: G + const float ktm[8][3] = {{1.0f, 0.0f, 0.0f}, + {-2.0f / 9, -2.0f / 9, -2.0f / 9}, + {-2.0f / 9, 2.0f / 9, -2.0f / 9}, + {1.0f / 90, 1.0f / 45, 2.0f / 45}, + {1.0f / 90, -1.0f / 45, 2.0f / 45}, + {1.0f / 45, 1.0f / 90, 1.0f / 180}, + {1.0f / 45, -1.0f / 90, 1.0f / 180}, + {0.0f, 0.0f, 1.0f}}; + + // const float ktm[8][3] = { + // {1.0f, 0.0f, 0.0f}, + // {-2.0f / 9, -2.0f / 9, -2.0f / 9}, + // {-2.0f / 9, 2.0f / 9, -2.0f / 9}, + // {1.0f / 90, 1.0f / 45, 2.0f / 45}, + // {1.0f / 90, -1.0f / 45, 2.0f / 45}, + // {32.0f / 45, 16.0f / 45, 8.0f / 45}, + // {32.0f / 45, -16.0f / 45, 8.0f / 45}, + // {0.0f, 0.0f, 1.0f} + // }; + + csi_tensor_copy(t_kernel, o_kernel); + + for (int p = 0; p < outch; p++) { + for (int q = 0; q < inch; q++) { + const float *kernel0 = kernel_data + p * inch * 9 + q * 9; + float *kernel_tmp = kernel_tm + p * inch * 64 + q * 64; + + // transform kernel + const float *k0 = kernel0; + const float *k1 = kernel0 + 3; + const float *k2 = kernel0 + 6; + + // h : first compute the transport matrix tmp = (g * GT)T + float tmp[8][3]; + for (int i = 0; i < 8; i++) { + tmp[i][0] = k0[0] * ktm[i][0] + k0[1] * ktm[i][1] + k0[2] * ktm[i][2]; + tmp[i][1] = k1[0] * ktm[i][0] + k1[1] * ktm[i][1] + k1[2] * ktm[i][2]; + tmp[i][2] = k2[0] * ktm[i][0] + k2[1] * ktm[i][1] + k2[2] * ktm[i][2]; + } + + // U + for (int j = 0; j < 8; j++) { + float *tmpp = &tmp[j][0]; + + for (int i = 0; i < 8; i++) { + kernel_tmp[j * 8 + i] = + tmpp[0] * ktm[i][0] + tmpp[1] * ktm[i][1] + tmpp[2] * ktm[i][2]; + } + } + } + } + // optimized layout for winograd64 + + const int packn = csrr_vlenb() / sizeof(float); + + float *kernel_tm_packn = (float *)csi_mem_alloc(outch * inch * 8 * 8 * sizeof(float)); + t_kernel->data = kernel_tm_packn; + + for (int oc = 0; oc < outch / packn; oc++) { + float *g0 = kernel_tm_packn + oc * 64 * inch * packn; + + for (int k = 0; k < 64; k++) { + float *g00 = g0 + k * inch * packn; + + for (int ic = 0; ic < inch / packn; ic++) { + for (int i = 0; i < packn; i++) { + for (int j = 0; j < packn; j++) { + const float *k00 = + kernel_tm + (oc * packn + j) * 64 * inch + (ic * packn + i) * 64; + *g00++ = k00[k]; + } + } + } + } + } + csi_mem_free(kernel_tm); +} + +/* + n = VLEN / 32 + constrain: output channel % n = 0 + input channel % n = 0 +*/ +int csi_nn_rvv_conv3x3s1_winograd64_packn_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + float *kernel_data = (float *)params->conv_extra.kernel_tm->data; + float *bias_data = (float *)bias->data; + + // param + int kernel_h = kernel->dim[2]; + int kernel_w = kernel->dim[3]; + int stride_h = params->stride_height; + int stride_w = params->stride_width; + int dilation_h = params->dilation_height; + int dilation_w = params->dilation_width; + int pad_left = params->pad_left; + int pad_top = params->pad_top; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + int kernel_size = in_c * kernel_h * kernel_w; + + int out_c = kernel->dim[0]; + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int output_size = out_c * out_h * out_w; + + // winograd param + int block_h = (out_h + 5) / 6; + int block_w = (out_w + 5) / 6; + + // block * 4 for alignment with 4,kernel = 3 * 3 ,stride = 1,thus input_size + 2 + int padded_in_h = block_h * 6 + 2; + int padded_in_w = block_w * 6 + 2; + int padded_in_hw = padded_in_h * padded_in_w; // element size after padding per channel + + /****************************** bias *****************************/ + bool flag_bias = 1; // default: conv2d layer include bias + if (bias_data == NULL) { + flag_bias = 0; + bias_data = (float *)csi_mem_alloc(out_c * sizeof(float)); + } + + const int packn = csrr_vlenb() / sizeof(float); + const int vl = vsetvl_e32m1(packn); + + for (int n = 0; n < batch; n++) { + // pad buffer: [in_c/8 h w 8] + float *input_padd_buf = (float *)csi_mem_alloc(in_c * padded_in_hw * sizeof(float)); + + // pad input + winograd_pad_input_pack1ton_fp32(input_data, input_padd_buf, in_c, in_h, in_w, padded_in_h, + padded_in_w, pad_top, pad_left); + input_data += input_size; + + // input transform buffer1: [in_ch/8, 64, blocks, 8] + float *input_tm1_buf = + (float *)csi_mem_alloc(in_c * block_h * block_w * 8 * 8 * sizeof(float)); + + /****************************** transform input *****************************/ + /* + BT = { + { 1 0 -5.25 0 5.25 0 -1 0 }; + { 0 1 1 -4.25 -4.25 1 1 0 }; + { 0 -1 1 4.25 -4.25 -1 1 0 }; + { 0 0.5 0.25 -2.5 -1.25 2 1 0 }; + { 0 -0.5 0.25 2.5 -1.25 -2 1 0 }; + { 0 2 4 -2.5 -5 0.5 1 0 }; + { 0 -2 4 2.5 -5 -0.5 1 0 }; + { 0 -1 0 5.25 0 -5.25 0 1 } + }; + */ + int tiles = block_h * block_w; + +#pragma omp parallel for num_threads(1) + for (int q = 0; q < in_c / packn; q++) { + float *img0 = input_padd_buf + q * padded_in_h * padded_in_w * + packn; // feature map after padding - q channel + float *img0_tm = + input_tm1_buf + q * 64 * tiles * packn; // transform and interleave - q channel + + float tmp[8][8][packn]; + + for (int i = 0; i < block_h; i++) { + for (int j = 0; j < block_w; j++) { + float *r0 = img0 + (i * padded_in_w * 6 + j * 6) * + packn; // feature map after padding 8*8 start addr + float *r0_tm = + img0_tm + (i * block_w + j) * packn; // input_tm1 8*8 block start addr + + for (int m = 0; m < 8; m++) { + vfloat32m1_t _r00 = vle32_v_f32m1(r0, vl); + vfloat32m1_t _r01 = vle32_v_f32m1(r0 + packn * 1, vl); + vfloat32m1_t _r02 = vle32_v_f32m1(r0 + packn * 2, vl); + vfloat32m1_t _r03 = vle32_v_f32m1(r0 + packn * 3, vl); + vfloat32m1_t _r04 = vle32_v_f32m1(r0 + packn * 4, vl); + vfloat32m1_t _r05 = vle32_v_f32m1(r0 + packn * 5, vl); + vfloat32m1_t _r06 = vle32_v_f32m1(r0 + packn * 6, vl); + vfloat32m1_t _r07 = vle32_v_f32m1(r0 + packn * 7, vl); + + vfloat32m1_t _tmp0m = vfmacc_vf_f32m1(vfsub_vv_f32m1(_r00, _r06, vl), 5.25f, + vfsub_vv_f32m1(_r04, _r02, vl), vl); + vfloat32m1_t _tmp7m = vfmacc_vf_f32m1(vfsub_vv_f32m1(_r07, _r01, vl), 5.25f, + vfsub_vv_f32m1(_r03, _r05, vl), vl); + + vfloat32m1_t _tmp12a = + vfmacc_vf_f32m1(vfadd_vv_f32m1(_r02, _r06, vl), -4.25f, _r04, vl); + vfloat32m1_t _tmp12b = + vfmacc_vf_f32m1(vfadd_vv_f32m1(_r01, _r05, vl), -4.25f, _r03, vl); + vfloat32m1_t _tmp1m = vfadd_vv_f32m1(_tmp12a, _tmp12b, vl); + vfloat32m1_t _tmp2m = vfsub_vv_f32m1(_tmp12a, _tmp12b, vl); + + vfloat32m1_t _tmp34a = vfmacc_vf_f32m1( + vfmacc_vf_f32m1(_r06, 0.25f, _r02, vl), -1.25f, _r04, vl); + vfloat32m1_t _tmp34b = vfmacc_vf_f32m1( + vfmacc_vf_f32m1(vfmul_vf_f32m1(_r01, 0.5f, vl), -2.5f, _r03, vl), 2.f, + _r05, vl); + vfloat32m1_t _tmp3m = vfadd_vv_f32m1(_tmp34a, _tmp34b, vl); + vfloat32m1_t _tmp4m = vfsub_vv_f32m1(_tmp34a, _tmp34b, vl); + + vfloat32m1_t _tmp56a = + vfmacc_vf_f32m1(_r06, 4.f, vfmacc_vf_f32m1(_r02, -1.25f, _r04, vl), vl); + vfloat32m1_t _tmp56b = vfmacc_vf_f32m1( + vfmacc_vf_f32m1(vfmul_vf_f32m1(_r01, 2.f, vl), -2.5f, _r03, vl), 0.5f, + _r05, vl); + vfloat32m1_t _tmp5m = vfadd_vv_f32m1(_tmp56a, _tmp56b, vl); + vfloat32m1_t _tmp6m = vfsub_vv_f32m1(_tmp56a, _tmp56b, vl); + + vse32_v_f32m1(tmp[0][m], _tmp0m, vl); + vse32_v_f32m1(tmp[7][m], _tmp7m, vl); + vse32_v_f32m1(tmp[1][m], _tmp1m, vl); + vse32_v_f32m1(tmp[2][m], _tmp2m, vl); + vse32_v_f32m1(tmp[3][m], _tmp3m, vl); + vse32_v_f32m1(tmp[4][m], _tmp4m, vl); + vse32_v_f32m1(tmp[5][m], _tmp5m, vl); + vse32_v_f32m1(tmp[6][m], _tmp6m, vl); + + r0 += padded_in_w * packn; + } + + for (int m = 0; m < 8; m++) { + float *r0_tm0 = r0_tm; + float *r0_tm1 = r0_tm0 + tiles * packn; + float *r0_tm2 = r0_tm1 + tiles * packn; + float *r0_tm3 = r0_tm2 + tiles * packn; + float *r0_tm4 = r0_tm3 + tiles * packn; + float *r0_tm5 = r0_tm4 + tiles * packn; + float *r0_tm6 = r0_tm5 + tiles * packn; + float *r0_tm7 = r0_tm6 + tiles * packn; + + vfloat32m1_t _tmp00 = vle32_v_f32m1(tmp[m][0], vl); + vfloat32m1_t _tmp01 = vle32_v_f32m1(tmp[m][1], vl); + vfloat32m1_t _tmp02 = vle32_v_f32m1(tmp[m][2], vl); + vfloat32m1_t _tmp03 = vle32_v_f32m1(tmp[m][3], vl); + vfloat32m1_t _tmp04 = vle32_v_f32m1(tmp[m][4], vl); + vfloat32m1_t _tmp05 = vle32_v_f32m1(tmp[m][5], vl); + vfloat32m1_t _tmp06 = vle32_v_f32m1(tmp[m][6], vl); + vfloat32m1_t _tmp07 = vle32_v_f32m1(tmp[m][7], vl); + + vfloat32m1_t _r0tm0 = + vfmacc_vf_f32m1(vfsub_vv_f32m1(_tmp00, _tmp06, vl), 5.25f, + vfsub_vv_f32m1(_tmp04, _tmp02, vl), vl); + vfloat32m1_t _r0tm7 = + vfmacc_vf_f32m1(vfsub_vv_f32m1(_tmp07, _tmp01, vl), 5.25f, + vfsub_vv_f32m1(_tmp03, _tmp05, vl), vl); + + vfloat32m1_t _tmp12a = + vfmacc_vf_f32m1(vfadd_vv_f32m1(_tmp02, _tmp06, vl), -4.25f, _tmp04, vl); + vfloat32m1_t _tmp12b = + vfmacc_vf_f32m1(vfadd_vv_f32m1(_tmp01, _tmp05, vl), -4.25f, _tmp03, vl); + vfloat32m1_t _r0tm1 = vfadd_vv_f32m1(_tmp12a, _tmp12b, vl); + vfloat32m1_t _r0tm2 = vfsub_vv_f32m1(_tmp12a, _tmp12b, vl); + + vfloat32m1_t _tmp34a = vfmacc_vf_f32m1( + vfmacc_vf_f32m1(_tmp06, 0.25f, _tmp02, vl), -1.25f, _tmp04, vl); + vfloat32m1_t _tmp34b = vfmacc_vf_f32m1( + vfmacc_vf_f32m1(vfmul_vf_f32m1(_tmp01, 0.5f, vl), -2.5f, _tmp03, vl), + 2.f, _tmp05, vl); + vfloat32m1_t _r0tm3 = vfadd_vv_f32m1(_tmp34a, _tmp34b, vl); + vfloat32m1_t _r0tm4 = vfsub_vv_f32m1(_tmp34a, _tmp34b, vl); + + vfloat32m1_t _tmp56a = vfmacc_vf_f32m1( + _tmp06, 4.f, vfmacc_vf_f32m1(_tmp02, -1.25f, _tmp04, vl), vl); + vfloat32m1_t _tmp56b = vfmacc_vf_f32m1( + vfmacc_vf_f32m1(vfmul_vf_f32m1(_tmp01, 2.f, vl), -2.5f, _tmp03, vl), + 0.5f, _tmp05, vl); + vfloat32m1_t _r0tm5 = vfadd_vv_f32m1(_tmp56a, _tmp56b, vl); + vfloat32m1_t _r0tm6 = vfsub_vv_f32m1(_tmp56a, _tmp56b, vl); + + vse32_v_f32m1(r0_tm0, _r0tm0, vl); + vse32_v_f32m1(r0_tm7, _r0tm7, vl); + vse32_v_f32m1(r0_tm1, _r0tm1, vl); + vse32_v_f32m1(r0_tm2, _r0tm2, vl); + vse32_v_f32m1(r0_tm3, _r0tm3, vl); + vse32_v_f32m1(r0_tm4, _r0tm4, vl); + vse32_v_f32m1(r0_tm5, _r0tm5, vl); + vse32_v_f32m1(r0_tm6, _r0tm6, vl); + + r0_tm += tiles * packn * 8; + } + } + } + } + csi_mem_free(input_padd_buf); + + /*********************************** dot ***************************************/ + // reorder input_tm1_buf + int size_input_tm2 = 0; + if (tiles >= 8) { + size_input_tm2 = + 64 * (tiles / 8 + (tiles % 8) / 4 + (tiles % 4) / 2 + tiles % 2) * in_c * 8; + } else if (tiles >= 4) { + size_input_tm2 = 64 * (tiles / 4 + (tiles % 4) / 2 + tiles % 2) * in_c * 4; + } else if (tiles >= 2) { + size_input_tm2 = 64 * (tiles / 2 + tiles % 2) * in_c * 2; + } else { + size_input_tm2 = 64 * tiles * in_c; + } + float *input_tm2_buf = (float *)csi_mem_alloc(size_input_tm2 * sizeof(float)); + +#pragma omp parallel for num_threads(1) + for (int r = 0; r < 64; r++) { + float *img_tm2 = input_tm2_buf + r * size_input_tm2 / 64; // input_tm2 r channel data + + int t = 0; + for (; t + 7 < tiles; t += 8) { + float *tm2 = img_tm2 + t * in_c; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * packn; + for (int q = 0; q < in_c / packn; q++) { + vfloat32m1_t _tmp0 = vle32_v_f32m1(tm1, vl); + vfloat32m1_t _tmp1 = vle32_v_f32m1(tm1 + packn * 1, vl); + vfloat32m1_t _tmp2 = vle32_v_f32m1(tm1 + packn * 2, vl); + vfloat32m1_t _tmp3 = vle32_v_f32m1(tm1 + packn * 3, vl); + vfloat32m1_t _tmp4 = vle32_v_f32m1(tm1 + packn * 4, vl); + vfloat32m1_t _tmp5 = vle32_v_f32m1(tm1 + packn * 5, vl); + vfloat32m1_t _tmp6 = vle32_v_f32m1(tm1 + packn * 6, vl); + vfloat32m1_t _tmp7 = vle32_v_f32m1(tm1 + packn * 7, vl); + + vsseg8e32_v_f32m1(tm2, _tmp0, _tmp1, _tmp2, _tmp3, _tmp4, _tmp5, _tmp6, _tmp7, + vl); + tm1 += 64 * tiles * packn; + tm2 += 8 * packn; + } + } + for (; t + 3 < tiles; t += 4) { + float *tm2 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * packn; + for (int q = 0; q < in_c / packn; q++) { + vfloat32m1_t _tmp0 = vle32_v_f32m1(tm1, vl); + vfloat32m1_t _tmp1 = vle32_v_f32m1(tm1 + packn * 1, vl); + vfloat32m1_t _tmp2 = vle32_v_f32m1(tm1 + packn * 2, vl); + vfloat32m1_t _tmp3 = vle32_v_f32m1(tm1 + packn * 3, vl); + + vsseg4e32_v_f32m1(tm2, _tmp0, _tmp1, _tmp2, _tmp3, vl); + tm1 += 64 * tiles * packn; + tm2 += 4 * packn; + } + } + for (; t + 1 < tiles; t += 2) { + float *tm2 = + img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * packn; + for (int q = 0; q < in_c / packn; q++) { + vfloat32m1_t _tmp0 = vle32_v_f32m1(tm1, vl); + vfloat32m1_t _tmp1 = vle32_v_f32m1(tm1 + packn * 1, vl); + + vsseg2e32_v_f32m1(tm2, _tmp0, _tmp1, vl); + tm1 += 64 * tiles * packn; + tm2 += 2 * packn; + } + } + for (; t < tiles; t++) { + float *tm2 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2 + t % 2) * in_c * + 8; // img_tm2 row data + float *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * packn; + for (int q = 0; q < in_c / packn; q++) { + vfloat32m1_t _tmp0 = vle32_v_f32m1(tm1, vl); + + vse32_v_f32m1(tm2, _tmp0, vl); + tm1 += 64 * tiles * packn; + tm2 += 1 * packn; + } + } + } + csi_mem_free(input_tm1_buf); + + // output_dot_buf: [out_c/packn, 64, blocks, packn] + float *output_dot_buf = + (float *)csi_mem_alloc(out_c * block_h * block_w * 8 * 8 * sizeof(float)); +#pragma omp parallel for num_threads(1) + for (int p = 0; p < out_c / packn; p++) { + float *output0_tm = output_dot_buf + p * 64 * tiles * packn; // 4 channel dot output + float *kernel0_tm = kernel_data + p * 64 * in_c * packn; // 4 channel kernel + + for (int r = 0; r < 64; r++) { + float *img_tm2 = input_tm2_buf + r * size_input_tm2 / 64; // img_tm2 第r个channel + + int t = 0; + for (; t + 7 < tiles; t += 8) { + float *r0 = img_tm2 + t * in_c; + float *k0 = kernel0_tm + r * in_c * packn; + + vfloat32m1_t _acc0 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc1 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc2 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc3 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc4 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc5 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc6 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc7 = vfmv_v_f_f32m1(0.0f, vl); + + for (int c = 0; c < in_c; c++) { + vfloat32m1_t _kernel = vle32_v_f32m1(k0, vl); + k0 += packn; + _acc0 = vfmacc_vf_f32m1(_acc0, r0[0], _kernel, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, r0[1], _kernel, vl); + _acc2 = vfmacc_vf_f32m1(_acc2, r0[2], _kernel, vl); + _acc3 = vfmacc_vf_f32m1(_acc3, r0[3], _kernel, vl); + _acc4 = vfmacc_vf_f32m1(_acc4, r0[4], _kernel, vl); + _acc5 = vfmacc_vf_f32m1(_acc5, r0[5], _kernel, vl); + _acc6 = vfmacc_vf_f32m1(_acc6, r0[6], _kernel, vl); + _acc7 = vfmacc_vf_f32m1(_acc7, r0[7], _kernel, vl); + r0 += 8; + } + + vse32_v_f32m1(output0_tm, _acc0, vl); + vse32_v_f32m1(output0_tm + packn * 1, _acc1, vl); + vse32_v_f32m1(output0_tm + packn * 2, _acc2, vl); + vse32_v_f32m1(output0_tm + packn * 3, _acc3, vl); + vse32_v_f32m1(output0_tm + packn * 4, _acc4, vl); + vse32_v_f32m1(output0_tm + packn * 5, _acc5, vl); + vse32_v_f32m1(output0_tm + packn * 6, _acc6, vl); + vse32_v_f32m1(output0_tm + packn * 7, _acc7, vl); + output0_tm += packn * 8; + } + + for (; t + 3 < tiles; t += 4) { + float *r0 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; + float *k0 = kernel0_tm + r * in_c * packn; + + vfloat32m1_t _acc0 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc1 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc2 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc3 = vfmv_v_f_f32m1(0.0f, vl); + + for (int c = 0; c < in_c; c++) { + vfloat32m1_t _kernel = vle32_v_f32m1(k0, vl); + k0 += packn; + _acc0 = vfmacc_vf_f32m1(_acc0, r0[0], _kernel, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, r0[1], _kernel, vl); + _acc2 = vfmacc_vf_f32m1(_acc2, r0[2], _kernel, vl); + _acc3 = vfmacc_vf_f32m1(_acc3, r0[3], _kernel, vl); + r0 += 4; + } + + vse32_v_f32m1(output0_tm, _acc0, vl); + vse32_v_f32m1(output0_tm + packn * 1, _acc1, vl); + vse32_v_f32m1(output0_tm + packn * 2, _acc2, vl); + vse32_v_f32m1(output0_tm + packn * 3, _acc3, vl); + output0_tm += packn * 4; + } + for (; t + 1 < tiles; t += 2) { + float *r0 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; + float *k0 = kernel0_tm + r * in_c * packn; + + vfloat32m1_t _acc0 = vfmv_v_f_f32m1(0.0f, vl); + vfloat32m1_t _acc1 = vfmv_v_f_f32m1(0.0f, vl); + + for (int c = 0; c < in_c; c++) { + vfloat32m1_t _kernel = vle32_v_f32m1(k0, vl); + k0 += packn; + _acc0 = vfmacc_vf_f32m1(_acc0, r0[0], _kernel, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, r0[1], _kernel, vl); + r0 += 2; + } + + vse32_v_f32m1(output0_tm, _acc0, vl); + vse32_v_f32m1(output0_tm + packn * 1, _acc1, vl); + output0_tm += packn * 2; + } + for (; t < tiles; t++) { + float *r0 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2 + t % 2) * in_c * 8; + float *k0 = kernel0_tm + r * in_c * packn; + + vfloat32m1_t _acc0 = vfmv_v_f_f32m1(0.0f, vl); + + for (int c = 0; c < in_c; c++) { + vfloat32m1_t _kernel = vle32_v_f32m1(k0, vl); + k0 += packn; + _acc0 = vfmacc_vf_f32m1(_acc0, r0[0], _kernel, vl); + r0 += 1; + } + + vse32_v_f32m1(output0_tm, _acc0, vl); + output0_tm += packn * 1; + } + } + } + + csi_mem_free(input_tm2_buf); + + /*************************** transform output ****************************/ + // output_tm1_buf: [out_c/packn, out_h6, out_w6, packn] + float *output_tm1_buf = + (float *)csi_mem_alloc(out_c * block_h * block_w * 6 * 6 * sizeof(float)); + +/* +AT = { + { 1 1 1 1 1 1 1 0 }; + { 0 1 -1 2 -2 1/2 -1/2 0 }; + { 0 1 1 4 4 1/4 1/4 0 }; + { 0 1 -1 8 -8 1/8 -1/8 0 }; + { 0 1 1 16 16 1/16 1/16 0 }; + { 0 1 -1 32 -32 1/32 -1/32 1 } +}; +AT = { + { 1 1 1 1 1 32 32 0 }; + { 0 1 -1 2 -2 16 -16 0 }; + { 0 1 1 4 4 8 8 0 }; + { 0 1 -1 8 -8 4 -4 0 }; + { 0 1 1 16 16 2 2 0 }; + { 0 1 -1 32 -32 1 -1 1 } +}; +*/ +#pragma omp parallel for num_threads(1) + for (int p = 0; p < out_c / packn; p++) { + float *bias_tmp = bias_data + p * packn; + + float *out0_tm = output_dot_buf + + p * 64 * block_h * block_w * packn; // 输出转换前/dot后 第p个channel + float *out0 = + output_tm1_buf + p * 6 * block_h * 6 * block_w * packn; // 转换后输出 第p个channel + + float tmp[6][8][packn]; + + for (int i = 0; i < block_h; i++) { + for (int j = 0; j < block_w; j++) { + float *output0_tm_0 = out0_tm + (i * block_w + j) * packn; // 8*8 起始地址 + float *output0_tm_1 = output0_tm_0 + tiles * packn * 1; + float *output0_tm_2 = output0_tm_0 + tiles * packn * 2; + float *output0_tm_3 = output0_tm_0 + tiles * packn * 3; + float *output0_tm_4 = output0_tm_0 + tiles * packn * 4; + float *output0_tm_5 = output0_tm_0 + tiles * packn * 5; + float *output0_tm_6 = output0_tm_0 + tiles * packn * 6; + float *output0_tm_7 = output0_tm_0 + tiles * packn * 7; + + float *output0 = + out0 + (i * block_w * 6 * 6 + j * 6) * packn; // 输出 6*6 的起始地址 + + for (int m = 0; m < 8; m++) { + vfloat32m1_t _r00 = vle32_v_f32m1(output0_tm_0, vl); + vfloat32m1_t _r01 = vle32_v_f32m1(output0_tm_1, vl); + vfloat32m1_t _r02 = vle32_v_f32m1(output0_tm_2, vl); + vfloat32m1_t _r03 = vle32_v_f32m1(output0_tm_3, vl); + vfloat32m1_t _r04 = vle32_v_f32m1(output0_tm_4, vl); + vfloat32m1_t _r05 = vle32_v_f32m1(output0_tm_5, vl); + vfloat32m1_t _r06 = vle32_v_f32m1(output0_tm_6, vl); + vfloat32m1_t _r07 = vle32_v_f32m1(output0_tm_7, vl); + + vfloat32m1_t _tmp024a = vfadd_vv_f32m1(_r01, _r02, vl); + vfloat32m1_t _tmp135a = vfsub_vv_f32m1(_r01, _r02, vl); + + vfloat32m1_t _tmp024b = vfadd_vv_f32m1(_r03, _r04, vl); + vfloat32m1_t _tmp135b = vfsub_vv_f32m1(_r03, _r04, vl); + + vfloat32m1_t _tmp024c = vfadd_vv_f32m1(_r05, _r06, vl); + vfloat32m1_t _tmp135c = vfsub_vv_f32m1(_r05, _r06, vl); + + vfloat32m1_t _tmp0m = + vfadd_vv_f32m1(vfadd_vv_f32m1(_r00, _tmp024a, vl), + vfmacc_vf_f32m1(_tmp024b, 32.f, _tmp024c, vl), vl); + vfloat32m1_t _tmp2m = vfmacc_vf_f32m1( + vfmacc_vf_f32m1(_tmp024a, 4.f, _tmp024b, vl), 8.f, _tmp024c, vl); + vfloat32m1_t _tmp4m = vfmacc_vf_f32m1( + vfmacc_vf_f32m1(_tmp024a, 16.f, _tmp024b, vl), 2.f, _tmp024c, vl); + + vfloat32m1_t _tmp1m = vfmacc_vf_f32m1( + vfmacc_vf_f32m1(_tmp135a, 2.f, _tmp135b, vl), 16.f, _tmp135c, vl); + vfloat32m1_t _tmp3m = vfmacc_vf_f32m1( + vfmacc_vf_f32m1(_tmp135a, 8.f, _tmp135b, vl), 4.f, _tmp135c, vl); + vfloat32m1_t _tmp5m = + vfadd_vv_f32m1(vfadd_vv_f32m1(_r07, _tmp135a, vl), + vfmacc_vf_f32m1(_tmp135c, 32.f, _tmp135b, vl), vl); + + vse32_v_f32m1(tmp[0][m], _tmp0m, vl); + vse32_v_f32m1(tmp[2][m], _tmp2m, vl); + vse32_v_f32m1(tmp[4][m], _tmp4m, vl); + vse32_v_f32m1(tmp[1][m], _tmp1m, vl); + vse32_v_f32m1(tmp[3][m], _tmp3m, vl); + vse32_v_f32m1(tmp[5][m], _tmp5m, vl); + + output0_tm_0 += tiles * packn * 8; + output0_tm_1 += tiles * packn * 8; + output0_tm_2 += tiles * packn * 8; + output0_tm_3 += tiles * packn * 8; + output0_tm_4 += tiles * packn * 8; + output0_tm_5 += tiles * packn * 8; + output0_tm_6 += tiles * packn * 8; + output0_tm_7 += tiles * packn * 8; + } + + vfloat32m1_t _bias = vle32_v_f32m1(bias_tmp, vl); + for (int m = 0; m < 6; m++) { + vfloat32m1_t _tmp00 = vle32_v_f32m1(tmp[m][0], vl); + vfloat32m1_t _tmp01 = vle32_v_f32m1(tmp[m][1], vl); + vfloat32m1_t _tmp02 = vle32_v_f32m1(tmp[m][2], vl); + vfloat32m1_t _tmp03 = vle32_v_f32m1(tmp[m][3], vl); + vfloat32m1_t _tmp04 = vle32_v_f32m1(tmp[m][4], vl); + vfloat32m1_t _tmp05 = vle32_v_f32m1(tmp[m][5], vl); + vfloat32m1_t _tmp06 = vle32_v_f32m1(tmp[m][6], vl); + vfloat32m1_t _tmp07 = vle32_v_f32m1(tmp[m][7], vl); + + vfloat32m1_t _tmp024a = vfadd_vv_f32m1(_tmp01, _tmp02, vl); + vfloat32m1_t _tmp135a = vfsub_vv_f32m1(_tmp01, _tmp02, vl); + + vfloat32m1_t _tmp024b = vfadd_vv_f32m1(_tmp03, _tmp04, vl); + vfloat32m1_t _tmp135b = vfsub_vv_f32m1(_tmp03, _tmp04, vl); + + vfloat32m1_t _tmp024c = vfadd_vv_f32m1(_tmp05, _tmp06, vl); + vfloat32m1_t _tmp135c = vfsub_vv_f32m1(_tmp05, _tmp06, vl); + + vfloat32m1_t _output00 = vfadd_vv_f32m1( + _bias, + vfadd_vv_f32m1(vfadd_vv_f32m1(_tmp00, _tmp024a, vl), + vfmacc_vf_f32m1(_tmp024b, 32.f, _tmp024c, vl), vl), + vl); + vfloat32m1_t _output02 = vfadd_vv_f32m1( + _bias, + vfmacc_vf_f32m1(vfmacc_vf_f32m1(_tmp024a, 4.f, _tmp024b, vl), 8.f, + _tmp024c, vl), + vl); + vfloat32m1_t _output04 = vfadd_vv_f32m1( + _bias, + vfmacc_vf_f32m1(vfmacc_vf_f32m1(_tmp024a, 16.f, _tmp024b, vl), 2.f, + _tmp024c, vl), + vl); + + vfloat32m1_t _output01 = vfadd_vv_f32m1( + _bias, + vfmacc_vf_f32m1(vfmacc_vf_f32m1(_tmp135a, 2.f, _tmp135b, vl), 16.f, + _tmp135c, vl), + vl); + vfloat32m1_t _output03 = vfadd_vv_f32m1( + _bias, + vfmacc_vf_f32m1(vfmacc_vf_f32m1(_tmp135a, 8.f, _tmp135b, vl), 4.f, + _tmp135c, vl), + vl); + vfloat32m1_t _output05 = vfadd_vv_f32m1( + _bias, + vfadd_vv_f32m1(vfadd_vv_f32m1(_tmp07, _tmp135a, vl), + vfmacc_vf_f32m1(_tmp135c, 32.f, _tmp135b, vl), vl), + vl); + + vse32_v_f32m1(output0, _output00, vl); + vse32_v_f32m1(output0 + packn * 2, _output02, vl); + vse32_v_f32m1(output0 + packn * 4, _output04, vl); + vse32_v_f32m1(output0 + packn * 1, _output01, vl); + vse32_v_f32m1(output0 + packn * 3, _output03, vl); + vse32_v_f32m1(output0 + packn * 5, _output05, vl); + + output0 += block_w * 6 * packn; + } + } + } + } + + csi_mem_free(output_dot_buf); + + // crop the output after transform: cut extra part (right , bottom) + winograd_crop_output_packnto1_fp32(output_tm1_buf, output_data, out_c, out_h, out_w, + block_h * 6, block_w * 6); + output_data += output_size; + csi_mem_free(output_tm1_buf); + } + + if (!flag_bias) { + csi_mem_free(bias_data); + bias_data = NULL; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/convolution_3x3_fp16.c b/source/thead_rvv/convolution_3x3_fp16.c new file mode 100644 index 00000000..a886ce0f --- /dev/null +++ b/source/thead_rvv/convolution_3x3_fp16.c @@ -0,0 +1,802 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 ... +*************************************************************/ +static void winograd_pad_input_pack1ton_fp16(const __fp16 *input, __fp16 *input_padded, int inc, + int inh, int inw, int padded_h, int padded_w, + int pad_top, int pad_left) +{ + const int packn = csrr_vlenb() / sizeof(__fp16); + const int vl = vsetvl_e16m1(packn); + + int padded_hw = padded_h * padded_w; + const int in_size = inh * inw; // per-channel size + + __fp16 *pad_ptr = input_padded; + __fp16 *inp_ptr = (__fp16 *)input; + int pad_down = padded_h - pad_top - inh; // remain to pad on h (pad_down) + int pad_right = padded_w - pad_left - inw; // remain to pad on w (pad_right) + + vfloat16m1_t _zero = vfmv_v_f_f16m1(0.0f, vl); + + int c = 0; + for (; c + packn - 1 < inc; c += packn) { + inp_ptr = (__fp16 *)input + c * in_size; + // pad h_top + for (int i = 0; i < pad_top * padded_w; i++) { + vse16_v_f16m1(pad_ptr, _zero, vl); + pad_ptr += packn; + } + // pad h_mid + for (int i = 0; i < inh; i++) { + // pad w_left + for (int j = 0; j < pad_left; j++) { + vse16_v_f16m1(pad_ptr, _zero, vl); + pad_ptr += packn; + } + // pad w_mid + for (int j = 0; j < inw; j++) { + vfloat16m1_t _tmp = vlse16_v_f16m1(inp_ptr, in_size * sizeof(__fp16), vl); + inp_ptr++; + vse16_v_f16m1(pad_ptr, _tmp, vl); + pad_ptr += packn; + } + // pad w_end + for (int j = 0; j < pad_right; j++) { + vse16_v_f16m1(pad_ptr, _zero, vl); + pad_ptr += packn; + } + } + // pad h_bottom + for (int i = 0; i < pad_down * padded_w; i++) { + vse16_v_f16m1(pad_ptr, _zero, vl); + pad_ptr += packn; + } + } +} + +static void winograd_crop_output_packnto1_fp16(const __fp16 *output_trans, __fp16 *output, + int out_c, int out_h, int out_w, int wino_h, + int wino_w) +{ + const int packn = csrr_vlenb() / sizeof(__fp16); + const int vl = vsetvl_e16m1(packn); + + const int out_size = out_h * out_w; // per-channel size + const int crop_size = wino_h * wino_w; + + __fp16 *out_tm_ptr = (__fp16 *)output_trans; + __fp16 *out_ptr = output; + + int c = 0; + for (; c + packn - 1 < out_c; c += packn) { + out_tm_ptr = (__fp16 *)output_trans + c * crop_size; + out_ptr = output + c * out_size; + + for (int h = 0; h < out_h; h++) { + __fp16 *crop_ptr = out_tm_ptr + h * wino_w * packn; + for (int w = 0; w < out_w; w++) { + vfloat16m1_t _tmp = vle16_v_f16m1(crop_ptr, vl); + crop_ptr += packn; + vsse16_v_f16m1(out_ptr, out_size * sizeof(__fp16), _tmp, vl); + out_ptr++; + } + } + } +} + +/* + pack n = VLEN / 16 (128/16=8 or 256/16=16) + constrain: output channel % n = 0 + input channel % n = 0 + kernel before: [O I 3*3] + kernel after : [O/n 8*8 I n] +*/ +void csi_nn_rvv_conv3x3s1_winograd64_transform_kernel_packn_fp16(struct csi_tensor *o_kernel, + struct csi_tensor *t_kernel) +{ + int32_t outch = o_kernel->dim[0]; + int32_t inch = o_kernel->dim[1]; + + __fp16 *kernel_data = (__fp16 *)o_kernel->data; + // for kernel transform buf, 3x3 --> 8x8 + __fp16 *kernel_tm = (__fp16 *)csi_mem_alloc(outch * inch * 8 * 8 * sizeof(__fp16)); + // kernel transform matrix: G + const __fp16 ktm[8][3] = {{1.0f, 0.0f, 0.0f}, + {-2.0f / 9, -2.0f / 9, -2.0f / 9}, + {-2.0f / 9, 2.0f / 9, -2.0f / 9}, + {1.0f / 90, 1.0f / 45, 2.0f / 45}, + {1.0f / 90, -1.0f / 45, 2.0f / 45}, + {1.0f / 45, 1.0f / 90, 1.0f / 180}, + {1.0f / 45, -1.0f / 90, 1.0f / 180}, + {0.0f, 0.0f, 1.0f}}; + + // const __fp16 ktm[8][3] = { + // {1.0f, 0.0f, 0.0f}, + // {-2.0f / 9, -2.0f / 9, -2.0f / 9}, + // {-2.0f / 9, 2.0f / 9, -2.0f / 9}, + // {1.0f / 90, 1.0f / 45, 2.0f / 45}, + // {1.0f / 90, -1.0f / 45, 2.0f / 45}, + // {32.0f / 45, 16.0f / 45, 8.0f / 45}, + // {32.0f / 45, -16.0f / 45, 8.0f / 45}, + // {0.0f, 0.0f, 1.0f} + // }; + + csi_tensor_copy(t_kernel, o_kernel); + + for (int p = 0; p < outch; p++) { + for (int q = 0; q < inch; q++) { + const __fp16 *kernel0 = kernel_data + p * inch * 9 + q * 9; + __fp16 *kernel_tmp = kernel_tm + p * inch * 64 + q * 64; + + // transform kernel + const __fp16 *k0 = kernel0; + const __fp16 *k1 = kernel0 + 3; + const __fp16 *k2 = kernel0 + 6; + + // h : first compute the transport matrix tmp = (g * GT)T + __fp16 tmp[8][3]; + for (int i = 0; i < 8; i++) { + tmp[i][0] = k0[0] * ktm[i][0] + k0[1] * ktm[i][1] + k0[2] * ktm[i][2]; + tmp[i][1] = k1[0] * ktm[i][0] + k1[1] * ktm[i][1] + k1[2] * ktm[i][2]; + tmp[i][2] = k2[0] * ktm[i][0] + k2[1] * ktm[i][1] + k2[2] * ktm[i][2]; + } + + // U + for (int j = 0; j < 8; j++) { + __fp16 *tmpp = &tmp[j][0]; + + for (int i = 0; i < 8; i++) { + kernel_tmp[j * 8 + i] = + tmpp[0] * ktm[i][0] + tmpp[1] * ktm[i][1] + tmpp[2] * ktm[i][2]; + } + } + } + } + // optimized layout for winograd64 + const int packn = csrr_vlenb() / sizeof(__fp16); + + __fp16 *kernel_tm_packn = (__fp16 *)csi_mem_alloc(outch * inch * 8 * 8 * sizeof(__fp16)); + t_kernel->data = kernel_tm_packn; + + for (int oc = 0; oc < outch / packn; oc++) { + __fp16 *g0 = kernel_tm_packn + oc * 64 * inch * packn; + + for (int k = 0; k < 64; k++) { + __fp16 *g00 = g0 + k * inch * packn; + + for (int ic = 0; ic < inch / packn; ic++) { + for (int i = 0; i < packn; i++) { + for (int j = 0; j < packn; j++) { + const __fp16 *k00 = + kernel_tm + (oc * packn + j) * 64 * inch + (ic * packn + i) * 64; + *g00++ = k00[k]; + } + } + } + } + } + csi_mem_free(kernel_tm); +} + +/* + n = VLEN / 16 + constrain: output channel % n = 0 + input channel % n = 0 +*/ +int csi_nn_rvv_conv3x3s1_winograd64_packn_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *kernel_data = (__fp16 *)params->conv_extra.kernel_tm->data; + __fp16 *bias_data = (__fp16 *)bias->data; + + // param + int kernel_h = kernel->dim[2]; + int kernel_w = kernel->dim[3]; + int stride_h = params->stride_height; + int stride_w = params->stride_width; + int dilation_h = params->dilation_height; + int dilation_w = params->dilation_width; + int pad_left = params->pad_left; + int pad_top = params->pad_top; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + int kernel_size = in_c * kernel_h * kernel_w; + + int out_c = kernel->dim[0]; + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int output_size = out_c * out_h * out_w; + + // winograd param + int block_h = (out_h + 5) / 6; + int block_w = (out_w + 5) / 6; + + int padded_in_h = + block_h * 6 + + 2; // block * 4 for alignment with 4,kernel = 3 * 3 ,stride = 1,thus input_size + 2 + int padded_in_w = block_w * 6 + 2; + int padded_in_hw = padded_in_h * padded_in_w; // element size after padding per channel + + /****************************** bias *****************************/ + bool flag_bias = 1; // default: conv2d layer include bias + if (bias_data == NULL) { + flag_bias = 0; + bias_data = (__fp16 *)csi_mem_alloc(out_c * sizeof(__fp16)); + } + + const int packn = csrr_vlenb() / sizeof(__fp16); + const int vl = vsetvl_e16m1(packn); + + for (int n = 0; n < batch; n++) { + // pad buffer: [in_c/8 h w 8] + __fp16 *input_padd_buf = (__fp16 *)csi_mem_alloc(in_c * padded_in_hw * sizeof(__fp16)); + + // pad input + winograd_pad_input_pack1ton_fp16(input_data, input_padd_buf, in_c, in_h, in_w, padded_in_h, + padded_in_w, pad_top, pad_left); + input_data += input_size; + + // input transform buffer1: [in_ch/8, 64, blocks, 8] + __fp16 *input_tm1_buf = + (__fp16 *)csi_mem_alloc(in_c * block_h * block_w * 8 * 8 * sizeof(__fp16)); + + /****************************** transform input *****************************/ + /* + BT = { + { 1 0 -5.25 0 5.25 0 -1 0 }; + { 0 1 1 -4.25 -4.25 1 1 0 }; + { 0 -1 1 4.25 -4.25 -1 1 0 }; + { 0 0.5 0.25 -2.5 -1.25 2 1 0 }; + { 0 -0.5 0.25 2.5 -1.25 -2 1 0 }; + { 0 2 4 -2.5 -5 0.5 1 0 }; + { 0 -2 4 2.5 -5 -0.5 1 0 }; + { 0 -1 0 5.25 0 -5.25 0 1 } + }; + */ + + int tiles = block_h * block_w; + +#pragma omp parallel for num_threads(1) + for (int q = 0; q < in_c / packn; q++) { + __fp16 *img0 = input_padd_buf + q * padded_in_h * padded_in_w * + packn; // feature map after padding - q channel + __fp16 *img0_tm = + input_tm1_buf + q * 64 * tiles * packn; // transform and interleave - q channel + + __fp16 tmp[8][8][packn]; + + for (int i = 0; i < block_h; i++) { + for (int j = 0; j < block_w; j++) { + __fp16 *r0 = img0 + (i * padded_in_w * 6 + j * 6) * + packn; // feature map after padding 8*8 start addr + __fp16 *r0_tm = + img0_tm + (i * block_w + j) * packn; // input_tm1 8*8 block start addr + + for (int m = 0; m < 8; m++) { + vfloat16m1_t _r00 = vle16_v_f16m1(r0, vl); + vfloat16m1_t _r01 = vle16_v_f16m1(r0 + packn * 1, vl); + vfloat16m1_t _r02 = vle16_v_f16m1(r0 + packn * 2, vl); + vfloat16m1_t _r03 = vle16_v_f16m1(r0 + packn * 3, vl); + vfloat16m1_t _r04 = vle16_v_f16m1(r0 + packn * 4, vl); + vfloat16m1_t _r05 = vle16_v_f16m1(r0 + packn * 5, vl); + vfloat16m1_t _r06 = vle16_v_f16m1(r0 + packn * 6, vl); + vfloat16m1_t _r07 = vle16_v_f16m1(r0 + packn * 7, vl); + + vfloat16m1_t _tmp0m = vfmacc_vf_f16m1(vfsub_vv_f16m1(_r00, _r06, vl), 5.25f, + vfsub_vv_f16m1(_r04, _r02, vl), vl); + vfloat16m1_t _tmp7m = vfmacc_vf_f16m1(vfsub_vv_f16m1(_r07, _r01, vl), 5.25f, + vfsub_vv_f16m1(_r03, _r05, vl), vl); + + vfloat16m1_t _tmp12a = + vfmacc_vf_f16m1(vfadd_vv_f16m1(_r02, _r06, vl), -4.25f, _r04, vl); + vfloat16m1_t _tmp12b = + vfmacc_vf_f16m1(vfadd_vv_f16m1(_r01, _r05, vl), -4.25f, _r03, vl); + vfloat16m1_t _tmp1m = vfadd_vv_f16m1(_tmp12a, _tmp12b, vl); + vfloat16m1_t _tmp2m = vfsub_vv_f16m1(_tmp12a, _tmp12b, vl); + + vfloat16m1_t _tmp34a = vfmacc_vf_f16m1( + vfmacc_vf_f16m1(_r06, 0.25f, _r02, vl), -1.25f, _r04, vl); + vfloat16m1_t _tmp34b = vfmacc_vf_f16m1( + vfmacc_vf_f16m1(vfmul_vf_f16m1(_r01, 0.5f, vl), -2.5f, _r03, vl), 2.f, + _r05, vl); + vfloat16m1_t _tmp3m = vfadd_vv_f16m1(_tmp34a, _tmp34b, vl); + vfloat16m1_t _tmp4m = vfsub_vv_f16m1(_tmp34a, _tmp34b, vl); + + vfloat16m1_t _tmp56a = + vfmacc_vf_f16m1(_r06, 4.f, vfmacc_vf_f16m1(_r02, -1.25f, _r04, vl), vl); + vfloat16m1_t _tmp56b = vfmacc_vf_f16m1( + vfmacc_vf_f16m1(vfmul_vf_f16m1(_r01, 2.f, vl), -2.5f, _r03, vl), 0.5f, + _r05, vl); + vfloat16m1_t _tmp5m = vfadd_vv_f16m1(_tmp56a, _tmp56b, vl); + vfloat16m1_t _tmp6m = vfsub_vv_f16m1(_tmp56a, _tmp56b, vl); + + vse16_v_f16m1(tmp[0][m], _tmp0m, vl); + vse16_v_f16m1(tmp[7][m], _tmp7m, vl); + vse16_v_f16m1(tmp[1][m], _tmp1m, vl); + vse16_v_f16m1(tmp[2][m], _tmp2m, vl); + vse16_v_f16m1(tmp[3][m], _tmp3m, vl); + vse16_v_f16m1(tmp[4][m], _tmp4m, vl); + vse16_v_f16m1(tmp[5][m], _tmp5m, vl); + vse16_v_f16m1(tmp[6][m], _tmp6m, vl); + + r0 += padded_in_w * packn; + } + + for (int m = 0; m < 8; m++) { + __fp16 *r0_tm0 = r0_tm; + __fp16 *r0_tm1 = r0_tm0 + tiles * packn; + __fp16 *r0_tm2 = r0_tm1 + tiles * packn; + __fp16 *r0_tm3 = r0_tm2 + tiles * packn; + __fp16 *r0_tm4 = r0_tm3 + tiles * packn; + __fp16 *r0_tm5 = r0_tm4 + tiles * packn; + __fp16 *r0_tm6 = r0_tm5 + tiles * packn; + __fp16 *r0_tm7 = r0_tm6 + tiles * packn; + + vfloat16m1_t _tmp00 = vle16_v_f16m1(tmp[m][0], vl); + vfloat16m1_t _tmp01 = vle16_v_f16m1(tmp[m][1], vl); + vfloat16m1_t _tmp02 = vle16_v_f16m1(tmp[m][2], vl); + vfloat16m1_t _tmp03 = vle16_v_f16m1(tmp[m][3], vl); + vfloat16m1_t _tmp04 = vle16_v_f16m1(tmp[m][4], vl); + vfloat16m1_t _tmp05 = vle16_v_f16m1(tmp[m][5], vl); + vfloat16m1_t _tmp06 = vle16_v_f16m1(tmp[m][6], vl); + vfloat16m1_t _tmp07 = vle16_v_f16m1(tmp[m][7], vl); + + vfloat16m1_t _r0tm0 = + vfmacc_vf_f16m1(vfsub_vv_f16m1(_tmp00, _tmp06, vl), 5.25f, + vfsub_vv_f16m1(_tmp04, _tmp02, vl), vl); + vfloat16m1_t _r0tm7 = + vfmacc_vf_f16m1(vfsub_vv_f16m1(_tmp07, _tmp01, vl), 5.25f, + vfsub_vv_f16m1(_tmp03, _tmp05, vl), vl); + + vfloat16m1_t _tmp12a = + vfmacc_vf_f16m1(vfadd_vv_f16m1(_tmp02, _tmp06, vl), -4.25f, _tmp04, vl); + vfloat16m1_t _tmp12b = + vfmacc_vf_f16m1(vfadd_vv_f16m1(_tmp01, _tmp05, vl), -4.25f, _tmp03, vl); + vfloat16m1_t _r0tm1 = vfadd_vv_f16m1(_tmp12a, _tmp12b, vl); + vfloat16m1_t _r0tm2 = vfsub_vv_f16m1(_tmp12a, _tmp12b, vl); + + vfloat16m1_t _tmp34a = vfmacc_vf_f16m1( + vfmacc_vf_f16m1(_tmp06, 0.25f, _tmp02, vl), -1.25f, _tmp04, vl); + vfloat16m1_t _tmp34b = vfmacc_vf_f16m1( + vfmacc_vf_f16m1(vfmul_vf_f16m1(_tmp01, 0.5f, vl), -2.5f, _tmp03, vl), + 2.f, _tmp05, vl); + vfloat16m1_t _r0tm3 = vfadd_vv_f16m1(_tmp34a, _tmp34b, vl); + vfloat16m1_t _r0tm4 = vfsub_vv_f16m1(_tmp34a, _tmp34b, vl); + + vfloat16m1_t _tmp56a = vfmacc_vf_f16m1( + _tmp06, 4.f, vfmacc_vf_f16m1(_tmp02, -1.25f, _tmp04, vl), vl); + vfloat16m1_t _tmp56b = vfmacc_vf_f16m1( + vfmacc_vf_f16m1(vfmul_vf_f16m1(_tmp01, 2.f, vl), -2.5f, _tmp03, vl), + 0.5f, _tmp05, vl); + vfloat16m1_t _r0tm5 = vfadd_vv_f16m1(_tmp56a, _tmp56b, vl); + vfloat16m1_t _r0tm6 = vfsub_vv_f16m1(_tmp56a, _tmp56b, vl); + + vse16_v_f16m1(r0_tm0, _r0tm0, vl); + vse16_v_f16m1(r0_tm7, _r0tm7, vl); + vse16_v_f16m1(r0_tm1, _r0tm1, vl); + vse16_v_f16m1(r0_tm2, _r0tm2, vl); + vse16_v_f16m1(r0_tm3, _r0tm3, vl); + vse16_v_f16m1(r0_tm4, _r0tm4, vl); + vse16_v_f16m1(r0_tm5, _r0tm5, vl); + vse16_v_f16m1(r0_tm6, _r0tm6, vl); + + r0_tm += tiles * packn * 8; + } + } + } + } + csi_mem_free(input_padd_buf); + + /*********************************** dot ***************************************/ + // reorder input_tm1_buf + int size_input_tm2 = 0; + if (tiles >= 8) { + size_input_tm2 = + 64 * (tiles / 8 + (tiles % 8) / 4 + (tiles % 4) / 2 + tiles % 2) * in_c * 8; + } else if (tiles >= 4) { + size_input_tm2 = 64 * (tiles / 4 + (tiles % 4) / 2 + tiles % 2) * in_c * 4; + } else if (tiles >= 2) { + size_input_tm2 = 64 * (tiles / 2 + tiles % 2) * in_c * 2; + } else { + size_input_tm2 = 64 * tiles * in_c; + } + __fp16 *input_tm2_buf = (__fp16 *)csi_mem_alloc(size_input_tm2 * sizeof(__fp16)); + +#pragma omp parallel for num_threads(1) + for (int r = 0; r < 64; r++) { + __fp16 *img_tm2 = input_tm2_buf + r * size_input_tm2 / 64; // input_tm2 r channel data + + int t = 0; + for (; t + 7 < tiles; t += 8) { + __fp16 *tm2 = img_tm2 + t * in_c; // img_tm2 row data + __fp16 *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * packn; + for (int q = 0; q < in_c / packn; q++) { + vfloat16m1_t _tmp0 = vle16_v_f16m1(tm1, vl); + vfloat16m1_t _tmp1 = vle16_v_f16m1(tm1 + packn * 1, vl); + vfloat16m1_t _tmp2 = vle16_v_f16m1(tm1 + packn * 2, vl); + vfloat16m1_t _tmp3 = vle16_v_f16m1(tm1 + packn * 3, vl); + vfloat16m1_t _tmp4 = vle16_v_f16m1(tm1 + packn * 4, vl); + vfloat16m1_t _tmp5 = vle16_v_f16m1(tm1 + packn * 5, vl); + vfloat16m1_t _tmp6 = vle16_v_f16m1(tm1 + packn * 6, vl); + vfloat16m1_t _tmp7 = vle16_v_f16m1(tm1 + packn * 7, vl); + + vsseg8e16_v_f16m1(tm2, _tmp0, _tmp1, _tmp2, _tmp3, _tmp4, _tmp5, _tmp6, _tmp7, + vl); + tm1 += 64 * tiles * packn; + tm2 += 8 * packn; + } + } + for (; t + 3 < tiles; t += 4) { + __fp16 *tm2 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; // img_tm2 row data + __fp16 *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * packn; + for (int q = 0; q < in_c / packn; q++) { + vfloat16m1_t _tmp0 = vle16_v_f16m1(tm1, vl); + vfloat16m1_t _tmp1 = vle16_v_f16m1(tm1 + packn * 1, vl); + vfloat16m1_t _tmp2 = vle16_v_f16m1(tm1 + packn * 2, vl); + vfloat16m1_t _tmp3 = vle16_v_f16m1(tm1 + packn * 3, vl); + + vsseg4e16_v_f16m1(tm2, _tmp0, _tmp1, _tmp2, _tmp3, vl); + tm1 += 64 * tiles * packn; + tm2 += 4 * packn; + } + } + for (; t + 1 < tiles; t += 2) { + __fp16 *tm2 = + img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; // img_tm2 row data + __fp16 *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * packn; + for (int q = 0; q < in_c / packn; q++) { + vfloat16m1_t _tmp0 = vle16_v_f16m1(tm1, vl); + vfloat16m1_t _tmp1 = vle16_v_f16m1(tm1 + packn * 1, vl); + + vsseg2e16_v_f16m1(tm2, _tmp0, _tmp1, vl); + tm1 += 64 * tiles * packn; + tm2 += 2 * packn; + } + } + for (; t < tiles; t++) { + __fp16 *tm2 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2 + t % 2) * in_c * + 8; // img_tm2 row data + __fp16 *tm1 = input_tm1_buf; + + tm1 += (r * tiles + t) * packn; + for (int q = 0; q < in_c / packn; q++) { + vfloat16m1_t _tmp0 = vle16_v_f16m1(tm1, vl); + + vse16_v_f16m1(tm2, _tmp0, vl); + tm1 += 64 * tiles * packn; + tm2 += 1 * packn; + } + } + } + + csi_mem_free(input_tm1_buf); + + // output_dot_buf: [out_c/8, 64, blocks, 8] + __fp16 *output_dot_buf = + (__fp16 *)csi_mem_alloc(out_c * block_h * block_w * 8 * 8 * sizeof(__fp16)); + +#pragma omp parallel for num_threads(1) + for (int p = 0; p < out_c / packn; p++) { + __fp16 *output0_tm = output_dot_buf + p * 64 * tiles * packn; + __fp16 *kernel0_tm = kernel_data + p * 64 * in_c * packn; + + for (int r = 0; r < 64; r++) { + __fp16 *img_tm2 = input_tm2_buf + r * size_input_tm2 / 64; // img_tm2 第r个channel + + int t = 0; + for (; t + 7 < tiles; t += 8) { + __fp16 *r0 = img_tm2 + t * in_c; + __fp16 *k0 = kernel0_tm + r * in_c * packn; + + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc2 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc3 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc4 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc5 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc6 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc7 = vfmv_v_f_f16m1(0.0f, vl); + + for (int c = 0; c < in_c; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(k0, vl); + k0 += packn; + _acc0 = vfmacc_vf_f16m1(_acc0, r0[0], _kernel, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, r0[1], _kernel, vl); + _acc2 = vfmacc_vf_f16m1(_acc2, r0[2], _kernel, vl); + _acc3 = vfmacc_vf_f16m1(_acc3, r0[3], _kernel, vl); + _acc4 = vfmacc_vf_f16m1(_acc4, r0[4], _kernel, vl); + _acc5 = vfmacc_vf_f16m1(_acc5, r0[5], _kernel, vl); + _acc6 = vfmacc_vf_f16m1(_acc6, r0[6], _kernel, vl); + _acc7 = vfmacc_vf_f16m1(_acc7, r0[7], _kernel, vl); + r0 += 8; + } + + vse16_v_f16m1(output0_tm, _acc0, vl); + vse16_v_f16m1(output0_tm + packn * 1, _acc1, vl); + vse16_v_f16m1(output0_tm + packn * 2, _acc2, vl); + vse16_v_f16m1(output0_tm + packn * 3, _acc3, vl); + vse16_v_f16m1(output0_tm + packn * 4, _acc4, vl); + vse16_v_f16m1(output0_tm + packn * 5, _acc5, vl); + vse16_v_f16m1(output0_tm + packn * 6, _acc6, vl); + vse16_v_f16m1(output0_tm + packn * 7, _acc7, vl); + output0_tm += packn * 8; + } + for (; t + 3 < tiles; t += 4) { + __fp16 *r0 = img_tm2 + (t / 8 + (t % 8) / 4) * in_c * 8; + __fp16 *k0 = kernel0_tm + r * in_c * packn; + + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc2 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc3 = vfmv_v_f_f16m1(0.0f, vl); + + for (int c = 0; c < in_c; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(k0, vl); + k0 += packn; + _acc0 = vfmacc_vf_f16m1(_acc0, r0[0], _kernel, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, r0[1], _kernel, vl); + _acc2 = vfmacc_vf_f16m1(_acc2, r0[2], _kernel, vl); + _acc3 = vfmacc_vf_f16m1(_acc3, r0[3], _kernel, vl); + r0 += 4; + } + + vse16_v_f16m1(output0_tm, _acc0, vl); + vse16_v_f16m1(output0_tm + packn * 1, _acc1, vl); + vse16_v_f16m1(output0_tm + packn * 2, _acc2, vl); + vse16_v_f16m1(output0_tm + packn * 3, _acc3, vl); + output0_tm += packn * 4; + } + for (; t + 1 < tiles; t += 2) { + __fp16 *r0 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2) * in_c * 8; + __fp16 *k0 = kernel0_tm + r * in_c * packn; + + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(0.0f, vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(0.0f, vl); + + for (int c = 0; c < in_c; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(k0, vl); + k0 += packn; + _acc0 = vfmacc_vf_f16m1(_acc0, r0[0], _kernel, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, r0[1], _kernel, vl); + r0 += 2; + } + + vse16_v_f16m1(output0_tm, _acc0, vl); + vse16_v_f16m1(output0_tm + packn * 1, _acc1, vl); + output0_tm += packn * 2; + } + for (; t < tiles; t++) { + __fp16 *r0 = img_tm2 + (t / 8 + (t % 8) / 4 + (t % 4) / 2 + t % 2) * in_c * 8; + __fp16 *k0 = kernel0_tm + r * in_c * packn; + + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(0.0f, vl); + + for (int c = 0; c < in_c; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(k0, vl); + k0 += packn; + _acc0 = vfmacc_vf_f16m1(_acc0, r0[0], _kernel, vl); + r0 += 1; + } + + vse16_v_f16m1(output0_tm, _acc0, vl); + output0_tm += packn * 1; + } + } + } + + csi_mem_free(input_tm2_buf); + /*************************** transform output ****************************/ + // output_tm1_buf: [out_c/8, out_h6, out_w6, 8] + __fp16 *output_tm1_buf = + (__fp16 *)csi_mem_alloc(out_c * block_h * block_w * 6 * 6 * sizeof(__fp16)); + + /* + AT = { + { 1 1 1 1 1 1 1 0 }; + { 0 1 -1 2 -2 1/2 -1/2 0 }; + { 0 1 1 4 4 1/4 1/4 0 }; + { 0 1 -1 8 -8 1/8 -1/8 0 }; + { 0 1 1 16 16 1/16 1/16 0 }; + { 0 1 -1 32 -32 1/32 -1/32 1 } + }; + AT = { + { 1 1 1 1 1 32 32 0 }; + { 0 1 -1 2 -2 16 -16 0 }; + { 0 1 1 4 4 8 8 0 }; + { 0 1 -1 8 -8 4 -4 0 }; + { 0 1 1 16 16 2 2 0 }; + { 0 1 -1 32 -32 1 -1 1 } + }; + */ + +#pragma omp parallel for num_threads(1) + for (int p = 0; p < out_c / packn; p++) { + __fp16 *bias_tmp = bias_data + p * packn; + + __fp16 *out0_tm = output_dot_buf + + p * 64 * block_h * block_w * packn; // 输出转换前/dot后 第p个channel + __fp16 *out0 = + output_tm1_buf + p * 6 * block_h * 6 * block_w * packn; // 转换后输出 第p个channel + + __fp16 tmp[6][8][packn]; + + for (int i = 0; i < block_h; i++) { + for (int j = 0; j < block_w; j++) { + __fp16 *output0_tm_0 = out0_tm + (i * block_w + j) * packn; // 8*8 起始地址 + __fp16 *output0_tm_1 = output0_tm_0 + tiles * packn * 1; + __fp16 *output0_tm_2 = output0_tm_0 + tiles * packn * 2; + __fp16 *output0_tm_3 = output0_tm_0 + tiles * packn * 3; + __fp16 *output0_tm_4 = output0_tm_0 + tiles * packn * 4; + __fp16 *output0_tm_5 = output0_tm_0 + tiles * packn * 5; + __fp16 *output0_tm_6 = output0_tm_0 + tiles * packn * 6; + __fp16 *output0_tm_7 = output0_tm_0 + tiles * packn * 7; + + __fp16 *output0 = + out0 + (i * block_w * 6 * 6 + j * 6) * packn; // 输出 6*6 的起始地址 + + for (int m = 0; m < 8; m++) { + vfloat16m1_t _r00 = vle16_v_f16m1(output0_tm_0, vl); + vfloat16m1_t _r01 = vle16_v_f16m1(output0_tm_1, vl); + vfloat16m1_t _r02 = vle16_v_f16m1(output0_tm_2, vl); + vfloat16m1_t _r03 = vle16_v_f16m1(output0_tm_3, vl); + vfloat16m1_t _r04 = vle16_v_f16m1(output0_tm_4, vl); + vfloat16m1_t _r05 = vle16_v_f16m1(output0_tm_5, vl); + vfloat16m1_t _r06 = vle16_v_f16m1(output0_tm_6, vl); + vfloat16m1_t _r07 = vle16_v_f16m1(output0_tm_7, vl); + + vfloat16m1_t _tmp024a = vfadd_vv_f16m1(_r01, _r02, vl); + vfloat16m1_t _tmp135a = vfsub_vv_f16m1(_r01, _r02, vl); + + vfloat16m1_t _tmp024b = vfadd_vv_f16m1(_r03, _r04, vl); + vfloat16m1_t _tmp135b = vfsub_vv_f16m1(_r03, _r04, vl); + + vfloat16m1_t _tmp024c = vfadd_vv_f16m1(_r05, _r06, vl); + vfloat16m1_t _tmp135c = vfsub_vv_f16m1(_r05, _r06, vl); + + vfloat16m1_t _tmp0m = + vfadd_vv_f16m1(vfadd_vv_f16m1(_r00, _tmp024a, vl), + vfmacc_vf_f16m1(_tmp024b, 32.f, _tmp024c, vl), vl); + vfloat16m1_t _tmp2m = vfmacc_vf_f16m1( + vfmacc_vf_f16m1(_tmp024a, 4.f, _tmp024b, vl), 8.f, _tmp024c, vl); + vfloat16m1_t _tmp4m = vfmacc_vf_f16m1( + vfmacc_vf_f16m1(_tmp024a, 16.f, _tmp024b, vl), 2.f, _tmp024c, vl); + + vfloat16m1_t _tmp1m = vfmacc_vf_f16m1( + vfmacc_vf_f16m1(_tmp135a, 2.f, _tmp135b, vl), 16.f, _tmp135c, vl); + vfloat16m1_t _tmp3m = vfmacc_vf_f16m1( + vfmacc_vf_f16m1(_tmp135a, 8.f, _tmp135b, vl), 4.f, _tmp135c, vl); + vfloat16m1_t _tmp5m = + vfadd_vv_f16m1(vfadd_vv_f16m1(_r07, _tmp135a, vl), + vfmacc_vf_f16m1(_tmp135c, 32.f, _tmp135b, vl), vl); + + vse16_v_f16m1(tmp[0][m], _tmp0m, vl); + vse16_v_f16m1(tmp[2][m], _tmp2m, vl); + vse16_v_f16m1(tmp[4][m], _tmp4m, vl); + vse16_v_f16m1(tmp[1][m], _tmp1m, vl); + vse16_v_f16m1(tmp[3][m], _tmp3m, vl); + vse16_v_f16m1(tmp[5][m], _tmp5m, vl); + + output0_tm_0 += tiles * packn * 8; + output0_tm_1 += tiles * packn * 8; + output0_tm_2 += tiles * packn * 8; + output0_tm_3 += tiles * packn * 8; + output0_tm_4 += tiles * packn * 8; + output0_tm_5 += tiles * packn * 8; + output0_tm_6 += tiles * packn * 8; + output0_tm_7 += tiles * packn * 8; + } + + vfloat16m1_t _bias = vle16_v_f16m1(bias_tmp, vl); + for (int m = 0; m < 6; m++) { + vfloat16m1_t _tmp00 = vle16_v_f16m1(tmp[m][0], vl); + vfloat16m1_t _tmp01 = vle16_v_f16m1(tmp[m][1], vl); + vfloat16m1_t _tmp02 = vle16_v_f16m1(tmp[m][2], vl); + vfloat16m1_t _tmp03 = vle16_v_f16m1(tmp[m][3], vl); + vfloat16m1_t _tmp04 = vle16_v_f16m1(tmp[m][4], vl); + vfloat16m1_t _tmp05 = vle16_v_f16m1(tmp[m][5], vl); + vfloat16m1_t _tmp06 = vle16_v_f16m1(tmp[m][6], vl); + vfloat16m1_t _tmp07 = vle16_v_f16m1(tmp[m][7], vl); + + vfloat16m1_t _tmp024a = vfadd_vv_f16m1(_tmp01, _tmp02, vl); + vfloat16m1_t _tmp135a = vfsub_vv_f16m1(_tmp01, _tmp02, vl); + + vfloat16m1_t _tmp024b = vfadd_vv_f16m1(_tmp03, _tmp04, vl); + vfloat16m1_t _tmp135b = vfsub_vv_f16m1(_tmp03, _tmp04, vl); + + vfloat16m1_t _tmp024c = vfadd_vv_f16m1(_tmp05, _tmp06, vl); + vfloat16m1_t _tmp135c = vfsub_vv_f16m1(_tmp05, _tmp06, vl); + + vfloat16m1_t _output00 = vfadd_vv_f16m1( + _bias, + vfadd_vv_f16m1(vfadd_vv_f16m1(_tmp00, _tmp024a, vl), + vfmacc_vf_f16m1(_tmp024b, 32.f, _tmp024c, vl), vl), + vl); + vfloat16m1_t _output02 = vfadd_vv_f16m1( + _bias, + vfmacc_vf_f16m1(vfmacc_vf_f16m1(_tmp024a, 4.f, _tmp024b, vl), 8.f, + _tmp024c, vl), + vl); + vfloat16m1_t _output04 = vfadd_vv_f16m1( + _bias, + vfmacc_vf_f16m1(vfmacc_vf_f16m1(_tmp024a, 16.f, _tmp024b, vl), 2.f, + _tmp024c, vl), + vl); + + vfloat16m1_t _output01 = vfadd_vv_f16m1( + _bias, + vfmacc_vf_f16m1(vfmacc_vf_f16m1(_tmp135a, 2.f, _tmp135b, vl), 16.f, + _tmp135c, vl), + vl); + vfloat16m1_t _output03 = vfadd_vv_f16m1( + _bias, + vfmacc_vf_f16m1(vfmacc_vf_f16m1(_tmp135a, 8.f, _tmp135b, vl), 4.f, + _tmp135c, vl), + vl); + vfloat16m1_t _output05 = vfadd_vv_f16m1( + _bias, + vfadd_vv_f16m1(vfadd_vv_f16m1(_tmp07, _tmp135a, vl), + vfmacc_vf_f16m1(_tmp135c, 32.f, _tmp135b, vl), vl), + vl); + + vse16_v_f16m1(output0, _output00, vl); + vse16_v_f16m1(output0 + packn * 2, _output02, vl); + vse16_v_f16m1(output0 + packn * 4, _output04, vl); + vse16_v_f16m1(output0 + packn * 1, _output01, vl); + vse16_v_f16m1(output0 + packn * 3, _output03, vl); + vse16_v_f16m1(output0 + packn * 5, _output05, vl); + + output0 += block_w * 6 * packn; + } + } + } + } + + csi_mem_free(output_dot_buf); + // crop the output after transform: cut extra part (right , bottom) + winograd_crop_output_packnto1_fp16(output_tm1_buf, output_data, out_c, out_h, out_w, + block_h * 6, block_w * 6); + output_data += output_size; + csi_mem_free(output_tm1_buf); + } + + if (!flag_bias) { + csi_mem_free(bias_data); + bias_data = NULL; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/convolution_gemm.c b/source/thead_rvv/convolution_gemm.c new file mode 100644 index 00000000..52f0ef45 --- /dev/null +++ b/source/thead_rvv/convolution_gemm.c @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/* + pack kernel_data inplace, means the origin kernel_data be destoried. + The reason to do this is that the packaging process must not consume more memory. +*/ +void csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp32(struct csi_tensor *kernel, + struct conv2d_params *params) +{ + float *kernel_data = (float *)kernel->data; + int group = params->group; + + int m = kernel->dim[0] / group; // m = out_ch / group + int k = kernel->dim[1] * kernel->dim[2] * kernel->dim[3]; + + float *pa_reorder = (float *)csi_mem_alloc(group * m * k * sizeof(float)); + for (int g = 0; g < group; g++) { + csi_nn_rvv_reorder_kernel_n8_fp32(kernel_data + g * m * k, pa_reorder + g * m * k, m, k, k); + } + memcpy(kernel_data, pa_reorder, group * m * k * sizeof(float)); + csi_mem_free(pa_reorder); +} + +int csi_nn_rvv_conv_im2col_gemm_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + float *kernel_data = (float *)kernel->data; + float *bias_data = (float *)bias->data; + + int32_t group = params->group; + int32_t batch = input->dim[0]; + int32_t in_ch = input->dim[1]; + int32_t in_height = input->dim[2]; + int32_t in_width = input->dim[3]; + int32_t out_ch = kernel->dim[0]; + int32_t out_height = output->dim[2]; + int32_t out_width = output->dim[3]; + int32_t ksize_h = kernel->dim[2]; + int32_t ksize_w = kernel->dim[3]; + int32_t stride_h = params->stride_height; + int32_t stride_w = params->stride_width; + int32_t pad_left = params->pad_left; + int32_t pad_top = params->pad_top; + + // im2col matrix_col = out_height * out_width + // im2col matrix_row = channel_col + int channel_col = in_ch / group * ksize_h * ksize_w; + + int32_t m = out_ch / group; + int32_t k = channel_col; + int32_t n = out_height * out_width; + + float *im2col_data = (float *)csi_mem_alloc(k * n * sizeof(float)); + float *pb_reorder = (float *)csi_mem_alloc(k * n * sizeof(float)); + + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + // im2col + for (int c = 0; c < channel_col; ++c) { + int w_offset = c % ksize_w; + int h_offset = c / ksize_w % ksize_h; + int c_im = c / ksize_h / ksize_w; + for (int h = 0; h < out_height; ++h) { + for (int w = 0; w < out_width; ++w) { + int im_row = h_offset + h * stride_h; + int im_col = w_offset + w * stride_w; + int col_index = + (c * out_height + h) * out_width + w; // [channel_col, out_h, out_w] + im_row = im_row - params->pad_top; + im_col = im_col - params->pad_left; + if (im_row < 0 || im_col < 0 || im_row >= in_height || im_col >= in_width) { + im2col_data[col_index] = 0.0f; + } else { + im2col_data[col_index] = + input_data[(c_im * input->dim[2] + im_row) * input->dim[3] + + im_col]; + } + } + } + } + + float *pa = kernel_data + g * m * k; + float *pb = pb_reorder; + float *pc = output_data; + + // pack + csi_nn_rvv_reorder_input_z8_fp32(im2col_data, pb, k, n, n); + // GEMM + csi_nn_rvv_gemm_8x8_fp32(pc, pa, pb, m, k, n, n, bias_data + g * m); + input_data += in_ch / group * in_height * in_width; + output_data += m * n; + } + } + csi_mem_free(pb_reorder); + csi_mem_free(im2col_data); + return CSINN_TRUE; +} diff --git a/source/thead_rvv/convolution_gemm_fp16.c b/source/thead_rvv/convolution_gemm_fp16.c new file mode 100644 index 00000000..954d136a --- /dev/null +++ b/source/thead_rvv/convolution_gemm_fp16.c @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/* + pack kernel_data inplace, means the origin kernel_data be destoried. + The reason to do this is that the packaging process must not consume more memory. +*/ +void csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp16(struct csi_tensor *kernel, + struct conv2d_params *params) +{ + __fp16 *kernel_data = (__fp16 *)kernel->data; + int group = params->group; + + int m = kernel->dim[0] / group; // m = out_ch / group + int k = kernel->dim[1] * kernel->dim[2] * kernel->dim[3]; + + __fp16 *pa_reorder = (__fp16 *)csi_mem_alloc(group * m * k * sizeof(__fp16)); + for (int g = 0; g < group; g++) { + csi_nn_rvv_reorder_kernel_n8_fp16(kernel_data + g * m * k, pa_reorder + g * m * k, m, k, k); + } + memcpy(kernel_data, pa_reorder, group * m * k * sizeof(__fp16)); + csi_mem_free(pa_reorder); +} + +int csi_nn_rvv_conv_im2col_gemm_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *kernel_data = (__fp16 *)kernel->data; + __fp16 *bias_data = (__fp16 *)bias->data; + + int32_t group = params->group; + int32_t batch = input->dim[0]; + int32_t in_ch = input->dim[1]; + int32_t in_height = input->dim[2]; + int32_t in_width = input->dim[3]; + int32_t out_ch = kernel->dim[0]; + int32_t out_height = output->dim[2]; + int32_t out_width = output->dim[3]; + int32_t ksize_h = kernel->dim[2]; + int32_t ksize_w = kernel->dim[3]; + int32_t stride_h = params->stride_height; + int32_t stride_w = params->stride_width; + int32_t pad_left = params->pad_left; + int32_t pad_top = params->pad_top; + + // im2col matrix_col = out_height * out_width + // im2col matrix_row = channel_col + int channel_col = in_ch / group * ksize_h * ksize_w; + + int32_t m = out_ch / group; + int32_t k = channel_col; + int32_t n = out_height * out_width; + + __fp16 *im2col_data = (__fp16 *)csi_mem_alloc(k * n * sizeof(__fp16)); + __fp16 *pb_reorder = (__fp16 *)csi_mem_alloc(k * n * sizeof(__fp16)); + + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + // im2col + for (int c = 0; c < channel_col; ++c) { + int w_offset = c % ksize_w; + int h_offset = c / ksize_w % ksize_h; + int c_im = c / ksize_h / ksize_w; + for (int h = 0; h < out_height; ++h) { + for (int w = 0; w < out_width; ++w) { + int im_row = h_offset + h * stride_h; + int im_col = w_offset + w * stride_w; + int col_index = + (c * out_height + h) * out_width + w; // [channel_col, out_h, out_w] + im_row = im_row - params->pad_top; + im_col = im_col - params->pad_left; + if (im_row < 0 || im_col < 0 || im_row >= in_height || im_col >= in_width) { + im2col_data[col_index] = 0.0f; + } else { + im2col_data[col_index] = + input_data[(c_im * input->dim[2] + im_row) * input->dim[3] + + im_col]; + } + } + } + } + + __fp16 *pa = kernel_data + g * m * k; + __fp16 *pb = pb_reorder; + __fp16 *pc = output_data; + + // pack + csi_nn_rvv_reorder_input_z16_fp16(im2col_data, pb, k, n, n); + // GEMM + csi_nn_rvv_gemm_8x16_fp16(pc, pa, pb, m, k, n, n, bias_data + g * m); + input_data += in_ch / group * in_height * in_width; + output_data += m * n; + } + } + csi_mem_free(pb_reorder); + csi_mem_free(im2col_data); + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/thead_rvv/convolution_gemm_int4.c b/source/thead_rvv/convolution_gemm_int4.c new file mode 100644 index 00000000..d904696f --- /dev/null +++ b/source/thead_rvv/convolution_gemm_int4.c @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +void csi_nn_rvv_conv_im2col_sgemm_transform_kernel_int4(struct csi_tensor *kernel, + struct conv2d_params *params) +{ + int8_t *kernel_data = (int8_t *)kernel->data; + int group = params->group; + + int n = kernel->dim[0] / group; // m = out_ch / group + int k = kernel->dim[1] * kernel->dim[2] * kernel->dim[3]; + + int k_2 = (((k - 1) & -2) + 2) >> 1; + int k4 = ((k_2 - 1) & -4) + 4; // align of 4 for int8 + + params->conv_extra.kernel_tm->data = (int8_t *)csi_mem_alloc(group * n * k4 * sizeof(int8_t)); + int8_t *pa_reorder = (int8_t *)params->conv_extra.kernel_tm->data; + + for (int g = 0; g < group; g++) { + csi_nn_rvv_reorder_kernel_n8_int8(kernel_data + g * n * k_2, pa_reorder + g * n * k4, n, + k_2, k_2); + } + // FIXME: free params->conv_extra.kernel_tm->data +} + +int csi_nn_rvv_conv_im2col_gemm_int4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + int8_t *kernel_data = (int8_t *)params->conv_extra.kernel_tm->data; + int32_t *bias_data = (int32_t *)bias->data; + + int32_t group = params->group; + int32_t batch = input->dim[0]; + int32_t in_height = input->dim[1]; + int32_t in_width = input->dim[2]; + int32_t in_ch = input->dim[3]; + int32_t out_ch = kernel->dim[0]; + int32_t out_height = output->dim[1]; + int32_t out_width = output->dim[2]; + int32_t ksize_h = kernel->dim[1]; + int32_t ksize_w = kernel->dim[2]; + int32_t stride_h = params->stride_height; + int32_t stride_w = params->stride_width; + int32_t pad_left = params->pad_left; + int32_t pad_top = params->pad_top; + + // im2col matrix_col = out_height * out_width + // im2col matrix_row = channel_col + int channel_col = in_ch / group * ksize_h * ksize_w; + + int32_t m = out_height * out_width; + int32_t k_2 = (channel_col - 1) / 2 + 1; + int32_t n = out_ch / group; + int32_t k4 = ((k_2 - 1) & -4) + 4; + + int32_t *multiplier = (int32_t *)csi_mem_alloc(n * sizeof(int32_t)); + int32_t *shift = (int32_t *)csi_mem_alloc(n * sizeof(int32_t)); + + int8_t *im2col_data = (int8_t *)csi_mem_alloc(m * k_2 * sizeof(int8_t)); + int8_t *pa_reorder = (int8_t *)csi_mem_alloc(m * k4 * sizeof(int8_t)); + + int8_t *im2col_shadow = NULL; + int8_t pad_value = 0; + + int j = 0; + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + // im2col + if (in_ch & 1) { + int8_t *buffer_int4_to_int8 = + (int8_t *)csi_mem_alloc(in_height * in_width * in_ch * sizeof(int8_t)); + csi_nn_rvv_int4_to_int8(input_data, buffer_int4_to_int8, + in_height * in_width * in_ch); + int8_t *buffer_im2col = (int8_t *)csi_mem_alloc(m * channel_col * sizeof(int8_t)); + im2col_shadow = buffer_im2col; + pad_value = input->qinfo->zero_point & 0x0f; + + for (int i_out_h = 0; i_out_h < out_height; i_out_h++) { + for (int i_out_w = 0; i_out_w < out_width; i_out_w++) { + int ker_start_h = i_out_h * stride_h - pad_top; + int ker_start_w = i_out_w * stride_w - pad_left; + for (int i_ker_h = ker_start_h; i_ker_h < ker_start_h + ksize_h; + i_ker_h++) { + for (int i_ker_w = ker_start_w; i_ker_w < ker_start_w + ksize_w; + i_ker_w++) { + if (i_ker_h < 0 || i_ker_h >= in_height || i_ker_w < 0 || + i_ker_w >= in_width) { + memset(im2col_shadow, pad_value, in_ch * sizeof(int8_t)); + } else { + memcpy(im2col_shadow, + buffer_int4_to_int8 + + in_ch * (i_ker_h * in_width + i_ker_w), + in_ch * sizeof(int8_t)); + } + im2col_shadow += in_ch; + } + } + } + } + for (int k = 0; k < m; k++) { + csi_nn_rvv_int8_to_int4(buffer_im2col + k * channel_col, im2col_data + k * k_2, + channel_col); + } + csi_mem_free(buffer_int4_to_int8); + csi_mem_free(buffer_im2col); + + } else { + im2col_shadow = im2col_data; + pad_value = (input->qinfo->zero_point << 4) | (input->qinfo->zero_point & 0x0f); + + for (int i_out_h = 0; i_out_h < out_height; i_out_h++) { + for (int i_out_w = 0; i_out_w < out_width; i_out_w++) { + int ker_start_h = i_out_h * stride_h - pad_top; + int ker_start_w = i_out_w * stride_w - pad_left; + for (int i_ker_h = ker_start_h; i_ker_h < ker_start_h + ksize_h; + i_ker_h++) { + for (int i_ker_w = ker_start_w; i_ker_w < ker_start_w + ksize_w; + i_ker_w++) { + if (i_ker_h < 0 || i_ker_h >= in_height || i_ker_w < 0 || + i_ker_w >= in_width) { + memset(im2col_shadow, pad_value, in_ch / 2 * sizeof(int8_t)); + } else { + memcpy(im2col_shadow, + input_data + (i_ker_h * in_width + i_ker_w) * in_ch / 2, + in_ch / 2 * sizeof(int8_t)); + } + im2col_shadow += in_ch / 2; + } + } + } + } + } + + int8_t *pa = pa_reorder; + int8_t *pb = kernel_data + g * n * k4; + int8_t *pc = output_data; + + if (kernel->quant_channel > 1) { + for (int c = 0; c < n; c++, j++) { + multiplier[c] = kernel->qinfo[j].multiplier; + shift[c] = kernel->qinfo[j].shift; + } + } else if (kernel->quant_channel == 1) { + for (int c = 0; c < n; c++) { + multiplier[c] = kernel->qinfo[0].multiplier; + shift[c] = kernel->qinfo[0].shift; + } + } + + // pack + csi_nn_rvv_reorder_input_n8_int4(im2col_data, pa, m, k_2, k_2); + // GEMM + csi_nn_rvv_gemm_8x8_int4(pc, pa, pb, m, k4, n, n / 2, bias_data + g * n, + output->qinfo->zero_point, multiplier, shift); + + input_data += in_ch / group * in_height * in_width / 2; + output_data += m * n / 2; + } + } + csi_mem_free(pa_reorder); + csi_mem_free(im2col_data); + csi_mem_free(multiplier); + csi_mem_free(shift); + return CSINN_TRUE; +} diff --git a/source/thead_rvv/convolution_gemm_int8.c b/source/thead_rvv/convolution_gemm_int8.c new file mode 100644 index 00000000..8bbb0768 --- /dev/null +++ b/source/thead_rvv/convolution_gemm_int8.c @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +void csi_nn_rvv_conv_im2col_sgemm_transform_kernel_int8(struct csi_tensor *kernel, + struct conv2d_params *params) +{ + int8_t *kernel_data = (int8_t *)kernel->data; + int group = params->group; + + int m = kernel->dim[0] / group; // m = out_ch / group + int k = kernel->dim[1] * kernel->dim[2] * kernel->dim[3]; + int k4 = (k % 4 != 0) ? ((k / 4 + 1) * 4) : k; + + params->conv_extra.kernel_tm->data = (int8_t *)csi_mem_alloc(group * m * k4 * sizeof(int8_t)); + int8_t *pa_reorder = (int8_t *)params->conv_extra.kernel_tm->data; + + for (int g = 0; g < group; g++) { + csi_nn_rvv_reorder_kernel_n8_int8(kernel_data + g * m * k, pa_reorder + g * m * k4, m, k, + k); + } + // FIXME: free params->conv_extra.kernel_tm->data + // memcpy(kernel_data, pa_reorder, group * m * k * sizeof(__fp16)); + // csi_mem_free(pa_reorder); +} + +int csi_nn_rvv_conv_im2col_gemm_int8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + int8_t *kernel_data = (int8_t *)params->conv_extra.kernel_tm->data; + int32_t *bias_data = (int32_t *)bias->data; + + int32_t group = params->group; + int32_t batch = input->dim[0]; + int32_t in_ch = input->dim[1]; + int32_t in_height = input->dim[2]; + int32_t in_width = input->dim[3]; + int32_t out_ch = kernel->dim[0]; + int32_t out_height = output->dim[2]; + int32_t out_width = output->dim[3]; + int32_t ksize_h = kernel->dim[2]; + int32_t ksize_w = kernel->dim[3]; + int32_t stride_h = params->stride_height; + int32_t stride_w = params->stride_width; + int32_t pad_left = params->pad_left; + int32_t pad_top = params->pad_top; + + // im2col matrix_col = out_height * out_width + // im2col matrix_row = channel_col + int channel_col = in_ch / group * ksize_h * ksize_w; + + int32_t m = out_ch / group; + int32_t k = channel_col; + int32_t n = out_height * out_width; + int32_t k4 = (k % 4 != 0) ? ((k / 4 + 1) * 4) : k; + + int8_t *im2col_data = (int8_t *)csi_mem_alloc(k * n * sizeof(int8_t)); + int8_t *pb_reorder = (int8_t *)csi_mem_alloc(k4 * n * sizeof(int8_t)); + + int32_t *multiplier = (int32_t *)csi_mem_alloc(m * sizeof(int32_t)); + int32_t *shift = (int32_t *)csi_mem_alloc(m * sizeof(int32_t)); + + int j = 0; + for (int i = 0; i < batch; i++) { + for (int g = 0; g < group; g++) { + // im2col + for (int c = 0; c < channel_col; ++c) { + int w_offset = c % ksize_w; + int h_offset = c / ksize_w % ksize_h; + int c_im = c / ksize_h / ksize_w; + for (int h = 0; h < out_height; ++h) { + for (int w = 0; w < out_width; ++w) { + int im_row = h_offset + h * stride_h; + int im_col = w_offset + w * stride_w; + int col_index = + (c * out_height + h) * out_width + w; // [channel_col, out_h, out_w] + im_row = im_row - params->pad_top; + im_col = im_col - params->pad_left; + if (im_row < 0 || im_col < 0 || im_row >= in_height || im_col >= in_width) { + im2col_data[col_index] = input->qinfo->zero_point; + } else { + im2col_data[col_index] = + input_data[(c_im * input->dim[2] + im_row) * input->dim[3] + + im_col]; + } + } + } + } + + int8_t *pa = kernel_data + g * m * k4; + int8_t *pb = pb_reorder; + int8_t *pc = output_data; + + if (kernel->quant_channel > 1) { + for (int c = 0; c < m; c++, j++) { + multiplier[c] = kernel->qinfo[j].multiplier; + shift[c] = kernel->qinfo[j].shift; + } + } else if (kernel->quant_channel == 1) { + for (int c = 0; c < m; c++) { + multiplier[c] = kernel->qinfo[0].multiplier; + shift[c] = kernel->qinfo[0].shift; + } + } + + // pack + csi_nn_rvv_reorder_input_z8_int8(im2col_data, pb, k, n, n); + // GEMM + csi_nn_rvv_gemm_8x8_int8(pc, pa, pb, m, k4, n, n, bias_data + g * m, + output->qinfo->zero_point, multiplier, shift); + + input_data += in_ch / group * in_height * in_width; + output_data += m * n; + } + } + csi_mem_free(pb_reorder); + csi_mem_free(im2col_data); + csi_mem_free(multiplier); + csi_mem_free(shift); + return CSINN_TRUE; +} diff --git a/source/thead_rvv/depthwise_convolution_3x3.c b/source/thead_rvv/depthwise_convolution_3x3.c new file mode 100644 index 00000000..95d7e760 --- /dev/null +++ b/source/thead_rvv/depthwise_convolution_3x3.c @@ -0,0 +1,479 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +int csi_nn_rvv_dwconv3x3s1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + float *kernel_data = (float *)kernel->data; + float *bias_data = (float *)bias->data; + + int32_t batch = input->dim[0]; + int32_t in_c = input->dim[1]; // group = in_channel + int32_t in_h = input->dim[2]; + int32_t in_w = input->dim[3]; + + int32_t out_c = output->dim[1]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + float *input_padd_buf = + (float *)csi_mem_alloc(in_c * (in_h + params->pad_top + params->pad_down) * + (in_w + params->pad_left + params->pad_right) * sizeof(float)); + + csi_nn_rvv_pad_input_fp32( + input_data, input_padd_buf, in_c, in_h, in_w, in_h + params->pad_top + params->pad_down, + in_w + params->pad_left + params->pad_right, params->pad_top, params->pad_left); + + in_h = in_h + params->pad_top + params->pad_down; + in_w = in_w + params->pad_left + params->pad_right; + +#pragma omp parallel for num_threads(1) + for (int c = 0; c < in_c; c++) { + float *out = output_data + c * out_h * out_w; + float *outptr0 = out; + float *outptr1 = outptr0 + out_w; + + const float bias0 = bias_data ? bias_data[c] : 0.0f; + + float *img0 = input_padd_buf + c * in_h * in_w; + float *r0 = img0; + float *r1 = r0 + in_w; + float *r2 = r1 + in_w; + float *r3 = r2 + in_w; + + const float *kernel0 = kernel_data + c * 9; + + float k00 = kernel0[0]; + float k01 = kernel0[1]; + float k02 = kernel0[2]; + float k10 = kernel0[3]; + float k11 = kernel0[4]; + float k12 = kernel0[5]; + float k20 = kernel0[6]; + float k21 = kernel0[7]; + float k22 = kernel0[8]; + + int vl; + int w_loop = csrr_vlenb() / sizeof(float); // VLEN128=4 VLEN256=8 + int w2_loop = w_loop * 2; + + // TODO: 优化指令序列,调整 intrinsic ,达到和汇编类似的指令序列 + int h = 0; + // h2 loop + for (; h + 1 < out_h; h += 2) { + vl = vsetvl_e32m2(w2_loop); + int w = 0; + // h2w8 loop + for (; w + w2_loop - 1 < out_w; w += w2_loop) { + vfloat32m2_t _acc0 = vfmv_v_f_f32m2(bias0, vl); + vfloat32m2_t _acc1 = vfmv_v_f_f32m2(bias0, vl); + + vfloat32m2_t _r0_0_7 = vle32_v_f32m2(r0, vl); + vfloat32m2_t _r0_1_8 = vle32_v_f32m2(r0 + 1, vl); + vfloat32m2_t _r0_2_9 = vle32_v_f32m2(r0 + 2, vl); + + vfloat32m2_t _r1_0_7 = vle32_v_f32m2(r1, vl); + vfloat32m2_t _r1_1_8 = vle32_v_f32m2(r1 + 1, vl); + vfloat32m2_t _r1_2_9 = vle32_v_f32m2(r1 + 2, vl); + + vfloat32m2_t _r2_0_7 = vle32_v_f32m2(r2, vl); + vfloat32m2_t _r2_1_8 = vle32_v_f32m2(r2 + 1, vl); + vfloat32m2_t _r2_2_9 = vle32_v_f32m2(r2 + 2, vl); + + vfloat32m2_t _r3_0_7 = vle32_v_f32m2(r3, vl); + vfloat32m2_t _r3_1_8 = vle32_v_f32m2(r3 + 1, vl); + vfloat32m2_t _r3_2_9 = vle32_v_f32m2(r3 + 2, vl); + + _acc0 = vfmacc_vf_f32m2(_acc0, k00, _r0_0_7, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k01, _r0_1_8, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k02, _r0_2_9, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k10, _r1_0_7, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k11, _r1_1_8, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k12, _r1_2_9, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k20, _r2_0_7, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k21, _r2_1_8, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k22, _r2_2_9, vl); + + _acc1 = vfmacc_vf_f32m2(_acc1, k00, _r1_0_7, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k01, _r1_1_8, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k02, _r1_2_9, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k10, _r2_0_7, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k11, _r2_1_8, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k12, _r2_2_9, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k20, _r3_0_7, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k21, _r3_1_8, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k22, _r3_2_9, vl); + + vse32_v_f32m2(outptr0, _acc0, vl); + vse32_v_f32m2(outptr1, _acc1, vl); + + r0 += vl; + r1 += vl; + r2 += vl; + r3 += vl; + outptr0 += vl; + outptr1 += vl; + } + + // h2w4 + for (; w + w_loop - 1 < out_w; w += w_loop) { + vl = vsetvl_e32m1(w_loop); + + vfloat32m1_t _acc0 = vfmv_v_f_f32m1(bias0, vl); + vfloat32m1_t _acc1 = vfmv_v_f_f32m1(bias0, vl); + + vfloat32m1_t _r0_0_3 = vle32_v_f32m1(r0, vl); + vfloat32m1_t _r0_1_4 = vle32_v_f32m1(r0 + 1, vl); + vfloat32m1_t _r0_2_5 = vle32_v_f32m1(r0 + 2, vl); + + vfloat32m1_t _r1_0_3 = vle32_v_f32m1(r1, vl); + vfloat32m1_t _r1_1_4 = vle32_v_f32m1(r1 + 1, vl); + vfloat32m1_t _r1_2_5 = vle32_v_f32m1(r1 + 2, vl); + + vfloat32m1_t _r2_0_3 = vle32_v_f32m1(r2, vl); + vfloat32m1_t _r2_1_4 = vle32_v_f32m1(r2 + 1, vl); + vfloat32m1_t _r2_2_5 = vle32_v_f32m1(r2 + 2, vl); + + vfloat32m1_t _r3_0_3 = vle32_v_f32m1(r3, vl); + vfloat32m1_t _r3_1_4 = vle32_v_f32m1(r3 + 1, vl); + vfloat32m1_t _r3_2_5 = vle32_v_f32m1(r3 + 2, vl); + + _acc0 = vfmacc_vf_f32m1(_acc0, k00, _r0_0_3, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k01, _r0_1_4, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k02, _r0_2_5, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k10, _r1_0_3, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k11, _r1_1_4, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k12, _r1_2_5, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k20, _r2_0_3, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k21, _r2_1_4, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k22, _r2_2_5, vl); + + _acc1 = vfmacc_vf_f32m1(_acc1, k00, _r1_0_3, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k01, _r1_1_4, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k02, _r1_2_5, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k10, _r2_0_3, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k11, _r2_1_4, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k12, _r2_2_5, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k20, _r3_0_3, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k21, _r3_1_4, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k22, _r3_2_5, vl); + + vse32_v_f32m1(outptr0, _acc0, vl); + vse32_v_f32m1(outptr1, _acc1, vl); + + r0 += vl; + r1 += vl; + r2 += vl; + r3 += vl; + outptr0 += vl; + outptr1 += vl; + } + + vl = vsetvl_e32m1(3); + + vfloat32m1_t _k0 = vle32_v_f32m1(kernel0, vl); + vfloat32m1_t _k1 = vle32_v_f32m1(kernel0 + 3, vl); + vfloat32m1_t _k2 = vle32_v_f32m1(kernel0 + 6, vl); + + vfloat32m1_t _tmp = vfmv_v_f_f32m1(bias0, vl); + + // h2w_tail + for (; w < out_w; w++) { + vfloat32m1_t _r0 = vle32_v_f32m1(r0, vl); + vfloat32m1_t _r1 = vle32_v_f32m1(r1, vl); + vfloat32m1_t _r2 = vle32_v_f32m1(r2, vl); + vfloat32m1_t _r3 = vle32_v_f32m1(r3, vl); + + vfloat32m1_t _acc0 = vfmul_vv_f32m1(_k0, _r0, vl); + _acc0 = vfmacc_vv_f32m1(_acc0, _k1, _r1, vl); + _acc0 = vfmacc_vv_f32m1(_acc0, _k2, _r2, vl); + vfloat32m1_t _acc0_tmp = + vfredusum_vs_f32m1_f32m1(vundefined_f32m1(), _acc0, _tmp, vl); + float res0 = vfmv_f_s_f32m1_f32(_acc0_tmp); + + vfloat32m1_t _acc1 = vfmul_vv_f32m1(_k0, _r1, vl); + _acc1 = vfmacc_vv_f32m1(_acc1, _k1, _r2, vl); + _acc1 = vfmacc_vv_f32m1(_acc1, _k2, _r3, vl); + vfloat32m1_t _acc1_tmp = + vfredusum_vs_f32m1_f32m1(vundefined_f32m1(), _acc1, _tmp, vl); + float res1 = vfmv_f_s_f32m1_f32(_acc1_tmp); + + r0++; + r1++; + r2++; + r3++; + *outptr0++ = res0; + *outptr1++ = res1; + } + r0 += 2 + in_w; + r1 += 2 + in_w; + r2 += 2 + in_w; + r3 += 2 + in_w; + + outptr0 += out_w; + outptr1 += out_w; + } + + // h1 + for (; h < out_h; h++) { + vl = vsetvl_e32m2(w2_loop); + int w = 0; + // h1w8 loop + for (; w + w2_loop - 1 < out_w; w += w2_loop) { + vfloat32m2_t _acc0 = vfmv_v_f_f32m2(bias0, vl); + + vfloat32m2_t _r0_0_7 = vle32_v_f32m2(r0, vl); + vfloat32m2_t _r0_1_8 = vle32_v_f32m2(r0 + 1, vl); + vfloat32m2_t _r0_2_9 = vle32_v_f32m2(r0 + 2, vl); + + vfloat32m2_t _r1_0_7 = vle32_v_f32m2(r1, vl); + vfloat32m2_t _r1_1_8 = vle32_v_f32m2(r1 + 1, vl); + vfloat32m2_t _r1_2_9 = vle32_v_f32m2(r1 + 2, vl); + + vfloat32m2_t _r2_0_7 = vle32_v_f32m2(r2, vl); + vfloat32m2_t _r2_1_8 = vle32_v_f32m2(r2 + 1, vl); + vfloat32m2_t _r2_2_9 = vle32_v_f32m2(r2 + 2, vl); + + _acc0 = vfmacc_vf_f32m2(_acc0, k00, _r0_0_7, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k01, _r0_1_8, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k02, _r0_2_9, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k10, _r1_0_7, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k11, _r1_1_8, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k12, _r1_2_9, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k20, _r2_0_7, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k21, _r2_1_8, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, k22, _r2_2_9, vl); + + vse32_v_f32m2(outptr0, _acc0, vl); + + r0 += vl; + r1 += vl; + r2 += vl; + outptr0 += vl; + } + + // h1w4 + for (; w + w_loop - 1 < out_w; w += w_loop) { + vl = vsetvl_e32m1(w_loop); + + vfloat32m1_t _acc0 = vfmv_v_f_f32m1(bias0, vl); + + vfloat32m1_t _r0_0_3 = vle32_v_f32m1(r0, vl); + vfloat32m1_t _r0_1_4 = vle32_v_f32m1(r0 + 1, vl); + vfloat32m1_t _r0_2_5 = vle32_v_f32m1(r0 + 2, vl); + + vfloat32m1_t _r1_0_3 = vle32_v_f32m1(r1, vl); + vfloat32m1_t _r1_1_4 = vle32_v_f32m1(r1 + 1, vl); + vfloat32m1_t _r1_2_5 = vle32_v_f32m1(r1 + 2, vl); + + vfloat32m1_t _r2_0_3 = vle32_v_f32m1(r2, vl); + vfloat32m1_t _r2_1_4 = vle32_v_f32m1(r2 + 1, vl); + vfloat32m1_t _r2_2_5 = vle32_v_f32m1(r2 + 2, vl); + + _acc0 = vfmacc_vf_f32m1(_acc0, k00, _r0_0_3, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k01, _r0_1_4, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k02, _r0_2_5, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k10, _r1_0_3, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k11, _r1_1_4, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k12, _r1_2_5, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k20, _r2_0_3, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k21, _r2_1_4, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, k22, _r2_2_5, vl); + + vse32_v_f32m1(outptr0, _acc0, vl); + + r0 += vl; + r1 += vl; + r2 += vl; + outptr0 += vl; + } + + vl = vsetvl_e32m1(3); + + vfloat32m1_t _k0 = vle32_v_f32m1(kernel0, vl); + vfloat32m1_t _k1 = vle32_v_f32m1(kernel0 + 3, vl); + vfloat32m1_t _k2 = vle32_v_f32m1(kernel0 + 6, vl); + + vfloat32m1_t _tmp = vfmv_v_f_f32m1(bias0, vl); + // h1w_tail + for (; w < out_w; w++) { + vfloat32m1_t _r0 = vle32_v_f32m1(r0, vl); + vfloat32m1_t _r1 = vle32_v_f32m1(r1, vl); + vfloat32m1_t _r2 = vle32_v_f32m1(r2, vl); + + vfloat32m1_t _acc0 = vfmul_vv_f32m1(_k0, _r0, vl); + _acc0 = vfmacc_vv_f32m1(_acc0, _k1, _r1, vl); + _acc0 = vfmacc_vv_f32m1(_acc0, _k2, _r2, vl); + vfloat32m1_t _acc0_tmp = + vfredusum_vs_f32m1_f32m1(vundefined_f32m1(), _acc0, _tmp, vl); + float res0 = vfmv_f_s_f32m1_f32(_acc0_tmp); + + r0++; + r1++; + r2++; + *outptr0++ = res0; + } + } + } + + csi_mem_free(input_padd_buf); + return CSINN_TRUE; +} + +int csi_nn_rvv_dwconv3x3s2_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + float *kernel_data = (float *)kernel->data; + float *bias_data = (float *)bias->data; + + int32_t batch = input->dim[0]; + int32_t in_c = input->dim[1]; // group = in_channel + int32_t in_h = input->dim[2]; + int32_t in_w = input->dim[3]; + + int32_t out_c = output->dim[1]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + float *input_padd_buf = + (float *)csi_mem_alloc(in_c * (in_h + params->pad_top + params->pad_down) * + (in_w + params->pad_left + params->pad_right) * sizeof(float)); + + csi_nn_rvv_pad_input_fp32( + input_data, input_padd_buf, in_c, in_h, in_w, in_h + params->pad_top + params->pad_down, + in_w + params->pad_left + params->pad_right, params->pad_top, params->pad_left); + + in_h = in_h + params->pad_top + params->pad_down; + in_w = in_w + params->pad_left + params->pad_right; + + int tailstep = in_w - 2 * out_w + in_w; + +#pragma omp parallel for num_threads(1) + for (int c = 0; c < in_c; c++) { + float *out = output_data + c * out_h * out_w; + float *outptr0 = out; + + const float bias0 = bias_data ? bias_data[c] : 0.0f; + + float *img0 = input_padd_buf + c * in_h * in_w; + float *r0 = img0; + float *r1 = r0 + in_w; + float *r2 = r1 + in_w; + + const float *kernel0 = kernel_data + c * 9; + + float k00 = kernel0[0]; + float k01 = kernel0[1]; + float k02 = kernel0[2]; + float k10 = kernel0[3]; + float k11 = kernel0[4]; + float k12 = kernel0[5]; + float k20 = kernel0[6]; + float k21 = kernel0[7]; + float k22 = kernel0[8]; + int vl; + int w_loop = csrr_vlenb() / sizeof(float); // VLEN128=4 VLEN256=8 + + for (int h = 0; h < out_h; h++) { + vl = vsetvl_e32m1(w_loop); + int w = 0; + // h1w4 loop + for (; w + w_loop - 1 < out_w; w += w_loop) { + vfloat32m1_t _acc = vfmv_v_f_f32m1(bias0, vl); + + vfloat32m1_t _r0_0_6, _r0_1_7; + vfloat32m1_t _r1_0_6, _r1_1_7; + vfloat32m1_t _r2_0_6, _r2_1_7; + + vlseg2e32_v_f32m1(&_r0_0_6, &_r0_1_7, r0, vl); + r0 += 2; + vfloat32m1_t _r0_2_8 = vlse32_v_f32m1(r0, 2 * sizeof(float), vl); + r0 += (w_loop - 1) * 2; + + vlseg2e32_v_f32m1(&_r1_0_6, &_r1_1_7, r1, vl); + r1 += 2; + vfloat32m1_t _r1_2_8 = vlse32_v_f32m1(r1, 2 * sizeof(float), vl); + r1 += (w_loop - 1) * 2; + + vlseg2e32_v_f32m1(&_r2_0_6, &_r2_1_7, r2, vl); + r2 += 2; + vfloat32m1_t _r2_2_8 = vlse32_v_f32m1(r2, 2 * sizeof(float), vl); + r2 += (w_loop - 1) * 2; + + _acc = vfmacc_vf_f32m1(_acc, k00, _r0_0_6, vl); + _acc = vfmacc_vf_f32m1(_acc, k01, _r0_1_7, vl); + _acc = vfmacc_vf_f32m1(_acc, k02, _r0_2_8, vl); + _acc = vfmacc_vf_f32m1(_acc, k10, _r1_0_6, vl); + _acc = vfmacc_vf_f32m1(_acc, k11, _r1_1_7, vl); + _acc = vfmacc_vf_f32m1(_acc, k12, _r1_2_8, vl); + _acc = vfmacc_vf_f32m1(_acc, k20, _r2_0_6, vl); + _acc = vfmacc_vf_f32m1(_acc, k21, _r2_1_7, vl); + _acc = vfmacc_vf_f32m1(_acc, k22, _r2_2_8, vl); + + vse32_v_f32m1(outptr0, _acc, vl); + outptr0 += vl; + } + + vl = vsetvl_e32m1(3); + + vfloat32m1_t _k0 = vle32_v_f32m1(kernel0, vl); + vfloat32m1_t _k1 = vle32_v_f32m1(kernel0 + 3, vl); + vfloat32m1_t _k2 = vle32_v_f32m1(kernel0 + 6, vl); + + vfloat32m1_t _tmp = vfmv_v_f_f32m1(bias0, vl); + // h1w_tail + for (; w < out_w; w++) { + vfloat32m1_t _r0 = vle32_v_f32m1(r0, vl); + vfloat32m1_t _r1 = vle32_v_f32m1(r1, vl); + vfloat32m1_t _r2 = vle32_v_f32m1(r2, vl); + + vfloat32m1_t _acc0 = vfmul_vv_f32m1(_k0, _r0, vl); + _acc0 = vfmacc_vv_f32m1(_acc0, _k1, _r1, vl); + _acc0 = vfmacc_vv_f32m1(_acc0, _k2, _r2, vl); + vfloat32m1_t _acc0_tmp = + vfredusum_vs_f32m1_f32m1(vundefined_f32m1(), _acc0, _tmp, vl); + float res0 = vfmv_f_s_f32m1_f32(_acc0_tmp); + + r0 += 2; + r1 += 2; + r2 += 2; + *outptr0++ = res0; + } + + r0 += tailstep; + r1 += tailstep; + r2 += tailstep; + } + } + + csi_mem_free(input_padd_buf); + return CSINN_TRUE; +} diff --git a/source/thead_rvv/depthwise_convolution_3x3_fp16.c b/source/thead_rvv/depthwise_convolution_3x3_fp16.c new file mode 100644 index 00000000..6af0b363 --- /dev/null +++ b/source/thead_rvv/depthwise_convolution_3x3_fp16.c @@ -0,0 +1,513 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +int csi_nn_rvv_dwconv3x3s1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *kernel_data = (__fp16 *)kernel->data; + __fp16 *bias_data = (__fp16 *)bias->data; + + int32_t batch = input->dim[0]; + int32_t in_c = input->dim[1]; // group = in_channel + int32_t in_h = input->dim[2]; + int32_t in_w = input->dim[3]; + + int32_t out_c = output->dim[1]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + __fp16 *input_padd_buf = + (__fp16 *)csi_mem_alloc(in_c * (in_h + params->pad_top + params->pad_down) * + (in_w + params->pad_left + params->pad_right) * sizeof(float)); + + csi_nn_rvv_pad_input_fp16( + input_data, input_padd_buf, in_c, in_h, in_w, in_h + params->pad_top + params->pad_down, + in_w + params->pad_left + params->pad_right, params->pad_top, params->pad_left); + + in_h = in_h + params->pad_top + params->pad_down; + in_w = in_w + params->pad_left + params->pad_right; + +#pragma omp parallel for num_threads(1) + for (int c = 0; c < in_c; c++) { + __fp16 *out = output_data + c * out_h * out_w; + __fp16 *outptr0 = out; + __fp16 *outptr1 = outptr0 + out_w; + + const __fp16 bias0 = bias_data ? bias_data[c] : 0.0f; + + __fp16 *img0 = input_padd_buf + c * in_h * in_w; + __fp16 *r0 = img0; + __fp16 *r1 = r0 + in_w; + __fp16 *r2 = r1 + in_w; + __fp16 *r3 = r2 + in_w; + + const __fp16 *kernel0 = kernel_data + c * 9; + + __fp16 k00 = kernel0[0]; + __fp16 k01 = kernel0[1]; + __fp16 k02 = kernel0[2]; + __fp16 k10 = kernel0[3]; + __fp16 k11 = kernel0[4]; + __fp16 k12 = kernel0[5]; + __fp16 k20 = kernel0[6]; + __fp16 k21 = kernel0[7]; + __fp16 k22 = kernel0[8]; + + int vl; + int w_loop = csrr_vlenb() / sizeof(__fp16); // VLEN128=8 VLEN256=16 + + int h = 0; + // h2 loop + for (; h + 1 < out_h; h += 2) { + vl = vsetvl_e16m1(w_loop); + + int w = 0; + // h2w8 loop + for (; w + w_loop - 1 < out_w; w += w_loop) { + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias0, vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(bias0, vl); + + vfloat16m1_t _r0_0_7 = vle16_v_f16m1(r0, vl); + vfloat16m1_t _r0_1_8 = vle16_v_f16m1(r0 + 1, vl); + vfloat16m1_t _r0_2_9 = vle16_v_f16m1(r0 + 2, vl); + + vfloat16m1_t _r1_0_7 = vle16_v_f16m1(r1, vl); + vfloat16m1_t _r1_1_8 = vle16_v_f16m1(r1 + 1, vl); + vfloat16m1_t _r1_2_9 = vle16_v_f16m1(r1 + 2, vl); + + vfloat16m1_t _r2_0_7 = vle16_v_f16m1(r2, vl); + vfloat16m1_t _r2_1_8 = vle16_v_f16m1(r2 + 1, vl); + vfloat16m1_t _r2_2_9 = vle16_v_f16m1(r2 + 2, vl); + + vfloat16m1_t _r3_0_7 = vle16_v_f16m1(r3, vl); + vfloat16m1_t _r3_1_8 = vle16_v_f16m1(r3 + 1, vl); + vfloat16m1_t _r3_2_9 = vle16_v_f16m1(r3 + 2, vl); + + _acc0 = vfmacc_vf_f16m1(_acc0, k00, _r0_0_7, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k01, _r0_1_8, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k02, _r0_2_9, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k10, _r1_0_7, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k11, _r1_1_8, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k12, _r1_2_9, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k20, _r2_0_7, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k21, _r2_1_8, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k22, _r2_2_9, vl); + + _acc1 = vfmacc_vf_f16m1(_acc1, k00, _r1_0_7, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k01, _r1_1_8, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k02, _r1_2_9, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k10, _r2_0_7, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k11, _r2_1_8, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k12, _r2_2_9, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k20, _r3_0_7, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k21, _r3_1_8, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k22, _r3_2_9, vl); + + vse16_v_f16m1(outptr0, _acc0, vl); + vse16_v_f16m1(outptr1, _acc1, vl); + + r0 += vl; + r1 += vl; + r2 += vl; + r3 += vl; + outptr0 += vl; + outptr1 += vl; + } + + // h2w4 + for (; w + w_loop / 2 - 1 < out_w; w += w_loop / 2) { + vl = vsetvl_e16m1(w_loop / 2); + + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias0, vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(bias0, vl); + + vfloat16m1_t _r0_0_3 = vle16_v_f16m1(r0, vl); + vfloat16m1_t _r0_1_4 = vle16_v_f16m1(r0 + 1, vl); + vfloat16m1_t _r0_2_5 = vle16_v_f16m1(r0 + 2, vl); + + vfloat16m1_t _r1_0_3 = vle16_v_f16m1(r1, vl); + vfloat16m1_t _r1_1_4 = vle16_v_f16m1(r1 + 1, vl); + vfloat16m1_t _r1_2_5 = vle16_v_f16m1(r1 + 2, vl); + + vfloat16m1_t _r2_0_3 = vle16_v_f16m1(r2, vl); + vfloat16m1_t _r2_1_4 = vle16_v_f16m1(r2 + 1, vl); + vfloat16m1_t _r2_2_5 = vle16_v_f16m1(r2 + 2, vl); + + vfloat16m1_t _r3_0_3 = vle16_v_f16m1(r3, vl); + vfloat16m1_t _r3_1_4 = vle16_v_f16m1(r3 + 1, vl); + vfloat16m1_t _r3_2_5 = vle16_v_f16m1(r3 + 2, vl); + + _acc0 = vfmacc_vf_f16m1(_acc0, k00, _r0_0_3, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k01, _r0_1_4, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k02, _r0_2_5, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k10, _r1_0_3, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k11, _r1_1_4, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k12, _r1_2_5, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k20, _r2_0_3, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k21, _r2_1_4, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k22, _r2_2_5, vl); + + _acc1 = vfmacc_vf_f16m1(_acc1, k00, _r1_0_3, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k01, _r1_1_4, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k02, _r1_2_5, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k10, _r2_0_3, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k11, _r2_1_4, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k12, _r2_2_5, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k20, _r3_0_3, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k21, _r3_1_4, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k22, _r3_2_5, vl); + + vse16_v_f16m1(outptr0, _acc0, vl); + vse16_v_f16m1(outptr1, _acc1, vl); + + r0 += vl; + r1 += vl; + r2 += vl; + r3 += vl; + outptr0 += vl; + outptr1 += vl; + } + + vl = vsetvl_e16m1(3); + + vfloat16m1_t _k0 = vle16_v_f16m1(kernel0, vl); + vfloat16m1_t _k1 = vle16_v_f16m1(kernel0 + 3, vl); + vfloat16m1_t _k2 = vle16_v_f16m1(kernel0 + 6, vl); + + vfloat16m1_t _tmp = vfmv_v_f_f16m1(bias0, vl); + + // h2w_tail + for (; w < out_w; w++) { + vfloat16m1_t _r0 = vle16_v_f16m1(r0, vl); + vfloat16m1_t _r1 = vle16_v_f16m1(r1, vl); + vfloat16m1_t _r2 = vle16_v_f16m1(r2, vl); + vfloat16m1_t _r3 = vle16_v_f16m1(r3, vl); + + vfloat16m1_t _acc0 = vfmul_vv_f16m1(_k0, _r0, vl); + _acc0 = vfmacc_vv_f16m1(_acc0, _k1, _r1, vl); + _acc0 = vfmacc_vv_f16m1(_acc0, _k2, _r2, vl); + vfloat16m1_t _acc0_tmp = + vfredusum_vs_f16m1_f16m1(vundefined_f16m1(), _acc0, _tmp, vl); + __fp16 res0 = vfmv_f_s_f16m1_f16(_acc0_tmp); + + vfloat16m1_t _acc1 = vfmul_vv_f16m1(_k0, _r1, vl); + _acc1 = vfmacc_vv_f16m1(_acc1, _k1, _r2, vl); + _acc1 = vfmacc_vv_f16m1(_acc1, _k2, _r3, vl); + vfloat16m1_t _acc1_tmp = + vfredusum_vs_f16m1_f16m1(vundefined_f16m1(), _acc1, _tmp, vl); + __fp16 res1 = vfmv_f_s_f16m1_f16(_acc1_tmp); + + r0++; + r1++; + r2++; + r3++; + *outptr0++ = res0; + *outptr1++ = res1; + } + r0 += 2 + in_w; + r1 += 2 + in_w; + r2 += 2 + in_w; + r3 += 2 + in_w; + + outptr0 += out_w; + outptr1 += out_w; + } + + // h1 + for (; h < out_h; h++) { + vl = vsetvl_e16m1(w_loop); + int w = 0; + // h1w8 loop 使用了 v 寄存器一半位宽资源 + for (; w + w_loop - 1 < out_w; w += w_loop) { + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias0, vl); + + vfloat16m1_t _r0_0_7 = vle16_v_f16m1(r0, vl); + vfloat16m1_t _r0_1_8 = vle16_v_f16m1(r0 + 1, vl); + vfloat16m1_t _r0_2_9 = vle16_v_f16m1(r0 + 2, vl); + + vfloat16m1_t _r1_0_7 = vle16_v_f16m1(r1, vl); + vfloat16m1_t _r1_1_8 = vle16_v_f16m1(r1 + 1, vl); + vfloat16m1_t _r1_2_9 = vle16_v_f16m1(r1 + 2, vl); + + vfloat16m1_t _r2_0_7 = vle16_v_f16m1(r2, vl); + vfloat16m1_t _r2_1_8 = vle16_v_f16m1(r2 + 1, vl); + vfloat16m1_t _r2_2_9 = vle16_v_f16m1(r2 + 2, vl); + + _acc0 = vfmacc_vf_f16m1(_acc0, k00, _r0_0_7, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k01, _r0_1_8, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k02, _r0_2_9, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k10, _r1_0_7, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k11, _r1_1_8, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k12, _r1_2_9, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k20, _r2_0_7, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k21, _r2_1_8, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k22, _r2_2_9, vl); + + vse16_v_f16m1(outptr0, _acc0, vl); + + r0 += vl; + r1 += vl; + r2 += vl; + outptr0 += vl; + } + + // h1w4 + for (; w + w_loop / 2 - 1 < out_w; w += w_loop / 2) { + vl = vsetvl_e16m1(w_loop / 2); + + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias0, vl); + + vfloat16m1_t _r0_0_3 = vle16_v_f16m1(r0, vl); + vfloat16m1_t _r0_1_4 = vle16_v_f16m1(r0 + 1, vl); + vfloat16m1_t _r0_2_5 = vle16_v_f16m1(r0 + 2, vl); + + vfloat16m1_t _r1_0_3 = vle16_v_f16m1(r1, vl); + vfloat16m1_t _r1_1_4 = vle16_v_f16m1(r1 + 1, vl); + vfloat16m1_t _r1_2_5 = vle16_v_f16m1(r1 + 2, vl); + + vfloat16m1_t _r2_0_3 = vle16_v_f16m1(r2, vl); + vfloat16m1_t _r2_1_4 = vle16_v_f16m1(r2 + 1, vl); + vfloat16m1_t _r2_2_5 = vle16_v_f16m1(r2 + 2, vl); + + _acc0 = vfmacc_vf_f16m1(_acc0, k00, _r0_0_3, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k01, _r0_1_4, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k02, _r0_2_5, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k10, _r1_0_3, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k11, _r1_1_4, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k12, _r1_2_5, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k20, _r2_0_3, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k21, _r2_1_4, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, k22, _r2_2_5, vl); + + vse16_v_f16m1(outptr0, _acc0, vl); + + r0 += vl; + r1 += vl; + r2 += vl; + outptr0 += vl; + } + vl = vsetvl_e16m1(3); + + vfloat16m1_t _k0 = vle16_v_f16m1(kernel0, vl); + vfloat16m1_t _k1 = vle16_v_f16m1(kernel0 + 3, vl); + vfloat16m1_t _k2 = vle16_v_f16m1(kernel0 + 6, vl); + + vfloat16m1_t _tmp = vfmv_v_f_f16m1(bias0, vl); + // h1w_tail + for (; w < out_w; w++) { + vfloat16m1_t _r0 = vle16_v_f16m1(r0, vl); + vfloat16m1_t _r1 = vle16_v_f16m1(r1, vl); + vfloat16m1_t _r2 = vle16_v_f16m1(r2, vl); + + vfloat16m1_t _acc0 = vfmul_vv_f16m1(_k0, _r0, vl); + _acc0 = vfmacc_vv_f16m1(_acc0, _k1, _r1, vl); + _acc0 = vfmacc_vv_f16m1(_acc0, _k2, _r2, vl); + vfloat16m1_t _acc0_tmp = + vfredusum_vs_f16m1_f16m1(vundefined_f16m1(), _acc0, _tmp, vl); + float res0 = vfmv_f_s_f16m1_f16(_acc0_tmp); + + r0++; + r1++; + r2++; + *outptr0++ = res0; + } + } + } + csi_mem_free(input_padd_buf); + return CSINN_TRUE; +} + +int csi_nn_rvv_dwconv3x3s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *kernel_data = (__fp16 *)kernel->data; + __fp16 *bias_data = (__fp16 *)bias->data; + + int32_t batch = input->dim[0]; + int32_t in_c = input->dim[1]; // group = in_channel + int32_t in_h = input->dim[2]; + int32_t in_w = input->dim[3]; + + int32_t out_c = output->dim[1]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + __fp16 *input_padd_buf = + (__fp16 *)csi_mem_alloc(in_c * (in_h + params->pad_top + params->pad_down) * + (in_w + params->pad_left + params->pad_right) * sizeof(float)); + + csi_nn_rvv_pad_input_fp16( + input_data, input_padd_buf, in_c, in_h, in_w, in_h + params->pad_top + params->pad_down, + in_w + params->pad_left + params->pad_right, params->pad_top, params->pad_left); + + in_h = in_h + params->pad_top + params->pad_down; + in_w = in_w + params->pad_left + params->pad_right; + + int tailstep = in_w - 2 * out_w + in_w; + +#pragma omp parallel for num_threads(1) + for (int c = 0; c < in_c; c++) { + __fp16 *out = output_data + c * out_h * out_w; + __fp16 *outptr0 = out; + + const __fp16 bias0 = bias_data ? bias_data[c] : 0.0f; + + __fp16 *img0 = input_padd_buf + c * in_h * in_w; + __fp16 *r0 = img0; + __fp16 *r1 = r0 + in_w; + __fp16 *r2 = r1 + in_w; + + const __fp16 *kernel0 = kernel_data + c * 9; + + __fp16 k00 = kernel0[0]; + __fp16 k01 = kernel0[1]; + __fp16 k02 = kernel0[2]; + __fp16 k10 = kernel0[3]; + __fp16 k11 = kernel0[4]; + __fp16 k12 = kernel0[5]; + __fp16 k20 = kernel0[6]; + __fp16 k21 = kernel0[7]; + __fp16 k22 = kernel0[8]; + int vl; + int w_loop = csrr_vlenb() / sizeof(__fp16); // VLEN128=8 VLEN256=16 + + for (int h = 0; h < out_h; h++) { + vl = vsetvl_e16m1(w_loop); + int w = 0; + // h1w8 loop + for (; w + w_loop - 1 < out_w; w += w_loop) { + vfloat16m1_t _acc = vfmv_v_f_f16m1(bias0, vl); + + vfloat16m1_t _r0_0_6, _r0_1_7; + vfloat16m1_t _r1_0_6, _r1_1_7; + vfloat16m1_t _r2_0_6, _r2_1_7; + + vlseg2e16_v_f16m1(&_r0_0_6, &_r0_1_7, r0, vl); + r0 += 2; + vfloat16m1_t _r0_2_8 = vlse16_v_f16m1(r0, 2 * 2, vl); + r0 += (w_loop - 1) * 2; + + vlseg2e16_v_f16m1(&_r1_0_6, &_r1_1_7, r1, vl); + r1 += 2; + vfloat16m1_t _r1_2_8 = vlse16_v_f16m1(r1, 2 * 2, vl); + r1 += (w_loop - 1) * 2; + + vlseg2e16_v_f16m1(&_r2_0_6, &_r2_1_7, r2, vl); + r2 += 2; + vfloat16m1_t _r2_2_8 = vlse16_v_f16m1(r2, 2 * 2, vl); + r2 += (w_loop - 1) * 2; + + _acc = vfmacc_vf_f16m1(_acc, k00, _r0_0_6, vl); + _acc = vfmacc_vf_f16m1(_acc, k01, _r0_1_7, vl); + _acc = vfmacc_vf_f16m1(_acc, k02, _r0_2_8, vl); + _acc = vfmacc_vf_f16m1(_acc, k10, _r1_0_6, vl); + _acc = vfmacc_vf_f16m1(_acc, k11, _r1_1_7, vl); + _acc = vfmacc_vf_f16m1(_acc, k12, _r1_2_8, vl); + _acc = vfmacc_vf_f16m1(_acc, k20, _r2_0_6, vl); + _acc = vfmacc_vf_f16m1(_acc, k21, _r2_1_7, vl); + _acc = vfmacc_vf_f16m1(_acc, k22, _r2_2_8, vl); + + vse16_v_f16m1(outptr0, _acc, vl); + outptr0 += vl; + } + + // h1w4 + for (; w + w_loop / 2 - 1 < out_w; w += w_loop / 2) { + vl = vsetvl_e16m1(w_loop / 2); + vfloat16m1_t _acc = vfmv_v_f_f16m1(bias0, vl); + + vfloat16m1_t _r0_0_3, _r0_1_4; + vfloat16m1_t _r1_0_3, _r1_1_4; + vfloat16m1_t _r2_0_3, _r2_1_4; + + vlseg2e16_v_f16m1(&_r0_0_3, &_r0_1_4, r0, vl); + r0 += 2; + vfloat16m1_t _r0_2_5 = vlse16_v_f16m1(r0, 2 * 2, vl); + r0 += w_loop - 2; + + vlseg2e16_v_f16m1(&_r1_0_3, &_r1_1_4, r1, vl); + r1 += 2; + vfloat16m1_t _r1_2_5 = vlse16_v_f16m1(r1, 2 * 2, vl); + r1 += w_loop - 2; + + vlseg2e16_v_f16m1(&_r2_0_3, &_r2_1_4, r2, vl); + r2 += 2; + vfloat16m1_t _r2_2_5 = vlse16_v_f16m1(r2, 2 * 2, vl); + r2 += w_loop - 2; + + _acc = vfmacc_vf_f16m1(_acc, k00, _r0_0_3, vl); + _acc = vfmacc_vf_f16m1(_acc, k01, _r0_1_4, vl); + _acc = vfmacc_vf_f16m1(_acc, k02, _r0_2_5, vl); + _acc = vfmacc_vf_f16m1(_acc, k10, _r1_0_3, vl); + _acc = vfmacc_vf_f16m1(_acc, k11, _r1_1_4, vl); + _acc = vfmacc_vf_f16m1(_acc, k12, _r1_2_5, vl); + _acc = vfmacc_vf_f16m1(_acc, k20, _r2_0_3, vl); + _acc = vfmacc_vf_f16m1(_acc, k21, _r2_1_4, vl); + _acc = vfmacc_vf_f16m1(_acc, k22, _r2_2_5, vl); + + vse16_v_f16m1(outptr0, _acc, vl); + outptr0 += vl; + } + vl = vsetvl_e16m1(3); + + vfloat16m1_t _k0 = vle16_v_f16m1(kernel0, vl); + vfloat16m1_t _k1 = vle16_v_f16m1(kernel0 + 3, vl); + vfloat16m1_t _k2 = vle16_v_f16m1(kernel0 + 6, vl); + + vfloat16m1_t _tmp = vfmv_v_f_f16m1(bias0, vl); + // h1w_tail + for (; w < out_w; w++) { + vfloat16m1_t _r0 = vle16_v_f16m1(r0, vl); + vfloat16m1_t _r1 = vle16_v_f16m1(r1, vl); + vfloat16m1_t _r2 = vle16_v_f16m1(r2, vl); + + vfloat16m1_t _acc0 = vfmul_vv_f16m1(_k0, _r0, vl); + _acc0 = vfmacc_vv_f16m1(_acc0, _k1, _r1, vl); + _acc0 = vfmacc_vv_f16m1(_acc0, _k2, _r2, vl); + vfloat16m1_t _acc0_tmp = + vfredusum_vs_f16m1_f16m1(vundefined_f16m1(), _acc0, _tmp, vl); + __fp16 res0 = vfmv_f_s_f16m1_f16(_acc0_tmp); + + r0 += 2; + r1 += 2; + r2 += 2; + *outptr0++ = res0; + } + + r0 += tailstep; + r1 += tailstep; + r2 += tailstep; + } + } + + csi_mem_free(input_padd_buf); + return CSINN_TRUE; +} diff --git a/source/thead_rvv/depthwise_convolution_3x3_int4.c b/source/thead_rvv/depthwise_convolution_3x3_int4.c new file mode 100644 index 00000000..fda312fb --- /dev/null +++ b/source/thead_rvv/depthwise_convolution_3x3_int4.c @@ -0,0 +1,391 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +static vint8m1_t requantize_m4(vint32m4_t _src, int32_t multiplier, int32_t shift, int32_t out_zp, + int vl) +{ + vint32m4_t _mulh = vmulh_vx_i32m4(_src, multiplier, vl); + _mulh = vssra_vx_i32m4(_mulh, -shift - 1, vl); + _mulh = vadd_vx_i32m4(_mulh, out_zp, vl); + vint16m2_t _tmp1 = vnclip_wx_i16m2(_mulh, 0, vl); + vint8m1_t _tmp2 = vnclip_wx_i8m1(_tmp1, 0, vl); + return _tmp2; +} + +int csi_nn_rvv_dwconv3x3s1_int4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + int8_t *kernel_data = (int8_t *)kernel->data; + int32_t *bias_data = (int32_t *)bias->data; + + int32_t batch = input->dim[0]; + int32_t in_h = input->dim[1]; + int32_t in_w = input->dim[2]; + int32_t in_c = input->dim[3]; // group = in_channel + + int32_t out_h = output->dim[1]; + int32_t out_w = output->dim[2]; + int32_t out_c = output->dim[3]; + + int8_t *input_padd_buf = (int8_t *)csi_mem_alloc((in_h + params->pad_top + params->pad_down) * + (in_w + params->pad_left + params->pad_right) * + in_c * sizeof(int8_t)); + + int8_t pad_value = input->qinfo->zero_point; + csi_nn_rvv_pad_input_int4_trans_int8( + input_data, input_padd_buf, in_c, in_h, in_w, in_h + params->pad_top + params->pad_down, + in_w + params->pad_left + params->pad_right, params->pad_top, params->pad_left, + input->qinfo->zero_point); + + int8_t *kernel_tran_buf = (int8_t *)csi_mem_alloc(9 * in_c * sizeof(int8_t)); + int8_t *output_tran_buf = (int8_t *)csi_mem_alloc(out_h * out_w * out_c * sizeof(int8_t)); + + csi_nn_rvv_int4_trans_int8(kernel_data, kernel_tran_buf, 9 * in_c); + + in_h = in_h + params->pad_top + params->pad_down; + in_w = in_w + params->pad_left + params->pad_right; + +#pragma omp parallel for num_threads(1) + for (int c = 0; c < in_c; c++) { + int8_t *outptr0 = output_tran_buf + c; + int8_t *outptr1 = outptr0 + out_w * out_c; + + // please use fuse_zp2bias option in hhb, thus bias_data wont be NULL + int32_t bias0 = bias_data[c]; + + int8_t *img0 = input_padd_buf + c; + int8_t *r0 = img0; + int8_t *r1 = r0 + in_w * in_c; + int8_t *r2 = r1 + in_w * in_c; + int8_t *r3 = r2 + in_w * in_c; + + const int8_t *kernel0 = kernel_tran_buf + c; + + int8_t k00 = kernel0[0]; + int8_t k01 = kernel0[1 * in_c]; + int8_t k02 = kernel0[2 * in_c]; + int8_t k10 = kernel0[3 * in_c]; + int8_t k11 = kernel0[4 * in_c]; + int8_t k12 = kernel0[5 * in_c]; + int8_t k20 = kernel0[6 * in_c]; + int8_t k21 = kernel0[7 * in_c]; + int8_t k22 = kernel0[8 * in_c]; + int vl; + int h = 0; + // h2 loop + for (; h + 1 < out_h; h += 2) { + int w = out_w; + // h2w8 loop + while (w > 0) { + vl = vsetvl_e32m4(w); + vint32m4_t _acc0 = vmv_v_x_i32m4(bias0, vl); + vint32m4_t _acc1 = vmv_v_x_i32m4(bias0, vl); + + vint8m1_t _r0_0_7 = vlse8_v_i8m1(r0, in_c * sizeof(int8_t), vl); + vint8m1_t _r0_1_8 = vlse8_v_i8m1(r0 + 1 * in_c, in_c * sizeof(int8_t), vl); + vint8m1_t _r0_2_9 = vlse8_v_i8m1(r0 + 2 * in_c, in_c * sizeof(int8_t), vl); + + vint8m1_t _r1_0_7 = vlse8_v_i8m1(r1, in_c * sizeof(int8_t), vl); + vint8m1_t _r1_1_8 = vlse8_v_i8m1(r1 + 1 * in_c, in_c * sizeof(int8_t), vl); + vint8m1_t _r1_2_9 = vlse8_v_i8m1(r1 + 2 * in_c, in_c * sizeof(int8_t), vl); + + vint8m1_t _r2_0_7 = vlse8_v_i8m1(r2, in_c * sizeof(int8_t), vl); + vint8m1_t _r2_1_8 = vlse8_v_i8m1(r2 + 1 * in_c, in_c * sizeof(int8_t), vl); + vint8m1_t _r2_2_9 = vlse8_v_i8m1(r2 + 2 * in_c, in_c * sizeof(int8_t), vl); + + vint8m1_t _r3_0_7 = vlse8_v_i8m1(r3, in_c * sizeof(int8_t), vl); + vint8m1_t _r3_1_8 = vlse8_v_i8m1(r3 + 1 * in_c, in_c * sizeof(int8_t), vl); + vint8m1_t _r3_2_9 = vlse8_v_i8m1(r3 + 2 * in_c, in_c * sizeof(int8_t), vl); + + vint16m2_t _r0_0_7_w = vwadd_vx_i16m2(_r0_0_7, 0, vl); // widden 8->16 + vint16m2_t _r0_1_8_w = vwadd_vx_i16m2(_r0_1_8, 0, vl); + vint16m2_t _r0_2_9_w = vwadd_vx_i16m2(_r0_2_9, 0, vl); + + vint16m2_t _r1_0_7_w = vwadd_vx_i16m2(_r1_0_7, 0, vl); + vint16m2_t _r1_1_8_w = vwadd_vx_i16m2(_r1_1_8, 0, vl); + vint16m2_t _r1_2_9_w = vwadd_vx_i16m2(_r1_2_9, 0, vl); + + vint16m2_t _r2_0_7_w = vwadd_vx_i16m2(_r2_0_7, 0, vl); + vint16m2_t _r2_1_8_w = vwadd_vx_i16m2(_r2_1_8, 0, vl); + vint16m2_t _r2_2_9_w = vwadd_vx_i16m2(_r2_2_9, 0, vl); + + vint16m2_t _r3_0_7_w = vwadd_vx_i16m2(_r3_0_7, 0, vl); + vint16m2_t _r3_1_8_w = vwadd_vx_i16m2(_r3_1_8, 0, vl); + vint16m2_t _r3_2_9_w = vwadd_vx_i16m2(_r3_2_9, 0, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k00, _r0_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k01, _r0_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k02, _r0_2_9_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k00, _r1_0_7_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k01, _r1_1_8_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k02, _r1_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k10, _r1_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k11, _r1_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k12, _r1_2_9_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k10, _r2_0_7_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k11, _r2_1_8_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k12, _r2_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k20, _r2_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k21, _r2_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k22, _r2_2_9_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k20, _r3_0_7_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k21, _r3_1_8_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k22, _r3_2_9_w, vl); + + vint8m1_t _res0, _res1; + if (kernel->quant_channel > 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[c].multiplier, + kernel->qinfo[c].shift, output->qinfo->zero_point, vl); + _res1 = requantize_m4(_acc1, kernel->qinfo[c].multiplier, + kernel->qinfo[c].shift, output->qinfo->zero_point, vl); + } else if (kernel->quant_channel == 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[0].multiplier, + kernel->qinfo[0].shift, output->qinfo->zero_point, vl); + _res1 = requantize_m4(_acc1, kernel->qinfo[0].multiplier, + kernel->qinfo[0].shift, output->qinfo->zero_point, vl); + } + vsse8_v_i8m1(outptr0, in_c * sizeof(int8_t), _res0, vl); + vsse8_v_i8m1(outptr1, in_c * sizeof(int8_t), _res1, vl); + + r0 += vl * in_c; + r1 += vl * in_c; + r2 += vl * in_c; + r3 += vl * in_c; + outptr0 += vl * in_c; + outptr1 += vl * in_c; + w -= vl; + } + r0 += (2 + in_w) * in_c; + r1 += (2 + in_w) * in_c; + r2 += (2 + in_w) * in_c; + r3 += (2 + in_w) * in_c; + outptr0 += out_w * in_c; + outptr1 += out_w * in_c; + } + for (; h < out_h; h++) { + int w = out_w; + // h2w8 loop + while (w > 0) { + vl = vsetvl_e32m4(w); + vint32m4_t _acc0 = vmv_v_x_i32m4(bias0, vl); + + vint8m1_t _r0_0_7 = vlse8_v_i8m1(r0, in_c * sizeof(int8_t), vl); + vint8m1_t _r0_1_8 = vlse8_v_i8m1(r0 + 1 * in_c, in_c * sizeof(int8_t), vl); + vint8m1_t _r0_2_9 = vlse8_v_i8m1(r0 + 2 * in_c, in_c * sizeof(int8_t), vl); + + vint8m1_t _r1_0_7 = vlse8_v_i8m1(r1, in_c * sizeof(int8_t), vl); + vint8m1_t _r1_1_8 = vlse8_v_i8m1(r1 + 1 * in_c, in_c * sizeof(int8_t), vl); + vint8m1_t _r1_2_9 = vlse8_v_i8m1(r1 + 2 * in_c, in_c * sizeof(int8_t), vl); + + vint8m1_t _r2_0_7 = vlse8_v_i8m1(r2, in_c * sizeof(int8_t), vl); + vint8m1_t _r2_1_8 = vlse8_v_i8m1(r2 + 1 * in_c, in_c * sizeof(int8_t), vl); + vint8m1_t _r2_2_9 = vlse8_v_i8m1(r2 + 2 * in_c, in_c * sizeof(int8_t), vl); + + vint16m2_t _r0_0_7_w = vwadd_vx_i16m2(_r0_0_7, 0, vl); // widden 8->16 + vint16m2_t _r0_1_8_w = vwadd_vx_i16m2(_r0_1_8, 0, vl); + vint16m2_t _r0_2_9_w = vwadd_vx_i16m2(_r0_2_9, 0, vl); + + vint16m2_t _r1_0_7_w = vwadd_vx_i16m2(_r1_0_7, 0, vl); + vint16m2_t _r1_1_8_w = vwadd_vx_i16m2(_r1_1_8, 0, vl); + vint16m2_t _r1_2_9_w = vwadd_vx_i16m2(_r1_2_9, 0, vl); + + vint16m2_t _r2_0_7_w = vwadd_vx_i16m2(_r2_0_7, 0, vl); + vint16m2_t _r2_1_8_w = vwadd_vx_i16m2(_r2_1_8, 0, vl); + vint16m2_t _r2_2_9_w = vwadd_vx_i16m2(_r2_2_9, 0, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k00, _r0_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k01, _r0_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k02, _r0_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k10, _r1_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k11, _r1_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k12, _r1_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k20, _r2_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k21, _r2_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k22, _r2_2_9_w, vl); + + vint8m1_t _res0; + if (kernel->quant_channel > 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[c].multiplier, + kernel->qinfo[c].shift, output->qinfo->zero_point, vl); + } else if (kernel->quant_channel == 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[0].multiplier, + kernel->qinfo[0].shift, output->qinfo->zero_point, vl); + } + vsse8_v_i8m1(outptr0, in_c * sizeof(int8_t), _res0, vl); + + r0 += vl * in_c; + r1 += vl * in_c; + r2 += vl * in_c; + outptr0 += vl * in_c; + w -= vl; + } + } + } + csi_nn_rvv_int8_to_int4(output_tran_buf, output_data, out_h * out_w * in_c); + csi_mem_free(input_padd_buf); + csi_mem_free(kernel_tran_buf); + csi_mem_free(output_tran_buf); + return CSINN_TRUE; +} + +int csi_nn_rvv_dwconv3x3s2_int4(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + int8_t *kernel_data = (int8_t *)kernel->data; + int32_t *bias_data = (int32_t *)bias->data; + + int32_t batch = input->dim[0]; + int32_t in_h = input->dim[1]; + int32_t in_w = input->dim[2]; + int32_t in_c = input->dim[3]; + + int32_t out_h = output->dim[1]; + int32_t out_w = output->dim[2]; + int32_t out_c = output->dim[3]; + + int8_t *input_padd_buf = (int8_t *)csi_mem_alloc((in_h + params->pad_top + params->pad_down) * + (in_w + params->pad_left + params->pad_right) * + in_c * sizeof(int8_t)); + + csi_nn_rvv_pad_input_int4_trans_int8( + input_data, input_padd_buf, in_c, in_h, in_w, in_h + params->pad_top + params->pad_down, + in_w + params->pad_left + params->pad_right, params->pad_top, params->pad_left, + input->qinfo->zero_point); + + int8_t *kernel_tran_buf = (int8_t *)csi_mem_alloc(9 * in_c * sizeof(int8_t)); + int8_t *output_tran_buf = (int8_t *)csi_mem_alloc(out_h * out_w * out_c * sizeof(int8_t)); + + csi_nn_rvv_int4_trans_int8(kernel_data, kernel_tran_buf, 9 * in_c); + + in_h = in_h + params->pad_top + params->pad_down; + in_w = in_w + params->pad_left + params->pad_right; + + int tailstep = (in_w - 2 * out_w + in_w) * in_c; + +#pragma omp parallel for num_threads(1) + for (int c = 0; c < in_c; c++) { + int8_t *outptr0 = output_tran_buf + c; + + int32_t bias0 = bias_data[c]; + + int8_t *img0 = input_padd_buf + c; + int8_t *r0 = img0; + int8_t *r1 = r0 + in_w * in_c; + int8_t *r2 = r1 + in_w * in_c; + + const int8_t *kernel0 = kernel_tran_buf + c; + + int8_t k00 = kernel0[0]; + int8_t k01 = kernel0[1 * in_c]; + int8_t k02 = kernel0[2 * in_c]; + int8_t k10 = kernel0[3 * in_c]; + int8_t k11 = kernel0[4 * in_c]; + int8_t k12 = kernel0[5 * in_c]; + int8_t k20 = kernel0[6 * in_c]; + int8_t k21 = kernel0[7 * in_c]; + int8_t k22 = kernel0[8 * in_c]; + int vl; + + for (int h = 0; h < out_h; h++) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e32m4(w); + vint32m4_t _acc0 = vmv_v_x_i32m4(bias0, vl); + + vint8m1_t _r0_0_7 = vlse8_v_i8m1(r0, 2 * in_c * sizeof(int8_t), vl); + r0 += in_c; + vint8m1_t _r0_1_8 = vlse8_v_i8m1(r0, 2 * in_c * sizeof(int8_t), vl); + r0 += in_c; + vint8m1_t _r0_2_9 = vlse8_v_i8m1(r0, 2 * in_c * sizeof(int8_t), vl); + r0 += (vl - 1) * 2 * in_c; + + vint8m1_t _r1_0_7 = vlse8_v_i8m1(r1, 2 * in_c * sizeof(int8_t), vl); + r1 += in_c; + vint8m1_t _r1_1_8 = vlse8_v_i8m1(r1, 2 * in_c * sizeof(int8_t), vl); + r1 += in_c; + vint8m1_t _r1_2_9 = vlse8_v_i8m1(r1, 2 * in_c * sizeof(int8_t), vl); + r1 += (vl - 1) * 2 * in_c; + + vint8m1_t _r2_0_7 = vlse8_v_i8m1(r2, 2 * in_c * sizeof(int8_t), vl); + r2 += in_c; + vint8m1_t _r2_1_8 = vlse8_v_i8m1(r2, 2 * in_c * sizeof(int8_t), vl); + r2 += in_c; + vint8m1_t _r2_2_9 = vlse8_v_i8m1(r2, 2 * in_c * sizeof(int8_t), vl); + r2 += (vl - 1) * 2 * in_c; + + vint16m2_t _r0_0_7_w = vwadd_vx_i16m2(_r0_0_7, 0, vl); // widden 8->16 + vint16m2_t _r0_1_8_w = vwadd_vx_i16m2(_r0_1_8, 0, vl); + vint16m2_t _r0_2_9_w = vwadd_vx_i16m2(_r0_2_9, 0, vl); + + vint16m2_t _r1_0_7_w = vwadd_vx_i16m2(_r1_0_7, 0, vl); + vint16m2_t _r1_1_8_w = vwadd_vx_i16m2(_r1_1_8, 0, vl); + vint16m2_t _r1_2_9_w = vwadd_vx_i16m2(_r1_2_9, 0, vl); + + vint16m2_t _r2_0_7_w = vwadd_vx_i16m2(_r2_0_7, 0, vl); + vint16m2_t _r2_1_8_w = vwadd_vx_i16m2(_r2_1_8, 0, vl); + vint16m2_t _r2_2_9_w = vwadd_vx_i16m2(_r2_2_9, 0, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k00, _r0_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k01, _r0_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k02, _r0_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k10, _r1_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k11, _r1_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k12, _r1_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k20, _r2_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k21, _r2_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k22, _r2_2_9_w, vl); + + vint8m1_t _res0; + if (kernel->quant_channel > 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[c].multiplier, + kernel->qinfo[c].shift, output->qinfo->zero_point, vl); + } else if (kernel->quant_channel == 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[0].multiplier, + kernel->qinfo[0].shift, output->qinfo->zero_point, vl); + } + vsse8_v_i8m1(outptr0, in_c * sizeof(int8_t), _res0, vl); + outptr0 += vl * in_c; + w -= vl; + } + r0 += tailstep; + r1 += tailstep; + r2 += tailstep; + } + } + csi_nn_rvv_int8_to_int4(output_tran_buf, output_data, out_h * out_w * in_c); + csi_mem_free(input_padd_buf); + csi_mem_free(kernel_tran_buf); + csi_mem_free(output_tran_buf); + return CSINN_TRUE; +} \ No newline at end of file diff --git a/source/thead_rvv/depthwise_convolution_3x3_int8.c b/source/thead_rvv/depthwise_convolution_3x3_int8.c new file mode 100644 index 00000000..e6084026 --- /dev/null +++ b/source/thead_rvv/depthwise_convolution_3x3_int8.c @@ -0,0 +1,440 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +static vint8m1_t requantize_m4(vint32m4_t _src, int32_t multiplier, int32_t shift, int32_t out_zp, + int vl) +{ + vint32m4_t _mulh = vmulh_vx_i32m4(_src, multiplier, vl); + _mulh = vssra_vx_i32m4(_mulh, -shift - 1, vl); + _mulh = vadd_vx_i32m4(_mulh, out_zp, vl); + vint16m2_t _tmp1 = vnclip_wx_i16m2(_mulh, 0, vl); + vint8m1_t _tmp2 = vnclip_wx_i8m1(_tmp1, 0, vl); + return _tmp2; +} + +int csi_nn_rvv_dwconv3x3s1_int8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + int8_t *kernel_data = (int8_t *)kernel->data; + int32_t *bias_data = (int32_t *)bias->data; + + int32_t batch = input->dim[0]; + int32_t in_c = input->dim[1]; // group = in_channel + int32_t in_h = input->dim[2]; + int32_t in_w = input->dim[3]; + + int32_t out_c = output->dim[1]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + int8_t *input_padd_buf = (int8_t *)csi_mem_alloc((in_h + params->pad_top + params->pad_down) * + (in_w + params->pad_left + params->pad_right) * + in_c * sizeof(int8_t)); + + csi_nn_rvv_pad_input_int8(input_data, input_padd_buf, in_c, in_h, in_w, + in_h + params->pad_top + params->pad_down, + in_w + params->pad_left + params->pad_right, params->pad_top, + params->pad_left, input->qinfo->zero_point); + + in_h = in_h + params->pad_top + params->pad_down; + in_w = in_w + params->pad_left + params->pad_right; + +#pragma omp parallel for num_threads(1) + for (int c = 0; c < in_c; c++) { + int8_t *outptr0 = output_data; + int8_t *outptr1 = outptr0 + out_w; + + // please use fuse_zp2bias option in hhb, thus bias_data wont be NULL + int32_t bias0 = bias_data[c]; + + int8_t *img0 = input_padd_buf + c * in_h * in_w; + int8_t *r0 = img0; + int8_t *r1 = r0 + in_w; + int8_t *r2 = r1 + in_w; + int8_t *r3 = r2 + in_w; + + const int8_t *kernel0 = kernel_data + c * 9; + + int8_t k00 = kernel0[0]; + int8_t k01 = kernel0[1]; + int8_t k02 = kernel0[2]; + int8_t k10 = kernel0[3]; + int8_t k11 = kernel0[4]; + int8_t k12 = kernel0[5]; + int8_t k20 = kernel0[6]; + int8_t k21 = kernel0[7]; + int8_t k22 = kernel0[8]; + int vl; + int h = 0; + // h2 loop + for (; h + 1 < out_h; h += 2) { + int w = out_w; + // h2w8 loop + while (w > 0) { + vl = vsetvl_e32m4(w); + vint32m4_t _acc0 = vmv_v_x_i32m4(bias0, vl); + vint32m4_t _acc1 = vmv_v_x_i32m4(bias0, vl); + + vint8m1_t _r0_0_7 = vle8_v_i8m1(r0, vl); + vint8m1_t _r0_1_8 = vle8_v_i8m1(r0 + 1, vl); + vint8m1_t _r0_2_9 = vle8_v_i8m1(r0 + 2, vl); + + vint8m1_t _r1_0_7 = vle8_v_i8m1(r1, vl); + vint8m1_t _r1_1_8 = vle8_v_i8m1(r1 + 1, vl); + vint8m1_t _r1_2_9 = vle8_v_i8m1(r1 + 2, vl); + + vint8m1_t _r2_0_7 = vle8_v_i8m1(r2, vl); + vint8m1_t _r2_1_8 = vle8_v_i8m1(r2 + 1, vl); + vint8m1_t _r2_2_9 = vle8_v_i8m1(r2 + 2, vl); + + vint8m1_t _r3_0_7 = vle8_v_i8m1(r3, vl); + vint8m1_t _r3_1_8 = vle8_v_i8m1(r3 + 1, vl); + vint8m1_t _r3_2_9 = vle8_v_i8m1(r3 + 2, vl); + + vint16m2_t _r0_0_7_w = vwadd_vx_i16m2(_r0_0_7, 0, vl); // widden 8->16 + vint16m2_t _r0_1_8_w = vwadd_vx_i16m2(_r0_1_8, 0, vl); + vint16m2_t _r0_2_9_w = vwadd_vx_i16m2(_r0_2_9, 0, vl); + + vint16m2_t _r1_0_7_w = vwadd_vx_i16m2(_r1_0_7, 0, vl); + vint16m2_t _r1_1_8_w = vwadd_vx_i16m2(_r1_1_8, 0, vl); + vint16m2_t _r1_2_9_w = vwadd_vx_i16m2(_r1_2_9, 0, vl); + + vint16m2_t _r2_0_7_w = vwadd_vx_i16m2(_r2_0_7, 0, vl); + vint16m2_t _r2_1_8_w = vwadd_vx_i16m2(_r2_1_8, 0, vl); + vint16m2_t _r2_2_9_w = vwadd_vx_i16m2(_r2_2_9, 0, vl); + + vint16m2_t _r3_0_7_w = vwadd_vx_i16m2(_r3_0_7, 0, vl); + vint16m2_t _r3_1_8_w = vwadd_vx_i16m2(_r3_1_8, 0, vl); + vint16m2_t _r3_2_9_w = vwadd_vx_i16m2(_r3_2_9, 0, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k00, _r0_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k01, _r0_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k02, _r0_2_9_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k00, _r1_0_7_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k01, _r1_1_8_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k02, _r1_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k10, _r1_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k11, _r1_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k12, _r1_2_9_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k10, _r2_0_7_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k11, _r2_1_8_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k12, _r2_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k20, _r2_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k21, _r2_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k22, _r2_2_9_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k20, _r3_0_7_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k21, _r3_1_8_w, vl); + _acc1 = vwmacc_vx_i32m4(_acc1, k22, _r3_2_9_w, vl); + + // int32_t q1z2 = (k00 + k01 + k02 + k10 + k11 + k12 + + // k20 + k21 + k22) * input->qinfo->zero_point; + // _acc0 = vsub_vx_i32m2(_acc0, q1z2, vl); + // _acc1 = vsub_vx_i32m2(_acc1, q1z2, vl); + + // vint16m1_t _mul0_0 = vwmul_vx_i16m1(_r0_0_7, k00, vl); + // vint16m1_t _mul0_1 = vwmul_vx_i16m1(_r0_1_8, k01, vl); + // vint16m1_t _mul0_2 = vwmul_vx_i16m1(_r0_2_9, k02, vl); + // vint16m1_t _mul1_0 = vwmul_vx_i16m1(_r1_0_7, k00, vl); + // vint16m1_t _mul1_1 = vwmul_vx_i16m1(_r1_1_8, k01, vl); + // vint16m1_t _mul1_2 = vwmul_vx_i16m1(_r1_2_9, k02, vl); + + // vint16m1_t _mul0_3 = vwmul_vx_i16m1(_r1_0_7, k10, vl); + // vint16m1_t _mul0_4 = vwmul_vx_i16m1(_r1_1_8, k11, vl); + // vint16m1_t _mul0_5 = vwmul_vx_i16m1(_r1_2_9, k12, vl); + // vint16m1_t _mul1_3 = vwmul_vx_i16m1(_r2_0_7, k10, vl); + // vint16m1_t _mul1_4 = vwmul_vx_i16m1(_r2_1_8, k11, vl); + // vint16m1_t _mul1_5 = vwmul_vx_i16m1(_r2_2_9, k12, vl); + + // vint16m1_t _mul0_6 = vwmul_vx_i16m1(_r2_0_7, k20, vl); + // vint16m1_t _mul0_7 = vwmul_vx_i16m1(_r2_1_8, k21, vl); + // vint16m1_t _mul0_8 = vwmul_vx_i16m1(_r2_2_9, k22, vl); + // vint16m1_t _mul1_6 = vwmul_vx_i16m1(_r3_0_7, k20, vl); + // vint16m1_t _mul1_7 = vwmul_vx_i16m1(_r3_1_8, k21, vl); + // vint16m1_t _mul1_8 = vwmul_vx_i16m1(_r3_2_9, k22, vl); + + // _acc0 = vwadd_wv_i32m2(_acc0, _mul0_0, vl); + // _acc0 = vwadd_wv_i32m2(_acc0, _mul0_1, vl); + // _acc0 = vwadd_wv_i32m2(_acc0, _mul0_2, vl); + // _acc1 = vwadd_wv_i32m2(_acc1, _mul1_0, vl); + // _acc1 = vwadd_wv_i32m2(_acc1, _mul1_1, vl); + // _acc1 = vwadd_wv_i32m2(_acc1, _mul1_2, vl); + + // _acc0 = vwadd_wv_i32m2(_acc0, _mul0_3, vl); + // _acc0 = vwadd_wv_i32m2(_acc0, _mul0_4, vl); + // _acc0 = vwadd_wv_i32m2(_acc0, _mul0_5, vl); + // _acc1 = vwadd_wv_i32m2(_acc1, _mul1_3, vl); + // _acc1 = vwadd_wv_i32m2(_acc1, _mul1_4, vl); + // _acc1 = vwadd_wv_i32m2(_acc1, _mul1_5, vl); + + // _acc0 = vwadd_wv_i32m2(_acc0, _mul0_6, vl); + // _acc0 = vwadd_wv_i32m2(_acc0, _mul0_7, vl); + // _acc0 = vwadd_wv_i32m2(_acc0, _mul0_8, vl); + // _acc1 = vwadd_wv_i32m2(_acc1, _mul1_6, vl); + // _acc1 = vwadd_wv_i32m2(_acc1, _mul1_7, vl); + // _acc1 = vwadd_wv_i32m2(_acc1, _mul1_8, vl); + vint8m1_t _res0, _res1; + if (kernel->quant_channel > 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[c].multiplier, + kernel->qinfo[c].shift, output->qinfo->zero_point, vl); + _res1 = requantize_m4(_acc1, kernel->qinfo[c].multiplier, + kernel->qinfo[c].shift, output->qinfo->zero_point, vl); + } else if (kernel->quant_channel == 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[0].multiplier, + kernel->qinfo[0].shift, output->qinfo->zero_point, vl); + _res1 = requantize_m4(_acc1, kernel->qinfo[0].multiplier, + kernel->qinfo[0].shift, output->qinfo->zero_point, vl); + } + vse8_v_i8m1(outptr0, _res0, vl); + vse8_v_i8m1(outptr1, _res1, vl); + + r0 += vl; + r1 += vl; + r2 += vl; + r3 += vl; + outptr0 += vl; + outptr1 += vl; + w -= vl; + } + r0 += 2 + in_w; + r1 += 2 + in_w; + r2 += 2 + in_w; + r3 += 2 + in_w; + outptr0 += out_w; + outptr1 += out_w; + } + for (; h < out_h; h++) { + int w = out_w; + // h2w8 loop + while (w > 0) { + vl = vsetvl_e32m4(w); + vint32m4_t _acc0 = vmv_v_x_i32m4(bias0, vl); + + vint8m1_t _r0_0_7 = vle8_v_i8m1(r0, vl); + vint8m1_t _r0_1_8 = vle8_v_i8m1(r0 + 1, vl); + vint8m1_t _r0_2_9 = vle8_v_i8m1(r0 + 2, vl); + + vint8m1_t _r1_0_7 = vle8_v_i8m1(r1, vl); + vint8m1_t _r1_1_8 = vle8_v_i8m1(r1 + 1, vl); + vint8m1_t _r1_2_9 = vle8_v_i8m1(r1 + 2, vl); + + vint8m1_t _r2_0_7 = vle8_v_i8m1(r2, vl); + vint8m1_t _r2_1_8 = vle8_v_i8m1(r2 + 1, vl); + vint8m1_t _r2_2_9 = vle8_v_i8m1(r2 + 2, vl); + + vint16m2_t _r0_0_7_w = vwadd_vx_i16m2(_r0_0_7, 0, vl); // widden 8->16 + vint16m2_t _r0_1_8_w = vwadd_vx_i16m2(_r0_1_8, 0, vl); + vint16m2_t _r0_2_9_w = vwadd_vx_i16m2(_r0_2_9, 0, vl); + + vint16m2_t _r1_0_7_w = vwadd_vx_i16m2(_r1_0_7, 0, vl); + vint16m2_t _r1_1_8_w = vwadd_vx_i16m2(_r1_1_8, 0, vl); + vint16m2_t _r1_2_9_w = vwadd_vx_i16m2(_r1_2_9, 0, vl); + + vint16m2_t _r2_0_7_w = vwadd_vx_i16m2(_r2_0_7, 0, vl); + vint16m2_t _r2_1_8_w = vwadd_vx_i16m2(_r2_1_8, 0, vl); + vint16m2_t _r2_2_9_w = vwadd_vx_i16m2(_r2_2_9, 0, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k00, _r0_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k01, _r0_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k02, _r0_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k10, _r1_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k11, _r1_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k12, _r1_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k20, _r2_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k21, _r2_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k22, _r2_2_9_w, vl); + + vint8m1_t _res0; + if (kernel->quant_channel > 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[c].multiplier, + kernel->qinfo[c].shift, output->qinfo->zero_point, vl); + } else if (kernel->quant_channel == 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[0].multiplier, + kernel->qinfo[0].shift, output->qinfo->zero_point, vl); + } + vse8_v_i8m1(outptr0, _res0, vl); + + r0 += vl; + r1 += vl; + r2 += vl; + outptr0 += vl; + w -= vl; + } + } + output_data += out_h * out_w; + } + csi_mem_free(input_padd_buf); + return CSINN_TRUE; +} + +int csi_nn_rvv_dwconv3x3s2_int8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *kernel, struct csi_tensor *bias, + struct conv2d_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + int8_t *kernel_data = (int8_t *)kernel->data; + int32_t *bias_data = (int32_t *)bias->data; + + int32_t batch = input->dim[0]; + int32_t in_c = input->dim[1]; // group = in_channel + int32_t in_h = input->dim[2]; + int32_t in_w = input->dim[3]; + + int32_t out_c = output->dim[1]; + int32_t out_h = output->dim[2]; + int32_t out_w = output->dim[3]; + + int8_t *input_padd_buf = (int8_t *)csi_mem_alloc((in_h + params->pad_top + params->pad_down) * + (in_w + params->pad_left + params->pad_right) * + in_c * sizeof(int8_t)); + + csi_nn_rvv_pad_input_int8(input_data, input_padd_buf, in_c, in_h, in_w, + in_h + params->pad_top + params->pad_down, + in_w + params->pad_left + params->pad_right, params->pad_top, + params->pad_left, input->qinfo->zero_point); + + in_h = in_h + params->pad_top + params->pad_down; + in_w = in_w + params->pad_left + params->pad_right; + + int tailstep = in_w - 2 * out_w + in_w; + +#pragma omp parallel for num_threads(1) + for (int c = 0; c < in_c; c++) { + int8_t *outptr0 = output_data; + + int32_t bias0 = bias_data[c]; + + int8_t *img0 = input_padd_buf + c * in_h * in_w; + int8_t *r0 = img0; + int8_t *r1 = r0 + in_w; + int8_t *r2 = r1 + in_w; + + const int8_t *kernel0 = kernel_data + c * 9; + + int8_t k00 = kernel0[0]; + int8_t k01 = kernel0[1]; + int8_t k02 = kernel0[2]; + int8_t k10 = kernel0[3]; + int8_t k11 = kernel0[4]; + int8_t k12 = kernel0[5]; + int8_t k20 = kernel0[6]; + int8_t k21 = kernel0[7]; + int8_t k22 = kernel0[8]; + int vl; + + for (int h = 0; h < out_h; h++) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e32m4(w); + vint32m4_t _acc0 = vmv_v_x_i32m4(bias0, vl); + + // vint8mf2_t _r0_0_7, _r0_1_8; + // vint8mf2_t _r1_0_7, _r1_1_8; + // vint8mf2_t _r2_0_7, _r2_1_8; + // vlseg2e8_v_i8mf2(&_r0_0_7, &_r0_1_8, r0, vl); + // r0 += 2; + // vint8mf2_t _r0_2_9 = vlse8_v_i8mf2(r0, 2 * sizeof(int8_t), vl); + // r0 += (vl - 1) * 2; + + // vlseg2e8_v_i8mf2(&_r1_0_7, &_r1_1_8, r1, vl); + // r1 += 2; + // vint8mf2_t _r1_2_9 = vlse8_v_i8mf2(r1, 2 * sizeof(int8_t), vl); + // r1 += (vl - 1) * 2; + + // vlseg2e8_v_i8mf2(&_r2_0_7, &_r2_1_8, r2, vl); + // r2 += 2; + // vint8mf2_t _r2_2_9 = vlse8_v_i8mf2(r2, 2 * sizeof(int8_t), vl); + // r2 += (vl - 1) * 2; + + vint8m1_t _r0_0_7 = vlse8_v_i8m1(r0, 2 * sizeof(int8_t), vl); + r0 += 1; + vint8m1_t _r0_1_8 = vlse8_v_i8m1(r0, 2 * sizeof(int8_t), vl); + r0 += 1; + vint8m1_t _r0_2_9 = vlse8_v_i8m1(r0, 2 * sizeof(int8_t), vl); + r0 += (vl - 1) * 2; + + vint8m1_t _r1_0_7 = vlse8_v_i8m1(r1, 2 * sizeof(int8_t), vl); + r1 += 1; + vint8m1_t _r1_1_8 = vlse8_v_i8m1(r1, 2 * sizeof(int8_t), vl); + r1 += 1; + vint8m1_t _r1_2_9 = vlse8_v_i8m1(r1, 2 * sizeof(int8_t), vl); + r1 += (vl - 1) * 2; + + vint8m1_t _r2_0_7 = vlse8_v_i8m1(r2, 2 * sizeof(int8_t), vl); + r2 += 1; + vint8m1_t _r2_1_8 = vlse8_v_i8m1(r2, 2 * sizeof(int8_t), vl); + r2 += 1; + vint8m1_t _r2_2_9 = vlse8_v_i8m1(r2, 2 * sizeof(int8_t), vl); + r2 += (vl - 1) * 2; + + vint16m2_t _r0_0_7_w = vwadd_vx_i16m2(_r0_0_7, 0, vl); // widden 8->16 + vint16m2_t _r0_1_8_w = vwadd_vx_i16m2(_r0_1_8, 0, vl); + vint16m2_t _r0_2_9_w = vwadd_vx_i16m2(_r0_2_9, 0, vl); + + vint16m2_t _r1_0_7_w = vwadd_vx_i16m2(_r1_0_7, 0, vl); + vint16m2_t _r1_1_8_w = vwadd_vx_i16m2(_r1_1_8, 0, vl); + vint16m2_t _r1_2_9_w = vwadd_vx_i16m2(_r1_2_9, 0, vl); + + vint16m2_t _r2_0_7_w = vwadd_vx_i16m2(_r2_0_7, 0, vl); + vint16m2_t _r2_1_8_w = vwadd_vx_i16m2(_r2_1_8, 0, vl); + vint16m2_t _r2_2_9_w = vwadd_vx_i16m2(_r2_2_9, 0, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k00, _r0_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k01, _r0_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k02, _r0_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k10, _r1_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k11, _r1_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k12, _r1_2_9_w, vl); + + _acc0 = vwmacc_vx_i32m4(_acc0, k20, _r2_0_7_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k21, _r2_1_8_w, vl); + _acc0 = vwmacc_vx_i32m4(_acc0, k22, _r2_2_9_w, vl); + + vint8m1_t _res0; + if (kernel->quant_channel > 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[c].multiplier, + kernel->qinfo[c].shift, output->qinfo->zero_point, 16); + } else if (kernel->quant_channel == 1) { + _res0 = requantize_m4(_acc0, kernel->qinfo[0].multiplier, + kernel->qinfo[0].shift, output->qinfo->zero_point, 16); + } + vse8_v_i8m1(outptr0, _res0, vl); + outptr0 += vl; + w -= vl; + } + r0 += tailstep; + r1 += tailstep; + r2 += tailstep; + } + output_data += out_h * out_w; + } + csi_mem_free(input_padd_buf); + return CSINN_TRUE; +} diff --git a/source/thead_rvv/fullyconnected.c b/source/thead_rvv/fullyconnected.c new file mode 100644 index 00000000..78728d82 --- /dev/null +++ b/source/thead_rvv/fullyconnected.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +static void csi_nn_rvv_reorder_weight_npackn_fp32(float *src, float *dst, int m, int k, int ldx) +{ + int packn = csrr_vlenb() / sizeof(float); // VLEN128=4 VLEN256=8 + int vl = vsetvl_e32m1(packn); + int i = 0; + for (; i + packn - 1 < m; i += packn) { + float *in_ptr = src + i * k; + for (int j = 0; j < k; j++) { + vfloat32m1_t _input = vlse32_v_f32m1(in_ptr, k * sizeof(float), vl); + in_ptr++; + vse32_v_f32m1(dst, _input, vl); + dst += packn; + } + } + src += i * k; + for (; i < m; i++) { + memcpy(dst, src, sizeof(float) * k); + dst += k; + src += k; + } +} + +void csi_nn_rvv_fc_gemv_transform_weight_fp32(struct csi_tensor *weights) +{ + float *weight_data = (float *)weights->data; + + int n = weights->dim[0]; // out_nodes + int k = weights->dim[1]; // in_nodes + + float *pa_reorder = (float *)csi_mem_alloc(n * k * sizeof(float)); + csi_nn_rvv_reorder_weight_npackn_fp32(weight_data, pa_reorder, n, k, k); + memcpy(weight_data, pa_reorder, n * k * sizeof(float)); + csi_mem_free(pa_reorder); +} + +int csi_nn_rvv_fullyconnected_packn_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + float *weights_data = (float *)weights->data; + float *bias_data = (float *)bias->data; + const int output_dims_count = output->dim_count; + const int weights_dims_count = weights->dim_count; + const int bias_dims_count = bias->dim_count; + int batches = 1; + /* compute the outer size */ + for (int i = 0; i < output_dims_count - 1; i++) { + batches *= output->dim[i]; + } + int output_depth = weights->dim[weights_dims_count - 2]; // output_nodes + int accum_depth = weights->dim[weights_dims_count - 1]; // input_nodes + + bool flag_bias = 1; // default: fc layer include bias + if (bias_data == NULL) { + flag_bias = 0; + bias_data = (float *)csi_mem_alloc(output_depth * 2); + } + int packn = csrr_vlenb() / sizeof(float); // VLEN128=4 VLEN256=8 + int vl; + + for (int b = 0; b < batches; b++) { + float *init_output = output_data + b * output_depth; + float *init_input = input_data + b * accum_depth; + float *init_weight = weights_data; + float *init_bias = bias_data; + + vl = vsetvl_e32m1(packn); + int n = 0; + for (; n + packn - 1 < output_depth; n += packn) { + float *in_ptr = init_input; + vfloat32m1_t _acc = vle32_v_f32m1(init_bias, vl); + init_bias += vl; + + for (int k = 0; k < accum_depth; k++) { + vfloat32m1_t _weight = vle32_v_f32m1(init_weight, vl); + _acc = vfmacc_vf_f32m1(_acc, in_ptr[k], _weight, vl); + init_weight += vl; + } + vse32_v_f32m1(init_output, _acc, vl); + init_output += vl; + } + for (; n < output_depth; n++) { + float *in_ptr = init_input; + float acc = init_bias[0]; + for (int k = 0; k < accum_depth; k++) { + acc += in_ptr[k] * init_weight[k]; + } + *init_output++ = acc; + init_bias++; + init_weight += accum_depth; + } + } + if (!flag_bias) { + csi_mem_free(bias_data); + bias_data = NULL; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_fullyconnected_init(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params) +{ + if (input->dtype == CSINN_DTYPE_FLOAT32) { + csi_nn_rvv_fc_gemv_transform_weight_fp32(weights); + params->base.bc = csi_nn_rvv_fullyconnected_packn_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + csi_nn_rvv_fc_gemv_transform_weight_fp16(weights); + params->base.bc = csi_nn_rvv_fullyconnected_packn_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + csi_nn_rvv_fc_gemv_transform_weight_int8(weights); + // support channel quantization + for (int i = 0; i < weights->quant_channel; i++) { + float real_scale = input->qinfo->scale * weights->qinfo[i].scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale, &(weights->qinfo[i].multiplier), + &(weights->qinfo[i].shift)); + } + params->base.bc = csi_nn_rvv_fullyconnected_packn_int8; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/fullyconnected_fp16.c b/source/thead_rvv/fullyconnected_fp16.c new file mode 100644 index 00000000..802e6a5c --- /dev/null +++ b/source/thead_rvv/fullyconnected_fp16.c @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +static void csi_nn_rvv_reorder_weight_npackn_fp16(__fp16 *src, __fp16 *dst, int m, int k, int ldx) +{ + int packn = csrr_vlenb() / sizeof(__fp16); // VLEN128=8 VLEN256=16 + int vl = vsetvl_e16m1(packn); + int i = 0; + for (; i + packn - 1 < m; i += packn) { + __fp16 *in_ptr = src + i * k; + for (int j = 0; j < k; j++) { + vfloat16m1_t _input = vlse16_v_f16m1(in_ptr, k * sizeof(__fp16), vl); + in_ptr++; + vse16_v_f16m1(dst, _input, vl); + dst += packn; + } + } + src += i * k; + for (; i < m; i++) { + memcpy(dst, src, sizeof(__fp16) * k); + dst += k; + src += k; + } +} + +void csi_nn_rvv_fc_gemv_transform_weight_fp16(struct csi_tensor *weights) +{ + __fp16 *weight_data = (__fp16 *)weights->data; + + int n = weights->dim[0]; // out_nodes + int k = weights->dim[1]; // in_nodes + + __fp16 *pa_reorder = (__fp16 *)csi_mem_alloc(n * k * sizeof(__fp16)); + csi_nn_rvv_reorder_weight_npackn_fp16(weight_data, pa_reorder, n, k, k); + memcpy(weight_data, pa_reorder, n * k * sizeof(__fp16)); + csi_mem_free(pa_reorder); +} + +int csi_nn_rvv_fullyconnected_packn_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 *weights_data = (__fp16 *)weights->data; + __fp16 *bias_data = (__fp16 *)bias->data; + const int output_dims_count = output->dim_count; + const int weights_dims_count = weights->dim_count; + const int bias_dims_count = bias->dim_count; + int batches = 1; + /* compute the outer size */ + for (int i = 0; i < output_dims_count - 1; i++) { + batches *= output->dim[i]; + } + int output_depth = weights->dim[weights_dims_count - 2]; // output_nodes + int accum_depth = weights->dim[weights_dims_count - 1]; // input_nodes + + bool flag_bias = 1; // default: fc layer include bias + if (bias_data == NULL) { + flag_bias = 0; + bias_data = (__fp16 *)csi_mem_alloc(output_depth * 2); + } + + int packn = csrr_vlenb() / sizeof(__fp16); // VLEN128=8 VLEN256=16 + int vl; + + for (int b = 0; b < batches; b++) { + __fp16 *init_output = output_data + b * output_depth; + __fp16 *init_input = input_data + b * accum_depth; + __fp16 *init_weight = weights_data; + __fp16 *init_bias = bias_data; + + vl = vsetvl_e16m1(packn); + int n = 0; + for (; n + packn - 1 < output_depth; n += packn) { + __fp16 *in_ptr = init_input; + vfloat16m1_t _acc = vle16_v_f16m1(init_bias, vl); + init_bias += vl; + + for (int k = 0; k < accum_depth; k++) { + vfloat16m1_t _weight = vle16_v_f16m1(init_weight, vl); + _acc = vfmacc_vf_f16m1(_acc, in_ptr[k], _weight, vl); + init_weight += vl; + } + vse16_v_f16m1(init_output, _acc, vl); + init_output += vl; + } + for (; n < output_depth; n++) { + __fp16 *in_ptr = init_input; + __fp16 acc = init_bias[0]; + for (int k = 0; k < accum_depth; k++) { + acc += in_ptr[k] * init_weight[k]; + } + *init_output++ = acc; + init_bias++; + init_weight += accum_depth; + } + } + if (!flag_bias) { + csi_mem_free(bias_data); + bias_data = NULL; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/fullyconnected_int8.c b/source/thead_rvv/fullyconnected_int8.c new file mode 100644 index 00000000..729e9b7f --- /dev/null +++ b/source/thead_rvv/fullyconnected_int8.c @@ -0,0 +1,146 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +static void csi_nn_rvv_reorder_weight_packn_int8(int8_t *src, int8_t *dst, int m, int k, int ldx) +{ + int packn = csrr_vlenb() / sizeof(int8_t); // VLEN128=16 VLEN256=32 + int vl = vsetvl_e8m1(packn); + int i = 0; + for (; i + packn - 1 < m; i += packn) { + int8_t *in_ptr = src + i * k; + for (int j = 0; j < k; j++) { + vint8m1_t _input = vlse8_v_i8m1(in_ptr, k * sizeof(int8_t), vl); + in_ptr++; + vse8_v_i8m1(dst, _input, vl); + dst += packn; + } + } + if (i < m) { + vl = vsetvl_e8m1(m & (packn - 1)); + int8_t *in_ptr = src + i * k; + for (int j = 0; j < k; j++) { + vint8m1_t _input = vlse8_v_i8m1(in_ptr, k * sizeof(int8_t), vl); + in_ptr++; + vse8_v_i8m1(dst, _input, vl); + dst += vl; + } + } +} + +void csi_nn_rvv_fc_gemv_transform_weight_int8(struct csi_tensor *weights) +{ + int8_t *weight_data = (int8_t *)weights->data; + + int n = weights->dim[0]; // out_nodes + int k = weights->dim[1]; // in_nodes + + int8_t *pa_reorder = (int8_t *)csi_mem_alloc(n * k * sizeof(int8_t)); + csi_nn_rvv_reorder_weight_packn_int8(weight_data, pa_reorder, n, k, k); + memcpy(weight_data, pa_reorder, n * k * sizeof(int8_t)); + csi_mem_free(pa_reorder); +} + +static void csi_nn_rvv_fullyconnectd_packn_int8_internel(const int8_t *input, int32_t *output, + int8_t *weight, const int32_t *bias, + int in_nodes, int out_nodes) +{ + int i = 0; + int packn = csrr_vlenb() / sizeof(int8_t); + int vl = vsetvl_e8m1(packn); + for (; i + packn - 1 < out_nodes; i += packn) { + vint32m4_t _acc = vle32_v_i32m4(bias, vl); + for (int j = 0; j < in_nodes; j++) { + vint8m1_t _weight = vle8_v_i8m1(weight, vl); + vint16m2_t _mul = vwmul_vx_i16m2(_weight, input[j], vl); + _acc = vwmacc_vx_i32m4(_acc, 1, _mul, vl); + weight += vl; + } + bias += vl; + vse32_v_i32m4(output, _acc, vl); + output += vl; + } + if (i < out_nodes) { + vl = vsetvl_e32m4(out_nodes & (packn - 1)); // tail out_node + vint32m4_t _acc = vle32_v_i32m4(bias, vl); + for (int j = 0; j < in_nodes; j++) { + vint8m1_t _weight = vle8_v_i8m1(weight, vl); + vint16m2_t _mul = vwmul_vx_i16m2(_weight, input[j], vl); + _acc = vwmacc_vx_i32m4(_acc, 1, _mul, vl); + weight += vl; + } + vse32_v_i32m4(output, _acc, vl); + } +} + +int csi_nn_rvv_fullyconnected_packn_int8(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weights, struct csi_tensor *bias, + struct fc_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + int8_t *weights_data = (int8_t *)weights->data; + int32_t *bias_data = (int32_t *)bias->data; + const int output_dims_count = output->dim_count; + const int weights_dims_count = weights->dim_count; + const int bias_dims_count = bias->dim_count; + int batches = 1; + /* compute the outer size */ + for (int i = 0; i < output_dims_count - 1; i++) { + batches *= output->dim[i]; + } + const int output_depth = weights->dim[weights_dims_count - 2]; // output_nodes + const int accum_depth = weights->dim[weights_dims_count - 1]; // input_nodes + + int32_t *output_tmp = (int32_t *)csi_mem_alloc(output_depth * sizeof(int32_t)); + int vl; + + for (int b = 0; b < batches; b++) { + int8_t *input_ptr = input_data + b * accum_depth; + int8_t *weight_ptr = weights_data; + int32_t *bias_ptr = bias_data; + int32_t *output_ptr = output_tmp; + + csi_nn_rvv_fullyconnectd_packn_int8_internel(input_ptr, output_ptr, weight_ptr, bias_ptr, + accum_depth, output_depth); + + if (weights->quant_channel == 1) { + csi_nn_rvv_requantize(output_ptr, weights->qinfo->multiplier, weights->qinfo->shift, + output_depth); + } else if (weights->quant_channel == output_depth) { + // support channel quantization + for (int c = 0; c < weights->quant_channel; c++) { + csi_nn_rvv_requantize(output_ptr + c, weights->qinfo[c].multiplier, + weights->qinfo[c].shift, 1); + } + } + csi_nn_rvv_saturated_int8(output_ptr, output_data + b * output_depth, + output->qinfo->zero_point, output_depth); + } + if (output_tmp) { + csi_mem_free(output_tmp); + output_tmp = NULL; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/gemm_fp16.c b/source/thead_rvv/gemm_fp16.c new file mode 100644 index 00000000..c707509e --- /dev/null +++ b/source/thead_rvv/gemm_fp16.c @@ -0,0 +1,1149 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128 +*************************************************************/ +void csi_nn_rvv_reorder_kernel_n8_fp16(__fp16 *a, __fp16 *sa, int m, int k, int ldx) +{ + int i = 0; + for (; i + 7 < m; i += 8) { + for (int j = 0; j < k; j++) { + sa[i * k + 8 * j + 0] = a[(i + 0) * k + j]; + sa[i * k + 8 * j + 1] = a[(i + 1) * k + j]; + sa[i * k + 8 * j + 2] = a[(i + 2) * k + j]; + sa[i * k + 8 * j + 3] = a[(i + 3) * k + j]; + sa[i * k + 8 * j + 4] = a[(i + 4) * k + j]; + sa[i * k + 8 * j + 5] = a[(i + 5) * k + j]; + sa[i * k + 8 * j + 6] = a[(i + 6) * k + j]; + sa[i * k + 8 * j + 7] = a[(i + 7) * k + j]; + } + } + + for (; i + 3 < m; i += 4) { + for (int j = 0; j < k; j++) { + sa[i * k + 4 * j + 0] = a[(i + 0) * k + j]; + sa[i * k + 4 * j + 1] = a[(i + 1) * k + j]; + sa[i * k + 4 * j + 2] = a[(i + 2) * k + j]; + sa[i * k + 4 * j + 3] = a[(i + 3) * k + j]; + } + } + + for (; i + 1 < m; i += 2) { + for (int j = 0; j < k; j++) { + sa[i * k + 2 * j + 0] = a[(i + 0) * k + j]; + sa[i * k + 2 * j + 1] = a[(i + 1) * k + j]; + } + } + + for (; i < m; i++) { + for (int j = 0; j < k; j++) { + sa[i * k + 1 * j + 0] = a[(i + 0) * k + j]; + } + } +} + +void csi_nn_rvv_reorder_input_z16_fp16(__fp16 *b, __fp16 *sb, int k, int n, int ldx) +{ + int vl = vsetvl_e16m2(16); + __fp16 *b0 = NULL; + int i = 0; + for (; i + 15 < n; i += 16) { + b0 = b + i; + for (int j = 0; j < k; j++) { + vfloat16m2_t _tmp = vle16_v_f16m2(b0, vl); + b0 += ldx; + vse16_v_f16m2(sb, _tmp, vl); + sb += 16; + } + } + + for (; i + 7 < n; i += 8) { + vl = vsetvl_e16m1(8); + b0 = b + i; + for (int j = 0; j < k; j++) { + vfloat16m1_t _tmp = vle16_v_f16m1(b0, vl); + b0 += ldx; + vse16_v_f16m1(sb, _tmp, vl); + sb += 8; + } + } + + for (; i < n; i++) { + vl = vsetvl_e16m2(16); + b0 = b + i; + int j = 0; + for (; j + 15 < k; j += 16) { + vfloat16m2_t _tmp = vlse16_v_f16m2(b0, ldx * sizeof(__fp16), vl); + b0 += 16 * ldx; + vse16_v_f16m2(sb, _tmp, vl); + sb += 16; + } + if (j < k) { + vl = vsetvl_e16m2(k & 15); + vfloat16m2_t _tmp = vlse16_v_f16m2(b0, ldx * sizeof(__fp16), vl); + vse16_v_f16m2(sb, _tmp, vl); + sb += vl; + } + } +} + +void csi_nn_rvv_gemm_8x16_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int m, int k, int n, + int ldc, __fp16 *bias) +{ + __fp16 *kernel_data = (__fp16 *)sa; + __fp16 *input_data = (__fp16 *)sb; + __fp16 *output_data = dst; + + int flag_bias = 1; // default: conv2d layer include bias + if (bias == NULL) { + flag_bias = 0; + bias = (__fp16 *)csi_mem_alloc(m * sizeof(__fp16)); + } + __fp16 *bias_ptr = bias; + + int vl; + + int i = 0; + // m8 loop + for (; i + 7 < m; i += 8) { + vl = vsetvl_e16m2(16); + + __fp16 *in_ptr = input_data; + + __fp16 *out_ptr0 = output_data; + __fp16 *out_ptr1 = out_ptr0 + ldc; + __fp16 *out_ptr2 = out_ptr1 + ldc; + __fp16 *out_ptr3 = out_ptr2 + ldc; + __fp16 *out_ptr4 = out_ptr3 + ldc; + __fp16 *out_ptr5 = out_ptr4 + ldc; + __fp16 *out_ptr6 = out_ptr5 + ldc; + __fp16 *out_ptr7 = out_ptr6 + ldc; + + int j = 0; + // m8n16 loop + for (; j + 15 < n; j += 16) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m2_t _acc0 = vfmv_v_f_f16m2(bias_ptr[0], vl); + vfloat16m2_t _acc1 = vfmv_v_f_f16m2(bias_ptr[1], vl); + vfloat16m2_t _acc2 = vfmv_v_f_f16m2(bias_ptr[2], vl); + vfloat16m2_t _acc3 = vfmv_v_f_f16m2(bias_ptr[3], vl); + vfloat16m2_t _acc4 = vfmv_v_f_f16m2(bias_ptr[4], vl); + vfloat16m2_t _acc5 = vfmv_v_f_f16m2(bias_ptr[5], vl); + vfloat16m2_t _acc6 = vfmv_v_f_f16m2(bias_ptr[6], vl); + vfloat16m2_t _acc7 = vfmv_v_f_f16m2(bias_ptr[7], vl); // init acc with bias_data + + for (int c = 0; c < k; c++) { + vfloat16m2_t _input = vle16_v_f16m2(in_ptr, vl); + + __fp16 k0 = kernel_ptr[0]; + __fp16 k1 = kernel_ptr[1]; + __fp16 k2 = kernel_ptr[2]; + __fp16 k3 = kernel_ptr[3]; + __fp16 k4 = kernel_ptr[4]; + __fp16 k5 = kernel_ptr[5]; + __fp16 k6 = kernel_ptr[6]; + __fp16 k7 = kernel_ptr[7]; + + _acc0 = vfmacc_vf_f16m2(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f16m2(_acc1, k1, _input, vl); + _acc2 = vfmacc_vf_f16m2(_acc2, k2, _input, vl); + _acc3 = vfmacc_vf_f16m2(_acc3, k3, _input, vl); + _acc4 = vfmacc_vf_f16m2(_acc4, k4, _input, vl); + _acc5 = vfmacc_vf_f16m2(_acc5, k5, _input, vl); + _acc6 = vfmacc_vf_f16m2(_acc6, k6, _input, vl); + _acc7 = vfmacc_vf_f16m2(_acc7, k7, _input, vl); + + kernel_ptr += 8; + in_ptr += 16; + } + vse16_v_f16m2(out_ptr0, _acc0, vl); + vse16_v_f16m2(out_ptr1, _acc1, vl); + vse16_v_f16m2(out_ptr2, _acc2, vl); + vse16_v_f16m2(out_ptr3, _acc3, vl); + vse16_v_f16m2(out_ptr4, _acc4, vl); + vse16_v_f16m2(out_ptr5, _acc5, vl); + vse16_v_f16m2(out_ptr6, _acc6, vl); + vse16_v_f16m2(out_ptr7, _acc7, vl); + out_ptr0 += 16; + out_ptr1 += 16; + out_ptr2 += 16; + out_ptr3 += 16; + out_ptr4 += 16; + out_ptr5 += 16; + out_ptr6 += 16; + out_ptr7 += 16; + } + + vl = vsetvl_e16m1(8); + + // m8n8 + for (; j + 7 < n; j += 8) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias_ptr[0], vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(bias_ptr[1], vl); + vfloat16m1_t _acc2 = vfmv_v_f_f16m1(bias_ptr[2], vl); + vfloat16m1_t _acc3 = vfmv_v_f_f16m1(bias_ptr[3], vl); + vfloat16m1_t _acc4 = vfmv_v_f_f16m1(bias_ptr[4], vl); + vfloat16m1_t _acc5 = vfmv_v_f_f16m1(bias_ptr[5], vl); + vfloat16m1_t _acc6 = vfmv_v_f_f16m1(bias_ptr[6], vl); + vfloat16m1_t _acc7 = vfmv_v_f_f16m1(bias_ptr[7], vl); // init acc with bias_data + + for (int c = 0; c < k; c++) { + vfloat16m1_t _input = vle16_v_f16m1(in_ptr, vl); + + __fp16 k0 = kernel_ptr[0]; + __fp16 k1 = kernel_ptr[1]; + __fp16 k2 = kernel_ptr[2]; + __fp16 k3 = kernel_ptr[3]; + __fp16 k4 = kernel_ptr[4]; + __fp16 k5 = kernel_ptr[5]; + __fp16 k6 = kernel_ptr[6]; + __fp16 k7 = kernel_ptr[7]; + + _acc0 = vfmacc_vf_f16m1(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k1, _input, vl); + _acc2 = vfmacc_vf_f16m1(_acc2, k2, _input, vl); + _acc3 = vfmacc_vf_f16m1(_acc3, k3, _input, vl); + _acc4 = vfmacc_vf_f16m1(_acc4, k4, _input, vl); + _acc5 = vfmacc_vf_f16m1(_acc5, k5, _input, vl); + _acc6 = vfmacc_vf_f16m1(_acc6, k6, _input, vl); + _acc7 = vfmacc_vf_f16m1(_acc7, k7, _input, vl); + + kernel_ptr += 8; + in_ptr += 8; + } + vse16_v_f16m1(out_ptr0, _acc0, vl); + vse16_v_f16m1(out_ptr1, _acc1, vl); + vse16_v_f16m1(out_ptr2, _acc2, vl); + vse16_v_f16m1(out_ptr3, _acc3, vl); + vse16_v_f16m1(out_ptr4, _acc4, vl); + vse16_v_f16m1(out_ptr5, _acc5, vl); + vse16_v_f16m1(out_ptr6, _acc6, vl); + vse16_v_f16m1(out_ptr7, _acc7, vl); + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + out_ptr4 += 8; + out_ptr5 += 8; + out_ptr6 += 8; + out_ptr7 += 8; + } + + // m8n4 + for (; j + 3 < n; j += 4) { + vfloat16m1_t _acc0 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc1 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc2 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc3 = vle16_v_f16m1(bias_ptr, vl); // init acc with bias_data + + __fp16 *kernel_ptr = kernel_data; + + __fp16 *in_ptr0 = in_ptr; + __fp16 *in_ptr1 = in_ptr0 + k; + __fp16 *in_ptr2 = in_ptr1 + k; + __fp16 *in_ptr3 = in_ptr2 + k; + + out_ptr1 = out_ptr0 + 1; + out_ptr2 = out_ptr0 + 2; + out_ptr3 = out_ptr0 + 3; + + for (int c = 0; c < k; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, in_ptr1[c], _kernel, vl); + _acc2 = vfmacc_vf_f16m1(_acc2, in_ptr2[c], _kernel, vl); + _acc3 = vfmacc_vf_f16m1(_acc3, in_ptr3[c], _kernel, vl); + kernel_ptr += 8; + } + vsse16_v_f16m1(out_ptr0, ldc * sizeof(__fp16), _acc0, vl); + vsse16_v_f16m1(out_ptr1, ldc * sizeof(__fp16), _acc1, vl); + vsse16_v_f16m1(out_ptr2, ldc * sizeof(__fp16), _acc2, vl); + vsse16_v_f16m1(out_ptr3, ldc * sizeof(__fp16), _acc3, vl); + out_ptr0 += 4; + in_ptr += 4 * k; + } + + // m8n2 + for (; j + 1 < n; j += 2) { + vfloat16m1_t _acc0 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc1 = vle16_v_f16m1(bias_ptr, vl); + + __fp16 *kernel_ptr = kernel_data; + + __fp16 *in_ptr0 = in_ptr; + __fp16 *in_ptr1 = in_ptr0 + k; + + out_ptr1 = out_ptr0 + 1; + + for (int c = 0; c < k; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, in_ptr1[c], _kernel, vl); + kernel_ptr += 8; + } + vsse16_v_f16m1(out_ptr0, ldc * sizeof(__fp16), _acc0, vl); + vsse16_v_f16m1(out_ptr1, ldc * sizeof(__fp16), _acc1, vl); + out_ptr0 += 2; + in_ptr += 2 * k; + } + + // m8n1 + for (; j < n; j++) { + vfloat16m1_t _acc0 = vle16_v_f16m1(bias_ptr, vl); + __fp16 *kernel_ptr = kernel_data; + __fp16 *in_ptr0 = in_ptr; + + for (int c = 0; c < k; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, in_ptr0[c], _kernel, vl); + kernel_ptr += 8; + } + vsse16_v_f16m1(out_ptr0, ldc * sizeof(__fp16), _acc0, vl); + } + kernel_data += 8 * k; + output_data += 8 * ldc; + bias_ptr += 8; + } + + // m4 + for (; i + 3 < m; i += 4) { + vl = vsetvl_e16m2(16); + + __fp16 *in_ptr = input_data; + + __fp16 *out_ptr0 = output_data; + __fp16 *out_ptr1 = out_ptr0 + ldc; + __fp16 *out_ptr2 = out_ptr1 + ldc; + __fp16 *out_ptr3 = out_ptr2 + ldc; + + int j = 0; + // m4n16 loop + for (; j + 15 < n; j += 16) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m2_t _acc0 = vfmv_v_f_f16m2(bias_ptr[0], vl); + vfloat16m2_t _acc1 = vfmv_v_f_f16m2(bias_ptr[1], vl); + vfloat16m2_t _acc2 = vfmv_v_f_f16m2(bias_ptr[2], vl); + vfloat16m2_t _acc3 = vfmv_v_f_f16m2(bias_ptr[3], vl); + + for (int c = 0; c < k; c++) { + vfloat16m2_t _input = vle16_v_f16m2(in_ptr, vl); + + __fp16 k0 = kernel_ptr[0]; + __fp16 k1 = kernel_ptr[1]; + __fp16 k2 = kernel_ptr[2]; + __fp16 k3 = kernel_ptr[3]; + _acc0 = vfmacc_vf_f16m2(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f16m2(_acc1, k1, _input, vl); + _acc2 = vfmacc_vf_f16m2(_acc2, k2, _input, vl); + _acc3 = vfmacc_vf_f16m2(_acc3, k3, _input, vl); + + kernel_ptr += 4; + in_ptr += 16; + } + vse16_v_f16m2(out_ptr0, _acc0, vl); + vse16_v_f16m2(out_ptr1, _acc1, vl); + vse16_v_f16m2(out_ptr2, _acc2, vl); + vse16_v_f16m2(out_ptr3, _acc3, vl); + out_ptr0 += 16; + out_ptr1 += 16; + out_ptr2 += 16; + out_ptr3 += 16; + } + + // m4n8 + for (; j + 7 < n; j += 8) { + vl = vsetvl_e16m1(8); + + __fp16 *kernel_ptr = kernel_data; + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias_ptr[0], vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(bias_ptr[1], vl); + vfloat16m1_t _acc2 = vfmv_v_f_f16m1(bias_ptr[2], vl); + vfloat16m1_t _acc3 = vfmv_v_f_f16m1(bias_ptr[3], vl); + for (int c = 0; c < k; c++) { + vfloat16m1_t _input = vle16_v_f16m1(in_ptr, vl); + + __fp16 k0 = kernel_ptr[0]; + __fp16 k1 = kernel_ptr[1]; + __fp16 k2 = kernel_ptr[2]; + __fp16 k3 = kernel_ptr[3]; + + _acc0 = vfmacc_vf_f16m1(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k1, _input, vl); + _acc2 = vfmacc_vf_f16m1(_acc2, k2, _input, vl); + _acc3 = vfmacc_vf_f16m1(_acc3, k3, _input, vl); + + kernel_ptr += 4; + in_ptr += 8; + } + vse16_v_f16m1(out_ptr0, _acc0, vl); + vse16_v_f16m1(out_ptr1, _acc1, vl); + vse16_v_f16m1(out_ptr2, _acc2, vl); + vse16_v_f16m1(out_ptr3, _acc3, vl); + + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + } + + // TODO: rvv opt + for (; j < n; j++) { + __fp16 acc0 = bias_ptr[0]; + __fp16 acc1 = bias_ptr[1]; + __fp16 acc2 = bias_ptr[2]; + __fp16 acc3 = bias_ptr[3]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[4 * c] * in_ptr[c]; + acc1 += kernel_data[4 * c + 1] * in_ptr[c]; + acc2 += kernel_data[4 * c + 2] * in_ptr[c]; + acc3 += kernel_data[4 * c + 3] * in_ptr[c]; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + *out_ptr2++ = acc2; + *out_ptr3++ = acc3; + in_ptr += k; + } + kernel_data += 4 * k; + output_data += 4 * ldc; + bias_ptr += 4; + } + + // m2 + for (; i + 1 < m; i += 2) { + vl = vsetvl_e16m2(16); + + __fp16 *in_ptr = input_data; + __fp16 *out_ptr0 = output_data; + __fp16 *out_ptr1 = out_ptr0 + ldc; + + int j = 0; + // m2n16 loop + for (; j + 15 < n; j += 16) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m2_t _acc0 = vfmv_v_f_f16m2(bias_ptr[0], vl); + vfloat16m2_t _acc1 = vfmv_v_f_f16m2(bias_ptr[1], vl); + + for (int c = 0; c < k; c++) { + vfloat16m2_t _input = vle16_v_f16m2(in_ptr, vl); + + __fp16 k0 = kernel_ptr[0]; + __fp16 k1 = kernel_ptr[1]; + _acc0 = vfmacc_vf_f16m2(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f16m2(_acc1, k1, _input, vl); + kernel_ptr += 2; + in_ptr += 16; + } + vse16_v_f16m2(out_ptr0, _acc0, vl); + vse16_v_f16m2(out_ptr1, _acc1, vl); + out_ptr0 += 16; + out_ptr1 += 16; + } + + vl = vsetvl_e16m1(8); + // m2n8 + for (; j + 7 < n; j += 8) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias_ptr[0], vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(bias_ptr[1], vl); + for (int c = 0; c < k; c++) { + vfloat16m1_t _input = vle16_v_f16m1(in_ptr, vl); + + __fp16 k0 = kernel_ptr[0]; + __fp16 k1 = kernel_ptr[1]; + _acc0 = vfmacc_vf_f16m1(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, k1, _input, vl); + + kernel_ptr += 2; + in_ptr += 8; + } + vse16_v_f16m1(out_ptr0, _acc0, vl); + vse16_v_f16m1(out_ptr1, _acc1, vl); + out_ptr0 += 8; + out_ptr1 += 8; + } + + // TODO: rvv opt + for (; j < n; j++) { + __fp16 acc0 = bias_ptr[0]; + __fp16 acc1 = bias_ptr[1]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[2 * c] * in_ptr[c]; + acc1 += kernel_data[2 * c + 1] * in_ptr[c]; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + in_ptr += k; + } + kernel_data += 2 * k; + output_data += 2 * ldc; + bias_ptr += 2; + } + + // m1 + for (; i < m; i++) { + vl = vsetvl_e16m2(16); + + __fp16 *in_ptr = input_data; + __fp16 *out_ptr0 = output_data; + + int j = 0; + // m1n16 loop + for (; j + 15 < n; j += 16) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m2_t _acc0 = vfmv_v_f_f16m2(bias_ptr[0], vl); + + for (int c = 0; c < k; c++) { + vfloat16m2_t _input = vle16_v_f16m2(in_ptr, vl); + __fp16 k0 = kernel_ptr[0]; + _acc0 = vfmacc_vf_f16m2(_acc0, k0, _input, vl); + kernel_ptr += 1; + in_ptr += 16; + } + vse16_v_f16m2(out_ptr0, _acc0, vl); + out_ptr0 += 16; + } + + vl = vsetvl_e16m1(8); + // m1n8 + for (; j + 7 < n; j += 8) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias_ptr[0], vl); + for (int c = 0; c < k; c++) { + vfloat16m1_t _input = vle16_v_f16m1(in_ptr, vl); + __fp16 k0 = kernel_ptr[0]; + _acc0 = vfmacc_vf_f16m1(_acc0, k0, _input, vl); + kernel_ptr += 1; + in_ptr += 8; + } + vse16_v_f16m1(out_ptr0, _acc0, vl); + out_ptr0 += 8; + } + + // TODO: rvv opt + for (; j < n; j++) { + __fp16 acc0 = bias_ptr[0]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[c] * in_ptr[c]; + } + *out_ptr0++ = acc0; + in_ptr += k; + } + } + + if (!flag_bias) { + csi_mem_free(bias); + bias = NULL; + } +} + +/************************************************************* + note: VLEN = 256 +*************************************************************/ +void csi_nn_rvv256_reorder_kernel_n16_fp16(__fp16 *a, __fp16 *sa, int m, int k, int ldx) +{ + int i = 0; + + for (; i + 15 < m; i += 16) { + for (int j = 0; j < k; j++) { + sa[i * k + 16 * j + 0] = a[(i + 0) * k + j]; + sa[i * k + 16 * j + 1] = a[(i + 1) * k + j]; + sa[i * k + 16 * j + 2] = a[(i + 2) * k + j]; + sa[i * k + 16 * j + 3] = a[(i + 3) * k + j]; + sa[i * k + 16 * j + 4] = a[(i + 4) * k + j]; + sa[i * k + 16 * j + 5] = a[(i + 5) * k + j]; + sa[i * k + 16 * j + 6] = a[(i + 6) * k + j]; + sa[i * k + 16 * j + 7] = a[(i + 7) * k + j]; + sa[i * k + 16 * j + 8] = a[(i + 8) * k + j]; + sa[i * k + 16 * j + 9] = a[(i + 9) * k + j]; + sa[i * k + 16 * j + 10] = a[(i + 10) * k + j]; + sa[i * k + 16 * j + 11] = a[(i + 11) * k + j]; + sa[i * k + 16 * j + 12] = a[(i + 12) * k + j]; + sa[i * k + 16 * j + 13] = a[(i + 13) * k + j]; + sa[i * k + 16 * j + 14] = a[(i + 14) * k + j]; + sa[i * k + 16 * j + 15] = a[(i + 15) * k + j]; + } + } + + for (; i + 7 < m; i += 8) { + for (int j = 0; j < k; j++) { + sa[i * k + 8 * j + 0] = a[(i + 0) * k + j]; + sa[i * k + 8 * j + 1] = a[(i + 1) * k + j]; + sa[i * k + 8 * j + 2] = a[(i + 2) * k + j]; + sa[i * k + 8 * j + 3] = a[(i + 3) * k + j]; + sa[i * k + 8 * j + 4] = a[(i + 4) * k + j]; + sa[i * k + 8 * j + 5] = a[(i + 5) * k + j]; + sa[i * k + 8 * j + 6] = a[(i + 6) * k + j]; + sa[i * k + 8 * j + 7] = a[(i + 7) * k + j]; + } + } + + for (; i + 3 < m; i += 4) { + for (int j = 0; j < k; j++) { + sa[i * k + 4 * j + 0] = a[(i + 0) * k + j]; + sa[i * k + 4 * j + 1] = a[(i + 1) * k + j]; + sa[i * k + 4 * j + 2] = a[(i + 2) * k + j]; + sa[i * k + 4 * j + 3] = a[(i + 3) * k + j]; + } + } + + for (; i + 1 < m; i += 2) { + for (int j = 0; j < k; j++) { + sa[i * k + 2 * j + 0] = a[(i + 0) * k + j]; + sa[i * k + 2 * j + 1] = a[(i + 1) * k + j]; + } + } + + for (; i < m; i++) { + for (int j = 0; j < k; j++) { + sa[i * k + 1 * j + 0] = a[(i + 0) * k + j]; + } + } +} + +void csi_nn_rvv256_reorder_input_z16_fp16(__fp16 *b, __fp16 *sb, int k, int n, int ldx) +{ + int vl = vsetvl_e16m1(16); + __fp16 *b0 = NULL; + int i = 0; + for (; i + 15 < n; i += 16) { + b0 = b + i; + for (int j = 0; j < k; j++) { + vfloat16m1_t _tmp = vle16_v_f16m1(b0, vl); + b0 += ldx; + vse16_v_f16m1(sb, _tmp, vl); + sb += 16; + } + } + + for (; i < n; i++) { + vl = vsetvl_e16m1(16); + b0 = b + i; + int j = 0; + for (; j + 15 < k; j += 16) { + vfloat16m1_t _tmp = vlse16_v_f16m1(b0, ldx * sizeof(__fp16), vl); + b0 += 16 * ldx; + vse16_v_f16m1(sb, _tmp, vl); + sb += 16; + } + if (j < k) { + vl = vsetvl_e16m1(k & 15); + vfloat16m1_t _tmp = vlse16_v_f16m1(b0, ldx * sizeof(__fp16), vl); + vse16_v_f16m1(sb, _tmp, vl); + sb += vl; + } + } +} + +void csi_nn_rvv256_gemm_16x16_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, int m, int k, + int n, int ldc, __fp16 *bias) +{ + __fp16 *kernel_data = (__fp16 *)sa; + __fp16 *input_data = (__fp16 *)sb; + __fp16 *output_data = dst; + + int flag_bias = 1; // default: conv2d layer include bias + if (bias == NULL) { + flag_bias = 0; + bias = (__fp16 *)csi_mem_alloc(m * 2); + } + __fp16 *bias_ptr = bias; + + int vl; + + int i = 0; + // m16 loop + for (; i + 15 < m; i += 16) { + vl = vsetvl_e16m1(16); + + __fp16 *in_ptr = input_data; + + __fp16 *out_ptr0 = output_data; + __fp16 *out_ptr1 = out_ptr0 + ldc; + __fp16 *out_ptr2 = out_ptr1 + ldc; + __fp16 *out_ptr3 = out_ptr2 + ldc; + __fp16 *out_ptr4 = out_ptr3 + ldc; + __fp16 *out_ptr5 = out_ptr4 + ldc; + __fp16 *out_ptr6 = out_ptr5 + ldc; + __fp16 *out_ptr7 = out_ptr6 + ldc; + __fp16 *out_ptr8 = out_ptr7 + ldc; + __fp16 *out_ptr9 = out_ptr8 + ldc; + __fp16 *out_ptr10 = out_ptr9 + ldc; + __fp16 *out_ptr11 = out_ptr10 + ldc; + __fp16 *out_ptr12 = out_ptr11 + ldc; + __fp16 *out_ptr13 = out_ptr12 + ldc; + __fp16 *out_ptr14 = out_ptr13 + ldc; + __fp16 *out_ptr15 = out_ptr14 + ldc; + + int j = 0; + // m16n16 loop + for (; j + 15 < n; j += 16) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias_ptr[0], vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(bias_ptr[1], vl); + vfloat16m1_t _acc2 = vfmv_v_f_f16m1(bias_ptr[2], vl); + vfloat16m1_t _acc3 = vfmv_v_f_f16m1(bias_ptr[3], vl); + vfloat16m1_t _acc4 = vfmv_v_f_f16m1(bias_ptr[4], vl); + vfloat16m1_t _acc5 = vfmv_v_f_f16m1(bias_ptr[5], vl); + vfloat16m1_t _acc6 = vfmv_v_f_f16m1(bias_ptr[6], vl); + vfloat16m1_t _acc7 = vfmv_v_f_f16m1(bias_ptr[7], vl); + vfloat16m1_t _acc8 = vfmv_v_f_f16m1(bias_ptr[8], vl); + vfloat16m1_t _acc9 = vfmv_v_f_f16m1(bias_ptr[9], vl); + vfloat16m1_t _acc10 = vfmv_v_f_f16m1(bias_ptr[10], vl); + vfloat16m1_t _acc11 = vfmv_v_f_f16m1(bias_ptr[11], vl); + vfloat16m1_t _acc12 = vfmv_v_f_f16m1(bias_ptr[12], vl); + vfloat16m1_t _acc13 = vfmv_v_f_f16m1(bias_ptr[13], vl); + vfloat16m1_t _acc14 = vfmv_v_f_f16m1(bias_ptr[14], vl); + vfloat16m1_t _acc15 = vfmv_v_f_f16m1(bias_ptr[15], vl); + + for (int c = 0; c < k; c++) { + vfloat16m1_t _input = vle16_v_f16m1(in_ptr, vl); + + _acc0 = vfmacc_vf_f16m1(_acc0, kernel_ptr[0], _input, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, kernel_ptr[1], _input, vl); + _acc2 = vfmacc_vf_f16m1(_acc2, kernel_ptr[2], _input, vl); + _acc3 = vfmacc_vf_f16m1(_acc3, kernel_ptr[3], _input, vl); + _acc4 = vfmacc_vf_f16m1(_acc4, kernel_ptr[4], _input, vl); + _acc5 = vfmacc_vf_f16m1(_acc5, kernel_ptr[5], _input, vl); + _acc6 = vfmacc_vf_f16m1(_acc6, kernel_ptr[6], _input, vl); + _acc7 = vfmacc_vf_f16m1(_acc7, kernel_ptr[7], _input, vl); + _acc8 = vfmacc_vf_f16m1(_acc8, kernel_ptr[8], _input, vl); + _acc9 = vfmacc_vf_f16m1(_acc9, kernel_ptr[9], _input, vl); + _acc10 = vfmacc_vf_f16m1(_acc10, kernel_ptr[10], _input, vl); + _acc11 = vfmacc_vf_f16m1(_acc11, kernel_ptr[11], _input, vl); + _acc12 = vfmacc_vf_f16m1(_acc12, kernel_ptr[12], _input, vl); + _acc13 = vfmacc_vf_f16m1(_acc13, kernel_ptr[13], _input, vl); + _acc14 = vfmacc_vf_f16m1(_acc14, kernel_ptr[14], _input, vl); + _acc15 = vfmacc_vf_f16m1(_acc15, kernel_ptr[15], _input, vl); + + kernel_ptr += 16; + in_ptr += 16; + } + vse16_v_f16m1(out_ptr0, _acc0, vl); + vse16_v_f16m1(out_ptr1, _acc1, vl); + vse16_v_f16m1(out_ptr2, _acc2, vl); + vse16_v_f16m1(out_ptr3, _acc3, vl); + vse16_v_f16m1(out_ptr4, _acc4, vl); + vse16_v_f16m1(out_ptr5, _acc5, vl); + vse16_v_f16m1(out_ptr6, _acc6, vl); + vse16_v_f16m1(out_ptr7, _acc7, vl); + vse16_v_f16m1(out_ptr8, _acc8, vl); + vse16_v_f16m1(out_ptr9, _acc9, vl); + vse16_v_f16m1(out_ptr10, _acc10, vl); + vse16_v_f16m1(out_ptr11, _acc11, vl); + vse16_v_f16m1(out_ptr12, _acc12, vl); + vse16_v_f16m1(out_ptr13, _acc13, vl); + vse16_v_f16m1(out_ptr14, _acc14, vl); + vse16_v_f16m1(out_ptr15, _acc15, vl); + out_ptr0 += 16; + out_ptr1 += 16; + out_ptr2 += 16; + out_ptr3 += 16; + out_ptr4 += 16; + out_ptr5 += 16; + out_ptr6 += 16; + out_ptr7 += 16; + out_ptr8 += 16; + out_ptr9 += 16; + out_ptr10 += 16; + out_ptr11 += 16; + out_ptr12 += 16; + out_ptr13 += 16; + out_ptr14 += 16; + out_ptr15 += 16; + } + // m16n8 + for (; j + 7 < n; j += 8) { + vfloat16m1_t _acc0 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc1 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc2 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc3 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc4 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc5 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc6 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc7 = vle16_v_f16m1(bias_ptr, vl); + + __fp16 *kernel_ptr = kernel_data; + + __fp16 *in_ptr0 = in_ptr; + __fp16 *in_ptr1 = in_ptr0 + k; + __fp16 *in_ptr2 = in_ptr1 + k; + __fp16 *in_ptr3 = in_ptr2 + k; + __fp16 *in_ptr4 = in_ptr3 + k; + __fp16 *in_ptr5 = in_ptr4 + k; + __fp16 *in_ptr6 = in_ptr5 + k; + __fp16 *in_ptr7 = in_ptr6 + k; + + out_ptr1 = out_ptr0 + 1; + out_ptr2 = out_ptr0 + 2; + out_ptr3 = out_ptr0 + 3; + out_ptr4 = out_ptr0 + 4; + out_ptr5 = out_ptr0 + 5; + out_ptr6 = out_ptr0 + 6; + out_ptr7 = out_ptr0 + 7; + + for (int c = 0; c < k; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, in_ptr1[c], _kernel, vl); + _acc2 = vfmacc_vf_f16m1(_acc2, in_ptr2[c], _kernel, vl); + _acc3 = vfmacc_vf_f16m1(_acc3, in_ptr3[c], _kernel, vl); + _acc4 = vfmacc_vf_f16m1(_acc4, in_ptr4[c], _kernel, vl); + _acc5 = vfmacc_vf_f16m1(_acc5, in_ptr5[c], _kernel, vl); + _acc6 = vfmacc_vf_f16m1(_acc6, in_ptr6[c], _kernel, vl); + _acc7 = vfmacc_vf_f16m1(_acc7, in_ptr7[c], _kernel, vl); + kernel_ptr += 16; + } + vsse16_v_f16m1(out_ptr0, ldc * sizeof(__fp16), _acc0, vl); + vsse16_v_f16m1(out_ptr1, ldc * sizeof(__fp16), _acc1, vl); + vsse16_v_f16m1(out_ptr2, ldc * sizeof(__fp16), _acc2, vl); + vsse16_v_f16m1(out_ptr3, ldc * sizeof(__fp16), _acc3, vl); + vsse16_v_f16m1(out_ptr4, ldc * sizeof(__fp16), _acc4, vl); + vsse16_v_f16m1(out_ptr5, ldc * sizeof(__fp16), _acc5, vl); + vsse16_v_f16m1(out_ptr6, ldc * sizeof(__fp16), _acc6, vl); + vsse16_v_f16m1(out_ptr7, ldc * sizeof(__fp16), _acc7, vl); + out_ptr0 += 8; + in_ptr += 8 * k; + } + // m16n4 + for (; j + 3 < n; j += 4) { + vfloat16m1_t _acc0 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc1 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc2 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc3 = vle16_v_f16m1(bias_ptr, vl); + + __fp16 *kernel_ptr = kernel_data; + + __fp16 *in_ptr0 = in_ptr; + __fp16 *in_ptr1 = in_ptr0 + k; + __fp16 *in_ptr2 = in_ptr1 + k; + __fp16 *in_ptr3 = in_ptr2 + k; + + out_ptr1 = out_ptr0 + 1; + out_ptr2 = out_ptr0 + 2; + out_ptr3 = out_ptr0 + 3; + + for (int c = 0; c < k; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, in_ptr1[c], _kernel, vl); + _acc2 = vfmacc_vf_f16m1(_acc2, in_ptr2[c], _kernel, vl); + _acc3 = vfmacc_vf_f16m1(_acc3, in_ptr3[c], _kernel, vl); + kernel_ptr += 16; + } + vsse16_v_f16m1(out_ptr0, ldc * sizeof(__fp16), _acc0, vl); + vsse16_v_f16m1(out_ptr1, ldc * sizeof(__fp16), _acc1, vl); + vsse16_v_f16m1(out_ptr2, ldc * sizeof(__fp16), _acc2, vl); + vsse16_v_f16m1(out_ptr3, ldc * sizeof(__fp16), _acc3, vl); + out_ptr0 += 4; + in_ptr += 4 * k; + } + // m16n2 + for (; j + 1 < n; j += 2) { + vfloat16m1_t _acc0 = vle16_v_f16m1(bias_ptr, vl); + vfloat16m1_t _acc1 = vle16_v_f16m1(bias_ptr, vl); + + __fp16 *kernel_ptr = kernel_data; + + __fp16 *in_ptr0 = in_ptr; + __fp16 *in_ptr1 = in_ptr0 + k; + + out_ptr1 = out_ptr0 + 1; + + for (int c = 0; c < k; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, in_ptr1[c], _kernel, vl); + kernel_ptr += 16; + } + vsse16_v_f16m1(out_ptr0, ldc * sizeof(__fp16), _acc0, vl); + vsse16_v_f16m1(out_ptr1, ldc * sizeof(__fp16), _acc1, vl); + out_ptr0 += 2; + in_ptr += 2 * k; + } + // m16n1 + for (; j < n; j++) { + vfloat16m1_t _acc0 = vle16_v_f16m1(bias_ptr, vl); + __fp16 *kernel_ptr = kernel_data; + __fp16 *in_ptr0 = in_ptr; + + for (int c = 0; c < k; c++) { + vfloat16m1_t _kernel = vle16_v_f16m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, in_ptr0[c], _kernel, vl); + kernel_ptr += 16; + } + vsse16_v_f16m1(out_ptr0, ldc * sizeof(__fp16), _acc0, vl); + out_ptr0 += 1; + in_ptr += 1 * k; + } + kernel_data += 16 * k; + output_data += 16 * ldc; + bias_ptr += 16; + } + + // m8 + for (; i + 7 < m; i += 8) { + vl = vsetvl_e16m1(16); + + __fp16 *in_ptr = input_data; + + __fp16 *out_ptr0 = output_data; + __fp16 *out_ptr1 = out_ptr0 + ldc; + __fp16 *out_ptr2 = out_ptr1 + ldc; + __fp16 *out_ptr3 = out_ptr2 + ldc; + __fp16 *out_ptr4 = out_ptr3 + ldc; + __fp16 *out_ptr5 = out_ptr4 + ldc; + __fp16 *out_ptr6 = out_ptr5 + ldc; + __fp16 *out_ptr7 = out_ptr6 + ldc; + + int j = 0; + // m8n16 loop + for (; j + 15 < n; j += 16) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias_ptr[0], vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(bias_ptr[1], vl); + vfloat16m1_t _acc2 = vfmv_v_f_f16m1(bias_ptr[2], vl); + vfloat16m1_t _acc3 = vfmv_v_f_f16m1(bias_ptr[3], vl); + vfloat16m1_t _acc4 = vfmv_v_f_f16m1(bias_ptr[4], vl); + vfloat16m1_t _acc5 = vfmv_v_f_f16m1(bias_ptr[5], vl); + vfloat16m1_t _acc6 = vfmv_v_f_f16m1(bias_ptr[6], vl); + vfloat16m1_t _acc7 = vfmv_v_f_f16m1(bias_ptr[7], vl); + + for (int c = 0; c < k; c++) { + vfloat16m1_t _input = vle16_v_f16m1(in_ptr, vl); + + _acc0 = vfmacc_vf_f16m1(_acc0, kernel_ptr[0], _input, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, kernel_ptr[1], _input, vl); + _acc2 = vfmacc_vf_f16m1(_acc2, kernel_ptr[2], _input, vl); + _acc3 = vfmacc_vf_f16m1(_acc3, kernel_ptr[3], _input, vl); + _acc4 = vfmacc_vf_f16m1(_acc4, kernel_ptr[4], _input, vl); + _acc5 = vfmacc_vf_f16m1(_acc5, kernel_ptr[5], _input, vl); + _acc6 = vfmacc_vf_f16m1(_acc6, kernel_ptr[6], _input, vl); + _acc7 = vfmacc_vf_f16m1(_acc7, kernel_ptr[7], _input, vl); + + kernel_ptr += 8; + in_ptr += 16; + } + vse16_v_f16m1(out_ptr0, _acc0, vl); + vse16_v_f16m1(out_ptr1, _acc1, vl); + vse16_v_f16m1(out_ptr2, _acc2, vl); + vse16_v_f16m1(out_ptr3, _acc3, vl); + vse16_v_f16m1(out_ptr4, _acc4, vl); + vse16_v_f16m1(out_ptr5, _acc5, vl); + vse16_v_f16m1(out_ptr6, _acc6, vl); + vse16_v_f16m1(out_ptr7, _acc7, vl); + out_ptr0 += 16; + out_ptr1 += 16; + out_ptr2 += 16; + out_ptr3 += 16; + out_ptr4 += 16; + out_ptr5 += 16; + out_ptr6 += 16; + out_ptr7 += 16; + } + // m8n8 + // TODO: rvv opt + for (; j < n; j++) { + float acc0 = bias_ptr[0]; + float acc1 = bias_ptr[1]; + float acc2 = bias_ptr[2]; + float acc3 = bias_ptr[3]; + float acc4 = bias_ptr[4]; + float acc5 = bias_ptr[5]; + float acc6 = bias_ptr[6]; + float acc7 = bias_ptr[7]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[8 * c] * in_ptr[c]; + acc1 += kernel_data[8 * c + 1] * in_ptr[c]; + acc2 += kernel_data[8 * c + 2] * in_ptr[c]; + acc3 += kernel_data[8 * c + 3] * in_ptr[c]; + acc4 += kernel_data[8 * c + 4] * in_ptr[c]; + acc5 += kernel_data[8 * c + 5] * in_ptr[c]; + acc6 += kernel_data[8 * c + 6] * in_ptr[c]; + acc7 += kernel_data[8 * c + 7] * in_ptr[c]; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + *out_ptr2++ = acc2; + *out_ptr3++ = acc3; + *out_ptr4++ = acc4; + *out_ptr5++ = acc5; + *out_ptr6++ = acc6; + *out_ptr7++ = acc7; + in_ptr += k; + } + kernel_data += 8 * k; + output_data += 8 * ldc; + bias_ptr += 8; + } + + // m4 + for (; i + 3 < m; m += 4) { + vl = vsetvl_e16m1(16); + + __fp16 *in_ptr = input_data; + + __fp16 *out_ptr0 = output_data; + __fp16 *out_ptr1 = out_ptr0 + ldc; + __fp16 *out_ptr2 = out_ptr1 + ldc; + __fp16 *out_ptr3 = out_ptr2 + ldc; + + int j = 0; + // m4n16 loop + for (; j + 15 < n; j += 16) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias_ptr[0], vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(bias_ptr[1], vl); + vfloat16m1_t _acc2 = vfmv_v_f_f16m1(bias_ptr[2], vl); + vfloat16m1_t _acc3 = vfmv_v_f_f16m1(bias_ptr[3], vl); + + for (int c = 0; c < k; c++) { + vfloat16m1_t _input = vle16_v_f16m1(in_ptr, vl); + + _acc0 = vfmacc_vf_f16m1(_acc0, kernel_ptr[0], _input, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, kernel_ptr[1], _input, vl); + _acc2 = vfmacc_vf_f16m1(_acc2, kernel_ptr[2], _input, vl); + _acc3 = vfmacc_vf_f16m1(_acc3, kernel_ptr[3], _input, vl); + + kernel_ptr += 4; + in_ptr += 16; + } + vse16_v_f16m1(out_ptr0, _acc0, vl); + vse16_v_f16m1(out_ptr1, _acc1, vl); + vse16_v_f16m1(out_ptr2, _acc2, vl); + vse16_v_f16m1(out_ptr3, _acc3, vl); + out_ptr0 += 16; + out_ptr1 += 16; + out_ptr2 += 16; + out_ptr3 += 16; + } + // m4n8 + // TODO: rvv opt + for (; j < n; j++) { + float acc0 = bias_ptr[0]; + float acc1 = bias_ptr[1]; + float acc2 = bias_ptr[2]; + float acc3 = bias_ptr[3]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[4 * c] * in_ptr[c]; + acc1 += kernel_data[4 * c + 1] * in_ptr[c]; + acc2 += kernel_data[4 * c + 2] * in_ptr[c]; + acc3 += kernel_data[4 * c + 3] * in_ptr[c]; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + *out_ptr2++ = acc2; + *out_ptr3++ = acc3; + in_ptr += k; + } + kernel_data += 4 * k; + output_data += 4 * ldc; + bias_ptr += 4; + } + + // m2 + for (; i + 1 < m; m += 2) { + vl = vsetvl_e16m1(16); + + __fp16 *in_ptr = input_data; + + __fp16 *out_ptr0 = output_data; + __fp16 *out_ptr1 = out_ptr0 + ldc; + + int j = 0; + // m2n16 loop + for (; j + 15 < n; j += 16) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias_ptr[0], vl); + vfloat16m1_t _acc1 = vfmv_v_f_f16m1(bias_ptr[1], vl); + + for (int c = 0; c < k; c++) { + vfloat16m1_t _input = vle16_v_f16m1(in_ptr, vl); + + _acc0 = vfmacc_vf_f16m1(_acc0, kernel_ptr[0], _input, vl); + _acc1 = vfmacc_vf_f16m1(_acc1, kernel_ptr[1], _input, vl); + + kernel_ptr += 2; + in_ptr += 16; + } + vse16_v_f16m1(out_ptr0, _acc0, vl); + vse16_v_f16m1(out_ptr1, _acc1, vl); + out_ptr0 += 16; + out_ptr1 += 16; + } + // m2n8 + for (; j < n; j++) { + float acc0 = bias_ptr[0]; + float acc1 = bias_ptr[1]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[2 * c] * in_ptr[c]; + acc1 += kernel_data[2 * c + 1] * in_ptr[c]; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + in_ptr += k; + } + kernel_data += 2 * k; + output_data += 2 * ldc; + bias_ptr += 2; + } + + // m1 + for (; i < m; i++) { + vl = vsetvl_e16m1(16); + + __fp16 *in_ptr = input_data; + __fp16 *out_ptr0 = output_data; + + int j = 0; + // m1n16 loop + for (; j + 15 < n; j += 16) { + __fp16 *kernel_ptr = kernel_data; + vfloat16m1_t _acc0 = vfmv_v_f_f16m1(bias_ptr[0], vl); + + for (int c = 0; c < k; c++) { + vfloat16m1_t _input = vle16_v_f16m1(in_ptr, vl); + _acc0 = vfmacc_vf_f16m1(_acc0, kernel_ptr[0], _input, vl); + kernel_ptr += 1; + in_ptr += 16; + } + vse16_v_f16m1(out_ptr0, _acc0, vl); + out_ptr0 += 16; + } + // m1n8 + for (; j < n; j++) { + float acc0 = bias_ptr[0]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[c] * in_ptr[c]; + } + *out_ptr0++ = acc0; + in_ptr += k; + } + } + + if (!flag_bias) { + csi_mem_free(bias); + bias = NULL; + } +} diff --git a/source/thead_rvv/gemm_int4.c b/source/thead_rvv/gemm_int4.c new file mode 100644 index 00000000..918b2581 --- /dev/null +++ b/source/thead_rvv/gemm_int4.c @@ -0,0 +1,486 @@ +/* + * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ +#ifdef __riscv_xtheadv + +#include "csi_thead_rvv.h" + +static vint8mf4_t requantize_m2(vint32m2_t _src, int32_t multiplier, int32_t shift, int32_t out_zp, + int vl) +{ + vint32m2_t _mulh = vmulh_vx_i32m2(_src, multiplier, vl); + _mulh = vssra_vx_i32m2(_mulh, -shift - 1, vl); + _mulh = vadd_vx_i32m2(_mulh, out_zp, vl); + vint16m1_t _tmp1 = vnclip_wx_i16m1(_mulh, 0, vl); + vint8mf2_t _tmp2 = vnclip_wx_i8mf2(_tmp1, 0, vl); + vint8mf4_t _res = vpnclip_wx_i8mf4(vreinterpret_v_i8mf2_i16mf2(_tmp2), 0, vl / 2); + return _res; +} + +static vint8mf8_t requantize_m1(vint32m1_t _src, int32_t multiplier, int32_t shift, int32_t out_zp, + int vl) +{ + vint32m1_t _mulh = vmulh_vx_i32m1(_src, multiplier, vl); + _mulh = vssra_vx_i32m1(_mulh, -shift - 1, vl); + _mulh = vadd_vx_i32m1(_mulh, out_zp, vl); + vint16mf2_t _tmp1 = vnclip_wx_i16mf2(_mulh, 0, vl); + vint8mf4_t _tmp2 = vnclip_wx_i8mf4(_tmp1, 0, vl); + vint8mf8_t _res = vpnclip_wx_i8mf8(vreinterpret_v_i8mf4_i16mf4(_tmp2), 0, vl / 2); + return _res; +} + +/************************************************************* + * note: VLEN = 128 + * layerout: input/output-[n, h, w , c] kernel-[o, h, w, i] + *************************************************************/ +void csi_nn_rvv_reorder_input_n8_int4(int8_t *a, int8_t *sa, int m, int k, int ldx) +{ + int k4 = ((k - 1) & -4) + 4; + int i = 0; + // m8 + for (; i + 7 < m; i += 8) { + int j = 0; + // k16 + int32_t *in_ptr0 = (int32_t *)a; + int32_t *out_ptr0 = (int32_t *)sa; + for (; j + 15 < k; j += 16) { + vint32m2_t _nf0, _nf1, _nf2, _nf3; + vlsseg4e32_v_i32m2(&_nf0, &_nf1, &_nf2, &_nf3, in_ptr0, k * sizeof(int8_t), 8); + in_ptr0 += 4; + vse32_v_i32m2(out_ptr0, _nf0, 8); + out_ptr0 += 8; + vse32_v_i32m2(out_ptr0, _nf1, 8); + out_ptr0 += 8; + vse32_v_i32m2(out_ptr0, _nf2, 8); + out_ptr0 += 8; + vse32_v_i32m2(out_ptr0, _nf3, 8); + out_ptr0 += 8; + } + for (; j + 3 < k; j += 4) { + vint32m2_t _input = vlse32_v_i32m2(in_ptr0, k * sizeof(int8_t), 8); + in_ptr0++; + vse32_v_i32m2(out_ptr0, _input, 8); + out_ptr0 += 8; + } + if (j < k) { + int8_t *in_ptr1 = (int8_t *)in_ptr0; + int8_t *out_ptr1 = (int8_t *)out_ptr0; + for (int c = 0; c < 8; c++) { + vint8m1_t _input1 = vle8_v_i8m1(in_ptr1, k & 3); + in_ptr1 += k; + vse8_v_i8m1(out_ptr1, _input1, 4); + out_ptr1 += 4; + } + } + a += 8 * k; + sa += 8 * k4; + } + // m4 + for (; i + 3 < m; i += 4) { + int j = 0; + int32_t *in_ptr0 = (int32_t *)a; + int32_t *out_ptr0 = (int32_t *)sa; + for (; j + 15 < k; j += 16) { + vint32m1_t _nf0, _nf1, _nf2, _nf3; + vlsseg4e32_v_i32m1(&_nf0, &_nf1, &_nf2, &_nf3, in_ptr0, k * sizeof(int8_t), 4); + in_ptr0 += 4; + vse32_v_i32m1(out_ptr0, _nf0, 4); + out_ptr0 += 4; + vse32_v_i32m1(out_ptr0, _nf1, 4); + out_ptr0 += 4; + vse32_v_i32m1(out_ptr0, _nf2, 4); + out_ptr0 += 4; + vse32_v_i32m1(out_ptr0, _nf3, 4); + out_ptr0 += 4; + } + for (; j + 3 < k; j += 4) { + vint32m1_t _input = vlse32_v_i32m1(in_ptr0, k * sizeof(int8_t), 4); + in_ptr0++; + vse32_v_i32m1(out_ptr0, _input, 4); + out_ptr0 += 4; + } + if (j < k) { + int8_t *in_ptr1 = (int8_t *)in_ptr0; + int8_t *out_ptr1 = (int8_t *)out_ptr0; + for (int c = 0; c < 4; c++) { + vint8m1_t _input1 = vle8_v_i8m1(in_ptr1, k & 3); + in_ptr1 += k; + vse8_v_i8m1(out_ptr1, _input1, 4); + out_ptr1 += 4; + } + } + a += 4 * k; + sa += 4 * k4; + } + // m2 + for (; i + 1 < m; i += 2) { + int j = 0; + for (; j + 3 < k; j += 4) { + int8_t *in_ptr = a + j; + for (int c = 0; c < 2; c++) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 4); + in_ptr += k; + vse8_v_i8m1(sa, _input, 4); + sa += 4; + } + } + if (j < k) { + int8_t *in_ptr = a + j; + for (int c = 0; c < 2; c++) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, k & 3); + in_ptr += k; + vse8_v_i8m1(sa, _input, k & 3); + sa += 4; + } + } + a += 2 * k; + } + // m1 + for (; i < m; i++) { + memcpy(sa, a, k * sizeof(int8_t)); + } +} + +// 和 csi_nn_rvv_reorder_kernel_n8_int8 实现相同, 可以直接调用 csi_nn_rvv_reorder_kernel_n8_int8 +void csi_nn_rvv_reorder_kernel_n8_int4(int8_t *b, int8_t *sb, int n, int k, int ldx) +{ + // TODO: +} + +void csi_nn_rvv_gemm_8x8_int4(int8_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, int n, + int ldc, int32_t *bias, int32_t out_zp, int32_t *mult, int32_t *shift) +{ + int8_t *input_data = (int8_t *)sa; + int8_t *kernel_data = (int8_t *)sb; + int8_t *output_data = dst; + // please use fuse_zp2bias option in hhb, thus bias_data wont be NULL + int32_t *bias_data = bias; + int vl = 0; + int i = 0; + // m8 loop + vl = vsetvl_e32m2(8); + for (; i + 7 < m; i += 8) { + int8_t *kernel_ptr = kernel_data; + + int8_t *out_ptr0 = output_data; + int8_t *out_ptr1 = out_ptr0 + ldc; + int8_t *out_ptr2 = out_ptr1 + ldc; + int8_t *out_ptr3 = out_ptr2 + ldc; + int8_t *out_ptr4 = out_ptr3 + ldc; + int8_t *out_ptr5 = out_ptr4 + ldc; + int8_t *out_ptr6 = out_ptr5 + ldc; + int8_t *out_ptr7 = out_ptr6 + ldc; // ldc = m = h * w * inc + int j = 0; + // n8m8 loop + for (; j + 7 < n; j += 8) { + int32_t *in_ptr = (int32_t *)input_data; + vint32m2_t _acc0 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc1 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc2 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc3 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc4 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc5 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc6 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc7 = vle32_v_i32m2(bias_data + j, 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _kernel = vle8_v_i8m2(kernel_ptr, 32); + _acc0 = vpmaqa_vx_i32m2(_acc0, in_ptr[0], _kernel, 8); + _acc1 = vpmaqa_vx_i32m2(_acc1, in_ptr[1], _kernel, 8); + _acc2 = vpmaqa_vx_i32m2(_acc2, in_ptr[2], _kernel, 8); + _acc3 = vpmaqa_vx_i32m2(_acc3, in_ptr[3], _kernel, 8); + _acc4 = vpmaqa_vx_i32m2(_acc4, in_ptr[4], _kernel, 8); + _acc5 = vpmaqa_vx_i32m2(_acc5, in_ptr[5], _kernel, 8); + _acc6 = vpmaqa_vx_i32m2(_acc6, in_ptr[6], _kernel, 8); + _acc7 = vpmaqa_vx_i32m2(_acc7, in_ptr[7], _kernel, 8); + + in_ptr += 8; + kernel_ptr += 32; + } + vint8mf4_t _res0 = requantize_m2(_acc0, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res1 = requantize_m2(_acc1, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res2 = requantize_m2(_acc2, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res3 = requantize_m2(_acc3, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res4 = requantize_m2(_acc4, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res5 = requantize_m2(_acc5, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res6 = requantize_m2(_acc6, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res7 = requantize_m2(_acc7, mult[j], shift[j], out_zp, 8); + + vse8_v_i8mf4(out_ptr0, _res0, 4); + vse8_v_i8mf4(out_ptr1, _res1, 4); + vse8_v_i8mf4(out_ptr2, _res2, 4); + vse8_v_i8mf4(out_ptr3, _res3, 4); + vse8_v_i8mf4(out_ptr4, _res4, 4); + vse8_v_i8mf4(out_ptr5, _res5, 4); + vse8_v_i8mf4(out_ptr6, _res6, 4); + vse8_v_i8mf4(out_ptr7, _res7, 4); + out_ptr0 += 4; + out_ptr1 += 4; + out_ptr2 += 4; + out_ptr3 += 4; + out_ptr4 += 4; + out_ptr5 += 4; + out_ptr6 += 4; + out_ptr7 += 4; + } + // m8n4 + for (; j + 3 < n; j += 4) { + int32_t *in_ptr = (int32_t *)input_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc1 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc2 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc3 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc4 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc5 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc6 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc7 = vle32_v_i32m1(bias_data + j, 4); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 16); + _acc0 = vpmaqa_vx_i32m1(_acc0, in_ptr[0], _kernel, 4); + _acc1 = vpmaqa_vx_i32m1(_acc1, in_ptr[1], _kernel, 4); + _acc2 = vpmaqa_vx_i32m1(_acc2, in_ptr[2], _kernel, 4); + _acc3 = vpmaqa_vx_i32m1(_acc3, in_ptr[3], _kernel, 4); + _acc4 = vpmaqa_vx_i32m1(_acc4, in_ptr[4], _kernel, 4); + _acc5 = vpmaqa_vx_i32m1(_acc5, in_ptr[5], _kernel, 4); + _acc6 = vpmaqa_vx_i32m1(_acc6, in_ptr[6], _kernel, 4); + _acc7 = vpmaqa_vx_i32m1(_acc7, in_ptr[7], _kernel, 4); + + in_ptr += 8; + kernel_ptr += 16; + } + vint8mf8_t _res0 = requantize_m1(_acc0, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res1 = requantize_m1(_acc1, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res2 = requantize_m1(_acc2, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res3 = requantize_m1(_acc3, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res4 = requantize_m1(_acc4, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res5 = requantize_m1(_acc5, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res6 = requantize_m1(_acc6, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res7 = requantize_m1(_acc7, mult[j], shift[j], out_zp, 4); + vse8_v_i8mf8(out_ptr0, _res0, 2); + vse8_v_i8mf8(out_ptr1, _res1, 2); + vse8_v_i8mf8(out_ptr2, _res2, 2); + vse8_v_i8mf8(out_ptr3, _res3, 2); + vse8_v_i8mf8(out_ptr4, _res4, 2); + vse8_v_i8mf8(out_ptr5, _res5, 2); + vse8_v_i8mf8(out_ptr6, _res6, 2); + vse8_v_i8mf8(out_ptr7, _res7, 2); + out_ptr0 += 2; + out_ptr1 += 2; + out_ptr2 += 2; + out_ptr3 += 2; + out_ptr4 += 2; + out_ptr5 += 2; + out_ptr6 += 2; + out_ptr7 += 2; + } + // m8n2 + for (; j + 1 < n; j += 2) { + // TODO: + } + + input_data += 8 * k; + output_data += 8 * ldc; + } + // m4 + for (; i + 3 < m; i += 4) { + int8_t *kernel_ptr = kernel_data; + + int8_t *out_ptr0 = output_data; + int8_t *out_ptr1 = out_ptr0 + ldc; + int8_t *out_ptr2 = out_ptr1 + ldc; + int8_t *out_ptr3 = out_ptr2 + ldc; + int j = 0; + // m4n8 loop + for (; j + 7 < n; j += 8) { + int32_t *in_ptr = (int32_t *)input_data; + vint32m2_t _acc0 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc1 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc2 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc3 = vle32_v_i32m2(bias_data + j, 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _kernel = vle8_v_i8m2(kernel_ptr, 32); + _acc0 = vpmaqa_vx_i32m2(_acc0, in_ptr[0], _kernel, 8); + _acc1 = vpmaqa_vx_i32m2(_acc1, in_ptr[1], _kernel, 8); + _acc2 = vpmaqa_vx_i32m2(_acc2, in_ptr[2], _kernel, 8); + _acc3 = vpmaqa_vx_i32m2(_acc3, in_ptr[3], _kernel, 8); + + in_ptr += 4; + kernel_ptr += 32; + } + vint8mf4_t _res0 = requantize_m2(_acc0, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res1 = requantize_m2(_acc1, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res2 = requantize_m2(_acc2, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res3 = requantize_m2(_acc3, mult[j], shift[j], out_zp, 8); + vse8_v_i8mf4(out_ptr0, _res0, 4); + vse8_v_i8mf4(out_ptr1, _res1, 4); + vse8_v_i8mf4(out_ptr2, _res2, 4); + vse8_v_i8mf4(out_ptr3, _res3, 4); + out_ptr0 += 4; + out_ptr1 += 4; + out_ptr2 += 4; + out_ptr3 += 4; + } + // m4n4 + for (; j + 3 < n; j += 4) { + int32_t *in_ptr = (int32_t *)input_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc1 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc2 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc3 = vle32_v_i32m1(bias_data + j, 4); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 16); + _acc0 = vpmaqa_vx_i32m1(_acc0, in_ptr[0], _kernel, 4); + _acc1 = vpmaqa_vx_i32m1(_acc1, in_ptr[1], _kernel, 4); + _acc2 = vpmaqa_vx_i32m1(_acc2, in_ptr[2], _kernel, 4); + _acc3 = vpmaqa_vx_i32m1(_acc3, in_ptr[3], _kernel, 4); + + in_ptr += 4; + kernel_ptr += 16; + } + vint8mf8_t _res0 = requantize_m1(_acc0, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res1 = requantize_m1(_acc1, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res2 = requantize_m1(_acc2, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res3 = requantize_m1(_acc3, mult[j], shift[j], out_zp, 4); + vse8_v_i8mf8(out_ptr0, _res0, 2); + vse8_v_i8mf8(out_ptr1, _res1, 2); + vse8_v_i8mf8(out_ptr2, _res2, 2); + vse8_v_i8mf8(out_ptr3, _res3, 2); + out_ptr0 += 2; + out_ptr1 += 2; + out_ptr2 += 2; + out_ptr3 += 2; + } + // m4n2 + for (; j + 1 < n; j += 2) { + // TODO: + } + + input_data += 4 * k; + output_data += 4 * ldc; + } + // m2 + for (; i + 1 < m; i += 2) { + int8_t *kernel_ptr = kernel_data; + + int8_t *out_ptr0 = output_data; + int8_t *out_ptr1 = out_ptr0 + ldc; + int j = 0; + // m2n8 loop + for (; j + 7 < n; j += 8) { + int32_t *in_ptr = (int32_t *)input_data; + vint32m2_t _acc0 = vle32_v_i32m2(bias_data + j, 8); + vint32m2_t _acc1 = vle32_v_i32m2(bias_data + j, 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _kernel = vle8_v_i8m2(kernel_ptr, 32); + _acc0 = vpmaqa_vx_i32m2(_acc0, in_ptr[0], _kernel, 8); + _acc1 = vpmaqa_vx_i32m2(_acc1, in_ptr[1], _kernel, 8); + in_ptr += 2; + kernel_ptr += 32; + } + vint8mf4_t _res0 = requantize_m2(_acc0, mult[j], shift[j], out_zp, 8); + vint8mf4_t _res1 = requantize_m2(_acc1, mult[j], shift[j], out_zp, 8); + vse8_v_i8mf4(out_ptr0, _res0, 4); + vse8_v_i8mf4(out_ptr1, _res1, 4); + out_ptr0 += 4; + out_ptr1 += 4; + } + // m2n4 + for (; j + 3 < n; j += 4) { + int32_t *in_ptr = (int32_t *)input_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data + j, 4); + vint32m1_t _acc1 = vle32_v_i32m1(bias_data + j, 4); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 16); + _acc0 = vpmaqa_vx_i32m1(_acc0, in_ptr[0], _kernel, 4); + _acc1 = vpmaqa_vx_i32m1(_acc1, in_ptr[1], _kernel, 4); + in_ptr += 2; + kernel_ptr += 16; + } + vint8mf8_t _res0 = requantize_m1(_acc0, mult[j], shift[j], out_zp, 4); + vint8mf8_t _res1 = requantize_m1(_acc1, mult[j], shift[j], out_zp, 4); + vse8_v_i8mf8(out_ptr0, _res0, 2); + vse8_v_i8mf8(out_ptr1, _res1, 2); + out_ptr0 += 2; + out_ptr1 += 2; + } + // m2n2 + for (; j + 1 < n; j += 2) { + // TODO: + } + + input_data += 2 * k; + output_data += 2 * ldc; + } + // m1 + for (; i < m; i++) { + int8_t *kernel_ptr = kernel_data; + + int8_t *out_ptr0 = output_data; + int j = 0; + // m1n8 loop + for (; j + 7 < n; j += 8) { + int32_t *in_ptr = (int32_t *)input_data; + vint32m2_t _acc0 = vle32_v_i32m2(bias_data + j, 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _kernel = vle8_v_i8m2(kernel_ptr, 32); + _acc0 = vpmaqa_vx_i32m2(_acc0, in_ptr[0], _kernel, 8); + in_ptr += 1; + kernel_ptr += 32; + } + vint8mf4_t _res0 = requantize_m2(_acc0, mult[j], shift[j], out_zp, 8); + vse8_v_i8mf4(out_ptr0, _res0, 4); + out_ptr0 += 4; + } + // m1n4 + for (; j + 3 < n; j += 4) { + int32_t *in_ptr = (int32_t *)input_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data + j, 4); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 16); + _acc0 = vpmaqa_vx_i32m1(_acc0, in_ptr[0], _kernel, 4); + in_ptr += 1; + kernel_ptr += 16; + } + vint8mf8_t _res0 = requantize_m1(_acc0, mult[j], shift[j], out_zp, 4); + vse8_v_i8mf8(out_ptr0, _res0, 2); + out_ptr0 += 2; + } + // m1n2 + for (; j + 1 < n; j += 2) { + // TODO: + } + } +} + +#endif \ No newline at end of file diff --git a/source/thead_rvv/gemm_int8.c b/source/thead_rvv/gemm_int8.c new file mode 100644 index 00000000..a953d88a --- /dev/null +++ b/source/thead_rvv/gemm_int8.c @@ -0,0 +1,1564 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ +#ifdef __riscv_xtheadv + +#include "csi_thead_rvv.h" + +static vint8mf2_t requantize_m2(vint32m2_t _src, int32_t multiplier, int32_t shift, int32_t out_zp, + int vl) +{ + vint32m2_t _mulh = vmulh_vx_i32m2(_src, multiplier, vl); + _mulh = vssra_vx_i32m2(_mulh, -shift - 1, vl); + _mulh = vadd_vx_i32m2(_mulh, out_zp, vl); + vint16m1_t _tmp1 = vnclip_wx_i16m1(_mulh, 0, vl); + vint8mf2_t _tmp2 = vnclip_wx_i8mf2(_tmp1, 0, vl); + return _tmp2; +} + +static vint8mf4_t requantize_m1(vint32m1_t _src, int32_t multiplier, int32_t shift, int32_t out_zp, + int vl) +{ + vint32m1_t _mulh = vmulh_vx_i32m1(_src, multiplier, vl); + _mulh = vssra_vx_i32m1(_mulh, -shift - 1, vl); + _mulh = vadd_vx_i32m1(_mulh, out_zp, vl); + vint16mf2_t _tmp1 = vnclip_wx_i16mf2(_mulh, 0, vl); + vint8mf4_t _tmp2 = vnclip_wx_i8mf4(_tmp1, 0, vl); + return _tmp2; +} + +static int8_t requantize_single(int32_t src, int32_t multiplier, int32_t shift, int32_t out_zp) +{ + int64_t src_64 = (int64_t)src; + int64_t mult_64 = (int64_t)multiplier; + int64_t mulw = src * multiplier; + int32_t nudge = mulw >= 0 ? (1 << 30) : (1 - (1 << 30)); + int32_t mulh = (int32_t)((mulw + nudge) / (1ll << 31)); + int32_t res = mulh >> (-shift); + res += out_zp; + if (res > 127) res = 127; + if (res < -128) res = -128; + return (int8_t)res; +} + +static vint8mf2_t requantize_m2_s(vint32m2_t _src, int32_t *multiplier, int32_t *shift, + int32_t out_zp, int vl) +{ + vint32m2_t _mult = vle32_v_i32m2(multiplier, vl); + vint32m2_t _shift = vle32_v_i32m2(shift, vl); + vint32m2_t _mulh = vmulh_vv_i32m2(_src, _mult, vl); + _shift = vrsub_vx_i32m2(_shift, -1, vl); + _mulh = vssra_vv_i32m2(_mulh, vreinterpret_v_i32m2_u32m2(_shift), vl); + _mulh = vadd_vx_i32m2(_mulh, out_zp, vl); + vint16m1_t _tmp1 = vnclip_wx_i16m1(_mulh, 0, vl); + vint8mf2_t _tmp2 = vnclip_wx_i8mf2(_tmp1, 0, vl); + return _tmp2; +} + +static vint8mf4_t requantize_m1_s(vint32m1_t _src, int32_t *multiplier, int32_t *shift, + int32_t out_zp, int vl) +{ + vint32m1_t _mult = vle32_v_i32m1(multiplier, vl); + vint32m1_t _shift = vle32_v_i32m1(shift, vl); + vint32m1_t _mulh = vmulh_vv_i32m1(_src, _mult, vl); + _shift = vrsub_vx_i32m1(_shift, -1, vl); + _mulh = vssra_vv_i32m1(_mulh, vreinterpret_v_i32m1_u32m1(_shift), vl); + _mulh = vadd_vx_i32m1(_mulh, out_zp, vl); + vint16mf2_t _tmp1 = vnclip_wx_i16mf2(_mulh, 0, vl); + vint8mf4_t _tmp2 = vnclip_wx_i8mf4(_tmp1, 0, vl); + return _tmp2; +} + +/************************************************************* + note: VLEN = 128 +*************************************************************/ +void csi_nn_rvv_reorder_kernel_n8_int8(int8_t *a, int8_t *sa, int m, int k, int ldx) +{ + int i = 0; + for (; i + 7 < m; i += 8) { + int j = 0; + for (; j + 3 < k; j += 4) { + int8_t *in_ptr = a + j; + for (int c = 0; c < 8; c++) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 4); + in_ptr += k; + vse8_v_i8m1(sa, _input, 4); + sa += 4; + } + } + // k_tail + if (j < k) { + int8_t *in_ptr = a + j; + for (int c = 0; c < 8; c++) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, k & 3); + in_ptr += k; + vse8_v_i8m1(sa, _input, k & 3); + sa += 4; + } + } + a += 8 * k; + } + for (; i + 3 < m; i += 4) { + int j = 0; + for (; j + 3 < k; j += 4) { + int8_t *in_ptr = a + j; + for (int c = 0; c < 4; c++) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 4); + in_ptr += k; + vse8_v_i8m1(sa, _input, 4); + sa += 4; + } + } + if (j < k) { + int8_t *in_ptr = a + j; + for (int c = 0; c < 4; c++) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, k & 3); + in_ptr += k; + vse8_v_i8m1(sa, _input, k & 3); + sa += 4; + } + } + a += 4 * k; + } + for (; i + 1 < m; i += 2) { + int j = 0; + for (; j + 3 < k; j += 4) { + int8_t *in_ptr = a + j; + for (int c = 0; c < 2; c++) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 4); + in_ptr += k; + vse8_v_i8m1(sa, _input, 4); + sa += 4; + } + } + if (j < k) { + int8_t *in_ptr = a + j; + for (int c = 0; c < 2; c++) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, k & 3); + in_ptr += k; + vse8_v_i8m1(sa, _input, k & 3); + sa += 4; + } + } + a += 2 * k; + } + for (; i < m; i++) { + memcpy(sa, a, k * sizeof(int8_t)); + } +} + +void csi_nn_rvv_reorder_input_z8_int8(int8_t *b, int8_t *sb, int k, int n, int ldx) +{ + int vl = vsetvl_e8m1(8); + int i = 0; + for (; i + 7 < n; i += 8) { + int8_t *b0 = b + i; + int j = 0; + for (; j + 3 < k; j += 4) { + vint8m1_t _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb += 32 - 3; + } + // k_tail + if (j < k) { + int8_t *sb0 = sb; + for (; j < k; j++) { + vint8m1_t _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb0, 4 * sizeof(int8_t), _tmp, vl); + sb0++; + } + sb += 32; + } + } + for (; i + 3 < n; i += 4) { + vl = vsetvl_e8m1(4); + int8_t *b0 = b + i; + int j = 0; + for (; j + 3 < k; j += 4) { + vint8m1_t _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb += 13; + } + // k_tail + if (j < k) { + int8_t *sb0 = sb; + for (; j < k; j++) { + vint8m1_t _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb0, 4 * sizeof(int8_t), _tmp, vl); + sb0++; + } + sb += 16; + } + } + // n_tail + for (; i < n; i++) { + vl = vsetvl_e8m1(16); + int8_t *b0 = b + i; + int j = 0; + for (; j + 15 < k; j += 16) { + vint8m1_t _tmp = vlse8_v_i8m1(b0, ldx * sizeof(int8_t), vl); + b0 += 16 * ldx; + vse8_v_i8m1(sb, _tmp, vl); + sb += 16; + } + if (j < k) { + vl = vsetvl_e8m1(k & 15); + vint8m1_t _tmp = vlse8_v_i8m1(b0, ldx * sizeof(int8_t), vl); + vse8_v_i8m1(sb, _tmp, vl); + sb += ((k & 15) / 4 + 1) * 4; + } + } +} + +void csi_nn_rvv_gemm_8x8_int32(int32_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, + int n, int ldc, int32_t *bias) +{ + int8_t *kernel_data = (int8_t *)sa; + int8_t *input_data = (int8_t *)sb; + int32_t *output_data = dst; + // please use fuse_zp2bias option in hhb, thus bias_data wont be NULL + int32_t *bias_data = bias; + + int vl = 0; + int i = 0; + // m8 loop + vl = vsetvl_e32m2(8); + for (; i + 7 < m; i += 8) { + int8_t *in_ptr = input_data; + + int32_t *out_ptr0 = output_data; + int32_t *out_ptr1 = out_ptr0 + ldc; + int32_t *out_ptr2 = out_ptr1 + ldc; + int32_t *out_ptr3 = out_ptr2 + ldc; + int32_t *out_ptr4 = out_ptr3 + ldc; + int32_t *out_ptr5 = out_ptr4 + ldc; + int32_t *out_ptr6 = out_ptr5 + ldc; + int32_t *out_ptr7 = out_ptr6 + ldc; + int j = 0; + // m8n8 loop + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 8); + vint32m2_t _acc1 = vmv_v_x_i32m2(bias_data[1], 8); + vint32m2_t _acc2 = vmv_v_x_i32m2(bias_data[2], 8); + vint32m2_t _acc3 = vmv_v_x_i32m2(bias_data[3], 8); + vint32m2_t _acc4 = vmv_v_x_i32m2(bias_data[4], 8); + vint32m2_t _acc5 = vmv_v_x_i32m2(bias_data[5], 8); + vint32m2_t _acc6 = vmv_v_x_i32m2(bias_data[6], 8); + vint32m2_t _acc7 = vmv_v_x_i32m2(bias_data[7], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 8); + _acc1 = vmaqa_vx_i32m2(_acc1, kernel_ptr[1], _input, 8); + _acc2 = vmaqa_vx_i32m2(_acc2, kernel_ptr[2], _input, 8); + _acc3 = vmaqa_vx_i32m2(_acc3, kernel_ptr[3], _input, 8); + _acc4 = vmaqa_vx_i32m2(_acc4, kernel_ptr[4], _input, 8); + _acc5 = vmaqa_vx_i32m2(_acc5, kernel_ptr[5], _input, 8); + _acc6 = vmaqa_vx_i32m2(_acc6, kernel_ptr[6], _input, 8); + _acc7 = vmaqa_vx_i32m2(_acc7, kernel_ptr[7], _input, 8); + + kernel_ptr += 8; + in_ptr += 32; + } + vse32_v_i32m2(out_ptr0, _acc0, 8); + vse32_v_i32m2(out_ptr1, _acc1, 8); + vse32_v_i32m2(out_ptr2, _acc2, 8); + vse32_v_i32m2(out_ptr3, _acc3, 8); + vse32_v_i32m2(out_ptr4, _acc4, 8); + vse32_v_i32m2(out_ptr5, _acc5, 8); + vse32_v_i32m2(out_ptr6, _acc6, 8); + vse32_v_i32m2(out_ptr7, _acc7, 8); + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + out_ptr4 += 8; + out_ptr5 += 8; + out_ptr6 += 8; + out_ptr7 += 8; + } + // m8n4 + for (; j + 3 < n; j += 4) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 4); + vint32m1_t _acc1 = vmv_v_x_i32m1(bias_data[1], 4); + vint32m1_t _acc2 = vmv_v_x_i32m1(bias_data[2], 4); + vint32m1_t _acc3 = vmv_v_x_i32m1(bias_data[3], 4); + vint32m1_t _acc4 = vmv_v_x_i32m1(bias_data[4], 4); + vint32m1_t _acc5 = vmv_v_x_i32m1(bias_data[5], 4); + vint32m1_t _acc6 = vmv_v_x_i32m1(bias_data[6], 4); + vint32m1_t _acc7 = vmv_v_x_i32m1(bias_data[7], 4); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 4); + _acc1 = vmaqa_vx_i32m1(_acc1, kernel_ptr[1], _input, 4); + _acc2 = vmaqa_vx_i32m1(_acc2, kernel_ptr[2], _input, 4); + _acc3 = vmaqa_vx_i32m1(_acc3, kernel_ptr[3], _input, 4); + _acc4 = vmaqa_vx_i32m1(_acc4, kernel_ptr[4], _input, 4); + _acc5 = vmaqa_vx_i32m1(_acc5, kernel_ptr[5], _input, 4); + _acc6 = vmaqa_vx_i32m1(_acc6, kernel_ptr[6], _input, 4); + _acc7 = vmaqa_vx_i32m1(_acc7, kernel_ptr[7], _input, 4); + + kernel_ptr += 8; + in_ptr += 16; + } + vse32_v_i32m1(out_ptr0, _acc0, 4); + vse32_v_i32m1(out_ptr1, _acc1, 4); + vse32_v_i32m1(out_ptr2, _acc2, 4); + vse32_v_i32m1(out_ptr3, _acc3, 4); + vse32_v_i32m1(out_ptr4, _acc4, 4); + vse32_v_i32m1(out_ptr5, _acc5, 4); + vse32_v_i32m1(out_ptr6, _acc6, 4); + vse32_v_i32m1(out_ptr7, _acc7, 4); + out_ptr0 += 4; + out_ptr1 += 4; + out_ptr2 += 4; + out_ptr3 += 4; + out_ptr4 += 4; + out_ptr5 += 4; + out_ptr6 += 4; + out_ptr7 += 4; + } + // m8n2 + for (; j + 1 < n; j += 2) { + int8_t *kernel_ptr = kernel_data; + vint32m2_t _acc0 = vle32_v_i32m2(bias_data, 8); + vint32m2_t _acc1 = vle32_v_i32m2(bias_data, 8); + + int32_t *in_ptr0 = (int32_t *)in_ptr; + int32_t *in_ptr1 = (int32_t *)(in_ptr + k); + + out_ptr1 = out_ptr0 + 1; + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _kernel = vle8_v_i8m2(kernel_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, in_ptr0[0], _kernel, 8); + _acc1 = vmaqa_vx_i32m2(_acc1, in_ptr1[0], _kernel, 8); + in_ptr0++; + in_ptr1++; + kernel_ptr += 32; + } + vsse32_v_i32m2(out_ptr0, ldc * sizeof(int32_t), _acc0, 8); + vsse32_v_i32m2(out_ptr1, ldc * sizeof(int32_t), _acc1, 8); + out_ptr0 += 2; + in_ptr += 2 * k; + } + // m8n1 + for (; j < n; j++) { + int8_t *kernel_ptr = kernel_data; + vint32m2_t _acc0 = vle32_v_i32m2(bias_data, 8); + int32_t *in_ptr0 = (int32_t *)in_ptr; + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _kernel = vle8_v_i8m2(kernel_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, in_ptr0[0], _kernel, 8); + in_ptr0++; + kernel_ptr += 32; + } + vsse32_v_i32m2(out_ptr0, ldc * sizeof(int32_t), _acc0, 8); + out_ptr0 += 1; + in_ptr += 1 * k; + } + kernel_data += 8 * k; + output_data += 8 * ldc; + bias_data += 8; + } + // m4 + for (; i + 3 < m; i += 4) { + int8_t *in_ptr = input_data; + + int32_t *out_ptr0 = output_data; + int32_t *out_ptr1 = out_ptr0 + ldc; + int32_t *out_ptr2 = out_ptr1 + ldc; + int32_t *out_ptr3 = out_ptr2 + ldc; + int j = 0; + // m4n8 loop + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 8); + vint32m2_t _acc1 = vmv_v_x_i32m2(bias_data[1], 8); + vint32m2_t _acc2 = vmv_v_x_i32m2(bias_data[2], 8); + vint32m2_t _acc3 = vmv_v_x_i32m2(bias_data[3], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 8); + _acc1 = vmaqa_vx_i32m2(_acc1, kernel_ptr[1], _input, 8); + _acc2 = vmaqa_vx_i32m2(_acc2, kernel_ptr[2], _input, 8); + _acc3 = vmaqa_vx_i32m2(_acc3, kernel_ptr[3], _input, 8); + + kernel_ptr += 4; + in_ptr += 32; + } + vse32_v_i32m2(out_ptr0, _acc0, 8); + vse32_v_i32m2(out_ptr1, _acc1, 8); + vse32_v_i32m2(out_ptr2, _acc2, 8); + vse32_v_i32m2(out_ptr3, _acc3, 8); + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + } + // m4n4 + for (; j + 3 < n; j += 4) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 4); + vint32m1_t _acc1 = vmv_v_x_i32m1(bias_data[1], 4); + vint32m1_t _acc2 = vmv_v_x_i32m1(bias_data[2], 4); + vint32m1_t _acc3 = vmv_v_x_i32m1(bias_data[3], 4); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 4); + _acc1 = vmaqa_vx_i32m1(_acc1, kernel_ptr[1], _input, 4); + _acc2 = vmaqa_vx_i32m1(_acc2, kernel_ptr[2], _input, 4); + _acc3 = vmaqa_vx_i32m1(_acc3, kernel_ptr[3], _input, 4); + + kernel_ptr += 4; + in_ptr += 16; + } + vse32_v_i32m1(out_ptr0, _acc0, 4); + vse32_v_i32m1(out_ptr1, _acc1, 4); + vse32_v_i32m1(out_ptr2, _acc2, 4); + vse32_v_i32m1(out_ptr3, _acc3, 4); + out_ptr0 += 4; + out_ptr1 += 4; + out_ptr2 += 4; + out_ptr3 += 4; + } + // m4n2 + for (; j + 1 < n; j += 2) { + int8_t *kernel_ptr = kernel_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data, 4); + vint32m1_t _acc1 = vle32_v_i32m1(bias_data, 4); + + int32_t *in_ptr0 = (int32_t *)in_ptr; + int32_t *in_ptr1 = (int32_t *)(in_ptr + k); + + out_ptr1 = out_ptr0 + 1; + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, in_ptr0[0], _kernel, 4); + _acc1 = vmaqa_vx_i32m1(_acc1, in_ptr1[0], _kernel, 4); + in_ptr0++; + in_ptr1++; + kernel_ptr += 16; + } + vsse32_v_i32m1(out_ptr0, ldc * sizeof(int32_t), _acc0, 4); + vsse32_v_i32m1(out_ptr1, ldc * sizeof(int32_t), _acc1, 4); + out_ptr0 += 2; + in_ptr += 2 * k; + } + // m4n1 + for (; j < n; j++) { + int8_t *kernel_ptr = kernel_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data, 4); + int32_t *in_ptr0 = (int32_t *)in_ptr; + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, in_ptr0[0], _kernel, 4); + in_ptr0++; + kernel_ptr += 16; + } + vsse32_v_i32m1(out_ptr0, ldc * sizeof(int32_t), _acc0, 4); + out_ptr0 += 1; + in_ptr += 1 * k; + } + kernel_data += 4 * k; + output_data += 4 * ldc; + bias_data += 4; + } + // m2 + for (; i + 1 < m; i += 2) { + int8_t *in_ptr = input_data; + + int32_t *out_ptr0 = output_data; + int32_t *out_ptr1 = out_ptr0 + ldc; + int j = 0; + // m2n8 loop + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 8); + vint32m2_t _acc1 = vmv_v_x_i32m2(bias_data[1], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 8); + _acc1 = vmaqa_vx_i32m2(_acc1, kernel_ptr[1], _input, 8); + + kernel_ptr += 2; + in_ptr += 32; + } + vse32_v_i32m2(out_ptr0, _acc0, 8); + vse32_v_i32m2(out_ptr1, _acc1, 8); + out_ptr0 += 8; + out_ptr1 += 8; + } + // m2n4 + for (; j + 3 < n; j += 4) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 4); + vint32m1_t _acc1 = vmv_v_x_i32m1(bias_data[1], 4); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 4); + _acc1 = vmaqa_vx_i32m1(_acc1, kernel_ptr[1], _input, 4); + + kernel_ptr += 2; + in_ptr += 16; + } + vse32_v_i32m1(out_ptr0, _acc0, 4); + vse32_v_i32m1(out_ptr1, _acc1, 4); + out_ptr0 += 4; + out_ptr1 += 4; + } + // m2n_tail + for (; j < n; j++) { + int32_t acc0 = bias_data[0]; + int32_t acc1 = bias_data[1]; + int8_t *k0 = kernel_data; + int c = 0; + for (; c + 3 < k; c += 4) { + acc0 += k0[0] * in_ptr[c + 0]; + acc0 += k0[1] * in_ptr[c + 1]; + acc0 += k0[2] * in_ptr[c + 2]; + acc0 += k0[3] * in_ptr[c + 3]; + acc1 += k0[4] * in_ptr[c + 0]; + acc1 += k0[5] * in_ptr[c + 1]; + acc1 += k0[6] * in_ptr[c + 2]; + acc1 += k0[7] * in_ptr[c + 3]; + k0 += 8; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + in_ptr += k; + } + kernel_data += 2 * k; + output_data += 2 * ldc; + bias_data += 2; + } + // m1 + for (; i < m; i++) { + int8_t *in_ptr = input_data; + int32_t *out_ptr0 = output_data; + int j = 0; + // m1n8 loop + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 8); + kernel_ptr += 1; + in_ptr += 32; + } + vse32_v_i32m2(out_ptr0, _acc0, 8); + out_ptr0 += 8; + } + // m1n4 + for (; j + 3 < n; j += 4) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 4); + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 4); + kernel_ptr += 1; + in_ptr += 16; + } + vse32_v_i32m1(out_ptr0, _acc0, 4); + out_ptr0 += 4; + } + // m1n_tail + for (; j < n; j++) { + int32_t acc0 = bias_data[0]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[c] * in_ptr[c]; + } + *out_ptr0++ = acc0; + in_ptr += k; + } + } +} + +void csi_nn_rvv_gemm_8x8_int8(int8_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, int n, + int ldc, int32_t *bias, int32_t out_zp, int32_t *mult, int32_t *shift) +{ + int8_t *kernel_data = (int8_t *)sa; + int8_t *input_data = (int8_t *)sb; + int8_t *output_data = dst; + // please use fuse_zp2bias option in hhb, thus bias_data wont be NULL + int32_t *bias_data = bias; + + int vl = 0; + int i = 0; + // m8 loop + vl = vsetvl_e32m2(8); + for (; i + 7 < m; i += 8) { + int8_t *in_ptr = input_data; + + int8_t *out_ptr0 = output_data; + int8_t *out_ptr1 = out_ptr0 + ldc; + int8_t *out_ptr2 = out_ptr1 + ldc; + int8_t *out_ptr3 = out_ptr2 + ldc; + int8_t *out_ptr4 = out_ptr3 + ldc; + int8_t *out_ptr5 = out_ptr4 + ldc; + int8_t *out_ptr6 = out_ptr5 + ldc; + int8_t *out_ptr7 = out_ptr6 + ldc; + int j = 0; + // m8n8 loop + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 8); + vint32m2_t _acc1 = vmv_v_x_i32m2(bias_data[1], 8); + vint32m2_t _acc2 = vmv_v_x_i32m2(bias_data[2], 8); + vint32m2_t _acc3 = vmv_v_x_i32m2(bias_data[3], 8); + vint32m2_t _acc4 = vmv_v_x_i32m2(bias_data[4], 8); + vint32m2_t _acc5 = vmv_v_x_i32m2(bias_data[5], 8); + vint32m2_t _acc6 = vmv_v_x_i32m2(bias_data[6], 8); + vint32m2_t _acc7 = vmv_v_x_i32m2(bias_data[7], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 8); + _acc1 = vmaqa_vx_i32m2(_acc1, kernel_ptr[1], _input, 8); + _acc2 = vmaqa_vx_i32m2(_acc2, kernel_ptr[2], _input, 8); + _acc3 = vmaqa_vx_i32m2(_acc3, kernel_ptr[3], _input, 8); + _acc4 = vmaqa_vx_i32m2(_acc4, kernel_ptr[4], _input, 8); + _acc5 = vmaqa_vx_i32m2(_acc5, kernel_ptr[5], _input, 8); + _acc6 = vmaqa_vx_i32m2(_acc6, kernel_ptr[6], _input, 8); + _acc7 = vmaqa_vx_i32m2(_acc7, kernel_ptr[7], _input, 8); + + kernel_ptr += 8; + in_ptr += 32; + } + vint8mf2_t _res0 = requantize_m2(_acc0, mult[0], shift[0], out_zp, 8); + vint8mf2_t _res1 = requantize_m2(_acc1, mult[1], shift[1], out_zp, 8); + vint8mf2_t _res2 = requantize_m2(_acc2, mult[2], shift[2], out_zp, 8); + vint8mf2_t _res3 = requantize_m2(_acc3, mult[3], shift[3], out_zp, 8); + vint8mf2_t _res4 = requantize_m2(_acc4, mult[4], shift[4], out_zp, 8); + vint8mf2_t _res5 = requantize_m2(_acc5, mult[5], shift[5], out_zp, 8); + vint8mf2_t _res6 = requantize_m2(_acc6, mult[6], shift[6], out_zp, 8); + vint8mf2_t _res7 = requantize_m2(_acc7, mult[7], shift[7], out_zp, 8); + + vse8_v_i8mf2(out_ptr0, _res0, 8); + vse8_v_i8mf2(out_ptr1, _res1, 8); + vse8_v_i8mf2(out_ptr2, _res2, 8); + vse8_v_i8mf2(out_ptr3, _res3, 8); + vse8_v_i8mf2(out_ptr4, _res4, 8); + vse8_v_i8mf2(out_ptr5, _res5, 8); + vse8_v_i8mf2(out_ptr6, _res6, 8); + vse8_v_i8mf2(out_ptr7, _res7, 8); + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + out_ptr4 += 8; + out_ptr5 += 8; + out_ptr6 += 8; + out_ptr7 += 8; + } + // m8n4 + for (; j + 3 < n; j += 4) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 4); + vint32m1_t _acc1 = vmv_v_x_i32m1(bias_data[1], 4); + vint32m1_t _acc2 = vmv_v_x_i32m1(bias_data[2], 4); + vint32m1_t _acc3 = vmv_v_x_i32m1(bias_data[3], 4); + vint32m1_t _acc4 = vmv_v_x_i32m1(bias_data[4], 4); + vint32m1_t _acc5 = vmv_v_x_i32m1(bias_data[5], 4); + vint32m1_t _acc6 = vmv_v_x_i32m1(bias_data[6], 4); + vint32m1_t _acc7 = vmv_v_x_i32m1(bias_data[7], 4); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 4); + _acc1 = vmaqa_vx_i32m1(_acc1, kernel_ptr[1], _input, 4); + _acc2 = vmaqa_vx_i32m1(_acc2, kernel_ptr[2], _input, 4); + _acc3 = vmaqa_vx_i32m1(_acc3, kernel_ptr[3], _input, 4); + _acc4 = vmaqa_vx_i32m1(_acc4, kernel_ptr[4], _input, 4); + _acc5 = vmaqa_vx_i32m1(_acc5, kernel_ptr[5], _input, 4); + _acc6 = vmaqa_vx_i32m1(_acc6, kernel_ptr[6], _input, 4); + _acc7 = vmaqa_vx_i32m1(_acc7, kernel_ptr[7], _input, 4); + + kernel_ptr += 8; + in_ptr += 16; + } + vint8mf4_t _res0 = requantize_m1(_acc0, mult[0], shift[0], out_zp, 4); + vint8mf4_t _res1 = requantize_m1(_acc1, mult[1], shift[1], out_zp, 4); + vint8mf4_t _res2 = requantize_m1(_acc2, mult[2], shift[2], out_zp, 4); + vint8mf4_t _res3 = requantize_m1(_acc3, mult[3], shift[3], out_zp, 4); + vint8mf4_t _res4 = requantize_m1(_acc4, mult[4], shift[4], out_zp, 4); + vint8mf4_t _res5 = requantize_m1(_acc5, mult[5], shift[5], out_zp, 4); + vint8mf4_t _res6 = requantize_m1(_acc6, mult[6], shift[6], out_zp, 4); + vint8mf4_t _res7 = requantize_m1(_acc7, mult[7], shift[7], out_zp, 4); + vse8_v_i8mf4(out_ptr0, _res0, 4); + vse8_v_i8mf4(out_ptr1, _res1, 4); + vse8_v_i8mf4(out_ptr2, _res2, 4); + vse8_v_i8mf4(out_ptr3, _res3, 4); + vse8_v_i8mf4(out_ptr4, _res4, 4); + vse8_v_i8mf4(out_ptr5, _res5, 4); + vse8_v_i8mf4(out_ptr6, _res6, 4); + vse8_v_i8mf4(out_ptr7, _res7, 4); + out_ptr0 += 4; + out_ptr1 += 4; + out_ptr2 += 4; + out_ptr3 += 4; + out_ptr4 += 4; + out_ptr5 += 4; + out_ptr6 += 4; + out_ptr7 += 4; + } + // m8n2 + for (; j + 1 < n; j += 2) { + int8_t *kernel_ptr = kernel_data; + vint32m2_t _acc0 = vle32_v_i32m2(bias_data, 8); + vint32m2_t _acc1 = vle32_v_i32m2(bias_data, 8); + + int32_t *in_ptr0 = (int32_t *)in_ptr; + int32_t *in_ptr1 = (int32_t *)(in_ptr + k); + + out_ptr1 = out_ptr0 + 1; + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _kernel = vle8_v_i8m2(kernel_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, in_ptr0[0], _kernel, 8); + _acc1 = vmaqa_vx_i32m2(_acc1, in_ptr1[0], _kernel, 8); + in_ptr0++; + in_ptr1++; + kernel_ptr += 32; + } + vint8mf2_t _res0 = requantize_m2_s(_acc0, mult, shift, out_zp, 8); + vint8mf2_t _res1 = requantize_m2_s(_acc1, mult, shift, out_zp, 8); + vsse8_v_i8mf2(out_ptr0, ldc * sizeof(int8_t), _res0, 8); + vsse8_v_i8mf2(out_ptr1, ldc * sizeof(int8_t), _res1, 8); + out_ptr0 += 2; + in_ptr += 2 * k; + } + // m8n1 + for (; j < n; j++) { + int8_t *kernel_ptr = kernel_data; + vint32m2_t _acc0 = vle32_v_i32m2(bias_data, 8); + int32_t *in_ptr0 = (int32_t *)in_ptr; + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _kernel = vle8_v_i8m2(kernel_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, in_ptr0[0], _kernel, 8); + in_ptr0++; + kernel_ptr += 32; + } + vint8mf2_t _res0 = requantize_m2_s(_acc0, mult, shift, out_zp, 8); + vsse8_v_i8mf2(out_ptr0, ldc * sizeof(int8_t), _res0, 8); + out_ptr0 += 1; + in_ptr += 1 * k; + } + kernel_data += 8 * k; + output_data += 8 * ldc; + bias_data += 8; + mult += 8; + shift += 8; + } + // m4 + for (; i + 3 < m; i += 4) { + int8_t *in_ptr = input_data; + + int8_t *out_ptr0 = output_data; + int8_t *out_ptr1 = out_ptr0 + ldc; + int8_t *out_ptr2 = out_ptr1 + ldc; + int8_t *out_ptr3 = out_ptr2 + ldc; + int j = 0; + // m4n8 loop + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 8); + vint32m2_t _acc1 = vmv_v_x_i32m2(bias_data[1], 8); + vint32m2_t _acc2 = vmv_v_x_i32m2(bias_data[2], 8); + vint32m2_t _acc3 = vmv_v_x_i32m2(bias_data[3], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 8); + _acc1 = vmaqa_vx_i32m2(_acc1, kernel_ptr[1], _input, 8); + _acc2 = vmaqa_vx_i32m2(_acc2, kernel_ptr[2], _input, 8); + _acc3 = vmaqa_vx_i32m2(_acc3, kernel_ptr[3], _input, 8); + + kernel_ptr += 4; + in_ptr += 32; + } + vint8mf2_t _res0 = requantize_m2(_acc0, mult[0], shift[0], out_zp, 8); + vint8mf2_t _res1 = requantize_m2(_acc1, mult[1], shift[1], out_zp, 8); + vint8mf2_t _res2 = requantize_m2(_acc2, mult[2], shift[2], out_zp, 8); + vint8mf2_t _res3 = requantize_m2(_acc3, mult[3], shift[3], out_zp, 8); + vse8_v_i8mf2(out_ptr0, _res0, 8); + vse8_v_i8mf2(out_ptr1, _res1, 8); + vse8_v_i8mf2(out_ptr2, _res2, 8); + vse8_v_i8mf2(out_ptr3, _res3, 8); + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + } + // m4n4 + for (; j + 3 < n; j += 4) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 4); + vint32m1_t _acc1 = vmv_v_x_i32m1(bias_data[1], 4); + vint32m1_t _acc2 = vmv_v_x_i32m1(bias_data[2], 4); + vint32m1_t _acc3 = vmv_v_x_i32m1(bias_data[3], 4); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 4); + _acc1 = vmaqa_vx_i32m1(_acc1, kernel_ptr[1], _input, 4); + _acc2 = vmaqa_vx_i32m1(_acc2, kernel_ptr[2], _input, 4); + _acc3 = vmaqa_vx_i32m1(_acc3, kernel_ptr[3], _input, 4); + + kernel_ptr += 4; + in_ptr += 16; + } + vint8mf4_t _res0 = requantize_m1(_acc0, mult[0], shift[0], out_zp, 4); + vint8mf4_t _res1 = requantize_m1(_acc1, mult[1], shift[1], out_zp, 4); + vint8mf4_t _res2 = requantize_m1(_acc2, mult[2], shift[2], out_zp, 4); + vint8mf4_t _res3 = requantize_m1(_acc3, mult[3], shift[3], out_zp, 4); + vse8_v_i8mf4(out_ptr0, _res0, 4); + vse8_v_i8mf4(out_ptr1, _res1, 4); + vse8_v_i8mf4(out_ptr2, _res2, 4); + vse8_v_i8mf4(out_ptr3, _res3, 4); + out_ptr0 += 4; + out_ptr1 += 4; + out_ptr2 += 4; + out_ptr3 += 4; + } + // m4n2 + for (; j + 1 < n; j += 2) { + int8_t *kernel_ptr = kernel_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data, 4); + vint32m1_t _acc1 = vle32_v_i32m1(bias_data, 4); + + int32_t *in_ptr0 = (int32_t *)in_ptr; + int32_t *in_ptr1 = (int32_t *)(in_ptr + k); + + out_ptr1 = out_ptr0 + 1; + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, in_ptr0[0], _kernel, 4); + _acc1 = vmaqa_vx_i32m1(_acc1, in_ptr1[0], _kernel, 4); + in_ptr0++; + in_ptr1++; + kernel_ptr += 16; + } + vint8mf4_t _res0 = requantize_m1_s(_acc0, mult, shift, out_zp, 4); + vint8mf4_t _res1 = requantize_m1_s(_acc1, mult, shift, out_zp, 4); + vsse8_v_i8mf4(out_ptr0, ldc * sizeof(int8_t), _res0, 4); + vsse8_v_i8mf4(out_ptr1, ldc * sizeof(int8_t), _res1, 4); + out_ptr0 += 2; + in_ptr += 2 * k; + } + // m4n1 + for (; j < n; j++) { + int8_t *kernel_ptr = kernel_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data, 4); + int32_t *in_ptr0 = (int32_t *)in_ptr; + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, in_ptr0[0], _kernel, 4); + in_ptr0++; + kernel_ptr += 16; + } + vint8mf4_t _res0 = requantize_m1_s(_acc0, mult, shift, out_zp, 4); + vsse8_v_i8mf4(out_ptr0, ldc * sizeof(int8_t), _res0, 4); + out_ptr0 += 1; + in_ptr += 1 * k; + } + kernel_data += 4 * k; + output_data += 4 * ldc; + bias_data += 4; + mult += 4; + shift += 4; + } + // m2 + for (; i + 1 < m; i += 2) { + int8_t *in_ptr = input_data; + + int8_t *out_ptr0 = output_data; + int8_t *out_ptr1 = out_ptr0 + ldc; + int j = 0; + // m2n8 loop + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 8); + vint32m2_t _acc1 = vmv_v_x_i32m2(bias_data[1], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 8); + _acc1 = vmaqa_vx_i32m2(_acc1, kernel_ptr[1], _input, 8); + + kernel_ptr += 2; + in_ptr += 32; + } + vint8mf2_t _res0 = requantize_m2(_acc0, mult[0], shift[0], out_zp, 8); + vint8mf2_t _res1 = requantize_m2(_acc1, mult[1], shift[1], out_zp, 8); + vse8_v_i8mf2(out_ptr0, _res0, 8); + vse8_v_i8mf2(out_ptr1, _res1, 8); + out_ptr0 += 8; + out_ptr1 += 8; + } + // m2n4 + for (; j + 3 < n; j += 4) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 4); + vint32m1_t _acc1 = vmv_v_x_i32m1(bias_data[1], 4); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 4); + _acc1 = vmaqa_vx_i32m1(_acc1, kernel_ptr[1], _input, 4); + + kernel_ptr += 2; + in_ptr += 16; + } + vint8mf4_t _res0 = requantize_m1(_acc0, mult[0], shift[0], out_zp, 4); + vint8mf4_t _res1 = requantize_m1(_acc1, mult[1], shift[1], out_zp, 4); + vse8_v_i8mf4(out_ptr0, _res0, 4); + vse8_v_i8mf4(out_ptr1, _res1, 4); + out_ptr0 += 4; + out_ptr1 += 4; + } + // m2n_tail + for (; j < n; j++) { + int32_t acc0 = bias_data[0]; + int32_t acc1 = bias_data[1]; + int8_t *k0 = kernel_data; + int c = 0; + for (; c + 3 < k; c += 4) { + acc0 += k0[0] * in_ptr[c + 0]; + acc0 += k0[1] * in_ptr[c + 1]; + acc0 += k0[2] * in_ptr[c + 2]; + acc0 += k0[3] * in_ptr[c + 3]; + acc1 += k0[4] * in_ptr[c + 0]; + acc1 += k0[5] * in_ptr[c + 1]; + acc1 += k0[6] * in_ptr[c + 2]; + acc1 += k0[7] * in_ptr[c + 3]; + k0 += 8; + } + *out_ptr0++ = requantize_single(acc0, mult[0], shift[0], out_zp); + *out_ptr1++ = requantize_single(acc1, mult[1], shift[1], out_zp); + in_ptr += k; + } + kernel_data += 2 * k; + output_data += 2 * ldc; + bias_data += 2; + mult += 2; + shift += 2; + } + // m1 + for (; i < m; i++) { + int8_t *in_ptr = input_data; + int8_t *out_ptr0 = output_data; + int j = 0; + // m1n8 loop + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 32); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 8); + kernel_ptr += 1; + in_ptr += 32; + } + vint8mf2_t _res0 = requantize_m2(_acc0, mult[0], shift[0], out_zp, 8); + vse8_v_i8mf2(out_ptr0, _res0, 8); + out_ptr0 += 8; + } + // m1n4 + for (; j + 3 < n; j += 4) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 4); + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 4); + kernel_ptr += 1; + in_ptr += 16; + } + vint8mf4_t _res0 = requantize_m1(_acc0, mult[0], shift[0], out_zp, 4); + vse8_v_i8mf4(out_ptr0, _res0, 4); + out_ptr0 += 4; + } + // m1n_tail + for (; j < n; j++) { + int32_t acc0 = bias_data[0]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[c] * in_ptr[c]; + } + *out_ptr0++ = requantize_single(acc0, mult[0], shift[0], out_zp); + in_ptr += k; + } + } +} + +/************************************************************* + note: VLEN = 256 +*************************************************************/ +// kernel 数据排布 可复用 csi_nn_rvv_reorder_kernel_n8_int8 + +void csi_nn_rvv256_reorder_input_z16_int8(int8_t *b, int8_t *sb, int k, int n, int ldx) +{ + int vl = vsetvl_e8m1(16); + int i = 0; + for (; i + 15 < n; i += 16) { + int8_t *b0 = b + i; + int j = 0; + for (; j + 3 < k; j += 4) { + vint8m1_t _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb += 64 - 3; + } + // k_tail + if (j < k) { + int8_t *sb0 = sb; + for (; j < k; j++) { + vint8m1_t _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb0, 4 * sizeof(int8_t), _tmp, vl); + sb0++; + } + sb += 64; + } + } + for (; i + 7 < n; i += 8) { + vl = vsetvl_e8m1(8); + int8_t *b0 = b + i; + int j = 0; + for (; j + 3 < k; j += 4) { + vint8m1_t _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb++; + _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb, 4 * sizeof(int8_t), _tmp, vl); + sb += 32 - 3; + } + // k_tail + if (j < k) { + int8_t *sb0 = sb; + for (; j < k; j++) { + vint8m1_t _tmp = vle8_v_i8m1(b0, vl); + b0 += n; + vsse8_v_i8m1(sb0, 4 * sizeof(int8_t), _tmp, vl); + sb0++; + } + sb += 32; + } + } + // n_tail + for (; i < n; i++) { + vl = vsetvl_e8m1(16); + int8_t *b0 = b + i; + int j = 0; + for (; j + 15 < k; j += 16) { + vint8m1_t _tmp = vlse8_v_i8m1(b0, ldx * sizeof(int8_t), vl); + b0 += 16 * ldx; + vse8_v_i8m1(sb, _tmp, vl); + sb += 16; + } + if (j < k) { + vl = vsetvl_e8m1(k & 15); + vint8m1_t _tmp = vlse8_v_i8m1(b0, ldx * sizeof(int8_t), vl); + vse8_v_i8m1(sb, _tmp, vl); + sb += ((k & 15) / 4 + 1) * 4; + } + } +} + +void csi_nn_rvv256_gemm_8x16_int32(int32_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, + int n, int ldc, int32_t *bias) +{ + int8_t *kernel_data = (int8_t *)sa; + int8_t *input_data = (int8_t *)sb; + int32_t *output_data = dst; + // please use fuse_zp2bias option in hhb, thus bias_data wont be NULL + int32_t *bias_data = bias; + + int vl = 0; + int i = 0; + // m8 loop + vl = vsetvl_e32m2(16); + for (; i + 7 < m; i += 8) { + int8_t *in_ptr = input_data; + + int32_t *out_ptr0 = output_data; + int32_t *out_ptr1 = out_ptr0 + ldc; + int32_t *out_ptr2 = out_ptr1 + ldc; + int32_t *out_ptr3 = out_ptr2 + ldc; + int32_t *out_ptr4 = out_ptr3 + ldc; + int32_t *out_ptr5 = out_ptr4 + ldc; + int32_t *out_ptr6 = out_ptr5 + ldc; + int32_t *out_ptr7 = out_ptr6 + ldc; + int j = 0; + // m8n16 loop + for (; j + 15 < n; j += 16) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 16); + vint32m2_t _acc1 = vmv_v_x_i32m2(bias_data[1], 16); + vint32m2_t _acc2 = vmv_v_x_i32m2(bias_data[2], 16); + vint32m2_t _acc3 = vmv_v_x_i32m2(bias_data[3], 16); + vint32m2_t _acc4 = vmv_v_x_i32m2(bias_data[4], 16); + vint32m2_t _acc5 = vmv_v_x_i32m2(bias_data[5], 16); + vint32m2_t _acc6 = vmv_v_x_i32m2(bias_data[6], 16); + vint32m2_t _acc7 = vmv_v_x_i32m2(bias_data[7], 16); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 64); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 16); + _acc1 = vmaqa_vx_i32m2(_acc1, kernel_ptr[1], _input, 16); + _acc2 = vmaqa_vx_i32m2(_acc2, kernel_ptr[2], _input, 16); + _acc3 = vmaqa_vx_i32m2(_acc3, kernel_ptr[3], _input, 16); + _acc4 = vmaqa_vx_i32m2(_acc4, kernel_ptr[4], _input, 16); + _acc5 = vmaqa_vx_i32m2(_acc5, kernel_ptr[5], _input, 16); + _acc6 = vmaqa_vx_i32m2(_acc6, kernel_ptr[6], _input, 16); + _acc7 = vmaqa_vx_i32m2(_acc7, kernel_ptr[7], _input, 16); + + kernel_ptr += 8; + in_ptr += 64; + } + vse32_v_i32m2(out_ptr0, _acc0, 16); + vse32_v_i32m2(out_ptr1, _acc1, 16); + vse32_v_i32m2(out_ptr2, _acc2, 16); + vse32_v_i32m2(out_ptr3, _acc3, 16); + vse32_v_i32m2(out_ptr4, _acc4, 16); + vse32_v_i32m2(out_ptr5, _acc5, 16); + vse32_v_i32m2(out_ptr6, _acc6, 16); + vse32_v_i32m2(out_ptr7, _acc7, 16); + out_ptr0 += 16; + out_ptr1 += 16; + out_ptr2 += 16; + out_ptr3 += 16; + out_ptr4 += 16; + out_ptr5 += 16; + out_ptr6 += 16; + out_ptr7 += 16; + } + // m8n8 + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 8); + vint32m1_t _acc1 = vmv_v_x_i32m1(bias_data[1], 8); + vint32m1_t _acc2 = vmv_v_x_i32m1(bias_data[2], 8); + vint32m1_t _acc3 = vmv_v_x_i32m1(bias_data[3], 8); + vint32m1_t _acc4 = vmv_v_x_i32m1(bias_data[4], 8); + vint32m1_t _acc5 = vmv_v_x_i32m1(bias_data[5], 8); + vint32m1_t _acc6 = vmv_v_x_i32m1(bias_data[6], 8); + vint32m1_t _acc7 = vmv_v_x_i32m1(bias_data[7], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 32); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 8); + _acc1 = vmaqa_vx_i32m1(_acc1, kernel_ptr[1], _input, 8); + _acc2 = vmaqa_vx_i32m1(_acc2, kernel_ptr[2], _input, 8); + _acc3 = vmaqa_vx_i32m1(_acc3, kernel_ptr[3], _input, 8); + _acc4 = vmaqa_vx_i32m1(_acc4, kernel_ptr[4], _input, 8); + _acc5 = vmaqa_vx_i32m1(_acc5, kernel_ptr[5], _input, 8); + _acc6 = vmaqa_vx_i32m1(_acc6, kernel_ptr[6], _input, 8); + _acc7 = vmaqa_vx_i32m1(_acc7, kernel_ptr[7], _input, 8); + + kernel_ptr += 8; + in_ptr += 32; + } + vse32_v_i32m1(out_ptr0, _acc0, 8); + vse32_v_i32m1(out_ptr1, _acc1, 8); + vse32_v_i32m1(out_ptr2, _acc2, 8); + vse32_v_i32m1(out_ptr3, _acc3, 8); + vse32_v_i32m1(out_ptr4, _acc4, 8); + vse32_v_i32m1(out_ptr5, _acc5, 8); + vse32_v_i32m1(out_ptr6, _acc6, 8); + vse32_v_i32m1(out_ptr7, _acc7, 8); + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + out_ptr4 += 8; + out_ptr5 += 8; + out_ptr6 += 8; + out_ptr7 += 8; + } + // m8n4 + for (; j + 3 < n; j += 4) { + int8_t *kernel_ptr = kernel_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data, 8); + vint32m1_t _acc1 = vle32_v_i32m1(bias_data, 8); + vint32m1_t _acc2 = vle32_v_i32m1(bias_data, 8); + vint32m1_t _acc3 = vle32_v_i32m1(bias_data, 8); + + int32_t *in_ptr0 = (int32_t *)in_ptr; + int32_t *in_ptr1 = in_ptr0 + k; + int32_t *in_ptr2 = in_ptr1 + k; + int32_t *in_ptr3 = in_ptr2 + k; + + out_ptr1 = out_ptr0 + 1; + out_ptr2 = out_ptr0 + 2; + out_ptr3 = out_ptr0 + 3; + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 32); + _acc0 = vmaqa_vx_i32m1(_acc0, in_ptr0[0], _kernel, 8); + _acc1 = vmaqa_vx_i32m1(_acc1, in_ptr1[0], _kernel, 8); + _acc2 = vmaqa_vx_i32m1(_acc2, in_ptr2[0], _kernel, 8); + _acc3 = vmaqa_vx_i32m1(_acc3, in_ptr3[0], _kernel, 8); + in_ptr0++; + in_ptr1++; + in_ptr2++; + in_ptr3++; + kernel_ptr += 32; + } + vsse32_v_i32m1(out_ptr0, ldc * sizeof(int32_t), _acc0, 8); + vsse32_v_i32m1(out_ptr1, ldc * sizeof(int32_t), _acc1, 8); + vsse32_v_i32m1(out_ptr2, ldc * sizeof(int32_t), _acc2, 8); + vsse32_v_i32m1(out_ptr3, ldc * sizeof(int32_t), _acc3, 8); + out_ptr0 += 4; + in_ptr += 4 * k; + } + // m8n2 + for (; j + 1 < n; j += 2) { + int8_t *kernel_ptr = kernel_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data, 8); + vint32m1_t _acc1 = vle32_v_i32m1(bias_data, 8); + + int32_t *in_ptr0 = (int32_t *)in_ptr; + int32_t *in_ptr1 = in_ptr0 + k; + + out_ptr1 = out_ptr0 + 1; + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 32); + _acc0 = vmaqa_vx_i32m1(_acc0, in_ptr0[0], _kernel, 8); + _acc1 = vmaqa_vx_i32m1(_acc1, in_ptr1[0], _kernel, 8); + in_ptr0++; + in_ptr1++; + kernel_ptr += 32; + } + vsse32_v_i32m1(out_ptr0, ldc * sizeof(int32_t), _acc0, 8); + vsse32_v_i32m1(out_ptr1, ldc * sizeof(int32_t), _acc1, 8); + out_ptr0 += 2; + in_ptr += 2 * k; + } + // m8n1 + for (; j < n; j++) { + int8_t *kernel_ptr = kernel_data; + vint32m1_t _acc0 = vle32_v_i32m1(bias_data, 8); + int32_t *in_ptr0 = (int32_t *)in_ptr; + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _kernel = vle8_v_i8m1(kernel_ptr, 32); + _acc0 = vmaqa_vx_i32m1(_acc0, in_ptr0[0], _kernel, 8); + in_ptr0++; + kernel_ptr += 32; + } + vsse32_v_i32m1(out_ptr0, ldc * sizeof(int32_t), _acc0, 8); + out_ptr0 += 1; + in_ptr += 1 * k; + } + kernel_data += 8 * k; + output_data += 8 * ldc; + bias_data += 8; + } + // m4 + for (; i + 3 < m; i += 4) { + int8_t *in_ptr = input_data; + + int32_t *out_ptr0 = output_data; + int32_t *out_ptr1 = out_ptr0 + ldc; + int32_t *out_ptr2 = out_ptr1 + ldc; + int32_t *out_ptr3 = out_ptr2 + ldc; + int j = 0; + // m4n16 loop + for (; j + 15 < n; j += 16) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 16); + vint32m2_t _acc1 = vmv_v_x_i32m2(bias_data[1], 16); + vint32m2_t _acc2 = vmv_v_x_i32m2(bias_data[2], 16); + vint32m2_t _acc3 = vmv_v_x_i32m2(bias_data[3], 16); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 64); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 16); + _acc1 = vmaqa_vx_i32m2(_acc1, kernel_ptr[1], _input, 16); + _acc2 = vmaqa_vx_i32m2(_acc2, kernel_ptr[2], _input, 16); + _acc3 = vmaqa_vx_i32m2(_acc3, kernel_ptr[3], _input, 16); + + kernel_ptr += 4; + in_ptr += 64; + } + vse32_v_i32m2(out_ptr0, _acc0, 16); + vse32_v_i32m2(out_ptr1, _acc1, 16); + vse32_v_i32m2(out_ptr2, _acc2, 16); + vse32_v_i32m2(out_ptr3, _acc3, 16); + out_ptr0 += 16; + out_ptr1 += 16; + out_ptr2 += 16; + out_ptr3 += 16; + } + // m4n8 + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 8); + vint32m1_t _acc1 = vmv_v_x_i32m1(bias_data[1], 8); + vint32m1_t _acc2 = vmv_v_x_i32m1(bias_data[2], 8); + vint32m1_t _acc3 = vmv_v_x_i32m1(bias_data[3], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 16); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 8); + _acc1 = vmaqa_vx_i32m1(_acc1, kernel_ptr[1], _input, 8); + _acc2 = vmaqa_vx_i32m1(_acc2, kernel_ptr[2], _input, 8); + _acc3 = vmaqa_vx_i32m1(_acc3, kernel_ptr[3], _input, 8); + + kernel_ptr += 4; + in_ptr += 32; + } + vse32_v_i32m1(out_ptr0, _acc0, 8); + vse32_v_i32m1(out_ptr1, _acc1, 8); + vse32_v_i32m1(out_ptr2, _acc2, 8); + vse32_v_i32m1(out_ptr3, _acc3, 8); + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + } + for (; j < n; j++) { + int8_t *kernel_ptr = kernel_data; + int32_t acc0 = bias_data[0]; + int32_t acc1 = bias_data[1]; + int32_t acc2 = bias_data[2]; + int32_t acc3 = bias_data[3]; + int c = 0; + for (; c + 3 < k; c += 4) { + acc0 += kernel_ptr[0] * in_ptr[0] + kernel_ptr[1] * in_ptr[1] + + kernel_ptr[2] * in_ptr[2] + kernel_ptr[3] * in_ptr[3]; + acc1 += kernel_ptr[4] * in_ptr[0] + kernel_ptr[5] * in_ptr[1] + + kernel_ptr[6] * in_ptr[2] + kernel_ptr[7] * in_ptr[3]; + acc2 += kernel_ptr[8] * in_ptr[0] + kernel_ptr[9] * in_ptr[1] + + kernel_ptr[10] * in_ptr[2] + kernel_ptr[11] * in_ptr[3]; + acc3 += kernel_ptr[12] * in_ptr[0] + kernel_ptr[13] * in_ptr[1] + + kernel_ptr[14] * in_ptr[2] + kernel_ptr[15] * in_ptr[3]; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + *out_ptr2++ = acc2; + *out_ptr3++ = acc3; + in_ptr += k; + } + kernel_data += 4 * k; + output_data += 4 * ldc; + bias_data += 4; + } + + // m2 + for (; i + 1 < m; i += 2) { + int8_t *in_ptr = input_data; + + int32_t *out_ptr0 = output_data; + int32_t *out_ptr1 = out_ptr0 + ldc; + int j = 0; + // m2n16 loop + for (; j + 15 < n; j += 16) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 16); + vint32m2_t _acc1 = vmv_v_x_i32m2(bias_data[1], 16); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 64); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 16); + _acc1 = vmaqa_vx_i32m2(_acc1, kernel_ptr[1], _input, 16); + + kernel_ptr += 2; + in_ptr += 64; + } + vse32_v_i32m2(out_ptr0, _acc0, 16); + vse32_v_i32m2(out_ptr1, _acc1, 16); + out_ptr0 += 16; + out_ptr1 += 16; + } + // m2n8 + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 8); + vint32m1_t _acc1 = vmv_v_x_i32m1(bias_data[1], 8); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 32); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 8); + _acc1 = vmaqa_vx_i32m1(_acc1, kernel_ptr[1], _input, 8); + + kernel_ptr += 2; + in_ptr += 32; + } + vse32_v_i32m1(out_ptr0, _acc0, 8); + vse32_v_i32m1(out_ptr1, _acc1, 8); + out_ptr0 += 8; + out_ptr1 += 8; + } + for (; j < n; j++) { + int8_t *kernel_ptr = kernel_data; + int32_t acc0 = bias_data[0]; + int32_t acc1 = bias_data[1]; + int c = 0; + for (; c + 3 < k; c += 4) { + acc0 += kernel_ptr[0] * in_ptr[0] + kernel_ptr[1] * in_ptr[1] + + kernel_ptr[2] * in_ptr[2] + kernel_ptr[3] * in_ptr[3]; + acc1 += kernel_ptr[4] * in_ptr[0] + kernel_ptr[5] * in_ptr[1] + + kernel_ptr[6] * in_ptr[2] + kernel_ptr[7] * in_ptr[3]; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + in_ptr += k; + } + kernel_data += 2 * k; + output_data += 2 * ldc; + bias_data += 2; + } + + // m1 + for (; i < m; i++) { + int8_t *in_ptr = input_data; + int32_t *out_ptr0 = output_data; + int j = 0; + // m1n16 loop + for (; j + 15 < n; j += 16) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m2_t _acc0 = vmv_v_x_i32m2(bias_data[0], 16); + + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m2_t _input = vle8_v_i8m2(in_ptr, 64); + _acc0 = vmaqa_vx_i32m2(_acc0, kernel_ptr[0], _input, 16); + kernel_ptr += 1; + in_ptr += 64; + } + vse32_v_i32m2(out_ptr0, _acc0, 16); + out_ptr0 += 16; + } + // m1n8 + for (; j + 7 < n; j += 8) { + int32_t *kernel_ptr = (int32_t *)kernel_data; + vint32m1_t _acc0 = vmv_v_x_i32m1(bias_data[0], 8); + int c = 0; + for (; c + 3 < k; c += 4) { + vint8m1_t _input = vle8_v_i8m1(in_ptr, 32); + _acc0 = vmaqa_vx_i32m1(_acc0, kernel_ptr[0], _input, 8); + kernel_ptr += 1; + in_ptr += 32; + } + vse32_v_i32m1(out_ptr0, _acc0, 8); + out_ptr0 += 8; + } + // m1n_tail + for (; j < n; j++) { + int32_t acc0 = bias_data[0]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[c] * in_ptr[c]; + } + *out_ptr0++ = acc0; + in_ptr += k; + } + } +} + +#endif \ No newline at end of file diff --git a/source/thead_rvv/global_avgpool.c b/source/thead_rvv/global_avgpool.c new file mode 100644 index 00000000..69e949a6 --- /dev/null +++ b/source/thead_rvv/global_avgpool.c @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +int csi_nn_rvv_global_avgpool2d_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int in_hw = in_h * in_w; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + vfloat32m1_t _res = vfmv_s_f_f32m1(vundefined_f32m1(), 0.0f, 4); + int size = in_hw; + while (size > 0) { + vl = vsetvl_e32m2(size); + vfloat32m2_t _input = vle32_v_f32m2(input_data, vl); + _res = vfredusum_vs_f32m2_f32m1(vundefined_f32m1(), _input, _res, vl); + input_data += vl; + size -= vl; + } + float avg = vfmv_f_s_f32m1_f32(_res) / in_hw; + *output_data++ = avg; + } + } + return CSINN_TRUE; +} + +int csi_nn_rvv_global_avgpool2d_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int in_hw = in_h * in_w; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + vfloat16m1_t _res = vfmv_s_f_f16m1(vundefined_f16m1(), 0.0f, 8); + int size = in_hw; + while (size > 0) { + vl = vsetvl_e16m2(size); + vfloat16m2_t _input = vle16_v_f16m2(input_data, vl); + _res = vfredusum_vs_f16m2_f16m1(vundefined_f16m1(), _input, _res, vl); + input_data += vl; + size -= vl; + } + __fp16 avg = vfmv_f_s_f16m1_f16(_res) / in_hw; + *output_data++ = avg; + } + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/global_maxpool.c b/source/thead_rvv/global_maxpool.c new file mode 100644 index 00000000..5eccf907 --- /dev/null +++ b/source/thead_rvv/global_maxpool.c @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +int csi_nn_rvv_global_maxpool2d_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int in_hw = in_h * in_w; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + vfloat32m1_t _max = vfmv_s_f_f32m1(vundefined_f32m1(), -FLT_MAX, 4); // ??? + int size = in_hw; + while (size > 0) { + vl = vsetvl_e32m2(size); + vfloat32m2_t _input = vle32_v_f32m2(input_data, vl); + _max = vfredmax_vs_f32m2_f32m1(vundefined_f32m1(), _input, _max, vl); + input_data += vl; + size -= vl; + } + float max = vfmv_f_s_f32m1_f32(_max); + *output_data++ = max; + } + } + return CSINN_TRUE; +} + +int csi_nn_rvv_global_maxpool2d_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int in_hw = in_h * in_w; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + vfloat16m1_t _max = vfmv_s_f_f16m1(vundefined_f16m1(), -FLT_MAX, 8); // ??? + int size = in_hw; + while (size > 0) { + vl = vsetvl_e16m2(size); + vfloat16m2_t _input = vle16_v_f16m2(input_data, vl); + _max = vfredmax_vs_f16m2_f16m1(vundefined_f16m1(), _input, _max, vl); + input_data += vl; + size -= vl; + } + __fp16 max = vfmv_f_s_f16m1_f16(_max); + *output_data++ = max; + } + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/leaky_relu.c b/source/thead_rvv/leaky_relu.c new file mode 100644 index 00000000..9f4eb418 --- /dev/null +++ b/source/thead_rvv/leaky_relu.c @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 ... +*************************************************************/ +int csi_nn_rvv_leaky_relu_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + float alpha = params->n; + int size = csi_tensor_size(input); + while (size > 0) { + int vl = vsetvl_e32m2(size); + vfloat32m2_t _input = vle32_v_f32m2(input_data, vl); + vbool16_t _mask = vmflt_vf_f32m2_b16(_input, 0.0f, vl); + vfloat32m2_t _res = vfmul_vf_f32m2_m(_mask, _input, _input, alpha, vl); + vse32_v_f32m2(output_data, _res, vl); + input_data += vl; + output_data += vl; + size -= vl; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_leaky_relu_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + __fp16 alpha = (__fp16)params->n; + int size = csi_tensor_size(input); + while (size > 0) { + int vl = vsetvl_e16m2(size); + vfloat16m2_t _input = vle16_v_f16m2(input_data, vl); + vbool8_t _mask = vmflt_vf_f16m2_b8(_input, 0.0f, vl); + vfloat16m2_t _res = vfmul_vf_f16m2_m(_mask, _input, _input, alpha, vl); + vse16_v_f16m2(output_data, _res, vl); + input_data += vl; + output_data += vl; + size -= vl; + } + return CSINN_TRUE; +} + +/********************************************************************* + * s2 * (q2 - z2) = leaky_relu{ s1 * (q1 - z1) } + * if (q1 >= z1) q2 = s1/s2 * (q1 - z1) + z2 + * else q2 = s1/s2 * alpha * (q1 -z1) + z2 + * constrains: params->n < 0.5 + * ******************************************************************/ +int csi_nn_rvv_leaky_relu_int8(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + + // TODO: move to init api + float real_scale0 = input->qinfo->scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale0, &output->qinfo->multiplier, &output->qinfo->shift); + + int size = csi_tensor_size(input); + while (size > 0) { + int vl = vsetvl_e8m1(size); + vint8m1_t _input = vle8_v_i8m1(input_data, vl); + vint16m2_t _input1 = vwadd_vx_i16m2(_input, 0, vl); // widden 8->16 + vint32m4_t _input2 = vwadd_vx_i32m4(_input1, 0, vl); // widden 16->32 + + vint32m4_t _tmp = vsub_vx_i32m4(_input2, input->qinfo->zero_point, vl); + + _tmp = vsll_vx_i32m4(_tmp, output->qinfo->shift + 2, vl); + vint32m4_t _mulh = vmulh_vx_i32m4(_tmp, output->qinfo->multiplier, vl); + _mulh = vssra_vx_i32m4(_mulh, 1, vl); + + vbool8_t _mask = vmslt_vx_i32m4_b8(_input2, input->qinfo->zero_point, vl); + vint32m4_t _mulh_neg = vmulh_vx_i32m4_m(_mask, _mulh, _mulh, params->n_multiplier, vl); + _mulh_neg = vssra_vx_i32m4_m(_mask, _mulh, _mulh_neg, -params->n_shift - 1, vl); + + vint32m4_t _res0 = vadd_vx_i32m4(_mulh_neg, output->qinfo->zero_point, vl); + vint16m2_t _res1 = vnclip_wx_i16m2(_res0, 0, vl); + vint8m1_t _res2 = vnclip_wx_i8m1(_res1, 0, vl); + + vse8_v_i8m1(output_data, _res2, vl); + input_data += vl; + output_data += vl; + size -= vl; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/maxpool.c b/source/thead_rvv/maxpool.c new file mode 100644 index 00000000..3db8457c --- /dev/null +++ b/source/thead_rvv/maxpool.c @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "csi_thead_rvv.h" + +int csi_nn_rvv_maxpool2d_init(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + int32_t input_h = input->dim[2]; + int32_t input_w = input->dim[3]; + + int32_t kernel_h = params->filter_height; + int32_t kernel_w = params->filter_width; + int32_t stride_h = params->stride_height; + int32_t stride_w = params->stride_width; + + int32_t pad_left = params->pad_left; + int32_t pad_right = params->pad_right; + int32_t pad_top = params->pad_top; + int32_t pad_down = params->pad_down; + + params->base.bc = NULL; + + // global maxpool2d + if (input_h == kernel_h && input_w == kernel_w) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_global_maxpool2d_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_global_maxpool2d_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + params->base.bc = csi_ref_global_maxpool2d_quant; + } + return CSINN_TRUE; + } + + if (stride_h == 2 && stride_w == 2) { + if (kernel_h == 2 && kernel_w == 2) { // 2x2s2 + if (pad_left == 0 && pad_top == 0) { + // adjust pad according to ceil_mode (ceil mode on caffe pytorch..) + if (input_h % 2 == 1 && params->ceil_mode == 1) { + if (params->pad_down == 0) params->pad_down++; + } + if (input_w % 2 == 1 && params->ceil_mode == 1) { + if (params->pad_right == 0) params->pad_right++; + } + // end consider ceil_mode 2x2s2p0 + + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_maxpool2x2s2_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_maxpool2x2s2_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + params->base.bc = csi_nn_rvv_maxpool2x2s2_int8; + } + } else if (pad_left == 1 && pad_top == 1) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_maxpool2x2s2_p1_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_maxpool2x2s2_p1_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + params->base.bc = csi_nn_rvv_maxpool2x2s2_p1_int8; + } + } + } else if (kernel_h == 3 && kernel_w == 3) { // 3x3s2 + if (pad_left == 0 && pad_top == 0) { + // adjust pad according to ceil_mode (ceil mode on caffe pytorch..) + if (input_h % 2 == 0 && params->ceil_mode == 1) { + if (params->pad_down == 0) + params->pad_down++; // origin pad_down mast be equal to zero ? + } + if (input_w % 2 == 0 && params->ceil_mode == 1) { + if (params->pad_right == 0) params->pad_right++; + } + // end consider ceil_mode 3x3s2p0 + + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_maxpool3x3s2_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_maxpool3x3s2_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + params->base.bc = csi_nn_rvv_maxpool3x3s2_int8; + } + } else if (pad_left == 1 && pad_top == 1) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_maxpool3x3s2_p1_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_maxpool3x3s2_p1_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + params->base.bc = csi_nn_rvv_maxpool3x3s2_p1_int8; + } + } + } + } else if (stride_h == 1 && stride_w == 1) { + if (kernel_h == 3 && kernel_w == 3) { + if (pad_left == 1 && pad_top == 1 && pad_right == 1 && pad_down == 1) { + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_nn_rvv_maxpool3x3s1_p1_fp32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_nn_rvv_maxpool3x3s1_p1_fp16; + } else if (input->dtype == CSINN_DTYPE_INT8) { + params->base.bc = csi_nn_rvv_maxpool3x3s1_p1_int8; + } + } + } + } + + if (params->base.bc == NULL) { + csi_debug_warning( + "maxpool is not optimized to achieve under this condition on RVV, call reference func " + "replaced.\n"); + if (input->dtype == CSINN_DTYPE_FLOAT32) { + params->base.bc = csi_ref_maxpool2d_f32; + } else if (input->dtype == CSINN_DTYPE_FLOAT16) { + params->base.bc = csi_ref_maxpool2d_quant; + } else if (input->dtype == CSINN_DTYPE_INT8) { + params->base.bc = csi_ref_maxpool2d_quant; + } + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/maxpool_2x2.c b/source/thead_rvv/maxpool_2x2.c new file mode 100644 index 00000000..1c1f44d4 --- /dev/null +++ b/source/thead_rvv/maxpool_2x2.c @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +/* + pad_left = pad_top = 0 + pad_right = 0 or 1 + pad_down = 0 or 1 +*/ +int csi_nn_rvv_maxpool2x2s2_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 1 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 1 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const float *line0 = input_data + c * in_h * in_w; + const float *line1 = line0 + in_w; + float *outptr = output_data + c * out_hw; + + for (int h = 0; h < out_h; h++) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + + vfloat32m1_t _max0 = vfmax_vv_f32m1(_line0_0_6, _line0_1_7, vl); + vfloat32m1_t _max1 = vfmax_vv_f32m1(_line1_0_6, _line1_1_7, vl); + vfloat32m1_t _max = vfmax_vv_f32m1(_max0, _max1, vl); + + vse32_v_f32m1(outptr, _max, vl); + line0 += 2 * vl; + line1 += 2 * vl; + outptr += vl; + w -= vl; + } + if (extend_w) { + outptr[0] = fmax(fmax(line0[0], line1[0]), 0.0f); + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + } + if (extend_h) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + + vfloat32m1_t _max0 = vfmax_vv_f32m1(_line0_0_6, _line0_1_7, vl); + vfloat32m1_t _max = vfmax_vf_f32m1(_max0, 0.0f, vl); + + vse32_v_f32m1(outptr, _max, vl); + line0 += 2 * vl; + outptr += vl; + w -= vl; + } + + if (extend_w) { + outptr[0] = fmax(line0[0], 0.0f); + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +/* + pad_left = pad_top = 1 + pad_right = 0 or 1 + pad_down = 0 or 1 +*/ +int csi_nn_rvv_maxpool2x2s2_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 0 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 0 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w + 1; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const float *line00 = input_data + c * in_h * in_w; + float *outptr = output_data + c * out_hw; + + // h top ---- w left + outptr[0] = fmax(line00[0], 0.0f); + outptr++; + line00++; + // h top ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line00, vl); + vfloat32m1_t _max0 = vfmax_vv_f32m1(_line0_0_6, _line0_1_7, vl); + vfloat32m1_t _max = vfmax_vf_f32m1(_max0, 0.0f, vl); + vse32_v_f32m1(outptr, _max, vl); + line00 += 2 * vl; + outptr += vl; + w -= vl; + } + // h top ---- w right + if (extend_w) { + outptr[0] = fmax(line00[0], 0.0f); + outptr++; + } + line00 += remain_w; + + // h mid + const float *line0 = line00; + const float *line1 = line0 + in_w; + for (int h = 0; h < out_h - 1; h++) { + // h mid ---- w left + outptr[0] = fmax(fmax(line0[0], line1[0]), 0.0f); + outptr++; + line0++; + line1++; + // h mid ---- w mid + w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + + vfloat32m1_t _max0 = vfmax_vv_f32m1(_line0_0_6, _line0_1_7, vl); + vfloat32m1_t _max1 = vfmax_vv_f32m1(_line1_0_6, _line1_1_7, vl); + vfloat32m1_t _max = vfmax_vv_f32m1(_max0, _max1, vl); + + vse32_v_f32m1(outptr, _max, vl); + line0 += 2 * vl; + line1 += 2 * vl; + outptr += vl; + w -= vl; + } + + // h mid ---- w right + if (extend_w) { + outptr[0] = fmax(fmax(line0[0], line1[0]), 0.0f); + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + } + // h bottom + if (extend_h) { + // h bottom ---- w left + outptr[0] = fmax(line0[0], 0.0f); + outptr++; + line0++; + // h bottom ---- w mid + w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + + vfloat32m1_t _max0 = vfmax_vv_f32m1(_line0_0_6, _line0_1_7, vl); + vfloat32m1_t _max = vfmax_vf_f32m1(_max0, 0.0f, vl); + + vse32_v_f32m1(outptr, _max, vl); + line0 += 2 * vl; + outptr += vl; + w -= vl; + } + // h bottom ---- w right + if (extend_w) { + outptr[0] = fmax(line0[0], 0.0f); + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/maxpool_2x2_fp16.c b/source/thead_rvv/maxpool_2x2_fp16.c new file mode 100644 index 00000000..b094377b --- /dev/null +++ b/source/thead_rvv/maxpool_2x2_fp16.c @@ -0,0 +1,254 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +int csi_nn_rvv_maxpool2x2s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 1 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 1 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const __fp16 *line0 = input_data + c * in_h * in_w; + const __fp16 *line1 = line0 + in_w; + __fp16 *outptr = output_data + c * out_hw; + + for (int h = 0; h < out_h; h++) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_14, _line0_1_15; + vfloat16m1_t _line1_0_14, _line1_1_15; + + vlseg2e16_v_f16m1(&_line0_0_14, &_line0_1_15, line0, vl); + vlseg2e16_v_f16m1(&_line1_0_14, &_line1_1_15, line1, vl); + + vfloat16m1_t _max0 = vfmax_vv_f16m1(_line0_0_14, _line0_1_15, vl); + vfloat16m1_t _max1 = vfmax_vv_f16m1(_line1_0_14, _line1_1_15, vl); + vfloat16m1_t _max = vfmax_vv_f16m1(_max0, _max1, vl); + + vse16_v_f16m1(outptr, _max, vl); + line0 += 2 * vl; + line1 += 2 * vl; + outptr += vl; + w -= vl; + } + if (extend_w) { + outptr[0] = line0[0] > line1[0] ? line0[0] : line1[0]; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + } + if (extend_h) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_14, _line0_1_15; + + vlseg2e16_v_f16m1(&_line0_0_14, &_line0_1_15, line0, vl); + + vfloat16m1_t _max0 = vfmax_vv_f16m1(_line0_0_14, _line0_1_15, vl); + vfloat16m1_t _max = vfmax_vf_f16m1(_max0, 0.0f, vl); + + vse16_v_f16m1(outptr, _max, vl); + line0 += 2 * vl; + outptr += vl; + w -= vl; + } + + if (extend_w) { + outptr[0] = line0[0] > 0 ? line0[0] : 0; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_maxpool2x2s2_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 0 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 0 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w + 1; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const __fp16 *line00 = input_data + c * in_h * in_w; + __fp16 *outptr = output_data + c * out_hw; + + // h top ---- w left + outptr[0] = line00[0] > 0 ? line00[0] : 0; + outptr++; + line00++; + // h top ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line00, vl); + vfloat16m1_t _max0 = vfmax_vv_f16m1(_line0_0_6, _line0_1_7, vl); + vfloat16m1_t _max = vfmax_vf_f16m1(_max0, 0.0f, vl); + vse16_v_f16m1(outptr, _max, vl); + line00 += 2 * vl; + outptr += vl; + w -= vl; + } + // h top ---- w right + if (extend_w) { + outptr[0] = line00[0] > 0 ? line00[0] : 0; + outptr++; + } + line00 += remain_w; + + // h mid + const __fp16 *line0 = line00; + const __fp16 *line1 = line0 + in_w; + for (int h = 0; h < out_h - 1; h++) { + // h mid ---- w left + outptr[0] = line0[0] > line1[0] ? line0[0] : line1[0]; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + line0++; + line1++; + // h mid ---- w mid + w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + + vfloat16m1_t _max0 = vfmax_vv_f16m1(_line0_0_6, _line0_1_7, vl); + vfloat16m1_t _max1 = vfmax_vv_f16m1(_line1_0_6, _line1_1_7, vl); + vfloat16m1_t _max = vfmax_vv_f16m1(_max0, _max1, vl); + + vse16_v_f16m1(outptr, _max, vl); + line0 += 2 * vl; + line1 += 2 * vl; + outptr += vl; + w -= vl; + } + + // h mid ---- w right + if (extend_w) { + outptr[0] = line0[0] > line1[0] ? line0[0] : line1[0]; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + } + // h bottom + if (extend_h) { + // h bottom ---- w left + outptr[0] = line0[0] > 0 ? line0[0] : 0; + outptr++; + line0++; + // h bottom ---- w mid + w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + + vfloat16m1_t _max0 = vfmax_vv_f16m1(_line0_0_6, _line0_1_7, vl); + vfloat16m1_t _max = vfmax_vf_f16m1(_max0, 0.0f, vl); + + vse16_v_f16m1(outptr, _max, vl); + line0 += 2 * vl; + outptr += vl; + w -= vl; + } + // h bottom ---- w right + if (extend_w) { + outptr[0] = line0[0] > 0 ? line0[0] : 0; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/maxpool_2x2_int8.c b/source/thead_rvv/maxpool_2x2_int8.c new file mode 100644 index 00000000..38f56630 --- /dev/null +++ b/source/thead_rvv/maxpool_2x2_int8.c @@ -0,0 +1,257 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/**************************************************************************** + * note: VLEN = 128/256 ... + * constrains: Input and outputs must all have same scale/zero_point + ****************************************************************************/ +int csi_nn_rvv_maxpool2x2s2_int8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 1 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 1 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w; + int vl; + int8_t input_zp = (int8_t)input->qinfo->zero_point; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const int8_t *line0 = input_data + c * in_h * in_w; + const int8_t *line1 = line0 + in_w; + int8_t *outptr = output_data + c * out_hw; + + for (int h = 0; h < out_h; h++) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_14, _line0_1_15; + vint8m1_t _line1_0_14, _line1_1_15; + + vlseg2e8_v_i8m1(&_line0_0_14, &_line0_1_15, line0, vl); + vlseg2e8_v_i8m1(&_line1_0_14, &_line1_1_15, line1, vl); + + vint8m1_t _max0 = vmax_vv_i8m1(_line0_0_14, _line0_1_15, vl); + vint8m1_t _max1 = vmax_vv_i8m1(_line1_0_14, _line1_1_15, vl); + vint8m1_t _max = vmax_vv_i8m1(_max0, _max1, vl); + + vse8_v_i8m1(outptr, _max, vl); + line0 += 2 * vl; + line1 += 2 * vl; + outptr += vl; + w -= vl; + } + if (extend_w) { + outptr[0] = line0[0] > line1[0] ? line0[0] : line1[0]; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + } + if (extend_h) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_14, _line0_1_15; + + vlseg2e8_v_i8m1(&_line0_0_14, &_line0_1_15, line0, vl); + + vint8m1_t _max0 = vmax_vv_i8m1(_line0_0_14, _line0_1_15, vl); + vint8m1_t _max = vmax_vx_i8m1(_max0, input_zp, vl); + + vse8_v_i8m1(outptr, _max, vl); + line0 += 2 * vl; + outptr += vl; + w -= vl; + } + + if (extend_w) { + outptr[0] = line0[0] > input_zp ? line0[0] : input_zp; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_maxpool2x2s2_p1_int8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 0 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 0 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w + 1; + int vl; + int8_t input_zp = (int8_t)input->qinfo->zero_point; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const int8_t *line00 = input_data + c * in_h * in_w; + int8_t *outptr = output_data + c * out_hw; + + // h top ---- w left + outptr[0] = line00[0] > input_zp ? line00[0] : input_zp; + outptr++; + line00++; + // h top ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_6, _line0_1_7; + vlseg2e8_v_i8m1(&_line0_0_6, &_line0_1_7, line00, vl); + vint8m1_t _max0 = vmax_vv_i8m1(_line0_0_6, _line0_1_7, vl); + vint8m1_t _max = vmax_vx_i8m1(_max0, input_zp, vl); + vse8_v_i8m1(outptr, _max, vl); + line00 += 2 * vl; + outptr += vl; + w -= vl; + } + // h top ---- w right + if (extend_w) { + outptr[0] = line00[0] > input_zp ? line00[0] : input_zp; + outptr++; + } + line00 += remain_w; + + // h mid + const int8_t *line0 = line00; + const int8_t *line1 = line0 + in_w; + for (int h = 0; h < out_h - 1; h++) { + // h mid ---- w left + outptr[0] = line0[0] > line1[0] ? line0[0] : line1[0]; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + line0++; + line1++; + // h mid ---- w mid + w = out_w - 1; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_6, _line0_1_7; + vint8m1_t _line1_0_6, _line1_1_7; + + vlseg2e8_v_i8m1(&_line0_0_6, &_line0_1_7, line0, vl); + vlseg2e8_v_i8m1(&_line1_0_6, &_line1_1_7, line1, vl); + + vint8m1_t _max0 = vmax_vv_i8m1(_line0_0_6, _line0_1_7, vl); + vint8m1_t _max1 = vmax_vv_i8m1(_line1_0_6, _line1_1_7, vl); + vint8m1_t _max = vmax_vv_i8m1(_max0, _max1, vl); + + vse8_v_i8m1(outptr, _max, vl); + line0 += 2 * vl; + line1 += 2 * vl; + outptr += vl; + w -= vl; + } + + // h mid ---- w right + if (extend_w) { + outptr[0] = line0[0] > line1[0] ? line0[0] : line1[0]; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + } + // h bottom + if (extend_h) { + // h bottom ---- w left + outptr[0] = line0[0] > input_zp ? line0[0] : input_zp; + outptr++; + line0++; + // h bottom ---- w mid + w = out_w - 1; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_6, _line0_1_7; + + vlseg2e8_v_i8m1(&_line0_0_6, &_line0_1_7, line0, vl); + + vint8m1_t _max0 = vmax_vv_i8m1(_line0_0_6, _line0_1_7, vl); + vint8m1_t _max = vmax_vx_i8m1(_max0, input_zp, vl); + + vse8_v_i8m1(outptr, _max, vl); + line0 += 2 * vl; + outptr += vl; + w -= vl; + } + // h bottom ---- w right + if (extend_w) { + outptr[0] = line0[0] > input_zp ? line0[0] : input_zp; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/maxpool_3x3.c b/source/thead_rvv/maxpool_3x3.c new file mode 100644 index 00000000..8efeb11a --- /dev/null +++ b/source/thead_rvv/maxpool_3x3.c @@ -0,0 +1,521 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +/* + pad_left = pad_top = 0 + pad_right = 0 or 1 + pad_down = 0 or 1 +*/ +int csi_nn_rvv_maxpool3x3s2_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 0 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 0 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const float *line0 = input_data + c * in_h * in_w; + const float *line1 = line0 + in_w; + const float *line2 = line1 + in_w; + float *outptr = output_data + c * out_hw; + + for (int h = 0; h < out_h; h++) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + vfloat32m1_t _line2_0_6, _line2_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat32m1_t _line0_2_8 = vlse32_v_f32m1(line0, 2 * sizeof(float), vl); + line0 += (vl - 1) * 2; + vfloat32m1_t _max0 = + vfmax_vv_f32m1(_line0_2_8, vfmax_vv_f32m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat32m1_t _line1_2_8 = vlse32_v_f32m1(line1, 2 * sizeof(float), vl); + line1 += (vl - 1) * 2; + vfloat32m1_t _max1 = + vfmax_vv_f32m1(_line1_2_8, vfmax_vv_f32m1(_line1_0_6, _line1_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line2_0_6, &_line2_1_7, line2, vl); + line2 += 2; + vfloat32m1_t _line2_2_8 = vlse32_v_f32m1(line2, 2 * sizeof(float), vl); + line2 += (vl - 1) * 2; + vfloat32m1_t _max2 = + vfmax_vv_f32m1(_line2_2_8, vfmax_vv_f32m1(_line2_0_6, _line2_1_7, vl), vl); + + vfloat32m1_t _max = vfmax_vv_f32m1(_max2, vfmax_vv_f32m1(_max0, _max1, vl), vl); + vse32_v_f32m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + if (extend_w) { + float max0 = fmax(line0[0], line0[1]); + float max1 = fmax(line1[0], line1[1]); + float max2 = fmax(line2[0], line2[1]); + outptr[0] = fmax(max0, fmax(max1, max2)); + outptr[0] = fmax(outptr[0], 0.0f); // consider padding with constant "0" + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + line2 += remain_w + in_w; + } + if (extend_h) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat32m1_t _line0_2_8 = vlse32_v_f32m1(line0, 2 * sizeof(float), vl); + line0 += (vl - 1) * 2; + vfloat32m1_t _max0 = + vfmax_vv_f32m1(_line0_2_8, vfmax_vv_f32m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat32m1_t _line1_2_8 = vlse32_v_f32m1(line1, 2 * sizeof(float), vl); + line1 += (vl - 1) * 2; + vfloat32m1_t _max1 = + vfmax_vv_f32m1(_line1_2_8, vfmax_vv_f32m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat32m1_t _max = vfmax_vf_f32m1(vfmax_vv_f32m1(_max0, _max1, vl), 0.0f, vl); + vse32_v_f32m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + + if (extend_w) { + float max0 = fmax(line0[0], line0[1]); + float max1 = fmax(line1[0], line1[1]); + outptr[0] = fmax(fmax(max0, max1), 0.0f); + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +/* + pad_left = pad_top = 1 + pad_right = 0 or 1 + pad_down = 0 or 1 +*/ +int csi_nn_rvv_maxpool3x3s2_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 1 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 1 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w + 1; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const float *line0 = input_data + c * in_h * in_w; + const float *line1 = line0 + in_w; + float *outptr = output_data + c * out_hw; + + // h top ---- w left + outptr[0] = fmax(fmax(line0[0], line0[1]), fmax(line1[0], line1[1])); + outptr[0] = fmax(outptr[0], 0.0f); + outptr++; + line0++; + line1++; + // h top ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat32m1_t _line0_2_8 = vlse32_v_f32m1(line0, 2 * sizeof(float), vl); + line0 += (vl - 1) * 2; + vfloat32m1_t _max0 = + vfmax_vv_f32m1(_line0_2_8, vfmax_vv_f32m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat32m1_t _line1_2_8 = vlse32_v_f32m1(line1, 2 * sizeof(float), vl); + line1 += (vl - 1) * 2; + vfloat32m1_t _max1 = + vfmax_vv_f32m1(_line1_2_8, vfmax_vv_f32m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat32m1_t _max = vfmax_vf_f32m1(vfmax_vv_f32m1(_max0, _max1, vl), 0.0f, vl); + vse32_v_f32m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h top ---- w right + if (extend_w) { + outptr[0] = fmax(fmax(line0[0], line0[1]), fmax(line1[0], line1[1])); + outptr[0] = fmax(outptr[0], 0.0f); + outptr++; + } + line0 += remain_w; + line1 += remain_w; + + // h mid + const float *line2 = line1 + in_w; + for (int h = 0; h < out_h - 1; h++) { + // h mid ---- w left + float max0 = fmax(line0[0], line0[1]); + float max1 = fmax(line1[0], line1[1]); + float max2 = fmax(line2[0], line2[1]); + outptr[0] = fmax(max0, fmax(max1, max2)); + outptr[0] = fmax(outptr[0], 0.0f); // consider padding with constant "0" + outptr++; + line0++; + line1++; + line2++; + // h mid ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + vfloat32m1_t _line2_0_6, _line2_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat32m1_t _line0_2_8 = vlse32_v_f32m1(line0, 2 * sizeof(float), vl); + line0 += (vl - 1) * 2; + vfloat32m1_t _max0 = + vfmax_vv_f32m1(_line0_2_8, vfmax_vv_f32m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat32m1_t _line1_2_8 = vlse32_v_f32m1(line1, 2 * sizeof(float), vl); + line1 += (vl - 1) * 2; + vfloat32m1_t _max1 = + vfmax_vv_f32m1(_line1_2_8, vfmax_vv_f32m1(_line1_0_6, _line1_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line2_0_6, &_line2_1_7, line2, vl); + line2 += 2; + vfloat32m1_t _line2_2_8 = vlse32_v_f32m1(line2, 2 * sizeof(float), vl); + line2 += (vl - 1) * 2; + vfloat32m1_t _max2 = + vfmax_vv_f32m1(_line2_2_8, vfmax_vv_f32m1(_line2_0_6, _line2_1_7, vl), vl); + + vfloat32m1_t _max = vfmax_vv_f32m1(_max2, vfmax_vv_f32m1(_max0, _max1, vl), vl); + vse32_v_f32m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h mid ---- w right + if (extend_w) { + float max0 = fmax(line0[0], line0[1]); + float max1 = fmax(line1[0], line1[1]); + float max2 = fmax(line2[0], line2[1]); + outptr[0] = fmax(max0, fmax(max1, max2)); + outptr[0] = fmax(outptr[0], 0.0f); + outptr++; + } + line0 += in_w + remain_w; + line1 += in_w + remain_w; + line2 += in_w + remain_w; + } + + // h bottom + if (extend_h) { + // h bottom ---- w left + outptr[0] = fmax(fmax(line0[0], line0[1]), fmax(line1[0], line1[1])); + outptr[0] = fmax(outptr[0], 0.0f); + outptr++; + line0++; + line1++; + + // h bottom ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_6, _line0_1_7; + vfloat32m1_t _line1_0_6, _line1_1_7; + + vlseg2e32_v_f32m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat32m1_t _line0_2_8 = vlse32_v_f32m1(line0, 2 * sizeof(float), vl); + line0 += (vl - 1) * 2; + vfloat32m1_t _max0 = + vfmax_vv_f32m1(_line0_2_8, vfmax_vv_f32m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e32_v_f32m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat32m1_t _line1_2_8 = vlse32_v_f32m1(line1, 2 * sizeof(float), vl); + line1 += (vl - 1) * 2; + vfloat32m1_t _max1 = + vfmax_vv_f32m1(_line1_2_8, vfmax_vv_f32m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat32m1_t _max = vfmax_vf_f32m1(vfmax_vv_f32m1(_max0, _max1, vl), 0.0f, vl); + vse32_v_f32m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h bottom ---- w right + if (extend_w) { + outptr[0] = fmax(fmax(line0[0], line0[1]), fmax(line1[0], line1[1])); + outptr[0] = fmax(outptr[0], 0.0f); + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +/* + pad_left = pad_right = pad_top = pad_down = 1 + in_w = out_w in_h = out_h +*/ +int csi_nn_rvv_maxpool3x3s1_p1_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + float *input_data = (float *)input->data; + float *output_data = (float *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int output_size = in_c * out_h * out_w; + int vl; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const float *line1 = input_data + c * in_h * in_w; + const float *line2 = line1 + in_w; + float *outptr = output_data + c * out_h * out_w; + // h top ---- w left + outptr[0] = fmax(fmax(line1[0], line1[1]), fmax(line2[0], line2[1])); + outptr[0] = fmax(outptr[0], 0.0f); + outptr++; + // h top ---- w mid + int w = out_w - 2; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line1_0_3 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_1_4 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_2_5 = vle32_v_f32m1(line1, vl); + line1 += vl - 2; + vfloat32m1_t _max1 = + vfmax_vv_f32m1(_line1_2_5, vfmax_vv_f32m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat32m1_t _line2_0_3 = vle32_v_f32m1(line2, vl); + line2++; + vfloat32m1_t _line2_1_4 = vle32_v_f32m1(line2, vl); + line2++; + vfloat32m1_t _line2_2_5 = vle32_v_f32m1(line2, vl); + line2 += vl - 2; + vfloat32m1_t _max2 = + vfmax_vv_f32m1(_line2_2_5, vfmax_vv_f32m1(_line2_0_3, _line2_1_4, vl), vl); + + vfloat32m1_t _max = vfmax_vf_f32m1(vfmax_vv_f32m1(_max1, _max2, vl), 0.0f, vl); + vse32_v_f32m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h top ---- w right + outptr[0] = fmax(fmax(line1[0], line1[1]), fmax(line2[0], line2[1])); + outptr[0] = fmax(outptr[0], 0.0f); + outptr++; + line1 += 2; // bump next line: line1 --> line2 + line2 += 2; + + // h mid + const float *line0 = input_data + c * in_h * in_w; + for (int h = 0; h < out_h - 2; h++) { + // h mid ---- w left + float max0 = fmax(line0[0], line0[1]); + float max1 = fmax(line1[0], line1[1]); + float max2 = fmax(line2[0], line2[1]); + outptr[0] = fmax(max0, fmax(max1, max2)); + outptr[0] = fmax(outptr[0], 0.0f); + outptr++; + // h mid ---- w mid + w = out_w - 2; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_3 = vle32_v_f32m1(line0, vl); + line0++; + vfloat32m1_t _line0_1_4 = vle32_v_f32m1(line0, vl); + line0++; + vfloat32m1_t _line0_2_5 = vle32_v_f32m1(line0, vl); + line0 += vl - 2; + vfloat32m1_t _max0 = + vfmax_vv_f32m1(_line0_2_5, vfmax_vv_f32m1(_line0_0_3, _line0_1_4, vl), vl); + + vfloat32m1_t _line1_0_3 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_1_4 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_2_5 = vle32_v_f32m1(line1, vl); + line1 += vl - 2; + vfloat32m1_t _max1 = + vfmax_vv_f32m1(_line1_2_5, vfmax_vv_f32m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat32m1_t _line2_0_3 = vle32_v_f32m1(line2, vl); + line2++; + vfloat32m1_t _line2_1_4 = vle32_v_f32m1(line2, vl); + line2++; + vfloat32m1_t _line2_2_5 = vle32_v_f32m1(line2, vl); + line2 += vl - 2; + vfloat32m1_t _max2 = + vfmax_vv_f32m1(_line2_2_5, vfmax_vv_f32m1(_line2_0_3, _line2_1_4, vl), vl); + + vfloat32m1_t _max = vfmax_vv_f32m1(_max2, vfmax_vv_f32m1(_max0, _max1, vl), vl); + vse32_v_f32m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h mid ---- w right + float max0_0 = fmax(line0[0], line0[1]); + float max1_0 = fmax(line1[0], line1[1]); + float max2_0 = fmax(line2[0], line2[1]); + outptr[0] = fmax(max0_0, fmax(max1_0, max2_0)); + outptr[0] = fmax(outptr[0], 0.0f); + + outptr++; + line0 += 2; + line1 += 2; + line2 += 2; + } + + // h bottom ---- w left + outptr[0] = fmax(fmax(line0[0], line0[1]), fmax(line1[0], line1[1])); + outptr[0] = fmax(outptr[0], 0.0f); + outptr++; + // h bottom ---- w mid + w = out_w - 2; + while (w > 0) { + vl = vsetvl_e32m1(w); + vfloat32m1_t _line0_0_3 = vle32_v_f32m1(line0, vl); + line0++; + vfloat32m1_t _line0_1_4 = vle32_v_f32m1(line0, vl); + line0++; + vfloat32m1_t _line0_2_5 = vle32_v_f32m1(line0, vl); + line0 += vl - 2; + vfloat32m1_t _max0 = + vfmax_vv_f32m1(_line0_2_5, vfmax_vv_f32m1(_line0_0_3, _line0_1_4, vl), vl); + + vfloat32m1_t _line1_0_3 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_1_4 = vle32_v_f32m1(line1, vl); + line1++; + vfloat32m1_t _line1_2_5 = vle32_v_f32m1(line1, vl); + line1 += vl - 2; + vfloat32m1_t _max1 = + vfmax_vv_f32m1(_line1_2_5, vfmax_vv_f32m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat32m1_t _max = vfmax_vf_f32m1(vfmax_vv_f32m1(_max0, _max1, vl), 0.0f, vl); + vse32_v_f32m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h bottom ---- w right + outptr[0] = fmax(fmax(line0[0], line0[1]), fmax(line1[0], line1[1])); + outptr[0] = fmax(outptr[0], 0.0f); + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/maxpool_3x3_fp16.c b/source/thead_rvv/maxpool_3x3_fp16.c new file mode 100644 index 00000000..f6e2e88f --- /dev/null +++ b/source/thead_rvv/maxpool_3x3_fp16.c @@ -0,0 +1,535 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 +*************************************************************/ +int csi_nn_rvv_maxpool3x3s2_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 0 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 0 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w; + + int vl; + int n = csrr_vlenb() / sizeof(__fp16); + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const __fp16 *line0 = input_data + c * in_h * in_w; + const __fp16 *line1 = line0 + in_w; + const __fp16 *line2 = line1 + in_w; + __fp16 *outptr = output_data + c * out_hw; + + for (int h = 0; h < out_h; h++) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + vfloat16m1_t _line2_0_6, _line2_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat16m1_t _line0_2_8 = vlse16_v_f16m1(line0, 2 * sizeof(__fp16), vl); + line0 += (vl - 1) * 2; + vfloat16m1_t _max0 = + vfmax_vv_f16m1(_line0_2_8, vfmax_vv_f16m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat16m1_t _line1_2_8 = vlse16_v_f16m1(line1, 2 * sizeof(__fp16), vl); + line1 += (vl - 1) * 2; + vfloat16m1_t _max1 = + vfmax_vv_f16m1(_line1_2_8, vfmax_vv_f16m1(_line1_0_6, _line1_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line2_0_6, &_line2_1_7, line2, vl); + line2 += 2; + vfloat16m1_t _line2_2_8 = vlse16_v_f16m1(line2, 2 * sizeof(__fp16), vl); + line2 += (vl - 1) * 2; + vfloat16m1_t _max2 = + vfmax_vv_f16m1(_line2_2_8, vfmax_vv_f16m1(_line2_0_6, _line2_1_7, vl), vl); + + vfloat16m1_t _max = vfmax_vv_f16m1(_max2, vfmax_vv_f16m1(_max0, _max1, vl), vl); + vse16_v_f16m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + if (extend_w) { + __fp16 max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + __fp16 max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + __fp16 max2 = line2[0] > line2[1] ? line2[0] : line2[1]; + outptr[0] = max1 > max2 ? max1 : max2; + outptr[0] = outptr[0] > max0 ? outptr[0] : max0; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + line2 += remain_w + in_w; + } + if (extend_h) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat16m1_t _line0_2_8 = vlse16_v_f16m1(line0, 2 * sizeof(__fp16), vl); + line0 += (vl - 1) * 2; + vfloat16m1_t _max0 = + vfmax_vv_f16m1(_line0_2_8, vfmax_vv_f16m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat16m1_t _line1_2_8 = vlse16_v_f16m1(line1, 2 * sizeof(__fp16), vl); + line1 += (vl - 1) * 2; + vfloat16m1_t _max1 = + vfmax_vv_f16m1(_line1_2_8, vfmax_vv_f16m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat16m1_t _max = vfmax_vf_f16m1(vfmax_vv_f16m1(_max0, _max1, vl), 0.0f, vl); + vse16_v_f16m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + + if (extend_w) { + __fp16 max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + __fp16 max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_maxpool3x3s2_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 1 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 1 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w + 1; + + int vl; + int n = csrr_vlenb() / sizeof(__fp16); + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const __fp16 *line0 = input_data + c * in_h * in_w; + const __fp16 *line1 = line0 + in_w; + __fp16 *outptr = output_data + c * out_hw; + + // h top ---- w left + __fp16 max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + __fp16 max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + line0++; + line1++; + // h top ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat16m1_t _line0_2_8 = vlse16_v_f16m1(line0, 2 * sizeof(__fp16), vl); + line0 += (vl - 1) * 2; + vfloat16m1_t _max0 = + vfmax_vv_f16m1(_line0_2_8, vfmax_vv_f16m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat16m1_t _line1_2_8 = vlse16_v_f16m1(line1, 2 * sizeof(__fp16), vl); + line1 += (vl - 1) * 2; + vfloat16m1_t _max1 = + vfmax_vv_f16m1(_line1_2_8, vfmax_vv_f16m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat16m1_t _max = vfmax_vf_f16m1(vfmax_vv_f16m1(_max0, _max1, vl), 0.0f, vl); + vse16_v_f16m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h top ---- w right + if (extend_w) { + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + } + line0 += remain_w; + line1 += remain_w; + + // h mid + const __fp16 *line2 = line1 + in_w; + __fp16 max2 = 0; + for (int h = 0; h < out_h - 1; h++) { + // h mid ---- w left + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + max2 = line2[0] > line2[1] ? line2[0] : line2[1]; + max1 = max1 > max2 ? max1 : max2; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; // consider padding with constant "0" + outptr++; + line0++; + line1++; + line2++; + // h mid ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + vfloat16m1_t _line2_0_6, _line2_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat16m1_t _line0_2_8 = vlse16_v_f16m1(line0, 2 * sizeof(__fp16), vl); + line0 += (vl - 1) * 2; + vfloat16m1_t _max0 = + vfmax_vv_f16m1(_line0_2_8, vfmax_vv_f16m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat16m1_t _line1_2_8 = vlse16_v_f16m1(line1, 2 * sizeof(__fp16), vl); + line1 += (vl - 1) * 2; + vfloat16m1_t _max1 = + vfmax_vv_f16m1(_line1_2_8, vfmax_vv_f16m1(_line1_0_6, _line1_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line2_0_6, &_line2_1_7, line2, vl); + line2 += 2; + vfloat16m1_t _line2_2_8 = vlse16_v_f16m1(line2, 2 * sizeof(__fp16), vl); + line2 += (vl - 1) * 2; + vfloat16m1_t _max2 = + vfmax_vv_f16m1(_line2_2_8, vfmax_vv_f16m1(_line2_0_6, _line2_1_7, vl), vl); + + vfloat16m1_t _max = vfmax_vv_f16m1(_max2, vfmax_vv_f16m1(_max0, _max1, vl), vl); + vse16_v_f16m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h mid ---- w right + if (extend_w) { + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + max2 = line2[0] > line2[1] ? line2[0] : line2[1]; + max1 = max1 > max2 ? max1 : max2; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + } + line0 += in_w + remain_w; + line1 += in_w + remain_w; + line2 += in_w + remain_w; + } + + // h bottom + if (extend_h) { + // h bottom ---- w left + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + line0++; + line1++; + + // h bottom ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_6, _line0_1_7; + vfloat16m1_t _line1_0_6, _line1_1_7; + + vlseg2e16_v_f16m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vfloat16m1_t _line0_2_8 = vlse16_v_f16m1(line0, 2 * sizeof(__fp16), vl); + line0 += (vl - 1) * 2; + vfloat16m1_t _max0 = + vfmax_vv_f16m1(_line0_2_8, vfmax_vv_f16m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e16_v_f16m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vfloat16m1_t _line1_2_8 = vlse16_v_f16m1(line1, 2 * sizeof(__fp16), vl); + line1 += (vl - 1) * 2; + vfloat16m1_t _max1 = + vfmax_vv_f16m1(_line1_2_8, vfmax_vv_f16m1(_line1_0_6, _line1_1_7, vl), vl); + + vfloat16m1_t _max = vfmax_vf_f16m1(vfmax_vv_f16m1(_max0, _max1, vl), 0.0f, vl); + vse16_v_f16m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h bottom ---- w right + if (extend_w) { + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_maxpool3x3s1_p1_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int output_size = in_c * out_h * out_w; + + int vl; + int n = csrr_vlenb() / sizeof(__fp16); + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const __fp16 *line1 = input_data + c * in_h * in_w; + const __fp16 *line2 = line1 + in_w; + __fp16 *outptr = output_data + c * out_h * out_w; + // h top ---- w left + __fp16 max0 = line1[0] > line1[1] ? line1[0] : line1[1]; + __fp16 max1 = line2[0] > line2[1] ? line2[0] : line2[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + // h top ---- w mid + int w = out_w - 2; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line1_0_3 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_1_4 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_2_5 = vle16_v_f16m1(line1, vl); + line1 += vl - 2; + vfloat16m1_t _max1 = + vfmax_vv_f16m1(_line1_2_5, vfmax_vv_f16m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat16m1_t _line2_0_3 = vle16_v_f16m1(line2, vl); + line2++; + vfloat16m1_t _line2_1_4 = vle16_v_f16m1(line2, vl); + line2++; + vfloat16m1_t _line2_2_5 = vle16_v_f16m1(line2, vl); + line2 += vl - 2; + vfloat16m1_t _max2 = + vfmax_vv_f16m1(_line2_2_5, vfmax_vv_f16m1(_line2_0_3, _line2_1_4, vl), vl); + + vfloat16m1_t _max = vfmax_vf_f16m1(vfmax_vv_f16m1(_max1, _max2, vl), 0.0f, vl); + vse16_v_f16m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h top ---- w right + max0 = line1[0] > line1[1] ? line1[0] : line1[1]; + max1 = line2[0] > line2[1] ? line2[0] : line2[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + line1 += 2; // bump next line: line1 --> line2 + line2 += 2; + + // h mid + const __fp16 *line0 = input_data + c * in_h * in_w; + __fp16 max2 = 0; + for (int h = 0; h < out_h - 2; h++) { + // h mid ---- w left + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + max2 = line2[0] > line2[1] ? line2[0] : line2[1]; + max1 = max1 > max2 ? max1 : max2; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + // h mid ---- w mid + w = out_w - 2; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_3 = vle16_v_f16m1(line0, vl); + line0++; + vfloat16m1_t _line0_1_4 = vle16_v_f16m1(line0, vl); + line0++; + vfloat16m1_t _line0_2_5 = vle16_v_f16m1(line0, vl); + line0 += vl - 2; + vfloat16m1_t _max0 = + vfmax_vv_f16m1(_line0_2_5, vfmax_vv_f16m1(_line0_0_3, _line0_1_4, vl), vl); + + vfloat16m1_t _line1_0_3 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_1_4 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_2_5 = vle16_v_f16m1(line1, vl); + line1 += vl - 2; + vfloat16m1_t _max1 = + vfmax_vv_f16m1(_line1_2_5, vfmax_vv_f16m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat16m1_t _line2_0_3 = vle16_v_f16m1(line2, vl); + line2++; + vfloat16m1_t _line2_1_4 = vle16_v_f16m1(line2, vl); + line2++; + vfloat16m1_t _line2_2_5 = vle16_v_f16m1(line2, vl); + line2 += vl - 2; + vfloat16m1_t _max2 = + vfmax_vv_f16m1(_line2_2_5, vfmax_vv_f16m1(_line2_0_3, _line2_1_4, vl), vl); + + vfloat16m1_t _max = vfmax_vv_f16m1(_max2, vfmax_vv_f16m1(_max0, _max1, vl), vl); + vse16_v_f16m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h mid ---- w right + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + max2 = line2[0] > line2[1] ? line2[0] : line2[1]; + max1 = max1 > max2 ? max1 : max2; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + + outptr++; + line0 += 2; + line1 += 2; + line2 += 2; + } + + // h bottom ---- w left + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > 0 ? outptr[0] : 0; + outptr++; + // h bottom ---- w mid + w = out_w - 2; + while (w > 0) { + vl = vsetvl_e16m1(w); + vfloat16m1_t _line0_0_3 = vle16_v_f16m1(line0, vl); + line0++; + vfloat16m1_t _line0_1_4 = vle16_v_f16m1(line0, vl); + line0++; + vfloat16m1_t _line0_2_5 = vle16_v_f16m1(line0, vl); + line0 += vl - 2; + vfloat16m1_t _max0 = + vfmax_vv_f16m1(_line0_2_5, vfmax_vv_f16m1(_line0_0_3, _line0_1_4, vl), vl); + + vfloat16m1_t _line1_0_3 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_1_4 = vle16_v_f16m1(line1, vl); + line1++; + vfloat16m1_t _line1_2_5 = vle16_v_f16m1(line1, vl); + line1 += vl - 2; + vfloat16m1_t _max1 = + vfmax_vv_f16m1(_line1_2_5, vfmax_vv_f16m1(_line1_0_3, _line1_1_4, vl), vl); + + vfloat16m1_t _max = vfmax_vf_f16m1(vfmax_vv_f16m1(_max0, _max1, vl), 0.0f, vl); + vse16_v_f16m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h bottom ---- w right + outptr[0] = fmax(fmax(line0[0], line0[1]), fmax(line1[0], line1[1])); + outptr[0] = fmax(outptr[0], 0.0f); + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/maxpool_3x3_int8.c b/source/thead_rvv/maxpool_3x3_int8.c new file mode 100644 index 00000000..99a69054 --- /dev/null +++ b/source/thead_rvv/maxpool_3x3_int8.c @@ -0,0 +1,536 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/**************************************************************************** + * note: VLEN = 128/256 ... + * constrains: Input and outputs must all have same scale/zero_point + ****************************************************************************/ +int csi_nn_rvv_maxpool3x3s2_int8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 0 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 0 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + int remain_w = in_w - 2 * out_w; + int vl; + int8_t input_zp = (int8_t)input->qinfo->zero_point; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const int8_t *line0 = input_data + c * in_h * in_w; + const int8_t *line1 = line0 + in_w; + const int8_t *line2 = line1 + in_w; + int8_t *outptr = output_data + c * out_hw; + + for (int h = 0; h < out_h; h++) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_6, _line0_1_7; + vint8m1_t _line1_0_6, _line1_1_7; + vint8m1_t _line2_0_6, _line2_1_7; + + vlseg2e8_v_i8m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vint8m1_t _line0_2_8 = vlse8_v_i8m1(line0, 2 * sizeof(int8_t), vl); + line0 += (vl - 1) * 2; + vint8m1_t _max0 = + vmax_vv_i8m1(_line0_2_8, vmax_vv_i8m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e8_v_i8m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vint8m1_t _line1_2_8 = vlse8_v_i8m1(line1, 2 * sizeof(int8_t), vl); + line1 += (vl - 1) * 2; + vint8m1_t _max1 = + vmax_vv_i8m1(_line1_2_8, vmax_vv_i8m1(_line1_0_6, _line1_1_7, vl), vl); + + vlseg2e8_v_i8m1(&_line2_0_6, &_line2_1_7, line2, vl); + line2 += 2; + vint8m1_t _line2_2_8 = vlse8_v_i8m1(line2, 2 * sizeof(int8_t), vl); + line2 += (vl - 1) * 2; + vint8m1_t _max2 = + vmax_vv_i8m1(_line2_2_8, vmax_vv_i8m1(_line2_0_6, _line2_1_7, vl), vl); + + vint8m1_t _max = vmax_vv_i8m1(_max2, vmax_vv_i8m1(_max0, _max1, vl), vl); + vse8_v_i8m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + if (extend_w) { + int8_t max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + int8_t max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + int8_t max2 = line2[0] > line2[1] ? line2[0] : line2[1]; + outptr[0] = max1 > max2 ? max1 : max2; + outptr[0] = outptr[0] > max0 ? outptr[0] : max0; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + } + line0 += remain_w + in_w; + line1 += remain_w + in_w; + line2 += remain_w + in_w; + } + if (extend_h) { + int w = out_w; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_6, _line0_1_7; + vint8m1_t _line1_0_6, _line1_1_7; + + vlseg2e8_v_i8m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vint8m1_t _line0_2_8 = vlse8_v_i8m1(line0, 2 * sizeof(int8_t), vl); + line0 += (vl - 1) * 2; + vint8m1_t _max0 = + vmax_vv_i8m1(_line0_2_8, vmax_vv_i8m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e8_v_i8m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vint8m1_t _line1_2_8 = vlse8_v_i8m1(line1, 2 * sizeof(int8_t), vl); + line1 += (vl - 1) * 2; + vint8m1_t _max1 = + vmax_vv_i8m1(_line1_2_8, vmax_vv_i8m1(_line1_0_6, _line1_1_7, vl), vl); + + vint8m1_t _max = vmax_vx_i8m1(vmax_vv_i8m1(_max0, _max1, vl), input_zp, vl); + vse8_v_i8m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + + if (extend_w) { + int8_t max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + int8_t max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_maxpool3x3s2_p1_int8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int out_hw = out_h * out_w; + int output_size = in_c * out_h * out_w; + + int extend_h = 0; + int extend_w = 0; + + if (in_h % 2 == 1 && params->pad_down == 1) { + extend_h = 1; + out_h--; + } + if (in_w % 2 == 1 && params->pad_right == 1) { + extend_w = 1; + out_w--; + } + + int remain_w = in_w - 2 * out_w + 1; + int vl; + int8_t input_zp = (int8_t)input->qinfo->zero_point; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const int8_t *line0 = input_data + c * in_h * in_w; + const int8_t *line1 = line0 + in_w; + int8_t *outptr = output_data + c * out_hw; + + // h top ---- w left + int8_t max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + int8_t max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + line0++; + line1++; + // h top ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_6, _line0_1_7; + vint8m1_t _line1_0_6, _line1_1_7; + + vlseg2e8_v_i8m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vint8m1_t _line0_2_8 = vlse8_v_i8m1(line0, 2 * sizeof(int8_t), vl); + line0 += (vl - 1) * 2; + vint8m1_t _max0 = + vmax_vv_i8m1(_line0_2_8, vmax_vv_i8m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e8_v_i8m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vint8m1_t _line1_2_8 = vlse8_v_i8m1(line1, 2 * sizeof(int8_t), vl); + line1 += (vl - 1) * 2; + vint8m1_t _max1 = + vmax_vv_i8m1(_line1_2_8, vmax_vv_i8m1(_line1_0_6, _line1_1_7, vl), vl); + + vint8m1_t _max = vmax_vx_i8m1(vmax_vv_i8m1(_max0, _max1, vl), input_zp, vl); + vse8_v_i8m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h top ---- w right + if (extend_w) { + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + } + line0 += remain_w; + line1 += remain_w; + + // h mid + const int8_t *line2 = line1 + in_w; + int8_t max2 = 0; + for (int h = 0; h < out_h - 1; h++) { + // h mid ---- w left + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + max2 = line2[0] > line2[1] ? line2[0] : line2[1]; + max1 = max1 > max2 ? max1 : max2; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] + : input_zp; // consider padding with constant "0" + outptr++; + line0++; + line1++; + line2++; + // h mid ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_6, _line0_1_7; + vint8m1_t _line1_0_6, _line1_1_7; + vint8m1_t _line2_0_6, _line2_1_7; + + vlseg2e8_v_i8m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vint8m1_t _line0_2_8 = vlse8_v_i8m1(line0, 2 * sizeof(int8_t), vl); + line0 += (vl - 1) * 2; + vint8m1_t _max0 = + vmax_vv_i8m1(_line0_2_8, vmax_vv_i8m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e8_v_i8m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vint8m1_t _line1_2_8 = vlse8_v_i8m1(line1, 2 * sizeof(int8_t), vl); + line1 += (vl - 1) * 2; + vint8m1_t _max1 = + vmax_vv_i8m1(_line1_2_8, vmax_vv_i8m1(_line1_0_6, _line1_1_7, vl), vl); + + vlseg2e8_v_i8m1(&_line2_0_6, &_line2_1_7, line2, vl); + line2 += 2; + vint8m1_t _line2_2_8 = vlse8_v_i8m1(line2, 2 * sizeof(int8_t), vl); + line2 += (vl - 1) * 2; + vint8m1_t _max2 = + vmax_vv_i8m1(_line2_2_8, vmax_vv_i8m1(_line2_0_6, _line2_1_7, vl), vl); + + vint8m1_t _max = vmax_vv_i8m1(_max2, vmax_vv_i8m1(_max0, _max1, vl), vl); + vse8_v_i8m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h mid ---- w right + if (extend_w) { + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + max2 = line2[0] > line2[1] ? line2[0] : line2[1]; + max1 = max1 > max2 ? max1 : max2; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + } + line0 += in_w + remain_w; + line1 += in_w + remain_w; + line2 += in_w + remain_w; + } + + // h bottom + if (extend_h) { + // h bottom ---- w left + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + line0++; + line1++; + + // h bottom ---- w mid + int w = out_w - 1; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_6, _line0_1_7; + vint8m1_t _line1_0_6, _line1_1_7; + + vlseg2e8_v_i8m1(&_line0_0_6, &_line0_1_7, line0, vl); + line0 += 2; + vint8m1_t _line0_2_8 = vlse8_v_i8m1(line0, 2 * sizeof(int8_t), vl); + line0 += (vl - 1) * 2; + vint8m1_t _max0 = + vmax_vv_i8m1(_line0_2_8, vmax_vv_i8m1(_line0_0_6, _line0_1_7, vl), vl); + + vlseg2e8_v_i8m1(&_line1_0_6, &_line1_1_7, line1, vl); + line1 += 2; + vint8m1_t _line1_2_8 = vlse8_v_i8m1(line1, 2 * sizeof(int8_t), vl); + line1 += (vl - 1) * 2; + vint8m1_t _max1 = + vmax_vv_i8m1(_line1_2_8, vmax_vv_i8m1(_line1_0_6, _line1_1_7, vl), vl); + + vint8m1_t _max = vmax_vx_i8m1(vmax_vv_i8m1(_max0, _max1, vl), input_zp, vl); + vse8_v_i8m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h bottom ---- w right + if (extend_w) { + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + } + } + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} + +int csi_nn_rvv_maxpool3x3s1_p1_int8(struct csi_tensor *input, struct csi_tensor *output, + struct pool_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + + int batch = input->dim[0]; + int in_c = input->dim[1]; + int in_h = input->dim[2]; + int in_w = input->dim[3]; + int input_size = in_c * in_h * in_w; + + int out_h = output->dim[2]; + int out_w = output->dim[3]; + int output_size = in_c * out_h * out_w; + + int vl; + int8_t input_zp = (int8_t)input->qinfo->zero_point; + + for (int b = 0; b < batch; b++) { + for (int c = 0; c < in_c; c++) { + const int8_t *line1 = input_data + c * in_h * in_w; + const int8_t *line2 = line1 + in_w; + int8_t *outptr = output_data + c * out_h * out_w; + // h top ---- w left + int8_t max0 = line1[0] > line1[1] ? line1[0] : line1[1]; + int8_t max1 = line2[0] > line2[1] ? line2[0] : line2[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + // h top ---- w mid + int w = out_w - 2; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line1_0_3 = vle8_v_i8m1(line1, vl); + line1++; + vint8m1_t _line1_1_4 = vle8_v_i8m1(line1, vl); + line1++; + vint8m1_t _line1_2_5 = vle8_v_i8m1(line1, vl); + line1 += vl - 2; + vint8m1_t _max1 = + vmax_vv_i8m1(_line1_2_5, vmax_vv_i8m1(_line1_0_3, _line1_1_4, vl), vl); + + vint8m1_t _line2_0_3 = vle8_v_i8m1(line2, vl); + line2++; + vint8m1_t _line2_1_4 = vle8_v_i8m1(line2, vl); + line2++; + vint8m1_t _line2_2_5 = vle8_v_i8m1(line2, vl); + line2 += vl - 2; + vint8m1_t _max2 = + vmax_vv_i8m1(_line2_2_5, vmax_vv_i8m1(_line2_0_3, _line2_1_4, vl), vl); + + vint8m1_t _max = vmax_vx_i8m1(vmax_vv_i8m1(_max1, _max2, vl), input_zp, vl); + vse8_v_i8m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h top ---- w right + max0 = line1[0] > line1[1] ? line1[0] : line1[1]; + max1 = line2[0] > line2[1] ? line2[0] : line2[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + line1 += 2; // bump next line: line1 --> line2 + line2 += 2; + + // h mid + const int8_t *line0 = input_data + c * in_h * in_w; + int8_t max2 = 0; + for (int h = 0; h < out_h - 2; h++) { + // h mid ---- w left + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + max2 = line2[0] > line2[1] ? line2[0] : line2[1]; + max1 = max1 > max2 ? max1 : max2; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + // h mid ---- w mid + w = out_w - 2; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_3 = vle8_v_i8m1(line0, vl); + line0++; + vint8m1_t _line0_1_4 = vle8_v_i8m1(line0, vl); + line0++; + vint8m1_t _line0_2_5 = vle8_v_i8m1(line0, vl); + line0 += vl - 2; + vint8m1_t _max0 = + vmax_vv_i8m1(_line0_2_5, vmax_vv_i8m1(_line0_0_3, _line0_1_4, vl), vl); + + vint8m1_t _line1_0_3 = vle8_v_i8m1(line1, vl); + line1++; + vint8m1_t _line1_1_4 = vle8_v_i8m1(line1, vl); + line1++; + vint8m1_t _line1_2_5 = vle8_v_i8m1(line1, vl); + line1 += vl - 2; + vint8m1_t _max1 = + vmax_vv_i8m1(_line1_2_5, vmax_vv_i8m1(_line1_0_3, _line1_1_4, vl), vl); + + vint8m1_t _line2_0_3 = vle8_v_i8m1(line2, vl); + line2++; + vint8m1_t _line2_1_4 = vle8_v_i8m1(line2, vl); + line2++; + vint8m1_t _line2_2_5 = vle8_v_i8m1(line2, vl); + line2 += vl - 2; + vint8m1_t _max2 = + vmax_vv_i8m1(_line2_2_5, vmax_vv_i8m1(_line2_0_3, _line2_1_4, vl), vl); + + vint8m1_t _max = vmax_vv_i8m1(_max2, vmax_vv_i8m1(_max0, _max1, vl), vl); + vse8_v_i8m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h mid ---- w right + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + max2 = line2[0] > line2[1] ? line2[0] : line2[1]; + max1 = max1 > max2 ? max1 : max2; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + + outptr++; + line0 += 2; + line1 += 2; + line2 += 2; + } + + // h bottom ---- w left + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + outptr++; + // h bottom ---- w mid + w = out_w - 2; + while (w > 0) { + vl = vsetvl_e8m1(w); + vint8m1_t _line0_0_3 = vle8_v_i8m1(line0, vl); + line0++; + vint8m1_t _line0_1_4 = vle8_v_i8m1(line0, vl); + line0++; + vint8m1_t _line0_2_5 = vle8_v_i8m1(line0, vl); + line0 += vl - 2; + vint8m1_t _max0 = + vmax_vv_i8m1(_line0_2_5, vmax_vv_i8m1(_line0_0_3, _line0_1_4, vl), vl); + + vint8m1_t _line1_0_3 = vle8_v_i8m1(line1, vl); + line1++; + vint8m1_t _line1_1_4 = vle8_v_i8m1(line1, vl); + line1++; + vint8m1_t _line1_2_5 = vle8_v_i8m1(line1, vl); + line1 += vl - 2; + vint8m1_t _max1 = + vmax_vv_i8m1(_line1_2_5, vmax_vv_i8m1(_line1_0_3, _line1_1_4, vl), vl); + + vint8m1_t _max = vmax_vx_i8m1(vmax_vv_i8m1(_max0, _max1, vl), input_zp, vl); + vse8_v_i8m1(outptr, _max, vl); + + outptr += vl; + w -= vl; + } + // h bottom ---- w right + max0 = line0[0] > line0[1] ? line0[0] : line0[1]; + max1 = line1[0] > line1[1] ? line1[0] : line1[1]; + outptr[0] = max0 > max1 ? max0 : max1; + outptr[0] = outptr[0] > input_zp ? outptr[0] : input_zp; + } + input_data += input_size; + output_data += output_size; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/mul.c b/source/thead_rvv/mul.c new file mode 100644 index 00000000..538eeaeb --- /dev/null +++ b/source/thead_rvv/mul.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +int csi_nn_rvv_mul_fp32(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) +{ + return CSINN_TRUE; +} + +int csi_nn_rvv_mul_fp16(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) +{ + return CSINN_TRUE; +} + +/************************************************************************************ + (1) s3*(q3-z3) = s1*(q1-z1) * s2*(q2-z2) + (2) q3 = [ (q1-z1) * (q2-z2) * (s1*s2/s3) ] + z3 + (3) output->qinfo->mulitipler means mult of s1*s2/s3 and output->qinfo->shift represents the +right shift(>0) + TODO: broadcast mul + note: if input1 is const, support per-channel quantization +************************************************************************************/ +int csi_nn_rvv_mul_int8(struct csi_tensor *input0, struct csi_tensor *input1, + struct csi_tensor *output, struct diso_params *params) +{ + int8_t *input0_data = (int8_t *)input0->data; + int8_t *input1_data = (int8_t *)input1->data; + int8_t *output_data = (int8_t *)output->data; + + int in_size0 = csi_tensor_size(input0); + int in_size1 = csi_tensor_size(input1); + int out_size = csi_tensor_size(output); + + // TODO: move to init api + for (int q = 0; q < input1->quant_channel; q++) { + float real_scale = input0->qinfo->scale * input1->qinfo[q].scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale, &input1->qinfo[q].multiplier, &input1->qinfo[q].shift); + } + + if (in_size0 == in_size1) { + int i = 0; + int packn = csrr_vlenb() / sizeof(int8_t); + int outer_size = input1->quant_channel; + int inner_size = in_size1 / outer_size; + for (int c = 0; c < outer_size; c++) { + int32_t z1z2 = input0->qinfo->zero_point * input1->qinfo[c].zero_point; + int size = inner_size; + while (size > 0) { + int vl = vsetvl_e8m1(size); + vint8m1_t _in0 = vle8_v_i8m1(input0_data, vl); + vint8m1_t _in1 = vle8_v_i8m1(input1_data, vl); + + vint16m2_t _q1q2 = vwmul_vv_i16m2(_in0, _in1, vl); + vint16m2_t _q1z2 = vwmul_vx_i16m2(_in0, (int8_t)input1->qinfo[c].zero_point, vl); + vint16m2_t _q2z1 = vwmul_vx_i16m2(_in1, (int8_t)input0->qinfo->zero_point, vl); + + vint32m4_t _res = vwsub_vv_i32m4(_q1q2, _q1z2, vl); // q1q2 - q1z2 + _res = vwsub_wv_i32m4(_res, _q2z1, vl); // q1q2 - q1z2 - q2z1 + _res = vadd_vx_i32m4(_res, z1z2, vl); // q1q2 - q1z2 - q2z1 + z1z2 + input0_data += vl; + input1_data += vl; + // FIXME: precision error + vint32m4_t _mulh = vmulh_vx_i32m4(_res, input1->qinfo[c].multiplier, vl); + if (input1->qinfo[c].shift < 0) { + _res = vssra_vx_i32m4(_mulh, -input1->qinfo[c].shift - 1, vl); + } else { + _res = vsll_vx_i32m4(_mulh, input1->qinfo[c].shift + 1, vl); + } + + _res = vadd_vx_i32m4(_res, output->qinfo->zero_point, vl); + vint16m2_t _res1 = vnclip_wx_i16m2(_res, 0, vl); + vint8m1_t _res2 = vnclip_wx_i8m1(_res1, 0, vl); + vse8_v_i8m1(output_data, _res2, vl); + output_data += vl; + size -= vl; + } + } + } else { + csi_debug_error("Only support elementwise mul on RVV CPU\n"); + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/relu.c b/source/thead_rvv/relu.c new file mode 100644 index 00000000..d213be08 --- /dev/null +++ b/source/thead_rvv/relu.c @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128/256 ... +*************************************************************/ +int csi_nn_rvv_relu_fp32(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params) +{ + float *input_data = input->data; + float *output_data = output->data; + int size = 1; + for (int i = 0; i < input->dim_count; i++) { + size = size * input->dim[i]; + } + + int vl = vsetvl_e32m2(size); // vl=8 if vlen=128 + + int i = 0; + for (; i + vl - 1 < size; i += vl) { + vfloat32m2_t _input = vle32_v_f32m2(input_data, vl); + input_data += vl; + vfloat32m2_t _output = vfmax_vf_f32m2(_input, 0.0f, vl); + vse32_v_f32m2(output_data, _output, vl); + output_data += vl; + } + if (i < size) { + vl = vsetvl_e32m2(size & (vl - 1)); // ??? + vfloat32m2_t _input = vle32_v_f32m2(input_data, vl); + vfloat32m2_t _output = vfmax_vf_f32m2(_input, 0.0f, vl); + vse32_v_f32m2(output_data, _output, vl); + } + return CSINN_TRUE; +} + +int csi_nn_rvv_relu_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int size = 1; + for (int i = 0; i < input->dim_count; i++) { + size = size * input->dim[i]; + } + + int vl = vsetvl_e16m2(size); + + int i = 0; + for (; i + vl - 1 < size; i += vl) { + vfloat16m2_t _input = vle16_v_f16m2(input_data, vl); + input_data += vl; + vfloat16m2_t _output = vfmax_vf_f16m2(_input, 0.0f, vl); + vse16_v_f16m2(output_data, _output, vl); + output_data += vl; + } + if (i < size) { + vl = vsetvl_e16m2(size & (vl - 1)); + vfloat16m2_t _input = vle16_v_f16m2(input_data, vl); + vfloat16m2_t _output = vfmax_vf_f16m2(_input, 0.0f, vl); + vse16_v_f16m2(output_data, _output, vl); + } + return CSINN_TRUE; +} + +/************************************************************************************ + * s2(q2 - z2) = relu{ s1(q1 - z1) } + * q2 = (q1 - z1) * s1/s2 + z2 + * + * note:relu 一般接在全连接/卷积后面,可以直接和全连接/卷积 融合 + ************************************************************************************/ +int csi_nn_rvv_relu_int8(struct csi_tensor *input, struct csi_tensor *output, + struct relu_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + + // TODO: move to init api + // real_scale > 1 => output->qinfo->shift > 0 ==> shift left + float real_scale = input->qinfo->scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale, &output->qinfo->multiplier, &output->qinfo->shift); + + int size = csi_tensor_size(input); + while (size > 0) { + int vl = vsetvl_e8m1(size); + + vint8m1_t _input = vle8_v_i8m1(input_data, vl); + vint16m2_t _input1 = vwadd_vx_i16m2(_input, 0, vl); // widden 8->16 + vint32m4_t _input2 = vwadd_vx_i32m4(_input1, 0, vl); // widden 16->32 + + vint32m4_t _tmp = vsub_vx_i32m4(_input2, input->qinfo->zero_point, vl); + // mulh 无 round 过程, 左移时多移1位,mulh 后再用带round的右移1位来实现类似round的功能 + _tmp = vsll_vx_i32m4(_tmp, output->qinfo->shift + 2, vl); + vint32m4_t _mulh = vmulh_vx_i32m4(_tmp, output->qinfo->multiplier, vl); + _mulh = vssra_vx_i32m4(_mulh, 1, vl); + + vint32m4_t _res0 = vadd_vx_i32m4(_mulh, output->qinfo->zero_point, vl); // +z2 (z2 = -128) + vint16m2_t _res1 = vnclip_wx_i16m2(_res0, 0, vl); // narrow 32->16 + vint8m1_t _res2 = vnclip_wx_i8m1(_res1, 0, vl); // narrow 16->8 + + vse8_v_i8m1(output_data, _res2, vl); + input_data += vl; + output_data += vl; + size -= vl; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/rvv_mathfun.h b/source/thead_rvv/rvv_mathfun.h new file mode 100644 index 00000000..fae6bd0a --- /dev/null +++ b/source/thead_rvv/rvv_mathfun.h @@ -0,0 +1,75 @@ +#ifndef RVV_MATHFUN_H +#define RVV_MATHFUN_H + +#include + +#define c_exp_hi_f16 10.7421875f +#define c_exp_lo_f16 -10.7421875f + +#define c_cephes_LOG2EF 1.44269504088896341 +#define c_cephes_exp_C1 0.693359375 +#define c_cephes_exp_C2 -2.12194440e-4 + +#define c_cephes_exp_p0 1.9875691500E-4 +#define c_cephes_exp_p1 1.3981999507E-3 +#define c_cephes_exp_p2 8.3334519073E-3 +#define c_cephes_exp_p3 4.1665795894E-2 +#define c_cephes_exp_p4 1.6666665459E-1 +#define c_cephes_exp_p5 5.0000001201E-1 + +#define _RVV_FLOAT16_EXP_OP(LMUL, MLEN) \ + static inline vfloat16m##LMUL##_t exp_ps_vfloat16m##LMUL(vfloat16m##LMUL##_t x, word_type vl) \ + { \ + vfloat16m##LMUL##_t tmp, fx; \ + \ + x = vfmin_vf_f16m##LMUL(x, c_exp_hi_f16, vl); \ + x = vfmax_vf_f16m##LMUL(x, c_exp_lo_f16, vl); \ + \ + /* express exp(x) as exp(g + n*log(2)) */ \ + fx = vfmacc_vf_f16m##LMUL(vfmv_v_f_f16m##LMUL(0.5f, vl), c_cephes_LOG2EF, x, vl); \ + \ + /* perform a floorf */ \ + tmp = vfcvt_f_x_v_f16m##LMUL(vfcvt_x_f_v_i16m##LMUL(fx, vl), vl); \ + \ + /* if greater, substract 1 */ \ + vbool##MLEN##_t mask = vmfgt_vv_f16m##LMUL##_b##MLEN(tmp, fx, vl); \ + fx = vfsub_vf_f16m##LMUL##_m(mask, tmp, tmp, 1.f, vl); \ + \ + tmp = vfmul_vf_f16m##LMUL(fx, c_cephes_exp_C1, vl); \ + vfloat16m##LMUL##_t z = vfmul_vf_f16m##LMUL(fx, c_cephes_exp_C2, vl); \ + x = vfsub_vv_f16m##LMUL(x, tmp, vl); \ + x = vfsub_vv_f16m##LMUL(x, z, vl); \ + \ + vfloat16m##LMUL##_t y = vfmul_vf_f16m##LMUL(x, c_cephes_exp_p0, vl); \ + z = vfmul_vv_f16m##LMUL(x, x, vl); \ + \ + y = vfadd_vf_f16m##LMUL(y, c_cephes_exp_p1, vl); \ + y = vfmul_vv_f16m##LMUL(y, x, vl); \ + y = vfadd_vf_f16m##LMUL(y, c_cephes_exp_p2, vl); \ + y = vfmul_vv_f16m##LMUL(y, x, vl); \ + y = vfadd_vf_f16m##LMUL(y, c_cephes_exp_p3, vl); \ + y = vfmul_vv_f16m##LMUL(y, x, vl); \ + y = vfadd_vf_f16m##LMUL(y, c_cephes_exp_p4, vl); \ + y = vfmul_vv_f16m##LMUL(y, x, vl); \ + y = vfadd_vf_f16m##LMUL(y, c_cephes_exp_p5, vl); \ + \ + y = vfmul_vv_f16m##LMUL(y, z, vl); \ + y = vfadd_vv_f16m##LMUL(y, x, vl); \ + y = vfadd_vf_f16m##LMUL(y, 1.f, vl); \ + \ + /* build 2^n */ \ + vint16m##LMUL##_t mm = vfcvt_x_f_v_i16m##LMUL(fx, vl); \ + mm = vadd_vx_i16m##LMUL(mm, 0xf, vl); \ + mm = vsll_vx_i16m##LMUL(mm, 10, vl); \ + vfloat16m##LMUL##_t pow2n = vreinterpret_v_i16m##LMUL##_f16m##LMUL(mm); \ + \ + y = vfmul_vv_f16m##LMUL(y, pow2n, vl); \ + return y; \ + } + +_RVV_FLOAT16_EXP_OP(1, 16) +_RVV_FLOAT16_EXP_OP(2, 8) +_RVV_FLOAT16_EXP_OP(4, 4) +_RVV_FLOAT16_EXP_OP(8, 2) + +#endif // RVV_MATHFUN_H \ No newline at end of file diff --git a/source/thead_rvv/setup.c b/source/thead_rvv/setup.c new file mode 100644 index 00000000..28f15b70 --- /dev/null +++ b/source/thead_rvv/setup.c @@ -0,0 +1,392 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "csi_thead_rvv.h" + +void *csi_init_map_rvv(int op, int dtype) +{ + if (op == CSINN_OP_CONV2D || op == CSINN_OP_GROUP_CONV2D) { + return csi_nn_rvv_conv2d_init; + } else if (op == CSINN_OP_DEPTHWISE_CONV2D) { + return csi_nn_rvv_depthwise_conv2d_init; + } else if (op == CSINN_OP_MAXPOOL2D) { + return csi_nn_rvv_maxpool2d_init; + } else if (op == CSINN_OP_AVGPOOL2D) { + return csi_nn_rvv_avgpool2d_init; + } else if (op == CSINN_OP_FULLYCONNECTED) { + return csi_nn_rvv_fullyconnected_init; + } else if (op == CSINN_OP_CONV2D_RELU) { + if (dtype == CSINN_DTYPE_INT8 || dtype == CSINN_DTYPE_INT4) { + return csi_nn_rvv_conv2d_init; + } + } else if (op == CSINN_OP_DEPTHWISE_CONV2D_RELU) { + if (dtype == CSINN_DTYPE_INT8 || dtype == CSINN_DTYPE_INT4) { + return csi_nn_rvv_depthwise_conv2d_init; + } + } + return NULL; +} + +static void *setup_bc_map() +{ + static void *bc_map[CSINN_OP_AND_UTILS_SIZE][4]; + + bc_map[CSINN_OP_ABS][3] = csi_ref_abs_f32; + bc_map[CSINN_OP_ACOS][3] = csi_ref_acos_f32; + bc_map[CSINN_OP_ACOSH][3] = csi_ref_acosh_f32; + bc_map[CSINN_OP_ADD][3] = csi_nn_rvv_add_fp32; + bc_map[CSINN_OP_ARANGE][3] = csi_ref_arange_f32; + bc_map[CSINN_OP_ARGMAX][3] = csi_ref_argmax_stride_i32_f32; + bc_map[CSINN_OP_ARGMIN][3] = csi_ref_argmin_stride_i32_f32; + bc_map[CSINN_OP_ASIN][3] = csi_ref_asin_f32; + bc_map[CSINN_OP_ASINH][3] = csi_ref_asinh_f32; + bc_map[CSINN_OP_ATAN][3] = csi_ref_atan_f32; + bc_map[CSINN_OP_ATANH][3] = csi_ref_atanh_f32; + bc_map[CSINN_OP_AVGPOOL2D][3] = csi_ref_avgpool2d_f32; + bc_map[CSINN_OP_AVGPOOL3D][3] = csi_ref_avgpool3d_f32; + bc_map[CSINN_OP_BN][3] = csi_ref_batch_normalization_f32; + bc_map[CSINN_OP_BATCH_TO_SPACE][3] = csi_ref_batch_to_space_f32; + bc_map[CSINN_OP_BROADCOST][3] = csi_ref_broadcast_to_f32; + bc_map[CSINN_OP_CEIL][3] = csi_ref_ceil_f32; + bc_map[CSINN_OP_CLIP][3] = csi_ref_clip_f32; + bc_map[CSINN_OP_COL2IM][3] = csi_ref_col2im_f32; + bc_map[CSINN_OP_CONCAT][3] = csi_nn_rvv_concat_fp32; + bc_map[CSINN_OP_CONV2D][3] = csi_ref_conv2d_f32; + bc_map[CSINN_OP_CONV2D_RELU][3] = csi_ref_conv2d_relu_f32; + bc_map[CSINN_OP_DEPTHWISE_CONV2D][3] = csi_ref_depthwise_conv2d_f32; + bc_map[CSINN_OP_DEPTHWISE_CONV2D_RELU][3] = csi_ref_depthwise_conv2d_relu_f32; + bc_map[CSINN_OP_GROUP_CONV2D][3] = csi_ref_group_conv2d_f32; + bc_map[CSINN_OP_CONV3D][3] = csi_ref_conv3d_f32; + bc_map[CSINN_OP_DECONV2D][3] = csi_ref_deconv2d_f32; + bc_map[CSINN_OP_DEPTHWISE_DECONV2D][3] = csi_ref_depthwise_deconv2d_f32; + bc_map[CSINN_OP_DECONV3D][3] = csi_ref_deconv3d_f32; + bc_map[CSINN_OP_COS][3] = csi_ref_cos_f32; + bc_map[CSINN_OP_COSH][3] = csi_ref_cosh_f32; + bc_map[CSINN_OP_CUMPROD][3] = csi_ref_cumprod_f32; + bc_map[CSINN_OP_CUMSUM][3] = csi_ref_cumsum_f32; + bc_map[CSINN_OP_DEPTH_TO_SPACE][3] = csi_ref_depth_to_space_f32; + bc_map[CSINN_OP_DIV][3] = csi_ref_div_f32; + bc_map[CSINN_OP_ELU][3] = csi_ref_elu_f32; + bc_map[CSINN_OP_EQUANL][3] = csi_ref_equal_f32; + bc_map[CSINN_OP_ERF][3] = csi_ref_erf_f32; + bc_map[CSINN_OP_EXP][3] = csi_ref_exp_f32; + bc_map[CSINN_OP_EXPAND_DIMS][3] = csi_ref_expand_dims_f32; + bc_map[CSINN_OP_EXPM1][3] = csi_ref_expm1_f32; + bc_map[CSINN_OP_FLATTEN][3] = csi_ref_flatten; + bc_map[CSINN_OP_FLOOR_DIVIDE][3] = csi_ref_floor_divide_f32; + bc_map[CSINN_OP_FLOOR_MOD][3] = csi_ref_floor_mod_f32; + bc_map[CSINN_OP_FLOOR][3] = csi_ref_floor_f32; + bc_map[CSINN_OP_FSMN][3] = csi_ref_fsmn_f32; + bc_map[CSINN_OP_FULLYCONNECTED][3] = csi_ref_fullyconnected_f32; + bc_map[CSINN_OP_GATHER_ND][3] = csi_ref_gather_nd_f32; + bc_map[CSINN_OP_GATHER][3] = csi_ref_gather_f32; + bc_map[CSINN_OP_GLOBAL_AVGPOOL2D][3] = csi_nn_rvv_global_avgpool2d_fp32; + bc_map[CSINN_OP_GLOBAL_MAXPOOL2D][3] = csi_ref_global_maxpool2d_f32; + bc_map[CSINN_OP_GREATHER_EQUAL][3] = csi_ref_greater_equal_f32; + bc_map[CSINN_OP_GREATHER][3] = csi_ref_greater_f32; + bc_map[CSINN_OP_HARD_SIGMOID][3] = csi_ref_hard_sigmoid_f32; + bc_map[CSINN_OP_IM2COL][3] = csi_ref_im2col_f32; + bc_map[CSINN_OP_ISNAN][3] = csi_ref_isnan_bool_f32; + bc_map[CSINN_OP_L2N][3] = csi_ref_l2_normalization_f32; + bc_map[CSINN_OP_L2POOL2D][3] = csi_ref_l2pool_f32; + bc_map[CSINN_OP_LEAKY_RELU][3] = csi_nn_rvv_leaky_relu_fp32; + bc_map[CSINN_OP_LESS_EQUAL][3] = csi_ref_less_equal_f32; + bc_map[CSINN_OP_LESS][3] = csi_ref_less_f32; + bc_map[CSINN_OP_LOG_SOFTMAX][3] = csi_ref_log_softmax_f32; + bc_map[CSINN_OP_LOG][3] = csi_ref_log_f32; + bc_map[CSINN_OP_LOG1P][3] = csi_ref_log1p_f32; + bc_map[CSINN_OP_LOGICAL_AND][3] = csi_ref_logical_and_f32; + bc_map[CSINN_OP_LOGICAL_NOT][3] = csi_ref_logical_not_f32; + bc_map[CSINN_OP_LOGICAL_OR][3] = csi_ref_logical_or_f32; + bc_map[CSINN_OP_LOGICAL_XOR][3] = csi_ref_logical_xor_f32; + bc_map[CSINN_OP_LRN][3] = csi_ref_lrn_f32; + bc_map[CSINN_OP_MATMUL][3] = csi_ref_matmul_f32; + bc_map[CSINN_OP_MAX][3] = csi_ref_max_stride_f32; + bc_map[CSINN_OP_MAXIMUM][3] = csi_ref_maximum_f32; + bc_map[CSINN_OP_MAXPOOL2D][3] = csi_ref_maxpool2d_f32; + bc_map[CSINN_OP_MAXPOOL2D_LOCAT][3] = csi_ref_maxpool2d_locat_f32; + bc_map[CSINN_OP_MAXPOOL3D][3] = csi_ref_maxpool3d_f32; + bc_map[CSINN_OP_MEAN][3] = csi_ref_mean_stride_f32; + bc_map[CSINN_OP_MEAN_STRIDE][3] = csi_ref_mean_stride_f32; + bc_map[CSINN_OP_MINIMUM][3] = csi_ref_minimum_f32; + bc_map[CSINN_OP_MOD][3] = csi_ref_mod_f32; + bc_map[CSINN_OP_MUL][3] = csi_ref_mul_f32; + bc_map[CSINN_OP_NDARRAY_SIZE][3] = csi_ref_ndarray_size_f32; + bc_map[CSINN_OP_NEGATIIVE][3] = csi_ref_negative_f32; + bc_map[CSINN_OP_NOT_EQUAL][3] = csi_ref_not_equal_f32; + bc_map[CSINN_OP_PAD][3] = csi_ref_pad_f32; + bc_map[CSINN_OP_POWER][3] = csi_ref_power_f32; + bc_map[CSINN_OP_PRELU][3] = csi_ref_prelu_f32; + bc_map[CSINN_OP_PROD][3] = csi_ref_prod_stride_f32; + bc_map[CSINN_OP_PROPOSAL][3] = csi_ref_proposal_f32; + bc_map[CSINN_OP_PSROIPOOLING][3] = csi_ref_psroipooling_f32; + bc_map[CSINN_OP_REDUCE_LOGSUMEXP][3] = csi_ref_reduce_logsumexp_f32; + bc_map[CSINN_OP_REDUCE_MAX][3] = csi_ref_reduce_max_f32; + bc_map[CSINN_OP_REDUCE_MEAN][3] = csi_ref_reduce_mean_f32; + bc_map[CSINN_OP_REDUCE_MIN][3] = csi_ref_reduce_min_f32; + bc_map[CSINN_OP_REDUCE_PROD][3] = csi_ref_reduce_prod_f32; + bc_map[CSINN_OP_REDUCE_SUM][3] = csi_ref_reduce_sum_f32; + bc_map[CSINN_OP_RELU][3] = csi_nn_rvv_relu_fp32; + bc_map[CSINN_OP_RELU1][3] = csi_ref_relu1_f32; + bc_map[CSINN_OP_RELU6][3] = csi_ref_relu6_f32; + bc_map[CSINN_OP_RELUN][3] = csi_ref_relun_f32; + bc_map[CSINN_OP_RESHAPE][3] = csi_ref_reshape; + bc_map[CSINN_OP_RESIZE][3] = csi_ref_resize_f32; + bc_map[CSINN_OP_REVERSE][3] = csi_ref_reverse_f32; + bc_map[CSINN_OP_ROIALIGN][3] = csi_ref_roi_align_f32; + bc_map[CSINN_OP_ROIPOOL][3] = csi_ref_roipool_f32; + bc_map[CSINN_OP_ROUND][3] = csi_ref_round_f32; + bc_map[CSINN_OP_RSQRT][3] = csi_ref_rsqrt_f32; + bc_map[CSINN_OP_SCATTER_ND][3] = csi_ref_scatter_nd_f32; + bc_map[CSINN_OP_SEGMENT_MAX][3] = csi_ref_segment_max_f32; + bc_map[CSINN_OP_UNSORTED_SEGMENT_MAX][3] = csi_ref_unsorted_segment_max_f32; + bc_map[CSINN_OP_SEGMENT_MEAN][3] = csi_ref_segment_mean_f32; + bc_map[CSINN_OP_UNSORTED_SEGMENT_MEAN][3] = csi_ref_unsorted_segment_mean_f32; + bc_map[CSINN_OP_SEGMENT_MIN][3] = csi_ref_segment_min_f32; + bc_map[CSINN_OP_UNSORTED_SEGMENT_MIN][3] = csi_ref_unsorted_segment_min_f32; + bc_map[CSINN_OP_SEGMENT_PROD][3] = csi_ref_segment_prod_f32; + bc_map[CSINN_OP_UNSORTED_SEGMENT_PROD][3] = csi_ref_unsorted_segment_prod_f32; + bc_map[CSINN_OP_SEGMENT_SUM][3] = csi_ref_segment_sum_f32; + bc_map[CSINN_OP_UNSORTED_SEGMENT_SUM][3] = csi_ref_unsorted_segment_sum_f32; + bc_map[CSINN_OP_SELECT][3] = csi_ref_select_f32; + bc_map[CSINN_OP_SHUFFLE_CHANNEL][3] = csi_ref_shuffle_channel_f32; + bc_map[CSINN_OP_SIGMOID][3] = csi_ref_sigmoid_f32; + bc_map[CSINN_OP_SIGN][3] = csi_ref_sign_f32; + bc_map[CSINN_OP_SIN][3] = csi_ref_sin_f32; + bc_map[CSINN_OP_SINH][3] = csi_ref_sinh_f32; + bc_map[CSINN_OP_SLICE][3] = csi_ref_slice_f32; + bc_map[CSINN_OP_SOFTMAX][3] = csi_ref_softmax_f32; + bc_map[CSINN_OP_SOFTPLUS][3] = csi_ref_softplus_f32; + bc_map[CSINN_OP_SOFTRELU][3] = csi_ref_softrelu_f32; + bc_map[CSINN_OP_SOFTSIGN][3] = csi_ref_softsign_f32; + bc_map[CSINN_OP_SPACE_TO_BATCH][3] = csi_ref_space_to_batch_f32; + bc_map[CSINN_OP_SPACE_TO_DEPTH][3] = csi_ref_space_to_depth_f32; + bc_map[CSINN_OP_SPLIT][3] = csi_ref_split_f32; + bc_map[CSINN_OP_SQRT][3] = csi_ref_sqrt_f32; + bc_map[CSINN_OP_SQUARE][3] = csi_ref_square_f32; + bc_map[CSINN_OP_SQUEEZE][3] = csi_ref_squeeze; + bc_map[CSINN_OP_STACK][3] = csi_ref_stack_f32; + bc_map[CSINN_OP_STRIDED_SLICE][3] = csi_ref_strided_slice_f32; + bc_map[CSINN_OP_SUB][3] = csi_ref_sub_f32; + bc_map[CSINN_OP_SUM][3] = csi_ref_sum_stride_f32; + bc_map[CSINN_OP_TAN][3] = csi_ref_tan_f32; + bc_map[CSINN_OP_TANH][3] = csi_ref_tanh_f32; + bc_map[CSINN_OP_THRESHOLD_RELU][3] = csi_ref_threshold_relu_f32; + bc_map[CSINN_OP_TILE][3] = csi_ref_tile_f32; + bc_map[CSINN_OP_TOPK][3] = csi_ref_topk_f32; + bc_map[CSINN_OP_TRUNC][3] = csi_ref_trunc_f32; + bc_map[CSINN_OP_TRANSPOSE][3] = csi_ref_transpose; + bc_map[CSINN_OP_TRUNC][3] = csi_ref_trunc_f32; + bc_map[CSINN_OP_UNPOOLING][3] = csi_ref_unpooling_f32; + bc_map[CSINN_OP_UNSTACK][3] = csi_ref_unstack_f32; + bc_map[CSINN_OP_YUV_RGB_SCALE][3] = csi_ref_yuv_rgb_scale_f32; + + for (int i = 0; i < 3; i++) { + bc_map[CSINN_OP_ABS][i] = csi_ref_abs_quant; + bc_map[CSINN_OP_ACOS][i] = csi_ref_acos_quant; + bc_map[CSINN_OP_ACOSH][i] = csi_ref_acosh_quant; + bc_map[CSINN_OP_ADD][i] = csi_ref_add_quant; + bc_map[CSINN_OP_ARANGE][i] = csi_ref_arange_quant; + bc_map[CSINN_OP_ARGMAX][i] = csi_ref_argmax_stride_quant; + bc_map[CSINN_OP_ARGMIN][i] = csi_ref_argmin_stride_quant; + bc_map[CSINN_OP_ASIN][i] = csi_ref_asin_quant; + bc_map[CSINN_OP_ASINH][i] = csi_ref_asinh_quant; + bc_map[CSINN_OP_ATAN][i] = csi_ref_atan_quant; + bc_map[CSINN_OP_ATANH][i] = csi_ref_atanh_quant; + bc_map[CSINN_OP_AVGPOOL2D][i] = csi_ref_avgpool2d_quant; + bc_map[CSINN_OP_AVGPOOL3D][i] = csi_ref_avgpool3d_quant; + bc_map[CSINN_OP_BN][i] = csi_ref_batch_normalization_quant; + bc_map[CSINN_OP_BATCH_TO_SPACE][i] = csi_ref_batch_to_space_quant; + bc_map[CSINN_OP_BROADCOST][i] = csi_ref_broadcast_to_quant; + bc_map[CSINN_OP_CEIL][i] = csi_ref_ceil_quant; + bc_map[CSINN_OP_CLIP][i] = csi_ref_clip_quant; + bc_map[CSINN_OP_CONCAT][i] = csi_ref_concat_quant; + bc_map[CSINN_OP_CONV2D][i] = csi_ref_conv2d_quant; + bc_map[CSINN_OP_CONV2D_RELU][i] = csi_ref_conv2d_relu_quant; + bc_map[CSINN_OP_CONV2D_RELU6][i] = csi_ref_conv2d_relu6_quant; + bc_map[CSINN_OP_DEPTHWISE_CONV2D][i] = csi_ref_depthwise_conv2d_quant; + bc_map[CSINN_OP_DEPTHWISE_CONV2D_RELU][i] = csi_ref_depthwise_conv2d_relu_quant; + bc_map[CSINN_OP_DEPTHWISE_CONV2D_RELU6][i] = csi_ref_depthwise_conv2d_relu6_quant; + bc_map[CSINN_OP_GROUP_CONV2D][i] = csi_ref_group_conv2d_quant; + bc_map[CSINN_OP_CONV3D][i] = csi_ref_conv3d_quant; + bc_map[CSINN_OP_DECONV2D][i] = csi_ref_deconv2d_quant; + bc_map[CSINN_OP_DEPTHWISE_DECONV2D][i] = csi_ref_depthwise_deconv2d_quant; + bc_map[CSINN_OP_DECONV3D][i] = csi_ref_deconv3d_quant; + bc_map[CSINN_OP_COS][i] = csi_ref_cos_quant; + bc_map[CSINN_OP_COSH][i] = csi_ref_cosh_quant; + bc_map[CSINN_OP_CUMPROD][i] = csi_ref_cumprod_quant; + bc_map[CSINN_OP_CUMSUM][i] = csi_ref_cumsum_quant; + bc_map[CSINN_OP_DEPTH_TO_SPACE][i] = csi_ref_depth_to_space_quant; + bc_map[CSINN_OP_DIV][i] = csi_ref_div_quant; + bc_map[CSINN_OP_ELU][i] = csi_ref_elu_quant; + bc_map[CSINN_OP_EQUANL][i] = csi_ref_equal_quant; + bc_map[CSINN_OP_ERF][i] = csi_ref_erf_quant; + bc_map[CSINN_OP_EXP][i] = csi_ref_exp_quant; + bc_map[CSINN_OP_EXPAND_DIMS][i] = csi_ref_expand_dims_quant; + bc_map[CSINN_OP_EXPM1][i] = csi_ref_expm1_quant; + bc_map[CSINN_OP_FLATTEN][i] = csi_ref_flatten; + bc_map[CSINN_OP_FLOOR_DIVIDE][i] = csi_ref_floor_divide_quant; + bc_map[CSINN_OP_FLOOR_MOD][i] = csi_ref_floor_mod_quant; + bc_map[CSINN_OP_FLOOR][i] = csi_ref_floor_quant; + bc_map[CSINN_OP_FSMN][i] = csi_ref_fsmn_quant; + bc_map[CSINN_OP_FULLYCONNECTED][i] = csi_ref_fullyconnected_quant; + bc_map[CSINN_OP_GATHER_ND][i] = csi_ref_gather_nd_quant; + bc_map[CSINN_OP_GATHER][i] = csi_ref_gather_quant; + bc_map[CSINN_OP_GLOBAL_AVGPOOL2D][i] = csi_ref_global_avgpool2d_quant; + bc_map[CSINN_OP_GLOBAL_MAXPOOL2D][i] = csi_ref_global_maxpool2d_quant; + bc_map[CSINN_OP_GREATHER_EQUAL][i] = csi_ref_greater_equal_quant; + bc_map[CSINN_OP_GREATHER][i] = csi_ref_greater_quant; + bc_map[CSINN_OP_HARD_SIGMOID][i] = csi_ref_hard_sigmoid_quant; + bc_map[CSINN_OP_IM2COL][i] = csi_ref_im2col_quant; + bc_map[CSINN_OP_L2N][i] = csi_ref_l2_normalization_quant; + bc_map[CSINN_OP_LEAKY_RELU][i] = csi_ref_leaky_relu_quant; + bc_map[CSINN_OP_LESS_EQUAL][i] = csi_ref_less_equal_quant; + bc_map[CSINN_OP_LESS][i] = csi_ref_less_quant; + bc_map[CSINN_OP_LOG_SOFTMAX][i] = csi_ref_log_softmax_quant; + bc_map[CSINN_OP_LOG][i] = csi_ref_log_quant; + bc_map[CSINN_OP_LOG1P][i] = csi_ref_log1p_quant; + bc_map[CSINN_OP_LOGICAL_AND][i] = csi_ref_logical_and_quant; + bc_map[CSINN_OP_LOGICAL_NOT][i] = csi_ref_logical_not_quant; + bc_map[CSINN_OP_LOGICAL_OR][i] = csi_ref_logical_or_quant; + bc_map[CSINN_OP_LOGICAL_XOR][i] = csi_ref_logical_xor_quant; + bc_map[CSINN_OP_LRN][i] = csi_ref_lrn_quant; + bc_map[CSINN_OP_MATMUL][i] = csi_ref_matmul_quant; + bc_map[CSINN_OP_MAX][i] = csi_ref_max_stride_quant; + bc_map[CSINN_OP_MAXIMUM][i] = csi_ref_maximum_quant; + bc_map[CSINN_OP_MAXPOOL2D][i] = csi_ref_maxpool2d_quant; + bc_map[CSINN_OP_MAXPOOL2D_LOCAT][i] = csi_ref_maxpool2d_locat_quant; + bc_map[CSINN_OP_MAXPOOL3D][i] = csi_ref_maxpool3d_quant; + bc_map[CSINN_OP_MEAN][i] = csi_ref_mean_stride_quant; + bc_map[CSINN_OP_MEAN_STRIDE][i] = csi_ref_mean_stride_quant; + bc_map[CSINN_OP_MIN][i] = csi_ref_min_stride_quant; + bc_map[CSINN_OP_MINIMUM][i] = csi_ref_minimum_quant; + bc_map[CSINN_OP_MOD][i] = csi_ref_mod_quant; + bc_map[CSINN_OP_MUL][i] = csi_ref_mul_quant; + bc_map[CSINN_OP_NEGATIIVE][i] = csi_ref_negative_quant; + bc_map[CSINN_OP_NOT_EQUAL][i] = csi_ref_not_equal_quant; + bc_map[CSINN_OP_PAD][i] = csi_ref_pad_quant; + bc_map[CSINN_OP_POWER][i] = csi_ref_power_quant; + bc_map[CSINN_OP_PRELU][i] = csi_ref_prelu_quant; + bc_map[CSINN_OP_PROD][i] = csi_ref_prod_stride_quant; + bc_map[CSINN_OP_PROPOSAL][i] = csi_ref_proposal_quant; + bc_map[CSINN_OP_PSROIPOOLING][i] = csi_ref_psroipooling_quant; + bc_map[CSINN_OP_REDUCE_LOGSUMEXP][i] = csi_ref_reduce_logsumexp_quant; + bc_map[CSINN_OP_REDUCE_MAX][i] = csi_ref_reduce_max_quant; + bc_map[CSINN_OP_REDUCE_MEAN][i] = csi_ref_reduce_mean_quant; + bc_map[CSINN_OP_REDUCE_MIN][i] = csi_ref_reduce_min_quant; + bc_map[CSINN_OP_REDUCE_PROD][i] = csi_ref_reduce_prod_quant; + bc_map[CSINN_OP_REDUCE_SUM][i] = csi_ref_reduce_sum_quant; + bc_map[CSINN_OP_RELU][i] = csi_ref_relu_quant; + bc_map[CSINN_OP_RELU1][i] = csi_ref_relu1_quant; + bc_map[CSINN_OP_RELU6][i] = csi_ref_relu6_quant; + bc_map[CSINN_OP_RELUN][i] = csi_ref_relun_quant; + bc_map[CSINN_OP_RESHAPE][i] = csi_ref_reshape_quant; + bc_map[CSINN_OP_RESIZE][i] = csi_ref_resize_quant; + bc_map[CSINN_OP_REVERSE][i] = csi_ref_reverse_quant; + bc_map[CSINN_OP_ROIPOOL][i] = csi_ref_roipool_quant; + bc_map[CSINN_OP_ROUND][i] = csi_ref_round_quant; + bc_map[CSINN_OP_RSQRT][i] = csi_ref_rsqrt_quant; + bc_map[CSINN_OP_SCATTER_ND][i] = csi_ref_scatter_nd_quant; + bc_map[CSINN_OP_SEGMENT_MAX][i] = csi_ref_segment_max_quant; + bc_map[CSINN_OP_UNSORTED_SEGMENT_MAX][i] = csi_ref_unsorted_segment_max_quant; + bc_map[CSINN_OP_SEGMENT_MEAN][i] = csi_ref_segment_mean_quant; + bc_map[CSINN_OP_UNSORTED_SEGMENT_MEAN][i] = csi_ref_unsorted_segment_mean_quant; + bc_map[CSINN_OP_SEGMENT_MIN][i] = csi_ref_segment_min_quant; + bc_map[CSINN_OP_UNSORTED_SEGMENT_MIN][i] = csi_ref_unsorted_segment_min_quant; + bc_map[CSINN_OP_SEGMENT_PROD][i] = csi_ref_segment_prod_quant; + bc_map[CSINN_OP_UNSORTED_SEGMENT_PROD][i] = csi_ref_unsorted_segment_prod_quant; + bc_map[CSINN_OP_SEGMENT_SUM][i] = csi_ref_segment_sum_quant; + bc_map[CSINN_OP_UNSORTED_SEGMENT_SUM][i] = csi_ref_unsorted_segment_sum_quant; + bc_map[CSINN_OP_SHUFFLE_CHANNEL][i] = csi_ref_shuffle_channel_quant; + bc_map[CSINN_OP_SIGMOID][i] = csi_ref_sigmoid_quant; + bc_map[CSINN_OP_SIGN][i] = csi_ref_sign_quant; + bc_map[CSINN_OP_SIN][i] = csi_ref_sin_quant; + bc_map[CSINN_OP_SINH][i] = csi_ref_sinh_quant; + bc_map[CSINN_OP_SLICE][i] = csi_ref_slice_quant; + bc_map[CSINN_OP_SOFTMAX][i] = csi_ref_softmax_quant; + bc_map[CSINN_OP_SOFTPLUS][i] = csi_ref_softplus_quant; + bc_map[CSINN_OP_SOFTRELU][i] = csi_ref_softrelu_quant; + bc_map[CSINN_OP_SOFTSIGN][i] = csi_ref_softsign_quant; + bc_map[CSINN_OP_SPACE_TO_BATCH][i] = csi_ref_space_to_batch_quant; + bc_map[CSINN_OP_SPACE_TO_DEPTH][i] = csi_ref_space_to_depth_quant; + bc_map[CSINN_OP_SPLIT][i] = csi_ref_split_quant; + bc_map[CSINN_OP_SQRT][i] = csi_ref_sqrt_quant; + bc_map[CSINN_OP_STACK][i] = csi_ref_stack_quant; + bc_map[CSINN_OP_STRIDED_SLICE][i] = csi_ref_strided_slice_quant; + bc_map[CSINN_OP_SUB][i] = csi_ref_sub_quant; + bc_map[CSINN_OP_SUM][i] = csi_ref_sum_stride_quant; + bc_map[CSINN_OP_TAN][i] = csi_ref_tan_quant; + bc_map[CSINN_OP_TANH][i] = csi_ref_tanh_quant; + bc_map[CSINN_OP_THRESHOLD_RELU][i] = csi_ref_threshold_relu_quant; + bc_map[CSINN_OP_TILE][i] = csi_ref_tile_quant; + bc_map[CSINN_OP_TOPK][i] = csi_ref_topk_quant; + bc_map[CSINN_OP_TRUNC][i] = csi_ref_trunc_quant; + bc_map[CSINN_OP_TRANSPOSE][i] = csi_ref_transpose_quant; + bc_map[CSINN_OP_TRUNC][i] = csi_ref_trunc_quant; + bc_map[CSINN_OP_UNPOOLING][i] = csi_ref_unpooling_quant; + bc_map[CSINN_OP_UNSTACK][i] = csi_ref_unstack_qunat; + bc_map[CSINN_OP_YUV_RGB_SCALE][i] = csi_ref_yuv_rgb_scale_quant; + } + // fp16 opt interface + bc_map[CSINN_OP_ADD][2] = csi_nn_rvv_add_fp16; + bc_map[CSINN_OP_CONCAT][2] = csi_nn_rvv_concat_fp16; + bc_map[CSINN_OP_GLOBAL_AVGPOOL2D][2] = csi_nn_rvv_global_avgpool2d_fp16; + bc_map[CSINN_OP_LEAKY_RELU][2] = csi_nn_rvv_leaky_relu_fp16; + bc_map[CSINN_OP_RELU][2] = csi_nn_rvv_relu_fp16; + // int8 opt interface + bc_map[CSINN_OP_ADD][1] = csi_nn_rvv_add_int8; + bc_map[CSINN_OP_CONCAT][1] = csi_nn_rvv_concat_int8; + bc_map[CSINN_OP_LEAKY_RELU][1] = csi_nn_rvv_leaky_relu_int8; + bc_map[CSINN_OP_RELU][1] = csi_nn_rvv_relu_int8; + // int4 opt interface + + return bc_map; +} + +static int get_bc_map_index(int op, int dtype) +{ + switch (dtype) { + case CSINN_DTYPE_INT4: + return op * 4; + break; + case CSINN_DTYPE_INT8: + return op * 4 + 1; + break; + case CSINN_DTYPE_FLOAT16: + return op * 4 + 2; + break; + case CSINN_DTYPE_FLOAT32: + return op * 4 + 3; + break; + default: + return CSINN_UNSUPPORT_DTYPE; + } +} + +void *csi_bc_map_rvv(int op, int dtype) +{ + static int has_init; + static void **bc_map_table; + if (has_init == 0) { + bc_map_table = setup_bc_map(); + has_init = 1; + } + return bc_map_table[get_bc_map_index(op, dtype)]; +} diff --git a/source/thead_rvv/sgemm.c b/source/thead_rvv/sgemm.c new file mode 100644 index 00000000..148ea628 --- /dev/null +++ b/source/thead_rvv/sgemm.c @@ -0,0 +1,969 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +/************************************************************* + note: VLEN = 128 +*************************************************************/ +void csi_nn_rvv_reorder_kernel_n8_fp32(float *a, float *sa, int m, int k, int ldx) +{ + int i = 0; + for (; i + 7 < m; i += 8) { + for (int j = 0; j < k; j++) { + sa[i * k + 8 * j + 0] = a[(i + 0) * k + j]; + sa[i * k + 8 * j + 1] = a[(i + 1) * k + j]; + sa[i * k + 8 * j + 2] = a[(i + 2) * k + j]; + sa[i * k + 8 * j + 3] = a[(i + 3) * k + j]; + sa[i * k + 8 * j + 4] = a[(i + 4) * k + j]; + sa[i * k + 8 * j + 5] = a[(i + 5) * k + j]; + sa[i * k + 8 * j + 6] = a[(i + 6) * k + j]; + sa[i * k + 8 * j + 7] = a[(i + 7) * k + j]; + } + } + + for (; i + 3 < m; i += 4) { + for (int j = 0; j < k; j++) { + sa[i * k + 4 * j + 0] = a[(i + 0) * k + j]; + sa[i * k + 4 * j + 1] = a[(i + 1) * k + j]; + sa[i * k + 4 * j + 2] = a[(i + 2) * k + j]; + sa[i * k + 4 * j + 3] = a[(i + 3) * k + j]; + } + } + + for (; i + 1 < m; i += 2) { + for (int j = 0; j < k; j++) { + sa[i * k + 2 * j + 0] = a[(i + 0) * k + j]; + sa[i * k + 2 * j + 1] = a[(i + 1) * k + j]; + } + } + + for (; i < m; i++) { + for (int j = 0; j < k; j++) { + sa[i * k + 1 * j + 0] = a[(i + 0) * k + j]; + } + } +} + +/************************************************************** + * input—matrix: [k, n] + * src: b + * dst: sb + * Data arrangement: Z8 | | | + **************************************************************/ +void csi_nn_rvv_reorder_input_z8_fp32(float *b, float *sb, int k, int n, int ldx) +{ + int32_t vl = vsetvl_e32m2(8); + float *b0 = NULL; + int i = 0; + for (; i + 7 < n; i += 8) { + b0 = b + i; + for (int j = 0; j < k; j++) { + vfloat32m2_t _tmp = vle32_v_f32m2(b0, vl); + b0 += ldx; + vse32_v_f32m2(sb, _tmp, vl); + sb += 8; + } + } + + for (; i < n; i++) { + vl = vsetvl_e32m2(8); + b0 = b + i; + int j = 0; + for (; j + 7 < k; j += 8) { + vfloat32m2_t _tmp = vlse32_v_f32m2(b0, ldx * sizeof(float), vl); + b0 += 8 * ldx; + vse32_v_f32m2(sb, _tmp, vl); + sb += 8; + } + if (j < k) { + vl = vsetvl_e32m2(k & 7); + vfloat32m2_t _tmp = vlse32_v_f32m2(b0, ldx * sizeof(float), vl); + vse32_v_f32m2(sb, _tmp, vl); + sb += vl; + } + } +} + +/* + dst - output:[m, n] + sa - kernel: [m, k] + sb - input: [k, n] +*/ +void csi_nn_rvv_gemm_8x8_fp32(float *dst, const float *sa, const float *sb, int m, int k, int n, + int ldc, float *bias) +{ + float *kernel_data = (float *)sa; + float *input_data = (float *)sb; + float *output_data = dst; + + int flag_bias = 1; // default: conv2d layer include bias + if (bias == NULL) { + flag_bias = 0; + bias = (float *)csi_mem_alloc(m * sizeof(float)); + } + float *bias_ptr = bias; + + int vl; + + int i = 0; + // m8 loop + vl = vsetvl_e32m2(8); + for (; i + 7 < m; i += 8) { + float *in_ptr = input_data; + + float *out_ptr0 = output_data; + float *out_ptr1 = out_ptr0 + ldc; + float *out_ptr2 = out_ptr1 + ldc; + float *out_ptr3 = out_ptr2 + ldc; + float *out_ptr4 = out_ptr3 + ldc; + float *out_ptr5 = out_ptr4 + ldc; + float *out_ptr6 = out_ptr5 + ldc; + float *out_ptr7 = out_ptr6 + ldc; + + int j = 0; + // m8n8 loop + for (; j + 7 < n; j += 8) { + float *kernel_ptr = kernel_data; + vfloat32m2_t _acc0 = vfmv_v_f_f32m2(bias_ptr[0], vl); + vfloat32m2_t _acc1 = vfmv_v_f_f32m2(bias_ptr[1], vl); + vfloat32m2_t _acc2 = vfmv_v_f_f32m2(bias_ptr[2], vl); + vfloat32m2_t _acc3 = vfmv_v_f_f32m2(bias_ptr[3], vl); + vfloat32m2_t _acc4 = vfmv_v_f_f32m2(bias_ptr[4], vl); + vfloat32m2_t _acc5 = vfmv_v_f_f32m2(bias_ptr[5], vl); + vfloat32m2_t _acc6 = vfmv_v_f_f32m2(bias_ptr[6], vl); + vfloat32m2_t _acc7 = vfmv_v_f_f32m2(bias_ptr[7], vl); // init acc with bias_data + + for (int c = 0; c < k; c++) { + vfloat32m2_t _input = vle32_v_f32m2(in_ptr, vl); + + float k0 = kernel_ptr[0]; + float k1 = kernel_ptr[1]; + float k2 = kernel_ptr[2]; + float k3 = kernel_ptr[3]; + float k4 = kernel_ptr[4]; + float k5 = kernel_ptr[5]; + float k6 = kernel_ptr[6]; + float k7 = kernel_ptr[7]; + + _acc0 = vfmacc_vf_f32m2(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k1, _input, vl); + _acc2 = vfmacc_vf_f32m2(_acc2, k2, _input, vl); + _acc3 = vfmacc_vf_f32m2(_acc3, k3, _input, vl); + _acc4 = vfmacc_vf_f32m2(_acc4, k4, _input, vl); + _acc5 = vfmacc_vf_f32m2(_acc5, k5, _input, vl); + _acc6 = vfmacc_vf_f32m2(_acc6, k6, _input, vl); + _acc7 = vfmacc_vf_f32m2(_acc7, k7, _input, vl); + + kernel_ptr += 8; + in_ptr += 8; + } + vse32_v_f32m2(out_ptr0, _acc0, vl); + vse32_v_f32m2(out_ptr1, _acc1, vl); + vse32_v_f32m2(out_ptr2, _acc2, vl); + vse32_v_f32m2(out_ptr3, _acc3, vl); + vse32_v_f32m2(out_ptr4, _acc4, vl); + vse32_v_f32m2(out_ptr5, _acc5, vl); + vse32_v_f32m2(out_ptr6, _acc6, vl); + vse32_v_f32m2(out_ptr7, _acc7, vl); + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + out_ptr4 += 8; + out_ptr5 += 8; + out_ptr6 += 8; + out_ptr7 += 8; + } + // m8n4 + for (; j + 3 < n; j += 4) { + vfloat32m2_t _acc0 = vle32_v_f32m2(bias_ptr, vl); + vfloat32m2_t _acc1 = vle32_v_f32m2(bias_ptr, vl); + vfloat32m2_t _acc2 = vle32_v_f32m2(bias_ptr, vl); + vfloat32m2_t _acc3 = vle32_v_f32m2(bias_ptr, vl); // init acc with bias_data + + float *kernel_ptr = kernel_data; + + float *in_ptr0 = in_ptr; + float *in_ptr1 = in_ptr0 + k; + float *in_ptr2 = in_ptr1 + k; + float *in_ptr3 = in_ptr2 + k; + + out_ptr1 = out_ptr0 + 1; + out_ptr2 = out_ptr0 + 2; + out_ptr3 = out_ptr0 + 3; + + for (int c = 0; c < k; c++) { + vfloat32m2_t _kernel = vle32_v_f32m2(kernel_ptr, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, in_ptr1[c], _kernel, vl); + _acc2 = vfmacc_vf_f32m2(_acc2, in_ptr2[c], _kernel, vl); + _acc3 = vfmacc_vf_f32m2(_acc3, in_ptr3[c], _kernel, vl); + kernel_ptr += 8; + } + vsse32_v_f32m2(out_ptr0, ldc * sizeof(float), _acc0, vl); + vsse32_v_f32m2(out_ptr1, ldc * sizeof(float), _acc1, vl); + vsse32_v_f32m2(out_ptr2, ldc * sizeof(float), _acc2, vl); + vsse32_v_f32m2(out_ptr3, ldc * sizeof(float), _acc3, vl); + out_ptr0 += 4; + in_ptr += 4 * k; + } + // m8n2 + for (; j + 1 < n; j += 2) { + vfloat32m2_t _acc0 = vle32_v_f32m2(bias_ptr, vl); + vfloat32m2_t _acc1 = vle32_v_f32m2(bias_ptr, vl); + + float *kernel_ptr = kernel_data; + + float *in_ptr0 = in_ptr; + float *in_ptr1 = in_ptr0 + k; + + out_ptr1 = out_ptr0 + 1; + + for (int c = 0; c < k; c++) { + vfloat32m2_t _kernel = vle32_v_f32m2(kernel_ptr, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, in_ptr1[c], _kernel, vl); + kernel_ptr += 8; + } + vsse32_v_f32m2(out_ptr0, ldc * sizeof(float), _acc0, vl); + vsse32_v_f32m2(out_ptr1, ldc * sizeof(float), _acc1, vl); + out_ptr0 += 2; + in_ptr += 2 * k; + } + // m8n1 + for (; j < n; j++) { + vfloat32m2_t _acc0 = vle32_v_f32m2(bias_ptr, vl); + float *kernel_ptr = kernel_data; + float *in_ptr0 = in_ptr; + + for (int c = 0; c < k; c++) { + vfloat32m2_t _kernel = vle32_v_f32m2(kernel_ptr, vl); + _acc0 = vfmacc_vf_f32m2(_acc0, in_ptr0[c], _kernel, vl); + kernel_ptr += 8; + } + vsse32_v_f32m2(out_ptr0, ldc * sizeof(float), _acc0, vl); + } + kernel_data += 8 * k; + output_data += 8 * ldc; + bias_ptr += 8; + } + + // m4 + for (; i + 3 < m; i += 4) { + vl = vsetvl_e32m2(8); + float *in_ptr = input_data; + + float *out_ptr0 = output_data; + float *out_ptr1 = out_ptr0 + ldc; + float *out_ptr2 = out_ptr1 + ldc; + float *out_ptr3 = out_ptr2 + ldc; + + int j = 0; + // m4n8 loop + for (; j + 7 < n; j += 8) { + float *kernel_ptr = kernel_data; + vfloat32m2_t _acc0 = vfmv_v_f_f32m2(bias_ptr[0], vl); + vfloat32m2_t _acc1 = vfmv_v_f_f32m2(bias_ptr[1], vl); + vfloat32m2_t _acc2 = vfmv_v_f_f32m2(bias_ptr[2], vl); + vfloat32m2_t _acc3 = vfmv_v_f_f32m2(bias_ptr[3], vl); + + for (int c = 0; c < k; c++) { + vfloat32m2_t _input = vle32_v_f32m2(in_ptr, vl); + float k0 = kernel_ptr[0]; + float k1 = kernel_ptr[1]; + float k2 = kernel_ptr[2]; + float k3 = kernel_ptr[3]; + + _acc0 = vfmacc_vf_f32m2(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k1, _input, vl); + _acc2 = vfmacc_vf_f32m2(_acc2, k2, _input, vl); + _acc3 = vfmacc_vf_f32m2(_acc3, k3, _input, vl); + + kernel_ptr += 4; + in_ptr += 8; + } + vse32_v_f32m2(out_ptr0, _acc0, vl); + vse32_v_f32m2(out_ptr1, _acc1, vl); + vse32_v_f32m2(out_ptr2, _acc2, vl); + vse32_v_f32m2(out_ptr3, _acc3, vl); + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + } + vl = vsetvl_e32m1(4); + // m4n4 + for (; j + 3 < n; j += 4) { + vfloat32m1_t _acc0 = vle32_v_f32m1(bias_ptr, vl); + vfloat32m1_t _acc1 = vle32_v_f32m1(bias_ptr, vl); + vfloat32m1_t _acc2 = vle32_v_f32m1(bias_ptr, vl); + vfloat32m1_t _acc3 = vle32_v_f32m1(bias_ptr, vl); // init acc with bias_data + + float *kernel_ptr = kernel_data; + + float *in_ptr0 = in_ptr; + float *in_ptr1 = in_ptr0 + k; + float *in_ptr2 = in_ptr1 + k; + float *in_ptr3 = in_ptr2 + k; + + out_ptr1 = out_ptr0 + 1; + out_ptr2 = out_ptr0 + 2; + out_ptr3 = out_ptr0 + 3; + + for (int c = 0; c < k; c++) { + vfloat32m1_t _kernel = vle32_v_f32m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, in_ptr1[c], _kernel, vl); + _acc2 = vfmacc_vf_f32m1(_acc2, in_ptr2[c], _kernel, vl); + _acc3 = vfmacc_vf_f32m1(_acc3, in_ptr3[c], _kernel, vl); + kernel_ptr += 4; + } + vsse32_v_f32m1(out_ptr0, ldc * sizeof(float), _acc0, vl); + vsse32_v_f32m1(out_ptr1, ldc * sizeof(float), _acc1, vl); + vsse32_v_f32m1(out_ptr2, ldc * sizeof(float), _acc2, vl); + vsse32_v_f32m1(out_ptr3, ldc * sizeof(float), _acc3, vl); + out_ptr0 += 4; + in_ptr += 4 * k; + } + // m4n2 + for (; j + 1 < n; j += 2) { + vfloat32m1_t _acc0 = vle32_v_f32m1(bias_ptr, vl); + vfloat32m1_t _acc1 = vle32_v_f32m1(bias_ptr, vl); // init acc with bias_data + + float *kernel_ptr = kernel_data; + float *in_ptr0 = in_ptr; + float *in_ptr1 = in_ptr0 + k; + out_ptr1 = out_ptr0 + 1; + + for (int c = 0; c < k; c++) { + vfloat32m1_t _kernel = vle32_v_f32m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, in_ptr1[c], _kernel, vl); + kernel_ptr += 4; + } + vsse32_v_f32m1(out_ptr0, ldc * sizeof(float), _acc0, vl); + vsse32_v_f32m1(out_ptr1, ldc * sizeof(float), _acc1, vl); + out_ptr0 += 2; + in_ptr += 2 * k; + } + // m4n1 + for (; j < n; j++) { + vfloat32m1_t _acc0 = vle32_v_f32m1(bias_ptr, vl); // init acc with bias_data + float *kernel_ptr = kernel_data; + float *in_ptr0 = in_ptr; + + for (int c = 0; c < k; c++) { + vfloat32m1_t _kernel = vle32_v_f32m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, in_ptr0[c], _kernel, vl); + kernel_ptr += 4; + } + vsse32_v_f32m1(out_ptr0, ldc * sizeof(float), _acc0, vl); + } + kernel_data += 4 * k; + output_data += 4 * ldc; + bias_ptr += 4; + } + + // m2 + for (; i + 1 < m; i += 2) { + vl = vsetvl_e32m2(8); + float *in_ptr = input_data; + float *out_ptr0 = output_data; + float *out_ptr1 = out_ptr0 + ldc; + + int j = 0; + // m2n8 loop + for (; j + 7 < n; j += 8) { + float *kernel_ptr = kernel_data; + vfloat32m2_t _acc0 = vfmv_v_f_f32m2(bias_ptr[0], vl); + vfloat32m2_t _acc1 = vfmv_v_f_f32m2(bias_ptr[1], vl); + + for (int c = 0; c < k; c++) { + vfloat32m2_t _input = vle32_v_f32m2(in_ptr, vl); + float k0 = kernel_ptr[0]; + float k1 = kernel_ptr[1]; + _acc0 = vfmacc_vf_f32m2(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k1, _input, vl); + kernel_ptr += 2; + in_ptr += 8; + } + vse32_v_f32m2(out_ptr0, _acc0, vl); + vse32_v_f32m2(out_ptr1, _acc1, vl); + out_ptr0 += 8; + out_ptr1 += 8; + } + + // TODO: rvv opt + for (; j < n; j++) { + float acc0 = bias_ptr[0]; + float acc1 = bias_ptr[1]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[2 * c] * in_ptr[c]; + acc1 += kernel_data[2 * c + 1] * in_ptr[c]; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + in_ptr += k; + } + kernel_data += 2 * k; + output_data += 2 * ldc; + bias_ptr += 2; + } + + // m1 + for (; i < m; i++) { + vl = vsetvl_e32m2(8); + float *in_ptr = input_data; + float *out_ptr0 = output_data; + + int j = 0; + // m1n8 loop + for (; j + 7 < n; j += 8) { + float *kernel_ptr = kernel_data; + vfloat32m2_t _acc0 = vfmv_v_f_f32m2(bias_ptr[0], vl); + for (int c = 0; c < k; c++) { + vfloat32m2_t _input = vle32_v_f32m2(in_ptr, vl); + float k0 = kernel_ptr[0]; + _acc0 = vfmacc_vf_f32m2(_acc0, k0, _input, vl); + kernel_ptr += 1; + in_ptr += 8; + } + vse32_v_f32m2(out_ptr0, _acc0, vl); + out_ptr0 += 8; + } + + // TODO: rvv opt + for (; j < n; j++) { + float acc0 = bias_ptr[0]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[c] * in_ptr[c]; + } + *out_ptr0++ = acc0; + in_ptr += k; + } + } + + if (!flag_bias) { + csi_mem_free(bias); + bias = NULL; + } +} + +/************************************************************* + note: VLEN = 256 +*************************************************************/ +// kernel 数据排布 可复用 csi_nn_rvv_reorder_kernel_n8 + +void csi_nn_rvv256_reorder_input_z16_fp32(float *b, float *sb, int k, int n, int ldx) +{ + int vl = vsetvl_e32m2(16); + float *b0 = NULL; + int i = 0; + + // Z16 + for (; i + 15 < n; i += 16) { + b0 = b + i; + for (int j = 0; j < k; j++) { + vfloat32m2_t _tmp = vle32_v_f32m2(b0, vl); + b0 += ldx; + vse32_v_f32m2(sb, _tmp, vl); + sb += 16; + } + } + + // Z8 + for (; i + 7 < n; i += 8) { + vl = vsetvl_e32m1(8); + b0 = b + i; + for (int j = 0; j < k; j++) { + vfloat32m1_t _tmp = vle32_v_f32m1(b0, vl); + b0 += ldx; + vse32_v_f32m1(sb, _tmp, vl); + sb += 8; + } + } + + // col by col + for (; i < n; i++) { + vl = vsetvl_e32m2(16); + b0 = b + i; + int j = 0; + for (; j + 15 < k; j += 16) { + vfloat32m2_t _tmp = vlse32_v_f32m2(b0, ldx * sizeof(float), vl); + b0 += 16 * ldx; + vse32_v_f32m2(sb, _tmp, vl); + sb += 16; + } + if (j < k) { + vl = vsetvl_e32m2(k & 15); + vfloat32m2_t _tmp = vlse32_v_f32m2(b0, ldx * sizeof(float), vl); + vse32_v_f32m2(sb, _tmp, vl); + sb += vl; + } + } +} + +void csi_nn_rvv256_gemm_8x16_fp32(float *dst, const float *sa, const float *sb, int m, int k, int n, + int ldc, float *bias) +{ + float *kernel_data = (float *)sa; + float *input_data = (float *)sb; + float *output_data = dst; + + int flag_bias = 1; // default: conv2d layer include bias + if (bias == NULL) { + flag_bias = 0; + bias = (float *)csi_mem_alloc(m * sizeof(float)); + } + float *bias_ptr = bias; + + int vl; + + int i = 0; + // m8 loop + for (; i + 7 < m; i += 8) { + vl = vsetvl_e32m2(16); + + float *in_ptr = input_data; + + float *out_ptr0 = output_data; + float *out_ptr1 = out_ptr0 + ldc; + float *out_ptr2 = out_ptr1 + ldc; + float *out_ptr3 = out_ptr2 + ldc; + float *out_ptr4 = out_ptr3 + ldc; + float *out_ptr5 = out_ptr4 + ldc; + float *out_ptr6 = out_ptr5 + ldc; + float *out_ptr7 = out_ptr6 + ldc; + + int j = 0; + // m8n16 loop + for (; j + 15 < n; j += 16) { + float *kernel_ptr = kernel_data; + vfloat32m2_t _acc0 = vfmv_v_f_f32m2(bias_ptr[0], vl); + vfloat32m2_t _acc1 = vfmv_v_f_f32m2(bias_ptr[1], vl); + vfloat32m2_t _acc2 = vfmv_v_f_f32m2(bias_ptr[2], vl); + vfloat32m2_t _acc3 = vfmv_v_f_f32m2(bias_ptr[3], vl); + vfloat32m2_t _acc4 = vfmv_v_f_f32m2(bias_ptr[4], vl); + vfloat32m2_t _acc5 = vfmv_v_f_f32m2(bias_ptr[5], vl); + vfloat32m2_t _acc6 = vfmv_v_f_f32m2(bias_ptr[6], vl); + vfloat32m2_t _acc7 = vfmv_v_f_f32m2(bias_ptr[7], vl); + + for (int c = 0; c < k; c++) { + vfloat32m2_t _input = vle32_v_f32m2(in_ptr, vl); + + float k0 = kernel_ptr[0]; + float k1 = kernel_ptr[1]; + float k2 = kernel_ptr[2]; + float k3 = kernel_ptr[3]; + float k4 = kernel_ptr[4]; + float k5 = kernel_ptr[5]; + float k6 = kernel_ptr[6]; + float k7 = kernel_ptr[7]; + + _acc0 = vfmacc_vf_f32m2(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k1, _input, vl); + _acc2 = vfmacc_vf_f32m2(_acc2, k2, _input, vl); + _acc3 = vfmacc_vf_f32m2(_acc3, k3, _input, vl); + _acc4 = vfmacc_vf_f32m2(_acc4, k4, _input, vl); + _acc5 = vfmacc_vf_f32m2(_acc5, k5, _input, vl); + _acc6 = vfmacc_vf_f32m2(_acc6, k6, _input, vl); + _acc7 = vfmacc_vf_f32m2(_acc7, k7, _input, vl); + + kernel_ptr += 8; + in_ptr += 16; + } + vse32_v_f32m2(out_ptr0, _acc0, vl); + vse32_v_f32m2(out_ptr1, _acc1, vl); + vse32_v_f32m2(out_ptr2, _acc2, vl); + vse32_v_f32m2(out_ptr3, _acc3, vl); + vse32_v_f32m2(out_ptr4, _acc4, vl); + vse32_v_f32m2(out_ptr5, _acc5, vl); + vse32_v_f32m2(out_ptr6, _acc6, vl); + vse32_v_f32m2(out_ptr7, _acc7, vl); + out_ptr0 += 16; + out_ptr1 += 16; + out_ptr2 += 16; + out_ptr3 += 16; + out_ptr4 += 16; + out_ptr5 += 16; + out_ptr6 += 16; + out_ptr7 += 16; + } + + vl = vsetvl_e32m1(8); + + // m8n8 + for (; j + 7 < n; j += 8) { + float *kernel_ptr = kernel_data; + vfloat32m1_t _acc0 = vfmv_v_f_f32m1(bias_ptr[0], vl); + vfloat32m1_t _acc1 = vfmv_v_f_f32m1(bias_ptr[1], vl); + vfloat32m1_t _acc2 = vfmv_v_f_f32m1(bias_ptr[2], vl); + vfloat32m1_t _acc3 = vfmv_v_f_f32m1(bias_ptr[3], vl); + vfloat32m1_t _acc4 = vfmv_v_f_f32m1(bias_ptr[4], vl); + vfloat32m1_t _acc5 = vfmv_v_f_f32m1(bias_ptr[5], vl); + vfloat32m1_t _acc6 = vfmv_v_f_f32m1(bias_ptr[6], vl); + vfloat32m1_t _acc7 = vfmv_v_f_f32m1(bias_ptr[7], vl); + + for (int c = 0; c < k; c++) { + vfloat32m1_t _input = vle32_v_f32m1(in_ptr, vl); + + float k0 = kernel_ptr[0]; + float k1 = kernel_ptr[1]; + float k2 = kernel_ptr[2]; + float k3 = kernel_ptr[3]; + float k4 = kernel_ptr[4]; + float k5 = kernel_ptr[5]; + float k6 = kernel_ptr[6]; + float k7 = kernel_ptr[7]; + + _acc0 = vfmacc_vf_f32m1(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k1, _input, vl); + _acc2 = vfmacc_vf_f32m1(_acc2, k2, _input, vl); + _acc3 = vfmacc_vf_f32m1(_acc3, k3, _input, vl); + _acc4 = vfmacc_vf_f32m1(_acc4, k4, _input, vl); + _acc5 = vfmacc_vf_f32m1(_acc5, k5, _input, vl); + _acc6 = vfmacc_vf_f32m1(_acc6, k6, _input, vl); + _acc7 = vfmacc_vf_f32m1(_acc7, k7, _input, vl); + + kernel_ptr += 8; + in_ptr += 8; + } + vse32_v_f32m1(out_ptr0, _acc0, vl); + vse32_v_f32m1(out_ptr1, _acc1, vl); + vse32_v_f32m1(out_ptr2, _acc2, vl); + vse32_v_f32m1(out_ptr3, _acc3, vl); + vse32_v_f32m1(out_ptr4, _acc4, vl); + vse32_v_f32m1(out_ptr5, _acc5, vl); + vse32_v_f32m1(out_ptr6, _acc6, vl); + vse32_v_f32m1(out_ptr7, _acc7, vl); + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + out_ptr4 += 8; + out_ptr5 += 8; + out_ptr6 += 8; + out_ptr7 += 8; + } + // m8n4 + for (; j + 3 < n; j += 4) { + vfloat32m1_t _acc0 = vle32_v_f32m1(bias_ptr, vl); + vfloat32m1_t _acc1 = vle32_v_f32m1(bias_ptr, vl); + vfloat32m1_t _acc2 = vle32_v_f32m1(bias_ptr, vl); + vfloat32m1_t _acc3 = vle32_v_f32m1(bias_ptr, vl); + + float *kernel_ptr = kernel_data; + + float *in_ptr0 = in_ptr; + float *in_ptr1 = in_ptr0 + k; + float *in_ptr2 = in_ptr1 + k; + float *in_ptr3 = in_ptr2 + k; + + out_ptr1 = out_ptr0 + 1; + out_ptr2 = out_ptr0 + 2; + out_ptr3 = out_ptr0 + 3; + + for (int c = 0; c < k; c++) { + vfloat32m1_t _kernel = vle32_v_f32m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, in_ptr1[c], _kernel, vl); + _acc2 = vfmacc_vf_f32m1(_acc2, in_ptr2[c], _kernel, vl); + _acc3 = vfmacc_vf_f32m1(_acc3, in_ptr3[c], _kernel, vl); + kernel_ptr += 8; + } + vsse32_v_f32m1(out_ptr0, ldc * sizeof(float), _acc0, vl); + vsse32_v_f32m1(out_ptr1, ldc * sizeof(float), _acc1, vl); + vsse32_v_f32m1(out_ptr2, ldc * sizeof(float), _acc2, vl); + vsse32_v_f32m1(out_ptr3, ldc * sizeof(float), _acc3, vl); + out_ptr0 += 4; + in_ptr += 4 * k; + } + // m8n2 + for (; j + 1 < n; j += 2) { + vfloat32m1_t _acc0 = vle32_v_f32m1(bias_ptr, vl); + vfloat32m1_t _acc1 = vle32_v_f32m1(bias_ptr, vl); + + float *kernel_ptr = kernel_data; + + float *in_ptr0 = in_ptr; + float *in_ptr1 = in_ptr0 + k; + + out_ptr1 = out_ptr0 + 1; + + for (int c = 0; c < k; c++) { + vfloat32m1_t _kernel = vle32_v_f32m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, in_ptr0[c], _kernel, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, in_ptr1[c], _kernel, vl); + kernel_ptr += 8; + } + vsse32_v_f32m1(out_ptr0, ldc * sizeof(float), _acc0, vl); + vsse32_v_f32m1(out_ptr1, ldc * sizeof(float), _acc1, vl); + out_ptr0 += 2; + in_ptr += 2 * k; + } + // m8n1 + for (; j < n; j++) { + vfloat32m1_t _acc0 = vle32_v_f32m1(bias_ptr, vl); + float *kernel_ptr = kernel_data; + float *in_ptr0 = in_ptr; + + for (int c = 0; c < k; c++) { + vfloat32m1_t _kernel = vle32_v_f32m1(kernel_ptr, vl); + _acc0 = vfmacc_vf_f32m1(_acc0, in_ptr0[c], _kernel, vl); + kernel_ptr += 8; + } + vsse32_v_f32m1(out_ptr0, ldc * sizeof(__fp16), _acc0, vl); + } + kernel_data += 8 * k; + output_data += 8 * ldc; + bias_ptr += 8; + } + + // m4 + for (; i + 3 < m; i += 4) { + vl = vsetvl_e32m2(16); + + float *in_ptr = input_data; + + float *out_ptr0 = output_data; + float *out_ptr1 = out_ptr0 + ldc; + float *out_ptr2 = out_ptr1 + ldc; + float *out_ptr3 = out_ptr2 + ldc; + + int j = 0; + // m4n16 loop + for (; j + 15 < n; j += 16) { + float *kernel_ptr = kernel_data; + vfloat32m2_t _acc0 = vfmv_v_f_f32m2(bias_ptr[0], vl); + vfloat32m2_t _acc1 = vfmv_v_f_f32m2(bias_ptr[1], vl); + vfloat32m2_t _acc2 = vfmv_v_f_f32m2(bias_ptr[2], vl); + vfloat32m2_t _acc3 = vfmv_v_f_f32m2(bias_ptr[3], vl); + + for (int c = 0; c < k; c++) { + vfloat32m2_t _input = vle32_v_f32m2(in_ptr, vl); + + float k0 = kernel_ptr[0]; + float k1 = kernel_ptr[1]; + float k2 = kernel_ptr[2]; + float k3 = kernel_ptr[3]; + _acc0 = vfmacc_vf_f32m2(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k1, _input, vl); + _acc2 = vfmacc_vf_f32m2(_acc2, k2, _input, vl); + _acc3 = vfmacc_vf_f32m2(_acc3, k3, _input, vl); + + kernel_ptr += 4; + in_ptr += 16; + } + vse32_v_f32m2(out_ptr0, _acc0, vl); + vse32_v_f32m2(out_ptr1, _acc1, vl); + vse32_v_f32m2(out_ptr2, _acc2, vl); + vse32_v_f32m2(out_ptr3, _acc3, vl); + out_ptr0 += 16; + out_ptr1 += 16; + out_ptr2 += 16; + out_ptr3 += 16; + } + + // m4n8 + for (; j + 7 < n; j += 8) { + vl = vsetvl_e16m1(8); + + float *kernel_ptr = kernel_data; + vfloat32m1_t _acc0 = vfmv_v_f_f32m1(bias_ptr[0], vl); + vfloat32m1_t _acc1 = vfmv_v_f_f32m1(bias_ptr[1], vl); + vfloat32m1_t _acc2 = vfmv_v_f_f32m1(bias_ptr[2], vl); + vfloat32m1_t _acc3 = vfmv_v_f_f32m1(bias_ptr[3], vl); + for (int c = 0; c < k; c++) { + vfloat32m1_t _input = vle32_v_f32m1(in_ptr, vl); + + float k0 = kernel_ptr[0]; + float k1 = kernel_ptr[1]; + float k2 = kernel_ptr[2]; + float k3 = kernel_ptr[3]; + + _acc0 = vfmacc_vf_f32m1(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k1, _input, vl); + _acc2 = vfmacc_vf_f32m1(_acc2, k2, _input, vl); + _acc3 = vfmacc_vf_f32m1(_acc3, k3, _input, vl); + + kernel_ptr += 4; + in_ptr += 8; + } + vse32_v_f32m1(out_ptr0, _acc0, vl); + vse32_v_f32m1(out_ptr1, _acc1, vl); + vse32_v_f32m1(out_ptr2, _acc2, vl); + vse32_v_f32m1(out_ptr3, _acc3, vl); + + out_ptr0 += 8; + out_ptr1 += 8; + out_ptr2 += 8; + out_ptr3 += 8; + } + + // TODO: rvv opt + for (; j < n; j++) { + float acc0 = bias_ptr[0]; + float acc1 = bias_ptr[1]; + float acc2 = bias_ptr[2]; + float acc3 = bias_ptr[3]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[4 * c] * in_ptr[c]; + acc1 += kernel_data[4 * c + 1] * in_ptr[c]; + acc2 += kernel_data[4 * c + 2] * in_ptr[c]; + acc3 += kernel_data[4 * c + 3] * in_ptr[c]; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + *out_ptr2++ = acc2; + *out_ptr3++ = acc3; + in_ptr += k; + } + kernel_data += 4 * k; + output_data += 4 * ldc; + bias_ptr += 4; + } + + // m2 + for (; i + 1 < m; i += 2) { + vl = vsetvl_e16m2(16); + + float *in_ptr = input_data; + float *out_ptr0 = output_data; + float *out_ptr1 = out_ptr0 + ldc; + + int j = 0; + // m2n16 loop + for (; j + 15 < n; j += 16) { + float *kernel_ptr = kernel_data; + vfloat32m2_t _acc0 = vfmv_v_f_f32m2(bias_ptr[0], vl); + vfloat32m2_t _acc1 = vfmv_v_f_f32m2(bias_ptr[1], vl); + + for (int c = 0; c < k; c++) { + vfloat32m2_t _input = vle32_v_f32m2(in_ptr, vl); + + float k0 = kernel_ptr[0]; + float k1 = kernel_ptr[1]; + _acc0 = vfmacc_vf_f32m2(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f32m2(_acc1, k1, _input, vl); + kernel_ptr += 2; + in_ptr += 16; + } + vse32_v_f32m2(out_ptr0, _acc0, vl); + vse32_v_f32m2(out_ptr1, _acc1, vl); + out_ptr0 += 16; + out_ptr1 += 16; + } + + vl = vsetvl_e32m1(8); + // m2n8 + for (; j + 7 < n; j += 8) { + float *kernel_ptr = kernel_data; + vfloat32m1_t _acc0 = vfmv_v_f_f32m1(bias_ptr[0], vl); + vfloat32m1_t _acc1 = vfmv_v_f_f32m1(bias_ptr[1], vl); + for (int c = 0; c < k; c++) { + vfloat32m1_t _input = vle32_v_f32m1(in_ptr, vl); + + float k0 = kernel_ptr[0]; + float k1 = kernel_ptr[1]; + _acc0 = vfmacc_vf_f32m1(_acc0, k0, _input, vl); + _acc1 = vfmacc_vf_f32m1(_acc1, k1, _input, vl); + + kernel_ptr += 2; + in_ptr += 8; + } + vse32_v_f32m1(out_ptr0, _acc0, vl); + vse32_v_f32m1(out_ptr1, _acc1, vl); + out_ptr0 += 8; + out_ptr1 += 8; + } + + // TODO: rvv opt + for (; j < n; j++) { + float acc0 = bias_ptr[0]; + float acc1 = bias_ptr[1]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[2 * c] * in_ptr[c]; + acc1 += kernel_data[2 * c + 1] * in_ptr[c]; + } + *out_ptr0++ = acc0; + *out_ptr1++ = acc1; + in_ptr += k; + } + kernel_data += 2 * k; + output_data += 2 * ldc; + bias_ptr += 2; + } + + // m1 + for (; i < m; i++) { + vl = vsetvl_e32m2(16); + + float *in_ptr = input_data; + float *out_ptr0 = output_data; + + int j = 0; + // m1n16 loop + for (; j + 15 < n; j += 16) { + float *kernel_ptr = kernel_data; + vfloat32m2_t _acc0 = vfmv_v_f_f32m2(bias_ptr[0], vl); + + for (int c = 0; c < k; c++) { + vfloat32m2_t _input = vle32_v_f32m2(in_ptr, vl); + float k0 = kernel_ptr[0]; + _acc0 = vfmacc_vf_f32m2(_acc0, k0, _input, vl); + kernel_ptr += 1; + in_ptr += 16; + } + vse32_v_f32m2(out_ptr0, _acc0, vl); + out_ptr0 += 16; + } + + vl = vsetvl_e32m1(8); + // m1n8 + for (; j + 7 < n; j += 8) { + float *kernel_ptr = kernel_data; + vfloat32m1_t _acc0 = vfmv_v_f_f32m1(bias_ptr[0], vl); + for (int c = 0; c < k; c++) { + vfloat32m1_t _input = vle32_v_f32m1(in_ptr, vl); + float k0 = kernel_ptr[0]; + _acc0 = vfmacc_vf_f32m1(_acc0, k0, _input, vl); + kernel_ptr += 1; + in_ptr += 8; + } + vse32_v_f32m1(out_ptr0, _acc0, vl); + out_ptr0 += 8; + } + // TODO: rvv opt + for (; j < n; j++) { + float acc0 = bias_ptr[0]; + for (int c = 0; c < k; c++) { + acc0 += kernel_data[c] * in_ptr[c]; + } + *out_ptr0++ = acc0; + in_ptr += k; + } + } + + if (!flag_bias) { + csi_mem_free(bias); + bias = NULL; + } +} diff --git a/source/thead_rvv/sigmoid.c b/source/thead_rvv/sigmoid.c new file mode 100644 index 00000000..5cb8575d --- /dev/null +++ b/source/thead_rvv/sigmoid.c @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" +#include "rvv_mathfun.h" + +int csi_nn_rvv_sigmoid_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct sigmoid_params *params) +{ + __fp16 *input_data = input->data; + __fp16 *output_data = output->data; + int size = 1; + for (int i = 0; i < input->dim_count; i++) { + size = size * input->dim[i]; + } + + while (size > 0) { + size_t vl = vsetvl_e16m2(size); + + vfloat16m2_t _val = vle16_v_f16m2(input_data, vl); // val + _val = vfmul_vf_f16m2(_val, -1.0f, 16); + vfloat16m2_t _output_data = exp_ps_vfloat16m2(_val, vl); + _output_data = vfadd_vf_f16m2(_output_data, 1.0f, vl); + _output_data = vfrdiv_vf_f16m2(_output_data, 1.0f, vl); + vse16_v_f16m2(output_data, _output_data, vl); + + input_data += vl; + output_data += vl; + size -= vl; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/softmax.c b/source/thead_rvv/softmax.c new file mode 100644 index 00000000..3d860e90 --- /dev/null +++ b/source/thead_rvv/softmax.c @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" +#include "rvv_mathfun.h" + +int csi_nn_rvv_softmax_fp16(struct csi_tensor *input, struct csi_tensor *output, + struct softmax_params *params) +{ + __fp16 *input_data = (__fp16 *)input->data; + __fp16 *output_data = (__fp16 *)output->data; + + int axis = params->axis; + // FlatSize() = outer_size * inner_size * cnt; + int64_t outer_size = 1; + for (int i = 0; i < axis; i++) { + outer_size *= input->dim[i]; + } + + int64_t inner_size = 1; + for (int i = axis + 1; i < input->dim_count; i++) { + inner_size *= input->dim[i]; + } + + int cnt = input->dim[axis]; + + for (int i = 0; i < outer_size; i++) { + for (int k = 0; k < inner_size; k++) { + __fp16 acc_exp = 0.0f; + __fp16 max = -FLT_MAX; + // Find max element value which we'll use to ensure numerical stability + // taking advantage of the following equality: + // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C)) + + int n = cnt; + __fp16 *ptr = input_data + k; + vfloat16m2_t _output_data = vfmv_v_f_f16m2(max, 16); + int j = 0; + for (; j + 15 < cnt; j += 16) { + vfloat16m2_t _input_data = vlse16_v_f16m2(ptr, 2 * inner_size, 16); + _output_data = vfmax_vv_f16m2(_input_data, _output_data, 16); + ptr += 16; + } + vfloat16m1_t _min_f = vfmv_v_f_f16m1(-FLT_MAX, 8); + vfloat16m1_t _max = vfredmax_vs_f16m2_f16m1(_min_f, _output_data, _min_f, 16); + max = vfmv_f_s_f16m1_f16(_max); + + for (; j < cnt; j++) { + max = fmax(max, *(input_data + j * inner_size + k)); + } + + n = cnt; + ptr = input_data + k; + vfloat16m2_t _sum = vfmv_v_f_f16m2(0.0f, 16); + + j = 0; + for (; j + 15 < cnt; j += 16) { + vfloat16m2_t _input_data = vlse16_v_f16m2(ptr, 2 * inner_size, 16); + _input_data = vfsub_vf_f16m2(_input_data, max, 16); + vfloat16m2_t _output_data = exp_ps_vfloat16m2(_input_data, 16); + _sum = vfadd_vv_f16m2(_sum, _output_data, 16); + ptr += 16; + } + vfloat16m1_t _0_f = vfmv_v_f_f16m1(0.0f, 8); + vfloat16m1_t _sum2 = vfredosum_vs_f16m2_f16m1(_0_f, _sum, _0_f, 16); + acc_exp = vfmv_f_s_f16m1_f16(_sum2); + + for (; j < cnt; j++) { + acc_exp += exp(*(input_data + j * inner_size + k) - max); + } + n = cnt; + ptr = input_data + k; + __fp16 *ptr2 = output_data + k; + while (n > 0) { + size_t vl = vsetvl_e16m2(n); + + vfloat16m2_t _input_data = vlse16_v_f16m2(ptr, 2 * inner_size, vl); + _input_data = vfsub_vf_f16m2(_input_data, max, vl); + vfloat16m2_t _output_data = exp_ps_vfloat16m2(_input_data, vl); + _output_data = vfdiv_vf_f16m2(_output_data, acc_exp, vl); + vsse16_v_f16m2(ptr2, 2 * inner_size, _output_data, vl); + + ptr += vl; + ptr2 += vl; + n -= vl; + } + } + input_data += inner_size * cnt; + output_data += inner_size * cnt; + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/sum.c b/source/thead_rvv/sum.c new file mode 100644 index 00000000..a113c536 --- /dev/null +++ b/source/thead_rvv/sum.c @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +int csi_nn_rvv_sum_stride_int8(struct csi_tensor *input, struct csi_tensor *output, + struct reduce_params *params) +{ + int8_t *input_data = (int8_t *)input->data; + int8_t *output_data = (int8_t *)output->data; + + // TODO: move to init api + float real_scale = input->qinfo->scale / output->qinfo->scale; + csi_quantize_multiplier(real_scale, &output->qinfo->multiplier, &output->qinfo->shift); + + if (*(params->axis) == -1) { + int size = 1; + for (int i = 0; i < input->dim_count; i++) { + size = size * input->dim[i]; + } + float res = 0; + for (int j = 0; j < size; j++) { + float temp = (input_data[j] - input->qinfo->zero_point) * input->qinfo->scale; + res = res + temp; + } + float ret = round(res / output->qinfo->scale) + output->qinfo->zero_point; + if (ret > 127) + ret = 127; + else if (ret < -128) + ret = -128; + *output_data = (int8_t)ret; + } else { + int axis = *(params->axis); + int64_t outer_size = 1; + for (int i = 0; i < axis; i++) { + outer_size *= input->dim[i]; + } + int64_t inner_size = 1; + for (int i = axis + 1; i < input->dim_count; i++) { + inner_size *= input->dim[i]; + } + int cnt = input->dim[axis]; + + for (int i = 0; i < outer_size; i++) { + int packn = csrr_vlenb() / sizeof(int8_t); + int vl = vsetvl_e8m1(packn); + int k = 0; + for (; k + packn - 1 < inner_size; k += packn) { + int8_t *in_ptr_start = input_data + k; + vint32m4_t _acc = vmv_v_x_i32m4(0, vl); + for (int j = 0; j < cnt; j++) { + vint8m1_t _input = vle8_v_i8m1(in_ptr_start, vl); + vint16m2_t _input1 = vwadd_vx_i16m2(_input, 0, vl); // widden 8->16 + vint32m4_t _input2 = vwadd_vx_i32m4(_input1, 0, vl); // widden 16->32 + + vint32m4_t _tmp = vsub_vx_i32m4(_input2, input->qinfo->zero_point, vl); + _acc = vadd_vv_i32m4(_acc, _tmp, vl); + in_ptr_start += inner_size; + } + vint32m4_t _mulh = vmulh_vx_i32m4(_acc, output->qinfo->multiplier, vl); + vint32m4_t _res; + + if (output->qinfo->shift < 0) { + _res = vssra_vx_i32m4(_mulh, -output->qinfo->shift - 1, vl); + } else { + _res = vsll_vx_i32m4(_mulh, output->qinfo->shift + 1, vl); + } + + vint32m4_t _res0 = + vadd_vx_i32m4(_res, output->qinfo->zero_point, vl); // +z2 (z2 = -128) + vint16m2_t _res1 = vnclip_wx_i16m2(_res0, 0, vl); // narrow 32->16 + vint8m1_t _res2 = vnclip_wx_i8m1(_res1, 0, vl); // narrow 16->8 + vse8_v_i8m1(output_data, _res2, vl); + output_data += vl; + } + if (k < inner_size) { + vl = vsetvl_e8m1(inner_size & (packn - 1)); + int8_t *in_ptr_start = input_data + k; + vint32m4_t _acc = vmv_v_x_i32m4(0, vl); + for (int j = 0; j < cnt; j++) { + vint8m1_t _input = vle8_v_i8m1(in_ptr_start, vl); + vint16m2_t _input1 = vwadd_vx_i16m2(_input, 0, vl); // widden 8->16 + vint32m4_t _input2 = vwadd_vx_i32m4(_input1, 0, vl); // widden 16->32 + + vint32m4_t _tmp = vsub_vx_i32m4(_input2, input->qinfo->zero_point, vl); + _acc = vadd_vv_i32m4(_acc, _tmp, vl); + in_ptr_start += inner_size; + } + vint32m4_t _mulh = vmulh_vx_i32m4(_acc, output->qinfo->multiplier, vl); + vint32m4_t _res; + + if (output->qinfo->shift < 0) { + _res = vssra_vx_i32m4(_mulh, -output->qinfo->shift - 1, vl); + } else { + _res = vsll_vx_i32m4(_mulh, output->qinfo->shift + 1, vl); + } + + vint32m4_t _res0 = + vadd_vx_i32m4(_res, output->qinfo->zero_point, vl); // +z2 (z2 = -128) + vint16m2_t _res1 = vnclip_wx_i16m2(_res0, 0, vl); // narrow 32->16 + vint8m1_t _res2 = vnclip_wx_i8m1(_res1, 0, vl); // narrow 16->8 + vse8_v_i8m1(output_data, _res2, vl); + } + + input_data += inner_size * cnt; + output_data += inner_size; + } + } + return CSINN_TRUE; +} diff --git a/source/thead_rvv/utils.c b/source/thead_rvv/utils.c new file mode 100644 index 00000000..b4b3e790 --- /dev/null +++ b/source/thead_rvv/utils.c @@ -0,0 +1,373 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_thead_rvv.h" + +int csrr_vl() +{ + int a = 0; + asm volatile("csrr %0, vl" : "=r"(a) : : "memory"); + return a; +} + +int csrr_vlenb() +{ + int a = 0; + asm volatile("csrr %0, vlenb" : "=r"(a) : : "memory"); + return a; +} + +/* params: + input: origin input data + input_padded: input data after pad + inc: origin input channel + inh: origin input height + inw: origin input width + padded_h: input height after pad + padded_w: input width after pad + pad_top: origin pad top + pad_left: origin pad left +*/ +void csi_nn_rvv_pad_input_fp32(const float *input, float *input_padded, int inc, int inh, int inw, + int padded_h, int padded_w, int pad_top, int pad_left) +{ + int padded_hw = padded_h * padded_w; + + float *pad_ptr = input_padded; + float *inp_ptr = (float *)input; + int resi_h = padded_h - pad_top - inh; // remain to pad on h (pad_down) + int resi_w = padded_w - pad_left - inw; // remain to pad on w (pad_right) + int size; + int vl = vsetvl_e32m1(csrr_vlenb() / sizeof(float)); + vfloat32m1_t _zero = vfmv_v_f_f32m1(0.0f, vl); + + for (int c = 0; c < inc; c++) { + pad_ptr = input_padded + c * padded_hw; + // pad h_top + size = padded_w * pad_top; + while (size > 0) { + vl = vsetvl_e32m1(size); + vse32_v_f32m1(pad_ptr, _zero, vl); + pad_ptr += vl; + size -= vl; + } + // pad h_mid + for (int h = 0; h < inh; h++) { + // pad w_left + memset(pad_ptr, 0, pad_left * sizeof(float)); + pad_ptr += pad_left; + // pad w_mid + size = inw; + while (size > 0) { + vl = vsetvl_e32m1(size); + vfloat32m1_t _input = vle32_v_f32m1(inp_ptr, vl); + inp_ptr += vl; + vse32_v_f32m1(pad_ptr, _input, vl); + pad_ptr += vl; + size -= vl; + } + // pad w_end + memset(pad_ptr, 0, resi_w * sizeof(float)); + pad_ptr += resi_w; + } + // pad h_bottom + size = padded_w * resi_h; + while (size > 0) { + vl = vsetvl_e32m1(size); + vse32_v_f32m1(pad_ptr, _zero, vl); + pad_ptr += vl; + size -= vl; + } + } +} + +void csi_nn_rvv_pad_input_fp16(const __fp16 *input, __fp16 *input_padded, int inc, int inh, int inw, + int padded_h, int padded_w, int pad_top, int pad_left) +{ + int padded_hw = padded_h * padded_w; + + __fp16 *pad_ptr = input_padded; + __fp16 *inp_ptr = (__fp16 *)input; + int resi_h = padded_h - pad_top - inh; // remain to pad on h (pad_down) + int resi_w = padded_w - pad_left - inw; // remain to pad on w (pad_right) + int size; + int vl = vsetvl_e16m1(csrr_vlenb() / sizeof(__fp16)); + vfloat16m1_t _zero = vfmv_v_f_f16m1(0.0f, vl); + + for (int c = 0; c < inc; c++) { + pad_ptr = input_padded + c * padded_hw; + // pad h_top + size = padded_w * pad_top; + while (size > 0) { + vl = vsetvl_e16m1(size); + vse16_v_f16m1(pad_ptr, _zero, vl); + pad_ptr += vl; + size -= vl; + } + // pad h_mid + for (int h = 0; h < inh; h++) { + // pad w_left + memset(pad_ptr, 0, pad_left * sizeof(__fp16)); + pad_ptr += pad_left; + // pad w_mid + size = inw; + while (size > 0) { + vl = vsetvl_e16m1(size); + vfloat16m1_t _input = vle16_v_f16m1(inp_ptr, vl); + inp_ptr += vl; + vse16_v_f16m1(pad_ptr, _input, vl); + pad_ptr += vl; + size -= vl; + } + // pad w_end + memset(pad_ptr, 0, resi_w * sizeof(__fp16)); + pad_ptr += resi_w; + } + // pad h_bottom + size = padded_w * resi_h; + while (size > 0) { + vl = vsetvl_e16m1(size); + vse16_v_f16m1(pad_ptr, _zero, vl); + pad_ptr += vl; + size -= vl; + } + } +} + +void csi_nn_rvv_pad_input_int8(const int8_t *input, int8_t *input_padded, int inc, int inh, int inw, + int padded_h, int padded_w, int pad_top, int pad_left, + int8_t pad_value) +{ + int padded_hw = padded_h * padded_w; + + int8_t *pad_ptr = input_padded; + int8_t *inp_ptr = (int8_t *)input; + int resi_h = padded_h - pad_top - inh; // remain to pad on h (pad_down) + int resi_w = padded_w - pad_left - inw; // remain to pad on w (pad_right) + int size; + int vl = vsetvl_e8m1(csrr_vlenb() / sizeof(int8_t)); + vint8m1_t _pad_zero = vmv_v_x_i8m1(pad_value, vl); // float 0.0 -> input->zero_point + + for (int c = 0; c < inc; c++) { + pad_ptr = input_padded + c * padded_hw; + // pad h_top + size = padded_w * pad_top; + while (size > 0) { + vl = vsetvl_e8m1(size); + vse8_v_i8m1(pad_ptr, _pad_zero, vl); + pad_ptr += vl; + size -= vl; + } + // pad h_mid + for (int h = 0; h < inh; h++) { + // pad w_left + memset(pad_ptr, pad_value, pad_left * sizeof(int8_t)); + pad_ptr += pad_left; + // pad w_mid + size = inw; + while (size > 0) { + vl = vsetvl_e8m1(size); + vint8m1_t _input = vle8_v_i8m1(inp_ptr, vl); + inp_ptr += vl; + vse8_v_i8m1(pad_ptr, _input, vl); + pad_ptr += vl; + size -= vl; + } + // pad w_end + memset(pad_ptr, pad_value, resi_w * sizeof(int8_t)); + pad_ptr += resi_w; + } + // pad h_bottom + size = padded_w * resi_h; + while (size > 0) { + vl = vsetvl_e8m1(size); + vse8_v_i8m1(pad_ptr, _pad_zero, vl); + pad_ptr += vl; + size -= vl; + } + } +} + +/********************* for int8 quantization *********************/ +// add output_zeropint +void csi_nn_rvv_saturated_int8(int32_t *src, int8_t *dst, int32_t out_zp, int size) +{ + while (size > 0) { + int vl = vsetvl_e32m2(size); + vint32m4_t _tmp = vle32_v_i32m4(src, vl); + _tmp = vadd_vx_i32m4(_tmp, out_zp, vl); + + vint16m2_t _tmp1 = vnclip_wx_i16m2(_tmp, 0, vl); // narrow 32->16 + vint8m1_t _tmp2 = vnclip_wx_i8m1(_tmp1, 0, vl); // narrow 16->8 + + vse8_v_i8m1(dst, _tmp2, vl); + src += vl; + dst += vl; + size -= vl; + } +} + +// 再量化 int32 -> int8 +// (val * multiplier)/(2 ^ shift) +void csi_nn_rvv_requantize(int32_t *src, int32_t multiplier, int32_t shift, int channel_size) +{ + while (channel_size > 0) { + int vl = vsetvl_e32m4(channel_size); + vint32m4_t _val = vle32_v_i32m4(src, vl); + vint32m4_t _mulh = vmulh_vx_i32m4(_val, multiplier, vl); + vint32m4_t _res; + // FIXME: precision error + if (shift < 0) { + _res = vssra_vx_i32m4(_mulh, -shift - 1, vl); + } else { + _res = vsll_vx_i32m4(_mulh, shift + 1, vl); + } + vse32_v_i32m4(src, _res, vl); + src += vl; + channel_size -= vl; + } +} + +// 反量化 int32 -> float32 int8 -> float32 +void csi_nn_rvv_dequantize() +{ + ; +} + +/********************* int4 easter eggs *********************/ +void csi_nn_rvv_pad_input_int4_trans_int8(const int8_t *input, int8_t *input_padded, int inc, + int inh, int inw, int padded_h, int padded_w, int pad_top, + int pad_left, int8_t pad_value) +{ + int padded_hw = padded_h * padded_w; + + int8_t *pad_ptr = input_padded; + int8_t *inp_ptr = (int8_t *)input; + int resi_h = padded_h - pad_top - inh; // remain to pad on h (pad_down) + int resi_w = padded_w - pad_left - inw; // remain to pad on w (pad_right) + int size; + int vl = vsetvl_e8m1(csrr_vlenb() / sizeof(int8_t)); + vint8m1_t _pad_zero = vmv_v_x_i8m1(pad_value, vl); // float 0.0 -> input->zero_point + + // pad h_top + size = padded_w * pad_top * inc; + while (size > 0) { + vl = vsetvl_e8m1(size); + vse8_v_i8m1(pad_ptr, _pad_zero, vl); + pad_ptr += vl; + size -= vl; + } + // pad h_mid + for (int h = 0; h < inh; h++) { + // pad w_left + size = pad_left * inc; + memset(pad_ptr, pad_value, size * sizeof(int8_t)); + pad_ptr += size; + // pad w_mid + csi_nn_rvv_int4_trans_int8(inp_ptr, pad_ptr, inw * inc); + inp_ptr += inw * inc / 2; + pad_ptr += inw * inc; + // pad w_right + size = resi_w * inc; + memset(pad_ptr, pad_value, size * sizeof(int8_t)); + pad_ptr += size; + } + // pad h_bottom + size = padded_w * resi_h * inc; + while (size > 0) { + vl = vsetvl_e8m1(size); + vse8_v_i8m1(pad_ptr, _pad_zero, vl); + pad_ptr += vl; + size -= vl; + } +} + +// size: int4 number +// TODO: 这里是不是需要增加一条指令 +void csi_nn_rvv_int4_to_int8(int8_t *src, int8_t *dst, int size) +{ + int j = size / 2; + while (j > 0) { + int vl = vsetvl_e8m1(j); + vint8m1_t _input = vle8_v_i8m1(src, vl); + vint8m1_t _low = vand_vx_i8m1(_input, 0x0f, vl); + vint8m1_t _high_input = vsra_vx_i8m1(_input, 4, vl); + vint8m1_t _high = vand_vx_i8m1(_high_input, 0x0f, vl); + vsse8_v_i8m1(dst, 2 * sizeof(int8_t), _low, vl); + vsse8_v_i8m1(dst + 1, 2 * sizeof(int8_t), _high, vl); + + src += vl; + dst += 2 * vl; + j -= vl; + } + // tail, odd size + if (size & 1) { + *dst = *src; + } +} + +// size: int4 number +// todo: replace with vpnclip_wx inst +void csi_nn_rvv_int8_to_int4(int8_t *src, int8_t *dst, int size) +{ + int j = size / 2; + while (j > 0) { + int vl = vsetvl_e8m1(j); + vint8m1_t _low_tmp = vlse8_v_i8m1(src, 2 * sizeof(int8_t), vl); + vint8m1_t _high_tmp = vlse8_v_i8m1(src + 1, 2 * sizeof(int8_t), vl); + vint8m1_t _low = vand_vx_i8m1(_low_tmp, 0x0f, vl); + vint8m1_t _high = vsll_vx_i8m1(_high_tmp, 4, vl); + vint8m1_t _output = vor_vv_i8m1(_low, _high, vl); + vse8_v_i8m1(dst, _output, vl); + + src += 2 * vl; + dst += vl; + j -= vl; + } + // tail, odd size + if (size & 1) { + *dst = *src; + } +} + +// size: int4 number +// TODO: replace with vpwadd.vx inst +void csi_nn_rvv_int4_trans_int8(int8_t *src, int8_t *dst, int size) +{ + int j = size / 2; + while (j > 0) { + int vl = vsetvl_e8m1(j); + vint8m1_t _input = vle8_v_i8m1(src, vl); + vint8m1_t _low = vand_vx_i8m1(_input, 0x0f, vl); + vbool8_t _mask = vmsgt_vx_i8m1_b8(_low, 7, vl); + vint8m1_t _low_int8 = vsub_vx_i8m1_m(_mask, _low, _low, 16, vl); + vint8m1_t _high_int8 = vsra_vx_i8m1(_input, 4, vl); + vsse8_v_i8m1(dst, 2 * sizeof(int8_t), _low_int8, vl); + vsse8_v_i8m1(dst + 1, 2 * sizeof(int8_t), _high_int8, vl); + + src += vl; + dst += 2 * vl; + j -= vl; + } + // tail, odd size + if (size & 1) { + *dst = *src > 7 ? (*src - 16) : (*src); + } +} diff --git a/source/utils/atat_malloc.c b/source/utils/atat_malloc.c new file mode 100644 index 00000000..8d90a7ab --- /dev/null +++ b/source/utils/atat_malloc.c @@ -0,0 +1,137 @@ +/**************************************************************** +Copyright 1990, 1994, 2000 by AT&T, Lucent Technologies and Bellcore. + +Permission to use, copy, modify, and distribute this software +and its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appear in all +copies and that both that the copyright notice and this +permission notice and warranty disclaimer appear in supporting +documentation, and that the names of AT&T, Bell Laboratories, +Lucent or Bellcore or any of their entities not be used in +advertising or publicity pertaining to distribution of the +software without specific, written prior permission. + +AT&T, Lucent and Bellcore disclaim all warranties with regard to +this software, including all implied warranties of +merchantability and fitness. In no event shall AT&T, Lucent or +Bellcore be liable for any special, indirect or consequential +damages or any damages whatsoever resulting from loss of use, +data or profits, whether in an action of contract, negligence or +other tortious action, arising out of or in connection with the +use or performance of this software. +****************************************************************/ + +// #define F _malloc_free_ + +#include +#include + +#ifdef CSI_BUILD_RTOS +#define SBGULP 0x800000 +#else +#define SBGULP 0x8000000 +#endif + +typedef struct csi_atat_mem { + struct csi_atat_mem *next; + size_t len; +} csi_atat_mem; + +#define MINBLK (2 * sizeof(struct csi_atat_mem) + 16) + +csi_atat_mem *F; + +static char *sbrk_wrapper(int size) +{ +#ifdef CSI_BUILD_RTOS + return (char *)0x60000000; +#else + return sbrk(size); +#endif +} + +void *csi_atat_malloc(register size_t size) +{ + register csi_atat_mem *p, *q, *r, *s; + unsigned register k, m; + // extern void *sbrk(Int); + char *top, *top1; + + size = (size + 7) & ~7; + r = (csi_atat_mem *)&F; + for (p = F, q = 0; p; r = p, p = p->next) { + if ((k = p->len) >= size && (!q || m > k)) { + m = k; + q = p; + s = r; + } + } + if (q) { + if (q->len - size >= MINBLK) { /* split block */ + p = (csi_atat_mem *)(((char *)(q + 1)) + size); + p->next = q->next; + p->len = q->len - size - sizeof(csi_atat_mem); + s->next = p; + q->len = size; + } else { + s->next = q->next; + } + } else { + top = (void *)(((long)sbrk_wrapper(0) + 7) & ~7); + if (F && (char *)(F + 1) + F->len == top) { + q = F; + F = F->next; + } else { + q = (csi_atat_mem *)top; + } + top1 = (char *)(q + 1) + size; + if (sbrk_wrapper((int)(top1 - top + SBGULP)) == (void *)-1) { + return 0; + } + r = (csi_atat_mem *)top1; + r->len = SBGULP - sizeof(csi_atat_mem); + r->next = F; + F = r; + q->len = size; + } + return (void *)(q + 1); +} + +void csi_atat_free(void *f) +{ + csi_atat_mem *p, *q, *r; + char *pn, *qn; + + if (!f) return; + q = (csi_atat_mem *)((char *)f - sizeof(csi_atat_mem)); + qn = (char *)f + q->len; + for (p = F, r = (csi_atat_mem *)&F;; r = p, p = p->next) { + if (qn == (void *)p) { + q->len += p->len + sizeof(csi_atat_mem); + p = p->next; + } + pn = p ? ((char *)(p + 1)) + p->len : 0; + if (pn == (void *)q) { + p->len += sizeof(csi_atat_mem) + q->len; + q->len = 0; + q->next = p; + r->next = p; + break; + } + if (pn < (char *)q) { + r->next = q; + q->next = p; + break; + } + } +} + +void *csi_atat_calloc(size_t n, size_t m) +{ + void *rv; + rv = csi_atat_malloc(n *= m); + if (n && rv) { + memset(rv, 0, n); + } + return rv; +} diff --git a/source/nn2/debug.c b/source/utils/debug.c similarity index 88% rename from source/nn2/debug.c rename to source/utils/debug.c index 14855ef5..2302a10f 100644 --- a/source/nn2/debug.c +++ b/source/utils/debug.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,11 +16,12 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ -#include "csi_nn.h" #include #include +#include "csi_nn.h" +#include "csi_node.h" int csi_debug_level = CSI_DEBUG_LEVEL_WARNING; @@ -34,6 +35,16 @@ void csi_debug_set_level(int level) csi_debug_level = level; } #ifdef CSI_DEBUG +void csi_debug_debug(const char *format, ...) +{ + if (csi_debug_get_level() <= CSI_DEBUG_LEVEL_DEBUG) { + va_list arg; + va_start(arg, format); + vfprintf(stdout, format, arg); + va_end(arg); + } +} + void csi_debug_info(const char *format, ...) { if (csi_debug_get_level() <= CSI_DEBUG_LEVEL_INFO) { @@ -64,14 +75,24 @@ void csi_debug_error(const char *format, ...) } } -static int csi_debug_print_list_int(int *list, int len, char *name) +void csi_debug_fatal(const char *format, ...) +{ + if (csi_debug_get_level() <= CSI_DEBUG_LEVEL_FATAL) { + va_list arg; + va_start(arg, format); + vfprintf(stdout, format, arg); + va_end(arg); + } +} + +static int csi_debug_print_list_int(int32_t *list, int len, char *name) { csi_debug_info("%s", name); for (int i = 0; i < len; i++) { if (i == 0) { csi_debug_info("["); } - csi_debug_info("%d", list[i]); + csi_debug_info("%4d", list[i]); if (i == (len - 1)) { csi_debug_info("]"); } else { @@ -186,6 +207,17 @@ int csi_diso_debug_info(struct csi_tensor *input0, return CSINN_TRUE; } +int csi_conv1d_debug_info(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *kernel, + struct csi_tensor *bias, + struct conv1d_params *params, + const char *name) +{ + csi_debug_print_sidcso_base(input, output, kernel, bias, &(params->base), name); + return CSINN_TRUE; +} + int csi_conv2d_debug_info(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, @@ -215,6 +247,17 @@ int csi_fullyconnected_debug_info(struct csi_tensor *input, return CSINN_TRUE; } +int csi_layer_norm_debug_info(struct csi_tensor *input, + struct csi_tensor *output, + struct csi_tensor *gamma, + struct csi_tensor *beta, + struct layer_norm_params *params, + const char *name) +{ + csi_debug_print_siso_base(input, output, &(params->base), name); + return CSINN_TRUE; +} + int csi_relu_debug_info(struct csi_tensor *input, struct csi_tensor *output, struct relu_params *params, @@ -413,6 +456,22 @@ int csi_reduce_debug_info(struct csi_tensor *input, return CSINN_TRUE; } +int csi_cache_matmul_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_matmul_params *params, const char *name) +{ + csi_debug_print_siso_base(input, output, &(params->base), name); + return CSINN_TRUE; +} + +int csi_cache_conv1d_debug_info(struct csi_tensor *input, struct csi_tensor *output, + struct csi_tensor *weight, struct csi_tensor *bias, + struct cache_conv1d_params *params, const char *name) +{ + csi_debug_print_siso_base(input, output, &(params->base), name); + return CSINN_TRUE; +} + int csi_clip_debug_info(struct csi_tensor *input, struct csi_tensor *output, struct clip_params *params, @@ -965,4 +1024,74 @@ int csi_debug_callback_unset(char *func_name) csi_debug_info("callback function unset: %s\n", func_name); return CSINN_CALLBACK_UNSET; } + +int csi_debug_dump_data(struct csi_tensor *input, char *filename) +{ + float *data = input->data; + int size = csi_tensor_size(input); + int i = 0; + FILE *fp = fopen(filename, "w+"); + for (i = 0; i < size; i++) { + if (i == size - 1) { + fprintf(fp, "%f", data[i]); + } else { + fprintf(fp, "%f\n", data[i]); + } + } + fclose(fp); + return CSINN_TRUE; +} + +// TODO:complete string pointer table +char *op_strings[] = { + [CSINN_OP_ABS] = "abs", + [CSINN_OP_ADD] = "add", + [CSINN_OP_AVGPOOL2D] = "avgpool2d", + [CSINN_OP_CONCAT] = "concat", + [CSINN_OP_CONV2D] = "conv2d", + [CSINN_OP_CONV2D_RELU] = "conv2d_relu", + [CSINN_OP_DEPTHWISE_CONV2D] = "dwconv2d", + [CSINN_OP_DEPTHWISE_CONV2D_RELU] = "dwconv2d_relu", + [CSINN_OP_FULLYCONNECTED] = "fullyconnected", + [CSINN_OP_GLOBAL_AVGPOOL2D] = "global_avgpool2d", + [CSINN_OP_LEAKY_RELU] = "leaky_relu", + [CSINN_OP_MAXPOOL2D] = "maxpool2d", + [CSINN_OP_RELU] = "relu", + [CSINN_OP_RESHAPE] = "reshape", + [CSINN_OP_SOFTMAX] = "softmax", + [CSINN_OP_YUV_RGB_SCALE] = "yuv_rgb_scale", +}; + +#define FREQ 30 // FPGA: 30MHz +// TODO: support NHWC layout too +int csi_benchmark_layer(struct csi_node *node, uint64_t start_time, uint64_t end_time, + int layer_idx) +{ + char *op_name = op_strings[node->type]; + csi_debug_info("[%3d]: %-18s %6.2lfms ^*^ feature_map:", layer_idx, op_name, + (end_time - start_time) / 1000000.0f); + + struct csi_tensor *in0 = (struct csi_tensor *)node->in[0]->data; + struct csi_tensor *out0 = (struct csi_tensor *)node->out[0]->data; + // print first input node and first output node dim + csi_debug_print_list_int(in0->dim, in0->dim_count, ""); + csi_debug_info(" ==> "); + csi_debug_print_list_int(out0->dim, out0->dim_count, ""); + // print kernel dim + if (node->type >= CSINN_OP_CONV1D && node->type <= CSINN_OP_CONV3D) { + struct csi_tensor *in1 = (struct csi_tensor *)node->in[1]->data; + int64_t cacls = out0->dim[1] * out0->dim[2] * out0->dim[3] * in0->dim[1] * in1->dim[2] * + in1->dim[3] * 2; + if (node->type >= CSINN_OP_DEPTHWISE_CONV2D && + node->type <= CSINN_OP_DEPTHWISE_CONV2D_CHANNEL_RELU6) { + cacls = out0->dim[1] * out0->dim[2] * out0->dim[3] * in1->dim[2] * in1->dim[3] * 2; + } + csi_debug_info(" (%2.4lfGOPS)", cacls / ((end_time - start_time) * 30 / 1000.0f)); + csi_debug_info(" kernel:"); + csi_debug_print_list_int(in1->dim, in1->dim_count, ""); + } + csi_debug_info("\n"); + return CSINN_TRUE; +} + #endif diff --git a/source/nn2/memory.c b/source/utils/memory.c similarity index 60% rename from source/nn2/memory.c rename to source/utils/memory.c index b04d6014..dc7870c2 100644 --- a/source/nn2/memory.c +++ b/source/utils/memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,12 +16,14 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ +#include #include "csi_nn.h" // #define CSI_MEM_DEBUG - +// #define CSI_MEM_DEBUG_VALID_WRITE +// #define CSI_USE_ATAT_MALLOC struct csi_mem_alloc_debug_element_ { void *ptr; int64_t size; @@ -64,7 +66,27 @@ static int csi_mem_map_insert(void *ptr, uint64_t size) void *csi_mem_alloc(int64_t size) { - void *ret = calloc(1, size); + void *ret; +#ifdef CSI_MEM_DEBUG_VALID_WRITE + ret = calloc(1, size + 8); + int8_t *check_ptr = ret + size; + /* magic number */ + check_ptr[0] = 0xff; + check_ptr[1] = 0x23; + check_ptr[2] = 0x33; + check_ptr[3] = 0x44; + check_ptr[4] = 0x45; + check_ptr[5] = 0x55; + check_ptr[6] = 0x67; + check_ptr[7] = 0xff; +#else +#ifdef CSI_USE_ATAT_MALLOC + void *csi_atat_calloc(size_t n, size_t m); + ret = csi_atat_calloc(1, size); +#else + ret = calloc(1, size); +#endif +#endif if (ret == NULL) { csi_debug_error("cannot alloc memory\n"); } @@ -76,6 +98,33 @@ void *csi_mem_alloc(int64_t size) return ret; } +void *csi_mem_calloc(size_t nmemb, size_t size) { return csi_mem_alloc(nmemb * size); } + +void *csi_mem_realloc(void *ptr, size_t size) +{ + void *ret = csi_mem_alloc(size); + if (!ptr) { + return ret; + } + memcpy(ret, ptr, size); + csi_mem_free(ptr); + return ret; +} + +void *csi_mem_alloc_aligned(int64_t size, int aligned_bytes) +{ + void *ptr = NULL; +#ifndef CSI_BUILD_RTOS + if (aligned_bytes == 0) { + aligned_bytes = getpagesize(); + } + int ret = posix_memalign(&ptr, aligned_bytes, size); + if (ret || ptr == NULL) + csi_debug_error("cannot alloc aligned memory\n"); +#endif + return ptr; +} + void csi_mem_free(void *ptr) { #ifdef CSI_MEM_DEBUG @@ -85,9 +134,24 @@ void csi_mem_free(void *ptr) e->is_free = 1; csi_mem_alloc_debug_map.total_size -= e->size; printf("csi_mem_free: total size = %ld\n", csi_mem_alloc_debug_map.total_size); +#ifdef CSI_MEM_DEBUG_VALID_WRITE + uint8_t *cptr = ptr + e->size; + if ((cptr[0] == 0xff) && (cptr[1] == 0x23) && (cptr[2] == 0x33) && (cptr[3] == 0x44) && + (cptr[4] == 0x45) && (cptr[5] == 0x55) && (cptr[6] == 0x67) && (cptr[7] == 0xff)) { + break; + } else { + printf("csi_mem_free: invalid write %p\n", ptr); + } +#else break; +#endif } } #endif +#ifdef CSI_USE_ATAT_MALLOC + void csi_atat_free(void *f); + csi_atat_free(ptr); +#else free(ptr); +#endif } diff --git a/tests/Makefile b/tests/Makefile index 262c7ba6..97f8995c 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -3,16 +3,16 @@ TEST_ROOT := $(shell pwd) all: test_ref test_anole test_ref_x86: - make -C validation -f Makefile.ref_x86 + make -C validation_layer -f Makefile.ref_x86 test_ref: - make -C validation -f Makefile.ref + make -C validation_layer -f Makefile.ref test_c860: - make -C validation -f Makefile.c860 + make -C validation_layer -f Makefile.c860 test_c906: - make -C validation -f Makefile.c906 + make -C validation_layer -f Makefile.c906 test_anole: make -C validation_graph -f Makefile.anole @@ -32,9 +32,12 @@ test_e804: test_ref_i805: make -C validation_xt800 -f Makefile.ref_i805 +unit_test_opt_interface: + make -C unit_test -f Makefile.rvv clean: rm -rf *.a *.asm utils/*.o - cd validation/; find . -name "*.o" -or -name "*.elf" | xargs rm; cd - + cd validation_layer; find . -name "*.o" -or -name "*.elf" | xargs rm; cd - cd validation_graph; find . -name "*.o" -or -name "*.elf" | xargs rm; cd - - cd validation_xt800; find . -name "*.o" -or -name "*.elf" | xargs rm; cd - \ No newline at end of file + cd validation_xt800; find . -name "*.o" -or -name "*.elf" | xargs rm; cd - + cd unit_test; find . -name "*.o" -or -name "*.elf" | xargs rm; cd - \ No newline at end of file diff --git a/tests/autotest/conftest.py b/tests/autotest/conftest.py index 23ec13ad..c5f22995 100644 --- a/tests/autotest/conftest.py +++ b/tests/autotest/conftest.py @@ -20,10 +20,10 @@ def pytest_addoption(parser): parser.addoption( - "--board", action="store", default="c860", help="board option: c860|c906|anole|x86_ref" + "--board", action="store", default="c860", help="board option: c860|c906|anole|x86_ref|c910" ) parser.addoption( - "--accuracy", action="store", default="0.1", help="error measures accuracy" + "--accuracy", action="store", default="0.99", help="error measures accuracy" ) diff --git a/tests/autotest/interface_test.py b/tests/autotest/interface_test.py index 268d12bb..6b94af17 100644 --- a/tests/autotest/interface_test.py +++ b/tests/autotest/interface_test.py @@ -26,8 +26,9 @@ TOPDIR = os.path.dirname(__file__) + "/../" python_path = "{TOPDIR_}/python_ref".format(TOPDIR_=TOPDIR) -elf_path = "{TOPDIR_}/validation".format(TOPDIR_=TOPDIR) +elf_path = "{TOPDIR_}/validation_layer".format(TOPDIR_=TOPDIR) valid_dir = "{TOPDIR_}/valid_datas".format(TOPDIR_=TOPDIR) +unit_test_elf_path = "{TOPDIR_}/unit_test".format(TOPDIR_=TOPDIR) def mkdir(path): @@ -90,8 +91,9 @@ def compile_execute(cmdopt): if board == "c860": qemu = "qemu-cskyv2 -cpu ck860v" elif board == "c906": - qemu = "qemu-riscv64" - os.system("make clean;make -j16 test_" + board) + qemu = "qemu-riscv64 -cpu c906fdv" + elif board == "c910": + qemu = "qemu-riscv64 -cpu c910v" mkdir(valid_dir) return qemu, accuracy @@ -108,3 +110,97 @@ def test_inference(cmdopt, elf_data, compile_execute): ret = os.system(cmd) assert ret == 0 run_base(compile_execute[0], elf_data, valid_dir + "/" + python_data + "_data_f32.bin", compile_execute[1]) + + +def get_testtype(op_type): + if "averagepool" in op_type or "maxpool" in op_type: + test_type = ["random","2x2s2","2x2s2_p1","3x3s2","3x3s2_p1","3x3s1_p1"] + elif op_type == "convolution": + test_type = ["random","gemm_conv1x1s1","conv3x3s1_im2col_sgemm","conv3x3s1_winograd64","conv3x3s1_winograd64","gemm_random"] + elif op_type == "depthwise_convolution": + test_type = ["random","3x3s1","3x3s2"] + else: + test_type =[] + return test_type + + + +@pytest.mark.usefixtures("compile_execute") +class TestCSINN: + @pytest.mark.parametrize('elf_data', numberOffile(elf_path, "elf")) + def test_layer(self,elf_data,compile_execute): + flag = 0 + data = elf_data.split("/")[-1].split(".")[0] + if "argmax" in data or "argmin" in data: + path = os.path.join(python_path, data + "_stride.py") + elif "roipool" in data: + path = os.path.join(python_path, data + "_caffe.py") + else: + path = os.path.join(python_path, data + "_nchw.py") + if not os.path.exists(path): + path = os.path.join(python_path, data + ".py") + flag = 1 + + os.chdir(valid_dir) + if "roipool" in data: + cmd = f'docker run --rm -v {valid_dir}:mnt tvm_caffe:rfcn sh -c "cd mnt && python3 {path}"' + else: + cmd = f"python {path}" + ret = os.system(cmd) + assert ret == 0 + if flag == 1: + run_base(compile_execute[0], elf_data, valid_dir + "/" + data + "_data_f32.bin", compile_execute[1]) + else: + if "argmax" in data or "argmin" in data: + run_base(compile_execute[0], elf_data, valid_dir + "/" + data + "_stride_data_f32.bin", compile_execute[1]) + else: + run_base(compile_execute[0], elf_data, valid_dir + "/" + data + "_nchw_data_f32.bin", compile_execute[1]) + + + @pytest.mark.parametrize('elf_data', numberOffile(elf_path, "elf")) + def test_rvv_layer(self,elf_data,compile_execute): + flag = 0 + data = elf_data.split("/")[-1].split(".")[0] + test_type = get_testtype(data) + path = os.path.join(python_path, data + "_nchw.py") + if not os.path.exists(path): + path = os.path.join(python_path, data + ".py") + flag = 1 + if test_type != []: + for i in test_type: + cmd = f"python {path} {i}" + ret = os.system(cmd) + assert ret == 0 + if flag == 1: + run_base(compile_execute[0], elf_data, TOPDIR + data + "_data_f32.bin", compile_execute[1]) + else: + run_base(compile_execute[0], elf_data, TOPDIR + data + "_nchw_data_f32.bin", compile_execute[1]) + else: + cmd = f"python {path}" + ret = os.system(cmd) + assert ret == 0 + if flag == 1: + run_base(compile_execute[0], elf_data, TOPDIR + data + "_data_f32.bin", compile_execute[1]) + else: + run_base(compile_execute[0], elf_data, TOPDIR + data + "_nchw_data_f32.bin", compile_execute[1]) + + + + @pytest.mark.parametrize('unit_test_elf_data', numberOffile(unit_test_elf_path, "elf")) + def test_opt_interface(self, unit_test_elf_data, compile_execute): + run_base(compile_execute[0], unit_test_elf_data, "", compile_execute[1]) + + +class TestHeterogeneous: + def test_subgraph_fuse(self): + hlight_test_dir = os.path.join(TOPDIR, "validation_graph", "hlight") + compile_cmd = f"make -C {hlight_test_dir}" + + ret = os.system(compile_cmd) + assert ret == 0, "Compiling subgraph fusion test fails." + + os.chdir(hlight_test_dir) + exec_cmd = f"./run.sh" + ret = os.system(exec_cmd) + assert ret == 0, "Execute subgraph fusion test fails" + diff --git a/tests/python_ref/averagepool.py b/tests/python_ref/averagepool.py index 4124537e..c83da016 100755 --- a/tests/python_ref/averagepool.py +++ b/tests/python_ref/averagepool.py @@ -73,7 +73,7 @@ def avgpool2d_f32(): print(para) print(len(src_out_1)) - with open("avgpool2d_data_f32.bin", "wb") as fp: + with open("averagepool_data_f32.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) diff --git a/tests/python_ref/averagepool3d.py b/tests/python_ref/averagepool3d.py index e19bba99..a8fa2088 100644 --- a/tests/python_ref/averagepool3d.py +++ b/tests/python_ref/averagepool3d.py @@ -93,7 +93,7 @@ def avgpool3d_f32(): para.append(include_pad)# 20 print(para) - with open("avgpool3d_data_f32.bin", "wb") as fp: + with open("averagepool3d_data_f32.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) diff --git a/tests/python_ref/averagepool_nchw.py b/tests/python_ref/averagepool_nchw.py index 3e6df3b5..4824432a 100644 --- a/tests/python_ref/averagepool_nchw.py +++ b/tests/python_ref/averagepool_nchw.py @@ -7,7 +7,7 @@ from torch import tensor from torch.nn import functional as fn -def avgpool2d_f32(): +def avgpool2d_f32(test_type): para = [] # init the input data and parameters batch = int(np.random.randint(1, high=4, size=1)) @@ -15,28 +15,63 @@ def avgpool2d_f32(): in_height = int(np.random.randint(32, high=64, size=1)) in_width = int(np.random.randint(32, high=64, size=1)) - stride_h = int(np.random.randint(1, high=4, size=1)) - stride_w = int(np.random.randint(1, high=4, size=1)) - - kernel_h = int(np.random.randint(stride_h, high=9, size=1)) - kernel_w = int(np.random.randint(stride_w, high=9, size=1)) + if test_type == "random": + stride_h = int(np.random.randint(1, high=4, size=1)) + stride_w = int(np.random.randint(1, high=4, size=1)) + + kernel_h = int(np.random.randint(stride_h, high=9, size=1)) + kernel_w = int(np.random.randint(stride_w, high=9, size=1)) + pad_left = pad_right = 0 + pad_top = pad_down = 0 + + pad_w = (in_width - kernel_w) - int((in_width - kernel_w) / stride_w) * stride_w + if(pad_w !=0): + pad_w = int((in_width - kernel_w) / stride_w) * stride_w + stride_w - (in_width - kernel_w) + pad_left = int(np.random.randint(0, high=pad_w, size=1)) + pad_right = pad_w - pad_left + + pad_h = (in_height - kernel_h) - int((in_height - kernel_h) / stride_h) * stride_h + if(pad_h !=0): + pad_h = int((in_height - kernel_h) / stride_h) * stride_h + stride_h - (in_height - kernel_h) + pad_top = int(np.random.randint(0, high=pad_h, size=1)) + pad_down = pad_h - pad_top + + elif test_type == "2x2s2": + stride_h = stride_w = 2 + kernel_h = kernel_w = 2 + pad_left = pad_top = 0 + pad_right = int(np.random.randint(0, high=1, size=1)) + pad_down = int(np.random.randint(0, high=1, size=1)) + + elif test_type == "2x2s2_p1": + stride_h = stride_w = 2 + kernel_h = kernel_w = 2 + pad_left = pad_top = 1 + pad_right = int(np.random.randint(0, high=1, size=1)) + pad_down = int(np.random.randint(0, high=1, size=1)) + + + elif test_type == "3x3s2": + stride_h = stride_w = 2 + kernel_h = kernel_w = 3 + pad_left = pad_top = 0 + pad_right = int(np.random.randint(0, high=1, size=1)) + pad_down = int(np.random.randint(0, high=1, size=1)) + + elif test_type == "3x3s2_p1": + stride_h = stride_w = 2 + kernel_h = kernel_w = 3 + pad_left = pad_top = 1 + pad_right = int(np.random.randint(0, high=1, size=1)) + pad_down = int(np.random.randint(0, high=1, size=1)) + + elif test_type == "3x3s1_p1": + stride_h = stride_w = 1 + kernel_h = kernel_w = 3 + pad_left = pad_right = pad_top = pad_down = 1 include_pad = int(np.random.randint(0, high=2, size=1)) # 0: false 1: true - pad_left = pad_right = 0 - pad_top = pad_down = 0 - - pad_w = (in_width - kernel_w) - int((in_width - kernel_w) / stride_w) * stride_w - if(pad_w !=0): - pad_w = int((in_width - kernel_w) / stride_w) * stride_w + stride_w - (in_width - kernel_w) - pad_left = int(np.random.randint(0, high=pad_w, size=1)) - pad_right = pad_w - pad_left - - pad_h = (in_height - kernel_h) - int((in_height - kernel_h) / stride_h) * stride_h - if(pad_h !=0): - pad_h = int((in_height - kernel_h) / stride_h) * stride_h + stride_h - (in_height - kernel_h) - pad_top = int(np.random.randint(0, high=pad_h, size=1)) - pad_down = pad_h - pad_top zero_point = int(np.random.randint(-8, high=8, size=1)) std = int(np.random.randint(1, high=3, size=1)) @@ -76,7 +111,7 @@ def avgpool2d_f32(): print(para) print(len(src_out_1)) - with open("avgpool2d_nchw_data_f32.bin", "wb") as fp: + with open("averagepool_nchw_data_f32.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) @@ -89,5 +124,6 @@ def avgpool2d_f32(): if __name__ == '__main__': - avgpool2d_f32() + test_type = sys.argv[1] + avgpool2d_f32(test_type) print("end") diff --git a/tests/python_ref/concat.py b/tests/python_ref/concat.py index 72dbce96..22cd7050 100755 --- a/tests/python_ref/concat.py +++ b/tests/python_ref/concat.py @@ -17,15 +17,13 @@ def concat_f32(): con_axis = int(np.random.randint(0, high=4, size=1)) src_in = [] - zero_point = int(np.random.randint(-6, high=6, size=1)) - std = int(np.random.randint(1, high=20, size=1)) - src_out = np.random.normal(zero_point, std, (batch, in_channel, in_size_y, in_size_x)) + low = int(np.random.randint(-5, high=-1, size=1)) + high = int(np.random.randint(1, high=5, size=1)) + src_out = np.random.uniform(low, high, (batch, in_channel, in_size_y, in_size_x)) src_in.append(src_out) for i in range(0, input_cn - 1): - zero_point = int(np.random.randint(-6, high=6, size=1)) - std = int(np.random.randint(1, high=20, size=1)) - src_in2 = np.random.normal(zero_point, std, (batch, in_channel, in_size_y, in_size_x)) + src_in2 = np.random.uniform(low, high, (batch, in_channel, in_size_y, in_size_x)) src_in.append(src_in2) src_out = np.concatenate((src_out, src_in2), axis=con_axis) diff --git a/tests/python_ref/convolution_nchw.py b/tests/python_ref/convolution_nchw.py index 1822969a..9681e3d1 100644 --- a/tests/python_ref/convolution_nchw.py +++ b/tests/python_ref/convolution_nchw.py @@ -7,19 +7,51 @@ from torch import tensor from torch.nn import functional as fn -def convolution_f32(): +def convolution_f32(test_type): para = [] - # init the input data and parameters batch = int(np.random.randint(1, high=4, size=1)) - in_size_x = int(np.random.randint(64, high=128, size=1)) #width - in_size_y = int(np.random.randint(64, high=128, size=1)) #height - in_channel = int(np.random.randint(2, high=16, size=1)) - stride_x = int(np.random.randint(1, high=3, size=1)) - stride_y = int(np.random.randint(1, high=3, size=1)) - kernel_x = int(np.random.randint(stride_x, high=7, size=1)) - kernel_y = int(np.random.randint(stride_y, high=7, size=1)) - dilation_x = int(np.random.randint(1, high=2, size=1)) - dilation_y = int(np.random.randint(1, high=2, size=1)) + in_size_x = int(np.random.randint(6, high=10, size=1)) #width + in_size_y = int(np.random.randint(6, high=10, size=1)) #height + in_channel = int(np.random.randint(2, high=10, size=1)) + out_channel = int(np.random.randint(1, high=10, size=1)) + + # init the input data and parameters + if test_type == "random": + stride_x = int(np.random.randint(1, high=3, size=1)) + stride_y = int(np.random.randint(1, high=3, size=1)) + kernel_x = int(np.random.randint(stride_x, high=7, size=1)) + kernel_y = int(np.random.randint(stride_y, high=7, size=1)) + dilation_x = int(np.random.randint(1, high=2, size=1)) + dilation_y = int(np.random.randint(1, high=2, size=1)) + elif test_type == "gemm_conv1x1s1": + stride_x = 1 + stride_y = 1 + kernel_x = 1 + kernel_y = 1 + dilation_x = 1 + dilation_y = 1 + + elif test_type == "conv3x3s1_im2col_sgemm" or test_type == "conv3x3s1_winograd64": + stride_x = 1 + stride_y = 1 + kernel_x = 3 + kernel_y = 3 + dilation_x = 1 + dilation_y = 1 + if test_type == "conv3x3s1_winograd64": + n = int(np.random.randint(1, high=4, size=1)) + in_channel = 8 * n + out_channel = 8 * n + + elif test_type == "gemm_random": + stride_x = int(np.random.randint(2, high=3, size=1)) + stride_y = int(np.random.randint(2, high=3, size=1)) + kernel_x = int(np.random.randint(stride_x, high=7, size=1)) + kernel_y = int(np.random.randint(stride_y, high=7, size=1)) + dilation_x = int(np.random.randint(1, high=2, size=1)) + dilation_y = int(np.random.randint(1, high=2, size=1)) + + kernel_x_t = kernel_x + (kernel_x - 1) * (dilation_x - 1) kernel_y_t = kernel_y + (kernel_y - 1) * (dilation_y - 1) pad_left = pad_right = pad_top = pad_down = 0 @@ -36,7 +68,6 @@ def convolution_f32(): pad_top = int(np.random.randint(0, high=pad_y, size=1)) pad_down = pad_y - pad_top - out_channel = int(np.random.randint(1, high=64, size=1)) zero_point1 = int(np.random.randint(-3, high=3, size=1)) std1 = int(np.random.randint(1, high=3, size=1)) zero_point2 = int(np.random.randint(-3, high=3, size=1)) @@ -51,7 +82,7 @@ def convolution_f32(): weight = weight.astype(np.float32) bias = bias.astype(np.float32) - + t_src_in = tensor(src_in) t_weight = tensor(weight) t_bias = tensor(bias) @@ -72,19 +103,19 @@ def convolution_f32(): para.append(batch) para.append(in_channel) para.append(in_size_y) #height - para.append(in_size_x) #width + para.append(in_size_x) #width para.append(stride_y) para.append(stride_x) - para.append(kernel_y) - para.append(kernel_x) + para.append(kernel_y) + para.append(kernel_x) para.append(pad_left) para.append(pad_right) para.append(pad_top) para.append(pad_down) - para.append(out_channel) + para.append(out_channel) para.append(dilation_x) para.append(dilation_y) - para.append(out_size_x) #width + para.append(out_size_x) #width para.append(out_size_y) #height print(para) @@ -106,5 +137,6 @@ def convolution_f32(): if __name__ == '__main__': - convolution_f32() + test_type = sys.argv[1] + convolution_f32(test_type) print("end") diff --git a/tests/python_ref/deconvolution.py b/tests/python_ref/deconvolution.py index cfdbbadf..47b4a9f6 100755 --- a/tests/python_ref/deconvolution.py +++ b/tests/python_ref/deconvolution.py @@ -78,7 +78,7 @@ def deconvolution_f32(): para.append(out_size_y) print(para) - with open("deconvolution_data_32.bin", "wb") as fp: + with open("deconvolution_data_f32.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) diff --git a/tests/python_ref/depthwise_convolution_nchw.py b/tests/python_ref/depthwise_convolution_nchw.py index 0b9c19b7..3123a138 100644 --- a/tests/python_ref/depthwise_convolution_nchw.py +++ b/tests/python_ref/depthwise_convolution_nchw.py @@ -7,19 +7,36 @@ from torch import tensor from torch.nn import functional as fn -def depthwise_convolution_f32(): +def depthwise_convolution_f32(test_type): para = [] - # init the input data and parameters - batch = int(np.random.randint(1, high=4, size=1)) + batch = int(np.random.randint(1, high=4, size=1)) in_size_x = int(np.random.randint(32, high=64, size=1)) in_size_y = int(np.random.randint(32, high=64, size=1)) in_channel = int(np.random.randint(2, high=32, size=1)) - stride_x = int(np.random.randint(1, high=3, size=1)) - stride_y = int(np.random.randint(1, high=3, size=1)) - kernel_x = int(np.random.randint(stride_x + 1, high=7, size=1)) - kernel_y = int(np.random.randint(stride_y + 1, high=7, size=1)) dilation_x = int(np.random.randint(1, high=5, size=1)) dilation_y = int(np.random.randint(1, high=5, size=1)) + + # init the input data and parameters + if test_type == "random": + stride_x = int(np.random.randint(1, high=3, size=1)) + stride_y = int(np.random.randint(1, high=3, size=1)) + kernel_x = int(np.random.randint(stride_x + 1, high=7, size=1)) + kernel_y = int(np.random.randint(stride_y + 1, high=7, size=1)) + + elif test_type == "3x3s1": + stride_x = 1 + stride_y = 1 + kernel_x = 3 + kernel_y = 3 + dilation_x = dilation_y = 1 + + elif test_type == "3x3s2": + stride_x = 2 + stride_y = 2 + kernel_x = 3 + kernel_y = 3 + dilation_x = dilation_y = 1 + kernel_x_t = kernel_x + (kernel_x - 1) * (dilation_x - 1) kernel_y_t = kernel_y + (kernel_y - 1) * (dilation_y - 1) pad_left = pad_right = pad_top = pad_down = 0 @@ -101,5 +118,6 @@ def depthwise_convolution_f32(): if __name__ == '__main__': - depthwise_convolution_f32() + test_type = sys.argv[1] + depthwise_convolution_f32(test_type) print("end") diff --git a/tests/python_ref/expand_dims.py b/tests/python_ref/expand_dims.py index 08d9fedb..c3a74348 100755 --- a/tests/python_ref/expand_dims.py +++ b/tests/python_ref/expand_dims.py @@ -38,7 +38,7 @@ def expand_dims_f32(): with open("expand_dims_data_f32.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) - data = struct.pack(('%df' % len(in_shape)), *in_shape) + data = struct.pack(('%di' % len(in_shape)), *in_shape) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) fp.write(data) diff --git a/tests/python_ref/fullyconnected.py b/tests/python_ref/fullyconnected.py index 48e0c643..afa78ba4 100755 --- a/tests/python_ref/fullyconnected.py +++ b/tests/python_ref/fullyconnected.py @@ -13,12 +13,12 @@ def fullconnected_f32(): in_size = int(np.random.randint(64, high=256, size=1)) out_size = int(np.random.randint(64, high=256, size=1)) - zero_point1 = int(np.random.randint(-2, high=2, size=1)) + zero_point1 = int(np.random.randint(-1, high=2, size=1)) std1 = int(np.random.randint(1, high=2, size=1)) - zero_point2 = int(np.random.randint(-2, high=2, size=1)) - std2 = int(np.random.randint(1, high=20, size=1)) - zero_point3 = int(np.random.randint(-6, high=6, size=1)) - std3 = int(np.random.randint(1, high=20, size=1)) + zero_point2 = int(np.random.randint(1, high=3, size=1)) + std2 = int(np.random.randint(1, high=2, size=1)) + zero_point3 = int(np.random.randint(1, high=3, size=1)) + std3 = int(np.random.randint(1, high=2, size=1)) src_in = np.random.normal(zero_point1, std1, (batch, in_size)) weight = np.random.normal(zero_point2, std2, (in_size, out_size)) diff --git a/tests/python_ref/global_avgpool.py b/tests/python_ref/global_avgpool.py index 7948811d..16e669a0 100644 --- a/tests/python_ref/global_avgpool.py +++ b/tests/python_ref/global_avgpool.py @@ -52,7 +52,7 @@ def global_avgpool2d_f32(): print(para) - with open("global_avgpool2d_data_f32_.bin", "wb") as fp: + with open("global_avgpool_data_f32_.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) diff --git a/tests/python_ref/global_avgpool_nchw.py b/tests/python_ref/global_avgpool_nchw.py index 5c49d08e..d11518b8 100644 --- a/tests/python_ref/global_avgpool_nchw.py +++ b/tests/python_ref/global_avgpool_nchw.py @@ -44,7 +44,7 @@ def global_avgpool2d_f32(): print(para) - with open("global_avgpool2d_nchw_data_f32.bin", "wb") as fp: + with open("global_avgpool_nchw_data_f32.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) diff --git a/tests/python_ref/global_maxpool.py b/tests/python_ref/global_maxpool.py index f6eac28f..e8e367b8 100644 --- a/tests/python_ref/global_maxpool.py +++ b/tests/python_ref/global_maxpool.py @@ -49,7 +49,7 @@ def global_maxpool2d_f32(): print(para) - with open("global_maxpool2d_data_f32.bin", "wb") as fp: + with open("global_maxpool_data_f32.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) diff --git a/tests/python_ref/global_maxpool_nchw.py b/tests/python_ref/global_maxpool_nchw.py index 8eab2227..87f35916 100644 --- a/tests/python_ref/global_maxpool_nchw.py +++ b/tests/python_ref/global_maxpool_nchw.py @@ -44,7 +44,7 @@ def global_maxpool2d_f32(): print(para) - with open("global_maxpool2d_nchw_data_f32.bin", "wb") as fp: + with open("global_maxpool_nchw_data_f32.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) diff --git a/tests/python_ref/leaky_relu.py b/tests/python_ref/leaky_relu.py index a3f6795e..c0afc874 100755 --- a/tests/python_ref/leaky_relu.py +++ b/tests/python_ref/leaky_relu.py @@ -16,7 +16,7 @@ def leaky_relu_f32(): in_size_x = int(np.random.randint(32, high=64, size=1)) zero_point = int(np.random.randint(-6, high=6, size=1)) std = int(np.random.randint(1, high=20, size=1)) - alpha = np.float32(np.random.random(1)) + alpha = np.float32(np.random.uniform(0, 0.5, (1))) # rvv constrains src_in = np.random.normal(zero_point, std, (batch, in_size_y, in_size_x, in_channel)) src_in = src_in.astype(np.float32) diff --git a/tests/python_ref/log.py b/tests/python_ref/log.py index 1cea3a58..dd648863 100644 --- a/tests/python_ref/log.py +++ b/tests/python_ref/log.py @@ -14,12 +14,13 @@ def log_f32(): in_size_x = int(np.random.randint(128, high=512, size=1)) in_size_y = int(np.random.randint(128, high=512, size=1)) in_channel = int(np.random.randint(1, high=64, size=1)) - zero_point = int(np.random.randint(-600, high=600, size=1)) - std = int(np.random.randint(1, high=20, size=1)) + zero_point = int(np.random.randint(1, high=10, size=1)) + std = int(np.random.randint(10, high=30, size=1)) - src_in = np.random.normal(zero_point, std, (batch, in_size_y, in_size_x, in_channel)) + # src_in = np.random.normal(zero_point, std, (batch, in_size_y, in_size_x, in_channel)) + src_in = np.random.uniform(zero_point, std, (batch, in_size_y, in_size_x, in_channel)) - out_calcu = tf.log1p(tf.convert_to_tensor(src_in)) + out_calcu = tf.log(tf.convert_to_tensor(src_in)) sess = tf.Session() diff --git a/tests/python_ref/maxpool.py b/tests/python_ref/maxpool.py index f869d1ee..70934443 100755 --- a/tests/python_ref/maxpool.py +++ b/tests/python_ref/maxpool.py @@ -70,7 +70,7 @@ def maxpool2d_f32(): print(para) print(len(src_out_1)) - with open("maxpool2d_data_f32.bin", "wb") as fp: + with open("maxpool_data_f32.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) diff --git a/tests/python_ref/maxpool_nchw.py b/tests/python_ref/maxpool_nchw.py index 45c80602..81313ccd 100644 --- a/tests/python_ref/maxpool_nchw.py +++ b/tests/python_ref/maxpool_nchw.py @@ -7,7 +7,7 @@ from torch import tensor from torch.nn import functional as fn -def maxpool2d_f32(): +def maxpool2d_f32(test_type): para = [] # init the input data and parameters batch = int(np.random.randint(1, high=4, size=1)) @@ -15,26 +15,61 @@ def maxpool2d_f32(): in_height = int(np.random.randint(32, high=64, size=1)) in_width = int(np.random.randint(32, high=64, size=1)) - stride_h = int(np.random.randint(1, high=4, size=1)) - stride_w = int(np.random.randint(1, high=4, size=1)) + if test_type == "random": + stride_h = int(np.random.randint(1, high=4, size=1)) + stride_w = int(np.random.randint(1, high=4, size=1)) + + kernel_h = int(np.random.randint(stride_h, high=9, size=1)) + kernel_w = int(np.random.randint(stride_w, high=9, size=1)) + pad_left = pad_right = 0 + pad_top = pad_down = 0 + + pad_w = (in_width - kernel_w) - int((in_width - kernel_w) / stride_w) * stride_w + if(pad_w !=0): + pad_w = int((in_width - kernel_w) / stride_w) * stride_w + stride_w - (in_width - kernel_w) + pad_left = int(np.random.randint(0, high=pad_w, size=1)) + pad_right = pad_w - pad_left + + pad_h = (in_height - kernel_h) - int((in_height - kernel_h) / stride_h) * stride_h + if(pad_h !=0): + pad_h = int((in_height - kernel_h) / stride_h) * stride_h + stride_h - (in_height - kernel_h) + pad_top = int(np.random.randint(0, high=pad_h, size=1)) + pad_down = pad_h - pad_top + + elif test_type == "2x2s2": + stride_h = stride_w = 2 + kernel_h = kernel_w = 2 + pad_left = pad_top = 0 + pad_right = int(np.random.randint(0, high=1, size=1)) + pad_down = int(np.random.randint(0, high=1, size=1)) + elif test_type == "2x2s2_p1": + stride_h = stride_w = 2 + kernel_h = kernel_w = 2 + pad_left = pad_top = 1 + pad_right = int(np.random.randint(0, high=1, size=1)) + pad_down = int(np.random.randint(0, high=1, size=1)) + + + elif test_type == "3x3s2": + stride_h = stride_w = 2 + kernel_h = kernel_w = 3 + pad_left = pad_top = 0 + pad_right = int(np.random.randint(0, high=1, size=1)) + pad_down = int(np.random.randint(0, high=1, size=1)) + + elif test_type == "3x3s2_p1": + stride_h = stride_w = 2 + kernel_h = kernel_w = 3 + pad_left = pad_top = 1 + pad_right = int(np.random.randint(0, high=1, size=1)) + pad_down = int(np.random.randint(0, high=1, size=1)) + + elif test_type == "3x3s1_p1": + stride_h = stride_w = 1 + kernel_h = kernel_w = 3 + pad_left = pad_right = pad_top = pad_down = 1 - kernel_h = int(np.random.randint(stride_h, high=9, size=1)) - kernel_w = int(np.random.randint(stride_w, high=9, size=1)) - pad_left = pad_right = 0 - pad_top = pad_down = 0 - - pad_w = (in_width - kernel_w) - int((in_width - kernel_w) / stride_w) * stride_w - if(pad_w !=0): - pad_w = int((in_width - kernel_w) / stride_w) * stride_w + stride_w - (in_width - kernel_w) - pad_left = int(np.random.randint(0, high=pad_w, size=1)) - pad_right = pad_w - pad_left - - pad_h = (in_height - kernel_h) - int((in_height - kernel_h) / stride_h) * stride_h - if(pad_h !=0): - pad_h = int((in_height - kernel_h) / stride_h) * stride_h + stride_h - (in_height - kernel_h) - pad_top = int(np.random.randint(0, high=pad_h, size=1)) - pad_down = pad_h - pad_top zero_point = int(np.random.randint(-8, high=8, size=1)) std = int(np.random.randint(1, high=3, size=1)) @@ -72,7 +107,7 @@ def maxpool2d_f32(): para.append(out_width) print(para) - with open("maxpool2d_nchw_data_f32.bin", "wb") as fp: + with open("maxpool_nchw_data_f32.bin", "wb") as fp: data = struct.pack(('%di' % len(para)), *para) fp.write(data) data = struct.pack(('%df' % len(src_in_1)), *src_in_1) @@ -85,5 +120,6 @@ def maxpool2d_f32(): if __name__ == '__main__': - maxpool2d_f32() + test_type = sys.argv[1] + maxpool2d_f32(test_type) print("end") diff --git a/tests/unit_test/Makefile.rvv b/tests/unit_test/Makefile.rvv new file mode 100644 index 00000000..8bbf1434 --- /dev/null +++ b/tests/unit_test/Makefile.rvv @@ -0,0 +1,45 @@ +LIB_DIR = ../../riscv_build +INCLUDE = -I../../include -I../utils +CFLAGS = -O0 -g3 -static +CFLAGS += -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d +CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections +CFLAGS += -DCSINN_API=15 +LIB_NAME = csi_nn2_rvv +CC = riscv64-unknown-linux-gnu-gcc + + +test_objs = + +test_objs += maxpool.o +test_objs += avgpool.o +test_objs += dwconv2d.o +test_objs += relu.o +test_objs += leaky_relu.o +test_objs += add.o +#test_objs += mul.o +test_objs += pad.o +test_objs += concat.o +test_objs += fullyconnected.o +test_objs += gemm.o +test_objs += conv2d_1x1s1_gemm.o +test_objs += conv2d_im2col_gemm.o +test_objs += conv2d_winograd.o + +utils_objs = + +utils_objs += ../utils/math_snr.o +utils_objs += ../utils/test_utils.o + +all: csi + +csi: $(utils_objs) $(test_objs) + +$(utils_objs): %.o: %.c + $(CC) -c $(CFLAGS) $(INCLUDE) $< -o $@ + +$(test_objs): %.o: %.c + $(CC) -c $(CFLAGS) $(INCLUDE) $< -o $@ + $(CC) $@ $(CFLAGS) $(BOARD) $(utils_objs) -L$(LIB_DIR) -l$(LIB_NAME) -lc -lm -o $@.elf -lgcov + +clean: + rm -rf $(test_objs) $(utils_objs) *.a *.asm *.elf *.bin *.asm diff --git a/tests/unit_test/add.c b/tests/unit_test/add.c new file mode 100644 index 00000000..bb613960 --- /dev/null +++ b/tests/unit_test/add.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/basic_math.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_add(void *input0_data, void *input1_data, void *ref_data, int (*func)(), int in_c, + int in_h, int in_w, enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + input0->dim[0] = 1; + input0->dim[1] = in_c; + input0->dim[2] = in_h; + input0->dim[3] = in_w; + input0->dim_count = 4; + input0->name = "input0"; + int in0_size = csi_tensor_size(input0); + + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + input1->dim[0] = 1; + input1->dim[1] = in_c; + input1->dim[2] = in_h; + input1->dim[3] = in_w; + input1->dim_count = 4; + input1->name = "input1"; + int in1_size = csi_tensor_size(input1); + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = 1; + output->dim[1] = in_c; + output->dim[2] = in_h; + output->dim[3] = in_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct diso_params params; + params.base.name = "params"; + + input0->data = input0_data; + input1->data = input1_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + func(input0, input1, output, ¶ms); + + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input0); + csi_free_tensor(input1); + csi_mem_free(output->data); + csi_free_tensor(output); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of add for RVV.\n"); + verify_add(add_fp32_in0, add_fp32_in1, add_fp32_out, csi_nn_rvv_add_fp32, 2, 5, 11, + CSINN_DTYPE_FLOAT32); + verify_add(add_fp16_in0, add_fp16_in1, add_fp16_out, csi_nn_rvv_add_fp16, 2, 5, 11, + CSINN_DTYPE_FLOAT16); + // verify_add(add_int8_in0, add_int8_in1, add_int8_out, csi_nn_rvv_add_int8, 2, 5, 11, + // CSINN_DTYPE_INT8); + return done_testing(); +} diff --git a/tests/unit_test/avgpool.c b/tests/unit_test/avgpool.c new file mode 100644 index 00000000..767e6864 --- /dev/null +++ b/tests/unit_test/avgpool.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/avgpool.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_avgpool2d(void *input_data, void *ref_data, int (*func)(), int in_c, int in_h, int in_w, + int out_c, int out_h, int out_w, int kernel_h, int kernel_w, int stride_h, + int stride_w, int pad_h, int pad_w, enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input = csi_alloc_tensor(NULL); + input->dim[0] = 1; + input->dim[1] = in_c; + input->dim[2] = in_h; + input->dim[3] = in_w; + input->dim_count = 4; + input->name = "input"; + int in_size = csi_tensor_size(input); + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = input->dim[0]; + output->dim[1] = out_c; + output->dim[2] = out_h; + output->dim[3] = out_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct pool_params params; + params.base.name = "params"; + params.ceil_mode = 0; + params.stride_height = stride_h; + params.stride_width = stride_w; + params.filter_height = kernel_h; + params.filter_width = kernel_w; + params.pad_left = pad_w; + params.pad_right = pad_w; + params.pad_top = pad_h; + params.pad_down = pad_h; + params.count_include_pad = 1; + + input->data = input_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + func(input, output, ¶ms); + + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input); + csi_mem_free(output->data); + csi_free_tensor(output); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of avgpool for RVV.\n"); + verify_avgpool2d(avgpool2x2s2_fp32_in, avgpool2x2s2_fp32_out, csi_nn_rvv_avgpool2x2s2_fp32, 2, + 6, 18, 2, 3, 9, 2, 2, 2, 2, 0, 0, CSINN_DTYPE_FLOAT32); + verify_avgpool2d(avgpool2x2s2_fp16_in, avgpool2x2s2_fp16_out, csi_nn_rvv_avgpool2x2s2_fp16, 2, + 6, 18, 2, 3, 9, 2, 2, 2, 2, 0, 0, CSINN_DTYPE_FLOAT16); + + verify_avgpool2d(avgpool2x2s2_p1_fp32_in, avgpool2x2s2_p1_fp32_out, + csi_nn_rvv_avgpool2x2s2_p1_fp32, 2, 7, 19, 2, 4, 10, 2, 2, 2, 2, 1, 1, + CSINN_DTYPE_FLOAT32); + verify_avgpool2d(avgpool2x2s2_p1_fp16_in, avgpool2x2s2_p1_fp16_out, + csi_nn_rvv_avgpool2x2s2_p1_fp16, 2, 7, 19, 2, 4, 10, 2, 2, 2, 2, 1, 1, + CSINN_DTYPE_FLOAT16); + + verify_avgpool2d(avgpool3x3s2_fp32_in, avgpool3x3s2_fp32_out, csi_nn_rvv_avgpool3x3s2_fp32, 2, + 7, 19, 2, 3, 9, 3, 3, 2, 2, 0, 0, CSINN_DTYPE_FLOAT32); + verify_avgpool2d(avgpool3x3s2_fp16_in, avgpool3x3s2_fp16_out, csi_nn_rvv_avgpool3x3s2_fp16, 2, + 7, 19, 2, 3, 9, 3, 3, 2, 2, 0, 0, CSINN_DTYPE_FLOAT16); + + verify_avgpool2d(avgpool3x3s2_p1_fp32_in, avgpool3x3s2_p1_fp32_out, + csi_nn_rvv_avgpool3x3s2_p1_fp32, 2, 6, 18, 2, 3, 9, 3, 3, 2, 2, 1, 1, + CSINN_DTYPE_FLOAT32); + verify_avgpool2d(avgpool3x3s2_p1_fp16_in, avgpool3x3s2_p1_fp16_out, + csi_nn_rvv_avgpool3x3s2_p1_fp16, 2, 6, 18, 2, 3, 9, 3, 3, 2, 2, 1, 1, + CSINN_DTYPE_FLOAT16); + + verify_avgpool2d(avgpool3x3s1_p1_fp32_in, avgpool3x3s1_p1_fp32_out, + csi_nn_rvv_avgpool3x3s1_p1_fp32, 2, 3, 10, 2, 3, 10, 3, 3, 1, 1, 1, 1, + CSINN_DTYPE_FLOAT32); + verify_avgpool2d(avgpool3x3s1_p1_fp16_in, avgpool3x3s1_p1_fp16_out, + csi_nn_rvv_avgpool3x3s1_p1_fp16, 2, 3, 10, 2, 3, 10, 3, 3, 1, 1, 1, 1, + CSINN_DTYPE_FLOAT16); + + verify_avgpool2d(global_avgpool_fp32_in, global_avgpool_fp32_out, + csi_nn_rvv_global_avgpool2d_fp32, 3, 7, 7, 3, 1, 1, 7, 7, 1, 1, 0, 0, + CSINN_DTYPE_FLOAT32); + verify_avgpool2d(global_avgpool_fp16_in, global_avgpool_fp16_out, + csi_nn_rvv_global_avgpool2d_fp16, 3, 7, 7, 3, 1, 1, 7, 7, 1, 1, 0, 0, + CSINN_DTYPE_FLOAT16); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/unit_test/concat.c b/tests/unit_test/concat.c new file mode 100644 index 00000000..8b9067f6 --- /dev/null +++ b/tests/unit_test/concat.c @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/concat.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_concat(void *input0_data, void *input1_data, void *ref_data, int (*func)(), int in_c, + int in_h, int in_w, int axis, enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input[2]; + + input[0] = csi_alloc_tensor(NULL); + input[0]->dim[0] = 1; + input[0]->dim[1] = in_c; + input[0]->dim[2] = in_h; + input[0]->dim[3] = in_w; + input[0]->dim_count = 4; + input[0]->name = "input0"; + + input[1] = csi_alloc_tensor(NULL); + input[1]->dim[0] = 1; + input[1]->dim[1] = in_c; + input[1]->dim[2] = in_h; + input[1]->dim[3] = in_w; + input[1]->dim_count = 4; + input[1]->name = "input1"; + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = 1; + output->dim[1] = in_c; + output->dim[2] = 2 * in_h; + output->dim[3] = in_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct concat_params params; + params.base.name = "params"; + params.axis = axis; + params.inputs_count = 2; + + input[0]->data = input0_data; + input[1]->data = input1_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + func((struct csi_tensor **)input, output, ¶ms); + + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input[0]); + csi_free_tensor(input[1]); + csi_mem_free(output->data); + csi_free_tensor(output); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of concat for RVV.\n"); + verify_concat(concat_fp32_in0, concat_fp32_in1, concat_fp32_out, csi_nn_rvv_concat_fp32, 2, 3, + 10, 2, CSINN_DTYPE_FLOAT32); + verify_concat(concat_fp16_in0, concat_fp16_in1, concat_fp16_out, csi_nn_rvv_concat_fp16, 2, 3, + 10, 2, CSINN_DTYPE_FLOAT16); + // verify_concat(concat_int8_in0, concat_int8_in1, concat_int8_out, csi_nn_rvv_concat_int8, 2, + // 3, 10, 2, CSINN_DTYPE_FLOAT32); + return done_testing(); +} diff --git a/tests/unit_test/conv2d_1x1s1_gemm.c b/tests/unit_test/conv2d_1x1s1_gemm.c new file mode 100644 index 00000000..b6e3151a --- /dev/null +++ b/tests/unit_test/conv2d_1x1s1_gemm.c @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/conv2d.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_conv2d_1x1s1_reorder(void *kernel_data, void *ref_kernel, void (*reorder)(), int out_ch, + int in_ch, enum csinn_dtype_enum dtype) +{ + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + kernel->dim[0] = out_ch; + kernel->dim[1] = in_ch; + kernel->dim[2] = 1; + kernel->dim[3] = 1; + kernel->dim_count = 4; + kernel->name = "kernel"; + int kernel_size = csi_tensor_size(kernel); + + struct conv2d_params params; + params.base.name = "params"; + params.stride_height = 1; + params.stride_width = 1; + params.pad_left = 0; + params.pad_right = 0; + params.pad_top = 0; + params.pad_down = 0; + params.group = 1; + + kernel->data = kernel_data; + + reorder(kernel, ¶ms); + evaluate_error(kernel->data, ref_kernel, kernel_size, dtype); + + csi_free_tensor(kernel); +} + +void verify_conv2d_1x1s1_compute(void *input_data, void *kernel_data, void *bias_data, + void *ref_data, int (*compute)(), int out_c, int in_c, int in_h, + int in_w, enum csinn_dtype_enum dtype) + +{ + struct csi_tensor *input = csi_alloc_tensor(NULL); + input->dim[0] = 1; + input->dim[1] = in_c; + input->dim[2] = in_h; + input->dim[3] = in_w; + input->dim_count = 4; + input->name = "input"; + int in_size = csi_tensor_size(input); + + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + kernel->dim[0] = out_c; + kernel->dim[1] = in_c; + kernel->dim[2] = 1; + kernel->dim[3] = 1; + kernel->dim_count = 4; + kernel->name = "kernel"; + + struct csi_tensor *bias = csi_alloc_tensor(NULL); + bias->dim[0] = out_c; + bias->dim_count = 1; + bias->name = "bias"; + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = 1; + output->dim[1] = out_c; + output->dim[2] = in_h; + output->dim[3] = in_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct conv2d_params params; + params.base.name = "params"; + params.stride_height = 1; + params.stride_width = 1; + params.pad_left = 0; + params.pad_right = 0; + params.pad_top = 0; + params.pad_down = 0; + params.group = 1; + + input->data = input_data; + kernel->data = kernel_data; + bias->data = bias_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + compute(input, output, kernel, bias, ¶ms); + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input); + csi_mem_free(output->data); + csi_free_tensor(output); + csi_free_tensor(kernel); + csi_free_tensor(bias); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of convolution 1x1s1 for RVV.\n"); + + verify_conv2d_1x1s1_reorder(conv2d1x1s1_fp32_ker, conv2d1x1s1_fp32_ker1, + csi_nn_rvv_conv1x1s1_gemm_transform_kernel_fp32, 19, 16, + CSINN_DTYPE_FLOAT32); + verify_conv2d_1x1s1_compute(conv2d1x1s1_fp32_in, conv2d1x1s1_fp32_ker1, conv2d1x1s1_fp32_bias, + conv2d1x1s1_fp32_out, csi_nn_rvv_conv1x1s1_gemm_fp32, 19, 16, 4, 5, + CSINN_DTYPE_FLOAT32); + + verify_conv2d_1x1s1_reorder(conv2d1x1s1_fp16_ker, conv2d1x1s1_fp16_ker1, + csi_nn_rvv_conv1x1s1_gemm_transform_kernel_fp16, 19, 16, + CSINN_DTYPE_FLOAT16); + verify_conv2d_1x1s1_compute(conv2d1x1s1_fp16_in, conv2d1x1s1_fp16_ker1, conv2d1x1s1_fp16_bias, + conv2d1x1s1_fp16_out, csi_nn_rvv_conv1x1s1_gemm_fp16, 19, 16, 4, 5, + CSINN_DTYPE_FLOAT16); + + return done_testing(); +} diff --git a/tests/unit_test/conv2d_im2col_gemm.c b/tests/unit_test/conv2d_im2col_gemm.c new file mode 100644 index 00000000..f837fa89 --- /dev/null +++ b/tests/unit_test/conv2d_im2col_gemm.c @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/conv2d.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_conv2d_im2col_reorder(void *kernel_data, void *ref_kernel, void (*reorder)(), + int out_ch, int in_ch, int k_h, int k_w, + enum csinn_dtype_enum dtype) +{ + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + kernel->dim[0] = out_ch; + kernel->dim[1] = in_ch; + kernel->dim[2] = k_h; + kernel->dim[3] = k_w; + kernel->dim_count = 4; + kernel->name = "kernel"; + int kernel_size = csi_tensor_size(kernel); + + struct conv2d_params params; + params.base.name = "params"; + params.stride_height = 1; + params.stride_width = 1; + params.pad_left = 1; + params.pad_right = 1; + params.pad_top = 1; + params.pad_down = 1; + params.group = 1; + + kernel->data = kernel_data; + + reorder(kernel, ¶ms); + evaluate_error(kernel->data, ref_kernel, kernel_size, dtype); + + csi_free_tensor(kernel); +} + +void verify_conv2d_im2col_compute(void *input_data, void *kernel_data, void *bias_data, + void *ref_data, int (*compute)(), int in_c, int in_h, int in_w, + int out_c, int out_h, int out_w, int k_h, int k_w, + enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input = csi_alloc_tensor(NULL); + input->dim[0] = 1; + input->dim[1] = in_c; + input->dim[2] = in_h; + input->dim[3] = in_w; + input->dim_count = 4; + input->name = "input"; + int in_size = csi_tensor_size(input); + + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + kernel->dim[0] = out_c; + kernel->dim[1] = in_c; + kernel->dim[2] = k_h; + kernel->dim[3] = k_w; + kernel->dim_count = 4; + kernel->name = "kernel"; + + struct csi_tensor *bias = csi_alloc_tensor(NULL); + bias->dim[0] = out_c; + bias->dim_count = 1; + bias->name = "bias"; + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = 1; + output->dim[1] = out_c; + output->dim[2] = out_h; + output->dim[3] = out_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct conv2d_params params; + params.base.name = "params"; + params.stride_height = 1; + params.stride_width = 1; + params.pad_left = 1; + params.pad_right = 1; + params.pad_top = 1; + params.pad_down = 1; + params.group = 1; + + input->data = input_data; + kernel->data = kernel_data; + bias->data = bias_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + compute(input, output, kernel, bias, ¶ms); + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input); + csi_mem_free(output->data); + csi_free_tensor(output); + csi_free_tensor(kernel); + csi_free_tensor(bias); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of convolution im2col_gemm for RVV.\n"); + + verify_conv2d_im2col_reorder(conv2d_im2col_fp32_ker, conv2d_im2col_fp32_ker1, + csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp32, 19, 3, 3, 3, + CSINN_DTYPE_FLOAT32); + verify_conv2d_im2col_compute(conv2d_im2col_fp32_in, conv2d_im2col_fp32_ker1, + conv2d_im2col_fp32_bias, conv2d_im2col_fp32_out, + csi_nn_rvv_conv_im2col_gemm_fp32, 3, 4, 5, 19, 4, 5, 3, 3, + CSINN_DTYPE_FLOAT32); + + verify_conv2d_im2col_reorder(conv2d_im2col_fp16_ker, conv2d_im2col_fp16_ker1, + csi_nn_rvv_conv_im2col_sgemm_transform_kernel_fp16, 19, 3, 3, 3, + CSINN_DTYPE_FLOAT16); + verify_conv2d_im2col_compute(conv2d_im2col_fp16_in, conv2d_im2col_fp16_ker1, + conv2d_im2col_fp16_bias, conv2d_im2col_fp16_out, + csi_nn_rvv_conv_im2col_gemm_fp16, 3, 4, 5, 19, 4, 5, 3, 3, + CSINN_DTYPE_FLOAT16); + + return done_testing(); +} diff --git a/tests/unit_test/conv2d_winograd.c b/tests/unit_test/conv2d_winograd.c new file mode 100644 index 00000000..bc0db0e5 --- /dev/null +++ b/tests/unit_test/conv2d_winograd.c @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/conv2d.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_conv2d_winograd3x3s1_trans(void *kernel_data, void *ref_kernel, void (*reorder)(), + int out_ch, int in_ch, int k_h, int k_w, + enum csinn_dtype_enum dtype) +{ + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + kernel->dim[0] = out_ch; + kernel->dim[1] = in_ch; + kernel->dim[2] = k_h; + kernel->dim[3] = k_w; + kernel->dim_count = 4; + kernel->name = "kernel"; + int kernel_size = csi_tensor_size(kernel); + + struct csi_tensor *t_kernel = csi_alloc_tensor(NULL); + + struct conv2d_params params; + params.base.name = "params"; + params.stride_height = 1; + params.stride_width = 1; + params.pad_left = 1; + params.pad_right = 1; + params.pad_top = 1; + params.pad_down = 1; + params.group = 1; + + kernel->data = kernel_data; + int ker_out_size = out_ch * in_ch * 8 * 8; // b6f3 + + reorder(kernel, t_kernel); + evaluate_error(t_kernel->data, ref_kernel, ker_out_size, dtype); + + csi_free_tensor(kernel); + csi_free_tensor(t_kernel); +} + +void verify_conv2d_winograd3x3s1_compute(void *input_data, void *kernel_data, void *bias_data, + void *ref_data, int (*compute)(), int in_c, int in_h, + int in_w, int out_c, int out_h, int out_w, int k_h, + int k_w, enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input = csi_alloc_tensor(NULL); + input->dim[0] = 1; + input->dim[1] = in_c; + input->dim[2] = in_h; + input->dim[3] = in_w; + input->dim_count = 4; + input->name = "input"; + int in_size = csi_tensor_size(input); + + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + kernel->dim[0] = out_c; + kernel->dim[1] = in_c; + kernel->dim[2] = k_h; + kernel->dim[3] = k_w; + kernel->dim_count = 4; + kernel->name = "kernel"; + int ker_out_size = out_c * in_c * 8 * 8; // b6f3 + + struct csi_tensor *bias = csi_alloc_tensor(NULL); + bias->dim[0] = out_c; + bias->dim_count = 1; + bias->name = "bias"; + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = 1; + output->dim[1] = out_c; + output->dim[2] = out_h; + output->dim[3] = out_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct conv2d_params params; + params.base.name = "params"; + params.stride_height = 1; + params.stride_width = 1; + params.pad_left = 1; + params.pad_right = 1; + params.pad_top = 1; + params.pad_down = 1; + params.group = 1; + params.conv_extra.kernel_tm = csi_alloc_tensor(NULL); + + input->data = input_data; + params.conv_extra.kernel_tm->data = csi_mem_alloc(ker_out_size * sizeof(float)); + memcpy(params.conv_extra.kernel_tm->data, kernel_data, ker_out_size * sizeof(float)); + bias->data = bias_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + compute(input, output, kernel, bias, ¶ms); + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input); + csi_mem_free(output->data); + csi_free_tensor(output); + csi_free_tensor(kernel); + csi_free_tensor(bias); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of convolution winograd3x3s1 for RVV.\n"); + + verify_conv2d_winograd3x3s1_trans(conv2d_winograd_fp32_ker, conv2d_winograd_fp32_ker1, + csi_nn_rvv_conv3x3s1_winograd64_transform_kernel_packn_fp32, + 16, 8, 3, 3, CSINN_DTYPE_FLOAT32); + verify_conv2d_winograd3x3s1_compute(conv2d_winograd_fp32_in, conv2d_winograd_fp32_ker1, + conv2d_winograd_fp32_bias, conv2d_winograd_fp32_out, + csi_nn_rvv_conv3x3s1_winograd64_packn_fp32, 8, 14, 14, 16, + 14, 14, 3, 3, CSINN_DTYPE_FLOAT32); + + verify_conv2d_winograd3x3s1_trans(conv2d_winograd_fp16_ker, conv2d_winograd_fp16_ker1, + csi_nn_rvv_conv3x3s1_winograd64_transform_kernel_packn_fp16, + 16, 8, 3, 3, CSINN_DTYPE_FLOAT16); + verify_conv2d_winograd3x3s1_compute(conv2d_winograd_fp16_in, conv2d_winograd_fp16_ker1, + conv2d_winograd_fp16_bias, conv2d_winograd_fp16_out, + csi_nn_rvv_conv3x3s1_winograd64_packn_fp16, 8, 14, 14, 16, + 14, 14, 3, 3, CSINN_DTYPE_FLOAT16); + + return done_testing(); +} diff --git a/tests/unit_test/dwconv2d.c b/tests/unit_test/dwconv2d.c new file mode 100644 index 00000000..d4f3cd04 --- /dev/null +++ b/tests/unit_test/dwconv2d.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/dwconv2d.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_dwconv2d(void *input_data, void *kernel_data, void *bias_data, void *ref_data, + int (*func)(), int in_c, int in_h, int in_w, int out_c, int out_h, int out_w, + int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, + enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input = csi_alloc_tensor(NULL); + input->dim[0] = 1; + input->dim[1] = in_c; + input->dim[2] = in_h; + input->dim[3] = in_w; + input->dim_count = 4; + input->name = "input"; + int in_size = csi_tensor_size(input); + + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + kernel->dim[0] = in_c; + kernel->dim[1] = 1; + kernel->dim[2] = kernel_h; + kernel->dim[3] = kernel_w; + kernel->dim_count = 4; + kernel->name = "kernel"; + + struct csi_tensor *bias = csi_alloc_tensor(NULL); + bias->dim[0] = in_c; + bias->dim_count = 1; + bias->name = "bias"; + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = input->dim[0]; + output->dim[1] = out_c; + output->dim[2] = out_h; + output->dim[3] = out_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct conv2d_params params; + params.base.name = "params"; + params.stride_height = stride_h; + params.stride_width = stride_w; + params.pad_left = pad_w; + params.pad_right = pad_w; + params.pad_top = pad_h; + params.pad_down = pad_h; + + input->data = input_data; + kernel->data = kernel_data; + bias->data = bias_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + func(input, output, kernel, bias, ¶ms); + + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input); + csi_mem_free(output->data); + csi_free_tensor(output); + csi_free_tensor(kernel); + csi_free_tensor(bias); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of depthwise_convolution for RVV.\n"); + verify_dwconv2d(dwconv3x3s1_fp32_in, dwconv3x3s1_fp32_ker, dwconv3x3s1_fp32_bias, + dwconv3x3s1_fp32_out, csi_nn_rvv_dwconv3x3s1_fp32, 2, 4, 10, 2, 4, 10, 3, 3, 1, + 1, 1, 1, CSINN_DTYPE_FLOAT32); + verify_dwconv2d(dwconv3x3s1_fp16_in, dwconv3x3s1_fp16_ker, dwconv3x3s1_fp16_bias, + dwconv3x3s1_fp16_out, csi_nn_rvv_dwconv3x3s1_fp16, 2, 4, 10, 2, 4, 10, 3, 3, 1, + 1, 1, 1, CSINN_DTYPE_FLOAT16); + // verify_dwconv2d(dwconv3x3s1_int8_in, dwconv3x3s1_int8_ker, dwconv3x3s1_int8_bias, + // dwconv3x3s1_int8_out, csi_nn_rvv_dwconv3x3s1_int8, 2, 4, 10, 2, 4, 10, 3, 3, + // 1, 1, 1, 1, CSINN_DTYPE_INT8); + + verify_dwconv2d(dwconv3x3s2_fp32_in, dwconv3x3s2_fp32_ker, dwconv3x3s2_fp32_bias, + dwconv3x3s2_fp32_out, csi_nn_rvv_dwconv3x3s2_fp32, 2, 6, 18, 2, 3, 9, 3, 3, 2, + 2, 1, 1, CSINN_DTYPE_FLOAT32); + verify_dwconv2d(dwconv3x3s2_fp16_in, dwconv3x3s2_fp16_ker, dwconv3x3s2_fp16_bias, + dwconv3x3s2_fp16_out, csi_nn_rvv_dwconv3x3s2_fp16, 2, 6, 18, 2, 3, 9, 3, 3, 2, + 2, 1, 1, CSINN_DTYPE_FLOAT16); + // verify_dwconv2d(dwconv3x3s2_int8_in, dwconv3x3s2_int8_ker, dwconv3x3s2_int8_bias, + // dwconv3x3s2_int8_out, csi_nn_rvv_dwconv3x3s2_int8, 2, 6, 18, 2, 3, 9, 3, 3, + // 2, 2, 1, 1, CSINN_DTYPE_INT8); + + return done_testing(); +} diff --git a/tests/unit_test/fullyconnected.c b/tests/unit_test/fullyconnected.c new file mode 100644 index 00000000..581a7f72 --- /dev/null +++ b/tests/unit_test/fullyconnected.c @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/fullyconnected.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_fc_reorder(void *weight_data, void *ref_weight, void (*reorder)(), int in_nodes, + int out_nodes, enum csinn_dtype_enum dtype) +{ + struct csi_tensor *weight = csi_alloc_tensor(NULL); + weight->dim[0] = out_nodes; + weight->dim[1] = in_nodes; + weight->dim_count = 2; + weight->name = "weight"; + int weight_size = csi_tensor_size(weight); + + weight->data = weight_data; + + reorder(weight); + evaluate_error(weight->data, ref_weight, weight_size, dtype); + + csi_free_tensor(weight); +} + +void verify_fc_compute(void *input_data, void *weight_data, void *bias_data, void *ref_data, + int (*compute)(), int in_nodes, int out_nodes, enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input = csi_alloc_tensor(NULL); + input->dim[0] = 1; + input->dim[1] = in_nodes; + input->dim_count = 2; + input->name = "input"; + int in_size = csi_tensor_size(input); + + struct csi_tensor *weight = csi_alloc_tensor(NULL); + weight->dim[0] = out_nodes; + weight->dim[1] = in_nodes; + weight->dim_count = 2; + weight->name = "weight"; + int weight_size = csi_tensor_size(weight); + + struct csi_tensor *bias = csi_alloc_tensor(NULL); + bias->dim[0] = out_nodes; + bias->dim_count = 1; + bias->name = "bias"; + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = 1; + output->dim[1] = out_nodes; + output->dim_count = 2; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct fc_params params; + params.base.name = "params"; + + input->data = input_data; + weight->data = weight_data; + bias->data = bias_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + compute(input, output, weight, bias, ¶ms); + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input); + csi_mem_free(output->data); + csi_free_tensor(output); + csi_free_tensor(weight); + csi_free_tensor(bias); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of fullyconnected for RVV.\n"); + + verify_fc_reorder(fc_fp32_weight, fc_fp32_weight_ref, csi_nn_rvv_fc_gemv_transform_weight_fp32, + 17, 31, CSINN_DTYPE_FLOAT32); + verify_fc_compute(fc_fp32_in, fc_fp32_weight_ref, fc_fp32_bias, fc_fp32_out, + csi_nn_rvv_fullyconnected_packn_fp32, 17, 31, CSINN_DTYPE_FLOAT32); + + verify_fc_reorder(fc_fp16_weight, fc_fp16_weight_ref, csi_nn_rvv_fc_gemv_transform_weight_fp16, + 17, 31, CSINN_DTYPE_FLOAT16); + verify_fc_compute(fc_fp16_in, fc_fp16_weight_ref, fc_fp16_bias, fc_fp16_out, + csi_nn_rvv_fullyconnected_packn_fp16, 17, 31, CSINN_DTYPE_FLOAT16); + + // verify_fc_reorder(fc_int8_weight, fc_int8_weight_ref, + // csi_nn_rvv_fc_gemv_transform_weight_int8, + // 17, 31, CSINN_DTYPE_INT8); + // verify_fc_compute(fc_int8_in, fc_int8_weight_ref, fc_int8_bias, fc_int8_out, + // csi_nn_rvv_fullyconnected_packn_int8, 17, 31, CSINN_DTYPE_INT8); + + return done_testing(); +} diff --git a/tests/unit_test/gemm.c b/tests/unit_test/gemm.c new file mode 100644 index 00000000..c2bc8b63 --- /dev/null +++ b/tests/unit_test/gemm.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/gemm.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_gemm_reorderA(void *ma_data, void *ref_ma_data, void (*reorder)(), int m, int k, + int ldx, enum csinn_dtype_enum dtype) +{ + void *out_data = csi_mem_alloc(m * k * sizeof(float)); + reorder(ma_data, out_data, m, k, ldx); + evaluate_error(out_data, ref_ma_data, m * k, dtype); + csi_mem_free(out_data); +} + +void verify_gemm_reorderB(void *mb_data, void *ref_mb_data, void (*reorder)(), int k, int n, + int ldx, enum csinn_dtype_enum dtype) +{ + void *out_data = csi_mem_alloc(k * n * sizeof(float)); + reorder(mb_data, out_data, k, n, ldx); + evaluate_error(out_data, ref_mb_data, k * n, dtype); + csi_mem_free(out_data); +} + +void verify_gemm_compute(void *ma_data, void *mb_data, void *bias_data, void *ref_data, + void (*compute)(), int m, int k, int n, int ldx, + enum csinn_dtype_enum dtype) +{ + void *out_data = csi_mem_alloc(m * n * sizeof(float)); + compute(out_data, ma_data, mb_data, m, k, n, ldx, bias_data); + evaluate_error(out_data, ref_data, m * n, dtype); + csi_mem_free(out_data); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of gemm for RVV.\n"); + + verify_gemm_reorderA(gemm_fp32_a, gemm_fp32_a1, csi_nn_rvv_reorder_kernel_n8_fp32, 31, 16, 16, + CSINN_DTYPE_FLOAT32); + verify_gemm_reorderB(gemm_fp32_b, gemm_fp32_b1, csi_nn_rvv_reorder_input_z8_fp32, 16, 20, 20, + CSINN_DTYPE_FLOAT32); + verify_gemm_compute(gemm_fp32_a1, gemm_fp32_b1, gemm_fp32_bias, gemm_fp32_c, + csi_nn_rvv_gemm_8x8_fp32, 31, 16, 20, 20, CSINN_DTYPE_FLOAT32); + + verify_gemm_reorderA(gemm_fp16_a, gemm_fp16_a1, csi_nn_rvv_reorder_kernel_n8_fp16, 31, 16, 16, + CSINN_DTYPE_FLOAT16); + verify_gemm_reorderB(gemm_fp16_b, gemm_fp16_b1, csi_nn_rvv_reorder_input_z16_fp16, 16, 20, 20, + CSINN_DTYPE_FLOAT16); + verify_gemm_compute(gemm_fp16_a1, gemm_fp16_b1, gemm_fp16_bias, gemm_fp16_c, + csi_nn_rvv_gemm_8x16_fp16, 31, 16, 20, 20, CSINN_DTYPE_FLOAT16); + + return done_testing(); +} diff --git a/tests/unit_test/leaky_relu.c b/tests/unit_test/leaky_relu.c new file mode 100644 index 00000000..b23e746b --- /dev/null +++ b/tests/unit_test/leaky_relu.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/activation.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_leaky_relu(void *input_data, void *ref_data, int (*func)(), int in_c, int in_h, + int in_w, float alpha, enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input = csi_alloc_tensor(NULL); + input->dim[0] = 1; + input->dim[1] = in_c; + input->dim[2] = in_h; + input->dim[3] = in_w; + input->dim_count = 4; + input->name = "input"; + int in_size = csi_tensor_size(input); + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = 1; + output->dim[1] = in_c; + output->dim[2] = in_h; + output->dim[3] = in_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct relu_params params; + params.base.name = "params"; + params.n = alpha; + + input->data = input_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + func(input, output, ¶ms); + + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input); + csi_mem_free(output->data); + csi_free_tensor(output); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of leaky_relu for RVV.\n"); + verify_leaky_relu(leaky_relu_fp32_in, leaky_relu_fp32_out, csi_nn_rvv_leaky_relu_fp32, 2, 5, 11, + 0.2, CSINN_DTYPE_FLOAT32); + verify_leaky_relu(leaky_relu_fp16_in, leaky_relu_fp16_out, csi_nn_rvv_leaky_relu_fp16, 2, 5, 11, + 0.2, CSINN_DTYPE_FLOAT16); + // verify_leaky_relu(leaky_relu_int8_in, leaky_relu_int8_out, csi_nn_rvv_leaky_relu_int8, 2, 5, + // 11, 0.2, CSINN_DTYPE_INT8); + + return done_testing(); +} diff --git a/tests/unit_test/maxpool.c b/tests/unit_test/maxpool.c new file mode 100644 index 00000000..e4da7765 --- /dev/null +++ b/tests/unit_test/maxpool.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/maxpool.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_maxpool2d(void *input_data, void *ref_data, int (*func)(), int in_c, int in_h, int in_w, + int out_c, int out_h, int out_w, int kernel_h, int kernel_w, int stride_h, + int stride_w, int pad_h, int pad_w, enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input = csi_alloc_tensor(NULL); + input->dim[0] = 1; + input->dim[1] = in_c; + input->dim[2] = in_h; + input->dim[3] = in_w; + input->dim_count = 4; + input->name = "input"; + int in_size = csi_tensor_size(input); + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = input->dim[0]; + output->dim[1] = out_c; + output->dim[2] = out_h; + output->dim[3] = out_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct pool_params params; + params.base.name = "params"; + params.ceil_mode = 0; + params.stride_height = stride_h; + params.stride_width = stride_w; + params.filter_height = kernel_h; + params.filter_width = kernel_w; + params.pad_left = pad_w; + params.pad_right = pad_w; + params.pad_top = pad_h; + params.pad_down = pad_h; + + input->data = input_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + func(input, output, ¶ms); + + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input); + csi_mem_free(output->data); + csi_free_tensor(output); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of maxpool for RVV.\n"); + verify_maxpool2d(maxpool2x2s2_fp32_in, maxpool2x2s2_fp32_out, csi_nn_rvv_maxpool2x2s2_fp32, 2, + 6, 18, 2, 3, 9, 2, 2, 2, 2, 0, 0, CSINN_DTYPE_FLOAT32); + verify_maxpool2d(maxpool2x2s2_fp16_in, maxpool2x2s2_fp16_out, csi_nn_rvv_maxpool2x2s2_fp16, 2, + 6, 18, 2, 3, 9, 2, 2, 2, 2, 0, 0, CSINN_DTYPE_FLOAT16); + verify_maxpool2d(maxpool2x2s2_int8_in, maxpool2x2s2_int8_out, csi_nn_rvv_maxpool2x2s2_int8, 2, + 6, 18, 2, 3, 9, 2, 2, 2, 2, 0, 0, CSINN_DTYPE_INT8); + + verify_maxpool2d(maxpool2x2s2_p1_fp32_in, maxpool2x2s2_p1_fp32_out, + csi_nn_rvv_maxpool2x2s2_p1_fp32, 2, 7, 19, 2, 4, 10, 2, 2, 2, 2, 1, 1, + CSINN_DTYPE_FLOAT32); + verify_maxpool2d(maxpool2x2s2_p1_fp16_in, maxpool2x2s2_p1_fp16_out, + csi_nn_rvv_maxpool2x2s2_p1_fp16, 2, 7, 19, 2, 4, 10, 2, 2, 2, 2, 1, 1, + CSINN_DTYPE_FLOAT16); + verify_maxpool2d(maxpool2x2s2_p1_int8_in, maxpool2x2s2_p1_int8_out, + csi_nn_rvv_maxpool2x2s2_p1_int8, 2, 7, 19, 2, 4, 10, 2, 2, 2, 2, 1, 1, + CSINN_DTYPE_INT8); + + verify_maxpool2d(maxpool3x3s2_fp32_in, maxpool3x3s2_fp32_out, csi_nn_rvv_maxpool3x3s2_fp32, 2, + 7, 19, 2, 3, 9, 3, 3, 2, 2, 0, 0, CSINN_DTYPE_FLOAT32); + verify_maxpool2d(maxpool3x3s2_fp16_in, maxpool3x3s2_fp16_out, csi_nn_rvv_maxpool3x3s2_fp16, 2, + 7, 19, 2, 3, 9, 3, 3, 2, 2, 0, 0, CSINN_DTYPE_FLOAT16); + verify_maxpool2d(maxpool3x3s2_int8_in, maxpool3x3s2_int8_out, csi_nn_rvv_maxpool3x3s2_int8, 2, + 7, 19, 2, 3, 9, 3, 3, 2, 2, 0, 0, CSINN_DTYPE_INT8); + + verify_maxpool2d(maxpool3x3s2_p1_fp32_in, maxpool3x3s2_p1_fp32_out, + csi_nn_rvv_maxpool3x3s2_p1_fp32, 2, 6, 18, 2, 3, 9, 3, 3, 2, 2, 1, 1, + CSINN_DTYPE_FLOAT32); + verify_maxpool2d(maxpool3x3s2_p1_fp16_in, maxpool3x3s2_p1_fp16_out, + csi_nn_rvv_maxpool3x3s2_p1_fp16, 2, 6, 18, 2, 3, 9, 3, 3, 2, 2, 1, 1, + CSINN_DTYPE_FLOAT16); + verify_maxpool2d(maxpool3x3s2_p1_int8_in, maxpool3x3s2_p1_int8_out, + csi_nn_rvv_maxpool3x3s2_p1_int8, 2, 6, 18, 2, 3, 9, 3, 3, 2, 2, 1, 1, + CSINN_DTYPE_INT8); + + verify_maxpool2d(maxpool3x3s1_p1_fp32_in, maxpool3x3s1_p1_fp32_out, + csi_nn_rvv_maxpool3x3s1_p1_fp32, 2, 3, 10, 2, 3, 10, 3, 3, 1, 1, 1, 1, + CSINN_DTYPE_FLOAT32); + verify_maxpool2d(maxpool3x3s1_p1_fp16_in, maxpool3x3s1_p1_fp16_out, + csi_nn_rvv_maxpool3x3s1_p1_fp16, 2, 3, 10, 2, 3, 10, 3, 3, 1, 1, 1, 1, + CSINN_DTYPE_FLOAT16); + verify_maxpool2d(maxpool3x3s1_p1_int8_in, maxpool3x3s1_p1_int8_out, + csi_nn_rvv_maxpool3x3s1_p1_int8, 2, 3, 10, 2, 3, 10, 3, 3, 1, 1, 1, 1, + CSINN_DTYPE_INT8); + + verify_maxpool2d(global_maxpool_fp32_in, global_maxpool_fp32_out, + csi_nn_rvv_global_maxpool2d_fp32, 3, 7, 7, 3, 1, 1, 7, 7, 1, 1, 0, 0, + CSINN_DTYPE_FLOAT32); + verify_maxpool2d(global_maxpool_fp16_in, global_maxpool_fp16_out, + csi_nn_rvv_global_maxpool2d_fp16, 3, 7, 7, 3, 1, 1, 7, 7, 1, 1, 0, 0, + CSINN_DTYPE_FLOAT16); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/unit_test/mul.c b/tests/unit_test/mul.c new file mode 100644 index 00000000..f8f5f89e --- /dev/null +++ b/tests/unit_test/mul.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/basic_math.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_mul(void *input0_data, void *input1_data, void *ref_data, int (*func)(), int in_c, + int in_h, int in_w, enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + input0->dim[0] = 1; + input0->dim[1] = in_c; + input0->dim[2] = in_h; + input0->dim[3] = in_w; + input0->dim_count = 4; + input0->name = "input0"; + int in0_size = csi_tensor_size(input0); + + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + input1->dim[0] = 1; + input1->dim[1] = in_c; + input1->dim[2] = in_h; + input1->dim[3] = in_w; + input1->dim_count = 4; + input1->name = "input1"; + int in1_size = csi_tensor_size(input1); + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = 1; + output->dim[1] = in_c; + output->dim[2] = in_h; + output->dim[3] = in_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct diso_params params; + params.base.name = "params"; + + input0->data = input0_data; + input1->data = input1_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + func(input0, input1, output, ¶ms); + + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input0); + csi_free_tensor(input1); + csi_mem_free(output->data); + csi_free_tensor(output); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of mul for RVV.\n"); + // verify_mul(mul_fp32_in0, mul_fp32_in1, mul_fp32_out, csi_nn_rvv_mul_fp32, 2, 5, 11, + // CSINN_DTYPE_FLOAT32); + // verify_mul(mul_fp16_in0, mul_fp16_in1, mul_fp16_out, csi_nn_rvv_mul_fp16, 2, 5, 11, + // CSINN_DTYPE_FLOAT16); + // verify_mul(mul_int8_in0, mul_int8_in1, mul_int8_out, csi_nn_rvv_mul_int8, 2, 5, 11, + // CSINN_DTYPE_INT8); + + return done_testing(); +} diff --git a/tests/unit_test/pad.c b/tests/unit_test/pad.c new file mode 100644 index 00000000..8861dbdd --- /dev/null +++ b/tests/unit_test/pad.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/pad.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_pad(void *input_data, void *ref_data, void (*func)(), int in_c, int in_h, int in_w, + int pad_top, int pad_left, int pad_down, int pad_right, enum csinn_dtype_enum dtype) +{ + int padded_h = in_h + pad_top + pad_down; + int padded_w = in_w + pad_left + pad_right; + int out_size = in_c * padded_h * padded_w; + + float *out = csi_mem_alloc(out_size * sizeof(float)); + + if (dtype == CSINN_DTYPE_INT8) { + func(input_data, out, in_c, in_h, in_w, padded_h, padded_w, pad_top, pad_left, (int8_t)0); + } else { + func(input_data, out, in_c, in_h, in_w, padded_h, padded_w, pad_top, pad_left); + } + + evaluate_error(out, ref_data, out_size, dtype); + + csi_mem_free(out); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of pad for RVV.\n"); + verify_pad(pad_fp32_in, pad_fp32_out, csi_nn_rvv_pad_input_fp32, 3, 4, 19, 1, 1, 1, 1, + CSINN_DTYPE_FLOAT32); + verify_pad(pad_fp16_in, pad_fp16_out, csi_nn_rvv_pad_input_fp16, 3, 4, 19, 1, 1, 1, 1, + CSINN_DTYPE_FLOAT16); + verify_pad(pad_int8_in, pad_int8_out, csi_nn_rvv_pad_input_int8, 3, 4, 19, 1, 1, 1, 1, + CSINN_DTYPE_INT8); + + return done_testing(); +} diff --git a/tests/unit_test/relu.c b/tests/unit_test/relu.c new file mode 100644 index 00000000..332a4aa6 --- /dev/null +++ b/tests/unit_test/relu.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.13.x */ + +#include "./valid_data/activation.dat" +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "math_snr.h" +#include "test_utils.h" + +void verify_relu(void *input_data, void *ref_data, int (*func)(), int in_c, int in_h, int in_w, + enum csinn_dtype_enum dtype) +{ + struct csi_tensor *input = csi_alloc_tensor(NULL); + input->dim[0] = 1; + input->dim[1] = in_c; + input->dim[2] = in_h; + input->dim[3] = in_w; + input->dim_count = 4; + input->name = "input"; + int in_size = csi_tensor_size(input); + + struct csi_tensor *output = csi_alloc_tensor(NULL); + output->dim[0] = 1; + output->dim[1] = in_c; + output->dim[2] = in_h; + output->dim[3] = in_w; + output->dim_count = 4; + output->name = "output"; + int out_size = csi_tensor_size(output); + + struct relu_params params; + params.base.name = "params"; + + input->data = input_data; + output->data = csi_mem_alloc(out_size * sizeof(float)); + + func(input, output, ¶ms); + + evaluate_error(output->data, ref_data, out_size, dtype); + + csi_free_tensor(input); + csi_mem_free(output->data); + csi_free_tensor(output); +} + +int main(int argc, char **argv) +{ + init_testsuite("Test function of relu for RVV.\n"); + verify_relu(relu_fp32_in, relu_fp32_out, csi_nn_rvv_relu_fp32, 2, 5, 11, CSINN_DTYPE_FLOAT32); + verify_relu(relu_fp16_in, relu_fp16_out, csi_nn_rvv_relu_fp16, 2, 5, 11, CSINN_DTYPE_FLOAT16); + // verify_relu(relu_int8_in, relu_int8_out, csi_nn_rvv_relu_int8, 2, 5, 11, CSINN_DTYPE_INT8); + + return done_testing(); +} diff --git a/tests/unit_test/valid_data/activation.dat b/tests/unit_test/valid_data/activation.dat new file mode 100644 index 00000000..f79ddc48 --- /dev/null +++ b/tests/unit_test/valid_data/activation.dat @@ -0,0 +1,188 @@ +/************************************************************************** + * NCHW layout + **************************************************************************/ +// [c h w]: [2 5 11] +// [c h w]: [2 5 11] +unsigned char relu_fp32_in[] = { + 0x60, 0x81, 0x39, 0x41, 0xc2, 0x83, 0xb1, 0x41, 0x54, 0x0c, 0x79, 0xc0, 0xa9, 0x66, 0x30, 0x41, + 0x0b, 0xfb, 0x73, 0x40, 0x5f, 0x38, 0xdd, 0x40, 0xee, 0x0f, 0x53, 0x41, 0x7f, 0xf0, 0x23, 0x41, + 0x6b, 0xc2, 0xce, 0xbd, 0xff, 0x26, 0x06, 0x3d, 0x53, 0x83, 0xa1, 0xbf, 0xf8, 0x17, 0xc8, 0x41, + 0x90, 0x36, 0xa2, 0xc1, 0xcc, 0x5e, 0xcd, 0x40, 0x6f, 0x79, 0xa3, 0xc0, 0x3a, 0x64, 0x9d, 0x40, + 0x0d, 0x77, 0x87, 0xc1, 0x59, 0x64, 0x36, 0xc1, 0xfb, 0x6a, 0xfc, 0x40, 0x0e, 0xa0, 0xf5, 0x3f, + 0xe2, 0x45, 0xcc, 0xc1, 0xa8, 0x47, 0x96, 0xc1, 0xaa, 0x50, 0xe2, 0xc0, 0x54, 0x1c, 0xac, 0x41, + 0x17, 0x2d, 0xf7, 0xc1, 0xd2, 0xea, 0xdf, 0xc1, 0x61, 0xdb, 0x09, 0xc2, 0xd9, 0x51, 0xde, 0x3f, + 0xcb, 0x6c, 0xe1, 0xc1, 0x41, 0x45, 0x71, 0xc1, 0xb6, 0x68, 0xd5, 0x3f, 0xc3, 0xaa, 0x9e, 0x41, + 0xb3, 0xd8, 0xcd, 0xc0, 0x46, 0x0f, 0x5a, 0x41, 0xbf, 0x60, 0x86, 0xc0, 0xde, 0x14, 0xe2, 0xc0, + 0xb2, 0x64, 0x94, 0x40, 0x5e, 0x25, 0x28, 0xc1, 0x97, 0x5e, 0x76, 0xc1, 0x3d, 0x2b, 0x6f, 0xc1, + 0x27, 0xa7, 0x0a, 0xc2, 0xd4, 0xd9, 0x5e, 0xc1, 0x28, 0xab, 0x8f, 0xc1, 0x06, 0xa4, 0x53, 0x41, + 0x46, 0x35, 0xe6, 0x3f, 0x34, 0xb4, 0x1d, 0xc0, 0xf7, 0x8a, 0x16, 0xc2, 0xac, 0x4e, 0x33, 0x41, + 0x78, 0xb7, 0x8a, 0x41, 0xc7, 0x8e, 0x7e, 0x41, 0xe1, 0x1e, 0xb7, 0xc0, 0x13, 0xd0, 0xdf, 0x41, + 0x59, 0x9a, 0xba, 0x41, 0x64, 0xec, 0x8d, 0xc1, 0x7e, 0x3f, 0x17, 0xc1, 0x76, 0x56, 0x9e, 0x40, + 0x9f, 0xda, 0x85, 0xc0, 0xc8, 0x03, 0x54, 0xc1, 0x70, 0x31, 0x14, 0xc1, 0xe6, 0xd8, 0x4c, 0x41, + 0x0e, 0x79, 0x9b, 0xc1, 0xfa, 0x3d, 0xcf, 0xc1, 0xb2, 0xd0, 0x13, 0xc1, 0x97, 0x51, 0x0c, 0xc2, + 0xd1, 0x6b, 0x1e, 0x41, 0xf1, 0x08, 0xc2, 0xc1, 0xe0, 0x0b, 0x83, 0x41, 0x15, 0x7a, 0xbf, 0xc1, + 0x09, 0xb5, 0x2c, 0x40, 0x05, 0x87, 0xba, 0xc0, 0xf5, 0xed, 0x52, 0x41, 0x07, 0xf9, 0xf2, 0xc1, + 0xfa, 0x2a, 0x04, 0xc2, 0xc7, 0xb2, 0xb3, 0xc1, 0x80, 0x0a, 0x7d, 0xc1, 0x4b, 0x7d, 0x0f, 0xc1, + 0xaf, 0x61, 0x83, 0xc1, 0xde, 0x28, 0x07, 0xc1, 0x7b, 0xe6, 0xbf, 0xc1, 0xf4, 0xbf, 0xc4, 0xc0, + 0x18, 0x6b, 0xe7, 0x41, 0x81, 0x6b, 0x9b, 0xc1, 0x50, 0x7b, 0xa3, 0xbe, 0xd6, 0x83, 0x20, 0xbf, + 0xcb, 0xa6, 0x9e, 0xc0, 0xd0, 0x01, 0xef, 0xbf, 0x6b, 0x71, 0x72, 0xc1, 0x00, 0xa3, 0x1d, 0x41, + 0xf7, 0x60, 0x44, 0xc1, 0x8d, 0xb6, 0x33, 0xc1, 0x8a, 0xa2, 0x22, 0xc2, 0x0f, 0xb5, 0x65, 0xc0, + 0xbe, 0x70, 0xcb, 0x40, 0xc8, 0xe5, 0x52, 0xc1, 0xe1, 0xe7, 0x21, 0xc2, 0xfa, 0xd8, 0x04, 0x41, + 0xd4, 0x8a, 0x8f, 0xc1, 0xbf, 0x9e, 0xa0, 0xc1, 0xe1, 0xd6, 0x4a, 0xc1, 0xed, 0xad, 0x1c, 0xc2, + 0xa5, 0x8c, 0xc4, 0xc1, 0xfc, 0x8e, 0x09, 0x3f, 0x54, 0xbf, 0xc1, 0x40, 0x0a, 0x8e, 0x47, 0x40, + 0x83, 0xfa, 0x1f, 0x40, 0xbf, 0x46, 0xf2, 0x41, 0xc0, 0x90, 0x07, 0xc2, 0x95, 0x66, 0x8d, 0xc1, + 0xc0, 0x86, 0x04, 0xc2, 0xbd, 0x34, 0x43, 0x40}; +unsigned char relu_fp32_out[] = { + 0x60, 0x81, 0x39, 0x41, 0xc2, 0x83, 0xb1, 0x41, 0x00, 0x00, 0x00, 0x00, 0xa9, 0x66, 0x30, 0x41, + 0x0b, 0xfb, 0x73, 0x40, 0x5f, 0x38, 0xdd, 0x40, 0xee, 0x0f, 0x53, 0x41, 0x7f, 0xf0, 0x23, 0x41, + 0x00, 0x00, 0x00, 0x00, 0xff, 0x26, 0x06, 0x3d, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x17, 0xc8, 0x41, + 0x00, 0x00, 0x00, 0x00, 0xcc, 0x5e, 0xcd, 0x40, 0x00, 0x00, 0x00, 0x00, 0x3a, 0x64, 0x9d, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb, 0x6a, 0xfc, 0x40, 0x0e, 0xa0, 0xf5, 0x3f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x1c, 0xac, 0x41, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd9, 0x51, 0xde, 0x3f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0x68, 0xd5, 0x3f, 0xc3, 0xaa, 0x9e, 0x41, + 0x00, 0x00, 0x00, 0x00, 0x46, 0x0f, 0x5a, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xb2, 0x64, 0x94, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0xa4, 0x53, 0x41, + 0x46, 0x35, 0xe6, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xac, 0x4e, 0x33, 0x41, + 0x78, 0xb7, 0x8a, 0x41, 0xc7, 0x8e, 0x7e, 0x41, 0x00, 0x00, 0x00, 0x00, 0x13, 0xd0, 0xdf, 0x41, + 0x59, 0x9a, 0xba, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x56, 0x9e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xd8, 0x4c, 0x41, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xd1, 0x6b, 0x1e, 0x41, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x0b, 0x83, 0x41, 0x00, 0x00, 0x00, 0x00, + 0x09, 0xb5, 0x2c, 0x40, 0x00, 0x00, 0x00, 0x00, 0xf5, 0xed, 0x52, 0x41, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x6b, 0xe7, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa3, 0x1d, 0x41, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xbe, 0x70, 0xcb, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0xd8, 0x04, 0x41, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xfc, 0x8e, 0x09, 0x3f, 0x54, 0xbf, 0xc1, 0x40, 0x0a, 0x8e, 0x47, 0x40, + 0x83, 0xfa, 0x1f, 0x40, 0xbf, 0x46, 0xf2, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xbd, 0x34, 0x43, 0x40}; +unsigned char relu_fp16_in[] = { + 0xcc, 0x49, 0x8c, 0x4d, 0xc8, 0xc3, 0x83, 0x49, 0x9f, 0x43, 0xe9, 0x46, 0x98, 0x4a, 0x1f, 0x49, + 0x76, 0xae, 0x31, 0x28, 0x0c, 0xbd, 0x40, 0x4e, 0x11, 0xcd, 0x6a, 0x46, 0x1b, 0xc5, 0xeb, 0x44, + 0x3b, 0xcc, 0xb3, 0xc9, 0xe3, 0x47, 0xad, 0x3f, 0x62, 0xce, 0xb2, 0xcc, 0x12, 0xc7, 0x60, 0x4d, + 0xb9, 0xcf, 0xff, 0xce, 0x4e, 0xd0, 0xf2, 0x3e, 0x0b, 0xcf, 0x8a, 0xcb, 0xab, 0x3e, 0xf5, 0x4c, + 0x6e, 0xc6, 0xd0, 0x4a, 0x33, 0xc4, 0x10, 0xc7, 0xa3, 0x44, 0x41, 0xc9, 0xb2, 0xcb, 0x79, 0xcb, + 0x55, 0xd0, 0xf6, 0xca, 0x7d, 0xcc, 0x9d, 0x4a, 0x31, 0x3f, 0xed, 0xc0, 0xb4, 0xd0, 0x9a, 0x49, + 0x55, 0x4c, 0xf4, 0x4b, 0xb8, 0xc5, 0xfe, 0x4e, 0xd4, 0x4d, 0x6f, 0xcc, 0xb9, 0xc8, 0xf2, 0x44, + 0x2e, 0xc4, 0xa0, 0xca, 0xa1, 0xc8, 0x66, 0x4a, 0xdb, 0xcc, 0x79, 0xce, 0x9e, 0xc8, 0x62, 0xd0, + 0xf3, 0x48, 0x10, 0xce, 0x18, 0x4c, 0xfb, 0xcd, 0x65, 0x41, 0xd4, 0xc5, 0x97, 0x4a, 0x97, 0xcf, + 0x21, 0xd0, 0x9d, 0xcd, 0xe8, 0xcb, 0x7b, 0xc8, 0x1b, 0xcc, 0x39, 0xc8, 0xff, 0xcd, 0x25, 0xc6, + 0x3b, 0x4f, 0xdb, 0xcc, 0x1b, 0xb5, 0x04, 0xb9, 0xf5, 0xc4, 0x78, 0xbf, 0x93, 0xcb, 0xed, 0x48, + 0x23, 0xca, 0x9d, 0xc9, 0x15, 0xd1, 0x2d, 0xc3, 0x5b, 0x46, 0x97, 0xca, 0x0f, 0xd1, 0x26, 0x48, + 0x7c, 0xcc, 0x04, 0xcd, 0x56, 0xca, 0xe5, 0xd0, 0x24, 0xce, 0x4c, 0x38, 0x0d, 0x46, 0x3c, 0x42, + 0xff, 0x40, 0x92, 0x4f, 0x3c, 0xd0, 0x6b, 0xcc, 0x24, 0xd0, 0x19, 0x42}; +unsigned char relu_fp16_out[] = { + 0xcc, 0x49, 0x8c, 0x4d, 0x00, 0x00, 0x83, 0x49, 0x9f, 0x43, 0xe9, 0x46, 0x98, 0x4a, 0x1f, 0x49, + 0x00, 0x00, 0x31, 0x28, 0x00, 0x00, 0x40, 0x4e, 0x00, 0x00, 0x6a, 0x46, 0x00, 0x00, 0xeb, 0x44, + 0x00, 0x00, 0x00, 0x00, 0xe3, 0x47, 0xad, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x4d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2, 0x3e, 0x00, 0x00, 0x00, 0x00, 0xab, 0x3e, 0xf5, 0x4c, + 0x00, 0x00, 0xd0, 0x4a, 0x00, 0x00, 0x00, 0x00, 0xa3, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9d, 0x4a, 0x31, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x49, + 0x55, 0x4c, 0xf4, 0x4b, 0x00, 0x00, 0xfe, 0x4e, 0xd4, 0x4d, 0x00, 0x00, 0x00, 0x00, 0xf2, 0x44, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x4a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xf3, 0x48, 0x00, 0x00, 0x18, 0x4c, 0x00, 0x00, 0x65, 0x41, 0x00, 0x00, 0x97, 0x4a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3b, 0x4f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xed, 0x48, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x46, 0x00, 0x00, 0x00, 0x00, 0x26, 0x48, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0x38, 0x0d, 0x46, 0x3c, 0x42, + 0xff, 0x40, 0x92, 0x4f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x42}; +unsigned char relu_int8_in[] = {}; +unsigned char relu_int8_out[] = {}; + +// [c h w]: [2 5 11] +// [c h w]: [2 5 11] +unsigned char leaky_relu_fp32_in[] = { + 0xa9, 0x86, 0x89, 0xc0, 0x3b, 0x68, 0xb9, 0xbf, 0x3a, 0xd9, 0x15, 0xbf, 0xb7, 0x3a, 0xab, 0x3f, + 0xa7, 0x45, 0x23, 0xc0, 0xb0, 0xff, 0x2b, 0xbf, 0xfa, 0x80, 0x39, 0x40, 0xea, 0x69, 0x9b, 0xc0, + 0x10, 0xb4, 0xa1, 0xc0, 0xaa, 0x9e, 0x19, 0x3f, 0x88, 0x57, 0x69, 0xbf, 0x38, 0xd0, 0x8c, 0xc0, + 0xd4, 0x9b, 0x8b, 0xbf, 0x99, 0x9f, 0x54, 0x3f, 0x5a, 0x00, 0x9b, 0x40, 0x87, 0x4d, 0x1a, 0x3f, + 0x6f, 0xce, 0x45, 0x40, 0x49, 0x93, 0x57, 0x3f, 0xcc, 0x0d, 0x51, 0xc0, 0x17, 0x58, 0xa0, 0x3d, + 0xb7, 0xd4, 0xb4, 0x3d, 0x98, 0x11, 0x57, 0x40, 0x31, 0xf2, 0xa6, 0xc0, 0xce, 0xc4, 0x3d, 0x3f, + 0x06, 0x2a, 0x2c, 0xc0, 0xe4, 0x79, 0xe8, 0xbf, 0x17, 0x5e, 0x6d, 0xc0, 0xfb, 0x59, 0x87, 0x40, + 0xb2, 0x5f, 0x97, 0x3e, 0x44, 0xf9, 0xbe, 0x40, 0xb9, 0x8d, 0xea, 0x3f, 0x38, 0xcb, 0xaa, 0x40, + 0x0c, 0x18, 0x78, 0x3f, 0x4f, 0x05, 0x8c, 0xc0, 0xda, 0xe2, 0xb1, 0x40, 0xa8, 0x41, 0xbf, 0x40, + 0x69, 0xbf, 0x9b, 0xc0, 0x79, 0x81, 0x22, 0x3f, 0x52, 0xac, 0x17, 0xc0, 0xaa, 0xbc, 0x3d, 0xc0, + 0x73, 0x15, 0x64, 0xc0, 0x31, 0x0a, 0x6a, 0xc0, 0x1e, 0x5e, 0xd7, 0x3f, 0x2e, 0xea, 0xab, 0xc0, + 0xef, 0x5d, 0xa2, 0xc0, 0x7e, 0x7b, 0x03, 0xc0, 0x6b, 0x0b, 0xa6, 0xc0, 0xa5, 0x8f, 0x6b, 0x40, + 0x49, 0x4d, 0x11, 0x40, 0x24, 0x89, 0xac, 0xc0, 0xad, 0xe5, 0xb5, 0xc0, 0xb3, 0x5e, 0xbf, 0xc0, + 0x4d, 0x27, 0x40, 0xc0, 0x2b, 0x42, 0xa5, 0xbf, 0x82, 0x1b, 0x86, 0x40, 0x16, 0x1e, 0x8b, 0xc0, + 0xde, 0x75, 0x3c, 0x40, 0x0f, 0x4b, 0x98, 0x40, 0x6c, 0xd0, 0xb9, 0x40, 0x5f, 0x7f, 0x68, 0xc0, + 0x1f, 0x66, 0x83, 0x40, 0xc2, 0x00, 0x9d, 0x40, 0x6b, 0x1e, 0x1d, 0xc0, 0x0f, 0xe1, 0x9c, 0xbf, + 0xf8, 0x35, 0x8a, 0x40, 0x1a, 0x44, 0x3d, 0x40, 0xfc, 0x21, 0x25, 0xc0, 0x4f, 0xfb, 0x9f, 0xc0, + 0x99, 0xc5, 0xe8, 0x3f, 0x2f, 0x39, 0xd2, 0xbf, 0x31, 0x31, 0xe6, 0xbe, 0xe6, 0xdf, 0x99, 0xc0, + 0x5b, 0xc2, 0x61, 0xbf, 0x60, 0x33, 0x5e, 0xc0, 0x6b, 0x24, 0x1f, 0x40, 0x1a, 0x02, 0xb3, 0x40, + 0xb3, 0x73, 0x83, 0xc0, 0x97, 0x9d, 0xbc, 0x40, 0xfc, 0xa2, 0x46, 0x40, 0x60, 0xab, 0xbb, 0x40, + 0x37, 0x6c, 0xa2, 0x3f, 0x21, 0xb6, 0x8e, 0xc0, 0x9f, 0x32, 0xa4, 0xbf, 0x54, 0xec, 0x5d, 0xc0, + 0x87, 0x54, 0xb9, 0xbe, 0x81, 0x7d, 0xb1, 0xc0, 0x69, 0x12, 0x28, 0x40, 0xfb, 0xce, 0x2b, 0x40, + 0x36, 0xa7, 0x84, 0x40, 0xf2, 0xdd, 0x9e, 0x3f, 0xb0, 0x86, 0xae, 0xbe, 0xc5, 0x3d, 0xaa, 0x40, + 0xef, 0x47, 0xae, 0xbd, 0x28, 0xf8, 0x9c, 0x40, 0xae, 0x95, 0xc7, 0xbf, 0x6a, 0x62, 0x4e, 0xc0, + 0xec, 0x55, 0xf2, 0xbf, 0x84, 0xc4, 0x99, 0x3d, 0xc0, 0x2b, 0x87, 0x3f, 0xc8, 0xb6, 0x95, 0xc0, + 0x7e, 0xe1, 0x4a, 0xc0, 0xd5, 0x51, 0x9c, 0xc0, 0x9f, 0x02, 0xbd, 0xbf, 0x2c, 0xa7, 0x1c, 0x40, + 0xea, 0x1d, 0x1d, 0xc0, 0x25, 0x5b, 0xb9, 0xc0, 0xa2, 0x03, 0x27, 0x3f, 0x20, 0x36, 0x5c, 0x40, + 0xe2, 0x03, 0xbc, 0x3f, 0x71, 0x1a, 0x48, 0x40}; +unsigned char leaky_relu_fp32_out[] = { + 0xa8, 0x0a, 0x5c, 0xbf, 0x63, 0x53, 0x94, 0xbe, 0xf7, 0xc1, 0xef, 0xbd, 0xb7, 0x3a, 0xab, 0x3f, + 0x1f, 0x9e, 0x02, 0xbf, 0x5a, 0x99, 0x09, 0xbe, 0xfa, 0x80, 0x39, 0x40, 0x77, 0xa9, 0x78, 0xbf, + 0xda, 0x5c, 0x81, 0xbf, 0xaa, 0x9e, 0x19, 0x3f, 0x6d, 0xac, 0x3a, 0xbe, 0x27, 0x4d, 0x61, 0xbf, + 0xba, 0x5f, 0x5f, 0xbe, 0x99, 0x9f, 0x54, 0x3f, 0x5a, 0x00, 0x9b, 0x40, 0x87, 0x4d, 0x1a, 0x3f, + 0x6f, 0xce, 0x45, 0x40, 0x49, 0x93, 0x57, 0x3f, 0x3d, 0x3e, 0x27, 0xbf, 0x17, 0x58, 0xa0, 0x3d, + 0xb7, 0xd4, 0xb4, 0x3d, 0x98, 0x11, 0x57, 0x40, 0x8e, 0x8e, 0x85, 0xbf, 0xce, 0xc4, 0x3d, 0x3f, + 0x38, 0xbb, 0x09, 0xbf, 0x1d, 0xfb, 0xb9, 0xbe, 0xdf, 0xe4, 0x3d, 0xbf, 0xfb, 0x59, 0x87, 0x40, + 0xb2, 0x5f, 0x97, 0x3e, 0x44, 0xf9, 0xbe, 0x40, 0xb9, 0x8d, 0xea, 0x3f, 0x38, 0xcb, 0xaa, 0x40, + 0x0c, 0x18, 0x78, 0x3f, 0x7f, 0x08, 0x60, 0xbf, 0xda, 0xe2, 0xb1, 0x40, 0xa8, 0x41, 0xbf, 0x40, + 0x42, 0x32, 0x79, 0xbf, 0x79, 0x81, 0x22, 0x3f, 0x50, 0xad, 0xf2, 0xbe, 0x22, 0xca, 0x17, 0xbf, + 0x8f, 0x77, 0x36, 0xbf, 0x5b, 0x3b, 0x3b, 0xbf, 0x1e, 0x5e, 0xd7, 0x3f, 0x25, 0x88, 0x89, 0xbf, + 0xbf, 0xe4, 0x81, 0xbf, 0x30, 0x5f, 0xd2, 0xbe, 0xef, 0xd5, 0x84, 0xbf, 0xa5, 0x8f, 0x6b, 0x40, + 0x49, 0x4d, 0x11, 0x40, 0x50, 0x07, 0x8a, 0xbf, 0x8b, 0x84, 0x91, 0xbf, 0x8f, 0x18, 0x99, 0xbf, + 0x0b, 0xb9, 0x19, 0xbf, 0xef, 0x34, 0x84, 0xbe, 0x82, 0x1b, 0x86, 0x40, 0x8a, 0x96, 0x5e, 0xbf, + 0xde, 0x75, 0x3c, 0x40, 0x0f, 0x4b, 0x98, 0x40, 0x6c, 0xd0, 0xb9, 0x40, 0x7f, 0xff, 0x39, 0xbf, + 0x1f, 0x66, 0x83, 0x40, 0xc2, 0x00, 0x9d, 0x40, 0xdf, 0x63, 0xfb, 0xbe, 0xb2, 0x01, 0x7b, 0xbe, + 0xf8, 0x35, 0x8a, 0x40, 0x1a, 0x44, 0x3d, 0x40, 0x30, 0x1b, 0x04, 0xbf, 0x7f, 0xf8, 0x7f, 0xbf, + 0x99, 0xc5, 0xe8, 0x3f, 0xbf, 0x2d, 0xa8, 0xbe, 0x5b, 0x27, 0xb8, 0xbd, 0x0a, 0x33, 0x76, 0xbf, + 0x7c, 0x9b, 0x34, 0xbe, 0xb3, 0xc2, 0x31, 0xbf, 0x6b, 0x24, 0x1f, 0x40, 0x1a, 0x02, 0xb3, 0x40, + 0xb8, 0x52, 0x52, 0xbf, 0x97, 0x9d, 0xbc, 0x40, 0xfc, 0xa2, 0x46, 0x40, 0x60, 0xab, 0xbb, 0x40, + 0x37, 0x6c, 0xa2, 0x3f, 0x9b, 0x56, 0x64, 0xbf, 0xb3, 0x5b, 0x83, 0xbe, 0xdd, 0x89, 0x31, 0xbf, + 0x9f, 0x43, 0x94, 0xbd, 0x01, 0xfe, 0x8d, 0xbf, 0x69, 0x12, 0x28, 0x40, 0xfb, 0xce, 0x2b, 0x40, + 0x36, 0xa7, 0x84, 0x40, 0xf2, 0xdd, 0x9e, 0x3f, 0xf3, 0x9e, 0x8b, 0xbd, 0xc5, 0x3d, 0xaa, 0x40, + 0xbf, 0x6c, 0x8b, 0xbc, 0x28, 0xf8, 0x9c, 0x40, 0xf2, 0xaa, 0x9f, 0xbe, 0x88, 0x1b, 0x25, 0xbf, + 0x57, 0xde, 0xc1, 0xbe, 0x84, 0xc4, 0x99, 0x3d, 0xc0, 0x2b, 0x87, 0x3f, 0xda, 0x8a, 0x6f, 0xbf, + 0xff, 0x4d, 0x22, 0xbf, 0x88, 0x1c, 0x7a, 0xbf, 0x4c, 0x35, 0x97, 0xbe, 0x2c, 0xa7, 0x1c, 0x40, + 0x10, 0x63, 0xfb, 0xbe, 0xeb, 0x48, 0x94, 0xbf, 0xa2, 0x03, 0x27, 0x3f, 0x20, 0x36, 0x5c, 0x40, + 0xe2, 0x03, 0xbc, 0x3f, 0x71, 0x1a, 0x48, 0x40}; +unsigned char leaky_relu_fp16_in[] = { + 0x4c, 0xc4, 0xcb, 0xbd, 0xae, 0xb8, 0x59, 0x3d, 0x1a, 0xc1, 0x5f, 0xb9, 0xcc, 0x41, 0xdb, 0xc4, + 0x0d, 0xc5, 0xcc, 0x38, 0x4a, 0xbb, 0x66, 0xc4, 0x5c, 0xbc, 0xa4, 0x3a, 0xd8, 0x44, 0xd2, 0x38, + 0x2e, 0x42, 0xbc, 0x3a, 0x88, 0xc2, 0x02, 0x2d, 0xa6, 0x2d, 0xb8, 0x42, 0x37, 0xc5, 0xee, 0x39, + 0x61, 0xc1, 0x43, 0xbf, 0x6a, 0xc3, 0x3a, 0x44, 0xba, 0x34, 0xf7, 0x45, 0x54, 0x3f, 0x56, 0x45, + 0xc0, 0x3b, 0x60, 0xc4, 0x8f, 0x45, 0xfa, 0x45, 0xdd, 0xc4, 0x14, 0x39, 0xbd, 0xc0, 0xed, 0xc1, + 0x20, 0xc3, 0x50, 0xc3, 0xba, 0x3e, 0x5f, 0xc5, 0x12, 0xc5, 0x1b, 0xc0, 0x30, 0xc5, 0x5c, 0x43, + 0x8a, 0x40, 0x64, 0xc5, 0xaf, 0xc5, 0xfa, 0xc5, 0x01, 0xc2, 0x2a, 0xbd, 0x30, 0x44, 0x58, 0xc4, + 0xe3, 0x41, 0xc2, 0x44, 0xce, 0x45, 0x43, 0xc3, 0x1b, 0x44, 0xe8, 0x44, 0xe8, 0xc0, 0xe7, 0xbc, + 0x51, 0x44, 0xea, 0x41, 0x29, 0xc1, 0xff, 0xc4, 0x46, 0x3f, 0x91, 0xbe, 0x31, 0xb7, 0xce, 0xc4, + 0x0e, 0xbb, 0xf1, 0xc2, 0xf9, 0x40, 0x98, 0x45, 0x1b, 0xc4, 0xe4, 0x45, 0x35, 0x42, 0xdd, 0x45, + 0x13, 0x3d, 0x75, 0xc4, 0x21, 0xbd, 0xef, 0xc2, 0xca, 0xb5, 0x8b, 0xc5, 0x40, 0x41, 0x5e, 0x41, + 0x25, 0x44, 0xf6, 0x3c, 0x74, 0xb5, 0x51, 0x45, 0x72, 0xad, 0xe7, 0x44, 0x3c, 0xbe, 0x73, 0xc2, + 0x92, 0xbf, 0xce, 0x2c, 0x39, 0x3c, 0xad, 0xc4, 0x57, 0xc2, 0xe2, 0xc4, 0xe8, 0xbd, 0xe5, 0x40, + 0xe8, 0xc0, 0xca, 0xc5, 0x38, 0x39, 0xe1, 0x42, 0xe0, 0x3d, 0x40, 0x42}; +unsigned char leaky_relu_fp16_out[] = { + 0xe0, 0xba, 0xa2, 0xb4, 0x7c, 0xaf, 0x59, 0x3d, 0x15, 0xb8, 0x4c, 0xb0, 0xcc, 0x41, 0xc4, 0xbb, + 0x0a, 0xbc, 0xcc, 0x38, 0xd4, 0xb1, 0x09, 0xbb, 0xf9, 0xb2, 0xa4, 0x3a, 0xd8, 0x44, 0xd2, 0x38, + 0x2e, 0x42, 0xbc, 0x3a, 0x39, 0xb9, 0x02, 0x2d, 0xa6, 0x2d, 0xb8, 0x42, 0x2c, 0xbc, 0xee, 0x39, + 0x4d, 0xb8, 0xcf, 0xb5, 0xee, 0xb9, 0x3a, 0x44, 0xba, 0x34, 0xf7, 0x45, 0x54, 0x3f, 0x56, 0x45, + 0xc0, 0x3b, 0x00, 0xbb, 0x8f, 0x45, 0xfa, 0x45, 0xc8, 0xbb, 0x14, 0x39, 0x94, 0xb7, 0xbd, 0xb8, + 0xb3, 0xb9, 0xd9, 0xb9, 0xba, 0x3e, 0x4c, 0xbc, 0x0e, 0xbc, 0x91, 0xb6, 0x26, 0xbc, 0x5c, 0x43, + 0x8a, 0x40, 0x50, 0xbc, 0x8c, 0xbc, 0xc8, 0xbc, 0xcd, 0xb8, 0x21, 0xb4, 0x30, 0x44, 0xf3, 0xba, + 0xe3, 0x41, 0xc2, 0x44, 0xce, 0x45, 0xcf, 0xb9, 0x1b, 0x44, 0xe8, 0x44, 0xd9, 0xb7, 0xd8, 0xb3, + 0x51, 0x44, 0xea, 0x41, 0x21, 0xb8, 0xfe, 0xbb, 0x46, 0x3f, 0x40, 0xb5, 0xc0, 0xad, 0xb0, 0xbb, + 0xa4, 0xb1, 0x8d, 0xb9, 0xf9, 0x40, 0x98, 0x45, 0x91, 0xba, 0xe4, 0x45, 0x35, 0x42, 0xdd, 0x45, + 0x13, 0x3d, 0x21, 0xbb, 0x1a, 0xb4, 0x8c, 0xb9, 0xa1, 0xac, 0x6f, 0xbc, 0x40, 0x41, 0x5e, 0x41, + 0x25, 0x44, 0xf6, 0x3c, 0x5d, 0xac, 0x51, 0x45, 0x5b, 0xa4, 0xe7, 0x44, 0xfc, 0xb4, 0x28, 0xb9, + 0x0e, 0xb6, 0xce, 0x2c, 0x39, 0x3c, 0x7b, 0xbb, 0x12, 0xb9, 0xd0, 0xbb, 0xb9, 0xb4, 0xe5, 0x40, + 0xd9, 0xb7, 0xa1, 0xbc, 0x38, 0x39, 0xe1, 0x42, 0xe0, 0x3d, 0x40, 0x42}; +unsigned char leaky_relu_int8_in[] = {}; +unsigned char leaky_relu_int8_out[] = {}; \ No newline at end of file diff --git a/tests/unit_test/valid_data/avgpool.dat b/tests/unit_test/valid_data/avgpool.dat new file mode 100644 index 00000000..60f3fb2d --- /dev/null +++ b/tests/unit_test/valid_data/avgpool.dat @@ -0,0 +1,604 @@ +/************************************************************************** + * NCHW layout + **************************************************************************/ +// [c h w]: [2 6 18] +// [c h w]: [2 3 9] +unsigned char avgpool2x2s2_fp32_in[] = { + 0x80, 0xe0, 0xbf, 0xc0, 0x4b, 0xa4, 0x4c, 0xc0, 0xbb, 0x02, 0xdd, 0xc0, 0x84, 0xd4, 0x00, 0xc0, + 0xfa, 0x0f, 0xe0, 0xc0, 0xd7, 0x99, 0xb6, 0xc0, 0x46, 0xeb, 0x62, 0xc0, 0x05, 0x0e, 0x8f, 0xc0, + 0x3b, 0x2a, 0x03, 0xc1, 0x22, 0x91, 0xa8, 0xc0, 0xeb, 0x3f, 0x75, 0xbd, 0xb0, 0x07, 0xb7, 0xc0, + 0xa6, 0x83, 0xbc, 0xc0, 0x01, 0x98, 0xd8, 0xc0, 0xbf, 0x10, 0xdb, 0xc0, 0xbb, 0x73, 0xee, 0xc0, + 0x38, 0x09, 0x96, 0xc0, 0x07, 0xe8, 0x90, 0xc0, 0x25, 0xdc, 0x92, 0xc0, 0xf5, 0x12, 0x16, 0xc0, + 0xb8, 0x56, 0x8e, 0xc0, 0xce, 0xaf, 0x97, 0xc0, 0x1c, 0xb0, 0x55, 0xc0, 0x13, 0x63, 0x44, 0xc0, + 0x1d, 0x8d, 0x95, 0xc0, 0x11, 0x97, 0xc3, 0xc0, 0x5d, 0xcf, 0xa7, 0xc0, 0x6a, 0x88, 0xb8, 0xbe, + 0x63, 0x8a, 0xef, 0xc0, 0x69, 0xde, 0x57, 0xc0, 0xf5, 0x99, 0xc2, 0xc0, 0x8b, 0x6b, 0x9a, 0xc0, + 0x71, 0xe3, 0x48, 0xc0, 0xa0, 0x97, 0xa4, 0xc0, 0x3c, 0xc5, 0x10, 0xc1, 0xc1, 0xee, 0xaf, 0xc0, + 0xac, 0xc9, 0x86, 0xc0, 0xc0, 0x84, 0xc4, 0xc0, 0xb1, 0x84, 0xe9, 0xc0, 0xa2, 0xc5, 0xa0, 0xc0, + 0xab, 0x04, 0xb3, 0xc0, 0xd6, 0x32, 0x48, 0xc0, 0xe8, 0xd1, 0xa6, 0xc0, 0x47, 0xaa, 0x03, 0xc0, + 0xef, 0xfd, 0x28, 0xc0, 0x52, 0xa8, 0x72, 0xc0, 0x1b, 0xe9, 0x8e, 0xc0, 0x81, 0x5d, 0xf6, 0xc0, + 0x7c, 0xe7, 0xb4, 0xc0, 0xc9, 0xd7, 0x55, 0xc0, 0x99, 0xcb, 0xbe, 0xc0, 0x0a, 0x65, 0x13, 0xc0, + 0xae, 0x1e, 0xfd, 0xc0, 0x2f, 0x76, 0x5f, 0xc0, 0x14, 0xc3, 0x44, 0xc0, 0x48, 0xee, 0xd4, 0xc0, + 0xa7, 0x35, 0xd8, 0xc0, 0xa8, 0xaf, 0xae, 0xc0, 0x64, 0x29, 0xc9, 0xc0, 0x92, 0x55, 0x84, 0xc0, + 0x36, 0x69, 0x90, 0xc0, 0x35, 0x00, 0xd4, 0xbf, 0x96, 0xff, 0xd7, 0xc0, 0xac, 0x48, 0xb9, 0xc0, + 0x60, 0x0c, 0x99, 0xc0, 0x31, 0xd3, 0x8a, 0xc0, 0x0e, 0x46, 0x88, 0xc0, 0x2b, 0x64, 0xc7, 0xbf, + 0xc9, 0x48, 0x0d, 0xc1, 0xad, 0xeb, 0x2d, 0xc0, 0x8d, 0xcf, 0x92, 0xc0, 0x79, 0x87, 0xab, 0xc0, + 0x70, 0x74, 0x6f, 0xc0, 0x77, 0xf4, 0xfb, 0xbf, 0x91, 0x95, 0x2e, 0xc0, 0x00, 0x8d, 0x6c, 0xc0, + 0x7c, 0x0b, 0x40, 0xc0, 0x75, 0xa9, 0xb0, 0xc0, 0xb3, 0x7f, 0x1a, 0xc0, 0x44, 0xb1, 0x9d, 0xc0, + 0x1b, 0xe5, 0x92, 0xc0, 0xb2, 0xb7, 0x26, 0xc0, 0x43, 0x64, 0xc5, 0xc0, 0xcb, 0x53, 0xa1, 0xc0, + 0xcf, 0xd1, 0x60, 0xc0, 0xc7, 0xbc, 0x0c, 0xc0, 0x2a, 0xb8, 0xd9, 0xbe, 0x22, 0x28, 0x87, 0xc0, + 0x80, 0xd2, 0x11, 0xc1, 0xb3, 0x6d, 0xd0, 0xc0, 0xc2, 0xb2, 0x55, 0xc0, 0x32, 0xde, 0xef, 0xbf, + 0xda, 0xa6, 0xf6, 0xbf, 0xc6, 0x21, 0xcb, 0xc0, 0xb5, 0x0e, 0xba, 0xc0, 0xb1, 0x9c, 0x81, 0xc0, + 0xbe, 0x89, 0xb1, 0xc0, 0x5d, 0xcf, 0x5d, 0xc0, 0x26, 0x70, 0xd4, 0xc0, 0xb1, 0x61, 0x3a, 0xc0, + 0x55, 0x6e, 0x8b, 0xc0, 0x36, 0x16, 0xc8, 0xc0, 0xbb, 0x83, 0x00, 0xc0, 0x77, 0x70, 0xc9, 0xc0, + 0x5a, 0x49, 0xba, 0xc0, 0x52, 0xd8, 0x58, 0xc0, 0x19, 0xc8, 0xd6, 0xc0, 0xd9, 0xd2, 0xe0, 0xc0, + 0x78, 0x9a, 0x9b, 0xc0, 0xff, 0x86, 0x76, 0xc0, 0xb4, 0x32, 0x2d, 0xc0, 0x56, 0x07, 0xbd, 0xc0, + 0xdc, 0xf6, 0x1b, 0xc0, 0xfb, 0x2d, 0xe0, 0xc0, 0xc6, 0xdf, 0x17, 0xc1, 0x4f, 0x52, 0xfd, 0xc0, + 0xfc, 0x40, 0xa6, 0xc0, 0x3e, 0x49, 0xdd, 0x3e, 0xb1, 0x68, 0x2a, 0xc0, 0xcb, 0xed, 0xca, 0xc0, + 0xb6, 0xe2, 0xb5, 0xc0, 0x86, 0x8f, 0x13, 0xc1, 0x25, 0xce, 0x86, 0xc0, 0x11, 0x8b, 0x48, 0xc0, + 0x69, 0x86, 0xd9, 0xc0, 0x86, 0x4a, 0xd6, 0xc0, 0x2a, 0xf9, 0x8f, 0xc0, 0x01, 0x76, 0x5b, 0xc0, + 0x1c, 0xb6, 0xcb, 0xc0, 0xe0, 0xe8, 0xcd, 0xc0, 0x9f, 0x09, 0x22, 0xc1, 0x51, 0x00, 0xca, 0xc0, + 0xc4, 0xaa, 0x5d, 0xc0, 0xac, 0x1e, 0x9a, 0xc0, 0xdb, 0xd1, 0x86, 0xc0, 0x9c, 0x20, 0x9d, 0xc0, + 0x9e, 0xfd, 0x8a, 0xc0, 0x9a, 0x51, 0xf3, 0xc0, 0x18, 0xb9, 0xf8, 0xc0, 0x38, 0x6e, 0xf0, 0xc0, + 0xa9, 0x84, 0xab, 0xc0, 0xca, 0xe2, 0xac, 0xc0, 0x70, 0xc8, 0x11, 0xc1, 0x28, 0x80, 0x03, 0xc1, + 0xb1, 0x32, 0xe5, 0xc0, 0x59, 0x8e, 0x69, 0xc0, 0xec, 0x9d, 0xc7, 0xc0, 0xd4, 0x9d, 0x78, 0xc0, + 0xb5, 0x96, 0x01, 0xc1, 0x5a, 0x43, 0xc1, 0xc0, 0x66, 0xdc, 0xf4, 0xc0, 0x00, 0xa1, 0xb3, 0xc0, + 0x67, 0xbc, 0xd3, 0xc0, 0x79, 0x85, 0xd5, 0xc0, 0x90, 0xaf, 0x45, 0xbf, 0x87, 0xaa, 0xd8, 0xc0, + 0xef, 0xcb, 0x0c, 0xc1, 0x6e, 0xf7, 0xd5, 0xc0, 0x17, 0x7b, 0x76, 0xc0, 0x30, 0x24, 0x82, 0xc0, + 0x6c, 0x5d, 0x4a, 0xc0, 0x67, 0x41, 0xb9, 0xc0, 0x9e, 0x60, 0x09, 0xc1, 0x4b, 0xec, 0x09, 0xc1, + 0x63, 0x54, 0x0f, 0xc0, 0x0b, 0x6a, 0x90, 0xc0, 0xcb, 0x82, 0x02, 0xc1, 0x37, 0xd8, 0x41, 0xc0, + 0xaf, 0x14, 0x8d, 0xc0, 0x40, 0xfb, 0x0f, 0xc0, 0x38, 0x8e, 0xd4, 0xbf, 0x50, 0x63, 0xf6, 0xc0, + 0xde, 0x2e, 0xec, 0xc0, 0x44, 0x33, 0xd3, 0xc0, 0x98, 0x46, 0x42, 0xc0, 0x7f, 0xca, 0x21, 0xc0, + 0x67, 0xd5, 0xde, 0xc0, 0x9e, 0x7c, 0x3d, 0xc0, 0x7b, 0x12, 0xac, 0xc0, 0xb9, 0xce, 0x5a, 0xc0, + 0x62, 0x79, 0x45, 0xc0, 0xe8, 0xab, 0xa8, 0xc0, 0xa9, 0x42, 0xfa, 0xbf, 0x7a, 0xd1, 0x28, 0xc0, + 0xf2, 0xfc, 0x6d, 0xc0, 0xd0, 0x7a, 0x93, 0xc0, 0x33, 0x66, 0xc8, 0xc0, 0xf7, 0x52, 0x95, 0xc0, + 0x7d, 0x7f, 0xf2, 0xc0, 0x73, 0x8d, 0x76, 0xc0, 0xea, 0x7a, 0x0e, 0xc1, 0xa6, 0x75, 0xaa, 0xc0, + 0xff, 0x26, 0x46, 0xc0, 0x82, 0x4f, 0xbd, 0xc0, 0x98, 0x8a, 0xb4, 0xc0, 0x6b, 0x39, 0xba, 0xc0, + 0x54, 0xda, 0x82, 0xc0, 0x4f, 0x6e, 0x85, 0xc0, 0x6a, 0x64, 0x4f, 0xc0, 0x99, 0x5c, 0x93, 0xc0, + 0x8a, 0x58, 0xa7, 0xc0, 0xbc, 0xc1, 0xa2, 0xc0, 0xcd, 0x79, 0xb1, 0xc0, 0x5d, 0x2d, 0x11, 0xc1, + 0x5d, 0xd7, 0xf0, 0xc0, 0xe4, 0xe2, 0x90, 0xc0, 0x04, 0x29, 0xa4, 0xc0, 0xf8, 0x75, 0x7b, 0xc0, + 0xa3, 0xb8, 0xb3, 0xc0, 0x67, 0x34, 0x9d, 0xc0, 0x27, 0xd5, 0xa3, 0xc0, 0x8a, 0xb0, 0x96, 0xc0, + 0x45, 0xa3, 0xbd, 0xc0, 0xda, 0x56, 0xe8, 0xc0, 0xa7, 0xfb, 0x0e, 0xc0, 0x30, 0x8f, 0x19, 0xc1}; +unsigned char avgpool2x2s2_fp32_out[] = { + 0x12, 0x06, 0x81, 0xc0, 0xe0, 0xdc, 0x90, 0xc0, 0xda, 0xec, 0x98, 0xc0, 0xf6, 0x69, 0x96, 0xc0, + 0x5f, 0x8f, 0x98, 0xc0, 0xf2, 0x1a, 0x85, 0xc0, 0x4a, 0x88, 0xbc, 0xc0, 0x74, 0xa3, 0xb4, 0xc0, + 0x9e, 0x1a, 0xbe, 0xc0, 0x90, 0xa7, 0xa0, 0xc0, 0xe9, 0x4b, 0xc4, 0xc0, 0x43, 0x27, 0x99, 0xc0, + 0x28, 0x08, 0x57, 0xc0, 0xd8, 0xc6, 0x97, 0xc0, 0x8b, 0x49, 0xaa, 0xc0, 0x3c, 0xf9, 0x6c, 0xc0, + 0x62, 0x81, 0x9e, 0xc0, 0x33, 0xcc, 0xaa, 0xc0, 0x22, 0xc4, 0x2e, 0xc0, 0x62, 0x2e, 0x6b, 0xc0, + 0xa6, 0x16, 0x93, 0xc0, 0xa2, 0xd8, 0x82, 0xc0, 0x7c, 0xf8, 0x85, 0xc0, 0x26, 0x8f, 0xae, 0xc0, + 0xd0, 0x3c, 0x60, 0xc0, 0x94, 0xbc, 0x5d, 0xc0, 0x6a, 0xeb, 0xea, 0xc0, 0x88, 0x24, 0x85, 0xc0, + 0xeb, 0x4f, 0xab, 0xc0, 0x3e, 0x0f, 0xcf, 0xc0, 0x7a, 0x81, 0xcd, 0xc0, 0x70, 0x2f, 0x5e, 0xc0, + 0x57, 0x9c, 0xa7, 0xc0, 0x44, 0x8a, 0xf1, 0xc0, 0xc8, 0xde, 0x90, 0xc0, 0x88, 0x98, 0xf6, 0xc0, + 0xec, 0x24, 0xe0, 0xc0, 0x44, 0x00, 0x87, 0xc0, 0x9e, 0x98, 0xca, 0xc0, 0xed, 0x63, 0x9f, 0xc0, + 0x30, 0x32, 0xb5, 0xc0, 0xa7, 0x30, 0xac, 0xc0, 0xf6, 0x65, 0xa8, 0xc0, 0x5c, 0xbd, 0x8e, 0xc0, + 0x7d, 0xfa, 0x8d, 0xc0, 0xda, 0x9d, 0x81, 0xc0, 0xd6, 0x89, 0x6e, 0xc0, 0x74, 0x93, 0xb7, 0xc0, + 0xda, 0xdc, 0xb7, 0xc0, 0x8e, 0xea, 0xa3, 0xc0, 0x21, 0x16, 0xc6, 0xc0, 0x2c, 0xba, 0x96, 0xc0, + 0x89, 0x2f, 0xc5, 0xc0, 0x36, 0xb9, 0xa0, 0xc0}; +unsigned char avgpool2x2s2_fp16_in[] = { + 0xff, 0xc5, 0x65, 0xc2, 0xe8, 0xc6, 0x06, 0xc0, 0x00, 0xc7, 0xb4, 0xc5, 0x17, 0xc3, 0x78, 0xc4, + 0x19, 0xc8, 0x44, 0xc5, 0xa9, 0xab, 0xb8, 0xc5, 0xe4, 0xc5, 0xc4, 0xc6, 0xd8, 0xc6, 0x73, 0xc7, + 0xb0, 0xc4, 0x87, 0xc4, 0x96, 0xc4, 0xb0, 0xc0, 0x72, 0xc4, 0xbd, 0xc4, 0xad, 0xc2, 0x23, 0xc2, + 0xac, 0xc4, 0x1c, 0xc6, 0x3e, 0xc5, 0xc4, 0xb5, 0x7c, 0xc7, 0xbe, 0xc2, 0x14, 0xc6, 0xd3, 0xc4, + 0x47, 0xc2, 0x24, 0xc5, 0x86, 0xc8, 0x7f, 0xc5, 0x36, 0xc4, 0x24, 0xc6, 0x4c, 0xc7, 0x06, 0xc5, + 0x98, 0xc5, 0x41, 0xc2, 0x36, 0xc5, 0x1d, 0xc0, 0x47, 0xc1, 0x95, 0xc3, 0x77, 0xc4, 0xb2, 0xc7, + 0xa7, 0xc5, 0xae, 0xc2, 0xf6, 0xc5, 0x9b, 0xc0, 0xe8, 0xc7, 0xfb, 0xc2, 0x26, 0xc2, 0xa7, 0xc6, + 0xc1, 0xc6, 0x75, 0xc5, 0x49, 0xc6, 0x22, 0xc4, 0x83, 0xc4, 0xa0, 0xbe, 0xbf, 0xc6, 0xca, 0xc5, + 0xc8, 0xc4, 0x56, 0xc4, 0x42, 0xc4, 0x3b, 0xbe, 0x6a, 0xc8, 0x6f, 0xc1, 0x96, 0xc4, 0x5c, 0xc5, + 0x7b, 0xc3, 0xdf, 0xbf, 0x74, 0xc1, 0x64, 0xc3, 0x00, 0xc2, 0x85, 0xc5, 0xd3, 0xc0, 0xed, 0xc4, + 0x97, 0xc4, 0x35, 0xc1, 0x2b, 0xc6, 0x0a, 0xc5, 0x06, 0xc3, 0x65, 0xc0, 0xcd, 0xb6, 0x39, 0xc4, + 0x8e, 0xc8, 0x83, 0xc6, 0xad, 0xc2, 0x7e, 0xbf, 0xb5, 0xbf, 0x59, 0xc6, 0xd0, 0xc5, 0x0c, 0xc4, + 0x8c, 0xc5, 0xee, 0xc2, 0xa3, 0xc6, 0xd3, 0xc1, 0x5b, 0xc4, 0x40, 0xc6, 0x04, 0xc0, 0x4b, 0xc6, + 0xd2, 0xc5, 0xc6, 0xc2, 0xb6, 0xc6, 0x06, 0xc7, 0xdc, 0xc4, 0xb4, 0xc3, 0x69, 0xc1, 0xe8, 0xc5, + 0xdf, 0xc0, 0x01, 0xc7, 0xbe, 0xc8, 0xea, 0xc7, 0x32, 0xc5, 0xea, 0x36, 0x53, 0xc1, 0x57, 0xc6, + 0xaf, 0xc5, 0x9c, 0xc8, 0x36, 0xc4, 0x44, 0xc2, 0xcc, 0xc6, 0xb2, 0xc6, 0x7f, 0xc4, 0xdb, 0xc2, + 0x5d, 0xc6, 0x6f, 0xc6, 0x10, 0xc9, 0x50, 0xc6, 0xed, 0xc2, 0xd0, 0xc4, 0x36, 0xc4, 0xe9, 0xc4, + 0x57, 0xc4, 0x9a, 0xc7, 0xc5, 0xc7, 0x83, 0xc7, 0x5c, 0xc5, 0x67, 0xc5, 0x8e, 0xc8, 0x1c, 0xc8, + 0x29, 0xc7, 0x4c, 0xc3, 0x3c, 0xc6, 0xc4, 0xc3, 0x0c, 0xc8, 0x0a, 0xc6, 0xa6, 0xc7, 0x9d, 0xc5, + 0x9d, 0xc6, 0xac, 0xc6, 0x2d, 0xba, 0xc5, 0xc6, 0x66, 0xc8, 0xaf, 0xc6, 0xb3, 0xc3, 0x11, 0xc4, + 0x52, 0xc2, 0xca, 0xc5, 0x4b, 0xc8, 0x4f, 0xc8, 0x7a, 0xc0, 0x83, 0xc4, 0x14, 0xc8, 0x0e, 0xc2, + 0x68, 0xc4, 0x7f, 0xc0, 0xa4, 0xbe, 0xb3, 0xc7, 0x61, 0xc7, 0x99, 0xc6, 0x12, 0xc2, 0x0e, 0xc1, + 0xf6, 0xc6, 0xeb, 0xc1, 0x60, 0xc5, 0xd6, 0xc2, 0x2b, 0xc2, 0x45, 0xc5, 0xd2, 0xbf, 0x46, 0xc1, + 0x6f, 0xc3, 0x9b, 0xc4, 0x43, 0xc6, 0xaa, 0xc4, 0x93, 0xc7, 0xb4, 0xc3, 0x73, 0xc8, 0x53, 0xc5, + 0x31, 0xc2, 0xea, 0xc5, 0xa4, 0xc5, 0xd1, 0xc5, 0x16, 0xc4, 0x2b, 0xc4, 0x7b, 0xc2, 0x9a, 0xc4, + 0x3a, 0xc5, 0x16, 0xc5, 0x8b, 0xc5, 0x89, 0xc8, 0x86, 0xc7, 0x87, 0xc4, 0x21, 0xc5, 0xdb, 0xc3, + 0x9d, 0xc5, 0xe9, 0xc4, 0x1e, 0xc5, 0xb5, 0xc4, 0xed, 0xc5, 0x42, 0xc7, 0x77, 0xc0, 0xcc, 0xc8}; +unsigned char avgpool2x2s2_fp16_out[] = { + 0x08, 0xc4, 0x87, 0xc4, 0xc7, 0xc4, 0xb3, 0xc4, 0xc4, 0xc4, 0x29, 0xc4, 0xe4, 0xc5, 0xa5, 0xc5, + 0xf1, 0xc5, 0x05, 0xc5, 0x22, 0xc6, 0xc9, 0xc4, 0xb8, 0xc2, 0xbe, 0xc4, 0x52, 0xc5, 0x68, 0xc3, + 0xf4, 0xc4, 0x56, 0xc5, 0x76, 0xc1, 0x59, 0xc3, 0x98, 0xc4, 0x16, 0xc4, 0x30, 0xc4, 0x74, 0xc5, + 0x01, 0xc3, 0xed, 0xc2, 0x57, 0xc7, 0x28, 0xc4, 0x5a, 0xc5, 0x78, 0xc6, 0x6c, 0xc6, 0xf2, 0xc2, + 0x3c, 0xc5, 0x8c, 0xc7, 0x87, 0xc4, 0xb4, 0xc7, 0x01, 0xc7, 0x38, 0xc4, 0x54, 0xc6, 0xfb, 0xc4, + 0xa9, 0xc5, 0x61, 0xc5, 0x43, 0xc5, 0x76, 0xc4, 0x70, 0xc4, 0x0c, 0xc4, 0x74, 0xc3, 0xbc, 0xc5, + 0xbe, 0xc5, 0x1e, 0xc5, 0x30, 0xc6, 0xb6, 0xc4, 0x29, 0xc6, 0x05, 0xc5}; + +// [c h w]: [2 7 19] +// [c h w]: [2 4 10] +unsigned char avgpool2x2s2_p1_fp32_in[] = { + 0xd9, 0xdc, 0xfd, 0x3f, 0x1e, 0xdc, 0x95, 0x3f, 0xb1, 0xb5, 0x0d, 0x40, 0xc6, 0x1e, 0xad, 0x3f, + 0x0a, 0x77, 0xf3, 0x3f, 0x47, 0x8b, 0xa4, 0x3e, 0x1a, 0x8c, 0x88, 0x3f, 0xbe, 0xc4, 0xae, 0x3f, + 0x5d, 0x39, 0x8f, 0xbf, 0x35, 0x5b, 0x15, 0xbd, 0x20, 0xa5, 0xda, 0x3f, 0xfe, 0x2e, 0xde, 0x3e, + 0x1c, 0xab, 0x33, 0x40, 0x39, 0x4b, 0xd4, 0x3f, 0xc0, 0xbf, 0x10, 0x40, 0x62, 0x58, 0x04, 0x3f, + 0x8b, 0xde, 0x91, 0x3f, 0x0f, 0x99, 0x78, 0x3e, 0x9c, 0xfb, 0x80, 0x3e, 0x3e, 0xca, 0x1b, 0x3f, + 0x7b, 0x59, 0xa9, 0x3e, 0x14, 0xbc, 0x16, 0x40, 0xd8, 0x8b, 0xc9, 0x3f, 0x92, 0x5e, 0x02, 0xbf, + 0x35, 0xf0, 0xff, 0x3f, 0x60, 0xdf, 0x99, 0xbf, 0xf3, 0xe7, 0x23, 0x40, 0x5b, 0xb9, 0x8c, 0xbf, + 0xc1, 0x8c, 0xd6, 0x3f, 0x7a, 0x8b, 0x99, 0xbe, 0x6f, 0xb6, 0x4e, 0xbe, 0xc0, 0xae, 0x87, 0x3f, + 0xf6, 0xe7, 0x47, 0x3f, 0x3a, 0x44, 0x95, 0xbe, 0x2b, 0x93, 0x40, 0x3f, 0xa2, 0x0a, 0x01, 0x3f, + 0x1d, 0x20, 0x00, 0xbf, 0x1e, 0x0e, 0x87, 0x3f, 0xdb, 0x9d, 0x83, 0x3f, 0x14, 0x3b, 0xe7, 0x3f, + 0xae, 0xca, 0x4b, 0x3f, 0xa0, 0x3a, 0x0c, 0x40, 0x5f, 0x30, 0x5b, 0x3e, 0x9c, 0xe7, 0x99, 0x3f, + 0x4e, 0x7c, 0xd9, 0x3f, 0x74, 0xbf, 0x9f, 0x3f, 0xa1, 0x44, 0x89, 0x3c, 0xf8, 0x2a, 0x2a, 0xbf, + 0x6b, 0x00, 0x1e, 0x3f, 0x68, 0xff, 0x51, 0x3f, 0x8e, 0x73, 0x48, 0x3f, 0x3a, 0x85, 0xca, 0x3e, + 0x79, 0x09, 0x41, 0x3f, 0x9d, 0xf9, 0xa1, 0x3e, 0x0f, 0xeb, 0x99, 0x3e, 0x4c, 0x83, 0x1f, 0x3f, + 0x39, 0x0c, 0xcb, 0x3f, 0x51, 0xa2, 0x02, 0x40, 0xc3, 0x9d, 0x8f, 0x3f, 0xae, 0x56, 0x96, 0x3e, + 0x5b, 0x31, 0x5b, 0xbf, 0x56, 0x77, 0xba, 0x3f, 0xca, 0xd3, 0x51, 0x3f, 0xf5, 0xf8, 0x78, 0x3f, + 0x94, 0x98, 0x4a, 0x3e, 0xce, 0x08, 0x9f, 0x3f, 0x4e, 0x24, 0x8a, 0x3e, 0xca, 0xda, 0x6b, 0x3f, + 0x3f, 0x46, 0x37, 0xbe, 0xa9, 0x9c, 0x03, 0xbf, 0xf1, 0x2a, 0x20, 0x3f, 0xb9, 0x16, 0x96, 0x3e, + 0xba, 0x91, 0xee, 0x3e, 0xfb, 0xb8, 0x15, 0x3f, 0x65, 0x36, 0x02, 0x40, 0xdb, 0xe9, 0xfa, 0x3f, + 0x60, 0xb1, 0xaf, 0x3f, 0x47, 0x06, 0xf2, 0x3f, 0xfc, 0x90, 0xb9, 0xbe, 0x70, 0xbc, 0xab, 0x3f, + 0x36, 0x0f, 0x44, 0x3f, 0xf0, 0xa0, 0xde, 0xbe, 0x80, 0xd8, 0xc0, 0x3f, 0xbb, 0x38, 0xb4, 0x3f, + 0x6b, 0x8e, 0x53, 0x3f, 0x19, 0x16, 0xb4, 0xbe, 0x56, 0x44, 0x07, 0x3f, 0xf3, 0x25, 0x58, 0xbd, + 0x8c, 0x3e, 0x44, 0x3e, 0x79, 0x16, 0x92, 0x3f, 0xe8, 0x84, 0x9d, 0x3d, 0x7c, 0x46, 0xda, 0x3e, + 0x13, 0x00, 0x78, 0x3f, 0x75, 0xa6, 0xe1, 0x3e, 0xd3, 0xcb, 0xfe, 0x3f, 0xc6, 0x63, 0x57, 0xbe, + 0xb0, 0x96, 0x00, 0x3f, 0xac, 0xa2, 0xcf, 0x3f, 0x09, 0xa2, 0x25, 0x3f, 0x37, 0xbf, 0x13, 0x40, + 0xae, 0xec, 0x4f, 0x3f, 0xc5, 0x66, 0x32, 0x40, 0xa0, 0xf8, 0xf8, 0x3f, 0x38, 0x78, 0x10, 0x3f, + 0x5a, 0x21, 0x50, 0x3f, 0xbb, 0x4c, 0x94, 0xbe, 0x09, 0x23, 0x56, 0x40, 0x3e, 0xea, 0x56, 0x3f, + 0xc4, 0x61, 0x2a, 0x40, 0x42, 0x2d, 0x83, 0x3f, 0xb0, 0xfd, 0x42, 0xbe, 0xa1, 0x51, 0x1d, 0x40, + 0xd6, 0x87, 0xf0, 0x3f, 0x18, 0xa4, 0xbd, 0x3f, 0x31, 0x32, 0x20, 0xbe, 0x36, 0xda, 0x86, 0x3f, + 0xe4, 0xe9, 0x07, 0x40, 0x62, 0xa0, 0x07, 0x3f, 0xb9, 0x3e, 0xb0, 0x3f, 0x72, 0x30, 0x5b, 0x40, + 0x46, 0xb8, 0xdb, 0x3f, 0xf9, 0xe3, 0xcf, 0x3e, 0xd2, 0x4c, 0x06, 0x40, 0x0b, 0x24, 0xd6, 0x3f, + 0xaf, 0x9e, 0x20, 0x3f, 0x9f, 0xed, 0xf6, 0x3e, 0x0f, 0x46, 0xe2, 0x3f, 0xf9, 0x5a, 0x9d, 0x3f, + 0x24, 0x28, 0x82, 0x3f, 0x28, 0x23, 0xa0, 0xbf, 0xb6, 0x74, 0x12, 0x40, 0x88, 0x21, 0xe0, 0x3f, + 0x09, 0xa3, 0x03, 0x40, 0x7b, 0x74, 0x86, 0x3f, 0xd7, 0x7b, 0x91, 0x3d, 0x1f, 0xd3, 0x4e, 0x40, + 0x10, 0x98, 0xda, 0x3f, 0xbf, 0xea, 0x98, 0xbd, 0xb9, 0x0b, 0xa2, 0x3f, 0x3d, 0xa7, 0x3f, 0x3d, + 0xfa, 0x1e, 0x1c, 0x3d, 0x80, 0x3c, 0xd3, 0x3e, 0xe5, 0x65, 0x90, 0x3f, 0xbb, 0x81, 0xce, 0x3f, + 0xd8, 0x0b, 0xe8, 0x3f, 0x28, 0xe9, 0xa0, 0x3e, 0x59, 0x8e, 0xbf, 0x3d, 0x0b, 0x50, 0x59, 0xbf, + 0x8f, 0x5b, 0x47, 0x3f, 0xc9, 0xa5, 0xc9, 0x3f, 0x1d, 0xeb, 0xb4, 0x3f, 0x37, 0x30, 0xb5, 0xbf, + 0x88, 0xfb, 0xc0, 0x3d, 0x68, 0x1d, 0x20, 0xbf, 0xd6, 0xf8, 0x5e, 0x3f, 0x77, 0xa5, 0x8e, 0x3f, + 0x15, 0x74, 0xf4, 0x3f, 0x0d, 0x65, 0x83, 0x3f, 0xec, 0x50, 0xcf, 0x3f, 0xd8, 0x6a, 0x9e, 0x3f, + 0x99, 0x46, 0x13, 0x40, 0x17, 0x60, 0xa1, 0x3f, 0x43, 0xe1, 0x01, 0x3d, 0x04, 0xcc, 0x7b, 0x3f, + 0x69, 0x87, 0x80, 0x3f, 0x93, 0xf0, 0xa4, 0x3f, 0xeb, 0xfc, 0xb8, 0x3f, 0xf0, 0xec, 0x18, 0x40, + 0x34, 0x5d, 0x3a, 0x3f, 0x62, 0x16, 0x20, 0x40, 0x77, 0x98, 0x49, 0x3f, 0xdc, 0xa2, 0x7a, 0xbe, + 0x3a, 0x37, 0x6d, 0x40, 0x00, 0xa8, 0xf8, 0x3f, 0x7c, 0x9c, 0x88, 0x3f, 0xc2, 0x6d, 0x46, 0xbe, + 0x04, 0x2f, 0xd4, 0x3f, 0x39, 0x1d, 0x23, 0x3f, 0xfd, 0xe7, 0xe1, 0x3f, 0x29, 0xe5, 0x36, 0x3f, + 0x18, 0x0b, 0x1a, 0x40, 0x6c, 0x72, 0x97, 0x3e, 0x67, 0x4d, 0x6c, 0x3f, 0x94, 0x00, 0x83, 0x3e, + 0xc0, 0x42, 0xee, 0x3f, 0x2d, 0x19, 0xa6, 0x3f, 0x2e, 0x42, 0xd9, 0x3d, 0x74, 0xd2, 0x98, 0xbe, + 0x87, 0x56, 0x97, 0x3f, 0xdb, 0x4f, 0x3b, 0x3f, 0xb7, 0x77, 0xbe, 0xbe, 0x21, 0x19, 0xbe, 0x3f, + 0x4f, 0x55, 0x0e, 0xbf, 0x88, 0xd6, 0x0a, 0x40, 0x51, 0x0c, 0x96, 0x3d, 0x23, 0x7a, 0x73, 0x3f, + 0x9d, 0x0f, 0x21, 0x40, 0x6b, 0x63, 0x76, 0x3e, 0x79, 0x08, 0x28, 0x3f, 0xb4, 0xa8, 0x12, 0x40, + 0x5e, 0x32, 0xd7, 0x3e, 0xc8, 0xc1, 0x66, 0x3f, 0xda, 0x41, 0x26, 0xbe, 0xad, 0x3e, 0xef, 0x3f, + 0xce, 0xeb, 0xbb, 0xbe, 0xaf, 0xb0, 0x7a, 0xbc, 0xe3, 0xc1, 0x78, 0x3f, 0x08, 0xdb, 0x07, 0x40, + 0x4c, 0xcc, 0xa9, 0x3e, 0xb2, 0x1b, 0xe2, 0x3f, 0x1f, 0x70, 0x30, 0x3f, 0xb0, 0x6c, 0x13, 0x40, + 0xda, 0xf4, 0xba, 0x3f, 0xc4, 0xb5, 0x42, 0x3f, 0x4b, 0x94, 0x07, 0x40, 0x79, 0xac, 0x90, 0x3f, + 0x3e, 0xb4, 0xdb, 0x3f, 0x78, 0x6f, 0x3c, 0x40, 0xc8, 0x2e, 0x7c, 0x40, 0x27, 0x62, 0xa4, 0xbe, + 0x2e, 0x46, 0x10, 0x40, 0xb5, 0x3e, 0xf9, 0x3f, 0xb8, 0x21, 0xe4, 0x3e, 0xdb, 0x81, 0x8f, 0xbe, + 0x08, 0x25, 0x64, 0x3f, 0x64, 0x13, 0x96, 0xbd, 0x36, 0x38, 0x97, 0x3e, 0xd6, 0xec, 0xed, 0x3f, + 0xda, 0x5a, 0x89, 0x3f, 0xd4, 0x85, 0xb6, 0x3f, 0x75, 0x17, 0x03, 0x3f, 0xe1, 0xdd, 0x0d, 0x40, + 0xd0, 0xe3, 0x8b, 0x3f, 0x72, 0x9f, 0xcd, 0x3f, 0xe3, 0x5e, 0x6c, 0x3f, 0xd8, 0xae, 0x47, 0x3f, + 0x88, 0x0c, 0xa9, 0x3f, 0xf5, 0x8e, 0xcd, 0x3e, 0x9e, 0x05, 0xdd, 0x3e, 0x35, 0xf3, 0xd0, 0x3f, + 0x6b, 0xa1, 0xf9, 0x3f, 0xc7, 0x95, 0xe4, 0x3e, 0x43, 0xec, 0xb4, 0x3f, 0x93, 0xd6, 0xbe, 0x3f, + 0xa6, 0x83, 0xcb, 0x3f, 0x15, 0x9e, 0x1b, 0x40, 0xbd, 0xc1, 0x90, 0x3f, 0xed, 0xdd, 0x20, 0x3e, + 0x10, 0x65, 0x21, 0x3e, 0x48, 0x9d, 0x4b, 0x3f, 0xb0, 0x8d, 0x88, 0x3f, 0x50, 0x10, 0x54, 0x3f, + 0x5c, 0x99, 0x80, 0x3f, 0xe7, 0x22, 0xfc, 0xbe, 0x68, 0x92, 0x1b, 0x40, 0x73, 0x6d, 0x5c, 0x3f, + 0x4c, 0x17, 0x12, 0x3f, 0x58, 0x81, 0x17, 0xbe, 0xc9, 0xd5, 0x81, 0xbd, 0xc5, 0x91, 0xa8, 0x3f, + 0x30, 0xb1, 0xdb, 0x3f, 0x2f, 0x15, 0x28, 0x3e, 0xaa, 0x07, 0x42, 0x3f, 0x1c, 0xb5, 0x0e, 0x40, + 0x69, 0x6e, 0x94, 0x3f, 0x9f, 0x00, 0x00, 0xbf}; +unsigned char avgpool2x2s2_p1_fp32_out[] = { + 0xd9, 0xdc, 0xfd, 0x3e, 0xc0, 0xa3, 0x58, 0x3f, 0xe8, 0x4a, 0x50, 0x3f, 0xec, 0xae, 0xb1, 0x3e, + 0x08, 0x5b, 0x7c, 0x3d, 0x46, 0xfa, 0xd5, 0x3e, 0xfc, 0x70, 0x4f, 0x3f, 0x5c, 0xe5, 0x7a, 0x3f, + 0xbc, 0x0a, 0xd4, 0x3e, 0x24, 0x48, 0xfd, 0x3d, 0xfa, 0x82, 0xd1, 0x3e, 0xbc, 0x3b, 0xa9, 0x3f, + 0xee, 0x1b, 0x5e, 0x3f, 0x60, 0xba, 0x6c, 0x3f, 0x89, 0x7d, 0x2e, 0x3f, 0x9c, 0x14, 0xaa, 0x3e, + 0xb6, 0x88, 0x1d, 0x3f, 0xf8, 0xc8, 0xd1, 0x3e, 0x11, 0xc8, 0xef, 0x3e, 0xf8, 0xe5, 0x30, 0x3f, + 0x01, 0x7b, 0x5a, 0x3f, 0xbb, 0x6a, 0x3c, 0x3f, 0x5a, 0x51, 0x2d, 0x3f, 0x52, 0x4b, 0x37, 0x3f, + 0xe8, 0x2d, 0x6b, 0x3f, 0x1d, 0x13, 0xaf, 0x3e, 0xf5, 0xe0, 0x0d, 0xbe, 0xf8, 0xc4, 0x08, 0x3f, + 0x4a, 0x89, 0x1c, 0x3f, 0x05, 0xa3, 0xcd, 0x3f, 0xfc, 0xca, 0xbb, 0xbd, 0x00, 0xa7, 0xa9, 0x3f, + 0x97, 0x97, 0x9b, 0x3f, 0xa1, 0xdb, 0x0b, 0x40, 0xd8, 0x71, 0xa0, 0x3f, 0x70, 0xb8, 0x34, 0x3f, + 0x2a, 0xef, 0xcd, 0x3f, 0xfa, 0xdc, 0xbd, 0x3f, 0xe8, 0x84, 0x53, 0x3f, 0xe2, 0x64, 0xe5, 0x3f, + 0x7b, 0x74, 0x86, 0x3e, 0xfe, 0x5e, 0x53, 0x3f, 0x64, 0x09, 0xd1, 0x3e, 0xf3, 0x08, 0xa8, 0x3e, + 0x5f, 0xc0, 0xe6, 0x3d, 0xd0, 0x73, 0x2f, 0x3f, 0x11, 0x23, 0x08, 0x3f, 0x40, 0x5e, 0x41, 0xbe, + 0xc8, 0xa9, 0x16, 0x3f, 0x00, 0x34, 0x0a, 0xba, 0x18, 0x25, 0x1a, 0xbd, 0x0b, 0xa1, 0xbc, 0x3f, + 0x28, 0x74, 0x79, 0x3f, 0xe6, 0x1c, 0x9e, 0x3f, 0xa7, 0x94, 0xc0, 0x3f, 0xf6, 0xb0, 0x7f, 0x3f, + 0x22, 0xaa, 0x4a, 0x3f, 0x5a, 0x92, 0xbc, 0x3f, 0x00, 0x34, 0x3b, 0x3f, 0xdd, 0x7d, 0xa6, 0x3f, + 0xc4, 0x7d, 0xb2, 0x3e, 0xf5, 0xff, 0x7a, 0x3f, 0x64, 0xd7, 0x8e, 0x3f, 0x57, 0xec, 0xd6, 0x3f, + 0xf6, 0xd8, 0xb1, 0x3f, 0xc2, 0x98, 0xca, 0x3f, 0xee, 0x18, 0x9e, 0x3f, 0xb1, 0x6b, 0xd5, 0x3e, + 0xde, 0x1c, 0xe3, 0x3e, 0x02, 0x79, 0x93, 0x3f, 0x98, 0x76, 0x9d, 0x3e, 0x6a, 0x06, 0x39, 0x3f, + 0x5a, 0x8d, 0xa6, 0x3f, 0xc3, 0xaf, 0x42, 0x3f, 0xe0, 0x8f, 0xac, 0x3f, 0xa0, 0xc0, 0xa0, 0x3e, + 0x42, 0xc2, 0x9a, 0x3f, 0xc6, 0x62, 0x6f, 0x3f, 0x11, 0x32, 0xc2, 0x3f, 0x00, 0x1b, 0x87, 0x3f}; +unsigned char avgpool2x2s2_p1_fp16_in[] = { + 0xee, 0x3f, 0xae, 0x3c, 0x6d, 0x40, 0x68, 0x3d, 0x9b, 0x3f, 0x24, 0x35, 0x44, 0x3c, 0x76, 0x3d, + 0x79, 0xbc, 0xaa, 0xa8, 0xd5, 0x3e, 0xf1, 0x36, 0x9d, 0x41, 0xa2, 0x3e, 0x85, 0x40, 0x22, 0x38, + 0x8e, 0x3c, 0xc4, 0x33, 0x07, 0x34, 0xde, 0x38, 0x4a, 0x35, 0xb5, 0x40, 0x4c, 0x3e, 0x12, 0xb8, + 0xff, 0x3f, 0xce, 0xbc, 0x1f, 0x41, 0x65, 0xbc, 0xb4, 0x3e, 0xcc, 0xb4, 0x75, 0xb2, 0x3d, 0x3c, + 0x3f, 0x3a, 0xaa, 0xb4, 0x04, 0x3a, 0x08, 0x38, 0x01, 0xb8, 0x38, 0x3c, 0x1c, 0x3c, 0x39, 0x3f, + 0x5e, 0x3a, 0x61, 0x40, 0xd9, 0x32, 0xcf, 0x3c, 0xcb, 0x3e, 0xfd, 0x3c, 0x4a, 0x24, 0x51, 0xb9, + 0xf0, 0x38, 0x8f, 0x3a, 0x43, 0x3a, 0x54, 0x36, 0x08, 0x3a, 0x0f, 0x35, 0xcf, 0x34, 0xfc, 0x38, + 0x58, 0x3e, 0x15, 0x40, 0x7c, 0x3c, 0xb2, 0x34, 0xd9, 0xba, 0xd3, 0x3d, 0x8e, 0x3a, 0xc7, 0x3b, + 0x54, 0x32, 0xf8, 0x3c, 0x51, 0x34, 0x5e, 0x3b, 0xba, 0xb1, 0x1c, 0xb8, 0x01, 0x39, 0xb0, 0x34, + 0x74, 0x37, 0xad, 0x38, 0x11, 0x40, 0xd7, 0x3f, 0x7d, 0x3d, 0x90, 0x3f, 0xcc, 0xb5, 0x5d, 0x3d, + 0x20, 0x3a, 0xf5, 0xb6, 0x06, 0x3e, 0xa1, 0x3d, 0x9c, 0x3a, 0xa0, 0xb5, 0x3a, 0x38, 0xc1, 0xaa, + 0x21, 0x32, 0x90, 0x3c, 0xec, 0x2c, 0xd2, 0x36, 0xc0, 0x3b, 0x0d, 0x37, 0xf6, 0x3f, 0xbb, 0xb2, + 0x04, 0x38, 0x7d, 0x3e, 0x2d, 0x39, 0x9d, 0x40, 0x7f, 0x3a, 0x93, 0x41, 0xc7, 0x3f, 0x83, 0x38, + 0x81, 0x3a, 0xa2, 0xb4, 0xb1, 0x42, 0xb7, 0x3a, 0x53, 0x41, 0x19, 0x3c, 0x17, 0xb2, 0xea, 0x40, + 0x84, 0x3f, 0xed, 0x3d, 0x01, 0xb1, 0x36, 0x3c, 0x3f, 0x40, 0x3d, 0x38, 0x81, 0x3d, 0xd9, 0x42, + 0xdd, 0x3e, 0x7f, 0x36, 0x32, 0x40, 0xb1, 0x3e, 0x04, 0x39, 0xb7, 0x37, 0x12, 0x3f, 0xea, 0x3c, + 0x11, 0x3c, 0x01, 0xbd, 0x93, 0x40, 0x01, 0x3f, 0x1d, 0x40, 0x33, 0x3c, 0x8b, 0x2c, 0x76, 0x42, + 0xd4, 0x3e, 0xc7, 0xac, 0x10, 0x3d, 0xfd, 0x29, 0xe0, 0x28, 0x99, 0x36, 0x83, 0x3c, 0x74, 0x3e, + 0x40, 0x3f, 0x07, 0x35, 0xfc, 0x2d, 0xca, 0xba, 0x3a, 0x3a, 0x4d, 0x3e, 0xa7, 0x3d, 0xa9, 0xbd, + 0x07, 0x2e, 0x00, 0xb9, 0xf7, 0x3a, 0x75, 0x3c, 0xa3, 0x3f, 0x1b, 0x3c, 0x7a, 0x3e, 0xf3, 0x3c, + 0x9a, 0x40, 0x0b, 0x3d, 0x0f, 0x28, 0xde, 0x3b, 0x04, 0x3c, 0x27, 0x3d, 0xc7, 0x3d, 0xc7, 0x40, + 0xd2, 0x39, 0x00, 0x41, 0x4c, 0x3a, 0xd5, 0xb3, 0x69, 0x43, 0xc5, 0x3f, 0x44, 0x3c, 0x33, 0xb2, + 0xa1, 0x3e, 0x18, 0x39, 0x0f, 0x3f, 0xb7, 0x39, 0xd0, 0x40, 0xbb, 0x34, 0x62, 0x3b, 0x18, 0x34, + 0x72, 0x3f, 0x30, 0x3d, 0xca, 0x2e, 0xc6, 0xb4, 0xba, 0x3c, 0xda, 0x39, 0xf3, 0xb5, 0xf0, 0x3d, + 0x72, 0xb8, 0x56, 0x40, 0xb0, 0x2c, 0x9b, 0x3b, 0x08, 0x41, 0xb3, 0x33, 0x40, 0x39, 0x95, 0x40, + 0xb9, 0x36, 0x36, 0x3b, 0x32, 0xb1, 0x79, 0x3f, 0xdf, 0xb5, 0xd5, 0xa3, 0xc6, 0x3b, 0x3e, 0x40, + 0x4e, 0x35, 0x10, 0x3f, 0x83, 0x39, 0x9b, 0x40, 0xd7, 0x3d, 0x15, 0x3a, 0x3c, 0x40, 0x85, 0x3c, + 0xdd, 0x3e, 0xe3, 0x41, 0xe1, 0x43, 0x23, 0xb5, 0x82, 0x40, 0xc9, 0x3f, 0x21, 0x37, 0x7c, 0xb4, + 0x21, 0x3b, 0xb0, 0xac, 0xb9, 0x34, 0x6f, 0x3f, 0x4a, 0x3c, 0xb4, 0x3d, 0x18, 0x38, 0x6e, 0x40, + 0x5f, 0x3c, 0x6c, 0x3e, 0x62, 0x3b, 0x3d, 0x3a, 0x48, 0x3d, 0x6c, 0x36, 0xe8, 0x36, 0x87, 0x3e, + 0xcd, 0x3f, 0x24, 0x37, 0xa7, 0x3d, 0xf6, 0x3d, 0x5c, 0x3e, 0xdc, 0x40, 0x86, 0x3c, 0x06, 0x31, + 0x0b, 0x31, 0x5c, 0x3a, 0x44, 0x3c, 0xa0, 0x3a, 0x04, 0x3c, 0xe1, 0xb7, 0xdc, 0x40, 0xe3, 0x3a, + 0x90, 0x38, 0xbc, 0xb0, 0x0e, 0xac, 0x44, 0x3d, 0xdd, 0x3e, 0x40, 0x31, 0x10, 0x3a, 0x75, 0x40, + 0xa3, 0x3c, 0x00, 0xb8}; +unsigned char avgpool2x2s2_p1_fp16_out[] = { + 0xee, 0x37, 0xc4, 0x3a, 0x82, 0x3a, 0x8d, 0x35, 0xe8, 0x2b, 0xb0, 0x36, 0x7b, 0x3a, 0xd6, 0x3b, + 0x9f, 0x36, 0xe9, 0x2f, 0x8b, 0x36, 0x49, 0x3d, 0xf0, 0x3a, 0x66, 0x3b, 0x74, 0x39, 0x50, 0x35, + 0xec, 0x38, 0x8e, 0x36, 0x7e, 0x37, 0x87, 0x39, 0xd4, 0x3a, 0xe2, 0x39, 0x69, 0x39, 0xba, 0x39, + 0x59, 0x3b, 0x78, 0x35, 0x6e, 0xb0, 0x46, 0x38, 0xe4, 0x38, 0x6c, 0x3e, 0xde, 0xad, 0x4d, 0x3d, + 0xdc, 0x3c, 0x5f, 0x40, 0x03, 0x3d, 0xa6, 0x39, 0x6f, 0x3e, 0xef, 0x3d, 0x9c, 0x3a, 0x2b, 0x3f, + 0x33, 0x34, 0x9a, 0x3a, 0x88, 0x36, 0x40, 0x35, 0x35, 0x2f, 0x7c, 0x39, 0x41, 0x38, 0x0a, 0xb2, + 0xb5, 0x38, 0x00, 0x90, 0xd2, 0xa8, 0xe5, 0x3d, 0xcb, 0x3b, 0xf0, 0x3c, 0x04, 0x3e, 0xfc, 0x3b, + 0x55, 0x3a, 0xe4, 0x3d, 0xda, 0x39, 0x33, 0x3d, 0x93, 0x35, 0xd8, 0x3b, 0x76, 0x3c, 0xb6, 0x3e, + 0x8f, 0x3d, 0x54, 0x3e, 0xf0, 0x3c, 0xaa, 0x36, 0x19, 0x37, 0x9c, 0x3c, 0xeb, 0x34, 0xc8, 0x39, + 0x34, 0x3d, 0x14, 0x3a, 0x64, 0x3d, 0x06, 0x35, 0xd6, 0x3c, 0x7a, 0x3b, 0x11, 0x3e, 0x38, 0x3c}; + +// [c h w]: [2 7 19] +// [c h w]: [2 3 9] +unsigned char avgpool3x3s2_fp32_in[] = { + 0x80, 0x66, 0xcf, 0xc0, 0x67, 0x26, 0xaf, 0xc0, 0x9a, 0x7b, 0x53, 0xc0, 0xee, 0xbe, 0x89, 0xc0, + 0x03, 0x26, 0x93, 0xc0, 0x18, 0xba, 0x64, 0xc0, 0xf8, 0x11, 0xff, 0xbf, 0xba, 0xbb, 0x42, 0xc0, + 0x9c, 0xd1, 0xb6, 0xc0, 0x29, 0xd0, 0x70, 0xc0, 0xf2, 0x42, 0xa2, 0xc0, 0xe1, 0x22, 0xb5, 0xc0, + 0x9e, 0x29, 0x54, 0xbe, 0x03, 0x1c, 0x83, 0xc0, 0xbc, 0xfa, 0xc1, 0xc0, 0x96, 0x17, 0xce, 0xc0, + 0x34, 0x2f, 0xa5, 0xc0, 0x07, 0x04, 0x43, 0xc0, 0x77, 0x76, 0x24, 0xc0, 0x5b, 0x83, 0x46, 0xc0, + 0x66, 0x04, 0x13, 0xc1, 0x81, 0x2e, 0xdb, 0xc0, 0x4e, 0x72, 0xef, 0xc0, 0x01, 0xd0, 0xb2, 0xc0, + 0x8b, 0xf8, 0xb7, 0xbf, 0x86, 0xfb, 0xe3, 0xc0, 0x31, 0xb5, 0x46, 0xc0, 0x98, 0xe4, 0xf8, 0xbf, + 0x5e, 0x2c, 0x46, 0xc0, 0x7c, 0xd0, 0x10, 0xc1, 0xce, 0xaf, 0x90, 0xc0, 0x09, 0xd2, 0xd8, 0xc0, + 0xc2, 0x19, 0xed, 0xc0, 0x81, 0x89, 0xc5, 0xc0, 0xe0, 0x4b, 0xc7, 0xc0, 0x64, 0xe5, 0x41, 0xc0, + 0x40, 0x73, 0xe3, 0xc0, 0xa1, 0x2c, 0xaa, 0xc0, 0x7b, 0xba, 0x9f, 0xc0, 0xc3, 0xc3, 0x81, 0xc0, + 0x73, 0xca, 0x97, 0xc0, 0xe7, 0x8a, 0x7b, 0xc0, 0x41, 0x06, 0x7c, 0xc0, 0x74, 0xcb, 0xc1, 0xc0, + 0xc8, 0x73, 0xcc, 0xc0, 0x78, 0x22, 0xcc, 0xc0, 0x43, 0x9e, 0xfd, 0xc0, 0x5d, 0x92, 0x09, 0xc1, + 0x68, 0xf2, 0x08, 0xc1, 0xc8, 0x7b, 0x8b, 0xc0, 0x74, 0x2c, 0xd3, 0xc0, 0x7c, 0x98, 0x82, 0xc0, + 0x49, 0xf0, 0xbd, 0xc0, 0x81, 0xf3, 0x85, 0xc0, 0xdd, 0x65, 0xc4, 0xc0, 0xf5, 0xd9, 0x49, 0xc0, + 0x19, 0x6e, 0x82, 0xc0, 0x0f, 0x10, 0x09, 0xc1, 0xed, 0xab, 0x90, 0xc0, 0x0e, 0x86, 0x88, 0xc0, + 0x55, 0xb2, 0xbc, 0xc0, 0x50, 0xbe, 0x98, 0xc0, 0x34, 0xde, 0xf9, 0xc0, 0xd1, 0xef, 0xd3, 0xc0, + 0x8d, 0x50, 0x35, 0xc1, 0x3f, 0x72, 0xa1, 0xc0, 0xb5, 0xee, 0x43, 0xc0, 0x03, 0xf6, 0xab, 0xc0, + 0x4d, 0x15, 0xbb, 0xc0, 0xb7, 0xff, 0x39, 0xc0, 0x4c, 0x8e, 0xa0, 0xc0, 0xa7, 0x47, 0x8e, 0xc0, + 0xbf, 0xcc, 0x9e, 0xc0, 0xc0, 0x4d, 0xe1, 0xc0, 0x96, 0x5d, 0x53, 0xc0, 0x98, 0x94, 0x5d, 0xc0, + 0x47, 0xef, 0x5a, 0xc0, 0x77, 0x4d, 0xd0, 0xc0, 0x72, 0x02, 0x75, 0xc0, 0xfb, 0x1b, 0x45, 0xc0, + 0x91, 0x3b, 0x09, 0xc0, 0xd4, 0x25, 0x99, 0xc0, 0x38, 0xed, 0xcd, 0xc0, 0x60, 0xdd, 0x0b, 0xc1, + 0xc6, 0xf1, 0xc5, 0xc0, 0x88, 0xc5, 0xfb, 0xc0, 0xf9, 0x43, 0xe5, 0xc0, 0x8d, 0x03, 0xeb, 0xbf, + 0x3d, 0x77, 0xf6, 0xc0, 0x53, 0x88, 0x7b, 0xc0, 0x26, 0x6f, 0x5c, 0xc0, 0x71, 0xbc, 0xf0, 0xc0, + 0xd5, 0x6e, 0x76, 0xc0, 0x59, 0x57, 0xb9, 0xc0, 0xcc, 0x22, 0xd9, 0xc0, 0x94, 0x9f, 0xb5, 0xc0, + 0xa4, 0x07, 0x9c, 0xc0, 0xaa, 0x05, 0x8e, 0xc0, 0xf2, 0x20, 0xb0, 0xc0, 0xd3, 0xda, 0x8c, 0xbf, + 0x39, 0x6c, 0xc6, 0xc0, 0x25, 0x31, 0xae, 0xc0, 0x11, 0xa8, 0xed, 0xc0, 0x2a, 0x11, 0xcf, 0xc0, + 0x37, 0xef, 0xfd, 0xc0, 0x21, 0x75, 0x89, 0xc0, 0x4b, 0x14, 0x97, 0xc0, 0x7b, 0x3a, 0x98, 0xc0, + 0x5d, 0xd2, 0x88, 0xc0, 0x07, 0xab, 0xa8, 0xc0, 0xeb, 0xa3, 0xab, 0xc0, 0xfb, 0x5c, 0xef, 0xc0, + 0xc4, 0x3d, 0x86, 0xc0, 0xea, 0x35, 0xc4, 0xc0, 0x4e, 0xfd, 0x56, 0xc0, 0x34, 0x26, 0xb0, 0xc0, + 0xfc, 0x66, 0xab, 0xc0, 0xb9, 0x49, 0x05, 0xc1, 0x67, 0x24, 0x9e, 0xc0, 0xf8, 0x80, 0x56, 0xc0, + 0x81, 0x06, 0xe6, 0xc0, 0xe5, 0x82, 0xcb, 0xc0, 0xf9, 0xae, 0xb5, 0xc0, 0xa2, 0xdf, 0x7b, 0xc0, + 0xdd, 0x36, 0x9e, 0xc0, 0x6a, 0xc6, 0x43, 0xc0, 0xaf, 0x45, 0x34, 0xc0, 0xd0, 0x4c, 0xaa, 0xc0, + 0xac, 0x92, 0xb8, 0xc0, 0x8b, 0xc9, 0xd2, 0xc0, 0xa9, 0x37, 0x99, 0xc0, 0x6a, 0x92, 0x44, 0xc0, + 0x07, 0x4a, 0xdd, 0xc0, 0xf1, 0x60, 0x97, 0xc0, 0xbf, 0x39, 0xbb, 0xc0, 0xb4, 0x34, 0x17, 0xc1, + 0x12, 0x11, 0x0d, 0xc1, 0x9b, 0xa8, 0xf0, 0xc0, 0x0d, 0x76, 0xa8, 0xc0, 0xbe, 0x02, 0x10, 0xc1, + 0x85, 0x6d, 0x68, 0xc0, 0xc9, 0x69, 0x83, 0xc0, 0xdc, 0xd1, 0xe9, 0xc0, 0xa8, 0x01, 0x24, 0xc0, + 0xab, 0x9c, 0x73, 0xc0, 0x6d, 0x05, 0xee, 0xc0, 0xfb, 0xec, 0x98, 0xc0, 0x57, 0xc1, 0x41, 0xc0, + 0x15, 0xa2, 0xcf, 0xbf, 0x84, 0x49, 0xa8, 0xc0, 0x5f, 0xba, 0xab, 0xc0, 0xa3, 0x13, 0x7e, 0xc0, + 0x83, 0x56, 0x60, 0xc0, 0x10, 0x91, 0xa5, 0xc0, 0xb1, 0x15, 0xc1, 0xc0, 0xd8, 0x15, 0x0a, 0xc0, + 0xac, 0xbd, 0x98, 0xc0, 0x7c, 0xe1, 0xc3, 0xc0, 0x99, 0xd9, 0x88, 0xbf, 0xef, 0x21, 0x5c, 0xc0, + 0xee, 0x03, 0xbe, 0xc0, 0xf3, 0x7c, 0xba, 0xc0, 0x7e, 0x9d, 0xd4, 0xc0, 0x51, 0xcd, 0x22, 0xc0, + 0xb6, 0x3f, 0x35, 0xc0, 0xf2, 0xc9, 0xcf, 0xc0, 0xec, 0x8b, 0xca, 0xc0, 0x72, 0x40, 0x5b, 0xc0, + 0x55, 0x0c, 0xf8, 0xc0, 0x1c, 0xf1, 0xed, 0xc0, 0xbc, 0xfa, 0x6a, 0xc0, 0x7e, 0x90, 0x05, 0xc1, + 0xde, 0x9e, 0x05, 0xc0, 0xaf, 0x54, 0x57, 0xc0, 0xd9, 0x23, 0x37, 0xc0, 0xb7, 0x11, 0xe0, 0xc0, + 0x51, 0xad, 0xae, 0xc0, 0x7f, 0x1a, 0x96, 0xc0, 0x65, 0x4b, 0xb7, 0xc0, 0x12, 0x80, 0xa6, 0xc0, + 0x63, 0x26, 0xa9, 0x3e, 0xf2, 0x0b, 0xe2, 0xc0, 0xa8, 0x66, 0xb1, 0xc0, 0x49, 0xf5, 0xbc, 0xc0, + 0x4d, 0x29, 0xeb, 0xc0, 0x27, 0xc9, 0x38, 0xbd, 0x94, 0xbe, 0xef, 0xc0, 0xb6, 0x55, 0xe5, 0xc0, + 0x48, 0x50, 0x95, 0xbc, 0x5c, 0x98, 0xf3, 0xc0, 0x17, 0x19, 0x73, 0xc0, 0xcf, 0xd5, 0xd6, 0xc0, + 0x13, 0x92, 0x4b, 0xc0, 0xcd, 0xa9, 0x39, 0xc0, 0xbc, 0x97, 0xd1, 0xc0, 0xdb, 0x3e, 0xe4, 0xc0, + 0x10, 0x36, 0xa5, 0xc0, 0x59, 0xed, 0x97, 0xc0, 0x8c, 0x11, 0x80, 0xc0, 0x19, 0xc2, 0x92, 0xc0, + 0x56, 0x44, 0x94, 0xc0, 0xbd, 0x79, 0x8b, 0xc0, 0xc7, 0xed, 0x66, 0xc0, 0x31, 0x57, 0x03, 0xc1, + 0xed, 0x3d, 0xa0, 0xc0, 0x84, 0xbf, 0xcb, 0xc0, 0x0e, 0xc5, 0x2c, 0xc0, 0x2f, 0x48, 0x45, 0xc0, + 0xd7, 0xa0, 0x89, 0xc0, 0x85, 0x2b, 0xa0, 0xc0, 0xa5, 0xda, 0x28, 0xc0, 0x3d, 0x91, 0xc0, 0xc0, + 0x59, 0x0d, 0x9a, 0xc0, 0x65, 0x9e, 0x56, 0xc0, 0x58, 0x16, 0xec, 0xc0, 0x59, 0xe8, 0x52, 0xc0, + 0xe8, 0xb4, 0x55, 0xc0, 0x0c, 0xf0, 0x71, 0xc0, 0x33, 0xc1, 0xc8, 0xc0, 0x4b, 0x40, 0xc0, 0xc0, + 0x23, 0x43, 0x21, 0xbf, 0x9a, 0x8b, 0xcd, 0xc0, 0x2e, 0x1b, 0xd2, 0xc0, 0xcb, 0xea, 0x92, 0xc0, + 0xde, 0x22, 0x79, 0xc0, 0xef, 0x2e, 0xac, 0xc0, 0xe2, 0x8a, 0x8f, 0xbf, 0xcd, 0xf4, 0x20, 0xc0, + 0x23, 0x91, 0xa2, 0xc0, 0x28, 0xa9, 0x8e, 0xc0, 0x1e, 0xb7, 0x78, 0xc0, 0x9e, 0xf7, 0xcc, 0xc0, + 0x9f, 0x1c, 0x3c, 0xc0, 0x2c, 0xe4, 0x2c, 0xc0, 0x9d, 0xad, 0xcb, 0xc0, 0x06, 0x73, 0x92, 0xc0, + 0xeb, 0x07, 0xb1, 0xc0, 0x07, 0xe0, 0x99, 0xc0, 0x83, 0xa0, 0xa3, 0xc0, 0xf0, 0x13, 0x29, 0xc0, + 0x00, 0x1d, 0x1b, 0xc0, 0x96, 0x0e, 0x6a, 0xc0, 0xb4, 0x05, 0x99, 0xc0, 0xcc, 0x13, 0x10, 0xc1, + 0x28, 0xd5, 0x92, 0xc0, 0x54, 0xa4, 0xb1, 0xc0, 0x27, 0x7c, 0x60, 0xc0, 0xe6, 0x3e, 0x90, 0xc0, + 0xdd, 0xa6, 0xfa, 0xc0, 0x5e, 0x1a, 0xb6, 0xc0, 0xb7, 0xa7, 0x2a, 0xc0, 0xf7, 0xa4, 0x58, 0xc0, + 0xcb, 0x03, 0xaf, 0xbf, 0xc1, 0x54, 0xbe, 0xc0, 0x2a, 0x20, 0xcb, 0xc0, 0xda, 0x76, 0xa1, 0xc0, + 0x8c, 0x3b, 0x84, 0xc0, 0x44, 0x63, 0x73, 0xc0, 0xfd, 0x2b, 0xa6, 0xc0, 0xcf, 0xda, 0xd3, 0xc0, + 0x68, 0x52, 0xea, 0xc0, 0x13, 0xbe, 0xd6, 0xc0, 0x49, 0x49, 0x07, 0xc1, 0x45, 0x06, 0xce, 0xc0, + 0x2e, 0xef, 0x1a, 0xc1, 0x70, 0x08, 0xcb, 0xc0}; +unsigned char avgpool3x3s2_fp32_out[] = { + 0xb5, 0x56, 0xab, 0xc0, 0x83, 0x12, 0x9f, 0xc0, 0xf4, 0xb3, 0x90, 0xc0, 0x6b, 0x14, 0x9b, 0xc0, + 0x4e, 0x02, 0xbf, 0xc0, 0x8e, 0xa2, 0xb4, 0xc0, 0x10, 0x52, 0xa8, 0xc0, 0x07, 0x7b, 0xaf, 0xc0, + 0x38, 0xce, 0x8c, 0xc0, 0x3d, 0xa7, 0x9f, 0xc0, 0x86, 0x22, 0x82, 0xc0, 0x2b, 0x0e, 0xae, 0xc0, + 0x29, 0x50, 0xe7, 0xc0, 0x85, 0x8e, 0xd4, 0xc0, 0x42, 0x02, 0xb3, 0xc0, 0x72, 0xaa, 0x9c, 0xc0, + 0x79, 0xfb, 0xa8, 0xc0, 0x5f, 0x06, 0x9b, 0xc0, 0x02, 0xc7, 0x98, 0xc0, 0x3d, 0xaf, 0x89, 0xc0, + 0xd0, 0xe1, 0x93, 0xc0, 0xf2, 0x14, 0xd5, 0xc0, 0x7e, 0xb2, 0xc1, 0xc0, 0xfd, 0xd4, 0x92, 0xc0, + 0x94, 0xe8, 0x99, 0xc0, 0xea, 0x0b, 0xb2, 0xc0, 0x75, 0x41, 0xae, 0xc0, 0xc4, 0x9a, 0xac, 0xc0, + 0xd4, 0xd1, 0xb8, 0xc0, 0x06, 0xfb, 0xb4, 0xc0, 0x5d, 0x34, 0x98, 0xc0, 0x9c, 0x97, 0x9d, 0xc0, + 0x2b, 0xa7, 0x9d, 0xc0, 0x54, 0x26, 0x9d, 0xc0, 0xe5, 0xc1, 0x95, 0xc0, 0x8c, 0x8f, 0xab, 0xc0, + 0x95, 0x53, 0x92, 0xc0, 0xf0, 0x61, 0x8e, 0xc0, 0x6f, 0xdc, 0xb1, 0xc0, 0xd7, 0x0a, 0x8e, 0xc0, + 0x75, 0x8f, 0x92, 0xc0, 0x20, 0x10, 0x9d, 0xc0, 0xfe, 0x9c, 0xa9, 0xc0, 0x55, 0x9e, 0x97, 0xc0, + 0x34, 0x0c, 0x71, 0xc0, 0xad, 0x6a, 0xa0, 0xc0, 0xaa, 0x3a, 0x8b, 0xc0, 0xa8, 0x9a, 0x82, 0xc0, + 0x00, 0x43, 0x9d, 0xc0, 0x00, 0x36, 0x9e, 0xc0, 0xbe, 0x27, 0x8a, 0xc0, 0xee, 0x90, 0xae, 0xc0, + 0x48, 0x72, 0xbf, 0xc0, 0x30, 0x77, 0xa0, 0xc0}; +unsigned char avgpool3x3s2_fp16_in[] = { + 0x7b, 0xc6, 0x79, 0xc5, 0x9b, 0xc2, 0x4d, 0xc4, 0x99, 0xc4, 0x25, 0xc3, 0xf8, 0xbf, 0x15, 0xc2, + 0xb6, 0xc5, 0x86, 0xc3, 0x12, 0xc5, 0xa9, 0xc5, 0xa1, 0xb2, 0x18, 0xc4, 0x0f, 0xc6, 0x70, 0xc6, + 0x29, 0xc5, 0x18, 0xc2, 0x23, 0xc1, 0x34, 0xc2, 0x98, 0xc8, 0xd9, 0xc6, 0x7b, 0xc7, 0x96, 0xc5, + 0xbf, 0xbd, 0x1f, 0xc7, 0x35, 0xc2, 0xc7, 0xbf, 0x31, 0xc2, 0x86, 0xc8, 0x85, 0xc4, 0xc6, 0xc6, + 0x68, 0xc7, 0x2c, 0xc6, 0x3a, 0xc6, 0x0f, 0xc2, 0x1b, 0xc7, 0x51, 0xc5, 0xfd, 0xc4, 0x0e, 0xc4, + 0xbe, 0xc4, 0xdc, 0xc3, 0xe0, 0xc3, 0x0e, 0xc6, 0x63, 0xc6, 0x61, 0xc6, 0xec, 0xc7, 0x4c, 0xc8, + 0x47, 0xc8, 0x5b, 0xc4, 0x99, 0xc6, 0x14, 0xc4, 0xef, 0xc5, 0x2f, 0xc4, 0x23, 0xc6, 0x4e, 0xc2, + 0x13, 0xc4, 0x48, 0xc8, 0x85, 0xc4, 0x44, 0xc4, 0xe5, 0xc5, 0xc5, 0xc4, 0xce, 0xc7, 0x9f, 0xc6, + 0xaa, 0xc9, 0x0b, 0xc5, 0x1f, 0xc2, 0x5f, 0xc5, 0xd8, 0xc5, 0xcf, 0xc1, 0x04, 0xc5, 0x72, 0xc4, + 0xf6, 0xc4, 0x0a, 0xc7, 0x9a, 0xc2, 0xec, 0xc2, 0xd7, 0xc2, 0x82, 0xc6, 0xa8, 0xc3, 0x28, 0xc2, + 0x49, 0xc0, 0xc9, 0xc4, 0x6f, 0xc6, 0x5e, 0xc8, 0x2f, 0xc6, 0xde, 0xc7, 0x2a, 0xc7, 0x58, 0xbf, + 0xb3, 0xc7, 0xdc, 0xc3, 0xe3, 0xc2, 0x85, 0xc7, 0xb3, 0xc3, 0xca, 0xc5, 0xc9, 0xc6, 0xac, 0xc5, + 0xe0, 0xc4, 0x70, 0xc4, 0x81, 0xc5, 0x66, 0xbc, 0x33, 0xc6, 0x71, 0xc5, 0x6d, 0xc7, 0x78, 0xc6, + 0xef, 0xc7, 0x4b, 0xc4, 0xb8, 0xc4, 0xc1, 0xc4, 0x46, 0xc4, 0x45, 0xc5, 0x5d, 0xc5, 0x7a, 0xc7, + 0x31, 0xc4, 0x21, 0xc6, 0xb7, 0xc2, 0x81, 0xc5, 0x5b, 0xc5, 0x2a, 0xc8, 0xf1, 0xc4, 0xb4, 0xc2, + 0x30, 0xc7, 0x5c, 0xc6, 0xad, 0xc5, 0xde, 0xc3, 0xf1, 0xc4, 0x1e, 0xc2, 0xa2, 0xc1, 0x52, 0xc5, + 0xc4, 0xc5, 0x96, 0xc6, 0xc9, 0xc4, 0x24, 0xc2, 0xea, 0xc6, 0xbb, 0xc4, 0xd9, 0xc5, 0xb9, 0xc8, + 0x68, 0xc8, 0x85, 0xc7, 0x43, 0xc5, 0x80, 0xc8, 0x43, 0xc3, 0x1b, 0xc4, 0x4e, 0xc7, 0x20, 0xc1, + 0x9c, 0xc3, 0x70, 0xc7, 0xc7, 0xc4, 0x0e, 0xc2, 0x7d, 0xbe, 0x42, 0xc5, 0x5d, 0xc5, 0xf0, 0xc3, + 0x02, 0xc3, 0x2c, 0xc5, 0x08, 0xc6, 0x50, 0xc0, 0xc5, 0xc4, 0x1f, 0xc6, 0x46, 0xbc, 0xe1, 0xc2, + 0xf0, 0xc5, 0xd3, 0xc5, 0xa4, 0xc6, 0x16, 0xc1, 0xa9, 0xc1, 0x7e, 0xc6, 0x54, 0xc6, 0xda, 0xc2, + 0xc0, 0xc7, 0x6f, 0xc7, 0x57, 0xc3, 0x2c, 0xc8, 0x2c, 0xc0, 0xba, 0xc2, 0xb9, 0xc1, 0x00, 0xc7, + 0x75, 0xc5, 0xb0, 0xc4, 0xba, 0xc5, 0x34, 0xc5, 0x49, 0x35, 0x10, 0xc7, 0x8b, 0xc5, 0xe7, 0xc5, + 0x59, 0xc7, 0xc6, 0xa9, 0x7d, 0xc7, 0x2a, 0xc7, 0xaa, 0xa4, 0x9c, 0xc7, 0x98, 0xc3, 0xb6, 0xc6, + 0x5c, 0xc2, 0xcd, 0xc1, 0x8c, 0xc6, 0x21, 0xc7, 0x29, 0xc5, 0xbf, 0xc4, 0x00, 0xc4, 0x96, 0xc4, + 0xa2, 0xc4, 0x5b, 0xc4, 0x37, 0xc3, 0x1a, 0xc8, 0x01, 0xc5, 0x5d, 0xc6, 0x66, 0xc1, 0x2a, 0xc2, + 0x4d, 0xc4, 0x01, 0xc5, 0x46, 0xc1, 0x04, 0xc6, 0xd0, 0xc4, 0xb4, 0xc2, 0x60, 0xc7, 0x97, 0xc2, + 0xad, 0xc2, 0x8f, 0xc3, 0x46, 0xc6, 0x02, 0xc6, 0x0a, 0xb9, 0x6c, 0xc6, 0x90, 0xc6, 0x97, 0xc4, + 0xc9, 0xc3, 0x61, 0xc5, 0x7c, 0xbc, 0x07, 0xc1, 0x14, 0xc5, 0x75, 0xc4, 0xc5, 0xc3, 0x67, 0xc6, + 0xe0, 0xc1, 0x67, 0xc1, 0x5d, 0xc6, 0x93, 0xc4, 0x88, 0xc5, 0xcf, 0xc4, 0x1d, 0xc5, 0x48, 0xc1, + 0xd8, 0xc0, 0x50, 0xc3, 0xc8, 0xc4, 0x80, 0xc8, 0x96, 0xc4, 0x8d, 0xc5, 0x03, 0xc3, 0x81, 0xc4, + 0xd5, 0xc7, 0xb0, 0xc5, 0x55, 0xc1, 0xc5, 0xc2, 0x78, 0xbd, 0xf2, 0xc5, 0x59, 0xc6, 0x0b, 0xc5, + 0x21, 0xc4, 0x9b, 0xc3, 0x31, 0xc5, 0x9e, 0xc6, 0x52, 0xc7, 0xb5, 0xc6, 0x3a, 0xc8, 0x70, 0xc6, + 0xd7, 0xc8, 0x58, 0xc6}; +unsigned char avgpool3x3s2_fp16_out[] = { + 0x5a, 0xc5, 0xf9, 0xc4, 0x85, 0xc4, 0xd8, 0xc4, 0xf7, 0xc5, 0xa4, 0xc5, 0x41, 0xc5, 0x7b, 0xc5, + 0x65, 0xc4, 0xfc, 0xc4, 0x10, 0xc4, 0x70, 0xc5, 0x3a, 0xc7, 0xa3, 0xc6, 0x97, 0xc5, 0xe4, 0xc4, + 0x47, 0xc5, 0xd7, 0xc4, 0xc6, 0xc4, 0x4d, 0xc4, 0x9e, 0xc4, 0xa8, 0xc6, 0x0e, 0xc6, 0x96, 0xc4, + 0xcf, 0xc4, 0x90, 0xc5, 0x71, 0xc5, 0x65, 0xc5, 0xc5, 0xc5, 0xa7, 0xc5, 0xc1, 0xc4, 0xec, 0xc4, + 0xec, 0xc4, 0xe9, 0xc4, 0xae, 0xc4, 0x5c, 0xc5, 0x91, 0xc4, 0x72, 0xc4, 0x8e, 0xc5, 0x70, 0xc4, + 0x94, 0xc4, 0xe9, 0xc4, 0x4c, 0xc5, 0xbc, 0xc4, 0x87, 0xc3, 0x02, 0xc5, 0x59, 0xc4, 0x14, 0xc4, + 0xe9, 0xc4, 0xf1, 0xc4, 0x51, 0xc4, 0x73, 0xc5, 0xfb, 0xc5, 0x03, 0xc5}; + +// [c h w]: [2 6 18] +// [c h w]: [2 3 9] +unsigned char avgpool3x3s2_p1_fp32_in[] = { + 0x0c, 0x47, 0x8b, 0xc0, 0x50, 0x83, 0x52, 0xc0, 0xe9, 0x47, 0x88, 0xc0, 0xc4, 0xda, 0x55, 0xc0, + 0x6e, 0xa2, 0xa2, 0xc0, 0x61, 0xc4, 0x94, 0xc0, 0x3b, 0x38, 0xf4, 0xbf, 0xf8, 0x72, 0x2a, 0xc0, + 0x13, 0xce, 0x2f, 0xc0, 0xe4, 0x7a, 0x80, 0xc0, 0x77, 0x63, 0x82, 0xc0, 0x4f, 0xda, 0x8f, 0xc0, + 0xb1, 0x47, 0xa0, 0xc0, 0x75, 0xb2, 0x70, 0xc0, 0xd4, 0x8b, 0x70, 0xc0, 0xeb, 0xbe, 0xa3, 0xc0, + 0xbc, 0x00, 0x83, 0xc0, 0x61, 0x67, 0x62, 0xc0, 0x42, 0x3c, 0x80, 0xc0, 0x6a, 0xc2, 0x9e, 0xc0, + 0xb5, 0x7a, 0x94, 0xc0, 0x6c, 0x3a, 0xab, 0xc0, 0x48, 0x5e, 0xe3, 0xbf, 0x25, 0x1e, 0x82, 0xc0, + 0x39, 0xc6, 0x87, 0xc0, 0xf9, 0x63, 0x98, 0xc0, 0x1a, 0x75, 0x8b, 0xc0, 0xb0, 0xea, 0x84, 0xc0, + 0x12, 0xd3, 0x99, 0xc0, 0x17, 0xcf, 0x56, 0xc0, 0x8e, 0xae, 0x8a, 0xc0, 0x1d, 0x92, 0x26, 0xc0, + 0x9c, 0xb9, 0x02, 0xc0, 0xb8, 0x1f, 0xb3, 0xc0, 0x59, 0x93, 0x75, 0xc0, 0x0e, 0x8b, 0x69, 0xc0, + 0x06, 0xf2, 0x6e, 0xc0, 0xe8, 0x27, 0x55, 0xc0, 0x42, 0x5d, 0x4e, 0xc0, 0x15, 0x89, 0xf7, 0xbf, + 0x3f, 0xf2, 0x8c, 0xc0, 0x92, 0x9f, 0x85, 0xc0, 0xc6, 0x5b, 0x77, 0xc0, 0x58, 0x03, 0x15, 0xc0, + 0x94, 0xde, 0x5c, 0xc0, 0xe9, 0x99, 0x83, 0xc0, 0xb4, 0xf8, 0x7c, 0xc0, 0xc6, 0x01, 0x8c, 0xc0, + 0x7b, 0x12, 0x57, 0xc0, 0xbe, 0x7e, 0xa4, 0xc0, 0xdf, 0x92, 0x70, 0xc0, 0x64, 0x81, 0x6c, 0xc0, + 0xa1, 0x81, 0x60, 0xc0, 0xc6, 0x1e, 0xa2, 0xc0, 0x85, 0x96, 0x85, 0xc0, 0xf6, 0xff, 0xb6, 0xc0, + 0xf8, 0xda, 0x77, 0xc0, 0xaf, 0x39, 0x81, 0xc0, 0x97, 0x7d, 0x32, 0xc0, 0x6d, 0x82, 0xab, 0xc0, + 0xe5, 0x57, 0xad, 0xc0, 0x9a, 0x7c, 0x88, 0xc0, 0xed, 0x28, 0x86, 0xc0, 0xdc, 0x72, 0x42, 0xc0, + 0x6a, 0x83, 0x57, 0xc0, 0x61, 0x12, 0x82, 0xc0, 0x8f, 0xac, 0x65, 0xc0, 0x9e, 0x6b, 0x60, 0xc0, + 0x8e, 0x2e, 0xb6, 0xc0, 0x1f, 0xba, 0x62, 0xc0, 0x25, 0x7d, 0x6b, 0xc0, 0x6d, 0xb8, 0x8b, 0xc0, + 0xe7, 0xba, 0x8e, 0xc0, 0xb3, 0x0e, 0x30, 0xc0, 0x70, 0x2c, 0x7d, 0xc0, 0xed, 0x0c, 0xe0, 0xc0, + 0x67, 0xbf, 0x93, 0xc0, 0x43, 0x48, 0x98, 0xc0, 0xc6, 0xe1, 0x44, 0xc0, 0x4a, 0x76, 0x81, 0xc0, + 0xc2, 0x7e, 0x0f, 0xc0, 0x9e, 0xea, 0x7f, 0xc0, 0x71, 0x00, 0x86, 0xc0, 0x8d, 0x2b, 0x96, 0xc0, + 0x59, 0x7c, 0x74, 0xc0, 0x0c, 0x92, 0x07, 0xc0, 0x19, 0xdf, 0x6d, 0xc0, 0x54, 0xda, 0xa1, 0xc0, + 0x62, 0x91, 0x9f, 0xc0, 0x30, 0x26, 0x5b, 0xc0, 0x7c, 0xb4, 0x81, 0xc0, 0xb0, 0xdc, 0xe4, 0xbf, + 0x67, 0x2d, 0x2c, 0xc0, 0x31, 0x7b, 0x33, 0xc0, 0xb1, 0xf0, 0x9e, 0xc0, 0x05, 0x00, 0x3d, 0xc0, + 0x4d, 0x84, 0x87, 0xc0, 0x48, 0x4e, 0x04, 0xc0, 0xae, 0xfc, 0x61, 0xc0, 0xdb, 0xe2, 0x92, 0xc0, + 0xa1, 0x50, 0x77, 0xc0, 0x95, 0xda, 0x41, 0xc0, 0xdd, 0x46, 0x84, 0xc0, 0x49, 0x37, 0x64, 0xc0, + 0x4f, 0x9e, 0x8e, 0xc0, 0xe3, 0xb2, 0x79, 0xc0, 0x68, 0x7e, 0x35, 0xc0, 0x14, 0xe8, 0x03, 0xc0, + 0x18, 0xdb, 0x12, 0xc0, 0xa7, 0x89, 0x61, 0xc0, 0x71, 0x11, 0x64, 0xc0, 0xa4, 0xed, 0x57, 0xc0, + 0x2b, 0xfc, 0x38, 0xc0, 0x15, 0x55, 0x98, 0xc0, 0x9b, 0x48, 0x22, 0xc0, 0x5f, 0xee, 0x4c, 0xc0, + 0x08, 0xf5, 0xb3, 0xc0, 0xba, 0x6f, 0xc0, 0xc0, 0xfa, 0x39, 0x79, 0xc0, 0xd0, 0x96, 0x98, 0xc0, + 0x17, 0x33, 0x43, 0xc0, 0x4a, 0x7a, 0x9d, 0xc0, 0xd0, 0x80, 0xa1, 0xc0, 0xb6, 0x04, 0x9f, 0xc0, + 0x43, 0x52, 0x7b, 0xc0, 0x32, 0x9c, 0x86, 0xc0, 0xe8, 0x15, 0x63, 0xc0, 0xca, 0x98, 0x2f, 0xc0, + 0xb2, 0xc0, 0x81, 0xc0, 0xe0, 0x27, 0x7d, 0xc0, 0xbd, 0x81, 0x89, 0xc0, 0x46, 0xdd, 0xf2, 0xbf, + 0x88, 0xd3, 0x59, 0xc0, 0x50, 0xfd, 0xa3, 0xc0, 0x15, 0x17, 0x5d, 0xc0, 0x74, 0xc0, 0x99, 0xc0, + 0x1f, 0x40, 0x4b, 0xc0, 0x76, 0x23, 0x94, 0xc0, 0x7e, 0x07, 0x32, 0xc0, 0x1a, 0xb6, 0x43, 0xc0, + 0x50, 0xbe, 0x84, 0xc0, 0xc4, 0xa5, 0x38, 0xc0, 0x91, 0xa7, 0x82, 0xc0, 0x86, 0x6a, 0x78, 0xc0, + 0x10, 0x8f, 0x84, 0xc0, 0x7e, 0xd7, 0x83, 0xc0, 0x26, 0x43, 0x82, 0xc0, 0x9a, 0x80, 0x76, 0xc0, + 0x97, 0x40, 0x46, 0xc0, 0x3f, 0x24, 0x4b, 0xc0, 0x91, 0x44, 0x7d, 0xc0, 0xd2, 0x10, 0xa4, 0xc0, + 0x01, 0x80, 0x29, 0xc0, 0x16, 0x59, 0x92, 0xc0, 0xe0, 0xbb, 0x79, 0xc0, 0x20, 0x56, 0xc4, 0xc0, + 0x47, 0xba, 0x98, 0xc0, 0x98, 0x1e, 0x84, 0xc0, 0xcd, 0xa2, 0x22, 0xc0, 0x47, 0x0d, 0x52, 0xc0, + 0xf6, 0x66, 0x54, 0xc0, 0xe2, 0xfa, 0x92, 0xc0, 0xa0, 0x27, 0x62, 0xbf, 0x82, 0xa6, 0x58, 0xc0, + 0x61, 0x30, 0xa0, 0xc0, 0x96, 0x5f, 0x63, 0xc0, 0x29, 0xc6, 0x3c, 0xc0, 0x0c, 0x04, 0x79, 0xc0, + 0xe4, 0x60, 0x8b, 0xc0, 0x6d, 0xa7, 0xca, 0xc0, 0x29, 0x30, 0x8c, 0xc0, 0xc5, 0x26, 0x76, 0xc0, + 0x43, 0xce, 0xaa, 0xc0, 0x7b, 0xb5, 0x90, 0xc0, 0x37, 0x22, 0x9d, 0xc0, 0x40, 0x0d, 0x35, 0xc0, + 0x63, 0xb4, 0xa0, 0xc0, 0xc2, 0xe6, 0xb2, 0xc0, 0x5d, 0x98, 0x66, 0xc0, 0x1d, 0xbe, 0xf5, 0xbf, + 0x9b, 0x41, 0xbc, 0xc0, 0xd7, 0x00, 0x91, 0xc0, 0x97, 0x68, 0xab, 0xc0, 0xc4, 0xd4, 0xad, 0xc0, + 0x2d, 0xc8, 0x5e, 0xc0, 0xa5, 0xe9, 0x73, 0xc0, 0xf6, 0x97, 0x8f, 0xc0, 0x46, 0x7e, 0x6b, 0xc0, + 0x48, 0xca, 0x9c, 0xc0, 0x56, 0xcf, 0x85, 0xc0, 0xef, 0xb7, 0x91, 0xc0, 0x49, 0x6b, 0x1e, 0xc0, + 0x1e, 0x47, 0x7c, 0xc0, 0x45, 0x95, 0x8a, 0xc0, 0xcd, 0xdf, 0xb1, 0xc0, 0x29, 0xc2, 0xab, 0xc0, + 0xa4, 0xfb, 0x57, 0xc0, 0xd2, 0x78, 0x81, 0xc0, 0x5c, 0xca, 0x77, 0xc0, 0x3a, 0x7e, 0x73, 0xc0, + 0x8e, 0xf0, 0x46, 0xc0, 0x86, 0x0f, 0xa5, 0xc0, 0xef, 0xc7, 0xb1, 0xc0, 0x94, 0x9f, 0x60, 0xc0, + 0x9f, 0xe8, 0x6f, 0xc0, 0xe3, 0xac, 0x6d, 0xc0, 0x4e, 0xd9, 0x33, 0xc0, 0x27, 0x1f, 0x84, 0xc0, + 0x56, 0xde, 0x82, 0xc0, 0x17, 0xde, 0x6d, 0xc0, 0x96, 0xcc, 0x8f, 0xc0, 0x8f, 0x98, 0x0a, 0xc0, + 0x9f, 0xaa, 0x94, 0xc0, 0x5d, 0x60, 0x5f, 0xc0, 0x37, 0x2a, 0x8d, 0xc0, 0x20, 0x8a, 0xac, 0xbf}; +unsigned char avgpool3x3s2_p1_fp32_out[] = { + 0x2b, 0x3c, 0xec, 0xbf, 0x39, 0xc3, 0x37, 0xc0, 0x2f, 0x8f, 0x2c, 0xc0, 0x81, 0x84, 0x1e, 0xc0, + 0x7d, 0x6a, 0x21, 0xc0, 0xe2, 0x14, 0x31, 0xc0, 0xdd, 0x85, 0x27, 0xc0, 0x96, 0xb2, 0x22, 0xc0, + 0x48, 0xb2, 0x37, 0xc0, 0xeb, 0x5c, 0x38, 0xc0, 0x9d, 0xce, 0x83, 0xc0, 0xeb, 0x06, 0x71, 0xc0, + 0x60, 0xd3, 0x88, 0xc0, 0xe9, 0x7b, 0x76, 0xc0, 0x6b, 0x82, 0x7a, 0xc0, 0x15, 0x1e, 0x74, 0xc0, + 0x87, 0x03, 0x7d, 0xc0, 0x40, 0x59, 0x83, 0xc0, 0xa5, 0x2b, 0x23, 0xc0, 0x22, 0x31, 0x76, 0xc0, + 0x89, 0xb1, 0x8b, 0xc0, 0xfb, 0xa1, 0x80, 0xc0, 0x65, 0x59, 0x63, 0xc0, 0x65, 0xb3, 0x77, 0xc0, + 0x39, 0x40, 0x67, 0xc0, 0x29, 0xde, 0x7c, 0xc0, 0xe7, 0xbf, 0x70, 0xc0, 0x35, 0x3d, 0xac, 0xbf, + 0xa5, 0xdd, 0x16, 0xc0, 0x54, 0x93, 0x16, 0xc0, 0x70, 0xca, 0x14, 0xc0, 0xf2, 0xb2, 0x48, 0xc0, + 0xc7, 0x08, 0x42, 0xc0, 0xfb, 0x09, 0x25, 0xc0, 0x92, 0xfb, 0x31, 0xc0, 0xf2, 0x53, 0x2a, 0xc0, + 0x3f, 0xda, 0x05, 0xc0, 0x87, 0x0a, 0x77, 0xc0, 0xce, 0x12, 0x5a, 0xc0, 0xff, 0x7d, 0x84, 0xc0, + 0x5b, 0x43, 0x8f, 0xc0, 0x79, 0x84, 0x91, 0xc0, 0xd7, 0x48, 0x86, 0xc0, 0x79, 0x1d, 0x6e, 0xc0, + 0xb5, 0xc8, 0x6b, 0xc0, 0xc5, 0xff, 0x1e, 0xc0, 0x9d, 0xdb, 0x8b, 0xc0, 0xa5, 0xc4, 0x84, 0xc0, + 0x17, 0x70, 0x85, 0xc0, 0x38, 0x04, 0x87, 0xc0, 0x2d, 0x07, 0x83, 0xc0, 0x27, 0x68, 0x6d, 0xc0, + 0xad, 0xaa, 0x8a, 0xc0, 0x45, 0xbe, 0x6b, 0xc0}; +unsigned char avgpool3x3s2_p1_fp16_in[] = { + 0x5a, 0xc4, 0x94, 0xc2, 0x42, 0xc4, 0xae, 0xc2, 0x15, 0xc5, 0xa6, 0xc4, 0xa1, 0xbf, 0x53, 0xc1, + 0x7e, 0xc1, 0x03, 0xc4, 0x13, 0xc4, 0x7e, 0xc4, 0x02, 0xc5, 0x85, 0xc3, 0x84, 0xc3, 0x1d, 0xc5, + 0x18, 0xc4, 0x13, 0xc3, 0x01, 0xc4, 0xf6, 0xc4, 0xa3, 0xc4, 0x59, 0xc5, 0x1a, 0xbf, 0x10, 0xc4, + 0x3e, 0xc4, 0xc3, 0xc4, 0x5b, 0xc4, 0x27, 0xc4, 0xce, 0xc4, 0xb6, 0xc2, 0x55, 0xc4, 0x34, 0xc1, + 0x15, 0xc0, 0x98, 0xc5, 0xac, 0xc3, 0x4c, 0xc3, 0x77, 0xc3, 0xa9, 0xc2, 0x72, 0xc2, 0xbc, 0xbf, + 0x67, 0xc4, 0x2c, 0xc4, 0xba, 0xc3, 0xa8, 0xc0, 0xe6, 0xc2, 0x1c, 0xc4, 0xe7, 0xc3, 0x60, 0xc4, + 0xb8, 0xc2, 0x23, 0xc5, 0x84, 0xc3, 0x64, 0xc3, 0x04, 0xc3, 0x10, 0xc5, 0x2c, 0xc4, 0xb7, 0xc5, + 0xbe, 0xc3, 0x09, 0xc4, 0x93, 0xc1, 0x5c, 0xc5, 0x6a, 0xc5, 0x43, 0xc4, 0x31, 0xc4, 0x13, 0xc2, + 0xbc, 0xc2, 0x10, 0xc4, 0x2d, 0xc3, 0x03, 0xc3, 0xb1, 0xc5, 0x15, 0xc3, 0x5b, 0xc3, 0x5d, 0xc4, + 0x75, 0xc4, 0x80, 0xc1, 0xe9, 0xc3, 0x00, 0xc7, 0x9d, 0xc4, 0xc2, 0xc4, 0x27, 0xc2, 0x0b, 0xc4, + 0x7b, 0xc0, 0xff, 0xc3, 0x30, 0xc4, 0xb1, 0xc4, 0xa3, 0xc3, 0x3c, 0xc0, 0x6e, 0xc3, 0x0e, 0xc5, + 0xfc, 0xc4, 0xd9, 0xc2, 0x0d, 0xc4, 0x26, 0xbf, 0x61, 0xc1, 0x9b, 0xc1, 0xf7, 0xc4, 0xe8, 0xc1, + 0x3c, 0xc4, 0x22, 0xc0, 0x0f, 0xc3, 0x97, 0xc4, 0xba, 0xc3, 0x0e, 0xc2, 0x22, 0xc4, 0x21, 0xc3, + 0x74, 0xc4, 0xcd, 0xc3, 0xab, 0xc1, 0x1f, 0xc0, 0x96, 0xc0, 0x0c, 0xc3, 0x20, 0xc3, 0xbf, 0xc2, + 0xc7, 0xc1, 0xc2, 0xc4, 0x12, 0xc1, 0x67, 0xc2, 0x9f, 0xc5, 0x03, 0xc6, 0xc9, 0xc3, 0xc4, 0xc4, + 0x19, 0xc2, 0xeb, 0xc4, 0x0c, 0xc5, 0xf8, 0xc4, 0xda, 0xc3, 0x34, 0xc4, 0x18, 0xc3, 0x7c, 0xc1, + 0x0e, 0xc4, 0xe9, 0xc3, 0x4c, 0xc4, 0x96, 0xbf, 0xce, 0xc2, 0x1f, 0xc5, 0xe8, 0xc2, 0xce, 0xc4, + 0x5a, 0xc2, 0xa1, 0xc4, 0x90, 0xc1, 0x1d, 0xc2, 0x25, 0xc4, 0xc5, 0xc1, 0x15, 0xc4, 0xc3, 0xc3, + 0x24, 0xc4, 0x1e, 0xc4, 0x12, 0xc4, 0xb4, 0xc3, 0x32, 0xc2, 0x59, 0xc2, 0xea, 0xc3, 0x20, 0xc5, + 0x4c, 0xc1, 0x92, 0xc4, 0xcd, 0xc3, 0x22, 0xc6, 0xc5, 0xc4, 0x20, 0xc4, 0x15, 0xc1, 0x90, 0xc2, + 0xa3, 0xc2, 0x97, 0xc4, 0x11, 0xbb, 0xc5, 0xc2, 0x01, 0xc5, 0x1a, 0xc3, 0xe6, 0xc1, 0xc8, 0xc3, + 0x5b, 0xc4, 0x55, 0xc6, 0x61, 0xc4, 0xb1, 0xc3, 0x56, 0xc5, 0x85, 0xc4, 0xe9, 0xc4, 0xa8, 0xc1, + 0x05, 0xc5, 0x97, 0xc5, 0x34, 0xc3, 0xad, 0xbf, 0xe2, 0xc5, 0x88, 0xc4, 0x5b, 0xc5, 0x6e, 0xc5, + 0xf6, 0xc2, 0x9f, 0xc3, 0x7c, 0xc4, 0x5b, 0xc3, 0xe6, 0xc4, 0x2e, 0xc4, 0x8d, 0xc4, 0xf3, 0xc0, + 0xe2, 0xc3, 0x54, 0xc4, 0x8e, 0xc5, 0x5e, 0xc5, 0xbf, 0xc2, 0x0b, 0xc4, 0xbe, 0xc3, 0x9b, 0xc3, + 0x37, 0xc2, 0x28, 0xc5, 0x8e, 0xc5, 0x04, 0xc3, 0x7f, 0xc3, 0x6d, 0xc3, 0x9e, 0xc1, 0x20, 0xc4, + 0x16, 0xc4, 0x6e, 0xc3, 0x7e, 0xc4, 0x54, 0xc0, 0xa5, 0xc4, 0xfb, 0xc2, 0x69, 0xc4, 0x64, 0xbd}; +unsigned char avgpool3x3s2_p1_fp16_out[] = { + 0x60, 0xbf, 0xbd, 0xc1, 0x63, 0xc1, 0xf3, 0xc0, 0x0a, 0xc1, 0x88, 0xc1, 0x3b, 0xc1, 0x14, 0xc1, + 0xbc, 0xc1, 0xc2, 0xc1, 0x1e, 0xc4, 0x87, 0xc3, 0x45, 0xc4, 0xb3, 0xc3, 0xd3, 0xc3, 0xa0, 0xc3, + 0xe7, 0xc3, 0x1a, 0xc4, 0x18, 0xc1, 0xb1, 0xc3, 0x5d, 0xc4, 0x04, 0xc4, 0x1a, 0xc3, 0xbc, 0xc3, + 0x38, 0xc3, 0xe5, 0xc3, 0x83, 0xc3, 0x61, 0xbd, 0xb7, 0xc0, 0xb3, 0xc0, 0xa5, 0xc0, 0x45, 0xc2, + 0x10, 0xc2, 0x28, 0xc1, 0x90, 0xc1, 0x51, 0xc1, 0x2e, 0xc0, 0xb7, 0xc3, 0xd0, 0xc2, 0x23, 0xc4, + 0x79, 0xc4, 0x8c, 0xc4, 0x32, 0xc4, 0x70, 0xc3, 0x5c, 0xc3, 0xf8, 0xc0, 0x5e, 0xc4, 0x25, 0xc4, + 0x2b, 0xc4, 0x38, 0xc4, 0x17, 0xc4, 0x6a, 0xc3, 0x55, 0xc4, 0x5c, 0xc3}; + +// [c h w]: [2 3 10] +// [c h w]: [2 3 10] +unsigned char avgpool3x3s1_p1_fp32_in[] = { + 0x8a, 0x74, 0x05, 0x40, 0xbf, 0x10, 0xcb, 0x3f, 0xfb, 0x3a, 0xf5, 0x3f, 0x5e, 0x77, 0xbd, 0x3f, + 0x2b, 0x7d, 0xc4, 0x3f, 0xcc, 0x29, 0xf1, 0x3f, 0xe0, 0x31, 0x1c, 0x40, 0x7e, 0xe9, 0x0b, 0xbf, + 0x7a, 0xbd, 0x92, 0x3f, 0xbd, 0xdb, 0xab, 0x3f, 0x5a, 0x6c, 0x95, 0x3f, 0x8e, 0xc8, 0xd5, 0x3f, + 0x37, 0x04, 0x1d, 0x40, 0x54, 0x5d, 0x7e, 0x3f, 0x13, 0x06, 0x44, 0x3f, 0x38, 0x15, 0x98, 0x3f, + 0x02, 0xe0, 0x52, 0x40, 0x33, 0x27, 0x21, 0x40, 0xf7, 0x87, 0x92, 0x40, 0x78, 0x7b, 0x92, 0x3e, + 0x2c, 0x28, 0x13, 0x40, 0x22, 0x31, 0x28, 0x3f, 0xb8, 0x8d, 0xa8, 0x3f, 0xf4, 0x88, 0x44, 0x40, + 0xbe, 0x6e, 0x45, 0x40, 0x7b, 0x0f, 0x82, 0x40, 0xf0, 0xf2, 0x5b, 0x3f, 0x7c, 0x14, 0x05, 0x40, + 0x72, 0xb6, 0x0f, 0x3f, 0x7a, 0x8b, 0x3b, 0x40, 0x1b, 0x2c, 0x1f, 0x40, 0x43, 0x35, 0x1e, 0x40, + 0xba, 0x7a, 0x5d, 0x3f, 0xab, 0xde, 0x5b, 0x3f, 0x33, 0x79, 0x84, 0x40, 0xdc, 0x96, 0x8a, 0xbe, + 0xe6, 0xbe, 0x1a, 0x40, 0x30, 0x10, 0xa5, 0x3e, 0xb2, 0xbc, 0xbd, 0x3f, 0xf5, 0x62, 0x4c, 0x40, + 0xe3, 0x84, 0xf0, 0x3e, 0x3a, 0xda, 0x8b, 0x3f, 0x9e, 0x8e, 0xac, 0x3f, 0xa9, 0xbe, 0x28, 0xbf, + 0xf8, 0x38, 0x89, 0x3f, 0xe6, 0x8e, 0x52, 0x3f, 0x65, 0xf1, 0x10, 0x40, 0x5d, 0xf8, 0x8e, 0x3f, + 0x85, 0x83, 0xa9, 0x3f, 0xcf, 0xb2, 0x0e, 0x40, 0x4c, 0x87, 0x33, 0x3e, 0xe9, 0x50, 0x31, 0x40, + 0x2a, 0xed, 0x36, 0x40, 0xd5, 0x6a, 0x4c, 0x3f, 0xbf, 0x6f, 0x30, 0x40, 0x06, 0x2f, 0x6e, 0x3f, + 0xb1, 0x91, 0x21, 0x40, 0x5c, 0xe8, 0x03, 0x40, 0xc4, 0x7a, 0x6d, 0x3f, 0xad, 0x43, 0x22, 0x40}; +unsigned char avgpool3x3s1_p1_fp32_out[] = { + 0xd5, 0x26, 0x39, 0x3f, 0x5a, 0xb7, 0x9a, 0x3f, 0xdd, 0xa3, 0x8f, 0x3f, 0xbd, 0x0b, 0x82, 0x3f, + 0xf2, 0x32, 0x5f, 0x3f, 0xc7, 0xfc, 0x9d, 0x3f, 0xd4, 0x4d, 0x99, 0x3f, 0x15, 0x0a, 0xbf, 0x3f, + 0xf5, 0xa1, 0x84, 0x3f, 0xfd, 0x2f, 0x51, 0x3f, 0x13, 0x9f, 0x86, 0x3f, 0x6c, 0x7d, 0xd7, 0x3f, + 0xfe, 0x62, 0xd7, 0x3f, 0x87, 0x52, 0xec, 0x3f, 0xec, 0x79, 0x00, 0x40, 0x54, 0xf1, 0x07, 0x40, + 0xc2, 0xe6, 0xfc, 0x3f, 0xee, 0xd0, 0xf0, 0x3f, 0xdd, 0xdd, 0xd3, 0x3f, 0x20, 0x41, 0x9a, 0x3f, + 0xbd, 0xcd, 0x24, 0x3f, 0xc6, 0x05, 0x88, 0x3f, 0x35, 0x86, 0x90, 0x3f, 0xce, 0x30, 0xa6, 0x3f, + 0xce, 0x45, 0xbb, 0x3f, 0x21, 0x8c, 0xbc, 0x3f, 0x1d, 0x2b, 0xc7, 0x3f, 0x1a, 0x93, 0xc5, 0x3f, + 0x5d, 0x3d, 0xb8, 0x3f, 0x87, 0xb5, 0x6d, 0x3f, 0x99, 0x7f, 0x39, 0x3f, 0x09, 0x74, 0x78, 0x3f, + 0x46, 0x07, 0x2a, 0x3f, 0x2b, 0xe2, 0x58, 0x3f, 0xe4, 0x9f, 0x29, 0x3f, 0xc9, 0x91, 0x94, 0x3f, + 0x0d, 0xd5, 0x3d, 0x3f, 0x44, 0xf8, 0x7d, 0x3f, 0x76, 0x84, 0x89, 0x3f, 0xf2, 0x17, 0x6a, 0x3f, + 0x6f, 0xa5, 0x86, 0x3f, 0x23, 0xc6, 0xce, 0x3f, 0xb4, 0x6c, 0xb0, 0x3f, 0x1d, 0xa8, 0xc7, 0x3f, + 0xfe, 0x9b, 0x94, 0x3f, 0x0d, 0xea, 0xec, 0x3f, 0x97, 0x5d, 0xad, 0x3f, 0x2f, 0x65, 0xcd, 0x3f, + 0x12, 0x15, 0xd8, 0x3f, 0x7f, 0x4c, 0xa6, 0x3f, 0x0b, 0x3c, 0x00, 0x3f, 0x95, 0xe1, 0x77, 0x3f, + 0xe9, 0x7e, 0x69, 0x3f, 0x39, 0x85, 0x68, 0x3f, 0xe4, 0xbb, 0x22, 0x3f, 0xbb, 0x7f, 0x93, 0x3f, + 0x05, 0x3e, 0x8a, 0x3f, 0x20, 0x57, 0x91, 0x3f, 0xfe, 0xfe, 0x90, 0x3f, 0x6b, 0x98, 0x47, 0x3f}; +unsigned char avgpool3x3s1_p1_fp16_in[] = { + 0x2b, 0x40, 0x58, 0x3e, 0xa9, 0x3f, 0xeb, 0x3d, 0x23, 0x3e, 0x89, 0x3f, 0xe1, 0x40, 0x5f, + 0xb8, 0x95, 0x3c, 0x5e, 0x3d, 0xab, 0x3c, 0xae, 0x3e, 0xe8, 0x40, 0xf2, 0x3b, 0x20, 0x3a, + 0xc0, 0x3c, 0x97, 0x42, 0x09, 0x41, 0x94, 0x44, 0x93, 0x34, 0x99, 0x40, 0x41, 0x39, 0x44, + 0x3d, 0x24, 0x42, 0x2b, 0x42, 0x10, 0x44, 0xdf, 0x3a, 0x28, 0x40, 0x7d, 0x38, 0xdc, 0x41, + 0xf9, 0x40, 0xf1, 0x40, 0xeb, 0x3a, 0xde, 0x3a, 0x23, 0x44, 0x54, 0xb4, 0xd5, 0x40, 0x28, + 0x35, 0xed, 0x3d, 0x63, 0x42, 0x84, 0x37, 0x5e, 0x3c, 0x64, 0x3d, 0x45, 0xb9, 0x49, 0x3c, + 0x94, 0x3a, 0x87, 0x40, 0x77, 0x3c, 0x4c, 0x3d, 0x75, 0x40, 0x9c, 0x31, 0x8a, 0x41, 0xb7, + 0x41, 0x63, 0x3a, 0x83, 0x41, 0x71, 0x3b, 0x0c, 0x41, 0x1f, 0x40, 0x6b, 0x3b, 0x12, 0x41}; +unsigned char avgpool3x3s1_p1_fp16_out[] = { + 0xc9, 0x39, 0xd5, 0x3c, 0x7c, 0x3c, 0x10, 0x3c, 0xf8, 0x3a, 0xf0, 0x3c, 0xca, 0x3c, 0xf8, + 0x3d, 0x24, 0x3c, 0x88, 0x3a, 0x34, 0x3c, 0xbb, 0x3e, 0xba, 0x3e, 0x61, 0x3f, 0x03, 0x40, + 0x40, 0x40, 0xe7, 0x3f, 0x85, 0x3f, 0x9e, 0x3e, 0xd1, 0x3c, 0x25, 0x39, 0x40, 0x3c, 0x83, + 0x3c, 0x31, 0x3d, 0xd9, 0x3d, 0xe4, 0x3d, 0x38, 0x3e, 0x2c, 0x3e, 0xc1, 0x3d, 0x6e, 0x3b, + 0xca, 0x39, 0xc3, 0x3b, 0x50, 0x39, 0xc5, 0x3a, 0x4c, 0x39, 0xa3, 0x3c, 0xee, 0x39, 0xee, + 0x3b, 0x4c, 0x3c, 0x50, 0x3b, 0x34, 0x3c, 0x76, 0x3e, 0x82, 0x3d, 0x3c, 0x3e, 0xa4, 0x3c, + 0x67, 0x3f, 0x6a, 0x3d, 0x6a, 0x3e, 0xc0, 0x3e, 0x31, 0x3d, 0x02, 0x38, 0xc0, 0x3b, 0x4a, + 0x3b, 0x43, 0x3b, 0x15, 0x39, 0x9b, 0x3c, 0x52, 0x3c, 0x8a, 0x3c, 0x88, 0x3c, 0x3c, 0x3a}; + +// [c h w]: [3 7 7] +// [c h w]: [3 1 1] +unsigned char global_avgpool_fp32_in[] = { + 0x81, 0xd1, 0x73, 0x40, 0xb2, 0xe6, 0xa4, 0x40, 0xa2, 0x96, 0x5c, 0x40, 0xd8, 0x25, 0x68, 0x40, + 0x1a, 0x53, 0x92, 0x40, 0xeb, 0xc7, 0x51, 0x40, 0x71, 0x9c, 0xa1, 0x40, 0xe0, 0x07, 0xa0, 0x40, + 0x1d, 0x9b, 0x4f, 0x40, 0x19, 0x2e, 0x49, 0x40, 0xd3, 0x1e, 0x35, 0x40, 0x0f, 0xa9, 0x82, 0x40, + 0x7f, 0xf3, 0xa9, 0x40, 0x48, 0x96, 0x58, 0x40, 0x4b, 0xc5, 0x51, 0x40, 0xc5, 0x75, 0x83, 0x40, + 0x36, 0x34, 0x92, 0x40, 0xfb, 0x9e, 0xbb, 0x40, 0x56, 0x07, 0xae, 0x40, 0x63, 0xf0, 0xcb, 0x40, + 0xf5, 0x61, 0xa4, 0x40, 0xe1, 0x7b, 0xaf, 0x40, 0xef, 0x1d, 0x57, 0x40, 0xed, 0x4d, 0x82, 0x40, + 0xf0, 0xeb, 0x91, 0x40, 0xd8, 0x34, 0x82, 0x40, 0x34, 0x31, 0x6f, 0x40, 0x7f, 0xb4, 0x8c, 0x40, + 0xd2, 0x77, 0x88, 0x40, 0xfe, 0x4e, 0x89, 0x40, 0x8e, 0x80, 0x41, 0x40, 0x37, 0x83, 0xa6, 0x40, + 0x78, 0x81, 0x56, 0x40, 0x67, 0x45, 0x84, 0x40, 0x24, 0x97, 0x83, 0x40, 0xde, 0x66, 0x9a, 0x40, + 0xe2, 0x9f, 0x85, 0x40, 0x42, 0x50, 0x58, 0x40, 0x5f, 0x6a, 0x1d, 0x40, 0x3e, 0x34, 0x50, 0x40, + 0xbf, 0x16, 0x5a, 0x40, 0x80, 0x9e, 0x17, 0x40, 0x82, 0xde, 0x53, 0x40, 0x04, 0xf3, 0x76, 0x40, + 0x74, 0xda, 0x89, 0x40, 0x04, 0x96, 0xa1, 0x40, 0x44, 0x42, 0x68, 0x40, 0x3e, 0x43, 0xf0, 0x3f, + 0xa0, 0x8f, 0x4e, 0x40, 0x36, 0xcf, 0x69, 0x40, 0x73, 0xa6, 0x37, 0x40, 0xe1, 0xac, 0x86, 0x40, + 0xcb, 0xdb, 0x9b, 0x40, 0x98, 0x15, 0x64, 0x40, 0xe9, 0x3c, 0x8c, 0x40, 0xb8, 0x5b, 0x4e, 0x40, + 0xe5, 0x49, 0x98, 0x40, 0xba, 0xa1, 0x5f, 0x40, 0x3d, 0x2a, 0xa0, 0x40, 0x88, 0x55, 0x56, 0x40, + 0x05, 0x6b, 0xe5, 0x3f, 0xcd, 0x4a, 0xc3, 0x40, 0x0a, 0x8c, 0xb7, 0x40, 0xb5, 0xf6, 0xba, 0x40, + 0x0d, 0xdf, 0x2b, 0x40, 0xaa, 0xd5, 0x52, 0x40, 0x45, 0x79, 0x88, 0x40, 0x52, 0xb2, 0x64, 0x40, + 0xb9, 0x3c, 0xa7, 0x40, 0x96, 0xe2, 0x3e, 0x40, 0xe2, 0xab, 0x33, 0x40, 0xf8, 0x59, 0x9a, 0x40, + 0xaf, 0xc4, 0x49, 0x40, 0xb8, 0x02, 0x49, 0x40, 0xcb, 0x52, 0x7b, 0x40, 0xf1, 0xc3, 0xcd, 0x40, + 0xc0, 0xe1, 0x8c, 0x40, 0x2e, 0xc8, 0x75, 0x40, 0x91, 0x0b, 0x74, 0x40, 0xdc, 0x95, 0xa2, 0x40, + 0x46, 0xea, 0xa5, 0x40, 0x7c, 0x1f, 0x08, 0x40, 0x7d, 0x11, 0x67, 0x40, 0x32, 0x3e, 0xb3, 0x40, + 0xab, 0x31, 0x69, 0x40, 0x8b, 0x0c, 0x28, 0x40, 0xde, 0x6f, 0x80, 0x40, 0x4d, 0x69, 0x80, 0x40, + 0x2f, 0x79, 0x31, 0x40, 0xdf, 0xd7, 0x9c, 0x40, 0xb4, 0xe0, 0x22, 0x40, 0x3e, 0xc5, 0x76, 0x40, + 0xa4, 0xa5, 0x9f, 0x40, 0x1a, 0xeb, 0x64, 0x40, 0x02, 0x89, 0x83, 0x40, 0x58, 0x4c, 0xb2, 0x40, + 0x03, 0x00, 0x90, 0x40, 0x90, 0xd0, 0x66, 0x40, 0xc9, 0xc7, 0x4a, 0x40, 0x91, 0x10, 0xa0, 0x40, + 0xea, 0xce, 0x3c, 0x40, 0x68, 0xb3, 0x8e, 0x40, 0x67, 0x2d, 0x12, 0x40, 0x77, 0x4b, 0x70, 0x40, + 0x0e, 0x84, 0xa6, 0x40, 0x1c, 0x37, 0x7e, 0x40, 0x69, 0xd5, 0x57, 0x40, 0x8d, 0x71, 0x89, 0x40, + 0xf4, 0x21, 0xa1, 0x40, 0x62, 0x58, 0x77, 0x40, 0xd0, 0x29, 0x63, 0x40, 0x3c, 0x27, 0x94, 0x40, + 0x4e, 0x12, 0x9d, 0x40, 0x68, 0x8e, 0x55, 0x40, 0xdd, 0x37, 0x98, 0x40, 0x82, 0x11, 0xbc, 0x40, + 0x06, 0x4a, 0x18, 0x40, 0x40, 0x88, 0x87, 0x40, 0xe5, 0x21, 0xad, 0x40, 0xba, 0x73, 0x84, 0x40, + 0x25, 0xcb, 0x6f, 0x40, 0x60, 0xd5, 0x99, 0x40, 0x23, 0x3f, 0x89, 0x40, 0xd7, 0xc8, 0x2f, 0x40, + 0x6d, 0xfc, 0x88, 0x40, 0x8a, 0xde, 0x52, 0x40, 0x0b, 0xe1, 0x8b, 0x40, 0x7d, 0x57, 0x78, 0x40, + 0x11, 0x4d, 0x84, 0x40, 0xeb, 0x32, 0xca, 0x40, 0x3c, 0xe3, 0x22, 0x40, 0xf5, 0xd1, 0x3d, 0x40, + 0x74, 0xde, 0x91, 0x40, 0xfd, 0x85, 0x32, 0x40, 0x04, 0x77, 0x92, 0x40, 0x56, 0x76, 0x85, 0x40, + 0xd6, 0xd6, 0x62, 0x40, 0x9e, 0x15, 0xd6, 0x40, 0x50, 0x19, 0x8b, 0x40, 0xd7, 0xd7, 0x9d, 0x40, + 0x68, 0x9e, 0x48, 0x40, 0x80, 0x73, 0x5f, 0x40, 0xaa, 0xe4, 0x80, 0x40, 0xb1, 0xfe, 0xa5, 0x40, + 0xf4, 0x35, 0x84, 0x40, 0x10, 0xb5, 0x61, 0x40, 0x6c, 0xf8, 0x1e, 0x40}; +unsigned char global_avgpool_fp32_out[] = {0x2a, 0x3c, 0x81, 0x40, 0xc6, 0x02, + 0x81, 0x40, 0x03, 0x53, 0x82, 0x40}; +unsigned char global_avgpool_fp16_in[] = { + 0x9e, 0x43, 0x27, 0x45, 0xe4, 0x42, 0x41, 0x43, 0x92, 0x44, 0x8e, 0x42, 0x0c, 0x45, 0x00, 0x45, + 0x7c, 0x42, 0x49, 0x42, 0xa8, 0x41, 0x15, 0x44, 0x4f, 0x45, 0xc4, 0x42, 0x8e, 0x42, 0x1b, 0x44, + 0x91, 0x44, 0xdc, 0x45, 0x70, 0x45, 0x5f, 0x46, 0x23, 0x45, 0x7b, 0x45, 0xb8, 0x42, 0x12, 0x44, + 0x8f, 0x44, 0x11, 0x44, 0x79, 0x43, 0x65, 0x44, 0x43, 0x44, 0x4a, 0x44, 0x0c, 0x42, 0x34, 0x45, + 0xb4, 0x42, 0x22, 0x44, 0x1c, 0x44, 0xd3, 0x44, 0x2c, 0x44, 0xc2, 0x42, 0xeb, 0x40, 0x81, 0x42, + 0xd0, 0x42, 0xbc, 0x40, 0x9e, 0x42, 0xb7, 0x43, 0x4e, 0x44, 0x0c, 0x45, 0x42, 0x43, 0x82, 0x3f, + 0x74, 0x42, 0x4e, 0x43, 0xbd, 0x41, 0x35, 0x44, 0xde, 0x44, 0x20, 0x43, 0x61, 0x44, 0x72, 0x42, + 0xc2, 0x44, 0xfd, 0x42, 0x01, 0x45, 0xb2, 0x42, 0x2b, 0x3f, 0x1a, 0x46, 0xbc, 0x45, 0xd7, 0x45, + 0x5e, 0x41, 0x96, 0x42, 0x43, 0x44, 0x25, 0x43, 0x39, 0x45, 0xf7, 0x41, 0x9d, 0x41, 0xd2, 0x44, + 0x4e, 0x42, 0x48, 0x42, 0xda, 0x43, 0x6e, 0x46, 0x67, 0x44, 0xae, 0x43, 0xa0, 0x43, 0x14, 0x45, + 0x2f, 0x45, 0x40, 0x40, 0x38, 0x43, 0x99, 0x45, 0x49, 0x43, 0x40, 0x41, 0x03, 0x44, 0x03, 0x44, + 0x8b, 0x41, 0xe6, 0x44, 0x17, 0x41, 0xb6, 0x43, 0xfd, 0x44, 0x27, 0x43, 0x1c, 0x44, 0x92, 0x45, + 0x80, 0x44, 0x36, 0x43, 0x56, 0x42, 0x00, 0x45, 0xe6, 0x41, 0x75, 0x44, 0x91, 0x40, 0x82, 0x43, + 0x34, 0x45, 0xf1, 0x43, 0xbe, 0x42, 0x4b, 0x44, 0x09, 0x45, 0xba, 0x43, 0x19, 0x43, 0xa1, 0x44, + 0xe8, 0x44, 0xac, 0x42, 0xc1, 0x44, 0xe0, 0x45, 0xc2, 0x40, 0x3c, 0x44, 0x69, 0x45, 0x23, 0x44, + 0x7e, 0x43, 0xce, 0x44, 0x49, 0x44, 0x7e, 0x41, 0x47, 0x44, 0x96, 0x42, 0x5f, 0x44, 0xc2, 0x43, + 0x22, 0x44, 0x51, 0x46, 0x17, 0x41, 0xee, 0x41, 0x8e, 0x44, 0x94, 0x41, 0x93, 0x44, 0x2b, 0x44, + 0x16, 0x43, 0xb0, 0x46, 0x58, 0x44, 0xee, 0x44, 0x44, 0x42, 0xfb, 0x42, 0x07, 0x44, 0x2f, 0x45, + 0x21, 0x44, 0x0d, 0x43, 0xf7, 0x40}; +unsigned char global_avgpool_fp16_out[] = {0x0a, 0x44, 0x07, 0x44, 0x12, 0x44}; diff --git a/tests/unit_test/valid_data/basic_math.dat b/tests/unit_test/valid_data/basic_math.dat new file mode 100644 index 00000000..0f137062 --- /dev/null +++ b/tests/unit_test/valid_data/basic_math.dat @@ -0,0 +1,197 @@ +/************************************************************************** + * NCHW layout + **************************************************************************/ +// [c h w]: [2 5 11] +// [c h w]: [2 5 11] +// [c h w]: [2 5 11] +unsigned char add_fp32_in0[] = { 0x34, 0xd1, 0xc4, 0x3f, 0x49, 0x98, 0xff, 0x3f, 0xf2, 0xb5, 0xeb, 0xbf, + 0xe3, 0x98, 0x13, 0xc0, 0xab, 0x63, 0xc8, 0xbe, 0x2e, 0x93, 0xf0, 0x3f, + 0xce, 0xef, 0x1f, 0xc0, 0x4f, 0x1b, 0xd0, 0xbe, 0x97, 0x95, 0xee, 0xbf, + 0x3b, 0xf8, 0xa4, 0x3f, 0x0f, 0xaf, 0x6b, 0xbf, 0x97, 0x25, 0x05, 0xc0, + 0x5b, 0x83, 0x0f, 0x40, 0x66, 0xcd, 0x26, 0xbf, 0x26, 0x92, 0x72, 0x3f, + 0x3c, 0x0d, 0x8b, 0x3e, 0x79, 0xd0, 0x13, 0xbf, 0x34, 0x93, 0xd5, 0xbf, + 0x49, 0xac, 0xef, 0x3f, 0x0f, 0xb5, 0xc8, 0x3f, 0xb7, 0x2e, 0xe8, 0xbf, + 0xbd, 0xa0, 0xab, 0xbe, 0x12, 0x86, 0xa0, 0x3f, 0x02, 0x08, 0x37, 0x40, + 0x77, 0x69, 0xdc, 0x3f, 0xbf, 0xff, 0x05, 0x40, 0x73, 0x9e, 0x01, 0x40, + 0xf2, 0xac, 0x24, 0xc0, 0xa8, 0xf3, 0x1c, 0xc0, 0xde, 0x3b, 0x28, 0x40, + 0xf8, 0x38, 0x1b, 0x40, 0x59, 0x87, 0xbd, 0xbf, 0x57, 0x30, 0x35, 0xbe, + 0x04, 0xa0, 0xca, 0xbf, 0x8f, 0xa2, 0x13, 0x40, 0x3c, 0x33, 0xd6, 0xbf, + 0x21, 0x79, 0xc0, 0xbf, 0x24, 0xa5, 0xe3, 0xbe, 0xf5, 0x2d, 0xf5, 0xbe, + 0x0f, 0xe0, 0x29, 0xc0, 0xc7, 0xc4, 0xbc, 0xbe, 0x77, 0xca, 0xad, 0xbf, + 0x78, 0x7e, 0x0e, 0xc0, 0xd5, 0x66, 0xec, 0x3f, 0x53, 0x72, 0x07, 0xc0, + 0x18, 0xc8, 0x02, 0x40, 0x3d, 0x8b, 0x8d, 0x3f, 0x41, 0xbc, 0x07, 0xbe, + 0x02, 0x15, 0x2c, 0xbd, 0xf1, 0x14, 0xcf, 0xbf, 0xf1, 0x6e, 0xd5, 0x3f, + 0xb6, 0xff, 0xfc, 0xbe, 0x5c, 0xe2, 0x12, 0x40, 0x9c, 0xe1, 0x1b, 0x40, + 0x50, 0x6c, 0x3b, 0x40, 0x9e, 0x9a, 0x00, 0x40, 0x30, 0xef, 0x80, 0x3e, + 0x3e, 0x2c, 0xb6, 0xbf, 0xef, 0x69, 0xa8, 0xbf, 0xa3, 0x1d, 0x18, 0xbd, + 0xe9, 0xc5, 0x1a, 0xbe, 0xea, 0x55, 0x54, 0xbf, 0x7c, 0x59, 0x0f, 0xc0, + 0x68, 0xc6, 0x02, 0x40, 0x8a, 0xc6, 0x00, 0x40, 0xfd, 0x03, 0x66, 0xbf, + 0x7a, 0xb7, 0x84, 0x3f, 0x8e, 0xaf, 0x25, 0xbe, 0x13, 0x0d, 0x68, 0x3f, + 0x46, 0xd3, 0xfc, 0x3f, 0x43, 0x61, 0x1c, 0x40, 0x79, 0x36, 0x0a, 0xc0, + 0x86, 0x95, 0x03, 0x40, 0x4d, 0x28, 0x14, 0x3f, 0xcf, 0x42, 0x09, 0x40, + 0x87, 0x1a, 0x39, 0xc0, 0x27, 0x88, 0x2a, 0xbd, 0x09, 0x60, 0xd5, 0xbf, + 0x2f, 0x2b, 0x42, 0xbf, 0x8a, 0x80, 0x2b, 0x40, 0xc7, 0x86, 0xee, 0x3f, + 0x90, 0x78, 0x36, 0xbf, 0x19, 0x0f, 0x6a, 0xbf, 0x4e, 0xec, 0xa0, 0xbf, + 0x0e, 0x38, 0x11, 0x40, 0xe6, 0x9c, 0xc8, 0xbf, 0x63, 0x6d, 0x65, 0x3e, + 0xde, 0x6c, 0x49, 0x3e, 0xaa, 0x1b, 0x1a, 0x40, 0x60, 0x70, 0x03, 0xc0, + 0xbf, 0x92, 0xa9, 0x3f, 0x93, 0xb5, 0x35, 0x40, 0x89, 0x3e, 0xd4, 0x3f, + 0x5b, 0x0f, 0xbf, 0x3f, 0xb9, 0x93, 0xdb, 0xbf, 0x43, 0x23, 0x2a, 0xbd, + 0x90, 0x2f, 0x3a, 0xc0, 0x73, 0x43, 0xba, 0x3c, 0xea, 0x34, 0x24, 0xbf, + 0x90, 0xe9, 0x2b, 0xc0, 0x65, 0x91, 0x36, 0x3f, 0x6c, 0xac, 0x03, 0xc0, + 0x95, 0x1c, 0xa3, 0xbd, 0x6a, 0x77, 0x8b, 0x3b, 0xe5, 0x21, 0x55, 0x3f, + 0x11, 0xe3, 0xdf, 0xbe, 0x85, 0xa8, 0x26, 0x3f, 0xa4, 0x8a, 0x60, 0x3f, + 0x73, 0x8b, 0x11, 0x3e, 0x06, 0xd5, 0xc7, 0x3f}; +unsigned char add_fp32_in1[] = { 0x42, 0xae, 0x64, 0x3f, 0xd7, 0xf9, 0xf8, 0x3f, 0xad, 0xce, 0x78, 0x3f, + 0xbe, 0xfc, 0x81, 0xbc, 0xd3, 0x60, 0xcc, 0x3f, 0x1d, 0xa3, 0x5b, 0xbf, + 0x79, 0x45, 0x5f, 0xbe, 0xca, 0x09, 0xf8, 0xbf, 0x25, 0x4b, 0xd1, 0xbf, + 0x41, 0x70, 0xf7, 0xbf, 0x15, 0xfb, 0x78, 0xbf, 0x75, 0x6e, 0x50, 0x3e, + 0x07, 0x01, 0xf5, 0x3f, 0x61, 0xd3, 0xf1, 0xbf, 0x3d, 0x39, 0x67, 0x3e, + 0x88, 0x1a, 0x41, 0x40, 0x01, 0x14, 0xf0, 0xbe, 0x0b, 0x5a, 0x7f, 0x3f, + 0xf3, 0x00, 0xed, 0x3f, 0xac, 0x9f, 0xe3, 0x3f, 0xe7, 0x0f, 0x99, 0x3f, + 0x8f, 0x8a, 0x1b, 0x3f, 0xac, 0xc9, 0xb8, 0xbf, 0x32, 0xe2, 0x71, 0x40, + 0xd7, 0x1f, 0xf0, 0x3f, 0x0e, 0x8d, 0xeb, 0xbe, 0xfe, 0x22, 0x4d, 0xbe, + 0x08, 0xd8, 0x90, 0xbf, 0x79, 0xea, 0x09, 0x40, 0x4d, 0x2e, 0xd2, 0xbf, + 0xf0, 0x28, 0xe3, 0xbe, 0x14, 0xc2, 0x5b, 0x40, 0x7a, 0x4c, 0xfb, 0x3f, + 0x2a, 0x82, 0x7a, 0x40, 0x37, 0x90, 0x1d, 0x40, 0x21, 0x4d, 0x70, 0x40, + 0x3a, 0xc5, 0x85, 0x3f, 0x02, 0x34, 0x16, 0x40, 0x4d, 0x50, 0x74, 0x3f, + 0xa3, 0x18, 0x74, 0x40, 0x81, 0x0c, 0x0c, 0x40, 0xa0, 0x21, 0x59, 0x40, + 0x20, 0x2d, 0x44, 0x40, 0xa9, 0x8b, 0xbc, 0xbf, 0xeb, 0xff, 0x52, 0x3c, + 0x29, 0x4e, 0xf7, 0x3f, 0x70, 0x58, 0x0b, 0x3e, 0x04, 0x5c, 0x8c, 0x3f, + 0x83, 0x5e, 0x6d, 0x40, 0xb0, 0xc1, 0xda, 0x3e, 0x92, 0xf9, 0xb1, 0x3c, + 0x78, 0xdf, 0x98, 0xbf, 0x80, 0x26, 0x24, 0x40, 0x58, 0x42, 0xe9, 0xbe, + 0x0a, 0x34, 0x86, 0x3f, 0xa8, 0xd8, 0x74, 0x40, 0xf3, 0x62, 0x28, 0xbf, + 0x8c, 0x23, 0xb3, 0xbf, 0x64, 0x04, 0x49, 0xbf, 0x60, 0xb8, 0xed, 0xbf, + 0xaf, 0x4b, 0xd5, 0x3f, 0x2e, 0x26, 0xc2, 0xbf, 0x10, 0x0d, 0x3c, 0x40, + 0x4f, 0xc0, 0xcc, 0xbe, 0x3a, 0x06, 0x20, 0x40, 0x1f, 0xa6, 0xdc, 0xbf, + 0x9f, 0x90, 0x90, 0xbf, 0x5b, 0x2b, 0xb2, 0xbf, 0xf9, 0x8f, 0xe0, 0x3f, + 0x6c, 0x41, 0x1a, 0x40, 0x39, 0x2c, 0xc4, 0xbf, 0x25, 0xa1, 0x41, 0x40, + 0x29, 0x64, 0x10, 0x3f, 0xa8, 0x65, 0x8c, 0x3f, 0x8c, 0x4a, 0x99, 0x3f, + 0x7c, 0x8e, 0x31, 0x3f, 0x02, 0x55, 0x5a, 0x40, 0x63, 0x21, 0xa7, 0x3f, + 0x18, 0xb5, 0x1b, 0x40, 0x7d, 0x72, 0x56, 0x40, 0x69, 0x1a, 0x2a, 0x40, + 0x3f, 0x3c, 0x3e, 0x40, 0xdd, 0xf4, 0x84, 0xbf, 0xe9, 0xbb, 0x15, 0x3f, + 0x25, 0xfc, 0xb3, 0x3e, 0x72, 0xf5, 0x3c, 0x3f, 0xb4, 0x31, 0xc0, 0x3f, + 0xcd, 0xc7, 0x6a, 0x3f, 0x2d, 0x7a, 0xf5, 0x3f, 0xf5, 0x0d, 0xf2, 0xbe, + 0x4c, 0xbe, 0xf2, 0xbe, 0xce, 0x67, 0x89, 0xbf, 0x11, 0xe4, 0x07, 0xbf, + 0xaa, 0x35, 0xbc, 0x3f, 0x71, 0x8b, 0xec, 0xbf, 0x74, 0x9d, 0x82, 0xbe, + 0xbf, 0x92, 0xeb, 0xbf, 0x2e, 0xd3, 0x46, 0x40, 0x7a, 0xd4, 0x74, 0xbf, + 0x9b, 0x24, 0xc2, 0xbe, 0xa6, 0xcb, 0xe0, 0xbf, 0xe0, 0xb7, 0x24, 0x3e, + 0xcb, 0xb1, 0x7b, 0x40, 0x80, 0x1d, 0xb6, 0xbf, 0x2b, 0x13, 0x67, 0x3f, + 0xd8, 0x17, 0x5e, 0x40, 0x6c, 0x57, 0x1a, 0x3e, 0x4c, 0x9f, 0x7b, 0x3f, + 0xe9, 0xe4, 0x88, 0x3f, 0x59, 0x4f, 0xd3, 0x3f}; +unsigned char add_fp32_out[] = { 0x2a, 0x94, 0x1b, 0x40, 0x10, 0x49, 0x7c, 0x40, 0x37, 0x9d, 0x5e, 0xbf, + 0xdc, 0x9c, 0x14, 0xc0, 0xe8, 0x47, 0x9a, 0x3f, 0xa0, 0xc1, 0x82, 0x3f, + 0x26, 0xe4, 0x2d, 0xc0, 0x4f, 0x08, 0x16, 0xc0, 0x5e, 0xf0, 0x5f, 0xc0, + 0x0c, 0xf0, 0x24, 0xbf, 0x12, 0x55, 0xf2, 0xbf, 0x5f, 0x3d, 0xf0, 0xbf, + 0xef, 0x01, 0x85, 0x40, 0x0a, 0x9d, 0x22, 0xc0, 0x3b, 0x30, 0x96, 0x3f, + 0x30, 0x7c, 0x52, 0x40, 0x3d, 0xed, 0x85, 0xbf, 0x5d, 0xcc, 0x2b, 0xbf, + 0x9e, 0x56, 0x6e, 0x40, 0x5e, 0x2a, 0x56, 0x40, 0xa0, 0x3d, 0x1e, 0xbf, + 0x61, 0x74, 0x8b, 0x3e, 0xd0, 0x1c, 0x42, 0xbe, 0x1a, 0x75, 0xd4, 0x40, + 0xa7, 0x44, 0x66, 0x40, 0x3a, 0x1c, 0xd1, 0x3f, 0x86, 0x98, 0xe9, 0x3f, + 0xf6, 0x18, 0x6d, 0xc0, 0x78, 0x49, 0x98, 0xbe, 0xde, 0x92, 0x7c, 0x3f, + 0xb4, 0xa7, 0xfd, 0x3f, 0xcf, 0xfc, 0xf9, 0x3f, 0x6f, 0xa6, 0xe4, 0x3f, + 0x28, 0x32, 0x15, 0x40, 0x63, 0x99, 0x98, 0x40, 0x83, 0x33, 0x05, 0x40, + 0x9c, 0xcf, 0xea, 0xbe, 0xbb, 0x7e, 0xf3, 0x3f, 0xa5, 0x72, 0xf3, 0x3e, + 0x28, 0x71, 0x94, 0x3f, 0xd0, 0xe7, 0xe8, 0x3f, 0x64, 0x3c, 0x02, 0x40, + 0xa0, 0xba, 0x56, 0x3f, 0xb0, 0x6c, 0xbf, 0x3e, 0x53, 0x9f, 0x06, 0xc0, + 0x2c, 0x6f, 0x7e, 0x40, 0x4b, 0xf6, 0x9e, 0x3f, 0xf8, 0xc8, 0x76, 0x3f, + 0x2f, 0xae, 0x6a, 0x40, 0x85, 0x64, 0x98, 0xbf, 0xd7, 0x36, 0xd8, 0x3f, + 0x66, 0x1f, 0xd8, 0xbf, 0x6e, 0x84, 0x9b, 0x40, 0xa2, 0x72, 0xfd, 0x3f, + 0x55, 0x86, 0x7e, 0x40, 0xa3, 0xb9, 0xba, 0x40, 0xb6, 0xd6, 0xcf, 0xbe, + 0xe5, 0xa7, 0x34, 0xc0, 0x10, 0x76, 0x06, 0xc0, 0x4d, 0x79, 0xf2, 0xbf, + 0xf2, 0xf2, 0xc1, 0x3f, 0x92, 0x28, 0x16, 0xc0, 0x50, 0xce, 0x32, 0x3f, + 0xbc, 0x5c, 0xd2, 0x3f, 0x62, 0x66, 0x90, 0x40, 0x0f, 0xd4, 0x27, 0xc0, + 0x50, 0x92, 0xbd, 0xbd, 0x4d, 0xe1, 0xc6, 0xbf, 0x41, 0x4b, 0x2a, 0x40, + 0x88, 0x55, 0x8c, 0x40, 0x9a, 0x2c, 0x69, 0x3f, 0xb0, 0xaa, 0x5d, 0x3f, + 0x90, 0xae, 0x27, 0x40, 0xce, 0x79, 0xd6, 0x3f, 0x15, 0xe8, 0x55, 0x40, + 0xe8, 0xb6, 0x0c, 0xc0, 0xe1, 0xaa, 0x57, 0x40, 0x98, 0xfa, 0xb8, 0xbe, + 0x98, 0x54, 0xd6, 0x3f, 0x84, 0xf9, 0xc0, 0x40, 0xe6, 0xae, 0x90, 0x40, + 0x1b, 0x9e, 0x10, 0x40, 0x6a, 0xfc, 0xf9, 0xbf, 0xb3, 0x1c, 0x2c, 0xbf, + 0x93, 0xb7, 0x27, 0x40, 0x5a, 0x44, 0x54, 0xbf, 0x60, 0xdf, 0xdc, 0x3f, + 0x82, 0x91, 0x8e, 0x3f, 0x60, 0x6c, 0x8a, 0x40, 0x1f, 0xb2, 0x21, 0xc0, + 0x58, 0xc6, 0x59, 0x3f, 0x58, 0x03, 0xe2, 0x3f, 0x80, 0x4c, 0x90, 0x3f, + 0x82, 0xa2, 0x3d, 0x40, 0x95, 0x0f, 0x64, 0xc0, 0xdc, 0xe1, 0x97, 0xbe, + 0x78, 0xfc, 0x97, 0xc0, 0xb5, 0x47, 0x48, 0x40, 0xb2, 0x84, 0xcc, 0xbf, + 0x23, 0x2e, 0x44, 0xc0, 0xf4, 0x82, 0x85, 0xbf, 0xdc, 0xc1, 0xf2, 0xbf, + 0xe6, 0x98, 0x76, 0x40, 0x09, 0x92, 0xb5, 0xbf, 0x88, 0x1a, 0xde, 0x3f, + 0x76, 0x1b, 0x42, 0x40, 0x60, 0x3e, 0x4d, 0x3f, 0xf8, 0x14, 0xee, 0x3f, + 0x57, 0x16, 0x9b, 0x3f, 0x30, 0x92, 0x4d, 0x40}; +unsigned char add_fp16_in0[] = { 0x26, 0x3e, 0xfc, 0x3f, 0x5d, 0xbf, 0x9c, 0xc0, 0x43, 0xb6, 0x84, 0x3f, + 0xff, 0xc0, 0x80, 0xb6, 0x74, 0xbf, 0x27, 0x3d, 0x5d, 0xbb, 0x29, 0xc0, + 0x7c, 0x40, 0x36, 0xb9, 0x94, 0x3b, 0x58, 0x34, 0x9e, 0xb8, 0xac, 0xbe, + 0x7d, 0x3f, 0x45, 0x3e, 0x41, 0xbf, 0x5d, 0xb5, 0x04, 0x3d, 0xb8, 0x41, + 0xe3, 0x3e, 0x2f, 0x40, 0x0c, 0x40, 0x25, 0xc1, 0xe7, 0xc0, 0x41, 0x41, + 0xd9, 0x40, 0xec, 0xbd, 0xa9, 0xb1, 0x55, 0xbe, 0x9d, 0x40, 0xb1, 0xbe, + 0x03, 0xbe, 0x1d, 0xb7, 0xa9, 0xb7, 0x4f, 0xc1, 0xe6, 0xb5, 0x6e, 0xbd, + 0x73, 0xc0, 0x63, 0x3f, 0x3b, 0xc0, 0x16, 0x40, 0x6c, 0x3c, 0x3d, 0xb0, + 0x60, 0xa9, 0x78, 0xbe, 0xab, 0x3e, 0xe7, 0xb7, 0x97, 0x40, 0xdf, 0x40, + 0xdb, 0x41, 0x04, 0x40, 0x07, 0x34, 0xb1, 0xbd, 0x43, 0xbd, 0xc0, 0xa8, + 0xd6, 0xb0, 0xa2, 0xba, 0x7a, 0xc0, 0x16, 0x40, 0x06, 0x40, 0x30, 0xbb, + 0x25, 0x3c, 0x2d, 0xb1, 0x40, 0x3b, 0xe6, 0x3f, 0xe3, 0x40, 0x51, 0xc0, + 0x1c, 0x40, 0xa1, 0x38, 0x4a, 0x40, 0xc8, 0xc1, 0x54, 0xa9, 0xab, 0xbe, + 0x11, 0xba, 0x5c, 0x41, 0x74, 0x3f, 0xb3, 0xb9, 0x50, 0xbb, 0x07, 0xbd, + 0x89, 0x40, 0x44, 0xbe, 0x2b, 0x33, 0x4b, 0x32, 0xd0, 0x40, 0x1b, 0xc0, + 0x4c, 0x3d, 0xad, 0x41, 0xa1, 0x3e, 0xf8, 0x3d, 0xdc, 0xbe, 0x51, 0xa9, + 0xd1, 0xc1, 0xd2, 0x25, 0x21, 0xb9, 0x5f, 0xc1, 0xb4, 0x39, 0x1d, 0xc0, + 0x18, 0xad, 0x5b, 0x1c, 0xa9, 0x3a, 0xff, 0xb6, 0x35, 0x39, 0x04, 0x3b, + 0x8c, 0x30, 0x3e, 0x3e}; +unsigned char add_fp16_in1[] = { 0x25, 0x3b, 0xc7, 0x3f, 0xc6, 0x3b, 0x0f, 0xa4, 0x63, 0x3e, 0xdd, 0xba, + 0xfa, 0xb2, 0xc0, 0xbf, 0x8a, 0xbe, 0xbb, 0xbf, 0xc7, 0xbb, 0x83, 0x32, + 0xa8, 0x3f, 0x8e, 0xbf, 0x39, 0x33, 0x08, 0x42, 0x80, 0xb7, 0xfa, 0x3b, + 0x68, 0x3f, 0x1c, 0x3f, 0xc8, 0x3c, 0xdc, 0x38, 0xc6, 0xbd, 0x8f, 0x43, + 0x80, 0x3f, 0x5c, 0xb7, 0x69, 0xb2, 0x86, 0xbc, 0x4f, 0x40, 0x91, 0xbe, + 0x19, 0xb7, 0xde, 0x42, 0xda, 0x3f, 0xd4, 0x43, 0xec, 0x40, 0x82, 0x43, + 0x2e, 0x3c, 0xb1, 0x40, 0xa2, 0x3b, 0xa0, 0x43, 0x60, 0x40, 0xc9, 0x42, + 0x21, 0x42, 0xe4, 0xbd, 0x97, 0x22, 0xba, 0x3f, 0x5a, 0x30, 0x62, 0x3c, + 0x6a, 0x43, 0xd6, 0x36, 0x8f, 0x25, 0xc6, 0xbc, 0x21, 0x41, 0x4a, 0xb7, + 0x31, 0x3c, 0xa6, 0x43, 0x43, 0xb9, 0x99, 0xbd, 0x48, 0xba, 0x6d, 0xbf, + 0xaa, 0x3e, 0x11, 0xbe, 0xe0, 0x41, 0x66, 0xb6, 0x00, 0x41, 0xe5, 0xbe, + 0x84, 0xbc, 0x91, 0xbd, 0x04, 0x3f, 0xd2, 0x40, 0x21, 0xbe, 0x0d, 0x42, + 0x83, 0x38, 0x63, 0x3c, 0xca, 0x3c, 0x8c, 0x39, 0xd2, 0x42, 0x39, 0x3d, + 0xdd, 0x40, 0xb3, 0x42, 0x50, 0x41, 0xf1, 0x41, 0x27, 0xbc, 0xad, 0x38, + 0x9f, 0x35, 0xe7, 0x39, 0x01, 0x3e, 0x56, 0x3b, 0xab, 0x3f, 0x90, 0xb7, + 0x95, 0xb7, 0x4b, 0xbc, 0x3f, 0xb8, 0xe1, 0x3d, 0x64, 0xbf, 0x14, 0xb4, + 0x5c, 0xbf, 0x36, 0x42, 0xa6, 0xbb, 0x11, 0xb6, 0x06, 0xbf, 0x25, 0x31, + 0xdd, 0x43, 0xb0, 0xbd, 0x38, 0x3b, 0xf0, 0x42, 0xd2, 0x30, 0xdc, 0x3b, + 0x47, 0x3c, 0x9a, 0x3e}; +unsigned char add_fp16_out[] = { 0xdc, 0x40, 0xe2, 0x43, 0xf4, 0xba, 0xa4, 0xc0, 0xd2, 0x3c, 0x16, 0x3c, + 0x6f, 0xc1, 0xb0, 0xc0, 0xff, 0xc2, 0x28, 0xb9, 0x92, 0xbf, 0x82, 0xbf, + 0x28, 0x44, 0x14, 0xc1, 0xb1, 0x3c, 0x93, 0x42, 0x2f, 0xbc, 0x5e, 0xb9, + 0x72, 0x43, 0xb0, 0x42, 0xf2, 0xb8, 0x5b, 0x34, 0x10, 0xb2, 0xa4, 0x46, + 0x32, 0x43, 0x87, 0x3e, 0x4b, 0x3f, 0x68, 0xc3, 0xc0, 0xb4, 0xe2, 0x3b, + 0xec, 0x3f, 0xd0, 0x3f, 0x25, 0x3f, 0xaa, 0x40, 0xc4, 0x44, 0x2a, 0x40, + 0x54, 0xb7, 0x9b, 0x3f, 0x9b, 0x37, 0xa2, 0x3c, 0x46, 0x3f, 0x12, 0x40, + 0xb8, 0x3a, 0xfc, 0x35, 0x34, 0xc0, 0xf3, 0x43, 0xf7, 0x3c, 0xb5, 0x3b, + 0x54, 0x43, 0xc2, 0xbc, 0xc1, 0x3e, 0xc0, 0xbe, 0xdc, 0x44, 0xec, 0x3f, + 0xf4, 0x43, 0xd5, 0x45, 0x7f, 0xb6, 0xa5, 0xc1, 0x34, 0xc0, 0x93, 0xbf, + 0x0f, 0x3e, 0xb1, 0xc0, 0x98, 0x39, 0x92, 0x3e, 0x83, 0x44, 0x3e, 0xc1, + 0xf0, 0xad, 0x37, 0xbe, 0x52, 0x41, 0x62, 0x44, 0x4a, 0x3b, 0xf0, 0x3a, + 0x3d, 0x41, 0xb4, 0x3e, 0xaf, 0x42, 0x65, 0xc0, 0xbd, 0x42, 0xc8, 0xb5, + 0xb2, 0x3e, 0x08, 0x46, 0x85, 0x44, 0x84, 0x40, 0xcf, 0xbf, 0x61, 0xb9, + 0x3d, 0x41, 0xa1, 0xba, 0xe6, 0x3e, 0x74, 0x3c, 0x53, 0x44, 0x0d, 0xc1, + 0xce, 0x3a, 0x0f, 0x3f, 0x82, 0x3c, 0xec, 0x41, 0x20, 0xc3, 0xbe, 0xb4, + 0xc0, 0xc4, 0x42, 0x42, 0x64, 0xbe, 0x21, 0xc2, 0x2c, 0xbc, 0x95, 0xbf, + 0xb4, 0x43, 0xac, 0xbd, 0xf0, 0x3e, 0x10, 0x42, 0x6a, 0x3a, 0x70, 0x3f, + 0xd8, 0x3c, 0x6c, 0x42}; +unsigned char add_int8_in0[] = {}; +unsigned char add_int8_in1[] = {}; +unsigned char add_int8_out[] = {}; + + +// [c h w]: [2 5 11] +// [c h w]: [2 5 11] +// [c h w]: [2 5 11] +unsigned char mul_fp32_in0[] = {}; +unsigned char mul_fp32_in1[] = {}; +unsigned char mul_fp32_out[] = {}; +unsigned char mul_fp16_in0[] = {}; +unsigned char mul_fp16_in1[] = {}; +unsigned char mul_fp16_out[] = {}; +unsigned char mul_int8_in0[] = {}; +unsigned char mul_int8_in1[] = {}; +unsigned char mul_int8_out[] = {}; + + +// [c h w]: [2 5 11] +// +unsigned char sum_stride_int8_in[] = {}; +unsigned char sum_stride_int8_out[] = {}; \ No newline at end of file diff --git a/tests/unit_test/valid_data/concat.dat b/tests/unit_test/valid_data/concat.dat new file mode 100644 index 00000000..d0e9961e --- /dev/null +++ b/tests/unit_test/valid_data/concat.dat @@ -0,0 +1,114 @@ +/************************************************************************** + * NCHW layout + **************************************************************************/ +// [c h w]: [2 3 10] [2 3 10] +// [c h w]: [2 6 10] +// axis = 2 +unsigned char concat_fp32_in0[] = { + 0x20, 0x4e, 0x57, 0x40, 0x23, 0x72, 0xce, 0xc0, 0xb2, 0xfa, 0xcc, 0xc0, 0x1c, 0xae, 0x9f, 0xbe, + 0xb8, 0xbc, 0x95, 0xbd, 0xdd, 0x0b, 0x89, 0x3c, 0x71, 0xbb, 0x6d, 0xc0, 0x31, 0x30, 0x81, 0x40, + 0x36, 0x2d, 0x83, 0x40, 0x31, 0xce, 0x7b, 0xbf, 0xcf, 0x8c, 0xaf, 0xc0, 0x11, 0xe0, 0x03, 0x3f, + 0xba, 0x4e, 0xc1, 0x3f, 0x28, 0xb1, 0x22, 0xc0, 0xe4, 0x04, 0x05, 0x40, 0x4e, 0x65, 0xa4, 0xbf, + 0xae, 0x39, 0xdc, 0xbe, 0x4a, 0x3f, 0x14, 0x3e, 0x65, 0xb3, 0x2e, 0xc0, 0x49, 0xe3, 0x8d, 0x3f, + 0x20, 0x30, 0x68, 0x40, 0xef, 0xa8, 0x65, 0x40, 0x13, 0x51, 0x20, 0xc0, 0x4c, 0x2f, 0x88, 0xc0, + 0x7a, 0xcd, 0x74, 0x40, 0xd6, 0x3e, 0x1e, 0xbf, 0x72, 0x8c, 0x6c, 0x3f, 0x13, 0x73, 0x5a, 0xbf, + 0xd5, 0xbb, 0x51, 0xbf, 0xe3, 0xe0, 0xc5, 0x40, 0xcc, 0x60, 0x3a, 0xc0, 0x5c, 0xe6, 0x00, 0x40, + 0x83, 0x0e, 0x8f, 0xc0, 0x6a, 0x44, 0xab, 0x3f, 0xaf, 0x36, 0xe4, 0xbf, 0xbe, 0x20, 0x88, 0x40, + 0x1c, 0x85, 0x22, 0xc0, 0xe9, 0x85, 0x1d, 0xbf, 0x83, 0x39, 0x2a, 0x40, 0x65, 0x35, 0x13, 0xc0, + 0xb9, 0xef, 0xfb, 0x40, 0xa3, 0x1b, 0x00, 0xc0, 0xe0, 0x1b, 0x8f, 0x40, 0xfa, 0xb5, 0x29, 0x3f, + 0x6c, 0x3a, 0x29, 0xc0, 0x9f, 0x7c, 0xc4, 0x3f, 0xcc, 0x9d, 0x84, 0xc0, 0x38, 0x8b, 0xb9, 0x3f, + 0xae, 0x7b, 0xff, 0x3f, 0xc3, 0xf8, 0x0c, 0xbf, 0xf5, 0x9e, 0x08, 0xc1, 0x45, 0xff, 0x7c, 0xc0, + 0xdc, 0x75, 0x13, 0xc0, 0x6b, 0xf5, 0x39, 0x40, 0xe5, 0x79, 0x3c, 0xbf, 0x55, 0xa2, 0x19, 0x40, + 0xbe, 0x52, 0x42, 0x3f, 0x32, 0xb5, 0x93, 0xbf, 0x8e, 0x89, 0x99, 0x3f, 0x62, 0xed, 0xea, 0xbb}; +unsigned char concat_fp32_in1[] = { + 0x27, 0xee, 0x16, 0x41, 0x71, 0x72, 0xef, 0xc0, 0xaf, 0x93, 0x8d, 0x3f, 0xde, 0x83, 0xab, 0x3f, + 0x2c, 0x5f, 0xbb, 0x40, 0x63, 0xd1, 0x8b, 0x40, 0xb8, 0x88, 0xfa, 0x3f, 0x4a, 0x7c, 0xf9, 0x40, + 0xad, 0xef, 0x8d, 0x40, 0x6a, 0x93, 0xae, 0xbe, 0xf7, 0x23, 0x29, 0xc0, 0x8b, 0x61, 0x2c, 0x3f, + 0x61, 0x18, 0xc0, 0xc0, 0xe6, 0xcd, 0x2a, 0x40, 0x4d, 0x3a, 0xfe, 0xc0, 0xa1, 0x2b, 0xdf, 0x40, + 0x67, 0x15, 0xf3, 0xbf, 0xba, 0xe1, 0xfc, 0xc0, 0x3c, 0xea, 0xc1, 0xc0, 0x32, 0x90, 0x8d, 0x3f, + 0x59, 0x95, 0xd3, 0x40, 0x77, 0x3d, 0x95, 0x41, 0x5d, 0x53, 0xb9, 0xc0, 0x18, 0xd6, 0x62, 0x40, + 0xd7, 0x07, 0xd4, 0xc0, 0xbe, 0xa7, 0x20, 0xc0, 0x22, 0x00, 0x1d, 0x41, 0x8f, 0x4c, 0x4a, 0x41, + 0x34, 0x90, 0x5b, 0xc0, 0x44, 0x54, 0x9e, 0x3f, 0x7e, 0x18, 0x50, 0x41, 0x7d, 0x4b, 0x1d, 0x40, + 0xd4, 0x05, 0x26, 0x41, 0x19, 0xf3, 0x2e, 0x41, 0x51, 0x40, 0xce, 0xc0, 0x20, 0xcd, 0xdb, 0xc0, + 0x9b, 0x86, 0x1f, 0xc0, 0x1f, 0x2c, 0xcb, 0x40, 0xd4, 0xcd, 0xbe, 0xc0, 0x59, 0xf4, 0x88, 0x3f, + 0xe3, 0xd8, 0x10, 0xc0, 0x39, 0x96, 0x9b, 0x41, 0xd9, 0xaf, 0x44, 0x40, 0xde, 0x96, 0xd6, 0x40, + 0xe9, 0x81, 0x82, 0xc0, 0x61, 0x9b, 0xe5, 0xbf, 0x5f, 0xd6, 0xe5, 0x3f, 0xb3, 0xec, 0x7c, 0x40, + 0x27, 0xdf, 0x02, 0x40, 0x0a, 0x7b, 0x99, 0xc0, 0x88, 0x88, 0x93, 0x40, 0x3f, 0xd4, 0xda, 0x40, + 0x03, 0x44, 0xab, 0xbe, 0x3f, 0x64, 0x88, 0x3e, 0x68, 0xbe, 0x13, 0x40, 0xfa, 0x30, 0x94, 0x3f, + 0x4c, 0x52, 0x87, 0x3f, 0x72, 0x25, 0x81, 0x40, 0x73, 0x0d, 0x12, 0xc1, 0x17, 0xd4, 0x51, 0x41}; +unsigned char concat_fp32_out[] = { + 0x20, 0x4e, 0x57, 0x40, 0x23, 0x72, 0xce, 0xc0, 0xb2, 0xfa, 0xcc, 0xc0, 0x1c, 0xae, 0x9f, 0xbe, + 0xb8, 0xbc, 0x95, 0xbd, 0xdd, 0x0b, 0x89, 0x3c, 0x71, 0xbb, 0x6d, 0xc0, 0x31, 0x30, 0x81, 0x40, + 0x36, 0x2d, 0x83, 0x40, 0x31, 0xce, 0x7b, 0xbf, 0xcf, 0x8c, 0xaf, 0xc0, 0x11, 0xe0, 0x03, 0x3f, + 0xba, 0x4e, 0xc1, 0x3f, 0x28, 0xb1, 0x22, 0xc0, 0xe4, 0x04, 0x05, 0x40, 0x4e, 0x65, 0xa4, 0xbf, + 0xae, 0x39, 0xdc, 0xbe, 0x4a, 0x3f, 0x14, 0x3e, 0x65, 0xb3, 0x2e, 0xc0, 0x49, 0xe3, 0x8d, 0x3f, + 0x20, 0x30, 0x68, 0x40, 0xef, 0xa8, 0x65, 0x40, 0x13, 0x51, 0x20, 0xc0, 0x4c, 0x2f, 0x88, 0xc0, + 0x7a, 0xcd, 0x74, 0x40, 0xd6, 0x3e, 0x1e, 0xbf, 0x72, 0x8c, 0x6c, 0x3f, 0x13, 0x73, 0x5a, 0xbf, + 0xd5, 0xbb, 0x51, 0xbf, 0xe3, 0xe0, 0xc5, 0x40, 0x27, 0xee, 0x16, 0x41, 0x71, 0x72, 0xef, 0xc0, + 0xaf, 0x93, 0x8d, 0x3f, 0xde, 0x83, 0xab, 0x3f, 0x2c, 0x5f, 0xbb, 0x40, 0x63, 0xd1, 0x8b, 0x40, + 0xb8, 0x88, 0xfa, 0x3f, 0x4a, 0x7c, 0xf9, 0x40, 0xad, 0xef, 0x8d, 0x40, 0x6a, 0x93, 0xae, 0xbe, + 0xf7, 0x23, 0x29, 0xc0, 0x8b, 0x61, 0x2c, 0x3f, 0x61, 0x18, 0xc0, 0xc0, 0xe6, 0xcd, 0x2a, 0x40, + 0x4d, 0x3a, 0xfe, 0xc0, 0xa1, 0x2b, 0xdf, 0x40, 0x67, 0x15, 0xf3, 0xbf, 0xba, 0xe1, 0xfc, 0xc0, + 0x3c, 0xea, 0xc1, 0xc0, 0x32, 0x90, 0x8d, 0x3f, 0x59, 0x95, 0xd3, 0x40, 0x77, 0x3d, 0x95, 0x41, + 0x5d, 0x53, 0xb9, 0xc0, 0x18, 0xd6, 0x62, 0x40, 0xd7, 0x07, 0xd4, 0xc0, 0xbe, 0xa7, 0x20, 0xc0, + 0x22, 0x00, 0x1d, 0x41, 0x8f, 0x4c, 0x4a, 0x41, 0x34, 0x90, 0x5b, 0xc0, 0x44, 0x54, 0x9e, 0x3f, + 0xcc, 0x60, 0x3a, 0xc0, 0x5c, 0xe6, 0x00, 0x40, 0x83, 0x0e, 0x8f, 0xc0, 0x6a, 0x44, 0xab, 0x3f, + 0xaf, 0x36, 0xe4, 0xbf, 0xbe, 0x20, 0x88, 0x40, 0x1c, 0x85, 0x22, 0xc0, 0xe9, 0x85, 0x1d, 0xbf, + 0x83, 0x39, 0x2a, 0x40, 0x65, 0x35, 0x13, 0xc0, 0xb9, 0xef, 0xfb, 0x40, 0xa3, 0x1b, 0x00, 0xc0, + 0xe0, 0x1b, 0x8f, 0x40, 0xfa, 0xb5, 0x29, 0x3f, 0x6c, 0x3a, 0x29, 0xc0, 0x9f, 0x7c, 0xc4, 0x3f, + 0xcc, 0x9d, 0x84, 0xc0, 0x38, 0x8b, 0xb9, 0x3f, 0xae, 0x7b, 0xff, 0x3f, 0xc3, 0xf8, 0x0c, 0xbf, + 0xf5, 0x9e, 0x08, 0xc1, 0x45, 0xff, 0x7c, 0xc0, 0xdc, 0x75, 0x13, 0xc0, 0x6b, 0xf5, 0x39, 0x40, + 0xe5, 0x79, 0x3c, 0xbf, 0x55, 0xa2, 0x19, 0x40, 0xbe, 0x52, 0x42, 0x3f, 0x32, 0xb5, 0x93, 0xbf, + 0x8e, 0x89, 0x99, 0x3f, 0x62, 0xed, 0xea, 0xbb, 0x7e, 0x18, 0x50, 0x41, 0x7d, 0x4b, 0x1d, 0x40, + 0xd4, 0x05, 0x26, 0x41, 0x19, 0xf3, 0x2e, 0x41, 0x51, 0x40, 0xce, 0xc0, 0x20, 0xcd, 0xdb, 0xc0, + 0x9b, 0x86, 0x1f, 0xc0, 0x1f, 0x2c, 0xcb, 0x40, 0xd4, 0xcd, 0xbe, 0xc0, 0x59, 0xf4, 0x88, 0x3f, + 0xe3, 0xd8, 0x10, 0xc0, 0x39, 0x96, 0x9b, 0x41, 0xd9, 0xaf, 0x44, 0x40, 0xde, 0x96, 0xd6, 0x40, + 0xe9, 0x81, 0x82, 0xc0, 0x61, 0x9b, 0xe5, 0xbf, 0x5f, 0xd6, 0xe5, 0x3f, 0xb3, 0xec, 0x7c, 0x40, + 0x27, 0xdf, 0x02, 0x40, 0x0a, 0x7b, 0x99, 0xc0, 0x88, 0x88, 0x93, 0x40, 0x3f, 0xd4, 0xda, 0x40, + 0x03, 0x44, 0xab, 0xbe, 0x3f, 0x64, 0x88, 0x3e, 0x68, 0xbe, 0x13, 0x40, 0xfa, 0x30, 0x94, 0x3f, + 0x4c, 0x52, 0x87, 0x3f, 0x72, 0x25, 0x81, 0x40, 0x73, 0x0d, 0x12, 0xc1, 0x17, 0xd4, 0x51, 0x41}; + +unsigned char concat_fp16_in0[] = { 0xba, 0x42, 0x73, 0xc6, 0x67, 0xc6, 0xfd, 0xb4, 0xad, 0xac, 0x48, 0x24, + 0x6d, 0xc3, 0x09, 0x44, 0x19, 0x44, 0xde, 0xbb, 0x7c, 0xc5, 0x1f, 0x38, + 0x0a, 0x3e, 0x15, 0xc1, 0x28, 0x40, 0x23, 0xbd, 0xe1, 0xb6, 0xa1, 0x30, + 0x75, 0xc1, 0x6f, 0x3c, 0x41, 0x43, 0x2d, 0x43, 0x02, 0xc1, 0x41, 0xc4, + 0xa6, 0x43, 0xf1, 0xb8, 0x64, 0x3b, 0xd3, 0xba, 0x8d, 0xba, 0x2f, 0x46, + 0xd3, 0xc1, 0x07, 0x40, 0x78, 0xc4, 0x5a, 0x3d, 0x21, 0xbf, 0x41, 0x44, + 0x14, 0xc1, 0xec, 0xb8, 0x51, 0x41, 0x99, 0xc0, 0xdf, 0x47, 0x00, 0xc0, + 0x78, 0x44, 0x4d, 0x39, 0x49, 0xc1, 0x23, 0x3e, 0x24, 0xc4, 0xcc, 0x3d, + 0xfb, 0x3f, 0x67, 0xb8, 0x44, 0xc8, 0xe7, 0xc3, 0x9b, 0xc0, 0xcf, 0x41, + 0xe3, 0xb9, 0xcd, 0x40, 0x12, 0x3a, 0x9d, 0xbc, 0xcc, 0x3c, 0x57, 0x9f}; +unsigned char concat_fp16_in1[] = { 0xb7, 0x48, 0x7b, 0xc7, 0x6c, 0x3c, 0x5c, 0x3d, 0xda, 0x45, 0x5e, 0x44, + 0xd4, 0x3f, 0xcb, 0x47, 0x6f, 0x44, 0x74, 0xb5, 0x49, 0xc1, 0x63, 0x39, + 0x00, 0xc6, 0x56, 0x41, 0xf1, 0xc7, 0xf9, 0x46, 0x98, 0xbf, 0xe7, 0xc7, + 0x0f, 0xc6, 0x6c, 0x3c, 0x9c, 0x46, 0xa9, 0x4c, 0xca, 0xc5, 0x16, 0x43, + 0xa0, 0xc6, 0x05, 0xc1, 0xe8, 0x48, 0x52, 0x4a, 0xdc, 0xc2, 0xf2, 0x3c, + 0x80, 0x4a, 0xea, 0x40, 0x30, 0x49, 0x77, 0x49, 0x72, 0xc6, 0xde, 0xc6, + 0xfc, 0xc0, 0x59, 0x46, 0xf6, 0xc5, 0x47, 0x3c, 0x86, 0xc0, 0xdc, 0x4c, + 0x25, 0x42, 0xb4, 0x46, 0x14, 0xc4, 0x2c, 0xbf, 0x2e, 0x3f, 0xe7, 0x43, + 0x16, 0x40, 0xcb, 0xc4, 0x9c, 0x44, 0xd6, 0x46, 0x5a, 0xb5, 0x43, 0x34, + 0x9d, 0x40, 0xa1, 0x3c, 0x3a, 0x3c, 0x09, 0x44, 0x90, 0xc8, 0x8e, 0x4a}; +unsigned char concat_fp16_out[] = { 0xba, 0x42, 0x73, 0xc6, 0x67, 0xc6, 0xfd, 0xb4, 0xad, 0xac, 0x48, 0x24, + 0x6d, 0xc3, 0x09, 0x44, 0x19, 0x44, 0xde, 0xbb, 0x7c, 0xc5, 0x1f, 0x38, + 0x0a, 0x3e, 0x15, 0xc1, 0x28, 0x40, 0x23, 0xbd, 0xe1, 0xb6, 0xa1, 0x30, + 0x75, 0xc1, 0x6f, 0x3c, 0x41, 0x43, 0x2d, 0x43, 0x02, 0xc1, 0x41, 0xc4, + 0xa6, 0x43, 0xf1, 0xb8, 0x64, 0x3b, 0xd3, 0xba, 0x8d, 0xba, 0x2f, 0x46, + 0xb7, 0x48, 0x7b, 0xc7, 0x6c, 0x3c, 0x5c, 0x3d, 0xda, 0x45, 0x5e, 0x44, + 0xd4, 0x3f, 0xcb, 0x47, 0x6f, 0x44, 0x74, 0xb5, 0x49, 0xc1, 0x63, 0x39, + 0x00, 0xc6, 0x56, 0x41, 0xf1, 0xc7, 0xf9, 0x46, 0x98, 0xbf, 0xe7, 0xc7, + 0x0f, 0xc6, 0x6c, 0x3c, 0x9c, 0x46, 0xa9, 0x4c, 0xca, 0xc5, 0x16, 0x43, + 0xa0, 0xc6, 0x05, 0xc1, 0xe8, 0x48, 0x52, 0x4a, 0xdc, 0xc2, 0xf2, 0x3c, + 0xd3, 0xc1, 0x07, 0x40, 0x78, 0xc4, 0x5a, 0x3d, 0x21, 0xbf, 0x41, 0x44, + 0x14, 0xc1, 0xec, 0xb8, 0x51, 0x41, 0x99, 0xc0, 0xdf, 0x47, 0x00, 0xc0, + 0x78, 0x44, 0x4d, 0x39, 0x49, 0xc1, 0x23, 0x3e, 0x24, 0xc4, 0xcc, 0x3d, + 0xfb, 0x3f, 0x67, 0xb8, 0x44, 0xc8, 0xe7, 0xc3, 0x9b, 0xc0, 0xcf, 0x41, + 0xe3, 0xb9, 0xcd, 0x40, 0x12, 0x3a, 0x9d, 0xbc, 0xcc, 0x3c, 0x57, 0x9f, + 0x80, 0x4a, 0xea, 0x40, 0x30, 0x49, 0x77, 0x49, 0x72, 0xc6, 0xde, 0xc6, + 0xfc, 0xc0, 0x59, 0x46, 0xf6, 0xc5, 0x47, 0x3c, 0x86, 0xc0, 0xdc, 0x4c, + 0x25, 0x42, 0xb4, 0x46, 0x14, 0xc4, 0x2c, 0xbf, 0x2e, 0x3f, 0xe7, 0x43, + 0x16, 0x40, 0xcb, 0xc4, 0x9c, 0x44, 0xd6, 0x46, 0x5a, 0xb5, 0x43, 0x34, + 0x9d, 0x40, 0xa1, 0x3c, 0x3a, 0x3c, 0x09, 0x44, 0x90, 0xc8, 0x8e, 0x4a}; + +unsigned char concat_int8_in0[] = {}; +unsigned char concat_int8_in1[] = {}; +unsigned char concat_int8_out[] = {}; \ No newline at end of file diff --git a/tests/unit_test/valid_data/conv2d.dat b/tests/unit_test/valid_data/conv2d.dat new file mode 100644 index 00000000..071fc448 --- /dev/null +++ b/tests/unit_test/valid_data/conv2d.dat @@ -0,0 +1,6394 @@ +/************************************************************************** + * NCHW layout + **************************************************************************/ + +/*********************************** conv2d1x1s1_gemm *****************************/ +// input: [1, 16, 4, 5] +// kernel: [19, 16, 1, 1] +// bias: [19] +// output: [1, 19, 4, 5] +// pad_left = pad_right = pad_top = pad_down = 0 +unsigned char conv2d1x1s1_fp32_in[] = { + 0x3a, 0x69, 0x19, 0xbe, 0x1c, 0xe1, 0x42, 0xbf, 0xab, 0xdc, 0xd9, 0xbf, 0xde, 0x34, 0xec, 0xbe, + 0xdb, 0xc8, 0x39, 0x40, 0x82, 0x15, 0x7c, 0xbe, 0x57, 0xe2, 0x00, 0x3e, 0x4d, 0x9a, 0xb9, 0xbf, + 0x11, 0xc4, 0x09, 0xbe, 0x0e, 0x81, 0x48, 0x40, 0xa6, 0x9a, 0x39, 0x40, 0x6e, 0x83, 0xd4, 0xbe, + 0x62, 0x62, 0x32, 0xc0, 0x94, 0x0e, 0x2a, 0x40, 0x1f, 0x50, 0x97, 0x3f, 0xce, 0x9a, 0x94, 0x3f, + 0xa6, 0x05, 0xef, 0xbf, 0x35, 0x93, 0x72, 0x3f, 0x73, 0xff, 0x5b, 0x40, 0xf7, 0x35, 0x8a, 0x3f, + 0x71, 0x58, 0x83, 0x3f, 0x38, 0xa2, 0x74, 0xbe, 0x01, 0x63, 0x2e, 0xc0, 0x59, 0x71, 0xef, 0xbe, + 0x80, 0xf9, 0xdf, 0x3f, 0x62, 0xde, 0x3a, 0x40, 0x88, 0x33, 0x51, 0x40, 0xdd, 0x5c, 0xae, 0x40, + 0x4a, 0xcb, 0x94, 0x3f, 0x64, 0x28, 0x47, 0x40, 0x2d, 0x60, 0xb4, 0x3f, 0x30, 0xa0, 0x94, 0x3d, + 0x8a, 0x29, 0x9b, 0xbf, 0x06, 0xb0, 0xb1, 0x3f, 0x11, 0x9b, 0x62, 0xbf, 0x4b, 0xb3, 0x1a, 0xc0, + 0x06, 0xf5, 0x24, 0xc0, 0x89, 0x7c, 0x20, 0xc0, 0x58, 0xde, 0x6b, 0xbe, 0x87, 0x33, 0x2a, 0xc0, + 0x0b, 0xa7, 0x28, 0x40, 0x09, 0xbc, 0x9d, 0x3e, 0x8a, 0x3e, 0x28, 0x40, 0x78, 0x84, 0x6f, 0xc0, + 0xa4, 0x74, 0x0d, 0xc0, 0x29, 0x44, 0xa3, 0xbe, 0xd8, 0x6c, 0x97, 0xbf, 0x3f, 0x85, 0x4f, 0x40, + 0xed, 0x14, 0x30, 0xbf, 0xc1, 0xce, 0xb1, 0xbf, 0x1b, 0x8b, 0x6a, 0x40, 0xae, 0xf2, 0x2d, 0xbf, + 0x2b, 0x84, 0x8c, 0x40, 0x54, 0xde, 0xc5, 0x3f, 0x0a, 0xb3, 0x57, 0xc0, 0x91, 0x1d, 0x05, 0x40, + 0x80, 0x54, 0x91, 0x3f, 0x68, 0x63, 0x63, 0xbd, 0xa1, 0x0f, 0x1b, 0xbf, 0x78, 0xc9, 0x77, 0x3f, + 0x0c, 0x09, 0x56, 0x40, 0xab, 0xdd, 0x09, 0x40, 0x90, 0xf4, 0x9a, 0x3e, 0x11, 0xa2, 0x81, 0xbf, + 0xda, 0xe0, 0x88, 0xbf, 0x84, 0xcc, 0x78, 0xbf, 0x9c, 0xb2, 0xfa, 0x3f, 0xb3, 0xcb, 0x9f, 0xbe, + 0x53, 0x00, 0xc6, 0xbf, 0xf1, 0x02, 0xe2, 0xbe, 0xe1, 0xe6, 0xcd, 0x3f, 0x80, 0xf5, 0x4e, 0xbf, + 0xb1, 0x09, 0x01, 0xbe, 0x8b, 0xb1, 0x9c, 0xbf, 0x44, 0xb8, 0xc5, 0x3f, 0x32, 0xce, 0x1a, 0x3f, + 0xba, 0xec, 0xa6, 0xbf, 0x4a, 0xec, 0x7e, 0xc0, 0x8a, 0x81, 0x1f, 0x3f, 0x33, 0x97, 0x30, 0xbf, + 0x2d, 0x84, 0x98, 0x3f, 0x7c, 0x91, 0xd7, 0xbe, 0x76, 0x2c, 0x3b, 0x3f, 0x36, 0x1f, 0xb2, 0x3f, + 0xca, 0x83, 0x86, 0xbf, 0xa4, 0xff, 0x1a, 0x40, 0x4d, 0x59, 0x23, 0xbf, 0x1f, 0xd2, 0x5e, 0xc0, + 0x03, 0x66, 0x0f, 0xbf, 0x60, 0x82, 0xe3, 0x3e, 0x1f, 0xa8, 0xec, 0xbf, 0x6f, 0xf4, 0xb4, 0xbf, + 0x77, 0xcc, 0x0a, 0xc0, 0x39, 0x3f, 0xdf, 0xbe, 0xe5, 0x5b, 0x80, 0x3f, 0x89, 0x99, 0x4a, 0xbf, + 0x0b, 0xe3, 0x42, 0xbe, 0x11, 0xa2, 0x93, 0xbe, 0x79, 0xa7, 0xf0, 0xbe, 0xf3, 0xdf, 0x07, 0x40, + 0x92, 0xdb, 0x88, 0x3d, 0x78, 0x58, 0x4b, 0xbf, 0x71, 0x85, 0xeb, 0x3f, 0xe5, 0x2e, 0xf6, 0xbf, + 0x14, 0xdf, 0xed, 0x3e, 0xa3, 0xf8, 0xfb, 0x3f, 0x10, 0x10, 0xc1, 0x3f, 0xd0, 0x20, 0x9c, 0xbf, + 0x37, 0x62, 0x14, 0x40, 0xeb, 0xbe, 0x38, 0x40, 0x4d, 0x4d, 0x9c, 0xbe, 0x18, 0xee, 0xb2, 0xbf, + 0x7d, 0x55, 0x26, 0xbf, 0xb3, 0x24, 0xa5, 0xbf, 0x89, 0xf4, 0x5d, 0x3f, 0x67, 0xce, 0x0f, 0x3f, + 0xa5, 0xdd, 0x19, 0xc0, 0x67, 0x2c, 0x04, 0x40, 0x1f, 0x26, 0x1d, 0xc0, 0xc2, 0xbf, 0x4b, 0xbf, + 0xb2, 0xfc, 0x28, 0x40, 0x46, 0x10, 0x01, 0xc0, 0xb4, 0x33, 0xe4, 0xbf, 0x79, 0x96, 0x4e, 0xbe, + 0xa3, 0x47, 0x3c, 0xc0, 0xce, 0xcf, 0xc6, 0x3f, 0x77, 0xec, 0x0f, 0x3f, 0xec, 0xd2, 0x99, 0x40, + 0x48, 0x13, 0x9a, 0xc0, 0xb5, 0x5e, 0x82, 0xbe, 0x95, 0x02, 0xad, 0x3f, 0x72, 0xde, 0x92, 0xbe, + 0x81, 0x02, 0x1d, 0xbf, 0x39, 0xe9, 0x84, 0x40, 0x74, 0xc6, 0x46, 0xbf, 0x92, 0x6a, 0xf7, 0xbf, + 0x61, 0xaf, 0xa6, 0xbf, 0x94, 0xf6, 0xe3, 0x3f, 0x08, 0xf5, 0x91, 0xbe, 0xd5, 0xf8, 0x37, 0x40, + 0x69, 0xf0, 0x7c, 0x40, 0x39, 0x5f, 0x8e, 0xbf, 0xa3, 0x84, 0x42, 0x3e, 0x6f, 0x27, 0x15, 0x3f, + 0x10, 0x7f, 0xc1, 0x3e, 0x96, 0xfe, 0x8c, 0x3f, 0x97, 0x43, 0x18, 0x3e, 0xaf, 0x98, 0xc1, 0xbf, + 0x3f, 0xc0, 0x96, 0xbf, 0x29, 0xa9, 0x75, 0x3f, 0x03, 0x27, 0xf4, 0xbe, 0xba, 0x84, 0x39, 0x3f, + 0x83, 0x78, 0xb5, 0x3f, 0xb0, 0xfd, 0x47, 0xc0, 0xba, 0x33, 0x54, 0x40, 0x70, 0xf1, 0x98, 0x3e, + 0x2e, 0x14, 0x62, 0x40, 0x41, 0xc5, 0x5a, 0xbe, 0xbc, 0xbb, 0x0d, 0xc0, 0xb5, 0x14, 0xc0, 0x3f, + 0x93, 0xeb, 0x1b, 0xc0, 0xa5, 0x2e, 0x33, 0xbf, 0x23, 0x7c, 0x27, 0xc0, 0x79, 0xbb, 0xef, 0xbf, + 0x2b, 0xcd, 0xea, 0xbf, 0x04, 0x09, 0xf8, 0xbf, 0x97, 0xa3, 0x93, 0xbf, 0xb4, 0x0a, 0x39, 0xc0, + 0x23, 0x11, 0x01, 0xbf, 0x14, 0x28, 0xaa, 0x3e, 0xa2, 0xd4, 0xa9, 0x3f, 0x48, 0xc0, 0x14, 0xc0, + 0x89, 0x8e, 0x15, 0xc0, 0x4c, 0x13, 0x5a, 0x3f, 0xd5, 0x08, 0x9d, 0x3e, 0x3e, 0xc8, 0x81, 0x3f, + 0xc1, 0x00, 0x20, 0x3e, 0xb5, 0xc7, 0x3b, 0xc0, 0xdb, 0xf1, 0x3c, 0x3f, 0x1c, 0x5d, 0x7f, 0xc0, + 0xa1, 0xaa, 0x26, 0x40, 0x93, 0x7b, 0xb3, 0x3f, 0xc3, 0x59, 0xb1, 0xbf, 0x10, 0x38, 0x71, 0x3e, + 0xe5, 0xda, 0x04, 0x40, 0xc4, 0x61, 0xe5, 0x3e, 0x3a, 0xf1, 0x25, 0x3f, 0xeb, 0x49, 0xa1, 0x3f, + 0xff, 0xbc, 0xee, 0xbf, 0xab, 0x8f, 0x91, 0xbe, 0x86, 0xc2, 0x87, 0x40, 0xf2, 0xb3, 0xb6, 0xbf, + 0x5e, 0x46, 0x6e, 0xc0, 0x7e, 0x20, 0x5d, 0x3f, 0x68, 0x26, 0x36, 0x3f, 0xb7, 0x94, 0x04, 0x40, + 0x1c, 0x07, 0xce, 0xc0, 0x75, 0x8d, 0x0a, 0xc0, 0xd8, 0xde, 0xca, 0xbe, 0x0a, 0xb7, 0x8c, 0xc0, + 0x64, 0xfb, 0xa6, 0x3f, 0x0e, 0x60, 0x74, 0xbf, 0xe4, 0x7b, 0xf1, 0xbe, 0x37, 0xae, 0xdf, 0xbe, + 0x35, 0x8a, 0xfb, 0x3f, 0x39, 0x5d, 0xb8, 0x3f, 0x0d, 0x4d, 0x8b, 0x3f, 0xa9, 0xbc, 0xed, 0x3f, + 0x61, 0x1d, 0xff, 0xbc, 0xbf, 0x8f, 0xcc, 0x3d, 0xdc, 0x17, 0xb7, 0xbf, 0x9d, 0x9b, 0x87, 0x3f, + 0x08, 0x7a, 0x46, 0x3f, 0x6a, 0xa4, 0xee, 0xbf, 0xfb, 0x97, 0x91, 0x40, 0x8b, 0xee, 0x00, 0x3f, + 0x61, 0x5e, 0xe6, 0xbf, 0x2b, 0x28, 0xfb, 0x3f, 0x13, 0x17, 0x80, 0x3e, 0xcd, 0xe5, 0x53, 0xc0, + 0xc2, 0xfa, 0xc3, 0xbe, 0x3d, 0xd6, 0xfe, 0xbf, 0x78, 0x62, 0xdc, 0x3f, 0x95, 0x7b, 0xcc, 0x3f, + 0x6d, 0xbb, 0xaf, 0x3e, 0xdb, 0x82, 0x4a, 0xbf, 0x17, 0x81, 0x6a, 0x3f, 0x5f, 0x42, 0xae, 0xbf, + 0xc3, 0x84, 0x84, 0x3f, 0x25, 0x33, 0x5c, 0xbf, 0x00, 0x22, 0x0a, 0xc0, 0x8c, 0x3e, 0x9b, 0x3f, + 0x32, 0x5e, 0xd2, 0x3f, 0xba, 0x59, 0x5a, 0xbe, 0x57, 0x1c, 0x77, 0xbf, 0x3f, 0x3a, 0x27, 0xbf, + 0x85, 0x1c, 0x20, 0x3f, 0x79, 0xf3, 0xa3, 0xbe, 0x7f, 0xe5, 0x7f, 0xbe, 0xb1, 0x51, 0x80, 0x3f, + 0xd9, 0x23, 0x56, 0xbf, 0xea, 0xff, 0x4e, 0xc0, 0xe2, 0x8a, 0x20, 0xc0, 0xc0, 0x53, 0x8e, 0xc0, + 0xf4, 0x94, 0xc0, 0xbe, 0xed, 0x2b, 0xea, 0xbf, 0xe6, 0x47, 0x01, 0xbf, 0xd3, 0xc0, 0xc8, 0xbf, + 0xee, 0x67, 0x31, 0xbe, 0x13, 0xf9, 0x76, 0x40, 0x38, 0x9c, 0x6a, 0xbf, 0x3f, 0x3e, 0xfc, 0xbe, + 0x92, 0xbd, 0x55, 0x40, 0x45, 0xdf, 0x6a, 0x3e, 0x9e, 0x57, 0xe5, 0xbd, 0x1d, 0xbc, 0xc4, 0xbf, + 0xd7, 0x94, 0x8a, 0xc0, 0xc7, 0x61, 0x3f, 0xbf, 0x7b, 0xa9, 0xa6, 0xbf, 0x03, 0x18, 0x95, 0x3e, + 0x29, 0xad, 0xe9, 0x3f, 0x9a, 0x27, 0xde, 0xbf, 0x80, 0x85, 0x79, 0xbf, 0x90, 0x92, 0x9b, 0xbf, + 0x01, 0x1c, 0x10, 0x40, 0x39, 0x40, 0x26, 0xc0, 0x1b, 0x25, 0x27, 0x40, 0x57, 0xfa, 0xd9, 0xbf, + 0x29, 0xe4, 0xc9, 0x3f, 0xfa, 0xce, 0xb6, 0x3e, 0x18, 0xcd, 0xe3, 0x3f, 0x7f, 0x73, 0xeb, 0xbf, + 0x85, 0x85, 0x15, 0x40, 0xf3, 0x52, 0xaa, 0xbe, 0x81, 0x85, 0x29, 0xc0, 0x36, 0x9f, 0x11, 0x40, + 0x56, 0xc3, 0x1d, 0xbe, 0xdc, 0x41, 0x8a, 0x3f, 0x00, 0xbb, 0x1a, 0xc0, 0xb4, 0xa5, 0xa9, 0x3f, + 0x2c, 0xa6, 0x91, 0xbe, 0x11, 0xdb, 0xd0, 0x3f, 0x90, 0xd9, 0x83, 0xc0, 0x14, 0xfd, 0x40, 0x40, + 0x07, 0x59, 0xb9, 0xbe, 0x5e, 0x8d, 0xa6, 0x3e, 0xef, 0xd9, 0x4e, 0xbf, 0x63, 0xc2, 0x2e, 0x40, + 0xe8, 0xc2, 0x93, 0x3c, 0x55, 0xc1, 0x11, 0xc0, 0x58, 0x9d, 0xce, 0xbf, 0xdd, 0x67, 0xb6, 0x3f, + 0x04, 0xb7, 0x57, 0x3f, 0x45, 0x09, 0x3b, 0xc0, 0x1e, 0xdf, 0xb5, 0x3f, 0xad, 0xdb, 0x67, 0x3f, + 0x3e, 0x4f, 0x9a, 0x3f, 0x80, 0x62, 0xe2, 0x3f, 0xa1, 0xda, 0x27, 0xc0, 0xb1, 0xbe, 0x78, 0xbf, + 0x8e, 0x42, 0xab, 0xbd, 0xf3, 0x42, 0xd8, 0xbf, 0x2b, 0xc6, 0xb4, 0x3f, 0xd0, 0x42, 0xf2, 0x3e, + 0xd9, 0xb7, 0x18, 0x40, 0xe8, 0x55, 0x39, 0x3f, 0xbd, 0x9f, 0xe8, 0x3d, 0xed, 0x14, 0xad, 0x40, + 0x4b, 0x21, 0xe8, 0x3f, 0x12, 0xeb, 0x07, 0xbf, 0xf6, 0xe3, 0x02, 0x40, 0x30, 0xa4, 0x48, 0xc0, + 0xc1, 0x3b, 0x3e, 0x3f, 0x98, 0x22, 0x87, 0x3f, 0xb0, 0x0c, 0xab, 0x3f, 0x5b, 0x0f, 0xcc, 0xbe, + 0xf6, 0xd5, 0x8a, 0xbf, 0xf0, 0x41, 0x00, 0xc0, 0x78, 0x80, 0x10, 0x40, 0xe0, 0x42, 0x34, 0x40}; +unsigned char conv2d1x1s1_fp32_ker[] = { + 0xa8, 0x88, 0x0d, 0xc0, 0x96, 0xfe, 0x45, 0xc0, 0xec, 0xb0, 0x72, 0xbf, 0xcc, 0x45, 0x91, 0xbf, + 0x8d, 0x85, 0x42, 0xc0, 0x4e, 0x7a, 0x3c, 0xbe, 0xe2, 0x6f, 0x09, 0xc0, 0x30, 0x05, 0x69, 0xbd, + 0xae, 0x01, 0x5a, 0xc0, 0xf9, 0xdc, 0x02, 0xc0, 0x42, 0x93, 0x52, 0xc0, 0x28, 0xe0, 0x13, 0xc0, + 0x50, 0xd7, 0xc7, 0xbf, 0xfb, 0x10, 0xea, 0xbf, 0x72, 0xa9, 0x10, 0xc0, 0xec, 0x7b, 0xb9, 0xbf, + 0x16, 0x30, 0x44, 0xc0, 0x01, 0x85, 0x65, 0xc0, 0xac, 0x7f, 0x28, 0xc0, 0x5b, 0x57, 0x7b, 0xc0, + 0x5b, 0xed, 0x14, 0xc0, 0xa0, 0xd5, 0x48, 0xc0, 0x6c, 0xf3, 0x29, 0xc0, 0x3d, 0xe4, 0xe9, 0xbf, + 0x31, 0x50, 0x01, 0xc0, 0x2d, 0x4a, 0x82, 0xc0, 0xfe, 0xc3, 0x9e, 0xbf, 0xbf, 0xbc, 0x4d, 0xc0, + 0x02, 0x14, 0x4c, 0xc0, 0x33, 0x5a, 0x1c, 0xc0, 0x60, 0x13, 0x0d, 0xc0, 0x7c, 0xe9, 0xa9, 0xbe, + 0x87, 0x6b, 0x30, 0xc0, 0x3b, 0x2c, 0x84, 0xc0, 0x57, 0x2a, 0xea, 0xbf, 0x00, 0x85, 0x54, 0xc0, + 0xce, 0x62, 0x39, 0xbf, 0x21, 0x2a, 0x26, 0xc0, 0x5e, 0x5a, 0xb8, 0xbf, 0xab, 0xa9, 0xfd, 0xbf, + 0x74, 0x72, 0x0f, 0xc0, 0x54, 0xc8, 0xdc, 0x3c, 0xf6, 0xc9, 0x24, 0xbf, 0x3e, 0xf2, 0x06, 0xc0, + 0xb1, 0x60, 0x47, 0xc0, 0x4e, 0xf9, 0x7c, 0xc0, 0x13, 0xc4, 0x9d, 0xbf, 0x3d, 0xd2, 0x09, 0xc0, + 0xd5, 0x0d, 0xf6, 0xbf, 0x35, 0x67, 0x29, 0xc0, 0xd3, 0x77, 0xe6, 0xbf, 0x72, 0xb3, 0x30, 0x3f, + 0x6a, 0x09, 0x2f, 0xc0, 0xe4, 0xaf, 0x49, 0xc0, 0xaa, 0x40, 0xa4, 0xbf, 0x3b, 0xec, 0x82, 0xbf, + 0x62, 0x95, 0x40, 0xc0, 0x20, 0x51, 0x76, 0x3f, 0x80, 0x14, 0x90, 0xbf, 0xea, 0xab, 0x9c, 0xbf, + 0x0c, 0xc7, 0x52, 0xc0, 0x41, 0xd8, 0x39, 0xc0, 0xdf, 0x22, 0x13, 0xc0, 0x67, 0xc4, 0x13, 0xc0, + 0xc1, 0x13, 0xb9, 0xbf, 0x30, 0x47, 0x21, 0xc0, 0xd2, 0x1e, 0xa8, 0xbf, 0x33, 0x83, 0xc0, 0xbf, + 0x8f, 0x19, 0x93, 0xc0, 0x97, 0x32, 0x5c, 0xc0, 0xa5, 0x27, 0x00, 0xc0, 0xc6, 0xe8, 0x37, 0xc0, + 0x96, 0xc9, 0x92, 0xbe, 0xc6, 0x9f, 0xb7, 0xbf, 0x51, 0xf8, 0x26, 0xc0, 0x67, 0x05, 0xb9, 0xbf, + 0x98, 0xd9, 0x67, 0xc0, 0xb8, 0xf0, 0xa5, 0xbf, 0x03, 0x47, 0x02, 0xc0, 0x0c, 0x47, 0x70, 0xbf, + 0x52, 0x5f, 0x29, 0xc0, 0x4d, 0x73, 0x26, 0xc0, 0x5b, 0xa7, 0x7a, 0xbf, 0x65, 0xda, 0x2c, 0xc0, + 0x64, 0x2a, 0x2d, 0xc0, 0x7d, 0x37, 0x54, 0xbf, 0x4f, 0x4c, 0xa9, 0xbe, 0x00, 0x17, 0x05, 0xc0, + 0x8d, 0xb9, 0xbe, 0xbf, 0x37, 0x96, 0x1e, 0xc0, 0xa0, 0x53, 0x07, 0xc0, 0x3d, 0xd5, 0x0e, 0xc0, + 0x9f, 0x16, 0x47, 0xc0, 0x20, 0x56, 0x9a, 0xbf, 0x5e, 0x67, 0x0c, 0xc0, 0x07, 0xf2, 0x34, 0xc0, + 0x8d, 0x20, 0x57, 0xc0, 0x39, 0x40, 0x29, 0xc0, 0x9c, 0x29, 0xcf, 0xbf, 0xe9, 0x81, 0x24, 0xc0, + 0x11, 0x57, 0xd8, 0xbf, 0x93, 0x9a, 0xfb, 0x3e, 0xf3, 0x12, 0x0a, 0xc0, 0xbd, 0x68, 0x11, 0xc0, + 0x7d, 0xa6, 0x10, 0xbf, 0xd7, 0xfe, 0x9a, 0xbf, 0xda, 0x81, 0xf0, 0xbb, 0x58, 0x80, 0xf0, 0xbf, + 0x0b, 0x67, 0x06, 0xc0, 0x9c, 0x3a, 0x18, 0xbf, 0x6b, 0xdd, 0x18, 0xbf, 0x53, 0x00, 0xcf, 0xbd, + 0xc4, 0xc4, 0x72, 0xbf, 0x39, 0xd3, 0x3b, 0xc0, 0x02, 0x60, 0x34, 0xc0, 0xc8, 0xe4, 0x7d, 0xc0, + 0x8d, 0x69, 0xd5, 0xbf, 0x1e, 0xe5, 0x1f, 0xbf, 0x5e, 0x8c, 0xe9, 0xbf, 0x85, 0xae, 0x09, 0xc0, + 0xea, 0x4c, 0xd9, 0xbe, 0x32, 0x7a, 0x0a, 0xc0, 0x92, 0x9e, 0x3e, 0xc0, 0x0d, 0xda, 0x06, 0xbf, + 0xed, 0x70, 0x6d, 0xc0, 0x12, 0x99, 0x1c, 0xc0, 0x15, 0x59, 0xa6, 0xbe, 0x2f, 0xbc, 0xd7, 0xbf, + 0x8c, 0xb3, 0xd4, 0xbf, 0x14, 0xe8, 0xc6, 0xbf, 0x98, 0x7a, 0xbd, 0xbf, 0x00, 0xa9, 0x06, 0xc0, + 0xb6, 0x38, 0x71, 0xbe, 0x9a, 0x87, 0x1a, 0xc0, 0xd6, 0xb5, 0x8f, 0xbf, 0x3d, 0x66, 0x5f, 0xc0, + 0x6f, 0x3c, 0x04, 0xc0, 0x7f, 0xbc, 0x13, 0xc0, 0xe4, 0xfc, 0x18, 0xc0, 0x48, 0x07, 0x10, 0xc0, + 0xb4, 0x60, 0x13, 0xc0, 0xad, 0x06, 0x03, 0xc0, 0x94, 0x92, 0x2a, 0xc0, 0x2e, 0xda, 0xb9, 0xbf, + 0xe7, 0x17, 0x2b, 0xc0, 0x02, 0x99, 0xb3, 0xbf, 0xcb, 0x17, 0xe4, 0xbf, 0xb7, 0x91, 0x3a, 0xc0, + 0x05, 0x78, 0xd5, 0xbf, 0xfb, 0x47, 0x64, 0xc0, 0xcb, 0x68, 0xd4, 0xbf, 0xc7, 0x15, 0xf2, 0xbf, + 0x5f, 0xe2, 0x91, 0xc0, 0x25, 0x30, 0x8e, 0xbf, 0x57, 0xa9, 0x08, 0xc0, 0xbf, 0x8d, 0x01, 0xc0, + 0x74, 0x1d, 0x3b, 0xc0, 0x48, 0xec, 0xf0, 0xbf, 0xdd, 0xb8, 0xa6, 0xbf, 0xe4, 0xe5, 0x26, 0xc0, + 0x34, 0x3e, 0x0b, 0xc0, 0xa5, 0x7c, 0xe3, 0xbf, 0x0e, 0x32, 0x2e, 0xc0, 0x3e, 0x76, 0x0e, 0xc0, + 0xc2, 0x42, 0x3a, 0xc0, 0xf6, 0x00, 0x15, 0xc0, 0xc7, 0x1e, 0x2b, 0xc0, 0x76, 0xb0, 0x4a, 0xc0, + 0x35, 0xdd, 0xe9, 0xbf, 0xef, 0xde, 0x24, 0xc0, 0x1b, 0xe4, 0xb6, 0xbf, 0xa9, 0x4f, 0x44, 0xc0, + 0x33, 0x1d, 0x25, 0xc0, 0xdd, 0xe4, 0x1e, 0xc0, 0xba, 0x3f, 0x99, 0xbd, 0xe2, 0x39, 0xf7, 0xbf, + 0x3f, 0x45, 0xec, 0xbf, 0x74, 0xa1, 0x1c, 0xc0, 0x59, 0x84, 0x8b, 0xbe, 0xd4, 0x75, 0x28, 0xc0, + 0x22, 0x8f, 0x4e, 0xbf, 0xd5, 0x62, 0x14, 0xc0, 0x23, 0x58, 0x48, 0xc0, 0x94, 0x70, 0xea, 0xbe, + 0x55, 0xd7, 0x12, 0xbf, 0x25, 0xa8, 0xfd, 0xbf, 0x86, 0xee, 0x55, 0xbf, 0x32, 0x9e, 0x14, 0xc0, + 0xa8, 0x38, 0x46, 0xc0, 0x45, 0x18, 0xff, 0xbf, 0xd5, 0x51, 0x2a, 0xc0, 0x1c, 0x97, 0xf7, 0xbf, + 0xd2, 0xaf, 0x90, 0xbf, 0xec, 0x68, 0xb8, 0xbe, 0xc7, 0x7b, 0x38, 0xbf, 0x34, 0x30, 0xc1, 0xbe, + 0x0c, 0x17, 0x9b, 0xbf, 0xfd, 0x58, 0xd8, 0xbf, 0xff, 0x7e, 0x34, 0xc0, 0x98, 0x4c, 0x2a, 0xc0, + 0x98, 0x5b, 0xd6, 0xbf, 0x83, 0xc4, 0xb7, 0xbf, 0xc5, 0x21, 0x11, 0xc0, 0x23, 0x08, 0x04, 0xc0, + 0xf1, 0xd0, 0xa5, 0xc0, 0x40, 0xa0, 0x49, 0xc0, 0xae, 0xfb, 0x3b, 0xc0, 0x88, 0x69, 0x3f, 0xbf, + 0x0d, 0x2e, 0x26, 0xc0, 0xb2, 0x63, 0x33, 0xc0, 0x3f, 0xd0, 0x31, 0xc0, 0xd5, 0x3b, 0xcb, 0xbe, + 0x6f, 0xf7, 0x54, 0xc0, 0x9e, 0xec, 0x4d, 0xc0, 0xc8, 0x22, 0x7d, 0xc0, 0x1f, 0x8c, 0x3e, 0xc0, + 0x50, 0xa9, 0x57, 0xc0, 0x73, 0xff, 0xc4, 0xbf, 0x85, 0xfd, 0xfd, 0xbf, 0x1f, 0x11, 0xce, 0xbf, + 0xa3, 0xb8, 0x0a, 0xc0, 0xf6, 0xb4, 0x17, 0xc0, 0x31, 0x0e, 0x29, 0xc0, 0xb5, 0x50, 0x4e, 0xc0, + 0xd4, 0x57, 0x30, 0xc0, 0xa0, 0x37, 0xea, 0xbf, 0xc0, 0x88, 0x3f, 0xbf, 0x75, 0xd3, 0x6c, 0xc0, + 0x34, 0x80, 0xd9, 0xbf, 0xa1, 0x88, 0x8c, 0xbf, 0x2c, 0xbe, 0xe5, 0xbf, 0xac, 0x13, 0x26, 0xc0, + 0xf5, 0x8a, 0x3b, 0xbf, 0x41, 0xe5, 0x74, 0xbf, 0xa3, 0x65, 0x90, 0xbf, 0x2f, 0x06, 0xb7, 0xbf, + 0x21, 0x81, 0xe6, 0xbf, 0x69, 0x99, 0x65, 0xc0, 0x15, 0x33, 0xf7, 0xbf, 0x62, 0x86, 0xed, 0xbf, + 0x83, 0x6a, 0x5a, 0xbe, 0x44, 0xb2, 0xbc, 0xbe, 0x3f, 0x49, 0x51, 0xc0, 0xbd, 0x3d, 0x01, 0xc0, + 0x69, 0x19, 0x12, 0xc0, 0x81, 0x6c, 0x3a, 0xc0, 0x89, 0xc1, 0x0d, 0xc0, 0x55, 0x42, 0x2f, 0xbf, + 0x62, 0xf4, 0x94, 0xbf, 0xed, 0xc5, 0xd8, 0xbf, 0x81, 0x6c, 0x83, 0xc0, 0xc9, 0xb4, 0xf9, 0xbe, + 0x99, 0x4b, 0x28, 0xc0, 0xb3, 0xd4, 0xa3, 0xbf, 0x64, 0x51, 0x65, 0xbf, 0xdf, 0x86, 0x42, 0xc0, + 0xb2, 0xea, 0xe3, 0xbf, 0x8c, 0x7b, 0x36, 0xbf, 0xb2, 0xb3, 0x22, 0xc0, 0x55, 0x82, 0x0a, 0xbf, + 0x3f, 0x32, 0xf1, 0xbf, 0x73, 0x99, 0x27, 0xc0, 0xa3, 0x19, 0x4b, 0xc0, 0x45, 0xc1, 0x16, 0xc0, + 0x91, 0x71, 0x23, 0xc0, 0x06, 0x59, 0x3e, 0xc0, 0x4f, 0x88, 0x75, 0xbf, 0x2b, 0x26, 0x5a, 0xc0, + 0x44, 0x5a, 0xd4, 0xbf, 0x72, 0x19, 0x5e, 0xc0, 0xeb, 0x51, 0x73, 0xc0, 0x62, 0x56, 0xe8, 0xbe, + 0xc1, 0xba, 0x25, 0xc0, 0x99, 0xb3, 0x00, 0xc0, 0x49, 0xbc, 0xe2, 0xbf, 0x76, 0x4e, 0x47, 0xc0, + 0x4e, 0xf5, 0x06, 0xc0, 0x94, 0xae, 0x39, 0xc0, 0xf9, 0x77, 0x35, 0xc0, 0x4b, 0xf4, 0xd2, 0xbf, + 0xe2, 0x54, 0xca, 0xbf, 0x7f, 0xb0, 0x26, 0xc0, 0x6b, 0x8a, 0x2f, 0xc0, 0xa6, 0x31, 0x24, 0xc0, + 0xb7, 0x51, 0xcd, 0xbf, 0x3b, 0x47, 0x40, 0xc0, 0x39, 0x4b, 0xa1, 0xbf, 0x54, 0x47, 0xd8, 0xbf, + 0x15, 0x1d, 0x02, 0xc0, 0xfe, 0x65, 0xb2, 0xbf, 0x5c, 0x41, 0xd2, 0xbf, 0xf5, 0x60, 0xdd, 0xbf, + 0x2a, 0x42, 0xb0, 0xbf, 0x77, 0x45, 0x20, 0xc0, 0x15, 0x26, 0x32, 0xc0, 0x42, 0x69, 0xcc, 0xbf, + 0x8d, 0x8a, 0xd7, 0xbf, 0x14, 0x61, 0x20, 0xc0, 0x75, 0x1e, 0xb8, 0xbf, 0x11, 0xb1, 0x46, 0xc0, + 0xf6, 0x0c, 0x6f, 0xc0, 0x25, 0x9a, 0x60, 0xc0, 0x1e, 0xb2, 0x53, 0xc0, 0x4b, 0xe4, 0x91, 0xbf}; +unsigned char conv2d1x1s1_fp32_ker1[] = { + 0xa8, 0x88, 0x0d, 0xc0, 0x16, 0x30, 0x44, 0xc0, 0x87, 0x6b, 0x30, 0xc0, 0xd5, 0x0d, 0xf6, 0xbf, + 0xc1, 0x13, 0xb9, 0xbf, 0x52, 0x5f, 0x29, 0xc0, 0x8d, 0x20, 0x57, 0xc0, 0xc4, 0xc4, 0x72, 0xbf, + 0x96, 0xfe, 0x45, 0xc0, 0x01, 0x85, 0x65, 0xc0, 0x3b, 0x2c, 0x84, 0xc0, 0x35, 0x67, 0x29, 0xc0, + 0x30, 0x47, 0x21, 0xc0, 0x4d, 0x73, 0x26, 0xc0, 0x39, 0x40, 0x29, 0xc0, 0x39, 0xd3, 0x3b, 0xc0, + 0xec, 0xb0, 0x72, 0xbf, 0xac, 0x7f, 0x28, 0xc0, 0x57, 0x2a, 0xea, 0xbf, 0xd3, 0x77, 0xe6, 0xbf, + 0xd2, 0x1e, 0xa8, 0xbf, 0x5b, 0xa7, 0x7a, 0xbf, 0x9c, 0x29, 0xcf, 0xbf, 0x02, 0x60, 0x34, 0xc0, + 0xcc, 0x45, 0x91, 0xbf, 0x5b, 0x57, 0x7b, 0xc0, 0x00, 0x85, 0x54, 0xc0, 0x72, 0xb3, 0x30, 0x3f, + 0x33, 0x83, 0xc0, 0xbf, 0x65, 0xda, 0x2c, 0xc0, 0xe9, 0x81, 0x24, 0xc0, 0xc8, 0xe4, 0x7d, 0xc0, + 0x8d, 0x85, 0x42, 0xc0, 0x5b, 0xed, 0x14, 0xc0, 0xce, 0x62, 0x39, 0xbf, 0x6a, 0x09, 0x2f, 0xc0, + 0x8f, 0x19, 0x93, 0xc0, 0x64, 0x2a, 0x2d, 0xc0, 0x11, 0x57, 0xd8, 0xbf, 0x8d, 0x69, 0xd5, 0xbf, + 0x4e, 0x7a, 0x3c, 0xbe, 0xa0, 0xd5, 0x48, 0xc0, 0x21, 0x2a, 0x26, 0xc0, 0xe4, 0xaf, 0x49, 0xc0, + 0x97, 0x32, 0x5c, 0xc0, 0x7d, 0x37, 0x54, 0xbf, 0x93, 0x9a, 0xfb, 0x3e, 0x1e, 0xe5, 0x1f, 0xbf, + 0xe2, 0x6f, 0x09, 0xc0, 0x6c, 0xf3, 0x29, 0xc0, 0x5e, 0x5a, 0xb8, 0xbf, 0xaa, 0x40, 0xa4, 0xbf, + 0xa5, 0x27, 0x00, 0xc0, 0x4f, 0x4c, 0xa9, 0xbe, 0xf3, 0x12, 0x0a, 0xc0, 0x5e, 0x8c, 0xe9, 0xbf, + 0x30, 0x05, 0x69, 0xbd, 0x3d, 0xe4, 0xe9, 0xbf, 0xab, 0xa9, 0xfd, 0xbf, 0x3b, 0xec, 0x82, 0xbf, + 0xc6, 0xe8, 0x37, 0xc0, 0x00, 0x17, 0x05, 0xc0, 0xbd, 0x68, 0x11, 0xc0, 0x85, 0xae, 0x09, 0xc0, + 0xae, 0x01, 0x5a, 0xc0, 0x31, 0x50, 0x01, 0xc0, 0x74, 0x72, 0x0f, 0xc0, 0x62, 0x95, 0x40, 0xc0, + 0x96, 0xc9, 0x92, 0xbe, 0x8d, 0xb9, 0xbe, 0xbf, 0x7d, 0xa6, 0x10, 0xbf, 0xea, 0x4c, 0xd9, 0xbe, + 0xf9, 0xdc, 0x02, 0xc0, 0x2d, 0x4a, 0x82, 0xc0, 0x54, 0xc8, 0xdc, 0x3c, 0x20, 0x51, 0x76, 0x3f, + 0xc6, 0x9f, 0xb7, 0xbf, 0x37, 0x96, 0x1e, 0xc0, 0xd7, 0xfe, 0x9a, 0xbf, 0x32, 0x7a, 0x0a, 0xc0, + 0x42, 0x93, 0x52, 0xc0, 0xfe, 0xc3, 0x9e, 0xbf, 0xf6, 0xc9, 0x24, 0xbf, 0x80, 0x14, 0x90, 0xbf, + 0x51, 0xf8, 0x26, 0xc0, 0xa0, 0x53, 0x07, 0xc0, 0xda, 0x81, 0xf0, 0xbb, 0x92, 0x9e, 0x3e, 0xc0, + 0x28, 0xe0, 0x13, 0xc0, 0xbf, 0xbc, 0x4d, 0xc0, 0x3e, 0xf2, 0x06, 0xc0, 0xea, 0xab, 0x9c, 0xbf, + 0x67, 0x05, 0xb9, 0xbf, 0x3d, 0xd5, 0x0e, 0xc0, 0x58, 0x80, 0xf0, 0xbf, 0x0d, 0xda, 0x06, 0xbf, + 0x50, 0xd7, 0xc7, 0xbf, 0x02, 0x14, 0x4c, 0xc0, 0xb1, 0x60, 0x47, 0xc0, 0x0c, 0xc7, 0x52, 0xc0, + 0x98, 0xd9, 0x67, 0xc0, 0x9f, 0x16, 0x47, 0xc0, 0x0b, 0x67, 0x06, 0xc0, 0xed, 0x70, 0x6d, 0xc0, + 0xfb, 0x10, 0xea, 0xbf, 0x33, 0x5a, 0x1c, 0xc0, 0x4e, 0xf9, 0x7c, 0xc0, 0x41, 0xd8, 0x39, 0xc0, + 0xb8, 0xf0, 0xa5, 0xbf, 0x20, 0x56, 0x9a, 0xbf, 0x9c, 0x3a, 0x18, 0xbf, 0x12, 0x99, 0x1c, 0xc0, + 0x72, 0xa9, 0x10, 0xc0, 0x60, 0x13, 0x0d, 0xc0, 0x13, 0xc4, 0x9d, 0xbf, 0xdf, 0x22, 0x13, 0xc0, + 0x03, 0x47, 0x02, 0xc0, 0x5e, 0x67, 0x0c, 0xc0, 0x6b, 0xdd, 0x18, 0xbf, 0x15, 0x59, 0xa6, 0xbe, + 0xec, 0x7b, 0xb9, 0xbf, 0x7c, 0xe9, 0xa9, 0xbe, 0x3d, 0xd2, 0x09, 0xc0, 0x67, 0xc4, 0x13, 0xc0, + 0x0c, 0x47, 0x70, 0xbf, 0x07, 0xf2, 0x34, 0xc0, 0x53, 0x00, 0xcf, 0xbd, 0x2f, 0xbc, 0xd7, 0xbf, + 0x8c, 0xb3, 0xd4, 0xbf, 0xe7, 0x17, 0x2b, 0xc0, 0x34, 0x3e, 0x0b, 0xc0, 0x3f, 0x45, 0xec, 0xbf, + 0xd2, 0xaf, 0x90, 0xbf, 0x0d, 0x2e, 0x26, 0xc0, 0xd4, 0x57, 0x30, 0xc0, 0x83, 0x6a, 0x5a, 0xbe, + 0x14, 0xe8, 0xc6, 0xbf, 0x02, 0x99, 0xb3, 0xbf, 0xa5, 0x7c, 0xe3, 0xbf, 0x74, 0xa1, 0x1c, 0xc0, + 0xec, 0x68, 0xb8, 0xbe, 0xb2, 0x63, 0x33, 0xc0, 0xa0, 0x37, 0xea, 0xbf, 0x44, 0xb2, 0xbc, 0xbe, + 0x98, 0x7a, 0xbd, 0xbf, 0xcb, 0x17, 0xe4, 0xbf, 0x0e, 0x32, 0x2e, 0xc0, 0x59, 0x84, 0x8b, 0xbe, + 0xc7, 0x7b, 0x38, 0xbf, 0x3f, 0xd0, 0x31, 0xc0, 0xc0, 0x88, 0x3f, 0xbf, 0x3f, 0x49, 0x51, 0xc0, + 0x00, 0xa9, 0x06, 0xc0, 0xb7, 0x91, 0x3a, 0xc0, 0x3e, 0x76, 0x0e, 0xc0, 0xd4, 0x75, 0x28, 0xc0, + 0x34, 0x30, 0xc1, 0xbe, 0xd5, 0x3b, 0xcb, 0xbe, 0x75, 0xd3, 0x6c, 0xc0, 0xbd, 0x3d, 0x01, 0xc0, + 0xb6, 0x38, 0x71, 0xbe, 0x05, 0x78, 0xd5, 0xbf, 0xc2, 0x42, 0x3a, 0xc0, 0x22, 0x8f, 0x4e, 0xbf, + 0x0c, 0x17, 0x9b, 0xbf, 0x6f, 0xf7, 0x54, 0xc0, 0x34, 0x80, 0xd9, 0xbf, 0x69, 0x19, 0x12, 0xc0, + 0x9a, 0x87, 0x1a, 0xc0, 0xfb, 0x47, 0x64, 0xc0, 0xf6, 0x00, 0x15, 0xc0, 0xd5, 0x62, 0x14, 0xc0, + 0xfd, 0x58, 0xd8, 0xbf, 0x9e, 0xec, 0x4d, 0xc0, 0xa1, 0x88, 0x8c, 0xbf, 0x81, 0x6c, 0x3a, 0xc0, + 0xd6, 0xb5, 0x8f, 0xbf, 0xcb, 0x68, 0xd4, 0xbf, 0xc7, 0x1e, 0x2b, 0xc0, 0x23, 0x58, 0x48, 0xc0, + 0xff, 0x7e, 0x34, 0xc0, 0xc8, 0x22, 0x7d, 0xc0, 0x2c, 0xbe, 0xe5, 0xbf, 0x89, 0xc1, 0x0d, 0xc0, + 0x3d, 0x66, 0x5f, 0xc0, 0xc7, 0x15, 0xf2, 0xbf, 0x76, 0xb0, 0x4a, 0xc0, 0x94, 0x70, 0xea, 0xbe, + 0x98, 0x4c, 0x2a, 0xc0, 0x1f, 0x8c, 0x3e, 0xc0, 0xac, 0x13, 0x26, 0xc0, 0x55, 0x42, 0x2f, 0xbf, + 0x6f, 0x3c, 0x04, 0xc0, 0x5f, 0xe2, 0x91, 0xc0, 0x35, 0xdd, 0xe9, 0xbf, 0x55, 0xd7, 0x12, 0xbf, + 0x98, 0x5b, 0xd6, 0xbf, 0x50, 0xa9, 0x57, 0xc0, 0xf5, 0x8a, 0x3b, 0xbf, 0x62, 0xf4, 0x94, 0xbf, + 0x7f, 0xbc, 0x13, 0xc0, 0x25, 0x30, 0x8e, 0xbf, 0xef, 0xde, 0x24, 0xc0, 0x25, 0xa8, 0xfd, 0xbf, + 0x83, 0xc4, 0xb7, 0xbf, 0x73, 0xff, 0xc4, 0xbf, 0x41, 0xe5, 0x74, 0xbf, 0xed, 0xc5, 0xd8, 0xbf, + 0xe4, 0xfc, 0x18, 0xc0, 0x57, 0xa9, 0x08, 0xc0, 0x1b, 0xe4, 0xb6, 0xbf, 0x86, 0xee, 0x55, 0xbf, + 0xc5, 0x21, 0x11, 0xc0, 0x85, 0xfd, 0xfd, 0xbf, 0xa3, 0x65, 0x90, 0xbf, 0x81, 0x6c, 0x83, 0xc0, + 0x48, 0x07, 0x10, 0xc0, 0xbf, 0x8d, 0x01, 0xc0, 0xa9, 0x4f, 0x44, 0xc0, 0x32, 0x9e, 0x14, 0xc0, + 0x23, 0x08, 0x04, 0xc0, 0x1f, 0x11, 0xce, 0xbf, 0x2f, 0x06, 0xb7, 0xbf, 0xc9, 0xb4, 0xf9, 0xbe, + 0xb4, 0x60, 0x13, 0xc0, 0x74, 0x1d, 0x3b, 0xc0, 0x33, 0x1d, 0x25, 0xc0, 0xa8, 0x38, 0x46, 0xc0, + 0xf1, 0xd0, 0xa5, 0xc0, 0xa3, 0xb8, 0x0a, 0xc0, 0x21, 0x81, 0xe6, 0xbf, 0x99, 0x4b, 0x28, 0xc0, + 0xad, 0x06, 0x03, 0xc0, 0x48, 0xec, 0xf0, 0xbf, 0xdd, 0xe4, 0x1e, 0xc0, 0x45, 0x18, 0xff, 0xbf, + 0x40, 0xa0, 0x49, 0xc0, 0xf6, 0xb4, 0x17, 0xc0, 0x69, 0x99, 0x65, 0xc0, 0xb3, 0xd4, 0xa3, 0xbf, + 0x94, 0x92, 0x2a, 0xc0, 0xdd, 0xb8, 0xa6, 0xbf, 0xba, 0x3f, 0x99, 0xbd, 0xd5, 0x51, 0x2a, 0xc0, + 0xae, 0xfb, 0x3b, 0xc0, 0x31, 0x0e, 0x29, 0xc0, 0x15, 0x33, 0xf7, 0xbf, 0x64, 0x51, 0x65, 0xbf, + 0x2e, 0xda, 0xb9, 0xbf, 0xe4, 0xe5, 0x26, 0xc0, 0xe2, 0x39, 0xf7, 0xbf, 0x1c, 0x97, 0xf7, 0xbf, + 0x88, 0x69, 0x3f, 0xbf, 0xb5, 0x50, 0x4e, 0xc0, 0x62, 0x86, 0xed, 0xbf, 0xdf, 0x86, 0x42, 0xc0, + 0xb2, 0xea, 0xe3, 0xbf, 0xc1, 0xba, 0x25, 0xc0, 0x8c, 0x7b, 0x36, 0xbf, 0x99, 0xb3, 0x00, 0xc0, + 0xb2, 0xb3, 0x22, 0xc0, 0x49, 0xbc, 0xe2, 0xbf, 0x55, 0x82, 0x0a, 0xbf, 0x76, 0x4e, 0x47, 0xc0, + 0x3f, 0x32, 0xf1, 0xbf, 0x4e, 0xf5, 0x06, 0xc0, 0x73, 0x99, 0x27, 0xc0, 0x94, 0xae, 0x39, 0xc0, + 0xa3, 0x19, 0x4b, 0xc0, 0xf9, 0x77, 0x35, 0xc0, 0x45, 0xc1, 0x16, 0xc0, 0x4b, 0xf4, 0xd2, 0xbf, + 0x91, 0x71, 0x23, 0xc0, 0xe2, 0x54, 0xca, 0xbf, 0x06, 0x59, 0x3e, 0xc0, 0x7f, 0xb0, 0x26, 0xc0, + 0x4f, 0x88, 0x75, 0xbf, 0x6b, 0x8a, 0x2f, 0xc0, 0x2b, 0x26, 0x5a, 0xc0, 0xa6, 0x31, 0x24, 0xc0, + 0x44, 0x5a, 0xd4, 0xbf, 0xb7, 0x51, 0xcd, 0xbf, 0x72, 0x19, 0x5e, 0xc0, 0x3b, 0x47, 0x40, 0xc0, + 0xeb, 0x51, 0x73, 0xc0, 0x39, 0x4b, 0xa1, 0xbf, 0x62, 0x56, 0xe8, 0xbe, 0x54, 0x47, 0xd8, 0xbf, + 0x15, 0x1d, 0x02, 0xc0, 0xfe, 0x65, 0xb2, 0xbf, 0x5c, 0x41, 0xd2, 0xbf, 0xf5, 0x60, 0xdd, 0xbf, + 0x2a, 0x42, 0xb0, 0xbf, 0x77, 0x45, 0x20, 0xc0, 0x15, 0x26, 0x32, 0xc0, 0x42, 0x69, 0xcc, 0xbf, + 0x8d, 0x8a, 0xd7, 0xbf, 0x14, 0x61, 0x20, 0xc0, 0x75, 0x1e, 0xb8, 0xbf, 0x11, 0xb1, 0x46, 0xc0, + 0xf6, 0x0c, 0x6f, 0xc0, 0x25, 0x9a, 0x60, 0xc0, 0x1e, 0xb2, 0x53, 0xc0, 0x4b, 0xe4, 0x91, 0xbf}; +unsigned char conv2d1x1s1_fp32_bias[] = { + 0xbe, 0xdf, 0x60, 0xc1, 0xef, 0x7c, 0x7b, 0xc0, 0x76, 0xeb, 0xfa, 0xbf, 0xe5, 0xc9, 0xbd, 0x40, + 0xd6, 0xa2, 0xd5, 0x40, 0x33, 0x07, 0x35, 0x41, 0x87, 0x07, 0xc9, 0x40, 0x0d, 0xa1, 0x30, 0xc1, + 0x7f, 0x11, 0x1c, 0xc1, 0xbe, 0x68, 0xcd, 0xc0, 0xa7, 0x25, 0x2c, 0xc0, 0x85, 0xbd, 0x1f, 0xc1, + 0x2b, 0x55, 0x0b, 0x40, 0xb5, 0x8d, 0x20, 0x41, 0x89, 0x57, 0xcf, 0x3f, 0x8e, 0x49, 0xe8, 0xbf, + 0x48, 0xf5, 0xbd, 0xc0, 0xeb, 0xf2, 0x02, 0xbf, 0x4d, 0xbc, 0x4d, 0x40}; +unsigned char conv2d1x1s1_fp32_out[] = { + 0xcd, 0xb0, 0x09, 0xc2, 0x00, 0x28, 0xb6, 0x40, 0xe6, 0xc5, 0x96, 0x41, 0xc4, 0xcd, 0xa1, 0xc0, + 0x3d, 0xca, 0xc4, 0xc1, 0x71, 0x4f, 0xbd, 0xc1, 0x0a, 0x29, 0xf4, 0xc1, 0x3f, 0xfb, 0x07, 0xc2, + 0x9a, 0xbc, 0xa9, 0xc0, 0x68, 0xac, 0xee, 0xc1, 0x39, 0xbc, 0xff, 0xc1, 0xbf, 0x3a, 0x55, 0x40, + 0xb4, 0x31, 0x5d, 0xc0, 0xd9, 0x8f, 0xc1, 0xc1, 0x35, 0xb0, 0xec, 0xc1, 0xc7, 0x79, 0x83, 0xc1, + 0xed, 0x8a, 0xc8, 0x41, 0xc2, 0xf8, 0xa9, 0xbe, 0x53, 0xc2, 0x34, 0xc1, 0x72, 0x6e, 0xaf, 0x40, + 0x00, 0x93, 0x4e, 0xc2, 0xca, 0x80, 0x7a, 0x41, 0x91, 0xad, 0xc6, 0x41, 0x42, 0xb8, 0xbe, 0x41, + 0x3d, 0xeb, 0x58, 0xc1, 0x35, 0xeb, 0x4d, 0xc1, 0xf8, 0x07, 0x05, 0xc2, 0x61, 0x37, 0xa2, 0xc1, + 0x21, 0xbe, 0x28, 0x41, 0x77, 0x4d, 0x0d, 0xc2, 0x9c, 0x56, 0x24, 0xc2, 0x2b, 0x5a, 0x93, 0x41, + 0x88, 0x65, 0x4e, 0xbf, 0xf8, 0x38, 0x4a, 0xc1, 0x21, 0xd6, 0x56, 0xc1, 0x52, 0x8d, 0x58, 0xc1, + 0x53, 0xad, 0x60, 0x42, 0xc8, 0x30, 0x8d, 0x41, 0x20, 0x0a, 0x6d, 0x41, 0x18, 0x7f, 0x58, 0x41, + 0xba, 0xdb, 0x03, 0xc2, 0x8e, 0xee, 0xce, 0x41, 0xf8, 0x18, 0xaa, 0x41, 0xfc, 0xec, 0xec, 0x41, + 0x65, 0x5b, 0x86, 0xc1, 0x53, 0x35, 0xdb, 0xbf, 0xdd, 0xef, 0x02, 0xc2, 0x2f, 0x79, 0x96, 0xc1, + 0x27, 0x7f, 0xe1, 0xc0, 0x4a, 0x1e, 0x15, 0xc2, 0xcb, 0x62, 0xf5, 0xc1, 0x04, 0x64, 0xb1, 0x41, + 0x63, 0x44, 0x85, 0xc1, 0x39, 0x68, 0x03, 0xc1, 0xc5, 0x7f, 0x8a, 0xc0, 0x67, 0xee, 0xa9, 0xc0, + 0xe5, 0xdb, 0xfc, 0x41, 0x93, 0x8c, 0x8f, 0x41, 0xeb, 0x0d, 0x2c, 0x41, 0xcb, 0x1b, 0x0c, 0xc0, + 0x8a, 0x8e, 0x2c, 0xc0, 0xbc, 0x96, 0x1c, 0x42, 0xf5, 0xa2, 0xe3, 0x41, 0xd8, 0x83, 0xf4, 0x41, + 0x26, 0x97, 0xaa, 0xbf, 0xa2, 0x30, 0x1c, 0xbf, 0x24, 0x13, 0xbe, 0xc0, 0x5a, 0x93, 0x0b, 0xc0, + 0x8e, 0x62, 0x9b, 0xc0, 0x9d, 0xba, 0xc5, 0xc1, 0xe8, 0x33, 0x3a, 0xc0, 0x26, 0xc3, 0xe7, 0x41, + 0xa0, 0xa0, 0xe1, 0xc0, 0xeb, 0xd0, 0xe1, 0x3f, 0x81, 0xf7, 0x32, 0x40, 0x8e, 0xcd, 0xb9, 0x40, + 0xdb, 0xba, 0xe4, 0x41, 0x25, 0x0e, 0xa4, 0x40, 0xe7, 0x3d, 0xb5, 0x41, 0xb1, 0x11, 0xad, 0x3f, + 0xa4, 0xd0, 0xf8, 0xc1, 0x05, 0xb4, 0x01, 0x42, 0xdb, 0x8f, 0xc2, 0x41, 0x53, 0x5e, 0xc0, 0x41, + 0xf4, 0x34, 0x04, 0xbf, 0x69, 0x65, 0x85, 0xc1, 0x83, 0x99, 0x3b, 0xc1, 0x92, 0x25, 0xdb, 0xbe, + 0x71, 0xb3, 0x50, 0x41, 0x7c, 0x89, 0xcb, 0xc1, 0xac, 0x47, 0x2b, 0x40, 0x88, 0x47, 0xab, 0x41, + 0xd4, 0xa8, 0xce, 0x3f, 0x4d, 0xff, 0x67, 0x41, 0xd8, 0x3d, 0x91, 0xc1, 0x68, 0xe6, 0x1a, 0x41, + 0xe6, 0x84, 0x35, 0x42, 0xee, 0xbd, 0xfa, 0x40, 0x39, 0x98, 0xf6, 0x41, 0x36, 0x41, 0xb6, 0x40, + 0x14, 0x42, 0x93, 0xc1, 0x21, 0xea, 0xef, 0x41, 0x1d, 0xde, 0x0a, 0x42, 0xe1, 0xfd, 0xbd, 0x41, + 0x22, 0xcf, 0x00, 0xc1, 0x67, 0x4f, 0x38, 0x40, 0x82, 0xf8, 0xc0, 0xc0, 0xdd, 0xc0, 0x5a, 0xc0, + 0xdd, 0xb4, 0x5b, 0x41, 0xeb, 0xb3, 0x40, 0xc1, 0xb3, 0x79, 0x05, 0xc1, 0xa9, 0x7b, 0xf1, 0x41, + 0x93, 0xa3, 0x3c, 0x41, 0x1d, 0x61, 0x5f, 0x41, 0x86, 0xe6, 0x57, 0xc1, 0xb5, 0x52, 0xef, 0x40, + 0xb6, 0xb0, 0x54, 0x42, 0x5d, 0xb3, 0x08, 0x42, 0x48, 0xad, 0x7f, 0x41, 0x3f, 0x38, 0xa3, 0x41, + 0x99, 0x4f, 0xcd, 0xc1, 0x69, 0xd1, 0xa0, 0x41, 0x51, 0xcc, 0xca, 0x41, 0xc4, 0x44, 0x9c, 0x41, + 0x63, 0x53, 0x40, 0x40, 0x28, 0xf4, 0x11, 0x3f, 0x15, 0xa3, 0xf0, 0xc0, 0x94, 0x25, 0x92, 0xc0, + 0xab, 0xd5, 0xbd, 0x41, 0x37, 0x07, 0x73, 0xc1, 0x47, 0x83, 0x82, 0xc1, 0xbf, 0xff, 0x5a, 0x41, + 0xdb, 0xd5, 0xe8, 0x40, 0x54, 0x27, 0xe6, 0xc0, 0xf7, 0x31, 0x14, 0x3f, 0x80, 0x91, 0x07, 0x41, + 0xf4, 0x7b, 0xef, 0x41, 0xb5, 0x24, 0xb9, 0x41, 0xe5, 0xaf, 0xbf, 0x40, 0x59, 0xe3, 0x6a, 0x3f, + 0x9d, 0x76, 0x6d, 0xc2, 0x5c, 0xaf, 0xfe, 0x40, 0x34, 0x35, 0xb6, 0x40, 0x31, 0x32, 0xae, 0x41, + 0xa9, 0xa2, 0xa1, 0xc1, 0xf5, 0x5a, 0x8a, 0xc1, 0xbe, 0xf9, 0x0d, 0xc2, 0x20, 0x5a, 0x1b, 0xc2, + 0xb9, 0xc4, 0x04, 0x40, 0x41, 0x2f, 0x0a, 0xc2, 0x47, 0x19, 0x24, 0xc2, 0x0e, 0x6b, 0xfc, 0x40, + 0xea, 0x12, 0xe3, 0xc1, 0x39, 0x38, 0x5e, 0xc1, 0xaa, 0x35, 0xc5, 0xc1, 0xcf, 0x2b, 0x70, 0xc1, + 0xf7, 0x8f, 0x06, 0x42, 0x83, 0x7c, 0x2f, 0x41, 0x0b, 0xf8, 0x6f, 0x3f, 0xe9, 0xad, 0x7e, 0xc0, + 0x8e, 0x9f, 0x2d, 0xc2, 0x18, 0x27, 0x12, 0x41, 0x9f, 0x08, 0x41, 0x41, 0x68, 0x6b, 0xd6, 0x40, + 0x84, 0x0c, 0xbb, 0xc1, 0x8e, 0xbb, 0x55, 0xc1, 0xe0, 0x34, 0xe0, 0xc1, 0xd9, 0x35, 0xb5, 0xc1, + 0x51, 0xdc, 0xad, 0xc0, 0x36, 0x79, 0xe0, 0xc1, 0x01, 0xb2, 0xe5, 0xc1, 0x37, 0x45, 0x61, 0x40, + 0x4b, 0xa8, 0x9c, 0xc1, 0x43, 0xd2, 0x81, 0x3f, 0x13, 0xa6, 0x02, 0xc2, 0x48, 0x16, 0xaf, 0xc1, + 0xe3, 0xdb, 0x9b, 0x41, 0x6e, 0xd5, 0xb0, 0xbe, 0x92, 0x4a, 0x19, 0x41, 0x4b, 0x4d, 0x97, 0x40, + 0x18, 0x79, 0xf1, 0xc1, 0x25, 0x82, 0xa6, 0x41, 0x87, 0xc4, 0x50, 0x41, 0xbe, 0xc1, 0xc7, 0x41, + 0xec, 0xa3, 0x50, 0xc1, 0x35, 0x73, 0xec, 0xc0, 0x97, 0x5e, 0xb6, 0xc1, 0xd2, 0x0b, 0x15, 0xc1, + 0x0e, 0xa3, 0x7c, 0xc0, 0x5b, 0x69, 0x0c, 0xc2, 0x2e, 0x37, 0xfe, 0xc1, 0x23, 0x55, 0xba, 0x41, + 0xbd, 0x01, 0xfb, 0xc0, 0x04, 0xb2, 0x1b, 0xc1, 0x57, 0x6f, 0xd4, 0xc1, 0xf4, 0xfa, 0x81, 0xc1, + 0x7d, 0x89, 0xfc, 0x41, 0x46, 0x77, 0x42, 0x41, 0xc5, 0x94, 0x8e, 0x3f, 0xb7, 0xf0, 0xa2, 0x40, + 0x72, 0x92, 0x3c, 0xc2, 0xa0, 0x6d, 0xd5, 0x41, 0xf2, 0xde, 0xbf, 0x40, 0xf6, 0xcb, 0xb4, 0x41, + 0x4e, 0x1c, 0x22, 0xc1, 0x5d, 0x57, 0x31, 0xc1, 0x75, 0x78, 0xb6, 0xc1, 0x7a, 0x22, 0x34, 0xc1, + 0x59, 0xf6, 0x14, 0x41, 0x20, 0x5d, 0xdd, 0xc1, 0xe1, 0x26, 0xfd, 0xc1, 0x6e, 0x66, 0xb0, 0x41, + 0xc3, 0xf1, 0x15, 0xc1, 0x10, 0x81, 0x36, 0xc1, 0x3e, 0x23, 0x51, 0xc1, 0xe6, 0x89, 0x19, 0xc1, + 0xd4, 0xe6, 0x0a, 0x42, 0x9a, 0x52, 0x4c, 0x41, 0x38, 0x42, 0x2a, 0x41, 0xc9, 0xf4, 0xe9, 0xc0, + 0xd9, 0x18, 0x14, 0xc2, 0x5e, 0x38, 0x2b, 0x41, 0x86, 0x50, 0x8c, 0x41, 0x22, 0xdb, 0x7a, 0x40, + 0x45, 0x02, 0x9f, 0xc1, 0xa3, 0xd8, 0x83, 0xc1, 0x45, 0xea, 0x04, 0xc2, 0x8d, 0x84, 0x18, 0xc2, + 0xee, 0x12, 0x0c, 0xc0, 0xf1, 0x98, 0xfe, 0xc1, 0x61, 0x4a, 0xe9, 0xc1, 0x8a, 0x41, 0xb9, 0x40, + 0x56, 0x2b, 0x4a, 0xc1, 0xab, 0x60, 0x91, 0xc1, 0x16, 0x87, 0x97, 0xc1, 0x93, 0xfa, 0x01, 0xc1, + 0x90, 0xa5, 0x11, 0x42, 0x94, 0x3e, 0x38, 0xbf, 0xc5, 0xd6, 0x3b, 0x40, 0xd9, 0xef, 0x06, 0xc1, + 0xae, 0x66, 0xb8, 0xc1, 0xe9, 0xde, 0x12, 0x42, 0x9e, 0x6b, 0x0a, 0x42, 0xe9, 0x80, 0xc5, 0x41, + 0xa2, 0x6d, 0x39, 0xc0, 0x3d, 0xf7, 0xb1, 0x40, 0x68, 0x73, 0x17, 0xc1, 0x4f, 0x88, 0x0f, 0xbf, + 0x6d, 0x07, 0x47, 0x41, 0x72, 0xa4, 0xa9, 0xc1, 0x94, 0xf9, 0x39, 0xc0, 0x7c, 0xa1, 0x5e, 0x41, + 0x1e, 0x0c, 0x9a, 0xc1, 0x31, 0x93, 0xdf, 0x40, 0xef, 0xaa, 0x46, 0xc1, 0x20, 0xe4, 0xad, 0xbf, + 0x09, 0xb4, 0x0d, 0x42, 0x19, 0x52, 0x42, 0xc0, 0xfa, 0x37, 0xed, 0x41, 0x45, 0x0b, 0xd3, 0x3f, + 0x68, 0x0c, 0xd6, 0xc1, 0x03, 0x1a, 0x2f, 0x42, 0x93, 0xf9, 0x0f, 0x42, 0x08, 0xe9, 0xf9, 0x41, + 0x00, 0x95, 0x7c, 0x40, 0xa3, 0xe6, 0x0d, 0xc1, 0x5f, 0x41, 0xcd, 0xc0, 0xd8, 0x16, 0xa2, 0xc1, + 0xff, 0x7f, 0xa2, 0x41, 0x68, 0x11, 0x83, 0xc1, 0x1f, 0x73, 0x92, 0xc1, 0xe1, 0x29, 0x1c, 0x42, + 0x81, 0xdb, 0x00, 0x41, 0xd0, 0x77, 0xd1, 0xbf, 0x3c, 0xed, 0xbb, 0xc0, 0x23, 0x5c, 0xb4, 0x40, + 0xb8, 0xef, 0x31, 0x42, 0xd4, 0xf1, 0x6e, 0x41, 0xf5, 0xda, 0xd4, 0x41, 0x35, 0xf7, 0xd9, 0x40, + 0x16, 0x04, 0x15, 0xc2, 0xac, 0x68, 0x99, 0x41, 0x4a, 0x88, 0xb2, 0x41, 0x70, 0xa4, 0x5e, 0x41, + 0x9a, 0xfa, 0x4d, 0xc1, 0x4e, 0x6d, 0xde, 0x3b, 0x2d, 0xae, 0xb3, 0xc1, 0x6d, 0xfd, 0x0a, 0xc1, + 0x12, 0xb2, 0x02, 0x41, 0x6e, 0x89, 0x8d, 0xc1, 0x4f, 0xc9, 0xa8, 0xc1, 0xb1, 0x23, 0x9b, 0x41, + 0x17, 0xae, 0x9a, 0xc0, 0x2b, 0x6d, 0x57, 0x40, 0x4d, 0x17, 0x54, 0xc1, 0x55, 0x4d, 0xc9, 0xc0, + 0x9c, 0x76, 0xd6, 0x41, 0x96, 0x2a, 0x63, 0x41, 0xb9, 0x6f, 0x44, 0x41, 0x3e, 0xab, 0xb4, 0xc0, + 0xc6, 0x7f, 0x0d, 0xc2, 0x90, 0x80, 0x9a, 0x41, 0xfa, 0xbc, 0x5d, 0x3f, 0x6b, 0xf7, 0xd8, 0x41, + 0x7a, 0x89, 0x9a, 0xc0, 0x07, 0xe0, 0x5a, 0xc1, 0x02, 0x99, 0x5b, 0xc1, 0x31, 0x4e, 0xef, 0xc1, + 0xda, 0xce, 0xad, 0x40, 0x20, 0xef, 0x68, 0xc1, 0x91, 0x75, 0xb1, 0xc1, 0xe9, 0xf8, 0xac, 0x41, + 0x49, 0x27, 0x8b, 0xc1, 0x30, 0xa6, 0x4f, 0xc0, 0x52, 0x0b, 0xa5, 0xc1, 0x9e, 0xb0, 0x1d, 0xc1, + 0x3c, 0xdf, 0x17, 0x42, 0x79, 0xaf, 0x0f, 0x40, 0xf4, 0xb3, 0x18, 0x41, 0x11, 0x5e, 0x12, 0x40, + 0x62, 0x44, 0x21, 0xc2, 0x60, 0x70, 0x86, 0x41, 0x30, 0x47, 0x99, 0x41, 0x80, 0xf9, 0xbd, 0x40, + 0x36, 0x2e, 0x06, 0xc1, 0x17, 0x1c, 0x01, 0xc1, 0xfb, 0x38, 0x9e, 0xc1, 0x69, 0x43, 0x71, 0xc1, + 0xe0, 0x65, 0xcb, 0x40, 0x59, 0x86, 0x7b, 0xc1, 0x66, 0x30, 0xdf, 0xc1, 0xf6, 0xe1, 0x2a, 0x41, + 0xf8, 0xd8, 0x54, 0xc1, 0xae, 0xc8, 0xeb, 0xc0, 0x22, 0x9a, 0x1c, 0xc1, 0x9d, 0x7b, 0xb0, 0xc1, + 0x8b, 0xde, 0xab, 0x41, 0x31, 0xc1, 0x0b, 0xc1, 0xcf, 0x31, 0xb1, 0x41, 0x58, 0xe0, 0x10, 0xc0, + 0xec, 0x96, 0x2c, 0xc2, 0xef, 0x7c, 0xae, 0x41, 0x5a, 0x24, 0x80, 0x41, 0x76, 0x61, 0x9a, 0x41, + 0xee, 0xae, 0x63, 0xc1, 0x87, 0xc7, 0x21, 0xc1, 0xf5, 0x8d, 0xdd, 0xc1, 0xab, 0x07, 0x90, 0xc1, + 0x8e, 0xe7, 0x11, 0x41, 0x36, 0x87, 0xb4, 0xc1, 0x29, 0xd3, 0xec, 0xc1, 0x9c, 0xe4, 0xaf, 0x41, + 0x76, 0x86, 0xdd, 0xbc, 0x28, 0x57, 0xcf, 0xc0, 0xb6, 0x38, 0x86, 0xc1, 0x0a, 0xd3, 0x2b, 0xc1, + 0x33, 0x86, 0x31, 0x42, 0x34, 0x38, 0x15, 0x41, 0x65, 0xb1, 0x36, 0x41, 0x5f, 0xfa, 0xbe, 0x40, + 0x2e, 0xce, 0xe6, 0xc1, 0x01, 0xee, 0xf5, 0x41, 0xb9, 0xc9, 0x02, 0x42, 0x62, 0xa7, 0xbe, 0x41, + 0x2f, 0xa6, 0xb9, 0xc0, 0xc7, 0x6e, 0x82, 0x40, 0xfb, 0x8d, 0x84, 0xc1, 0x26, 0xe6, 0x16, 0xc1, + 0xb0, 0x2a, 0x34, 0x41, 0xe2, 0xad, 0x8f, 0xc1, 0xa9, 0x79, 0x88, 0xc1, 0x86, 0x55, 0xa7, 0x41, + 0x6a, 0x0c, 0x1a, 0xc1, 0xd9, 0x0e, 0x9e, 0x3f, 0xcb, 0x3d, 0x84, 0xc0, 0xba, 0x11, 0xc1, 0xc0, + 0x00, 0xf9, 0x36, 0x42, 0x10, 0x0e, 0xcb, 0x40, 0x74, 0xa7, 0xd9, 0x41, 0x5f, 0xed, 0xd4, 0x40}; + +unsigned char conv2d1x1s1_fp16_in[] = { + 0xcb, 0xb0, 0x17, 0xba, 0xce, 0xbe, 0x61, 0xb7, 0xce, 0x41, 0xe0, 0xb3, 0x07, 0x30, 0xcc, 0xbd, + 0x4e, 0xb0, 0x44, 0x42, 0xcc, 0x41, 0xa4, 0xb6, 0x93, 0xc1, 0x50, 0x41, 0xba, 0x3c, 0xa4, 0x3c, + 0x78, 0xbf, 0x94, 0x3b, 0xdf, 0x42, 0x51, 0x3c, 0x1a, 0x3c, 0xa5, 0xb3, 0x73, 0xc1, 0x7b, 0xb7, + 0xff, 0x3e, 0xd6, 0x41, 0x89, 0x42, 0x72, 0x45, 0xa6, 0x3c, 0x39, 0x42, 0xa3, 0x3d, 0xa5, 0x2c, + 0xd9, 0xbc, 0x8d, 0x3d, 0x14, 0xbb, 0xd5, 0xc0, 0x27, 0xc1, 0x03, 0xc1, 0x5e, 0xb3, 0x51, 0xc1, + 0x45, 0x41, 0xed, 0x34, 0x41, 0x41, 0x7c, 0xc3, 0x6b, 0xc0, 0x1a, 0xb5, 0xbb, 0xbc, 0x7c, 0x42, + 0x80, 0xb9, 0x8e, 0xbd, 0x54, 0x43, 0x6f, 0xb9, 0x64, 0x44, 0x2e, 0x3e, 0xbd, 0xc2, 0x28, 0x40, + 0x8a, 0x3c, 0x1b, 0xab, 0xd8, 0xb8, 0xbe, 0x3b, 0xb0, 0x42, 0x4e, 0x40, 0xd7, 0x34, 0x0d, 0xbc, + 0x47, 0xbc, 0xc6, 0xbb, 0xd5, 0x3f, 0xfe, 0xb4, 0x30, 0xbe, 0x10, 0xb7, 0x6f, 0x3e, 0x77, 0xba, + 0x08, 0xb0, 0xe5, 0xbc, 0x2d, 0x3e, 0xd6, 0x38, 0x37, 0xbd, 0xf7, 0xc3, 0xfc, 0x38, 0x84, 0xb9, + 0xc4, 0x3c, 0xbc, 0xb6, 0xd9, 0x39, 0x90, 0x3d, 0x34, 0xbc, 0xd7, 0x40, 0x1a, 0xb9, 0xf6, 0xc2, + 0x7b, 0xb8, 0x1c, 0x37, 0x65, 0xbf, 0xa7, 0xbd, 0x56, 0xc0, 0xf9, 0xb6, 0x02, 0x3c, 0x54, 0xba, + 0x17, 0xb2, 0x9d, 0xb4, 0x85, 0xb7, 0x3e, 0x40, 0x46, 0x2c, 0x5a, 0xba, 0x5c, 0x3f, 0xb1, 0xbf, + 0x6e, 0x37, 0xdf, 0x3f, 0x08, 0x3e, 0xe1, 0xbc, 0xa3, 0x40, 0xc5, 0x41, 0xe2, 0xb4, 0x97, 0xbd, + 0x32, 0xb9, 0x29, 0xbd, 0xef, 0x3a, 0x7e, 0x38, 0xce, 0xc0, 0x21, 0x40, 0xe9, 0xc0, 0x5d, 0xba, + 0x47, 0x41, 0x08, 0xc0, 0x21, 0xbf, 0x74, 0xb2, 0xe2, 0xc1, 0x36, 0x3e, 0x7f, 0x38, 0xce, 0x44, + 0xd0, 0xc4, 0x12, 0xb4, 0x68, 0x3d, 0x96, 0xb4, 0xe8, 0xb8, 0x27, 0x44, 0x36, 0xba, 0xbb, 0xbf, + 0x35, 0xbd, 0x1f, 0x3f, 0x8f, 0xb4, 0xbf, 0x41, 0xe7, 0x43, 0x72, 0xbc, 0x14, 0x32, 0xa9, 0x38, + 0x0b, 0x36, 0x67, 0x3c, 0xc2, 0x30, 0x0c, 0xbe, 0xb6, 0xbc, 0xad, 0x3b, 0xa1, 0xb7, 0xcc, 0x39, + 0xab, 0x3d, 0x3f, 0xc2, 0xa1, 0x42, 0xc7, 0x34, 0x10, 0x43, 0xd6, 0xb2, 0x6d, 0xc0, 0x00, 0x3e, + 0xdf, 0xc0, 0x99, 0xb9, 0x3b, 0xc1, 0x7d, 0xbf, 0x56, 0xbf, 0xc0, 0xbf, 0x9d, 0xbc, 0xc8, 0xc1, + 0x08, 0xb8, 0x51, 0x35, 0x4e, 0x3d, 0xa6, 0xc0, 0xac, 0xc0, 0xd0, 0x3a, 0xe8, 0x34, 0x0e, 0x3c, + 0x00, 0x31, 0xde, 0xc1, 0xe7, 0x39, 0xfa, 0xc3, 0x35, 0x41, 0x9b, 0x3d, 0x8a, 0xbd, 0x89, 0x33, + 0x26, 0x40, 0x2b, 0x37, 0x2f, 0x39, 0x0a, 0x3d, 0x75, 0xbf, 0x8c, 0xb4, 0x3e, 0x44, 0xb5, 0xbd, + 0x72, 0xc3, 0xe9, 0x3a, 0xb1, 0x39, 0x24, 0x40, 0x70, 0xc6, 0x54, 0xc0, 0x56, 0xb6, 0x65, 0xc4, + 0x37, 0x3d, 0xa3, 0xbb, 0x8b, 0xb7, 0xfd, 0xb6, 0xdc, 0x3f, 0xc2, 0x3d, 0x5a, 0x3c, 0x6d, 0x3f, + 0xf8, 0xa7, 0x64, 0x2e, 0xb8, 0xbd, 0x3c, 0x3c, 0x33, 0x3a, 0x75, 0xbf, 0x8c, 0x44, 0x07, 0x38, + 0x32, 0xbf, 0xd9, 0x3f, 0x00, 0x34, 0x9f, 0xc2, 0x1f, 0xb6, 0xf6, 0xbf, 0xe3, 0x3e, 0x63, 0x3e, + 0x7d, 0x35, 0x54, 0xba, 0x54, 0x3b, 0x72, 0xbd, 0x24, 0x3c, 0xe1, 0xba, 0x51, 0xc0, 0xd9, 0x3c, + 0x92, 0x3e, 0xd2, 0xb2, 0xb8, 0xbb, 0x39, 0xb9, 0x00, 0x39, 0x1f, 0xb5, 0xff, 0xb3, 0x02, 0x3c, + 0xb1, 0xba, 0x77, 0xc2, 0x04, 0xc1, 0x72, 0xc4, 0x04, 0xb6, 0x51, 0xbf, 0x0a, 0xb8, 0x46, 0xbe, + 0x8b, 0xb1, 0xb7, 0x43, 0x54, 0xbb, 0xe1, 0xb7, 0xad, 0x42, 0x56, 0x33, 0x2a, 0xaf, 0x25, 0xbe, + 0x54, 0xc4, 0xfb, 0xb9, 0x35, 0xbd, 0xa8, 0x34, 0x4d, 0x3f, 0xf1, 0xbe, 0xcc, 0xbb, 0xdc, 0xbc, + 0x80, 0x40, 0x32, 0xc1, 0x39, 0x41, 0xcf, 0xbe, 0x4f, 0x3e, 0xb6, 0x35, 0x1e, 0x3f, 0x5b, 0xbf, + 0xac, 0x40, 0x52, 0xb5, 0x4c, 0xc1, 0x8c, 0x40, 0xee, 0xb0, 0x52, 0x3c, 0xd5, 0xc0, 0x4d, 0x3d, + 0x8d, 0xb4, 0x86, 0x3e, 0x1e, 0xc4, 0x07, 0x42, 0xca, 0xb5, 0x34, 0x35, 0x76, 0xba, 0x76, 0x41, + 0x9e, 0x24, 0x8e, 0xc0, 0x74, 0xbe, 0xb3, 0x3d, 0xbd, 0x3a, 0xd8, 0xc1, 0xae, 0x3d, 0x3e, 0x3b, + 0xd2, 0x3c, 0x13, 0x3f, 0x3e, 0xc1, 0xc5, 0xbb, 0x5a, 0xad, 0xc2, 0xbe, 0xa6, 0x3d, 0x92, 0x37, + 0xc5, 0x40, 0xca, 0x39, 0x44, 0x2f, 0x68, 0x45, 0x41, 0x3f, 0x3f, 0xb8, 0x17, 0x40, 0x45, 0xc2, + 0xf1, 0x39, 0x39, 0x3c, 0x58, 0x3d, 0x60, 0xb6, 0x56, 0xbc, 0x02, 0xc0, 0x84, 0x40, 0xa2, 0x41}; +unsigned char conv2d1x1s1_fp16_ker[] = { + 0x6c, 0xc0, 0x2f, 0xc2, 0x95, 0xbb, 0x8a, 0xbc, 0x14, 0xc2, 0xe3, 0xb1, 0x4b, 0xc0, 0x48, 0xab, + 0xd0, 0xc2, 0x16, 0xc0, 0x94, 0xc2, 0x9f, 0xc0, 0x3e, 0xbe, 0x50, 0xbf, 0x85, 0xc0, 0xcb, 0xbd, + 0x21, 0xc2, 0x2c, 0xc3, 0x43, 0xc1, 0xda, 0xc3, 0xa7, 0xc0, 0x46, 0xc2, 0x4f, 0xc1, 0x4f, 0xbf, + 0x0a, 0xc0, 0x12, 0xc4, 0xf6, 0xbc, 0x6d, 0xc2, 0x60, 0xc2, 0xe2, 0xc0, 0x68, 0xc0, 0x4f, 0xb5, + 0x83, 0xc1, 0x21, 0xc4, 0x51, 0xbf, 0xa4, 0xc2, 0xcb, 0xb9, 0x31, 0xc1, 0xc2, 0xbd, 0xed, 0xbf, + 0x7b, 0xc0, 0xe6, 0x26, 0x26, 0xb9, 0x37, 0xc0, 0x3b, 0xc2, 0xe7, 0xc3, 0xee, 0xbc, 0x4e, 0xc0, + 0xb0, 0xbf, 0x4b, 0xc1, 0x33, 0xbf, 0x85, 0x39, 0x78, 0xc1, 0x4d, 0xc2, 0x22, 0xbd, 0x17, 0xbc, + 0x04, 0xc2, 0xb2, 0x3b, 0x80, 0xbc, 0xe5, 0xbc, 0x96, 0xc2, 0xce, 0xc1, 0x99, 0xc0, 0x9e, 0xc0, + 0xc8, 0xbd, 0x0a, 0xc1, 0x40, 0xbd, 0x04, 0xbe, 0x98, 0xc4, 0xe1, 0xc2, 0x01, 0xc0, 0xbf, 0xc1, + 0x96, 0xb4, 0xbc, 0xbd, 0x37, 0xc1, 0xc8, 0xbd, 0x3e, 0xc3, 0x2f, 0xbd, 0x12, 0xc0, 0x82, 0xbb, + 0x4a, 0xc1, 0x33, 0xc1, 0xd5, 0xbb, 0x66, 0xc1, 0x69, 0xc1, 0xa1, 0xba, 0x4a, 0xb5, 0x28, 0xc0, + 0xf5, 0xbd, 0xf4, 0xc0, 0x3a, 0xc0, 0x76, 0xc0, 0x38, 0xc2, 0xd2, 0xbc, 0x63, 0xc0, 0xa7, 0xc1, + 0xb9, 0xc2, 0x4a, 0xc1, 0x79, 0xbe, 0x24, 0xc1, 0xc2, 0xbe, 0xdc, 0x37, 0x50, 0xc0, 0x8b, 0xc0, + 0x85, 0xb8, 0xd7, 0xbc, 0x84, 0x9f, 0x84, 0xbf, 0x33, 0xc0, 0xc1, 0xb8, 0xc6, 0xb8, 0x78, 0xae, + 0x96, 0xbb, 0xde, 0xc1, 0xa3, 0xc1, 0xef, 0xc3, 0xab, 0xbe, 0xff, 0xb8, 0x4c, 0xbf, 0x4d, 0xc0, + 0xca, 0xb6, 0x53, 0xc0, 0xf4, 0xc1, 0x36, 0xb8, 0x6b, 0xc3, 0xe4, 0xc0, 0x32, 0xb5, 0xbd, 0xbe, + 0xa5, 0xbe, 0x37, 0xbe, 0xeb, 0xbd, 0x35, 0xc0, 0x89, 0xb3, 0xd4, 0xc0, 0x7d, 0xbc, 0xfb, 0xc2, + 0x21, 0xc0, 0x9d, 0xc0, 0xc7, 0xc0, 0x80, 0xc0, 0x9b, 0xc0, 0x18, 0xc0, 0x54, 0xc1, 0xce, 0xbd, + 0x58, 0xc1, 0x9c, 0xbd, 0x20, 0xbf, 0xd4, 0xc1, 0xab, 0xbe, 0x22, 0xc3, 0xa3, 0xbe, 0x90, 0xbf, + 0x8f, 0xc4, 0x71, 0xbc, 0x45, 0xc0, 0x0c, 0xc0, 0xd8, 0xc1, 0x87, 0xbf, 0x35, 0xbd, 0x37, 0xc1, + 0x59, 0xc0, 0x1b, 0xbf, 0x71, 0xc1, 0x73, 0xc0, 0xd2, 0xc1, 0xa8, 0xc0, 0x58, 0xc1, 0x55, 0xc2, + 0x4e, 0xbf, 0x26, 0xc1, 0xb7, 0xbd, 0x22, 0xc2, 0x28, 0xc1, 0xf7, 0xc0, 0xc9, 0xac, 0xb9, 0xbf, + 0x62, 0xbf, 0xe5, 0xc0, 0x5c, 0xb4, 0x43, 0xc1, 0x74, 0xba, 0xa3, 0xc0, 0x42, 0xc2, 0x53, 0xb7, + 0x96, 0xb8, 0xed, 0xbf, 0xaf, 0xba, 0xa4, 0xc0, 0x31, 0xc2, 0xf8, 0xbf, 0x52, 0xc1, 0xbc, 0xbf, + 0x85, 0xbc, 0xc3, 0xb5, 0xc3, 0xb9, 0x09, 0xb6, 0xd8, 0xbc, 0xc2, 0xbe, 0xa3, 0xc1, 0x52, 0xc1, + 0xb2, 0xbe, 0xbe, 0xbd, 0x89, 0xc0, 0x20, 0xc0, 0x2e, 0xc5, 0x4d, 0xc2, 0xdf, 0xc1, 0xfb, 0xb9, + 0x31, 0xc1, 0x9b, 0xc1, 0x8e, 0xc1, 0x59, 0xb6, 0xa7, 0xc2, 0x6f, 0xc2, 0xe9, 0xc3, 0xf4, 0xc1, + 0xbd, 0xc2, 0x27, 0xbe, 0xef, 0xbf, 0x70, 0xbe, 0x55, 0xc0, 0xbd, 0xc0, 0x48, 0xc1, 0x72, 0xc2, + 0x82, 0xc1, 0x51, 0xbf, 0xfc, 0xb9, 0x66, 0xc3, 0xcc, 0xbe, 0x64, 0xbc, 0x2d, 0xbf, 0x30, 0xc1, + 0xdc, 0xb9, 0xa7, 0xbb, 0x83, 0xbc, 0xb8, 0xbd, 0x34, 0xbf, 0x2c, 0xc3, 0xb9, 0xbf, 0x6c, 0xbf, + 0xd3, 0xb2, 0xe5, 0xb5, 0x8a, 0xc2, 0x09, 0xc0, 0x90, 0xc0, 0xd3, 0xc1, 0x6e, 0xc0, 0x7a, 0xb9, + 0xa7, 0xbc, 0xc6, 0xbe, 0x1b, 0xc4, 0xcd, 0xb7, 0x42, 0xc1, 0x1e, 0xbd, 0x2a, 0xbb, 0x14, 0xc2, + 0x1f, 0xbf, 0xb3, 0xb9, 0x15, 0xc1, 0x54, 0xb8, 0x89, 0xbf, 0x3c, 0xc1, 0x58, 0xc2, 0xb6, 0xc0, + 0x1b, 0xc1, 0xf2, 0xc1, 0xac, 0xbb, 0xd1, 0xc2, 0xa2, 0xbe, 0xf0, 0xc2, 0x9a, 0xc3, 0x42, 0xb7, + 0x2d, 0xc1, 0x05, 0xc0, 0x15, 0xbf, 0x3a, 0xc2, 0x37, 0xc0, 0xcd, 0xc1, 0xab, 0xc1, 0x97, 0xbe, + 0x52, 0xbe, 0x35, 0xc1, 0x7c, 0xc1, 0x21, 0xc1, 0x6a, 0xbe, 0x02, 0xc2, 0x0a, 0xbd, 0xc2, 0xbe, + 0x10, 0xc0, 0x93, 0xbd, 0x92, 0xbe, 0xeb, 0xbe, 0x82, 0xbd, 0x02, 0xc1, 0x91, 0xc1, 0x63, 0xbe, + 0xbc, 0xbe, 0x03, 0xc1, 0xc0, 0xbd, 0x35, 0xc2, 0x78, 0xc3, 0x04, 0xc3, 0x9d, 0xc2, 0x8f, 0xbc}; +unsigned char conv2d1x1s1_fp16_ker1[] = { + 0x6c, 0xc0, 0x21, 0xc2, 0x83, 0xc1, 0xb0, 0xbf, 0xc8, 0xbd, 0x4a, 0xc1, 0xb9, 0xc2, 0x96, 0xbb, + 0x2f, 0xc2, 0x2c, 0xc3, 0x21, 0xc4, 0x4b, 0xc1, 0x0a, 0xc1, 0x33, 0xc1, 0x4a, 0xc1, 0xde, 0xc1, + 0x95, 0xbb, 0x43, 0xc1, 0x51, 0xbf, 0x33, 0xbf, 0x40, 0xbd, 0xd5, 0xbb, 0x79, 0xbe, 0xa3, 0xc1, + 0x8a, 0xbc, 0xda, 0xc3, 0xa4, 0xc2, 0x85, 0x39, 0x04, 0xbe, 0x66, 0xc1, 0x24, 0xc1, 0xef, 0xc3, + 0x14, 0xc2, 0xa7, 0xc0, 0xcb, 0xb9, 0x78, 0xc1, 0x98, 0xc4, 0x69, 0xc1, 0xc2, 0xbe, 0xab, 0xbe, + 0xe3, 0xb1, 0x46, 0xc2, 0x31, 0xc1, 0x4d, 0xc2, 0xe1, 0xc2, 0xa1, 0xba, 0xdc, 0x37, 0xff, 0xb8, + 0x4b, 0xc0, 0x4f, 0xc1, 0xc2, 0xbd, 0x22, 0xbd, 0x01, 0xc0, 0x4a, 0xb5, 0x50, 0xc0, 0x4c, 0xbf, + 0x48, 0xab, 0x4f, 0xbf, 0xed, 0xbf, 0x17, 0xbc, 0xbf, 0xc1, 0x28, 0xc0, 0x8b, 0xc0, 0x4d, 0xc0, + 0xd0, 0xc2, 0x0a, 0xc0, 0x7b, 0xc0, 0x04, 0xc2, 0x96, 0xb4, 0xf5, 0xbd, 0x85, 0xb8, 0xca, 0xb6, + 0x16, 0xc0, 0x12, 0xc4, 0xe6, 0x26, 0xb2, 0x3b, 0xbc, 0xbd, 0xf4, 0xc0, 0xd7, 0xbc, 0x53, 0xc0, + 0x94, 0xc2, 0xf6, 0xbc, 0x26, 0xb9, 0x80, 0xbc, 0x37, 0xc1, 0x3a, 0xc0, 0x84, 0x9f, 0xf4, 0xc1, + 0x9f, 0xc0, 0x6d, 0xc2, 0x37, 0xc0, 0xe5, 0xbc, 0xc8, 0xbd, 0x76, 0xc0, 0x84, 0xbf, 0x36, 0xb8, + 0x3e, 0xbe, 0x60, 0xc2, 0x3b, 0xc2, 0x96, 0xc2, 0x3e, 0xc3, 0x38, 0xc2, 0x33, 0xc0, 0x6b, 0xc3, + 0x50, 0xbf, 0xe2, 0xc0, 0xe7, 0xc3, 0xce, 0xc1, 0x2f, 0xbd, 0xd2, 0xbc, 0xc1, 0xb8, 0xe4, 0xc0, + 0x85, 0xc0, 0x68, 0xc0, 0xee, 0xbc, 0x99, 0xc0, 0x12, 0xc0, 0x63, 0xc0, 0xc6, 0xb8, 0x32, 0xb5, + 0xcb, 0xbd, 0x4f, 0xb5, 0x4e, 0xc0, 0x9e, 0xc0, 0x82, 0xbb, 0xa7, 0xc1, 0x78, 0xae, 0xbd, 0xbe, + 0xa5, 0xbe, 0x58, 0xc1, 0x59, 0xc0, 0x62, 0xbf, 0x85, 0xbc, 0x31, 0xc1, 0x82, 0xc1, 0xd3, 0xb2, + 0x37, 0xbe, 0x9c, 0xbd, 0x1b, 0xbf, 0xe5, 0xc0, 0xc3, 0xb5, 0x9b, 0xc1, 0x51, 0xbf, 0xe5, 0xb5, + 0xeb, 0xbd, 0x20, 0xbf, 0x71, 0xc1, 0x5c, 0xb4, 0xc3, 0xb9, 0x8e, 0xc1, 0xfc, 0xb9, 0x8a, 0xc2, + 0x35, 0xc0, 0xd4, 0xc1, 0x73, 0xc0, 0x43, 0xc1, 0x09, 0xb6, 0x59, 0xb6, 0x66, 0xc3, 0x09, 0xc0, + 0x89, 0xb3, 0xab, 0xbe, 0xd2, 0xc1, 0x74, 0xba, 0xd8, 0xbc, 0xa7, 0xc2, 0xcc, 0xbe, 0x90, 0xc0, + 0xd4, 0xc0, 0x22, 0xc3, 0xa8, 0xc0, 0xa3, 0xc0, 0xc2, 0xbe, 0x6f, 0xc2, 0x64, 0xbc, 0xd3, 0xc1, + 0x7d, 0xbc, 0xa3, 0xbe, 0x58, 0xc1, 0x42, 0xc2, 0xa3, 0xc1, 0xe9, 0xc3, 0x2d, 0xbf, 0x6e, 0xc0, + 0xfb, 0xc2, 0x90, 0xbf, 0x55, 0xc2, 0x53, 0xb7, 0x52, 0xc1, 0xf4, 0xc1, 0x30, 0xc1, 0x7a, 0xb9, + 0x21, 0xc0, 0x8f, 0xc4, 0x4e, 0xbf, 0x96, 0xb8, 0xb2, 0xbe, 0xbd, 0xc2, 0xdc, 0xb9, 0xa7, 0xbc, + 0x9d, 0xc0, 0x71, 0xbc, 0x26, 0xc1, 0xed, 0xbf, 0xbe, 0xbd, 0x27, 0xbe, 0xa7, 0xbb, 0xc6, 0xbe, + 0xc7, 0xc0, 0x45, 0xc0, 0xb7, 0xbd, 0xaf, 0xba, 0x89, 0xc0, 0xef, 0xbf, 0x83, 0xbc, 0x1b, 0xc4, + 0x80, 0xc0, 0x0c, 0xc0, 0x22, 0xc2, 0xa4, 0xc0, 0x20, 0xc0, 0x70, 0xbe, 0xb8, 0xbd, 0xcd, 0xb7, + 0x9b, 0xc0, 0xd8, 0xc1, 0x28, 0xc1, 0x31, 0xc2, 0x2e, 0xc5, 0x55, 0xc0, 0x34, 0xbf, 0x42, 0xc1, + 0x18, 0xc0, 0x87, 0xbf, 0xf7, 0xc0, 0xf8, 0xbf, 0x4d, 0xc2, 0xbd, 0xc0, 0x2c, 0xc3, 0x1e, 0xbd, + 0x54, 0xc1, 0x35, 0xbd, 0xc9, 0xac, 0x52, 0xc1, 0xdf, 0xc1, 0x48, 0xc1, 0xb9, 0xbf, 0x2a, 0xbb, + 0xce, 0xbd, 0x37, 0xc1, 0xb9, 0xbf, 0xbc, 0xbf, 0xfb, 0xb9, 0x72, 0xc2, 0x6c, 0xbf, 0x14, 0xc2, + 0x1f, 0xbf, 0x2d, 0xc1, 0xb3, 0xb9, 0x05, 0xc0, 0x15, 0xc1, 0x15, 0xbf, 0x54, 0xb8, 0x3a, 0xc2, + 0x89, 0xbf, 0x37, 0xc0, 0x3c, 0xc1, 0xcd, 0xc1, 0x58, 0xc2, 0xab, 0xc1, 0xb6, 0xc0, 0x97, 0xbe, + 0x1b, 0xc1, 0x52, 0xbe, 0xf2, 0xc1, 0x35, 0xc1, 0xac, 0xbb, 0x7c, 0xc1, 0xd1, 0xc2, 0x21, 0xc1, + 0xa2, 0xbe, 0x6a, 0xbe, 0xf0, 0xc2, 0x02, 0xc2, 0x9a, 0xc3, 0x0a, 0xbd, 0x42, 0xb7, 0xc2, 0xbe, + 0x10, 0xc0, 0x93, 0xbd, 0x92, 0xbe, 0xeb, 0xbe, 0x82, 0xbd, 0x02, 0xc1, 0x91, 0xc1, 0x63, 0xbe, + 0xbc, 0xbe, 0x03, 0xc1, 0xc0, 0xbd, 0x35, 0xc2, 0x78, 0xc3, 0x04, 0xc3, 0x9d, 0xc2, 0x8f, 0xbc}; +unsigned char conv2d1x1s1_fp16_bias[] = {0x06, 0xcb, 0xdb, 0xc3, 0xd7, 0xbf, 0xee, 0x45, 0xad, 0x46, + 0xa8, 0x49, 0x48, 0x46, 0x85, 0xc9, 0xe0, 0xc8, 0x6b, 0xc6, + 0x61, 0xc1, 0xfd, 0xc8, 0x5a, 0x40, 0x04, 0x49, 0x7a, 0x3e, + 0x42, 0xbf, 0xef, 0xc5, 0x17, 0xb8, 0x6d, 0x42}; +unsigned char conv2d1x1s1_fp16_out[] = { + 0x4d, 0xd0, 0xb1, 0x45, 0xb6, 0x4c, 0x0e, 0xc5, 0x26, 0xce, 0xea, 0xcd, 0xa1, 0xcf, 0x3f, 0xd0, + 0x4d, 0xc5, 0x75, 0xcf, 0xfd, 0xcf, 0xa9, 0x42, 0xe9, 0xc2, 0x0c, 0xce, 0x65, 0xcf, 0x1b, 0xcc, + 0x44, 0x4e, 0x4f, 0xb5, 0xa6, 0xc9, 0x7b, 0x45, 0x74, 0xd2, 0xd4, 0x4b, 0x35, 0x4e, 0xf5, 0x4d, + 0xc7, 0xca, 0x6f, 0xca, 0x28, 0xd0, 0x11, 0xcd, 0x45, 0x49, 0x6a, 0xd0, 0x22, 0xd1, 0x9a, 0x4c, + 0x73, 0xba, 0x51, 0xca, 0xb6, 0xca, 0xc4, 0xca, 0x05, 0x53, 0x69, 0x4c, 0x68, 0x4b, 0xc3, 0x4a, + 0x1e, 0xd0, 0x77, 0x4e, 0x50, 0x4d, 0x67, 0x4f, 0x32, 0xcc, 0xd9, 0xbe, 0x17, 0xd0, 0xb3, 0xcc, + 0x0b, 0xc7, 0xa8, 0xd0, 0xab, 0xcf, 0x8b, 0x4d, 0x2a, 0xcc, 0x1b, 0xc8, 0x53, 0xc4, 0x4f, 0xc5, + 0xe6, 0x4f, 0x7c, 0x4c, 0x60, 0x49, 0x60, 0xc0, 0x64, 0xc1, 0xe4, 0x50, 0x1d, 0x4f, 0xa4, 0x4f, + 0x54, 0xbd, 0xe1, 0xb8, 0xf0, 0xc5, 0x5c, 0xc0, 0xdb, 0xc4, 0x2d, 0xce, 0xd1, 0xc1, 0x3e, 0x4f, + 0x0d, 0xc7, 0x0e, 0x3f, 0x97, 0x41, 0xce, 0x45, 0x25, 0x4f, 0x20, 0x45, 0xa9, 0x4d, 0x68, 0x3d, + 0xc6, 0xcf, 0x0d, 0x50, 0x14, 0x4e, 0x02, 0x4e, 0x21, 0xb8, 0x2b, 0xcc, 0xdc, 0xc9, 0xd9, 0xb6, + 0x85, 0x4a, 0x5c, 0xce, 0x5a, 0x41, 0x5a, 0x4d, 0x75, 0x3e, 0x3f, 0x4b, 0x89, 0xcc, 0xd7, 0x48, + 0xac, 0x51, 0xd5, 0x47, 0xb4, 0x4f, 0xb2, 0x45, 0x9a, 0xcc, 0x7f, 0x4f, 0x56, 0x50, 0xef, 0x4d, + 0x06, 0xc8, 0xc2, 0x41, 0x07, 0xc6, 0xd6, 0xc2, 0xdd, 0x4a, 0x05, 0xca, 0x2b, 0xc8, 0x8b, 0x4f, + 0xe5, 0x49, 0xfb, 0x4a, 0xbf, 0xca, 0x7a, 0x47, 0xa5, 0x52, 0x45, 0x50, 0xfd, 0x4b, 0x19, 0x4d, + 0x6a, 0xce, 0x06, 0x4d, 0x56, 0x4e, 0xe2, 0x4c, 0x02, 0x42, 0x8f, 0x38, 0x85, 0xc7, 0x91, 0xc4, + 0xee, 0x4d, 0x98, 0xcb, 0x14, 0xcc, 0xd7, 0x4a, 0x46, 0x47, 0x31, 0xc7, 0xa1, 0x38, 0x3c, 0x48, + 0x7b, 0x4f, 0xc9, 0x4d, 0xfd, 0x45, 0x57, 0x3b, 0x6b, 0xd3, 0xf5, 0x47, 0xb1, 0x45, 0x71, 0x4d, + 0x0d, 0xcd, 0x52, 0xcc, 0x6f, 0xd0, 0xda, 0xd0, 0x26, 0x40, 0x51, 0xd0, 0x20, 0xd1, 0xe3, 0x47, + 0x18, 0xcf, 0xf1, 0xca, 0x29, 0xce, 0x81, 0xcb, 0x34, 0x50, 0x7b, 0x49, 0x7f, 0x3b, 0xf5, 0xc3, + 0x6c, 0xd1, 0x91, 0x48, 0x08, 0x4a, 0xb3, 0x46, 0xd8, 0xcd, 0xad, 0xca, 0x01, 0xcf, 0xa9, 0xcd, + 0x6e, 0xc5, 0x03, 0xcf, 0x2d, 0xcf, 0x0a, 0x43, 0xe5, 0xcc, 0x0e, 0x3c, 0x15, 0xd0, 0x78, 0xcd, + 0xde, 0x4c, 0x86, 0xb5, 0xca, 0x48, 0xba, 0x44, 0x8b, 0xcf, 0x34, 0x4d, 0x86, 0x4a, 0x3e, 0x4e, + 0x85, 0xca, 0x63, 0xc7, 0xb2, 0xcd, 0xa8, 0xc8, 0xe5, 0xc3, 0x63, 0xd0, 0xf1, 0xcf, 0xd2, 0x4d, + 0xd8, 0xc7, 0xdd, 0xc8, 0xa3, 0xce, 0x0f, 0xcc, 0xe4, 0x4f, 0x13, 0x4a, 0x74, 0x3c, 0x17, 0x45, + 0xe4, 0xd1, 0xab, 0x4e, 0xfe, 0x45, 0xa6, 0x4d, 0x10, 0xc9, 0x8a, 0xc9, 0xb3, 0xcd, 0xa1, 0xc9, + 0xa7, 0x48, 0xea, 0xce, 0xe9, 0xcf, 0x83, 0x4d, 0xaf, 0xc8, 0xb4, 0xc9, 0x89, 0xca, 0xcc, 0xc8, + 0x57, 0x50, 0x62, 0x4a, 0x52, 0x49, 0x4f, 0xc7, 0xa0, 0xd0, 0x59, 0x49, 0x62, 0x4c, 0xd6, 0x43, + 0xf8, 0xcc, 0x1e, 0xcc, 0x27, 0xd0, 0xc4, 0xd0, 0x60, 0xc0, 0xf4, 0xcf, 0x4a, 0xcf, 0xca, 0x45, + 0x51, 0xca, 0x8b, 0xcc, 0xbc, 0xcc, 0x0f, 0xc8, 0x8d, 0x50, 0xc1, 0xb9, 0xde, 0x41, 0x37, 0xc8, + 0xc3, 0xcd, 0x96, 0x50, 0x53, 0x50, 0x2c, 0x4e, 0xcb, 0xc1, 0x8f, 0x45, 0xbb, 0xc8, 0x7c, 0xb8, + 0x38, 0x4a, 0x4d, 0xcd, 0xcf, 0xc1, 0xf5, 0x4a, 0xd0, 0xcc, 0xfc, 0x46, 0x35, 0xca, 0x6f, 0xbd, + 0x6d, 0x50, 0x12, 0xc2, 0x69, 0x4f, 0x98, 0x3e, 0xb0, 0xce, 0x78, 0x51, 0x7f, 0x50, 0xcf, 0x4f, + 0xe4, 0x43, 0x6f, 0xc8, 0x6a, 0xc6, 0x10, 0xcd, 0x13, 0x4d, 0x18, 0xcc, 0x93, 0xcc, 0xe1, 0x50, + 0x06, 0x48, 0x8b, 0xbe, 0xdf, 0xc5, 0xa2, 0x45, 0x8f, 0x51, 0x77, 0x4b, 0xa6, 0x4e, 0xcf, 0x46, + 0xa8, 0xd0, 0xcb, 0x4c, 0x94, 0x4d, 0xf5, 0x4a, 0x6f, 0xca, 0xf3, 0x1e, 0x9d, 0xcd, 0x57, 0xc8, + 0x15, 0x48, 0x6c, 0xcc, 0x46, 0xcd, 0xd9, 0x4c, 0xd5, 0xc4, 0xbb, 0x42, 0xa0, 0xca, 0x4a, 0xc6, + 0xb3, 0x4e, 0x19, 0x4b, 0x23, 0x4a, 0xa5, 0xc5, 0x6b, 0xd0, 0xd4, 0x4c, 0xed, 0x3a, 0xc7, 0x4e, + 0xd4, 0xc4, 0xd7, 0xca, 0xdc, 0xca, 0x7a, 0xcf, 0x6e, 0x45, 0x47, 0xcb, 0x8b, 0xcd, 0x67, 0x4d, + 0x59, 0xcc, 0x7d, 0xc2, 0x28, 0xcd, 0xed, 0xc8, 0xbe, 0x50, 0x7d, 0x40, 0xc5, 0x48, 0x92, 0x40, + 0x0a, 0xd1, 0x33, 0x4c, 0xca, 0x4c, 0xef, 0x45, 0x31, 0xc8, 0x08, 0xc8, 0xf1, 0xcc, 0x8a, 0xcb, + 0x5b, 0x46, 0xdc, 0xcb, 0xf9, 0xce, 0x57, 0x49, 0xa6, 0xca, 0x5e, 0xc7, 0xe4, 0xc8, 0x83, 0xcd, + 0x5e, 0x4d, 0x5e, 0xc8, 0x89, 0x4d, 0x87, 0xc0, 0x64, 0xd1, 0x73, 0x4d, 0x01, 0x4c, 0xd3, 0x4c, + 0x1d, 0xcb, 0x0e, 0xc9, 0xec, 0xce, 0x80, 0xcc, 0x8f, 0x48, 0xa4, 0xcd, 0x66, 0xcf, 0x7f, 0x4d, + 0xec, 0xa6, 0x7a, 0xc6, 0x31, 0xcc, 0x5e, 0xc9, 0x8c, 0x51, 0xa9, 0x48, 0xb5, 0x49, 0xf7, 0x45, + 0x36, 0xcf, 0xaf, 0x4f, 0x16, 0x50, 0xf5, 0x4d, 0xcd, 0xc5, 0x13, 0x44, 0x24, 0xcc, 0xb7, 0xc8, + 0xa1, 0x49, 0x7d, 0xcc, 0x43, 0xcc, 0x3a, 0x4d, 0xd0, 0xc8, 0xf0, 0x3c, 0x21, 0xc4, 0x08, 0xc6, + 0xb7, 0x51, 0x58, 0x46, 0xcd, 0x4e, 0xa7, 0x46}; + +/*********************************** conv2d_im2col_gemm *****************************/ +// input: [1, 3, 4, 5] +// kernel: [19, 3, 3, 3] +// bias: [19] +// output: [1, 19, 4, 5] +// pad_left = pad_right = pad_top = pad_down = 1 + +unsigned char conv2d_im2col_fp32_in[] = { + 0xe4, 0xd2, 0xb8, 0x3e, 0x70, 0x5d, 0x7b, 0x40, 0x6e, 0x9a, 0xd7, 0xbf, 0xaf, 0xf3, 0x90, 0xc0, + 0x3c, 0xae, 0x89, 0xbf, 0x8c, 0x4a, 0x5e, 0xbc, 0x07, 0xd9, 0x4e, 0xc0, 0x0d, 0xaf, 0x0a, 0xbf, + 0xdb, 0x48, 0x3e, 0x40, 0xdd, 0xbd, 0x8b, 0xc0, 0xf3, 0xf5, 0x66, 0x3f, 0xf2, 0xb1, 0x92, 0x3f, + 0x94, 0xd1, 0xd9, 0xbe, 0xd6, 0xc9, 0xda, 0xbf, 0xec, 0x35, 0x8f, 0x3e, 0x13, 0x68, 0xee, 0xbf, + 0xf1, 0x37, 0x43, 0x3e, 0x79, 0x92, 0xb2, 0xbe, 0x5f, 0xe9, 0x83, 0x3e, 0x34, 0xae, 0xfa, 0xbe, + 0x3b, 0x68, 0xe8, 0xbf, 0x68, 0x45, 0x17, 0x3f, 0x40, 0x16, 0x12, 0xc0, 0x78, 0xd2, 0x0d, 0xc0, + 0xc1, 0x90, 0x31, 0x3f, 0x09, 0x8b, 0x76, 0xc0, 0x0e, 0x5d, 0x17, 0x3e, 0xc2, 0xe2, 0x89, 0x3f, + 0xd0, 0xca, 0x1d, 0xc0, 0xe5, 0xe0, 0x40, 0xbf, 0xa5, 0xf3, 0xc1, 0xbe, 0xeb, 0x09, 0xf0, 0x3f, + 0x1b, 0x6c, 0x5f, 0xc0, 0xe5, 0x09, 0x3e, 0xc0, 0x67, 0x6c, 0x21, 0x40, 0xd1, 0xdc, 0x8f, 0x3f, + 0x00, 0xf3, 0x63, 0xc0, 0x7a, 0xb6, 0xb1, 0x3f, 0x88, 0xdd, 0x27, 0xbf, 0xef, 0xe6, 0xc1, 0xbf, + 0xd9, 0x05, 0x04, 0x40, 0xda, 0x68, 0x44, 0xc0, 0x12, 0x84, 0xe9, 0x3f, 0x22, 0x5c, 0x0a, 0xc0, + 0x9f, 0xc3, 0xe7, 0x3e, 0x3a, 0xf4, 0x88, 0xc0, 0x39, 0x6a, 0x85, 0x3f, 0xd6, 0xcf, 0x00, 0xc0, + 0x7e, 0x91, 0x63, 0x40, 0xe8, 0xab, 0xb6, 0xbf, 0x05, 0xda, 0x0b, 0xc0, 0x8d, 0xdc, 0xbf, 0x3f, + 0x91, 0xe2, 0xe2, 0xbe, 0x18, 0xf4, 0x21, 0xbf, 0x97, 0xb2, 0x80, 0xc0, 0x3d, 0xed, 0x86, 0xc0, + 0x37, 0x35, 0x35, 0xc0, 0x56, 0xd3, 0x7d, 0xc0, 0x83, 0x85, 0xb2, 0x3e, 0x11, 0xaa, 0x38, 0xbf}; +unsigned char conv2d_im2col_fp32_ker[] = { + 0xd9, 0x16, 0xb5, 0x40, 0xcf, 0x2b, 0x00, 0x3f, 0xdd, 0xb5, 0x9b, 0x40, 0xd7, 0x2f, 0x05, 0x40, + 0xdc, 0xe5, 0x27, 0xc0, 0x85, 0x9d, 0x36, 0x40, 0x92, 0x9e, 0x10, 0x40, 0x2c, 0xd2, 0x74, 0x40, + 0x90, 0x53, 0x8d, 0x3f, 0x0c, 0x31, 0x44, 0x40, 0xc6, 0x0a, 0x25, 0xbf, 0x6e, 0x44, 0x0b, 0x40, + 0x96, 0x3e, 0xa5, 0x3f, 0x22, 0xc9, 0x15, 0x40, 0x0a, 0xaa, 0x94, 0x40, 0x7a, 0x88, 0x55, 0x40, + 0x1c, 0x93, 0x00, 0x3f, 0xf2, 0x36, 0x93, 0xbf, 0x4c, 0x30, 0xb4, 0x3f, 0x47, 0x9c, 0xf0, 0xbe, + 0xe8, 0xa2, 0x8a, 0x40, 0xf7, 0xb0, 0xac, 0x40, 0x3a, 0x07, 0xb5, 0x3f, 0x2c, 0x43, 0x24, 0x40, + 0x51, 0xb1, 0xa3, 0xbf, 0x34, 0x6a, 0x5b, 0x40, 0xa3, 0x1f, 0x20, 0x40, 0x8a, 0x37, 0x36, 0xc0, + 0x3c, 0x0b, 0x80, 0x40, 0xed, 0x02, 0x7c, 0x40, 0x7f, 0x1a, 0x94, 0xbe, 0xe3, 0xa1, 0xda, 0x3f, + 0xc7, 0x17, 0x39, 0x40, 0xe2, 0xc6, 0x43, 0x40, 0xbc, 0x2d, 0x60, 0x40, 0x19, 0x86, 0x65, 0x40, + 0x5a, 0x01, 0xa9, 0xbe, 0xa8, 0x69, 0x5a, 0x40, 0xa8, 0x99, 0xbf, 0x3e, 0xf1, 0x7e, 0x51, 0xbf, + 0x54, 0xfb, 0xcb, 0x3d, 0xd8, 0x04, 0x5c, 0x3e, 0x4c, 0x16, 0x06, 0x40, 0x4f, 0x4c, 0x65, 0x40, + 0xe7, 0x16, 0xc0, 0x40, 0x25, 0xbf, 0xd4, 0x40, 0xc7, 0x53, 0x4d, 0x40, 0xaf, 0xa4, 0x1c, 0x40, + 0x1e, 0x40, 0x3e, 0xbf, 0x22, 0x62, 0x3f, 0xbf, 0x10, 0x5b, 0x51, 0x40, 0x96, 0xb6, 0x9a, 0x3f, + 0x8e, 0xca, 0x81, 0x3f, 0x36, 0x7c, 0x09, 0xc0, 0x20, 0xb2, 0x01, 0x40, 0xcb, 0x0d, 0xf9, 0x3f, + 0x88, 0x58, 0x58, 0xc0, 0x03, 0x67, 0x50, 0x40, 0xfc, 0x42, 0x8b, 0x3f, 0xa3, 0xef, 0x0b, 0x3f, + 0x0e, 0x00, 0xc8, 0x40, 0x18, 0x24, 0x5e, 0xbf, 0x3b, 0xac, 0xaa, 0xbe, 0xb7, 0x0f, 0xaa, 0x40, + 0x64, 0x1d, 0xf7, 0x3f, 0x34, 0xf6, 0x32, 0xbf, 0x28, 0x81, 0x84, 0xbf, 0x9c, 0x6a, 0x29, 0x40, + 0xb6, 0x6a, 0x37, 0xbf, 0x70, 0x16, 0x9c, 0x40, 0xf1, 0xc0, 0x25, 0x3f, 0x83, 0x55, 0xee, 0xbf, + 0x49, 0xb7, 0xab, 0x40, 0x8f, 0x97, 0x6c, 0x40, 0x0c, 0x2e, 0x22, 0x40, 0x2a, 0xd3, 0xda, 0x3d, + 0x62, 0x76, 0xab, 0x40, 0xb2, 0xf6, 0x05, 0xbf, 0x09, 0x0b, 0x4f, 0x40, 0xb6, 0x78, 0x4a, 0x40, + 0xb3, 0x73, 0x6c, 0x40, 0xf6, 0x1d, 0x33, 0x40, 0xc1, 0x30, 0x54, 0x3f, 0x9c, 0x3f, 0xd7, 0x3f, + 0x1c, 0x00, 0x15, 0x40, 0x06, 0x27, 0xd9, 0x3f, 0x2e, 0x34, 0x47, 0x40, 0x01, 0xe7, 0x66, 0x3f, + 0x4a, 0xe8, 0x81, 0xbf, 0x22, 0x94, 0x07, 0x40, 0x05, 0xd0, 0xa5, 0x40, 0x07, 0x21, 0x62, 0x40, + 0xaa, 0x8d, 0x88, 0x40, 0x43, 0x91, 0x39, 0x40, 0x4b, 0xcf, 0x01, 0x40, 0x74, 0x75, 0x66, 0x40, + 0x2b, 0x10, 0x4d, 0x3e, 0x77, 0x57, 0xab, 0x3f, 0x22, 0xbf, 0x2d, 0xbf, 0x5e, 0x1d, 0x12, 0x40, + 0xca, 0x1e, 0xa8, 0x40, 0x80, 0x61, 0xf0, 0x3f, 0x83, 0xc5, 0x3a, 0x40, 0xca, 0xd9, 0xca, 0x3f, + 0xd0, 0xec, 0x5a, 0x40, 0x92, 0xba, 0x6c, 0x40, 0x65, 0x0c, 0x0d, 0x40, 0xe0, 0x15, 0x1a, 0x3e, + 0x3a, 0xaa, 0xc5, 0x3f, 0xc5, 0x97, 0x81, 0x40, 0x59, 0x76, 0xcc, 0x40, 0x1b, 0x9c, 0xae, 0x3f, + 0xfa, 0xcf, 0x6b, 0xbf, 0xea, 0x81, 0xb6, 0xc0, 0xe5, 0x27, 0x64, 0x3f, 0xb0, 0x57, 0xad, 0xbf, + 0x91, 0xf5, 0x1d, 0x40, 0x6c, 0x75, 0xab, 0x3f, 0x7e, 0x02, 0x0e, 0xbf, 0xbb, 0xd1, 0x0b, 0x40, + 0xa2, 0x12, 0xab, 0x40, 0x11, 0xec, 0x0a, 0x40, 0x3c, 0xdd, 0x29, 0x40, 0x35, 0xad, 0x9c, 0x3f, + 0xd6, 0x14, 0x31, 0x40, 0xfa, 0x37, 0x47, 0x40, 0xb9, 0x32, 0xc2, 0x3f, 0x31, 0x21, 0x73, 0x40, + 0xd2, 0x17, 0x6f, 0x40, 0xf7, 0x0a, 0xc6, 0xbf, 0xeb, 0x0a, 0xcb, 0x3f, 0xd8, 0x47, 0x6a, 0x40, + 0x9d, 0x0a, 0x10, 0x3f, 0x27, 0xe0, 0x2f, 0x40, 0x53, 0x0f, 0x1a, 0x40, 0x2e, 0xc6, 0x6c, 0x40, + 0xad, 0x87, 0x84, 0x3f, 0x0c, 0xb0, 0x81, 0x40, 0xa1, 0x0e, 0x46, 0x40, 0x2f, 0x5f, 0xa2, 0xbf, + 0x7e, 0x8e, 0x9f, 0xbf, 0x48, 0xd1, 0x80, 0x3f, 0xde, 0x42, 0xa7, 0x3f, 0x75, 0x21, 0x39, 0x40, + 0x71, 0x8f, 0x18, 0x3f, 0x1e, 0x79, 0x49, 0x40, 0xdb, 0xad, 0x01, 0x40, 0xf0, 0xa6, 0x15, 0xc0, + 0x38, 0x33, 0x29, 0x40, 0xa6, 0xcd, 0x14, 0x40, 0x8f, 0x5b, 0x6a, 0x40, 0x2c, 0xad, 0xa5, 0x40, + 0x7c, 0x66, 0xbd, 0x3e, 0xab, 0x85, 0x9a, 0x3e, 0x51, 0xc2, 0xd6, 0xbd, 0xd9, 0xbc, 0x18, 0xc0, + 0xcc, 0xbd, 0x03, 0x40, 0x96, 0xba, 0x5d, 0x40, 0x32, 0x33, 0x4b, 0x3f, 0xef, 0xe3, 0x4f, 0x40, + 0x1d, 0xab, 0x3b, 0x40, 0xdb, 0x3d, 0xe7, 0x3f, 0x0f, 0xe7, 0x86, 0x3e, 0x0b, 0xd2, 0xe2, 0x3f, + 0x92, 0xe0, 0xab, 0x3f, 0x7b, 0x46, 0x98, 0x3f, 0x1f, 0x1b, 0x16, 0xbf, 0x9e, 0x67, 0x07, 0x40, + 0xd9, 0xc0, 0x25, 0x40, 0x8c, 0x36, 0x2e, 0x40, 0x69, 0x4f, 0x21, 0xc0, 0x50, 0xed, 0x97, 0x40, + 0x20, 0xf4, 0x02, 0x40, 0xe5, 0x4b, 0x2a, 0x40, 0xd4, 0x0d, 0x25, 0x40, 0x55, 0x22, 0x56, 0x40, + 0xa5, 0x82, 0x1c, 0x40, 0x40, 0x0a, 0xec, 0x3f, 0xee, 0x61, 0x03, 0x3f, 0x42, 0x3e, 0x9f, 0x40, + 0xfb, 0x1a, 0x32, 0x40, 0x1f, 0x02, 0x85, 0xc0, 0xf6, 0x09, 0x33, 0x3f, 0xa8, 0x3f, 0x26, 0x40, + 0x39, 0x08, 0x65, 0x40, 0x06, 0x49, 0x33, 0x40, 0xeb, 0xf7, 0x13, 0xc0, 0x6e, 0xbc, 0x19, 0x3f, + 0x33, 0xbd, 0xa2, 0x3f, 0x5b, 0xd5, 0xd6, 0x3f, 0x64, 0xb6, 0x7c, 0x40, 0x79, 0xe5, 0xc9, 0x40, + 0xc6, 0x3b, 0x01, 0x40, 0x47, 0x3e, 0x6a, 0x40, 0xfe, 0xee, 0x73, 0x40, 0x4c, 0x73, 0xa8, 0x3f, + 0xf7, 0x40, 0x22, 0x40, 0xed, 0x87, 0x92, 0xbf, 0x52, 0x79, 0xdf, 0x3f, 0x2f, 0x60, 0x14, 0x40, + 0x34, 0xa9, 0x7c, 0x40, 0xda, 0x56, 0x27, 0x40, 0xb8, 0x26, 0x7f, 0x40, 0x12, 0x80, 0x07, 0x40, + 0x9b, 0x91, 0x96, 0x40, 0x49, 0xeb, 0x8c, 0xbf, 0xb4, 0xb3, 0xaf, 0x40, 0x61, 0xfb, 0x86, 0x3f, + 0x59, 0x56, 0x07, 0xbf, 0xac, 0x77, 0x2a, 0x3f, 0x04, 0x82, 0x24, 0x40, 0xeb, 0x10, 0x74, 0x40, + 0xb4, 0x74, 0x6a, 0x3f, 0x22, 0xbc, 0x81, 0x3c, 0x72, 0x77, 0x96, 0x40, 0x95, 0xa7, 0x43, 0x40, + 0xe5, 0xf9, 0xe6, 0x3f, 0xa1, 0x6f, 0x06, 0x3f, 0x07, 0x9a, 0x12, 0x40, 0x79, 0x94, 0x56, 0x40, + 0x04, 0x1e, 0x9d, 0xbd, 0xe5, 0x1d, 0x8f, 0x40, 0xdb, 0xb3, 0x79, 0x40, 0x08, 0x5b, 0x68, 0xbf, + 0x49, 0x48, 0xb4, 0x3f, 0xa1, 0x22, 0xa4, 0x40, 0x9b, 0xf5, 0x59, 0x40, 0xe7, 0x88, 0x84, 0x3f, + 0x16, 0xba, 0x98, 0x40, 0x16, 0xe7, 0x85, 0x40, 0xe0, 0xf4, 0x30, 0x40, 0x26, 0x9b, 0xdf, 0x40, + 0xad, 0xb1, 0x84, 0x40, 0x54, 0xb0, 0x8e, 0x3f, 0xdc, 0x73, 0xec, 0x3f, 0x38, 0x66, 0xda, 0x3f, + 0x8b, 0x87, 0x85, 0x3f, 0x5e, 0x95, 0x4e, 0x40, 0x8c, 0x72, 0xca, 0x3f, 0x8b, 0x64, 0xaa, 0x3f, + 0x54, 0x63, 0xb3, 0x3f, 0xc7, 0xf8, 0xed, 0x3f, 0x9d, 0x00, 0x4a, 0x40, 0x53, 0xa3, 0x0f, 0x3f, + 0x7c, 0xf4, 0x61, 0x40, 0x3e, 0x30, 0x27, 0x40, 0x3e, 0x1c, 0x98, 0x3f, 0xc9, 0x63, 0xa7, 0x40, + 0xeb, 0x1c, 0x8d, 0x3e, 0x4a, 0x38, 0xaf, 0x3c, 0x67, 0x03, 0x0a, 0x40, 0x85, 0x55, 0x56, 0xbc, + 0xe9, 0x64, 0x68, 0x40, 0x53, 0x2e, 0x02, 0x3f, 0x83, 0x0d, 0xf8, 0x3f, 0xe4, 0xa8, 0x48, 0x3f, + 0x5a, 0xe1, 0xc6, 0x3e, 0xc4, 0xc2, 0xc7, 0x3f, 0xb8, 0xa9, 0x53, 0x40, 0xfd, 0xbe, 0xfe, 0xbf, + 0x87, 0xe6, 0xd6, 0x40, 0xd7, 0x02, 0xdc, 0x3f, 0xb7, 0xde, 0x5b, 0xbd, 0x1a, 0x73, 0x9e, 0x3e, + 0xe9, 0x18, 0xd1, 0x3f, 0x56, 0x81, 0xac, 0x3f, 0xce, 0x29, 0x6b, 0x3f, 0xce, 0xa8, 0xdf, 0xbe, + 0x81, 0x44, 0x94, 0x3f, 0xc4, 0xfd, 0xbf, 0xbf, 0xc7, 0x00, 0xde, 0x3f, 0x53, 0xe3, 0xcb, 0xbc, + 0x46, 0xfc, 0xbf, 0x3f, 0x96, 0xff, 0x09, 0x40, 0xa6, 0x89, 0xfc, 0xbf, 0xb7, 0x4e, 0x67, 0x40, + 0x93, 0x56, 0x7c, 0x40, 0x38, 0xa3, 0x05, 0x40, 0x68, 0x95, 0xf7, 0x3f, 0xdb, 0x29, 0x89, 0x40, + 0x4a, 0x4a, 0xa2, 0x3f, 0xb0, 0xd8, 0x0f, 0xbf, 0x21, 0x43, 0xcb, 0x3d, 0x86, 0xff, 0x1d, 0x40, + 0xd5, 0x32, 0xfe, 0x3f, 0xf1, 0xb8, 0x40, 0x40, 0xa1, 0x7d, 0x9e, 0x3f, 0xe6, 0x93, 0x83, 0x40, + 0xa4, 0xfa, 0x2f, 0x3e, 0xe6, 0x0f, 0x3f, 0x40, 0x0d, 0x42, 0xa6, 0xbf, 0x17, 0x50, 0x28, 0x40, + 0x26, 0x2e, 0x1a, 0x40, 0xe8, 0x8c, 0x34, 0x3f, 0x92, 0x6b, 0xcb, 0x3f, 0x85, 0x31, 0x2e, 0x40, + 0x0e, 0x77, 0xac, 0x3f, 0x13, 0x0f, 0x6d, 0x40, 0xba, 0x81, 0x88, 0x3f, 0x3c, 0x99, 0x88, 0x40, + 0x9f, 0xec, 0x15, 0x40, 0xd8, 0xea, 0xd5, 0x3f, 0x2d, 0x98, 0x01, 0x40, 0x49, 0xe2, 0xdc, 0x3c, + 0x75, 0x93, 0xe4, 0xbf, 0xb4, 0x9b, 0x9b, 0x3f, 0xb2, 0x1c, 0x12, 0x3f, 0x2e, 0x30, 0x32, 0x3f, + 0x60, 0xd1, 0x07, 0x40, 0x76, 0x8a, 0xd9, 0x3f, 0x81, 0xd8, 0xf3, 0x3f, 0x00, 0x44, 0xab, 0xbd, + 0xef, 0xdc, 0xb3, 0x40, 0x8a, 0xcd, 0x61, 0x40, 0x86, 0x4a, 0x62, 0x40, 0x30, 0xf6, 0xd2, 0x3e, + 0xcc, 0x89, 0x5c, 0x40, 0x11, 0xf9, 0x7c, 0x40, 0x74, 0xd5, 0xbf, 0xbf, 0x7b, 0x00, 0xb3, 0x40, + 0x58, 0x76, 0x89, 0x40, 0x3b, 0x29, 0x6a, 0x40, 0x97, 0x28, 0xc4, 0x3f, 0xdd, 0xea, 0xe8, 0x3f, + 0xfb, 0xc2, 0x6c, 0x40, 0x2e, 0x00, 0xa6, 0x3f, 0xc3, 0x8c, 0xd9, 0x3f, 0xca, 0xcc, 0x0f, 0x3f, + 0xa1, 0x32, 0x97, 0x40, 0xf9, 0x76, 0x46, 0x3f, 0x89, 0x30, 0xc1, 0x40, 0x86, 0xea, 0x76, 0xbe, + 0xc2, 0x66, 0xec, 0x3f, 0x40, 0x1c, 0x96, 0x3f, 0x4b, 0x83, 0xa7, 0x3e, 0x79, 0x92, 0x08, 0x40, + 0x85, 0xf8, 0x17, 0x40, 0x12, 0xb4, 0x92, 0x40, 0x62, 0xd5, 0x22, 0x40, 0xf0, 0xb9, 0x5d, 0x40, + 0x71, 0x96, 0x53, 0x40, 0xee, 0x0f, 0x39, 0xc0, 0xb9, 0xd7, 0x96, 0x3f, 0xfb, 0xb6, 0x3a, 0x40, + 0xc5, 0x1c, 0x1b, 0x40, 0x4a, 0x60, 0x83, 0x40, 0xf0, 0x9a, 0x75, 0x3f, 0x03, 0x08, 0x21, 0x41, + 0xb8, 0x07, 0xc0, 0x3f, 0x19, 0x12, 0x8d, 0x3e, 0xee, 0xa0, 0xbe, 0x40, 0xa4, 0x1a, 0xbf, 0x3f, + 0xd9, 0x42, 0x18, 0x40, 0x54, 0x02, 0x75, 0x40, 0xad, 0xe7, 0x4d, 0xbe, 0x1d, 0xe5, 0x2d, 0x40, + 0x1a, 0x0b, 0xeb, 0xbf, 0xfb, 0x9b, 0xae, 0x3f, 0x2f, 0x5e, 0xd0, 0x3f, 0x11, 0x71, 0x11, 0x40, + 0x22, 0x02, 0xf0, 0xbf, 0xd3, 0xf7, 0xbe, 0x3f, 0xbe, 0xf9, 0xd3, 0x3f, 0x02, 0xab, 0xe0, 0x3f, + 0x99, 0xf1, 0x7c, 0x3f, 0x8c, 0x48, 0xfa, 0x3f, 0x13, 0xee, 0xb1, 0x40, 0xe0, 0x15, 0x35, 0x40, + 0x72, 0xce, 0xc6, 0x3f, 0x26, 0x58, 0x0f, 0x40, 0x41, 0xbd, 0xa3, 0x40, 0x4c, 0xbf, 0x7b, 0x40, + 0x79, 0x00, 0x7e, 0x40, 0x76, 0xd8, 0xa1, 0x40, 0xa1, 0xf9, 0x71, 0x40, 0xc9, 0x40, 0x49, 0x40, + 0x1d, 0x24, 0xde, 0xbf, 0xac, 0x37, 0x86, 0x40, 0x22, 0x7c, 0x0b, 0x40, 0xc4, 0x39, 0x00, 0x3f, + 0x0a, 0xd7, 0x43, 0x40, 0x1a, 0xfe, 0xc4, 0x40, 0x21, 0x98, 0x97, 0x40, 0x76, 0x4e, 0x52, 0x40, + 0x74, 0xad, 0x8b, 0x40, 0xf6, 0x4b, 0x28, 0x40, 0x5c, 0x17, 0x14, 0x40, 0x34, 0x1b, 0x4b, 0x3f, + 0x39, 0xb9, 0x75, 0x40, 0xf6, 0x8c, 0x8a, 0x40, 0xc3, 0xad, 0x1e, 0xbd, 0x79, 0x37, 0x90, 0x3f, + 0x0b, 0x6e, 0xd1, 0x3f, 0x88, 0x1b, 0x30, 0x40, 0x4d, 0xc9, 0x2b, 0x40, 0xfb, 0x02, 0xfc, 0x3f, + 0x27, 0x26, 0x47, 0x40, 0x4b, 0x98, 0xa2, 0x40, 0x79, 0xd2, 0x15, 0x3f, 0x93, 0x62, 0xc0, 0xbf, + 0xe4, 0x2d, 0x87, 0x40, 0x6a, 0xe9, 0x81, 0x40, 0xca, 0xad, 0x01, 0xbe, 0x09, 0xed, 0x47, 0x40, + 0xc5, 0xf5, 0x64, 0x40, 0xcf, 0xa3, 0x78, 0x40, 0x73, 0x3a, 0x2c, 0x40, 0x06, 0xe6, 0x1b, 0xc0, + 0x61, 0x65, 0x1a, 0x40, 0xbb, 0x28, 0x80, 0x3f, 0xfc, 0xbe, 0x12, 0x40, 0x03, 0x2c, 0x0b, 0x40, + 0xb6, 0x52, 0x8c, 0x40, 0x11, 0x09, 0x36, 0x40, 0x9d, 0xac, 0xd0, 0x3f, 0xd2, 0x31, 0x8c, 0xbf, + 0xca, 0xbb, 0xa0, 0x40, 0x0f, 0x14, 0x7f, 0xbf, 0x6a, 0xd0, 0x67, 0x40, 0x1d, 0x64, 0x8b, 0x40, + 0x96, 0x7b, 0x96, 0xbf, 0x7a, 0x2f, 0xa2, 0x3f, 0x23, 0xf1, 0xb0, 0xbf, 0x47, 0x4c, 0xae, 0x3d, + 0x16, 0x53, 0x90, 0x40, 0xee, 0xe3, 0x8f, 0x40, 0xdd, 0xd5, 0x59, 0x40, 0x6a, 0xf5, 0x4a, 0x40, + 0xc1, 0x28, 0x62, 0x3f, 0xba, 0x44, 0x69, 0x40, 0x0b, 0xf7, 0x21, 0x40, 0xae, 0xdd, 0x92, 0x3f, + 0xa4, 0x29, 0x8c, 0x3e, 0xda, 0x14, 0x5b, 0xbd, 0x9a, 0x44, 0x93, 0x40, 0xf2, 0x24, 0x41, 0x3f, + 0x1a, 0x86, 0x10, 0x40, 0x1c, 0x3b, 0x82, 0xbf, 0x5f, 0xa6, 0x1c, 0x40, 0xf3, 0x23, 0x25, 0x40, + 0xc7, 0xa0, 0x01, 0x40, 0x21, 0x9d, 0xa9, 0x40, 0xc3, 0xd9, 0xa1, 0x3e, 0xa8, 0x6a, 0x89, 0x3f, + 0x93, 0x68, 0x24, 0xbd, 0x5c, 0x39, 0x77, 0x40, 0x00, 0xed, 0xe3, 0x3f, 0x95, 0x38, 0x04, 0x40, + 0xe2, 0x7b, 0x87, 0x40, 0xf9, 0x1a, 0x26, 0x40, 0xf3, 0xa1, 0xb4, 0x40, 0x5a, 0xb6, 0x5a, 0x40, + 0xd9, 0xe1, 0x91, 0x40, 0x0b, 0x1d, 0x5c, 0x40, 0xb1, 0xee, 0x25, 0xbf, 0x7b, 0xc9, 0xeb, 0x3f, + 0xdc, 0x6a, 0xe0, 0x3f, 0xa3, 0x02, 0xfa, 0x3f, 0x20, 0xdc, 0x25, 0x40, 0x4c, 0xbe, 0xa6, 0x3f, + 0x48, 0x14, 0x78, 0x3f, 0x85, 0x07, 0xb8, 0xbe, 0x42, 0x09, 0x24, 0x40, 0xa1, 0xf5, 0x70, 0x40, + 0xe8, 0x04, 0x7f, 0x40, 0x1c, 0x58, 0x13, 0xc0, 0x69, 0x6b, 0x0c, 0x3f, 0x64, 0x18, 0x47, 0x40, + 0xae, 0x19, 0x4b, 0x3f, 0x0d, 0x49, 0x87, 0x40, 0x12, 0xb0, 0x2b, 0x3f, 0x7f, 0xab, 0x03, 0x40, + 0x03, 0xed, 0x3a, 0xbf, 0x4f, 0xce, 0xb2, 0x40, 0xd6, 0xc1, 0xab, 0x3d, 0x2c, 0x93, 0xdc, 0x40, + 0x2e, 0x1a, 0x94, 0x3f, 0x36, 0xd4, 0x4d, 0x40, 0x76, 0x09, 0x89, 0x40, 0x11, 0x51, 0x45, 0x40, + 0x7c, 0xfa, 0xdd, 0x40, 0x55, 0x6a, 0x72, 0xc0, 0xc3, 0x60, 0x0f, 0xbf, 0x24, 0xb9, 0x58, 0x40, + 0x83, 0x79, 0x52, 0xbe, 0x3e, 0xee, 0x06, 0x40, 0x47, 0x3b, 0x71, 0x40, 0x65, 0x9c, 0x89, 0x40, + 0x9e, 0x71, 0x87, 0x40, 0x93, 0x36, 0x5b, 0x3f, 0x89, 0x16, 0x26, 0x3f, 0x97, 0x61, 0x12, 0xc0, + 0xa9, 0x91, 0x23, 0xc0, 0x91, 0x50, 0x06, 0x3f, 0x1a, 0xd7, 0x2a, 0x40, 0xa9, 0xa2, 0x94, 0x40, + 0x9f, 0xd9, 0x9c, 0xbf, 0xab, 0x96, 0xe8, 0xbf, 0x31, 0xc5, 0x8e, 0x3f, 0xc8, 0x44, 0xc6, 0x3e, + 0x3c, 0x71, 0x08, 0x40, 0x1d, 0xdd, 0x99, 0x3f, 0x31, 0x4f, 0x9d, 0x3f, 0xdc, 0x20, 0x79, 0x40, + 0x09, 0xb6, 0x08, 0x40, 0xed, 0xde, 0x78, 0x40, 0xdd, 0x3b, 0xa2, 0x3f, 0x84, 0x7b, 0x0b, 0x40, + 0x84, 0x16, 0x04, 0x40, 0x06, 0x57, 0xf7, 0xbe, 0xd4, 0x88, 0x82, 0x40, 0x1a, 0xef, 0xad, 0x3e, + 0x85, 0x33, 0x92, 0x40}; +unsigned char conv2d_im2col_fp32_ker1[] = { + 0xd9, 0x16, 0xb5, 0x40, 0x8a, 0x37, 0x36, 0xc0, 0x20, 0xb2, 0x01, 0x40, 0xf6, 0x1d, 0x33, 0x40, + 0x3a, 0xaa, 0xc5, 0x3f, 0x2e, 0xc6, 0x6c, 0x40, 0x0f, 0xe7, 0x86, 0x3e, 0x5b, 0xd5, 0xd6, 0x3f, + 0xcf, 0x2b, 0x00, 0x3f, 0x3c, 0x0b, 0x80, 0x40, 0xcb, 0x0d, 0xf9, 0x3f, 0xc1, 0x30, 0x54, 0x3f, + 0xc5, 0x97, 0x81, 0x40, 0xad, 0x87, 0x84, 0x3f, 0x0b, 0xd2, 0xe2, 0x3f, 0x64, 0xb6, 0x7c, 0x40, + 0xdd, 0xb5, 0x9b, 0x40, 0xed, 0x02, 0x7c, 0x40, 0x88, 0x58, 0x58, 0xc0, 0x9c, 0x3f, 0xd7, 0x3f, + 0x59, 0x76, 0xcc, 0x40, 0x0c, 0xb0, 0x81, 0x40, 0x92, 0xe0, 0xab, 0x3f, 0x79, 0xe5, 0xc9, 0x40, + 0xd7, 0x2f, 0x05, 0x40, 0x7f, 0x1a, 0x94, 0xbe, 0x03, 0x67, 0x50, 0x40, 0x1c, 0x00, 0x15, 0x40, + 0x1b, 0x9c, 0xae, 0x3f, 0xa1, 0x0e, 0x46, 0x40, 0x7b, 0x46, 0x98, 0x3f, 0xc6, 0x3b, 0x01, 0x40, + 0xdc, 0xe5, 0x27, 0xc0, 0xe3, 0xa1, 0xda, 0x3f, 0xfc, 0x42, 0x8b, 0x3f, 0x06, 0x27, 0xd9, 0x3f, + 0xfa, 0xcf, 0x6b, 0xbf, 0x2f, 0x5f, 0xa2, 0xbf, 0x1f, 0x1b, 0x16, 0xbf, 0x47, 0x3e, 0x6a, 0x40, + 0x85, 0x9d, 0x36, 0x40, 0xc7, 0x17, 0x39, 0x40, 0xa3, 0xef, 0x0b, 0x3f, 0x2e, 0x34, 0x47, 0x40, + 0xea, 0x81, 0xb6, 0xc0, 0x7e, 0x8e, 0x9f, 0xbf, 0x9e, 0x67, 0x07, 0x40, 0xfe, 0xee, 0x73, 0x40, + 0x92, 0x9e, 0x10, 0x40, 0xe2, 0xc6, 0x43, 0x40, 0x0e, 0x00, 0xc8, 0x40, 0x01, 0xe7, 0x66, 0x3f, + 0xe5, 0x27, 0x64, 0x3f, 0x48, 0xd1, 0x80, 0x3f, 0xd9, 0xc0, 0x25, 0x40, 0x4c, 0x73, 0xa8, 0x3f, + 0x2c, 0xd2, 0x74, 0x40, 0xbc, 0x2d, 0x60, 0x40, 0x18, 0x24, 0x5e, 0xbf, 0x4a, 0xe8, 0x81, 0xbf, + 0xb0, 0x57, 0xad, 0xbf, 0xde, 0x42, 0xa7, 0x3f, 0x8c, 0x36, 0x2e, 0x40, 0xf7, 0x40, 0x22, 0x40, + 0x90, 0x53, 0x8d, 0x3f, 0x19, 0x86, 0x65, 0x40, 0x3b, 0xac, 0xaa, 0xbe, 0x22, 0x94, 0x07, 0x40, + 0x91, 0xf5, 0x1d, 0x40, 0x75, 0x21, 0x39, 0x40, 0x69, 0x4f, 0x21, 0xc0, 0xed, 0x87, 0x92, 0xbf, + 0x0c, 0x31, 0x44, 0x40, 0x5a, 0x01, 0xa9, 0xbe, 0xb7, 0x0f, 0xaa, 0x40, 0x05, 0xd0, 0xa5, 0x40, + 0x6c, 0x75, 0xab, 0x3f, 0x71, 0x8f, 0x18, 0x3f, 0x50, 0xed, 0x97, 0x40, 0x52, 0x79, 0xdf, 0x3f, + 0xc6, 0x0a, 0x25, 0xbf, 0xa8, 0x69, 0x5a, 0x40, 0x64, 0x1d, 0xf7, 0x3f, 0x07, 0x21, 0x62, 0x40, + 0x7e, 0x02, 0x0e, 0xbf, 0x1e, 0x79, 0x49, 0x40, 0x20, 0xf4, 0x02, 0x40, 0x2f, 0x60, 0x14, 0x40, + 0x6e, 0x44, 0x0b, 0x40, 0xa8, 0x99, 0xbf, 0x3e, 0x34, 0xf6, 0x32, 0xbf, 0xaa, 0x8d, 0x88, 0x40, + 0xbb, 0xd1, 0x0b, 0x40, 0xdb, 0xad, 0x01, 0x40, 0xe5, 0x4b, 0x2a, 0x40, 0x34, 0xa9, 0x7c, 0x40, + 0x96, 0x3e, 0xa5, 0x3f, 0xf1, 0x7e, 0x51, 0xbf, 0x28, 0x81, 0x84, 0xbf, 0x43, 0x91, 0x39, 0x40, + 0xa2, 0x12, 0xab, 0x40, 0xf0, 0xa6, 0x15, 0xc0, 0xd4, 0x0d, 0x25, 0x40, 0xda, 0x56, 0x27, 0x40, + 0x22, 0xc9, 0x15, 0x40, 0x54, 0xfb, 0xcb, 0x3d, 0x9c, 0x6a, 0x29, 0x40, 0x4b, 0xcf, 0x01, 0x40, + 0x11, 0xec, 0x0a, 0x40, 0x38, 0x33, 0x29, 0x40, 0x55, 0x22, 0x56, 0x40, 0xb8, 0x26, 0x7f, 0x40, + 0x0a, 0xaa, 0x94, 0x40, 0xd8, 0x04, 0x5c, 0x3e, 0xb6, 0x6a, 0x37, 0xbf, 0x74, 0x75, 0x66, 0x40, + 0x3c, 0xdd, 0x29, 0x40, 0xa6, 0xcd, 0x14, 0x40, 0xa5, 0x82, 0x1c, 0x40, 0x12, 0x80, 0x07, 0x40, + 0x7a, 0x88, 0x55, 0x40, 0x4c, 0x16, 0x06, 0x40, 0x70, 0x16, 0x9c, 0x40, 0x2b, 0x10, 0x4d, 0x3e, + 0x35, 0xad, 0x9c, 0x3f, 0x8f, 0x5b, 0x6a, 0x40, 0x40, 0x0a, 0xec, 0x3f, 0x9b, 0x91, 0x96, 0x40, + 0x1c, 0x93, 0x00, 0x3f, 0x4f, 0x4c, 0x65, 0x40, 0xf1, 0xc0, 0x25, 0x3f, 0x77, 0x57, 0xab, 0x3f, + 0xd6, 0x14, 0x31, 0x40, 0x2c, 0xad, 0xa5, 0x40, 0xee, 0x61, 0x03, 0x3f, 0x49, 0xeb, 0x8c, 0xbf, + 0xf2, 0x36, 0x93, 0xbf, 0xe7, 0x16, 0xc0, 0x40, 0x83, 0x55, 0xee, 0xbf, 0x22, 0xbf, 0x2d, 0xbf, + 0xfa, 0x37, 0x47, 0x40, 0x7c, 0x66, 0xbd, 0x3e, 0x42, 0x3e, 0x9f, 0x40, 0xb4, 0xb3, 0xaf, 0x40, + 0x4c, 0x30, 0xb4, 0x3f, 0x25, 0xbf, 0xd4, 0x40, 0x49, 0xb7, 0xab, 0x40, 0x5e, 0x1d, 0x12, 0x40, + 0xb9, 0x32, 0xc2, 0x3f, 0xab, 0x85, 0x9a, 0x3e, 0xfb, 0x1a, 0x32, 0x40, 0x61, 0xfb, 0x86, 0x3f, + 0x47, 0x9c, 0xf0, 0xbe, 0xc7, 0x53, 0x4d, 0x40, 0x8f, 0x97, 0x6c, 0x40, 0xca, 0x1e, 0xa8, 0x40, + 0x31, 0x21, 0x73, 0x40, 0x51, 0xc2, 0xd6, 0xbd, 0x1f, 0x02, 0x85, 0xc0, 0x59, 0x56, 0x07, 0xbf, + 0xe8, 0xa2, 0x8a, 0x40, 0xaf, 0xa4, 0x1c, 0x40, 0x0c, 0x2e, 0x22, 0x40, 0x80, 0x61, 0xf0, 0x3f, + 0xd2, 0x17, 0x6f, 0x40, 0xd9, 0xbc, 0x18, 0xc0, 0xf6, 0x09, 0x33, 0x3f, 0xac, 0x77, 0x2a, 0x3f, + 0xf7, 0xb0, 0xac, 0x40, 0x1e, 0x40, 0x3e, 0xbf, 0x2a, 0xd3, 0xda, 0x3d, 0x83, 0xc5, 0x3a, 0x40, + 0xf7, 0x0a, 0xc6, 0xbf, 0xcc, 0xbd, 0x03, 0x40, 0xa8, 0x3f, 0x26, 0x40, 0x04, 0x82, 0x24, 0x40, + 0x3a, 0x07, 0xb5, 0x3f, 0x22, 0x62, 0x3f, 0xbf, 0x62, 0x76, 0xab, 0x40, 0xca, 0xd9, 0xca, 0x3f, + 0xeb, 0x0a, 0xcb, 0x3f, 0x96, 0xba, 0x5d, 0x40, 0x39, 0x08, 0x65, 0x40, 0xeb, 0x10, 0x74, 0x40, + 0x2c, 0x43, 0x24, 0x40, 0x10, 0x5b, 0x51, 0x40, 0xb2, 0xf6, 0x05, 0xbf, 0xd0, 0xec, 0x5a, 0x40, + 0xd8, 0x47, 0x6a, 0x40, 0x32, 0x33, 0x4b, 0x3f, 0x06, 0x49, 0x33, 0x40, 0xb4, 0x74, 0x6a, 0x3f, + 0x51, 0xb1, 0xa3, 0xbf, 0x96, 0xb6, 0x9a, 0x3f, 0x09, 0x0b, 0x4f, 0x40, 0x92, 0xba, 0x6c, 0x40, + 0x9d, 0x0a, 0x10, 0x3f, 0xef, 0xe3, 0x4f, 0x40, 0xeb, 0xf7, 0x13, 0xc0, 0x22, 0xbc, 0x81, 0x3c, + 0x34, 0x6a, 0x5b, 0x40, 0x8e, 0xca, 0x81, 0x3f, 0xb6, 0x78, 0x4a, 0x40, 0x65, 0x0c, 0x0d, 0x40, + 0x27, 0xe0, 0x2f, 0x40, 0x1d, 0xab, 0x3b, 0x40, 0x6e, 0xbc, 0x19, 0x3f, 0x72, 0x77, 0x96, 0x40, + 0xa3, 0x1f, 0x20, 0x40, 0x36, 0x7c, 0x09, 0xc0, 0xb3, 0x73, 0x6c, 0x40, 0xe0, 0x15, 0x1a, 0x3e, + 0x53, 0x0f, 0x1a, 0x40, 0xdb, 0x3d, 0xe7, 0x3f, 0x33, 0xbd, 0xa2, 0x3f, 0x95, 0xa7, 0x43, 0x40, + 0xe5, 0xf9, 0xe6, 0x3f, 0x53, 0xa3, 0x0f, 0x3f, 0xc7, 0x00, 0xde, 0x3f, 0x13, 0x0f, 0x6d, 0x40, + 0xfb, 0xc2, 0x6c, 0x40, 0xa4, 0x1a, 0xbf, 0x3f, 0x22, 0x7c, 0x0b, 0x40, 0xcf, 0xa3, 0x78, 0x40, + 0xa1, 0x6f, 0x06, 0x3f, 0x7c, 0xf4, 0x61, 0x40, 0x53, 0xe3, 0xcb, 0xbc, 0xba, 0x81, 0x88, 0x3f, + 0x2e, 0x00, 0xa6, 0x3f, 0xd9, 0x42, 0x18, 0x40, 0xc4, 0x39, 0x00, 0x3f, 0x73, 0x3a, 0x2c, 0x40, + 0x07, 0x9a, 0x12, 0x40, 0x3e, 0x30, 0x27, 0x40, 0x46, 0xfc, 0xbf, 0x3f, 0x3c, 0x99, 0x88, 0x40, + 0xc3, 0x8c, 0xd9, 0x3f, 0x54, 0x02, 0x75, 0x40, 0x0a, 0xd7, 0x43, 0x40, 0x06, 0xe6, 0x1b, 0xc0, + 0x79, 0x94, 0x56, 0x40, 0x3e, 0x1c, 0x98, 0x3f, 0x96, 0xff, 0x09, 0x40, 0x9f, 0xec, 0x15, 0x40, + 0xca, 0xcc, 0x0f, 0x3f, 0xad, 0xe7, 0x4d, 0xbe, 0x1a, 0xfe, 0xc4, 0x40, 0x61, 0x65, 0x1a, 0x40, + 0x04, 0x1e, 0x9d, 0xbd, 0xc9, 0x63, 0xa7, 0x40, 0xa6, 0x89, 0xfc, 0xbf, 0xd8, 0xea, 0xd5, 0x3f, + 0xa1, 0x32, 0x97, 0x40, 0x1d, 0xe5, 0x2d, 0x40, 0x21, 0x98, 0x97, 0x40, 0xbb, 0x28, 0x80, 0x3f, + 0xe5, 0x1d, 0x8f, 0x40, 0xeb, 0x1c, 0x8d, 0x3e, 0xb7, 0x4e, 0x67, 0x40, 0x2d, 0x98, 0x01, 0x40, + 0xf9, 0x76, 0x46, 0x3f, 0x1a, 0x0b, 0xeb, 0xbf, 0x76, 0x4e, 0x52, 0x40, 0xfc, 0xbe, 0x12, 0x40, + 0xdb, 0xb3, 0x79, 0x40, 0x4a, 0x38, 0xaf, 0x3c, 0x93, 0x56, 0x7c, 0x40, 0x49, 0xe2, 0xdc, 0x3c, + 0x89, 0x30, 0xc1, 0x40, 0xfb, 0x9b, 0xae, 0x3f, 0x74, 0xad, 0x8b, 0x40, 0x03, 0x2c, 0x0b, 0x40, + 0x08, 0x5b, 0x68, 0xbf, 0x67, 0x03, 0x0a, 0x40, 0x38, 0xa3, 0x05, 0x40, 0x75, 0x93, 0xe4, 0xbf, + 0x86, 0xea, 0x76, 0xbe, 0x2f, 0x5e, 0xd0, 0x3f, 0xf6, 0x4b, 0x28, 0x40, 0xb6, 0x52, 0x8c, 0x40, + 0x49, 0x48, 0xb4, 0x3f, 0x85, 0x55, 0x56, 0xbc, 0x68, 0x95, 0xf7, 0x3f, 0xb4, 0x9b, 0x9b, 0x3f, + 0xc2, 0x66, 0xec, 0x3f, 0x11, 0x71, 0x11, 0x40, 0x5c, 0x17, 0x14, 0x40, 0x11, 0x09, 0x36, 0x40, + 0xa1, 0x22, 0xa4, 0x40, 0xe9, 0x64, 0x68, 0x40, 0xdb, 0x29, 0x89, 0x40, 0xb2, 0x1c, 0x12, 0x3f, + 0x40, 0x1c, 0x96, 0x3f, 0x22, 0x02, 0xf0, 0xbf, 0x34, 0x1b, 0x4b, 0x3f, 0x9d, 0xac, 0xd0, 0x3f, + 0x9b, 0xf5, 0x59, 0x40, 0x53, 0x2e, 0x02, 0x3f, 0x4a, 0x4a, 0xa2, 0x3f, 0x2e, 0x30, 0x32, 0x3f, + 0x4b, 0x83, 0xa7, 0x3e, 0xd3, 0xf7, 0xbe, 0x3f, 0x39, 0xb9, 0x75, 0x40, 0xd2, 0x31, 0x8c, 0xbf, + 0xe7, 0x88, 0x84, 0x3f, 0x83, 0x0d, 0xf8, 0x3f, 0xb0, 0xd8, 0x0f, 0xbf, 0x60, 0xd1, 0x07, 0x40, + 0x79, 0x92, 0x08, 0x40, 0xbe, 0xf9, 0xd3, 0x3f, 0xf6, 0x8c, 0x8a, 0x40, 0xca, 0xbb, 0xa0, 0x40, + 0x16, 0xba, 0x98, 0x40, 0xe4, 0xa8, 0x48, 0x3f, 0x21, 0x43, 0xcb, 0x3d, 0x76, 0x8a, 0xd9, 0x3f, + 0x85, 0xf8, 0x17, 0x40, 0x02, 0xab, 0xe0, 0x3f, 0xc3, 0xad, 0x1e, 0xbd, 0x0f, 0x14, 0x7f, 0xbf, + 0x16, 0xe7, 0x85, 0x40, 0x5a, 0xe1, 0xc6, 0x3e, 0x86, 0xff, 0x1d, 0x40, 0x81, 0xd8, 0xf3, 0x3f, + 0x12, 0xb4, 0x92, 0x40, 0x99, 0xf1, 0x7c, 0x3f, 0x79, 0x37, 0x90, 0x3f, 0x6a, 0xd0, 0x67, 0x40, + 0xe0, 0xf4, 0x30, 0x40, 0xc4, 0xc2, 0xc7, 0x3f, 0xd5, 0x32, 0xfe, 0x3f, 0x00, 0x44, 0xab, 0xbd, + 0x62, 0xd5, 0x22, 0x40, 0x8c, 0x48, 0xfa, 0x3f, 0x0b, 0x6e, 0xd1, 0x3f, 0x1d, 0x64, 0x8b, 0x40, + 0x26, 0x9b, 0xdf, 0x40, 0xb8, 0xa9, 0x53, 0x40, 0xf1, 0xb8, 0x40, 0x40, 0xef, 0xdc, 0xb3, 0x40, + 0xf0, 0xb9, 0x5d, 0x40, 0x13, 0xee, 0xb1, 0x40, 0x88, 0x1b, 0x30, 0x40, 0x96, 0x7b, 0x96, 0xbf, + 0xad, 0xb1, 0x84, 0x40, 0xfd, 0xbe, 0xfe, 0xbf, 0xa1, 0x7d, 0x9e, 0x3f, 0x8a, 0xcd, 0x61, 0x40, + 0x71, 0x96, 0x53, 0x40, 0xe0, 0x15, 0x35, 0x40, 0x4d, 0xc9, 0x2b, 0x40, 0x7a, 0x2f, 0xa2, 0x3f, + 0x54, 0xb0, 0x8e, 0x3f, 0x87, 0xe6, 0xd6, 0x40, 0xe6, 0x93, 0x83, 0x40, 0x86, 0x4a, 0x62, 0x40, + 0xee, 0x0f, 0x39, 0xc0, 0x72, 0xce, 0xc6, 0x3f, 0xfb, 0x02, 0xfc, 0x3f, 0x23, 0xf1, 0xb0, 0xbf, + 0xdc, 0x73, 0xec, 0x3f, 0xd7, 0x02, 0xdc, 0x3f, 0xa4, 0xfa, 0x2f, 0x3e, 0x30, 0xf6, 0xd2, 0x3e, + 0xb9, 0xd7, 0x96, 0x3f, 0x26, 0x58, 0x0f, 0x40, 0x27, 0x26, 0x47, 0x40, 0x47, 0x4c, 0xae, 0x3d, + 0x38, 0x66, 0xda, 0x3f, 0xb7, 0xde, 0x5b, 0xbd, 0xe6, 0x0f, 0x3f, 0x40, 0xcc, 0x89, 0x5c, 0x40, + 0xfb, 0xb6, 0x3a, 0x40, 0x41, 0xbd, 0xa3, 0x40, 0x4b, 0x98, 0xa2, 0x40, 0x16, 0x53, 0x90, 0x40, + 0x8b, 0x87, 0x85, 0x3f, 0x1a, 0x73, 0x9e, 0x3e, 0x0d, 0x42, 0xa6, 0xbf, 0x11, 0xf9, 0x7c, 0x40, + 0xc5, 0x1c, 0x1b, 0x40, 0x4c, 0xbf, 0x7b, 0x40, 0x79, 0xd2, 0x15, 0x3f, 0xee, 0xe3, 0x8f, 0x40, + 0x5e, 0x95, 0x4e, 0x40, 0xe9, 0x18, 0xd1, 0x3f, 0x17, 0x50, 0x28, 0x40, 0x74, 0xd5, 0xbf, 0xbf, + 0x4a, 0x60, 0x83, 0x40, 0x79, 0x00, 0x7e, 0x40, 0x93, 0x62, 0xc0, 0xbf, 0xdd, 0xd5, 0x59, 0x40, + 0x8c, 0x72, 0xca, 0x3f, 0x56, 0x81, 0xac, 0x3f, 0x26, 0x2e, 0x1a, 0x40, 0x7b, 0x00, 0xb3, 0x40, + 0xf0, 0x9a, 0x75, 0x3f, 0x76, 0xd8, 0xa1, 0x40, 0xe4, 0x2d, 0x87, 0x40, 0x6a, 0xf5, 0x4a, 0x40, + 0x8b, 0x64, 0xaa, 0x3f, 0xce, 0x29, 0x6b, 0x3f, 0xe8, 0x8c, 0x34, 0x3f, 0x58, 0x76, 0x89, 0x40, + 0x03, 0x08, 0x21, 0x41, 0xa1, 0xf9, 0x71, 0x40, 0x6a, 0xe9, 0x81, 0x40, 0xc1, 0x28, 0x62, 0x3f, + 0x54, 0x63, 0xb3, 0x3f, 0xce, 0xa8, 0xdf, 0xbe, 0x92, 0x6b, 0xcb, 0x3f, 0x3b, 0x29, 0x6a, 0x40, + 0xb8, 0x07, 0xc0, 0x3f, 0xc9, 0x40, 0x49, 0x40, 0xca, 0xad, 0x01, 0xbe, 0xba, 0x44, 0x69, 0x40, + 0xc7, 0xf8, 0xed, 0x3f, 0x81, 0x44, 0x94, 0x3f, 0x85, 0x31, 0x2e, 0x40, 0x97, 0x28, 0xc4, 0x3f, + 0x19, 0x12, 0x8d, 0x3e, 0x1d, 0x24, 0xde, 0xbf, 0x09, 0xed, 0x47, 0x40, 0x0b, 0xf7, 0x21, 0x40, + 0x9d, 0x00, 0x4a, 0x40, 0xc4, 0xfd, 0xbf, 0xbf, 0x0e, 0x77, 0xac, 0x3f, 0xdd, 0xea, 0xe8, 0x3f, + 0xee, 0xa0, 0xbe, 0x40, 0xac, 0x37, 0x86, 0x40, 0xc5, 0xf5, 0x64, 0x40, 0xae, 0xdd, 0x92, 0x3f, + 0xa4, 0x29, 0x8c, 0x3e, 0x4c, 0xbe, 0xa6, 0x3f, 0xda, 0x14, 0x5b, 0xbd, 0x48, 0x14, 0x78, 0x3f, + 0x9a, 0x44, 0x93, 0x40, 0x85, 0x07, 0xb8, 0xbe, 0xf2, 0x24, 0x41, 0x3f, 0x42, 0x09, 0x24, 0x40, + 0x1a, 0x86, 0x10, 0x40, 0xa1, 0xf5, 0x70, 0x40, 0x1c, 0x3b, 0x82, 0xbf, 0xe8, 0x04, 0x7f, 0x40, + 0x5f, 0xa6, 0x1c, 0x40, 0x1c, 0x58, 0x13, 0xc0, 0xf3, 0x23, 0x25, 0x40, 0x69, 0x6b, 0x0c, 0x3f, + 0xc7, 0xa0, 0x01, 0x40, 0x64, 0x18, 0x47, 0x40, 0x21, 0x9d, 0xa9, 0x40, 0xae, 0x19, 0x4b, 0x3f, + 0xc3, 0xd9, 0xa1, 0x3e, 0x0d, 0x49, 0x87, 0x40, 0xa8, 0x6a, 0x89, 0x3f, 0x12, 0xb0, 0x2b, 0x3f, + 0x93, 0x68, 0x24, 0xbd, 0x7f, 0xab, 0x03, 0x40, 0x5c, 0x39, 0x77, 0x40, 0x03, 0xed, 0x3a, 0xbf, + 0x00, 0xed, 0xe3, 0x3f, 0x4f, 0xce, 0xb2, 0x40, 0x95, 0x38, 0x04, 0x40, 0xd6, 0xc1, 0xab, 0x3d, + 0xe2, 0x7b, 0x87, 0x40, 0x2c, 0x93, 0xdc, 0x40, 0xf9, 0x1a, 0x26, 0x40, 0x2e, 0x1a, 0x94, 0x3f, + 0xf3, 0xa1, 0xb4, 0x40, 0x36, 0xd4, 0x4d, 0x40, 0x5a, 0xb6, 0x5a, 0x40, 0x76, 0x09, 0x89, 0x40, + 0xd9, 0xe1, 0x91, 0x40, 0x11, 0x51, 0x45, 0x40, 0x0b, 0x1d, 0x5c, 0x40, 0x7c, 0xfa, 0xdd, 0x40, + 0xb1, 0xee, 0x25, 0xbf, 0x55, 0x6a, 0x72, 0xc0, 0x7b, 0xc9, 0xeb, 0x3f, 0xc3, 0x60, 0x0f, 0xbf, + 0xdc, 0x6a, 0xe0, 0x3f, 0x24, 0xb9, 0x58, 0x40, 0xa3, 0x02, 0xfa, 0x3f, 0x83, 0x79, 0x52, 0xbe, + 0x20, 0xdc, 0x25, 0x40, 0x3e, 0xee, 0x06, 0x40, 0x47, 0x3b, 0x71, 0x40, 0x65, 0x9c, 0x89, 0x40, + 0x9e, 0x71, 0x87, 0x40, 0x93, 0x36, 0x5b, 0x3f, 0x89, 0x16, 0x26, 0x3f, 0x97, 0x61, 0x12, 0xc0, + 0xa9, 0x91, 0x23, 0xc0, 0x91, 0x50, 0x06, 0x3f, 0x1a, 0xd7, 0x2a, 0x40, 0xa9, 0xa2, 0x94, 0x40, + 0x9f, 0xd9, 0x9c, 0xbf, 0xab, 0x96, 0xe8, 0xbf, 0x31, 0xc5, 0x8e, 0x3f, 0xc8, 0x44, 0xc6, 0x3e, + 0x3c, 0x71, 0x08, 0x40, 0x1d, 0xdd, 0x99, 0x3f, 0x31, 0x4f, 0x9d, 0x3f, 0xdc, 0x20, 0x79, 0x40, + 0x09, 0xb6, 0x08, 0x40, 0xed, 0xde, 0x78, 0x40, 0xdd, 0x3b, 0xa2, 0x3f, 0x84, 0x7b, 0x0b, 0x40, + 0x84, 0x16, 0x04, 0x40, 0x06, 0x57, 0xf7, 0xbe, 0xd4, 0x88, 0x82, 0x40, 0x1a, 0xef, 0xad, 0x3e, + 0x85, 0x33, 0x92, 0x40}; +unsigned char conv2d_im2col_fp32_bias[] = { + 0x74, 0x89, 0xa0, 0xc0, 0x38, 0xc4, 0x06, 0x40, 0x7d, 0x04, 0x44, 0xc0, 0x16, 0xd4, 0x57, 0xc1, + 0x7d, 0x5f, 0x8e, 0x40, 0xf1, 0xf0, 0x43, 0x3f, 0x16, 0x19, 0x97, 0x41, 0xf2, 0x25, 0x32, 0x40, + 0xaf, 0xeb, 0xd4, 0x40, 0x64, 0x64, 0xa7, 0xc0, 0xf1, 0xa8, 0x0c, 0x41, 0xb1, 0x88, 0x05, 0xc0, + 0x0a, 0x8f, 0x96, 0xc0, 0x6b, 0x8d, 0x37, 0xc0, 0xc8, 0x3d, 0x35, 0x41, 0x79, 0x22, 0x1c, 0x40, + 0x74, 0x47, 0x2c, 0x41, 0x5f, 0x4b, 0x51, 0x41, 0xf6, 0xfc, 0xd1, 0x3e}; +unsigned char conv2d_im2col_fp32_out[] = { + 0x7d, 0x15, 0x98, 0xc1, 0x47, 0xdc, 0x29, 0xc2, 0x4e, 0x41, 0x25, 0xc2, 0x4f, 0x99, 0xb8, 0x41, + 0x6d, 0xaa, 0x4f, 0xc2, 0x42, 0x63, 0x9e, 0xc1, 0x13, 0x73, 0x2e, 0xc1, 0xbe, 0x11, 0x57, 0xc1, + 0x4f, 0x72, 0xbc, 0xc2, 0x09, 0xeb, 0x10, 0xc2, 0xf4, 0xcd, 0xcd, 0xc1, 0xe0, 0xb3, 0x9a, 0xc2, + 0x8e, 0x8a, 0x00, 0xc2, 0x03, 0x41, 0x08, 0xc2, 0xa7, 0x15, 0x24, 0xc1, 0xea, 0xbe, 0x0d, 0xc1, + 0xa2, 0x46, 0x6f, 0xc2, 0x59, 0x4e, 0xe2, 0xc1, 0xab, 0x3b, 0x71, 0xc2, 0xb9, 0xe8, 0xcf, 0xc1, + 0xe1, 0x7b, 0xe5, 0xc1, 0x83, 0x42, 0x23, 0xc0, 0x2c, 0x16, 0x2a, 0xc2, 0x04, 0xdb, 0x93, 0xc1, + 0x7b, 0x8d, 0xcf, 0xc0, 0xfb, 0x25, 0xa6, 0x41, 0x69, 0x0c, 0x1c, 0x40, 0x3d, 0x5b, 0x8b, 0xc2, + 0xfc, 0xda, 0xfe, 0xc1, 0xb2, 0xa0, 0x55, 0xc1, 0x17, 0x4d, 0x32, 0xc2, 0xc0, 0x3e, 0x45, 0xc2, + 0xd0, 0xb5, 0xe0, 0x40, 0x8e, 0x01, 0x23, 0xc2, 0xc5, 0xfc, 0xed, 0xc0, 0xe9, 0x3c, 0x46, 0xc0, + 0xf7, 0xb2, 0x3f, 0xc1, 0x22, 0xba, 0xe0, 0xc0, 0x2c, 0xd2, 0xda, 0xc1, 0x54, 0xad, 0xa6, 0x3e, + 0x89, 0xcf, 0x8e, 0xc0, 0xaf, 0xab, 0x39, 0xc2, 0x58, 0x83, 0xcb, 0x40, 0x1a, 0x63, 0xed, 0xc1, + 0x78, 0xff, 0x85, 0x40, 0x11, 0x2f, 0x76, 0xc2, 0x52, 0x79, 0xa8, 0x41, 0xb2, 0xa0, 0xc2, 0x40, + 0x56, 0xfc, 0x64, 0xc2, 0xe9, 0x0c, 0x96, 0xc2, 0x6f, 0x03, 0x24, 0xc2, 0x05, 0x6c, 0xa2, 0xc2, + 0xb9, 0x53, 0x4f, 0xc2, 0xe8, 0x09, 0xab, 0xbf, 0xfa, 0xb4, 0xd5, 0xc1, 0xc8, 0x13, 0xe8, 0xc1, + 0xa4, 0x07, 0x00, 0xc2, 0xec, 0xfc, 0x4a, 0x3f, 0x56, 0x2a, 0x4a, 0xc2, 0xca, 0x08, 0x28, 0xc2, + 0x26, 0x68, 0xf6, 0xc1, 0xd9, 0x19, 0xe9, 0xc1, 0x3b, 0x80, 0x1a, 0xc2, 0x31, 0x10, 0x43, 0xc2, + 0x95, 0xcf, 0xa6, 0xc1, 0xd5, 0xbf, 0xf6, 0xc1, 0xa6, 0x9f, 0x8c, 0xc2, 0x2a, 0xd8, 0xa6, 0xc1, + 0xd8, 0xd0, 0x94, 0xc2, 0xcc, 0xc5, 0x47, 0xc2, 0x4d, 0xcc, 0x2c, 0xc2, 0xd4, 0x73, 0xa6, 0xc2, + 0xf4, 0x99, 0x60, 0xc2, 0xdf, 0x9c, 0x60, 0xc2, 0xc5, 0x36, 0x22, 0xc2, 0x6c, 0x2e, 0x29, 0xc2, + 0xb0, 0x54, 0x55, 0xc2, 0x28, 0xdc, 0x4d, 0xc2, 0x89, 0xea, 0x7d, 0xc2, 0xd2, 0xad, 0x51, 0xc2, + 0x81, 0x1c, 0x60, 0xc2, 0x06, 0xeb, 0xe7, 0xc0, 0x59, 0x26, 0x06, 0x42, 0x94, 0xec, 0xc9, 0xc1, + 0x54, 0x46, 0xaf, 0xc0, 0x38, 0xb6, 0x24, 0x42, 0x25, 0x29, 0x7e, 0xc1, 0x1e, 0x10, 0x8e, 0xc2, + 0x6f, 0x01, 0x8e, 0xc1, 0x47, 0x23, 0x11, 0xc2, 0xa7, 0xf5, 0x4a, 0xc2, 0x75, 0xba, 0x50, 0xc2, + 0xd3, 0x2f, 0xb6, 0x40, 0x4b, 0xe6, 0x46, 0xc2, 0x07, 0x55, 0x0f, 0xc2, 0x9d, 0x08, 0xcd, 0xc0, + 0xb3, 0xfa, 0x3e, 0xc1, 0xff, 0x35, 0xec, 0xc1, 0x5e, 0xb0, 0x0d, 0xc1, 0xd9, 0xc7, 0xd2, 0xc1, + 0xc3, 0xe8, 0x2c, 0xc2, 0x8a, 0xa9, 0x1a, 0xc2, 0xe6, 0x67, 0xa1, 0x41, 0x37, 0xcb, 0x85, 0xc1, + 0x97, 0x42, 0x80, 0xc1, 0x6c, 0x48, 0x1f, 0xbf, 0x07, 0x0f, 0xd4, 0x40, 0x5c, 0x47, 0x32, 0xc2, + 0xb9, 0x45, 0x82, 0xc2, 0x9d, 0x27, 0x11, 0xc1, 0x70, 0xac, 0x3a, 0xc2, 0x37, 0xf8, 0x3a, 0xc2, + 0x07, 0xfb, 0x40, 0xc2, 0x4c, 0xa4, 0x0c, 0xc2, 0x9b, 0x03, 0x64, 0xc1, 0xa4, 0xfc, 0x66, 0xc1, + 0x1d, 0x74, 0x06, 0xc2, 0x45, 0xca, 0xc2, 0xc1, 0xaf, 0xec, 0x70, 0xc1, 0xbb, 0xd9, 0xd9, 0xbf, + 0x0e, 0x1b, 0xd6, 0x41, 0x8b, 0x7e, 0x5e, 0x40, 0xa2, 0x96, 0xff, 0xc1, 0x3c, 0x94, 0xbc, 0x41, + 0xc7, 0x57, 0x37, 0xc1, 0x3a, 0x48, 0x28, 0xc1, 0xcb, 0xb3, 0xb5, 0x40, 0xe5, 0x13, 0x9f, 0xc1, + 0x63, 0x44, 0x3f, 0xc1, 0x84, 0x9c, 0x0a, 0xc1, 0xdc, 0x94, 0xd7, 0xbf, 0x0b, 0x33, 0x97, 0xc1, + 0x9b, 0xa4, 0x1f, 0x41, 0x06, 0xcb, 0xda, 0xc1, 0x88, 0x9a, 0xda, 0xc0, 0x84, 0x55, 0x1e, 0x41, + 0x42, 0xaf, 0x20, 0xc2, 0x31, 0xb1, 0x22, 0xc1, 0x18, 0xd3, 0x81, 0xc1, 0xa6, 0xe1, 0x88, 0x41, + 0x8a, 0xc7, 0x1e, 0x41, 0xef, 0x2c, 0xae, 0xc1, 0xd3, 0x08, 0x46, 0xc2, 0x8a, 0x9f, 0x1f, 0xc1, + 0x43, 0xf9, 0x26, 0xc2, 0x4c, 0xe1, 0x4c, 0xc1, 0xca, 0x30, 0x21, 0xc2, 0x32, 0xd0, 0x44, 0xc2, + 0x52, 0xa9, 0x5f, 0xc2, 0x82, 0x93, 0x85, 0xc2, 0x3a, 0x32, 0x94, 0xc2, 0xc0, 0x16, 0xe8, 0xc1, + 0x3f, 0x8c, 0x41, 0xc2, 0x97, 0x2f, 0x5e, 0xc2, 0x1d, 0x4a, 0x07, 0xc2, 0x31, 0x2c, 0xb2, 0xc0, + 0x30, 0x52, 0x35, 0xc2, 0x4b, 0x41, 0x48, 0xc2, 0x96, 0xbb, 0xb0, 0xc1, 0x55, 0xa3, 0xf7, 0xc0, + 0x77, 0x8b, 0xf3, 0xc0, 0xb7, 0x90, 0x26, 0xc2, 0x9d, 0x69, 0x9a, 0xc1, 0xa9, 0x63, 0x07, 0xc2, + 0x16, 0xc4, 0xc6, 0xc1, 0xe9, 0xce, 0xb7, 0xc1, 0xe5, 0x59, 0x08, 0xc2, 0xad, 0x39, 0xd0, 0xc0, + 0x0b, 0xa3, 0xc6, 0xc2, 0x8f, 0x21, 0x0d, 0xc2, 0x77, 0xf3, 0xd6, 0xc1, 0x49, 0x09, 0x87, 0xc2, + 0x0c, 0xcb, 0xfc, 0xc1, 0xce, 0xb3, 0x1e, 0xc2, 0xaf, 0x23, 0xeb, 0xc1, 0x55, 0x20, 0xcb, 0xc0, + 0x25, 0xbb, 0xf0, 0xc1, 0x8a, 0x53, 0xcc, 0xc1, 0x1a, 0x11, 0x2b, 0xc2, 0x5c, 0x4d, 0x9c, 0xc1, + 0x7b, 0xc7, 0x8d, 0x3d, 0xa0, 0x7c, 0x98, 0x40, 0xe4, 0x9d, 0x3a, 0xc2, 0x1f, 0xdb, 0x74, 0xc1, + 0x33, 0x60, 0x1f, 0xc2, 0x86, 0xd0, 0x0e, 0x41, 0xa1, 0x79, 0x49, 0xc2, 0x88, 0xce, 0x33, 0xc2, + 0xdd, 0xe3, 0x7c, 0xc0, 0x29, 0x3d, 0x6d, 0xc2, 0x7a, 0xa1, 0x1d, 0xc2, 0xe9, 0xe9, 0x4e, 0xc1, + 0xa3, 0x5d, 0xf0, 0xc1, 0x4a, 0x80, 0xc3, 0xc1, 0x8b, 0x83, 0xfd, 0xc1, 0xcc, 0xff, 0x90, 0xc1, + 0x9d, 0x90, 0xd7, 0xc1, 0x5e, 0x96, 0xba, 0xc1, 0x11, 0x96, 0xeb, 0xc1, 0x1b, 0xab, 0x9a, 0xc1, + 0x27, 0xc2, 0x8a, 0x3f, 0x18, 0xbd, 0xed, 0xc1, 0x4b, 0xd4, 0xd3, 0xc1, 0xd8, 0xaf, 0x44, 0x40, + 0xbc, 0x2b, 0xbe, 0xc0, 0x1d, 0x15, 0x59, 0xbf, 0x32, 0x95, 0xae, 0xc1, 0xed, 0xf8, 0x23, 0x41, + 0x54, 0xf3, 0x6e, 0xc2, 0x01, 0x4a, 0x3c, 0xc1, 0x77, 0x19, 0x39, 0xc2, 0x26, 0xf0, 0x04, 0xc2, + 0xf6, 0xb5, 0x20, 0xc2, 0xea, 0x64, 0xdd, 0xbe, 0x09, 0x52, 0x80, 0xc1, 0x34, 0xb3, 0x39, 0xc1, + 0x89, 0x43, 0x73, 0xc1, 0xd7, 0xd1, 0x9e, 0x3e, 0x25, 0xea, 0xcf, 0xc1, 0x6a, 0x6f, 0x97, 0xc1, + 0x68, 0x8b, 0xa2, 0xc1, 0xf2, 0xd8, 0x28, 0xc2, 0x3b, 0xff, 0xa2, 0x40, 0x34, 0x19, 0x65, 0xc2, + 0xa5, 0xba, 0x0f, 0xc1, 0x87, 0x26, 0x97, 0xc1, 0x25, 0xaf, 0x17, 0xc2, 0x09, 0x66, 0xcc, 0xc1, + 0xa7, 0xa1, 0x2b, 0xc2, 0x57, 0xd7, 0x60, 0xc2, 0x95, 0xa5, 0x41, 0xc2, 0xea, 0x9c, 0xd6, 0xc1, + 0x32, 0xf5, 0x2c, 0xc2, 0x02, 0xe1, 0x87, 0xc2, 0x2b, 0xaf, 0x09, 0xc2, 0x37, 0x43, 0xf1, 0xc1, + 0xf4, 0xc4, 0x1d, 0xc2, 0x67, 0x4c, 0x10, 0xc2, 0x06, 0xd2, 0x84, 0xc1, 0xf9, 0x9a, 0xf7, 0xc1, + 0x87, 0x20, 0x47, 0xc2, 0x97, 0x8c, 0xa5, 0xc0, 0xa8, 0x16, 0x27, 0xc2, 0xad, 0x42, 0x62, 0xc2, + 0x1d, 0x19, 0x1f, 0xc1, 0xe1, 0xb9, 0x03, 0xc1, 0x56, 0xc2, 0x59, 0xc2, 0x3b, 0xe2, 0x01, 0x42, + 0xee, 0x8f, 0xc5, 0xc2, 0xe8, 0x90, 0x5f, 0xc2, 0x78, 0x1a, 0x4a, 0xc0, 0x29, 0x22, 0xa2, 0xc2, + 0x99, 0xa7, 0x2b, 0xc2, 0x50, 0x24, 0x9f, 0xc2, 0x21, 0x83, 0x15, 0xc1, 0x44, 0x38, 0x36, 0xc2, + 0x1e, 0x4b, 0x9b, 0xc2, 0x70, 0x43, 0xda, 0xc1, 0xff, 0xca, 0x37, 0xc2, 0x1a, 0xee, 0x0e, 0xc2, + 0xfe, 0x29, 0x89, 0xc1, 0xa7, 0xe8, 0x38, 0xc2, 0xf6, 0x16, 0xd3, 0x40, 0xaf, 0x98, 0x38, 0xc2, + 0xa2, 0xe1, 0x9c, 0xc1, 0x56, 0x2e, 0x27, 0x41, 0x43, 0x80, 0x37, 0xc2, 0xac, 0xcf, 0x1c, 0xc2, + 0xc5, 0x64, 0x41, 0xc2, 0x4b, 0x6a, 0xd3, 0xc1, 0xcb, 0x50, 0x47, 0xc2, 0x19, 0xd6, 0x47, 0xc2, + 0x45, 0x69, 0x66, 0xc1, 0xab, 0x47, 0x5d, 0xc2, 0xbd, 0x9e, 0x0c, 0xc2, 0x19, 0x7f, 0x29, 0xc2, + 0x3c, 0x0c, 0x34, 0xc2, 0x95, 0x68, 0x75, 0xc2, 0xcc, 0xda, 0x11, 0xc2, 0x4e, 0xb6, 0xbd, 0xc1, + 0x3a, 0xad, 0xc2, 0xc0, 0x7c, 0xa3, 0xca, 0xc0, 0x0c, 0xd1, 0xc1, 0x40, 0x44, 0x81, 0x1b, 0xc2, + 0x99, 0xc2, 0xde, 0xc1, 0x5f, 0xf0, 0xac, 0x40, 0xb1, 0x4e, 0x63, 0xc1, 0xbf, 0x79, 0x1d, 0xc2, + 0x8f, 0x24, 0x21, 0xc2, 0x3e, 0xe7, 0x16, 0xc2, 0xcc, 0x4a, 0x68, 0xc2, 0x78, 0xd3, 0x9e, 0xc1, + 0x15, 0xe2, 0xff, 0xc1, 0xae, 0xf0, 0x06, 0xc2, 0x5c, 0x34, 0x80, 0xc1, 0x84, 0xb6, 0xf2, 0xc1, + 0x3d, 0x7b, 0xf2, 0xc1, 0x25, 0x77, 0xc3, 0xc1, 0xf1, 0x85, 0x8c, 0xbf, 0x9b, 0x1b, 0x55, 0xc1, + 0xbc, 0x68, 0x45, 0xc1, 0x32, 0xfd, 0xfb, 0xc1, 0x00, 0x66, 0x96, 0xc1, 0x03, 0xcd, 0x77, 0xc1, + 0x16, 0x8c, 0x42, 0xc1, 0xc4, 0xfc, 0x1a, 0xc2, 0x78, 0x49, 0xe1, 0xbf, 0x7a, 0xd1, 0x5b, 0x40, + 0xb2, 0x6e, 0x60, 0xc2, 0xbe, 0x00, 0x96, 0xc1, 0xa6, 0xa4, 0x1d, 0xc1, 0x8f, 0x2c, 0x6a, 0xc2, + 0x36, 0x08, 0x81, 0xc2, 0xd2, 0x2c, 0xfb, 0x40, 0xf4, 0x48, 0x88, 0xc1, 0x82, 0x98, 0x9f, 0xc1, + 0x15, 0xe0, 0x35, 0xc2, 0xf4, 0x93, 0x92, 0xc1, 0x65, 0x55, 0x21, 0xc2, 0x8f, 0x30, 0x0c, 0xc2, + 0x95, 0x1f, 0x06, 0xc2, 0xc7, 0x5b, 0xf5, 0x40, 0xca, 0xdd, 0x3a, 0xc1, 0x41, 0xfc, 0x34, 0xc1, + 0x4b, 0xdf, 0x0d, 0xc1, 0x9d, 0x72, 0xb6, 0x41, 0xcd, 0x04, 0xb3, 0xc1, 0xaa, 0x36, 0x36, 0xc2, + 0x9c, 0x3a, 0x30, 0xc2, 0x04, 0x0b, 0x8e, 0xc1, 0x0d, 0x5f, 0x02, 0xc2, 0x84, 0x76, 0xa4, 0xc2, + 0x56, 0x58, 0x17, 0x41, 0x4b, 0xe1, 0x30, 0xc2, 0x7f, 0x3b, 0x62, 0x41, 0x5a, 0xe9, 0x05, 0x41, + 0x45, 0x4b, 0x15, 0xc2, 0x27, 0xb4, 0x1d, 0x41, 0x11, 0x78, 0x3d, 0xc2, 0xec, 0x55, 0xd8, 0xc1, + 0xa6, 0xce, 0x97, 0xc0, 0xf1, 0x32, 0x1a, 0x41, 0x63, 0x08, 0xaa, 0xc0, 0xf9, 0xd0, 0xf1, 0xc1, + 0x5c, 0x4b, 0xd5, 0xc1, 0x49, 0x68, 0x8d, 0x41, 0x85, 0x59, 0x0b, 0xc2, 0xae, 0x11, 0x03, 0xc2, + 0xa4, 0x0c, 0x86, 0xc2, 0xf5, 0xbd, 0x1b, 0x42, 0xc6, 0xba, 0xe5, 0x40, 0xc3, 0xff, 0xab, 0xc2, + 0x6b, 0x85, 0x56, 0x41, 0xdf, 0x10, 0xcd, 0xc0, 0x60, 0x48, 0x1c, 0x40, 0x89, 0xa8, 0x74, 0xbf, + 0x52, 0x0c, 0x1c, 0x41, 0x08, 0x0c, 0x81, 0xc1, 0xfc, 0x4f, 0x5b, 0xc2, 0x4f, 0x9c, 0x79, 0x40, + 0xa4, 0x70, 0x38, 0xc1, 0x7c, 0x65, 0xff, 0xc1, 0xb7, 0x5f, 0x07, 0x42, 0x6f, 0x59, 0xf8, 0xc1, + 0x28, 0x82, 0x1e, 0xc1, 0xcb, 0xd9, 0x14, 0x42, 0x4c, 0xe0, 0x02, 0xc2, 0x44, 0x0f, 0x21, 0xc2, + 0x00, 0x3a, 0x3b, 0xc2, 0x05, 0xdd, 0x06, 0xc2, 0x82, 0xbe, 0x5b, 0xc2, 0x69, 0x2a, 0x9f, 0xc2, + 0x9d, 0xef, 0x57, 0xc1, 0xcb, 0xf4, 0x87, 0xc1, 0x9d, 0xa3, 0xfa, 0xc1, 0x0b, 0x4c, 0x84, 0xc1, + 0x6b, 0xfa, 0x90, 0xbf, 0xca, 0x11, 0xa9, 0xc0, 0xdc, 0x2e, 0x26, 0xc2, 0x7e, 0x63, 0x23, 0xc2}; + +unsigned char conv2d_im2col_fp16_in[] = { + 0xc6, 0x35, 0xda, 0x43, 0xbc, 0xbe, 0x87, 0xc4, 0x4d, 0xbc, 0xf2, 0xa2, 0x76, 0xc2, 0x55, + 0xb8, 0xf2, 0x41, 0x5d, 0xc4, 0x37, 0x3b, 0x95, 0x3c, 0xce, 0xb6, 0xd6, 0xbe, 0x79, 0x34, + 0x73, 0xbf, 0x19, 0x32, 0x94, 0xb5, 0x1f, 0x34, 0xd5, 0xb7, 0x43, 0xbf, 0xba, 0x38, 0x90, + 0xc0, 0x6e, 0xc0, 0x8c, 0x39, 0xb4, 0xc3, 0xba, 0x30, 0x4f, 0x3c, 0xee, 0xc0, 0x07, 0xba, + 0x0f, 0xb6, 0x80, 0x3f, 0xfb, 0xc2, 0xf0, 0xc1, 0x0b, 0x41, 0x7e, 0x3c, 0x1f, 0xc3, 0x8d, + 0x3d, 0x3e, 0xb9, 0x0f, 0xbe, 0x20, 0x40, 0x23, 0xc2, 0x4c, 0x3f, 0x52, 0xc0, 0x3e, 0x37, + 0x47, 0xc4, 0x2b, 0x3c, 0x06, 0xc0, 0x1c, 0x43, 0xb5, 0xbd, 0x5e, 0xc0, 0xfe, 0x3d, 0x17, + 0xb7, 0x0f, 0xb9, 0x05, 0xc4, 0x37, 0xc4, 0xa9, 0xc1, 0xee, 0xc3, 0x94, 0x35, 0xc5, 0xb9}; +unsigned char conv2d_im2col_fp16_ker[] = { + 0xa8, 0x45, 0x01, 0x38, 0xdd, 0x44, 0x29, 0x40, 0x3f, 0xc1, 0xb4, 0x41, 0x84, 0x40, 0xa6, 0x43, + 0x6a, 0x3c, 0x21, 0x42, 0x28, 0xb9, 0x5a, 0x40, 0x29, 0x3d, 0xae, 0x40, 0xa5, 0x44, 0xac, 0x42, + 0x04, 0x38, 0x99, 0xbc, 0xa1, 0x3d, 0x84, 0xb7, 0x55, 0x44, 0x65, 0x45, 0xa8, 0x3d, 0x22, 0x41, + 0x1d, 0xbd, 0xdb, 0x42, 0x00, 0x41, 0xb1, 0xc1, 0x00, 0x44, 0xe0, 0x43, 0xa0, 0xb4, 0xd5, 0x3e, + 0xc8, 0x41, 0x1e, 0x42, 0x01, 0x43, 0x2c, 0x43, 0x48, 0xb5, 0xd3, 0x42, 0xfc, 0x35, 0x8b, 0xba, + 0x5f, 0x2e, 0xe0, 0x32, 0x30, 0x40, 0x2a, 0x43, 0x00, 0x46, 0xa5, 0x46, 0x6a, 0x42, 0xe5, 0x40, + 0xf2, 0xb9, 0xfb, 0xb9, 0x8a, 0x42, 0xd5, 0x3c, 0x0e, 0x3c, 0x4b, 0xc0, 0x0d, 0x40, 0xc8, 0x3f, + 0xc2, 0xc2, 0x83, 0x42, 0x5a, 0x3c, 0x5f, 0x38, 0x40, 0x46, 0xf1, 0xba, 0x55, 0xb5, 0x50, 0x45, + 0xb8, 0x3f, 0x97, 0xb9, 0x24, 0xbc, 0x4b, 0x41, 0xbb, 0xb9, 0xe0, 0x44, 0x2e, 0x39, 0x72, 0xbf, + 0x5d, 0x45, 0x64, 0x43, 0x11, 0x41, 0xd6, 0x2e, 0x5b, 0x45, 0x2f, 0xb8, 0x78, 0x42, 0x53, 0x42, + 0x63, 0x43, 0x98, 0x41, 0xa1, 0x3a, 0xb9, 0x3e, 0xa8, 0x40, 0xc9, 0x3e, 0x39, 0x42, 0x37, 0x3b, + 0x0f, 0xbc, 0x3c, 0x40, 0x2e, 0x45, 0x11, 0x43, 0x44, 0x44, 0xcc, 0x41, 0x0e, 0x40, 0x33, 0x43, + 0x68, 0x32, 0x5a, 0x3d, 0x6d, 0xb9, 0x90, 0x40, 0x40, 0x45, 0x83, 0x3f, 0xd6, 0x41, 0x56, 0x3e, + 0xd7, 0x42, 0x65, 0x43, 0x68, 0x40, 0xd0, 0x30, 0x2d, 0x3e, 0x0c, 0x44, 0x63, 0x46, 0x74, 0x3d, + 0x5e, 0xbb, 0xb4, 0xc5, 0x21, 0x3b, 0x6a, 0xbd, 0xef, 0x40, 0x5b, 0x3d, 0x70, 0xb8, 0x5e, 0x40, + 0x58, 0x45, 0x57, 0x40, 0x4e, 0x41, 0xe5, 0x3c, 0x88, 0x41, 0x39, 0x42, 0x11, 0x3e, 0x99, 0x43, + 0x78, 0x43, 0x30, 0xbe, 0x58, 0x3e, 0x52, 0x43, 0x80, 0x38, 0x7f, 0x41, 0xd0, 0x40, 0x66, 0x43, + 0x24, 0x3c, 0x0d, 0x44, 0x30, 0x42, 0x12, 0xbd, 0xfc, 0xbc, 0x06, 0x3c, 0x3a, 0x3d, 0xc9, 0x41, + 0xc4, 0x38, 0x4b, 0x42, 0x0d, 0x40, 0xad, 0xc0, 0x49, 0x41, 0xa6, 0x40, 0x52, 0x43, 0x2d, 0x45, + 0xeb, 0x35, 0xd4, 0x34, 0xb6, 0xae, 0xc5, 0xc0, 0x1d, 0x40, 0xed, 0x42, 0x59, 0x3a, 0x7f, 0x42, + 0xdd, 0x41, 0x39, 0x3f, 0x37, 0x34, 0x16, 0x3f, 0x5f, 0x3d, 0xc2, 0x3c, 0xb0, 0xb8, 0x3b, 0x40, + 0x2e, 0x41, 0x71, 0x41, 0x0a, 0xc1, 0xbf, 0x44, 0x17, 0x40, 0x52, 0x41, 0x28, 0x41, 0xb1, 0x42, + 0xe4, 0x40, 0x60, 0x3f, 0x1b, 0x38, 0xf9, 0x44, 0x90, 0x41, 0x28, 0xc4, 0x98, 0x39, 0x31, 0x41, + 0x28, 0x43, 0x9a, 0x41, 0x9f, 0xc0, 0xcd, 0x38, 0x15, 0x3d, 0xb6, 0x3e, 0xe5, 0x43, 0x4f, 0x46, + 0x09, 0x40, 0x51, 0x43, 0x9f, 0x43, 0x43, 0x3d, 0x12, 0x41, 0x94, 0xbc, 0xfb, 0x3e, 0xa3, 0x40, + 0xe5, 0x43, 0x3a, 0x41, 0xf9, 0x43, 0x3c, 0x40, 0xb4, 0x44, 0x67, 0xbc, 0x7d, 0x45, 0x37, 0x3c, + 0x3a, 0xb8, 0x53, 0x39, 0x24, 0x41, 0xa0, 0x43, 0x53, 0x3b, 0x0d, 0x24, 0xb3, 0x44, 0x1d, 0x42, + 0x37, 0x3f, 0x33, 0x38, 0x94, 0x40, 0xb4, 0x42, 0xe8, 0xac, 0x78, 0x44, 0xcd, 0x43, 0x42, 0xbb, + 0xa2, 0x3d, 0x21, 0x45, 0xcf, 0x42, 0x24, 0x3c, 0xc5, 0x44, 0x2f, 0x44, 0x87, 0x41, 0xfc, 0x46, + 0x25, 0x44, 0x75, 0x3c, 0x63, 0x3f, 0xd3, 0x3e, 0x2c, 0x3c, 0x74, 0x42, 0x53, 0x3e, 0x53, 0x3d, + 0x9b, 0x3d, 0x6f, 0x3f, 0x50, 0x42, 0x7d, 0x38, 0x0f, 0x43, 0x39, 0x41, 0xc0, 0x3c, 0x3b, 0x45, + 0x68, 0x34, 0x79, 0x25, 0x50, 0x40, 0xb2, 0xa2, 0x43, 0x43, 0x11, 0x38, 0xc0, 0x3f, 0x45, 0x3a, + 0x37, 0x36, 0x3e, 0x3e, 0x9d, 0x42, 0xf5, 0xbf, 0xb7, 0x46, 0xe0, 0x3e, 0xde, 0xaa, 0xf3, 0x34, + 0x88, 0x3e, 0x64, 0x3d, 0x59, 0x3b, 0xfd, 0xb6, 0xa2, 0x3c, 0xff, 0xbd, 0xf0, 0x3e, 0x5f, 0xa6, + 0xff, 0x3d, 0x4f, 0x40, 0xe4, 0xbf, 0x3a, 0x43, 0xe2, 0x43, 0x2d, 0x40, 0xbc, 0x3f, 0x49, 0x44, + 0x12, 0x3d, 0x7e, 0xb8, 0x5a, 0x2e, 0xef, 0x40, 0xf1, 0x3f, 0x05, 0x42, 0xf3, 0x3c, 0x1c, 0x44, + 0x7f, 0x31, 0xf8, 0x41, 0x32, 0xbd, 0x42, 0x41, 0xd1, 0x40, 0xa4, 0x39, 0x5b, 0x3e, 0x71, 0x41, + 0x63, 0x3d, 0x68, 0x43, 0x44, 0x3c, 0x44, 0x44, 0xaf, 0x40, 0xaf, 0x3e, 0x0c, 0x40, 0xe7, 0x26, + 0x24, 0xbf, 0xdc, 0x3c, 0x90, 0x38, 0x91, 0x39, 0x3e, 0x40, 0xcc, 0x3e, 0x9e, 0x3f, 0x5a, 0xad, + 0x9e, 0x45, 0x0e, 0x43, 0x12, 0x43, 0x97, 0x36, 0xe4, 0x42, 0xe7, 0x43, 0xfe, 0xbd, 0x98, 0x45, + 0x4b, 0x44, 0x51, 0x43, 0x21, 0x3e, 0x47, 0x3f, 0x66, 0x43, 0x30, 0x3d, 0xcc, 0x3e, 0x7e, 0x38, + 0xb9, 0x44, 0x33, 0x3a, 0x09, 0x46, 0xb7, 0xb3, 0x63, 0x3f, 0xb0, 0x3c, 0x3c, 0x35, 0x44, 0x40, + 0xbf, 0x40, 0x95, 0x44, 0x16, 0x41, 0xed, 0x42, 0x9c, 0x42, 0xc8, 0xc1, 0xb6, 0x3c, 0xd5, 0x41, + 0xd8, 0x40, 0x1b, 0x44, 0xac, 0x3b, 0x08, 0x49, 0x00, 0x3e, 0x68, 0x34, 0xf5, 0x45, 0xf8, 0x3d, + 0xc2, 0x40, 0xa8, 0x43, 0x6f, 0xb2, 0x6f, 0x41, 0x58, 0xbf, 0x74, 0x3d, 0x82, 0x3e, 0x8b, 0x40, + 0x80, 0xbf, 0xf7, 0x3d, 0x9f, 0x3e, 0x05, 0x3f, 0xe7, 0x3b, 0xd2, 0x3f, 0x8f, 0x45, 0xa8, 0x41, + 0x36, 0x3e, 0x7a, 0x40, 0x1d, 0x45, 0xdd, 0x43, 0xf0, 0x43, 0x0e, 0x45, 0x8f, 0x43, 0x4a, 0x42, + 0xf1, 0xbe, 0x31, 0x44, 0x5b, 0x40, 0x01, 0x38, 0x1e, 0x42, 0x27, 0x46, 0xbc, 0x44, 0x92, 0x42, + 0x5d, 0x44, 0x42, 0x41, 0xa0, 0x40, 0x58, 0x3a, 0xad, 0x43, 0x54, 0x44, 0xf5, 0xa8, 0x81, 0x3c, + 0x8b, 0x3e, 0x80, 0x41, 0x5e, 0x41, 0xe0, 0x3f, 0x39, 0x42, 0x14, 0x45, 0xae, 0x38, 0x03, 0xbe, + 0x39, 0x44, 0x0f, 0x44, 0x0d, 0xb0, 0x3f, 0x42, 0x27, 0x43, 0xc5, 0x43, 0x61, 0x41, 0xdf, 0xc0, + 0xd3, 0x40, 0x01, 0x3c, 0x95, 0x40, 0x59, 0x40, 0x62, 0x44, 0xb0, 0x41, 0x85, 0x3e, 0x61, 0xbc, + 0x05, 0x45, 0xf8, 0xbb, 0x3e, 0x43, 0x5b, 0x44, 0xb3, 0xbc, 0x11, 0x3d, 0x87, 0xbd, 0x72, 0x2d, + 0x82, 0x44, 0x7f, 0x44, 0xce, 0x42, 0x57, 0x42, 0x11, 0x3b, 0x4a, 0x43, 0x0f, 0x41, 0x96, 0x3c, + 0x61, 0x34, 0xd8, 0xaa, 0x9a, 0x44, 0x09, 0x3a, 0x84, 0x40, 0x11, 0xbc, 0xe5, 0x40, 0x29, 0x41, + 0x0d, 0x40, 0x4c, 0x45, 0x0e, 0x35, 0x4b, 0x3c, 0x23, 0xa9, 0xb9, 0x43, 0x1f, 0x3f, 0x21, 0x40, + 0x3b, 0x44, 0x30, 0x41, 0xa5, 0x45, 0xd5, 0x42, 0x8f, 0x44, 0xe0, 0x42, 0x2f, 0xb9, 0x5e, 0x3f, + 0x03, 0x3f, 0xd0, 0x3f, 0x2e, 0x41, 0x35, 0x3d, 0xc0, 0x3b, 0xc0, 0xb5, 0x20, 0x41, 0x87, 0x43, + 0xf8, 0x43, 0x9a, 0xc0, 0x63, 0x38, 0x38, 0x42, 0x58, 0x3a, 0x3a, 0x44, 0x5d, 0x39, 0x1d, 0x40, + 0xd7, 0xb9, 0x96, 0x45, 0x5e, 0x2d, 0xe4, 0x46, 0xa0, 0x3c, 0x6e, 0x42, 0x48, 0x44, 0x2a, 0x42, + 0xef, 0x46, 0x93, 0xc3, 0x7b, 0xb8, 0xc5, 0x42, 0x93, 0xb2, 0x37, 0x40, 0x89, 0x43, 0x4c, 0x44, + 0x3b, 0x44, 0xd9, 0x3a, 0x30, 0x39, 0x93, 0xc0, 0x1c, 0xc1, 0x32, 0x38, 0x56, 0x41, 0xa5, 0x44, + 0xe6, 0xbc, 0x44, 0xbf, 0x76, 0x3c, 0x32, 0x36, 0x43, 0x40, 0xce, 0x3c, 0xea, 0x3c, 0xc9, 0x43, + 0x45, 0x40, 0xc6, 0x43, 0x11, 0x3d, 0x5b, 0x40, 0x20, 0x40, 0xba, 0xb7, 0x14, 0x44, 0x6f, 0x35, + 0x91, 0x44}; +unsigned char conv2d_im2col_fp16_ker1[] = { + 0xa8, 0x45, 0xb1, 0xc1, 0x0d, 0x40, 0x98, 0x41, 0x2d, 0x3e, 0x66, 0x43, 0x37, 0x34, 0xb6, 0x3e, + 0x01, 0x38, 0x00, 0x44, 0xc8, 0x3f, 0xa1, 0x3a, 0x0c, 0x44, 0x24, 0x3c, 0x16, 0x3f, 0xe5, 0x43, + 0xdd, 0x44, 0xe0, 0x43, 0xc2, 0xc2, 0xb9, 0x3e, 0x63, 0x46, 0x0d, 0x44, 0x5f, 0x3d, 0x4f, 0x46, + 0x29, 0x40, 0xa0, 0xb4, 0x83, 0x42, 0xa8, 0x40, 0x74, 0x3d, 0x30, 0x42, 0xc2, 0x3c, 0x09, 0x40, + 0x3f, 0xc1, 0xd5, 0x3e, 0x5a, 0x3c, 0xc9, 0x3e, 0x5e, 0xbb, 0x12, 0xbd, 0xb0, 0xb8, 0x51, 0x43, + 0xb4, 0x41, 0xc8, 0x41, 0x5f, 0x38, 0x39, 0x42, 0xb4, 0xc5, 0xfc, 0xbc, 0x3b, 0x40, 0x9f, 0x43, + 0x84, 0x40, 0x1e, 0x42, 0x40, 0x46, 0x37, 0x3b, 0x21, 0x3b, 0x06, 0x3c, 0x2e, 0x41, 0x43, 0x3d, + 0xa6, 0x43, 0x01, 0x43, 0xf1, 0xba, 0x0f, 0xbc, 0x6a, 0xbd, 0x3a, 0x3d, 0x71, 0x41, 0x12, 0x41, + 0x6a, 0x3c, 0x2c, 0x43, 0x55, 0xb5, 0x3c, 0x40, 0xef, 0x40, 0xc9, 0x41, 0x0a, 0xc1, 0x94, 0xbc, + 0x21, 0x42, 0x48, 0xb5, 0x50, 0x45, 0x2e, 0x45, 0x5b, 0x3d, 0xc4, 0x38, 0xbf, 0x44, 0xfb, 0x3e, + 0x28, 0xb9, 0xd3, 0x42, 0xb8, 0x3f, 0x11, 0x43, 0x70, 0xb8, 0x4b, 0x42, 0x17, 0x40, 0xa3, 0x40, + 0x5a, 0x40, 0xfc, 0x35, 0x97, 0xb9, 0x44, 0x44, 0x5e, 0x40, 0x0d, 0x40, 0x52, 0x41, 0xe5, 0x43, + 0x29, 0x3d, 0x8b, 0xba, 0x24, 0xbc, 0xcc, 0x41, 0x58, 0x45, 0xad, 0xc0, 0x28, 0x41, 0x3a, 0x41, + 0xae, 0x40, 0x5f, 0x2e, 0x4b, 0x41, 0x0e, 0x40, 0x57, 0x40, 0x49, 0x41, 0xb1, 0x42, 0xf9, 0x43, + 0xa5, 0x44, 0xe0, 0x32, 0xbb, 0xb9, 0x33, 0x43, 0x4e, 0x41, 0xa6, 0x40, 0xe4, 0x40, 0x3c, 0x40, + 0xac, 0x42, 0x30, 0x40, 0xe0, 0x44, 0x68, 0x32, 0xe5, 0x3c, 0x52, 0x43, 0x60, 0x3f, 0xb4, 0x44, + 0x04, 0x38, 0x2a, 0x43, 0x2e, 0x39, 0x5a, 0x3d, 0x88, 0x41, 0x2d, 0x45, 0x1b, 0x38, 0x67, 0xbc, + 0x99, 0xbc, 0x00, 0x46, 0x72, 0xbf, 0x6d, 0xb9, 0x39, 0x42, 0xeb, 0x35, 0xf9, 0x44, 0x7d, 0x45, + 0xa1, 0x3d, 0xa5, 0x46, 0x5d, 0x45, 0x90, 0x40, 0x11, 0x3e, 0xd4, 0x34, 0x90, 0x41, 0x37, 0x3c, + 0x84, 0xb7, 0x6a, 0x42, 0x64, 0x43, 0x40, 0x45, 0x99, 0x43, 0xb6, 0xae, 0x28, 0xc4, 0x3a, 0xb8, + 0x55, 0x44, 0xe5, 0x40, 0x11, 0x41, 0x83, 0x3f, 0x78, 0x43, 0xc5, 0xc0, 0x98, 0x39, 0x53, 0x39, + 0x65, 0x45, 0xf2, 0xb9, 0xd6, 0x2e, 0xd6, 0x41, 0x30, 0xbe, 0x1d, 0x40, 0x31, 0x41, 0x24, 0x41, + 0xa8, 0x3d, 0xfb, 0xb9, 0x5b, 0x45, 0x56, 0x3e, 0x58, 0x3e, 0xed, 0x42, 0x28, 0x43, 0xa0, 0x43, + 0x22, 0x41, 0x8a, 0x42, 0x2f, 0xb8, 0xd7, 0x42, 0x52, 0x43, 0x59, 0x3a, 0x9a, 0x41, 0x53, 0x3b, + 0x1d, 0xbd, 0xd5, 0x3c, 0x78, 0x42, 0x65, 0x43, 0x80, 0x38, 0x7f, 0x42, 0x9f, 0xc0, 0x0d, 0x24, + 0xdb, 0x42, 0x0e, 0x3c, 0x53, 0x42, 0x68, 0x40, 0x7f, 0x41, 0xdd, 0x41, 0xcd, 0x38, 0xb3, 0x44, + 0x00, 0x41, 0x4b, 0xc0, 0x63, 0x43, 0xd0, 0x30, 0xd0, 0x40, 0x39, 0x3f, 0x15, 0x3d, 0x1d, 0x42, + 0x37, 0x3f, 0x7d, 0x38, 0xf0, 0x3e, 0x68, 0x43, 0x66, 0x43, 0xf8, 0x3d, 0x5b, 0x40, 0xc5, 0x43, + 0x33, 0x38, 0x0f, 0x43, 0x5f, 0xa6, 0x44, 0x3c, 0x30, 0x3d, 0xc2, 0x40, 0x01, 0x38, 0x61, 0x41, + 0x94, 0x40, 0x39, 0x41, 0xff, 0x3d, 0x44, 0x44, 0xcc, 0x3e, 0xa8, 0x43, 0x1e, 0x42, 0xdf, 0xc0, + 0xb4, 0x42, 0xc0, 0x3c, 0x4f, 0x40, 0xaf, 0x40, 0x7e, 0x38, 0x6f, 0xb2, 0x27, 0x46, 0xd3, 0x40, + 0xe8, 0xac, 0x3b, 0x45, 0xe4, 0xbf, 0xaf, 0x3e, 0xb9, 0x44, 0x6f, 0x41, 0xbc, 0x44, 0x01, 0x3c, + 0x78, 0x44, 0x68, 0x34, 0x3a, 0x43, 0x0c, 0x40, 0x33, 0x3a, 0x58, 0xbf, 0x92, 0x42, 0x95, 0x40, + 0xcd, 0x43, 0x79, 0x25, 0xe2, 0x43, 0xe7, 0x26, 0x09, 0x46, 0x74, 0x3d, 0x5d, 0x44, 0x59, 0x40, + 0x42, 0xbb, 0x50, 0x40, 0x2d, 0x40, 0x24, 0xbf, 0xb7, 0xb3, 0x82, 0x3e, 0x42, 0x41, 0x62, 0x44, + 0xa2, 0x3d, 0xb2, 0xa2, 0xbc, 0x3f, 0xdc, 0x3c, 0x63, 0x3f, 0x8b, 0x40, 0xa0, 0x40, 0xb0, 0x41, + 0x21, 0x45, 0x43, 0x43, 0x49, 0x44, 0x90, 0x38, 0xb0, 0x3c, 0x80, 0xbf, 0x58, 0x3a, 0x85, 0x3e, + 0xcf, 0x42, 0x11, 0x38, 0x12, 0x3d, 0x91, 0x39, 0x3c, 0x35, 0xf7, 0x3d, 0xad, 0x43, 0x61, 0xbc, + 0x24, 0x3c, 0xc0, 0x3f, 0x7e, 0xb8, 0x3e, 0x40, 0x44, 0x40, 0x9f, 0x3e, 0x54, 0x44, 0x05, 0x45, + 0xc5, 0x44, 0x45, 0x3a, 0x5a, 0x2e, 0xcc, 0x3e, 0xbf, 0x40, 0x05, 0x3f, 0xf5, 0xa8, 0xf8, 0xbb, + 0x2f, 0x44, 0x37, 0x36, 0xef, 0x40, 0x9e, 0x3f, 0x95, 0x44, 0xe7, 0x3b, 0x81, 0x3c, 0x3e, 0x43, + 0x87, 0x41, 0x3e, 0x3e, 0xf1, 0x3f, 0x5a, 0xad, 0x16, 0x41, 0xd2, 0x3f, 0x8b, 0x3e, 0x5b, 0x44, + 0xfc, 0x46, 0x9d, 0x42, 0x05, 0x42, 0x9e, 0x45, 0xed, 0x42, 0x8f, 0x45, 0x80, 0x41, 0xb3, 0xbc, + 0x25, 0x44, 0xf5, 0xbf, 0xf3, 0x3c, 0x0e, 0x43, 0x9c, 0x42, 0xa8, 0x41, 0x5e, 0x41, 0x11, 0x3d, + 0x75, 0x3c, 0xb7, 0x46, 0x1c, 0x44, 0x12, 0x43, 0xc8, 0xc1, 0x36, 0x3e, 0xe0, 0x3f, 0x87, 0xbd, + 0x63, 0x3f, 0xe0, 0x3e, 0x7f, 0x31, 0x97, 0x36, 0xb6, 0x3c, 0x7a, 0x40, 0x39, 0x42, 0x72, 0x2d, + 0xd3, 0x3e, 0xde, 0xaa, 0xf8, 0x41, 0xe4, 0x42, 0xd5, 0x41, 0x1d, 0x45, 0x14, 0x45, 0x82, 0x44, + 0x2c, 0x3c, 0xf3, 0x34, 0x32, 0xbd, 0xe7, 0x43, 0xd8, 0x40, 0xdd, 0x43, 0xae, 0x38, 0x7f, 0x44, + 0x74, 0x42, 0x88, 0x3e, 0x42, 0x41, 0xfe, 0xbd, 0x1b, 0x44, 0xf0, 0x43, 0x03, 0xbe, 0xce, 0x42, + 0x53, 0x3e, 0x64, 0x3d, 0xd1, 0x40, 0x98, 0x45, 0xac, 0x3b, 0x0e, 0x45, 0x39, 0x44, 0x57, 0x42, + 0x53, 0x3d, 0x59, 0x3b, 0xa4, 0x39, 0x4b, 0x44, 0x08, 0x49, 0x8f, 0x43, 0x0f, 0x44, 0x11, 0x3b, + 0x9b, 0x3d, 0xfd, 0xb6, 0x5b, 0x3e, 0x51, 0x43, 0x00, 0x3e, 0x4a, 0x42, 0x0d, 0xb0, 0x4a, 0x43, + 0x6f, 0x3f, 0xa2, 0x3c, 0x71, 0x41, 0x21, 0x3e, 0x68, 0x34, 0xf1, 0xbe, 0x3f, 0x42, 0x0f, 0x41, + 0x50, 0x42, 0xff, 0xbd, 0x63, 0x3d, 0x47, 0x3f, 0xf5, 0x45, 0x31, 0x44, 0x27, 0x43, 0x96, 0x3c, + 0x61, 0x34, 0x35, 0x3d, 0xd8, 0xaa, 0xc0, 0x3b, 0x9a, 0x44, 0xc0, 0xb5, 0x09, 0x3a, 0x20, 0x41, + 0x84, 0x40, 0x87, 0x43, 0x11, 0xbc, 0xf8, 0x43, 0xe5, 0x40, 0x9a, 0xc0, 0x29, 0x41, 0x63, 0x38, + 0x0d, 0x40, 0x38, 0x42, 0x4c, 0x45, 0x58, 0x3a, 0x0e, 0x35, 0x3a, 0x44, 0x4b, 0x3c, 0x5d, 0x39, + 0x23, 0xa9, 0x1d, 0x40, 0xb9, 0x43, 0xd7, 0xb9, 0x1f, 0x3f, 0x96, 0x45, 0x21, 0x40, 0x5e, 0x2d, + 0x3b, 0x44, 0xe4, 0x46, 0x30, 0x41, 0xa0, 0x3c, 0xa5, 0x45, 0x6e, 0x42, 0xd5, 0x42, 0x48, 0x44, + 0x8f, 0x44, 0x2a, 0x42, 0xe0, 0x42, 0xef, 0x46, 0x2f, 0xb9, 0x93, 0xc3, 0x5e, 0x3f, 0x7b, 0xb8, + 0x03, 0x3f, 0xc5, 0x42, 0xd0, 0x3f, 0x93, 0xb2, 0x2e, 0x41, 0x37, 0x40, 0x89, 0x43, 0x4c, 0x44, + 0x3b, 0x44, 0xd9, 0x3a, 0x30, 0x39, 0x93, 0xc0, 0x1c, 0xc1, 0x32, 0x38, 0x56, 0x41, 0xa5, 0x44, + 0xe6, 0xbc, 0x44, 0xbf, 0x76, 0x3c, 0x32, 0x36, 0x43, 0x40, 0xce, 0x3c, 0xea, 0x3c, 0xc9, 0x43, + 0x45, 0x40, 0xc6, 0x43, 0x11, 0x3d, 0x5b, 0x40, 0x20, 0x40, 0xba, 0xb7, 0x14, 0x44, 0x6f, 0x35, + 0x91, 0x44}; +unsigned char conv2d_im2col_fp16_bias[] = { + 0x04, 0xc5, 0x36, 0x40, 0x20, 0xc2, 0xbe, 0xca, 0x72, 0x44, 0x1f, 0x3a, 0xb8, + 0x4c, 0x91, 0x41, 0xa7, 0x46, 0x3b, 0xc5, 0x65, 0x48, 0x2c, 0xc0, 0xb4, 0xc4, + 0xbc, 0xc1, 0xa9, 0x49, 0xe1, 0x40, 0x62, 0x49, 0x8a, 0x4a, 0x8f, 0x36}; +unsigned char conv2d_im2col_fp16_out[] = { + 0xc0, 0xcc, 0x4f, 0xd1, 0x29, 0xd1, 0xc5, 0x4d, 0x7b, 0xd2, 0xf2, 0xcc, 0x70, 0xc9, 0xb9, 0xca, + 0xe5, 0xd5, 0x86, 0xd0, 0x6e, 0xce, 0xd5, 0xd4, 0x04, 0xd0, 0x42, 0xd0, 0x20, 0xc9, 0x6f, 0xc8, + 0x7b, 0xd3, 0x12, 0xcf, 0x88, 0xd3, 0x7e, 0xce, 0x2b, 0xcf, 0x15, 0xc1, 0x4f, 0xd1, 0x9e, 0xcc, + 0x79, 0xc6, 0x2f, 0x4d, 0xe6, 0x40, 0x59, 0xd4, 0xf6, 0xcf, 0xab, 0xca, 0x92, 0xd1, 0x29, 0xd2, + 0x0d, 0x47, 0x17, 0xd1, 0x72, 0xc7, 0x2e, 0xc2, 0xfb, 0xc9, 0x08, 0xc7, 0xd5, 0xce, 0xfa, 0x34, + 0x75, 0xc4, 0xcd, 0xd1, 0x5f, 0x46, 0x6a, 0xcf, 0x33, 0x44, 0xaf, 0xd3, 0x41, 0x4d, 0x0e, 0x46, + 0x26, 0xd3, 0xb0, 0xd4, 0x1e, 0xd1, 0x12, 0xd5, 0x79, 0xd2, 0x73, 0xbd, 0xac, 0xce, 0x3f, 0xcf, + 0xff, 0xcf, 0x4d, 0x3a, 0x50, 0xd2, 0x40, 0xd1, 0xb3, 0xcf, 0x47, 0xcf, 0xd3, 0xd0, 0x17, 0xd2, + 0x37, 0xcd, 0xb8, 0xcf, 0x62, 0xd4, 0x38, 0xcd, 0xa4, 0xd4, 0x3d, 0xd2, 0x65, 0xd1, 0x33, 0xd5, + 0x03, 0xd3, 0x04, 0xd3, 0x11, 0xd1, 0x49, 0xd1, 0xaa, 0xd2, 0x6d, 0xd2, 0xee, 0xd3, 0x8d, 0xd2, + 0xff, 0xd2, 0x3e, 0xc7, 0x31, 0x50, 0x4d, 0xce, 0x79, 0xc5, 0x25, 0x51, 0xef, 0xcb, 0x70, 0xd4, + 0x6e, 0xcc, 0x88, 0xd0, 0x56, 0xd2, 0x85, 0xd2, 0xbe, 0x45, 0x35, 0xd2, 0x79, 0xd0, 0x6d, 0xc6, + 0xf9, 0xc9, 0x60, 0xcf, 0x6c, 0xc8, 0x96, 0xce, 0x66, 0xd1, 0xd4, 0xd0, 0x0b, 0x4d, 0x2d, 0xcc, + 0x02, 0xcc, 0xf8, 0xb8, 0xa8, 0x46, 0x92, 0xd1, 0x11, 0xd4, 0x89, 0xc8, 0xd5, 0xd1, 0xd6, 0xd1, + 0x08, 0xd2, 0x65, 0xd0, 0x1e, 0xcb, 0x34, 0xcb, 0x32, 0xd0, 0x15, 0xce, 0x87, 0xcb, 0xd5, 0xbe, + 0xaf, 0x4e, 0xef, 0x42, 0xfb, 0xcf, 0xe4, 0x4d, 0xb7, 0xc9, 0x42, 0xc9, 0xad, 0x45, 0xf9, 0xcc, + 0xf7, 0xc9, 0x53, 0xc8, 0xb0, 0xbe, 0xb8, 0xcc, 0xfc, 0x48, 0xd8, 0xce, 0xd1, 0xc6, 0xef, 0x48, + 0x05, 0xd1, 0x13, 0xc9, 0x0d, 0xcc, 0x46, 0x4c, 0xf7, 0x48, 0x6f, 0xcd, 0x2e, 0xd2, 0xfa, 0xc8, + 0x36, 0xd1, 0x66, 0xca, 0x08, 0xd1, 0x24, 0xd2, 0xf9, 0xd2, 0x2c, 0xd4, 0xa0, 0xd4, 0x40, 0xcf, + 0x0b, 0xd2, 0xf0, 0xd2, 0x39, 0xd0, 0x8f, 0xc5, 0xaa, 0xd1, 0x42, 0xd2, 0x85, 0xcd, 0xbd, 0xc7, + 0x9a, 0xc7, 0x33, 0xd1, 0xd1, 0xcc, 0x3b, 0xd0, 0x34, 0xce, 0xbd, 0xcd, 0x41, 0xd0, 0x7d, 0xc6, + 0x34, 0xd6, 0x69, 0xd0, 0xb7, 0xce, 0x37, 0xd4, 0xe8, 0xcf, 0xf5, 0xd0, 0x59, 0xcf, 0x57, 0xc6, + 0x84, 0xcf, 0x60, 0xce, 0x57, 0xd1, 0xe2, 0xcc, 0x39, 0x2c, 0xc3, 0x44, 0xd4, 0xd1, 0xa4, 0xcb, + 0xfa, 0xd0, 0x75, 0x48, 0x4a, 0xd2, 0x9f, 0xd1, 0xdf, 0xc3, 0x69, 0xd3, 0xec, 0xd0, 0x75, 0xca, + 0x82, 0xcf, 0x1a, 0xce, 0xeb, 0xcf, 0x88, 0xcc, 0xbb, 0xce, 0xd4, 0xcd, 0x5c, 0xcf, 0xd6, 0xcc, + 0x60, 0x3c, 0x6c, 0xcf, 0x9c, 0xce, 0x30, 0x42, 0xeb, 0xc5, 0xbc, 0xba, 0x73, 0xcd, 0x24, 0x49, + 0x75, 0xd3, 0xe0, 0xc9, 0xc7, 0xd1, 0x27, 0xd0, 0x05, 0xd1, 0xc8, 0xb6, 0x02, 0xcc, 0xcd, 0xc9, + 0x98, 0xcb, 0xea, 0x34, 0x7e, 0xce, 0xba, 0xcc, 0x13, 0xcd, 0x45, 0xd1, 0x1d, 0x45, 0x27, 0xd3, + 0x7d, 0xc8, 0xb9, 0xcc, 0xbd, 0xd0, 0x61, 0xce, 0x5c, 0xd1, 0x03, 0xd3, 0x0c, 0xd2, 0xb5, 0xce, + 0x67, 0xd1, 0x3e, 0xd4, 0x4c, 0xd0, 0x8a, 0xcf, 0xee, 0xd0, 0x81, 0xd0, 0x26, 0xcc, 0xbb, 0xcf, + 0x38, 0xd2, 0x27, 0xc5, 0x37, 0xd1, 0x11, 0xd3, 0xf6, 0xc8, 0x1e, 0xc8, 0xcd, 0xd2, 0x0e, 0x50, + 0x2c, 0xd6, 0xfa, 0xd2, 0x54, 0xc2, 0x11, 0xd5, 0x5f, 0xd1, 0xf8, 0xd4, 0xab, 0xc8, 0xb1, 0xd1, + 0xda, 0xd4, 0xd0, 0xce, 0xbd, 0xd1, 0x77, 0xd0, 0x48, 0xcc, 0xc6, 0xd1, 0x96, 0x46, 0xc2, 0xd1, + 0xe7, 0xcc, 0x3b, 0x49, 0xba, 0xd1, 0xe5, 0xd0, 0x08, 0xd2, 0x9c, 0xce, 0x39, 0xd2, 0x3e, 0xd2, + 0x30, 0xcb, 0xe9, 0xd2, 0x64, 0xd0, 0x4b, 0xd1, 0x9f, 0xd1, 0xaa, 0xd3, 0x8e, 0xd0, 0xeb, 0xcd, + 0x16, 0xc6, 0x59, 0xc6, 0x0d, 0x46, 0xda, 0xd0, 0xf4, 0xce, 0x65, 0x45, 0x18, 0xcb, 0xe9, 0xd0, + 0x08, 0xd1, 0xb6, 0xd0, 0x41, 0xd3, 0xf7, 0xcc, 0xfe, 0xcf, 0x38, 0xd0, 0x00, 0xcc, 0x94, 0xcf, + 0x93, 0xcf, 0x1a, 0xce, 0x6c, 0xbc, 0xa7, 0xca, 0x2a, 0xca, 0xde, 0xcf, 0xb2, 0xcc, 0xba, 0xcb, + 0x10, 0xca, 0xd7, 0xd0, 0xe8, 0xbe, 0xdf, 0x42, 0x00, 0xd3, 0xb1, 0xcc, 0xe8, 0xc8, 0x4f, 0xd3, + 0x08, 0xd4, 0xda, 0x47, 0x40, 0xcc, 0xfc, 0xcc, 0xae, 0xd1, 0x93, 0xcc, 0x0a, 0xd1, 0x60, 0xd0, + 0x31, 0xd0, 0xae, 0x47, 0xd4, 0xc9, 0xa4, 0xc9, 0x6c, 0xc8, 0xb0, 0x4d, 0x96, 0xcd, 0xaf, 0xd1, + 0x80, 0xd1, 0x6f, 0xcc, 0x12, 0xd0, 0x22, 0xd5, 0xbd, 0x48, 0x85, 0xd1, 0x13, 0x4b, 0x31, 0x48, + 0xaa, 0xd0, 0xee, 0x48, 0xea, 0xd1, 0xc1, 0xce, 0xbb, 0xc4, 0xd5, 0x48, 0x4e, 0xc5, 0x8c, 0xcf, + 0xa7, 0xce, 0x6a, 0x4c, 0x5a, 0xd0, 0x18, 0xd0, 0x2f, 0xd4, 0xde, 0x50, 0x33, 0x47, 0x5e, 0xd5, + 0xb4, 0x4a, 0x61, 0xc6, 0xea, 0x40, 0x9e, 0xbb, 0xe2, 0x48, 0x06, 0xcc, 0xd9, 0xd2, 0xdc, 0x43, + 0xc3, 0xc9, 0xf9, 0xcf, 0x3a, 0x50, 0xc1, 0xcf, 0xf0, 0xc8, 0xa7, 0x50, 0x16, 0xd0, 0x08, 0xd1, + 0xd9, 0xd1, 0x37, 0xd0, 0xdd, 0xd2, 0xf8, 0xd4, 0xbd, 0xca, 0x3e, 0xcc, 0xd3, 0xcf, 0x22, 0xcc, + 0x7f, 0xbc, 0x47, 0xc5, 0x30, 0xd1, 0x1c, 0xd1}; + +/*********************************** conv2d_winograd *****************************/ +// input: [1, 8, 14, 14] +// kernel: [16, 8, 3, 3] +// bias: [16] +// output: [1, 16, 14, 14] +// pad_left = pad_right = pad_top = pad_down = 1 + +unsigned char conv2d_winograd_fp32_in[] = { + 0xd6, 0x41, 0x27, 0xbf, 0x09, 0x15, 0x18, 0x3e, 0xc6, 0x08, 0x85, 0xbf, 0xf3, 0x50, 0x2e, 0xc0, + 0xf6, 0x82, 0xc7, 0xbf, 0x9c, 0xc8, 0x1d, 0xc0, 0xe1, 0x72, 0xa3, 0x3f, 0xc7, 0xaa, 0xfe, 0xbf, + 0xd4, 0xe1, 0xae, 0xbf, 0x8e, 0xa8, 0xa0, 0xbf, 0xd0, 0x40, 0x62, 0x3d, 0xac, 0xf1, 0x0b, 0xbf, + 0xb8, 0x79, 0x4e, 0xbc, 0x7a, 0x94, 0x95, 0xbf, 0x25, 0x29, 0xe3, 0xbf, 0x1d, 0x4c, 0x4c, 0xc0, + 0x97, 0xae, 0xf8, 0xbe, 0xf5, 0x97, 0x50, 0x3f, 0x40, 0x18, 0x8e, 0xbe, 0x9b, 0x20, 0xbd, 0xbf, + 0xcf, 0xf3, 0x39, 0xbf, 0x48, 0x7a, 0xd3, 0xbf, 0xfb, 0xa1, 0x15, 0xbf, 0xf9, 0x84, 0x29, 0x3f, + 0x33, 0x49, 0x35, 0xc0, 0xce, 0x18, 0xaa, 0xbd, 0xbe, 0x9f, 0xe9, 0xbf, 0xad, 0x73, 0x31, 0xbf, + 0x23, 0xde, 0xd3, 0xbf, 0x5e, 0xca, 0x17, 0xbf, 0x0a, 0xbb, 0xe0, 0xbf, 0xee, 0x9b, 0x83, 0xbf, + 0x6e, 0x81, 0x6f, 0xbf, 0x70, 0xca, 0x79, 0xbe, 0x66, 0x4f, 0x91, 0x3d, 0xc1, 0xe1, 0x3c, 0xc0, + 0x77, 0x4d, 0x99, 0xbe, 0x1d, 0xc8, 0x1b, 0x3f, 0x18, 0x00, 0x9c, 0xbf, 0x10, 0xc2, 0x28, 0xbf, + 0xab, 0xfd, 0xe6, 0xbf, 0xdf, 0x11, 0xdc, 0x3e, 0xb0, 0xe3, 0xbd, 0xbf, 0xd0, 0x25, 0x1d, 0xc0, + 0xc8, 0xff, 0x12, 0xc0, 0xcb, 0xc8, 0xab, 0xbf, 0x1b, 0x61, 0x6b, 0xbf, 0x63, 0x38, 0x5f, 0x3d, + 0x93, 0x6f, 0x04, 0xbf, 0x59, 0x88, 0xcd, 0xbf, 0x87, 0x33, 0x41, 0xbf, 0xa1, 0x96, 0x9a, 0xbf, + 0xad, 0x23, 0x07, 0xc0, 0xe4, 0xad, 0xb3, 0xbf, 0x2c, 0x18, 0x5f, 0xbf, 0x55, 0x90, 0x4f, 0xbe, + 0xcc, 0xab, 0x00, 0xc0, 0xa0, 0x9a, 0xa2, 0xbe, 0x50, 0x06, 0xda, 0x3d, 0x34, 0xe6, 0xac, 0xbe, + 0x78, 0x12, 0x24, 0xbf, 0x6d, 0x22, 0x32, 0xbf, 0xa5, 0xce, 0xc7, 0xbc, 0xf8, 0x13, 0x0f, 0xc0, + 0xe4, 0x9f, 0x99, 0xbf, 0x4b, 0x0d, 0x22, 0xc0, 0x16, 0xe3, 0xa5, 0xbf, 0xbd, 0x5a, 0x29, 0xc0, + 0x57, 0x1a, 0xc7, 0xbf, 0x2a, 0xeb, 0x65, 0xbe, 0x7e, 0x30, 0x80, 0xbf, 0x43, 0xb6, 0x71, 0x3f, + 0xba, 0x61, 0x19, 0xc0, 0x70, 0x51, 0x94, 0xbf, 0x82, 0x9d, 0x56, 0x3c, 0xd1, 0x8c, 0xcf, 0xbf, + 0x81, 0x42, 0xf0, 0xbf, 0xba, 0xf4, 0x74, 0xbf, 0x6d, 0xdc, 0xbe, 0xbf, 0x5a, 0x0e, 0xfb, 0xbf, + 0x25, 0x14, 0x46, 0xbe, 0x7e, 0x99, 0x16, 0xbf, 0xe9, 0x4e, 0x7c, 0xbf, 0xda, 0x4e, 0x8b, 0xbf, + 0xb2, 0x67, 0x72, 0xbd, 0xa6, 0x3f, 0x56, 0x3f, 0x55, 0x03, 0x01, 0xbf, 0x0d, 0x52, 0xb2, 0xbf, + 0x10, 0xf4, 0x94, 0xbf, 0x18, 0x5c, 0x03, 0xbf, 0xbd, 0xfe, 0xcd, 0xbf, 0xcd, 0x04, 0x39, 0xc0, + 0x40, 0x8a, 0xb5, 0xbf, 0xef, 0x5f, 0x06, 0xc0, 0x47, 0x46, 0x68, 0xbf, 0xef, 0x05, 0x8c, 0x3f, + 0xf2, 0xf3, 0x87, 0x3e, 0x80, 0x81, 0xc0, 0xbf, 0x33, 0xad, 0xdd, 0xbf, 0x3a, 0xd5, 0x67, 0x3e, + 0x84, 0x64, 0x97, 0xbf, 0x00, 0xc3, 0xe9, 0xbf, 0x22, 0x05, 0x49, 0xbf, 0x86, 0xfa, 0xb5, 0xbf, + 0x5d, 0x33, 0x31, 0xbf, 0x92, 0xc1, 0x60, 0x3d, 0x65, 0x31, 0x09, 0x3f, 0xb1, 0xa4, 0x25, 0xbf, + 0xe1, 0xee, 0x09, 0xc0, 0xb2, 0xec, 0xb8, 0xbf, 0x79, 0x62, 0x1d, 0x3e, 0xe7, 0xf8, 0x88, 0x3e, + 0x14, 0x78, 0xc0, 0xbf, 0xb2, 0xf5, 0xc8, 0xbf, 0x30, 0xac, 0xed, 0xbf, 0x99, 0x5e, 0xd3, 0xbf, + 0x10, 0x6a, 0xc6, 0xbf, 0x57, 0x31, 0xf1, 0xbf, 0xdf, 0x68, 0x47, 0x3f, 0x3c, 0x95, 0xf7, 0x3e, + 0x35, 0x70, 0xbf, 0xbf, 0x86, 0x9e, 0x3c, 0xbf, 0xad, 0x11, 0x30, 0xbf, 0x76, 0x94, 0x9a, 0xbf, + 0x4b, 0x9d, 0x3c, 0xc0, 0x59, 0x7a, 0x6c, 0xbf, 0x95, 0xce, 0x3c, 0x3f, 0x3f, 0x8e, 0x21, 0xc0, + 0x8e, 0xdb, 0xa2, 0xbf, 0xd4, 0x81, 0x5b, 0xbd, 0xa2, 0xc7, 0xa0, 0xbe, 0xb0, 0x79, 0x07, 0xc0, + 0x07, 0x8f, 0x9a, 0xc0, 0x38, 0xb8, 0x10, 0x3d, 0x20, 0x95, 0xeb, 0xbf, 0x4b, 0xc5, 0x66, 0xbf, + 0xa0, 0x02, 0x82, 0xbf, 0xc6, 0x6e, 0x87, 0xbf, 0x7e, 0x1f, 0xa5, 0x3e, 0xb4, 0x19, 0xdc, 0xbe, + 0xd2, 0x2e, 0x13, 0xc0, 0x0c, 0xfa, 0xcd, 0xbf, 0x63, 0xd1, 0x95, 0xbf, 0x3c, 0x51, 0x9c, 0xbd, + 0x02, 0x26, 0x04, 0xc0, 0x5c, 0x57, 0x0a, 0xc0, 0xe9, 0xd2, 0xcf, 0xbe, 0x3c, 0xf2, 0x01, 0xc0, + 0xd8, 0x9c, 0xf3, 0xbe, 0x9a, 0x9a, 0x48, 0xbf, 0xba, 0x17, 0x36, 0xbf, 0x0d, 0x3f, 0x1b, 0xc0, + 0x27, 0x30, 0x17, 0xc0, 0xf9, 0xf2, 0x33, 0xbf, 0x8d, 0x22, 0xf3, 0xbd, 0xdd, 0x2a, 0xae, 0xbf, + 0x89, 0x4a, 0x06, 0xc0, 0x67, 0x6a, 0x6a, 0x3f, 0x49, 0x2b, 0x03, 0xc0, 0xe1, 0x09, 0x4f, 0x3f, + 0xc0, 0xe8, 0x2d, 0xbf, 0xef, 0x74, 0x28, 0xbf, 0x94, 0xed, 0x6c, 0xbf, 0x00, 0x05, 0x44, 0xbf, + 0x0a, 0x61, 0x1e, 0xbf, 0x12, 0x48, 0x03, 0xbe, 0xc9, 0xa9, 0x48, 0xc0, 0x41, 0x03, 0xe7, 0xbd, + 0x9f, 0x73, 0xc0, 0xbf, 0x8e, 0xeb, 0x37, 0xbf, 0x50, 0xc1, 0x55, 0xbf, 0xe2, 0x36, 0x59, 0xbe, + 0xd3, 0x7d, 0x96, 0xbf, 0x06, 0xe2, 0x83, 0xbf, 0x70, 0xa4, 0x86, 0xbf, 0xa1, 0x38, 0x07, 0xc0, + 0x0c, 0x5e, 0xc5, 0xbf, 0xd6, 0xfd, 0xa5, 0xbf, 0x61, 0x85, 0x11, 0xc0, 0x76, 0x94, 0x3a, 0xc0, + 0x3b, 0xcd, 0x09, 0x3f, 0x9c, 0x7a, 0x22, 0xc0, 0xdb, 0x8b, 0x49, 0xbf, 0xcf, 0x34, 0x26, 0xbf, + 0xb8, 0xff, 0xd4, 0xbf, 0xaf, 0xc9, 0x0f, 0xc0, 0xe7, 0xc8, 0x03, 0xbf, 0x6f, 0xd1, 0x90, 0xbe, + 0xb4, 0xc7, 0x99, 0xbf, 0x17, 0x34, 0xea, 0xbe, 0x72, 0xac, 0x13, 0xbe, 0xe4, 0xfa, 0x3a, 0xc0, + 0x15, 0x3b, 0x91, 0xbe, 0x97, 0x8c, 0x38, 0x3f, 0x21, 0xb3, 0x8a, 0xbf, 0x53, 0x8e, 0x68, 0x3e, + 0x4a, 0x8e, 0x50, 0x3f, 0xf9, 0x30, 0x25, 0xbf, 0xf5, 0x27, 0xac, 0xbe, 0x87, 0xda, 0xb1, 0xbf, + 0x31, 0x53, 0x5c, 0xbe, 0x60, 0x86, 0x2a, 0xbc, 0xcf, 0xd8, 0xa8, 0xbe, 0x7e, 0xa8, 0x96, 0x3e, + 0x78, 0x6b, 0x09, 0xc0, 0x15, 0x88, 0x10, 0xbf, 0xb3, 0xb3, 0x01, 0x3f, 0xe0, 0x34, 0x86, 0xc0, + 0xbd, 0x26, 0xdc, 0xbf, 0xe8, 0x02, 0x88, 0x3e, 0xcf, 0xec, 0x08, 0xc0, 0xa4, 0x8f, 0x5e, 0xbf, + 0x8e, 0x48, 0x29, 0x3f, 0xb3, 0x42, 0xb2, 0x3d, 0x7f, 0x5f, 0x0c, 0x3f, 0x95, 0x86, 0xa8, 0xbf, + 0xf8, 0x51, 0x8e, 0xbf, 0x3c, 0x23, 0x9e, 0xbf, 0x05, 0x22, 0xe0, 0xbf, 0xad, 0xb9, 0x7d, 0xbf, + 0xc1, 0xa2, 0x17, 0x40, 0x66, 0xc2, 0x34, 0xc0, 0x55, 0x92, 0x05, 0x40, 0x25, 0x85, 0xfd, 0x3d, + 0x25, 0xe4, 0x2b, 0xc0, 0xbf, 0xd6, 0x92, 0xbf, 0x38, 0x8c, 0x5f, 0xbf, 0x14, 0xe1, 0x9d, 0xbf, + 0x28, 0xe6, 0xa7, 0xbf, 0xfc, 0xf5, 0x81, 0xbf, 0x80, 0xb9, 0x53, 0xbf, 0x46, 0x65, 0x45, 0xc0, + 0x61, 0x5e, 0xbb, 0xbf, 0x9d, 0x8f, 0x53, 0xc0, 0x48, 0x20, 0x27, 0xbe, 0x88, 0x69, 0xfa, 0xbf, + 0x34, 0x2b, 0x2b, 0xc0, 0xfe, 0x9e, 0x15, 0xc0, 0x8b, 0x38, 0x80, 0xbe, 0xdf, 0x4e, 0x06, 0xbf, + 0xc1, 0x9b, 0x95, 0xbe, 0xa9, 0x50, 0x7f, 0xbf, 0x5b, 0x9d, 0xa8, 0xbf, 0x5d, 0x00, 0xb5, 0xbe, + 0xe5, 0x53, 0xb1, 0xbf, 0xf4, 0x0c, 0x62, 0xbf, 0x91, 0x9f, 0x06, 0xbf, 0x22, 0x8e, 0xa1, 0x3f, + 0x40, 0xae, 0x9f, 0xbd, 0x6d, 0x30, 0x8d, 0xbf, 0x37, 0x07, 0x1b, 0xc0, 0xaf, 0x15, 0x8c, 0xbf, + 0x4b, 0x81, 0x63, 0x3e, 0x46, 0xc2, 0xe2, 0xbe, 0x12, 0x14, 0x0b, 0xc0, 0xc6, 0xbc, 0x15, 0xbf, + 0x6b, 0xbf, 0x66, 0xc0, 0x26, 0x5a, 0xd7, 0xbf, 0x02, 0x94, 0xbd, 0xbe, 0x77, 0xb5, 0x52, 0xc0, + 0x02, 0x72, 0xbd, 0xbf, 0x8b, 0x40, 0xae, 0xbe, 0x3e, 0xf4, 0x12, 0x3f, 0x83, 0x96, 0x04, 0x3e, + 0x4d, 0x1c, 0x59, 0xbf, 0xe3, 0x78, 0x55, 0x3f, 0x84, 0x9e, 0xe9, 0xbf, 0x3f, 0xfa, 0x5d, 0xbf, + 0xda, 0x25, 0xdd, 0xbe, 0x79, 0x57, 0x14, 0xbf, 0x32, 0x84, 0xcc, 0xbf, 0x82, 0xd7, 0x01, 0xc0, + 0x28, 0x77, 0xbf, 0xbf, 0x14, 0xc0, 0x40, 0x3e, 0x7f, 0x4f, 0xda, 0xbf, 0x90, 0xbd, 0x0c, 0xbf, + 0xc6, 0x95, 0x5a, 0x3f, 0xe7, 0x8a, 0x74, 0xbf, 0xad, 0xba, 0x21, 0x3f, 0x48, 0x50, 0x8f, 0xbf, + 0x89, 0xb7, 0x53, 0x3d, 0xa2, 0x51, 0x72, 0xbe, 0x1c, 0x46, 0x00, 0xc0, 0x30, 0x90, 0xca, 0xbf, + 0xce, 0x57, 0xc2, 0x3d, 0xfb, 0x94, 0x10, 0xc0, 0x3d, 0x83, 0x68, 0xc0, 0x93, 0x5d, 0x91, 0xbf, + 0x5a, 0x7b, 0x9f, 0xbf, 0xee, 0x66, 0x51, 0xbf, 0x84, 0x3a, 0x92, 0xbf, 0x71, 0x86, 0x4e, 0xbf, + 0xd9, 0xa2, 0x64, 0xbd, 0x99, 0x73, 0x56, 0xbf, 0xbf, 0x08, 0x9a, 0xbf, 0x05, 0xcb, 0x51, 0x3f, + 0xc1, 0x2d, 0x17, 0xbf, 0x5a, 0xf5, 0x4d, 0x3c, 0xee, 0x55, 0x92, 0xbf, 0xd8, 0x99, 0x3b, 0xc0, + 0x4c, 0x10, 0x2b, 0xc0, 0x6f, 0x4b, 0x5d, 0xbf, 0x18, 0xef, 0x97, 0xbc, 0xc4, 0xf4, 0x15, 0x40, + 0xaa, 0x08, 0x62, 0xbf, 0xa0, 0x65, 0xf2, 0xbf, 0x59, 0x6e, 0xa6, 0xbe, 0x3b, 0x73, 0xd1, 0xbf, + 0x82, 0xc1, 0x37, 0xc0, 0x39, 0x64, 0x27, 0xbf, 0x6c, 0xd6, 0xc3, 0xbf, 0xa2, 0x18, 0xf6, 0x3e, + 0x20, 0xdd, 0x2f, 0xc0, 0x9f, 0x8e, 0x2b, 0xbf, 0x3a, 0xcf, 0x04, 0xc0, 0x8f, 0x92, 0xa1, 0xbf, + 0x70, 0x0c, 0xc3, 0xbf, 0x09, 0xfb, 0x86, 0x3f, 0x66, 0x6d, 0xbb, 0x3d, 0xe1, 0x5f, 0x9d, 0xbe, + 0x11, 0x1e, 0x83, 0xbf, 0x66, 0x2d, 0x41, 0xbf, 0xb6, 0xcd, 0x5d, 0xbe, 0x41, 0x0d, 0x18, 0xc0, + 0x27, 0xc5, 0xb5, 0xbf, 0x41, 0x75, 0x8e, 0xbf, 0x6c, 0x4e, 0x02, 0xc0, 0x7f, 0x39, 0x6d, 0x3f, + 0xec, 0x9e, 0x82, 0xbf, 0x39, 0xc5, 0x08, 0xc0, 0xca, 0xbb, 0x11, 0x3f, 0xb6, 0x12, 0x7b, 0xbf, + 0x58, 0x2c, 0x66, 0xbf, 0x38, 0x10, 0x91, 0xbf, 0x17, 0xdb, 0x25, 0x3f, 0xfe, 0x55, 0x04, 0xc0, + 0x95, 0xf4, 0x4c, 0xbf, 0x43, 0x87, 0x42, 0x3e, 0xe7, 0x9d, 0xe4, 0xbe, 0x35, 0x00, 0x24, 0xc0, + 0x3b, 0x4f, 0x0e, 0xc0, 0x6a, 0x1e, 0x79, 0x3e, 0xa9, 0xdd, 0xc1, 0xbf, 0x75, 0x72, 0x55, 0x3f, + 0x76, 0xa1, 0xb2, 0xbf, 0x75, 0x78, 0x92, 0xbe, 0x3d, 0x26, 0x1e, 0xbf, 0x35, 0x5d, 0x0e, 0xc0, + 0x90, 0x7b, 0x02, 0xc0, 0x3f, 0xca, 0x05, 0xc0, 0x85, 0x8b, 0xa5, 0xbf, 0xe6, 0x7b, 0x29, 0xc0, + 0x10, 0x56, 0x21, 0x3e, 0x6b, 0x86, 0x66, 0xbe, 0x2f, 0x6c, 0xa2, 0xbe, 0xc4, 0x9f, 0x29, 0xbf, + 0x88, 0xb7, 0xc8, 0xbf, 0x8e, 0xb1, 0x4b, 0xbf, 0x3a, 0xbe, 0xc9, 0xbf, 0x43, 0xa9, 0xc2, 0x3e, + 0xf5, 0x62, 0xd9, 0xbf, 0x04, 0xaf, 0x0a, 0xc0, 0xfa, 0x6d, 0xaa, 0xbf, 0x8d, 0x1a, 0x89, 0xbe, + 0x7c, 0x20, 0x87, 0xbf, 0xec, 0xaa, 0x97, 0xbf, 0xf7, 0x58, 0xb3, 0x3d, 0x83, 0x8f, 0x85, 0xbf, + 0x81, 0x87, 0x70, 0x3f, 0x9f, 0x88, 0x56, 0xbf, 0xa1, 0x44, 0x46, 0xbe, 0x52, 0x60, 0x53, 0xbf, + 0xc3, 0x40, 0x97, 0xbf, 0xdd, 0xec, 0x93, 0xbf, 0xe9, 0x51, 0xd9, 0xbe, 0xbf, 0x5f, 0xc6, 0xbf, + 0x91, 0xce, 0xe6, 0x3e, 0xc1, 0xf1, 0xc7, 0xbf, 0xec, 0x3b, 0x7d, 0xbf, 0x76, 0x3e, 0x8a, 0xbf, + 0x12, 0xcb, 0x12, 0xbf, 0xdd, 0x10, 0x96, 0xbf, 0x69, 0x84, 0xe9, 0xbf, 0xa9, 0xb9, 0x95, 0xbf, + 0x1e, 0x79, 0xc0, 0xbe, 0x75, 0xa5, 0xd3, 0xbf, 0x60, 0x94, 0xf9, 0xbf, 0x70, 0x59, 0x2c, 0x3d, + 0xf7, 0x37, 0xcb, 0xbf, 0x0b, 0x5d, 0xd5, 0xbe, 0x26, 0x76, 0xf9, 0xbf, 0x49, 0x9a, 0x03, 0xbf, + 0xe2, 0x36, 0x8b, 0x3f, 0xdd, 0xc8, 0x91, 0xbe, 0x03, 0xa0, 0xc5, 0xbe, 0x51, 0xee, 0x96, 0xbd, + 0x47, 0x6c, 0x41, 0xbf, 0x10, 0x3d, 0xd2, 0xbf, 0x97, 0x4c, 0xa7, 0xbe, 0xec, 0xed, 0xda, 0xbf, + 0x30, 0x5b, 0xdd, 0xbf, 0x23, 0x54, 0xb2, 0xbe, 0x9a, 0x19, 0xe8, 0xbd, 0x95, 0x03, 0x32, 0xbf, + 0x90, 0x4c, 0xd4, 0xbf, 0x16, 0x6a, 0xb5, 0x3e, 0x18, 0x5c, 0x84, 0xbf, 0xa8, 0x07, 0x14, 0xbf, + 0x5f, 0x1c, 0xc2, 0x3d, 0xa0, 0xcd, 0x79, 0xbf, 0x0e, 0xd9, 0xda, 0xbf, 0x3d, 0xcb, 0x45, 0x3e, + 0x36, 0x8a, 0x4c, 0x3e, 0x20, 0x56, 0x8a, 0xbe, 0x9c, 0x41, 0x1c, 0xc0, 0xc1, 0xbf, 0xb2, 0xbf, + 0x7b, 0xad, 0x16, 0xc0, 0xfe, 0x79, 0xc6, 0xbf, 0x83, 0x51, 0x8a, 0xbe, 0x7b, 0x76, 0xab, 0xbf, + 0x50, 0xec, 0x0e, 0x3f, 0x10, 0x23, 0xfc, 0xbf, 0xa9, 0xf5, 0xa5, 0xbf, 0x9d, 0x94, 0x32, 0xbf, + 0xb0, 0x0d, 0x1a, 0xbf, 0x03, 0xef, 0x88, 0xbf, 0x57, 0x50, 0xb8, 0xbe, 0x80, 0x11, 0x34, 0x3f, + 0x88, 0x78, 0x41, 0x40, 0x91, 0x3d, 0x96, 0xbf, 0xc2, 0xab, 0x66, 0xbf, 0x6b, 0xe8, 0x7a, 0xbe, + 0xaf, 0x29, 0xd8, 0xbf, 0x98, 0xf9, 0x1c, 0xc0, 0xb4, 0xf0, 0xe4, 0xbe, 0x65, 0x8f, 0xcf, 0xbe, + 0xcf, 0xf3, 0xaa, 0x3e, 0x8a, 0x7b, 0xb2, 0xbf, 0x4b, 0x64, 0xc4, 0xbe, 0x0e, 0xe3, 0xcb, 0xbf, + 0xe5, 0xf2, 0x94, 0xbf, 0x70, 0x33, 0x29, 0xbf, 0xb5, 0x76, 0x0b, 0xbf, 0x76, 0xc1, 0xc6, 0xbe, + 0x6d, 0x07, 0x57, 0xbf, 0x04, 0x74, 0x78, 0x3e, 0x17, 0x81, 0x64, 0xbe, 0x7b, 0xfd, 0x1d, 0xbf, + 0x4c, 0x47, 0x17, 0xc0, 0x58, 0xe5, 0x2a, 0x3f, 0x8f, 0x90, 0xd3, 0xbe, 0xef, 0xe9, 0xd9, 0xbf, + 0x3d, 0x93, 0x49, 0xbf, 0x2d, 0xc4, 0x89, 0x3e, 0x52, 0xa4, 0x8a, 0xbf, 0x4d, 0x53, 0x35, 0xbf, + 0x7c, 0x0c, 0xdb, 0xbf, 0x44, 0x89, 0x5f, 0x3f, 0xe0, 0x32, 0x37, 0xc0, 0x31, 0x71, 0x0f, 0x3e, + 0xe6, 0x5d, 0x96, 0xbf, 0x7c, 0xc6, 0x21, 0xbd, 0xa2, 0x0f, 0xf6, 0xbe, 0xfb, 0x35, 0x86, 0xbf, + 0x1c, 0x6a, 0x22, 0xbe, 0x23, 0xa2, 0x42, 0x3f, 0x9f, 0xe3, 0x57, 0x3e, 0xae, 0xf1, 0x25, 0x3f, + 0xc8, 0x31, 0x57, 0xc0, 0x1a, 0x30, 0xf7, 0xbf, 0x71, 0x8b, 0x40, 0xbf, 0xc5, 0xf6, 0xbe, 0xbf, + 0x44, 0x2c, 0xec, 0x3e, 0xc7, 0x38, 0xd3, 0xbf, 0x70, 0xa2, 0x11, 0xbf, 0x6b, 0x81, 0xab, 0xbe, + 0xab, 0x7e, 0x58, 0xc0, 0x22, 0x1e, 0x57, 0x3e, 0xef, 0xbd, 0x98, 0xbe, 0x6c, 0x29, 0xd6, 0xbe, + 0x5c, 0x5e, 0xdc, 0x3d, 0xb9, 0x23, 0x77, 0xbf, 0x7a, 0xaf, 0x32, 0x3f, 0xfd, 0x2c, 0xf9, 0xbf, + 0xe0, 0x05, 0x8b, 0xbe, 0x49, 0x76, 0x34, 0xbf, 0xa5, 0x77, 0xbd, 0xbe, 0xdb, 0x36, 0x01, 0xc0, + 0x79, 0xe0, 0x33, 0xc0, 0x6a, 0x17, 0x42, 0xbf, 0x31, 0x00, 0xe5, 0xbf, 0x5f, 0x33, 0x01, 0xc0, + 0x60, 0x93, 0x2c, 0xbf, 0x75, 0xf1, 0xba, 0x3f, 0xff, 0xb9, 0x95, 0xbf, 0x32, 0xc0, 0x1e, 0xbf, + 0x74, 0xb8, 0x54, 0xbf, 0x72, 0xce, 0xc2, 0xbf, 0xe3, 0x9f, 0x9d, 0x3f, 0xc0, 0x0f, 0x7f, 0xbf, + 0x0d, 0x52, 0xc8, 0xbf, 0x31, 0x0e, 0x12, 0xbf, 0x78, 0x35, 0x11, 0xbf, 0x18, 0x26, 0x12, 0xbf, + 0xba, 0xa6, 0xb4, 0xbf, 0x80, 0x7c, 0x6c, 0xbf, 0x2a, 0x15, 0x7d, 0xbe, 0xfd, 0x45, 0x04, 0xbf, + 0x68, 0x9b, 0x51, 0xbf, 0xff, 0xed, 0xa2, 0xbf, 0x69, 0x8e, 0x58, 0xbf, 0x1c, 0x8d, 0x90, 0xbf, + 0x44, 0xe8, 0xe5, 0xbf, 0x80, 0x48, 0x96, 0xbf, 0x95, 0x87, 0xc5, 0xbe, 0x39, 0xc7, 0x9f, 0xbf, + 0x43, 0x7b, 0x21, 0xbf, 0xf5, 0xf6, 0x76, 0xbe, 0x59, 0xf2, 0xd1, 0xbf, 0x80, 0x22, 0xbd, 0xbf, + 0x74, 0xec, 0xdc, 0xbf, 0xc0, 0x1a, 0x0f, 0xbf, 0xfd, 0x84, 0xa7, 0x3f, 0x99, 0x0c, 0x5f, 0xbf, + 0xf8, 0x8c, 0x0f, 0xbe, 0x77, 0xa5, 0xa7, 0xbf, 0xfa, 0x41, 0x0c, 0xc0, 0x99, 0xe9, 0x9b, 0xbf, + 0xe3, 0xf3, 0x18, 0xbf, 0xd7, 0x84, 0x2b, 0xc0, 0xe4, 0x60, 0xc7, 0xbf, 0xdf, 0xbf, 0x13, 0xbf, + 0x2c, 0x6e, 0x4c, 0xbf, 0x12, 0xd9, 0x64, 0xbf, 0x48, 0x9c, 0xe7, 0xbf, 0xaa, 0x93, 0xe4, 0xbf, + 0xd5, 0x79, 0xb0, 0xbf, 0xa0, 0x63, 0x16, 0xc0, 0xa4, 0x27, 0x91, 0x3e, 0x1e, 0xbd, 0x1e, 0xbe, + 0x26, 0x31, 0x5e, 0x3e, 0x9d, 0x08, 0x74, 0xbf, 0x44, 0xf1, 0xf5, 0xbf, 0xc3, 0x2c, 0xc6, 0xbf, + 0xad, 0x7b, 0xfc, 0x3d, 0xe6, 0xc9, 0xca, 0x3e, 0xa9, 0xbd, 0x87, 0xbe, 0x64, 0x51, 0x6f, 0xbb, + 0x13, 0xe3, 0xae, 0xbf, 0x2f, 0xf0, 0x33, 0x3e, 0x63, 0x3c, 0x1e, 0xbf, 0xa4, 0x65, 0x96, 0x3e, + 0xb9, 0x81, 0xd8, 0xbf, 0x51, 0xb2, 0xb7, 0x3d, 0xe4, 0x95, 0x7f, 0xbf, 0xd5, 0x60, 0x57, 0xbf, + 0xf7, 0xb1, 0x98, 0xbf, 0x4f, 0x87, 0xb4, 0xbe, 0x8c, 0xfd, 0x3a, 0xbf, 0xbb, 0x76, 0x83, 0xbf, + 0x5a, 0x75, 0xb1, 0xbf, 0x9d, 0xdc, 0x01, 0xbf, 0x49, 0x1c, 0xc3, 0xbf, 0x78, 0x79, 0xcd, 0xbe, + 0x2b, 0xda, 0xbd, 0x3d, 0x52, 0x43, 0x3a, 0xbe, 0x20, 0xef, 0x57, 0xbf, 0x71, 0xdd, 0xe6, 0xbf, + 0xb8, 0x7c, 0x58, 0xbf, 0xbd, 0xf7, 0x74, 0x3f, 0x13, 0xf1, 0xb3, 0xbf, 0x0d, 0x2c, 0xf2, 0xbf, + 0xd1, 0xd3, 0xe7, 0x3d, 0x8b, 0xbd, 0x2a, 0xbf, 0x14, 0x96, 0xc4, 0xbf, 0x56, 0x98, 0xd6, 0xbf, + 0x75, 0xb4, 0x45, 0x3e, 0xa6, 0x46, 0xc8, 0xbf, 0xcf, 0x43, 0xaf, 0x3f, 0xa4, 0x69, 0xeb, 0xbf, + 0x6f, 0xfc, 0x12, 0xc0, 0x9a, 0x21, 0x8d, 0xbf, 0xb4, 0x35, 0x5f, 0xbe, 0x5f, 0xb1, 0xfa, 0xbf, + 0xc9, 0x15, 0x3d, 0x3f, 0x79, 0x92, 0x76, 0xbe, 0x82, 0x68, 0xd7, 0xbf, 0xfd, 0x22, 0xf9, 0xbe, + 0xb9, 0x5e, 0x8c, 0xbf, 0xd7, 0x4e, 0x9a, 0xbf, 0x9f, 0x88, 0x03, 0x3f, 0x05, 0x92, 0x75, 0x3f, + 0x31, 0x59, 0x9a, 0x3e, 0x29, 0x10, 0xeb, 0xbd, 0x65, 0x8f, 0x91, 0xbf, 0x3e, 0x4f, 0xfb, 0xbf, + 0x97, 0xc3, 0x36, 0xbe, 0x30, 0x36, 0xe4, 0xbf, 0x3a, 0xee, 0x2e, 0xbf, 0xc2, 0x08, 0xce, 0xbf, + 0x0a, 0xdc, 0x17, 0x3f, 0x2a, 0x69, 0xb8, 0x3e, 0x93, 0x4f, 0x9c, 0xbf, 0xe2, 0x8a, 0x14, 0xbf, + 0x1c, 0x8d, 0x2f, 0xbf, 0xdf, 0xa6, 0x95, 0xbf, 0xe2, 0x18, 0x0a, 0xc0, 0x1d, 0xb5, 0x59, 0xbf, + 0x55, 0x81, 0xa7, 0x3e, 0x6e, 0xca, 0xe9, 0xbf, 0x94, 0xff, 0x07, 0xc0, 0xce, 0x4a, 0xda, 0xbf, + 0xe6, 0x54, 0x95, 0xbf, 0xe6, 0xd9, 0x25, 0xbf, 0x29, 0xf9, 0x2a, 0xbf, 0x52, 0x8d, 0x60, 0xbe, + 0xf5, 0xfc, 0x08, 0xc0, 0x4b, 0xc0, 0x08, 0x3e, 0xe2, 0xc4, 0xd4, 0x3f, 0xd7, 0x56, 0x2a, 0x3f, + 0xbe, 0x76, 0xc2, 0x3f, 0x7c, 0xfa, 0xc9, 0x3e, 0x6b, 0x3f, 0xb3, 0xbf, 0x35, 0x3f, 0x9f, 0xbf, + 0x6e, 0x3b, 0x0c, 0x3f, 0x9b, 0x4a, 0x91, 0xbf, 0xb4, 0xcf, 0x7e, 0xbf, 0x37, 0xfe, 0x40, 0xbd, + 0x6b, 0xf5, 0x64, 0xbe, 0xe8, 0xd1, 0xc4, 0xbf, 0x4b, 0x50, 0xbf, 0xbf, 0xcf, 0x48, 0x71, 0xbf, + 0xa0, 0x33, 0x16, 0xbe, 0x06, 0x93, 0x68, 0xc0, 0x7a, 0x35, 0xce, 0xbe, 0x67, 0x83, 0x06, 0xc0, + 0xca, 0xca, 0xbf, 0xbf, 0x41, 0x7d, 0x04, 0xc0, 0x6a, 0xce, 0xdb, 0x3d, 0xae, 0x14, 0x0d, 0xc0, + 0x42, 0x9e, 0xae, 0xbf, 0xc9, 0x66, 0x5d, 0xbe, 0x6a, 0xcd, 0x73, 0xbf, 0x69, 0xed, 0x7a, 0xbe, + 0x7c, 0x97, 0xc8, 0xbf, 0x9a, 0x7d, 0x31, 0xc0, 0x20, 0xa7, 0x46, 0xbf, 0x49, 0xa3, 0x1f, 0x3e, + 0x15, 0x0d, 0xb8, 0xbf, 0xa9, 0xec, 0x0f, 0xc0, 0xc2, 0xef, 0x36, 0xc0, 0xa2, 0xf6, 0xd6, 0xbf, + 0x54, 0xa9, 0x24, 0xbf, 0x12, 0x09, 0x1a, 0xc0, 0xcc, 0x61, 0xb9, 0xbf, 0x28, 0x22, 0x08, 0xc0, + 0x56, 0xd4, 0x65, 0x3f, 0x39, 0x27, 0xc0, 0xbf, 0x32, 0x4a, 0x8e, 0xbf, 0x7a, 0x87, 0xfe, 0x3d, + 0x14, 0xd1, 0x4a, 0xbe, 0xd5, 0x34, 0xd9, 0xbf, 0x60, 0x48, 0xbf, 0x3f, 0xff, 0xcb, 0x58, 0xbf, + 0xc3, 0x77, 0x06, 0xc0, 0xb7, 0xf5, 0x5b, 0xbd, 0x2d, 0x02, 0x3f, 0xbf, 0x39, 0x14, 0x1c, 0xc0, + 0xb1, 0xd1, 0x8f, 0xbf, 0x8c, 0x57, 0x4b, 0x3f, 0xee, 0xaa, 0x17, 0xc0, 0x3d, 0xc7, 0xc0, 0xbf, + 0x35, 0x1f, 0xdc, 0xbf, 0x48, 0x71, 0x6d, 0xbf, 0xf4, 0x4c, 0x05, 0xc0, 0x7d, 0xd7, 0xdd, 0xbf, + 0x0b, 0x0f, 0x51, 0xbe, 0xb9, 0x99, 0x4f, 0xbe, 0x8c, 0xfd, 0xd6, 0xbf, 0x79, 0x46, 0x1d, 0xc0, + 0x50, 0xb0, 0x65, 0xbe, 0x1f, 0xf3, 0x32, 0xbf, 0xcd, 0x2a, 0x3e, 0xbf, 0xc0, 0x98, 0x0b, 0x3d, + 0x49, 0xfd, 0x98, 0xbf, 0xa0, 0xba, 0xa9, 0xbf, 0x42, 0x7f, 0xac, 0xbe, 0xbd, 0x3f, 0x2a, 0xbe, + 0x19, 0x73, 0xed, 0xbf, 0x4d, 0x34, 0xff, 0xbf, 0xca, 0xcd, 0x66, 0xbf, 0x7f, 0xb8, 0x75, 0xbd, + 0x6b, 0xb0, 0xb1, 0xbf, 0x57, 0xb6, 0x9c, 0xbf, 0x04, 0xd9, 0xb3, 0xbf, 0xbf, 0x9a, 0x02, 0xc0, + 0x1c, 0xaa, 0x40, 0xbf, 0xb2, 0xcb, 0xf7, 0xbf, 0x57, 0xd8, 0xbf, 0xbd, 0x4b, 0xaa, 0xdf, 0xbf, + 0x83, 0xd0, 0xe7, 0x3d, 0x2f, 0x60, 0xe0, 0x3e, 0x9c, 0x17, 0x0f, 0x3f, 0x15, 0xc9, 0x38, 0xbf, + 0x61, 0x71, 0xba, 0x3e, 0x62, 0xeb, 0xcd, 0xbf, 0x92, 0xb8, 0x6a, 0xbf, 0x33, 0x35, 0xcc, 0xbf, + 0x5a, 0x0a, 0x75, 0xbe, 0x64, 0xec, 0xfb, 0xbf, 0xfd, 0x62, 0xac, 0xbf, 0x17, 0x18, 0x65, 0xbf, + 0xce, 0x49, 0xdb, 0xbf, 0x0b, 0x48, 0x9e, 0xbf, 0xf5, 0xac, 0x22, 0xbc, 0x5d, 0xa4, 0x1f, 0xbf, + 0xf6, 0xb0, 0xc1, 0xbf, 0x34, 0xc2, 0xc1, 0x3d, 0xcd, 0x28, 0x12, 0x3f, 0x46, 0xb4, 0x8e, 0xbf, + 0xd1, 0x86, 0x4c, 0xbe, 0x7a, 0x50, 0xc6, 0xbf, 0x10, 0x1e, 0x82, 0xc0, 0xc9, 0x6e, 0xcd, 0xbf, + 0xce, 0x33, 0x35, 0x3f, 0xb7, 0x18, 0xc3, 0xbf, 0xb5, 0xc0, 0xe4, 0xbd, 0xc1, 0xa4, 0x98, 0xbe, + 0xe6, 0x18, 0x4b, 0xc0, 0x19, 0xd7, 0x9c, 0x3f, 0x0c, 0x5a, 0xd5, 0xbf, 0x1c, 0xb3, 0xc2, 0xbf, + 0xb8, 0x7f, 0xd5, 0xbf, 0x4d, 0x46, 0xdb, 0xbf, 0xaf, 0xa9, 0x92, 0xbf, 0x7a, 0x06, 0x07, 0xc0, + 0x82, 0xe6, 0x04, 0xc0, 0xdf, 0xb0, 0x8d, 0xbf, 0xc6, 0xf9, 0x64, 0xbf, 0xba, 0x2d, 0x4b, 0xbf, + 0x6c, 0x88, 0x2f, 0xbf, 0x95, 0x2d, 0x5a, 0xbf, 0xf6, 0x52, 0xf1, 0xbf, 0xdc, 0xb8, 0x14, 0x40, + 0x3e, 0xf3, 0x80, 0x3f, 0xce, 0x7e, 0xea, 0xbf, 0x59, 0xf0, 0x53, 0xc0, 0x2f, 0x52, 0x2d, 0x3f, + 0xa3, 0xd3, 0x0d, 0xc0, 0x19, 0x20, 0xf0, 0xbf, 0x13, 0x77, 0xae, 0xbf, 0x5d, 0xe5, 0xe6, 0xbf, + 0x98, 0x01, 0x8a, 0x3c, 0x8e, 0x94, 0xdf, 0xbf, 0x8a, 0xf3, 0x23, 0xc0, 0x26, 0xe1, 0x40, 0x3e, + 0x33, 0x4a, 0x08, 0xbe, 0x54, 0x1e, 0x35, 0x3f, 0x31, 0x66, 0x2e, 0x3f, 0x02, 0xe5, 0x20, 0xbf, + 0xfa, 0xd3, 0x19, 0xc0, 0x03, 0x99, 0x96, 0xbf, 0xd8, 0xa1, 0xbf, 0xbf, 0xb7, 0xc9, 0x14, 0x3e, + 0xc0, 0xf7, 0xf6, 0xbf, 0x76, 0xfe, 0x7e, 0xbf, 0x55, 0x81, 0x2c, 0xc0, 0xfb, 0xe3, 0x2d, 0x3f, + 0xfb, 0x47, 0xde, 0xbf, 0xd6, 0x44, 0x53, 0x3f, 0xf9, 0xbb, 0xd8, 0xbf, 0x84, 0xaf, 0x91, 0xbe, + 0xc2, 0x64, 0xba, 0x3e, 0x93, 0xf9, 0xec, 0xbf, 0x68, 0x0b, 0x28, 0xbf, 0x3b, 0xff, 0x99, 0xbf, + 0xa3, 0x8d, 0xaf, 0xbf, 0xd2, 0x6d, 0x45, 0x3e, 0x2f, 0x98, 0xc9, 0xbf, 0x96, 0x43, 0xd5, 0xbf, + 0x41, 0x44, 0x1d, 0xc0, 0xfe, 0x99, 0x53, 0xbe, 0x4a, 0xdf, 0x94, 0x3e, 0x96, 0x6b, 0x63, 0xbb, + 0x1f, 0xd9, 0x9b, 0xbf, 0xfa, 0x15, 0x86, 0xbf, 0x40, 0x55, 0x8a, 0xbf, 0xf1, 0x7f, 0x8f, 0xbf, + 0x77, 0x37, 0x02, 0xc0, 0x80, 0x79, 0x60, 0xbf, 0xc1, 0xa8, 0x6c, 0xbf, 0x0d, 0xc9, 0x51, 0x3f, + 0x99, 0xaa, 0x50, 0xbf, 0x9e, 0xe8, 0x27, 0x40, 0xd4, 0x8d, 0xb0, 0xbf, 0xa5, 0x11, 0xfc, 0xbf, + 0x82, 0x6d, 0xae, 0x3e, 0x95, 0xd8, 0x39, 0xbf, 0xb2, 0x88, 0x75, 0x3e, 0x9f, 0x68, 0x59, 0x3f, + 0x60, 0x1c, 0xa5, 0xbe, 0xe4, 0x33, 0x7b, 0xbf, 0x4d, 0xc0, 0x8f, 0xbf, 0xa9, 0x22, 0xfd, 0xbf, + 0xa7, 0x66, 0x86, 0xbf, 0x6f, 0x91, 0xe7, 0xbf, 0xba, 0x97, 0xc0, 0x3e, 0x07, 0x98, 0x1a, 0xbc, + 0x5e, 0xc1, 0xce, 0xbf, 0x2d, 0x8c, 0xa7, 0x3d, 0x16, 0xc2, 0x03, 0xc0, 0xbc, 0x16, 0xc5, 0xbf, + 0x10, 0x74, 0x19, 0xc0, 0x88, 0x2d, 0xc8, 0x3e, 0x61, 0xad, 0x59, 0xbf, 0x66, 0xa0, 0x01, 0xc0, + 0x93, 0xce, 0x0d, 0xc0, 0xb3, 0x16, 0x0b, 0xbf, 0x71, 0x7b, 0x05, 0xc0, 0x87, 0xa0, 0x89, 0xc0, + 0xac, 0x15, 0xb3, 0xbf, 0x97, 0xd1, 0xb4, 0xbf, 0x7e, 0xd6, 0x02, 0x3f, 0x4b, 0xbe, 0x0d, 0xbf, + 0x55, 0xed, 0xa0, 0xbf, 0xa7, 0xbe, 0xba, 0x3f, 0xcc, 0xe2, 0xa5, 0xbf, 0x23, 0x4d, 0x41, 0xbf, + 0x5e, 0x78, 0x44, 0xbf, 0x67, 0x3a, 0xa2, 0xbe, 0xa5, 0x34, 0xbf, 0xbf, 0xc7, 0xe2, 0xcc, 0xbe, + 0x9a, 0x1f, 0x8c, 0xbf, 0x66, 0xde, 0x01, 0xc0, 0x57, 0x33, 0x95, 0xbf, 0xeb, 0x6c, 0x42, 0xbf, + 0x20, 0x22, 0x9a, 0xbe, 0x40, 0x8f, 0xd8, 0xbf, 0xcf, 0x25, 0x3a, 0xbe, 0xd8, 0xd7, 0x01, 0xc0, + 0x92, 0xb7, 0xb4, 0x3d, 0x43, 0xc2, 0x9a, 0xbe, 0xc5, 0x06, 0x6d, 0xbf, 0xba, 0x68, 0x34, 0xbf, + 0xc9, 0xb0, 0xa7, 0xbf, 0x6a, 0x35, 0x86, 0xbf, 0xd1, 0x23, 0x82, 0xbe, 0xf1, 0x48, 0xf7, 0xbe, + 0x61, 0x4e, 0xfe, 0x3e, 0xa3, 0x2f, 0xd2, 0xbe, 0xde, 0x17, 0xe4, 0xbf, 0xb7, 0x7e, 0x01, 0xc0, + 0x20, 0x91, 0x7a, 0xbf, 0x02, 0xd5, 0x01, 0xbf, 0x97, 0xbe, 0x35, 0xc0, 0xc4, 0x2a, 0xab, 0xbf, + 0x5a, 0x23, 0xff, 0xbe, 0x43, 0xa3, 0x08, 0xc0, 0xe9, 0x5f, 0x5c, 0xbf, 0x61, 0xfb, 0xf1, 0xbf, + 0xe6, 0x26, 0x5a, 0x3f, 0xf0, 0x50, 0x3c, 0xbf, 0xe3, 0x75, 0x8b, 0xbf, 0x6c, 0xac, 0xc9, 0xbf, + 0x8f, 0xa9, 0xae, 0xbf, 0x36, 0xca, 0x85, 0xbf, 0x20, 0x47, 0x14, 0x3e, 0xcd, 0xfa, 0x09, 0x3d, + 0x76, 0x6a, 0xfc, 0xbf, 0x9c, 0xda, 0x20, 0xbf, 0xd4, 0xdb, 0xfa, 0xbf, 0x42, 0x1a, 0x03, 0xbf, + 0x9f, 0xd3, 0x05, 0xc0, 0xe0, 0x55, 0x18, 0x3f, 0x66, 0x46, 0xe8, 0xbe, 0xb0, 0x19, 0x21, 0xc0, + 0x0f, 0x71, 0x16, 0xbf, 0x45, 0xe9, 0x54, 0xbf, 0x24, 0x85, 0x18, 0xbf, 0xaf, 0x3e, 0xc6, 0xbc, + 0x29, 0xc9, 0x37, 0xc0, 0x43, 0x22, 0x8f, 0xbe, 0xbd, 0x46, 0x0a, 0xbe, 0x1e, 0x7c, 0x68, 0xbf, + 0x51, 0xde, 0x4e, 0xbf, 0x55, 0x0f, 0x41, 0xbf, 0x71, 0xa0, 0x20, 0xc0, 0x24, 0x7c, 0x5f, 0xc0, + 0x66, 0xa3, 0x04, 0xc0, 0x46, 0x6a, 0x73, 0x3f, 0x39, 0x30, 0x89, 0x3e, 0x66, 0x5d, 0xbb, 0x3d, + 0xa3, 0xa9, 0x3f, 0xbf, 0x71, 0x0e, 0x55, 0x3e, 0x70, 0xad, 0x80, 0x3e, 0x96, 0x2e, 0x0b, 0xbf, + 0xe1, 0x69, 0x6b, 0x3e, 0x0a, 0xfd, 0x51, 0xbf, 0x91, 0x99, 0x9a, 0xbe, 0xb2, 0x09, 0xac, 0xbf, + 0xc5, 0x16, 0x45, 0xc0, 0xf0, 0x26, 0x66, 0xbf, 0xdd, 0xe2, 0x1b, 0xc0, 0x12, 0x86, 0xe3, 0xbf, + 0x14, 0x64, 0xc4, 0xbe, 0x25, 0x5c, 0xa7, 0xbf, 0x8d, 0xd0, 0x2e, 0xbf, 0x55, 0x62, 0x28, 0xbf, + 0xed, 0x09, 0xfd, 0xbe, 0xcd, 0x98, 0xf0, 0xbf, 0x9c, 0xe7, 0x91, 0x3e, 0x97, 0xe8, 0x77, 0xbf, + 0xaa, 0x8a, 0x09, 0xc0, 0x06, 0x78, 0x9f, 0x3f, 0x51, 0xda, 0xed, 0xbf, 0x35, 0x4d, 0xb0, 0xbf, + 0x3b, 0x3e, 0xbc, 0xbf, 0x38, 0xb2, 0x2d, 0xc0, 0x2b, 0x1b, 0xc1, 0xbf, 0x8e, 0x3e, 0x80, 0xbf, + 0xfa, 0x07, 0x85, 0xbf, 0xae, 0x93, 0x5e, 0xbf, 0x08, 0xeb, 0x4e, 0xbf, 0x74, 0x0c, 0x34, 0xc0, + 0xe8, 0x9e, 0x0c, 0xbf, 0xc4, 0x9b, 0x99, 0xbf, 0x47, 0x83, 0x4e, 0xc0, 0xba, 0xa4, 0xfd, 0xbd, + 0xab, 0xc3, 0x04, 0xc0, 0x30, 0x3e, 0xc4, 0xbf, 0x11, 0xe1, 0xa0, 0xbf, 0x55, 0x79, 0x2f, 0xc0, + 0x5c, 0x45, 0x57, 0xbf, 0x92, 0x1e, 0x27, 0x3f, 0x58, 0xed, 0x9e, 0xbf, 0xe4, 0x3c, 0x98, 0xbe, + 0x1a, 0xc7, 0x18, 0xc0, 0x2f, 0x4f, 0x12, 0xc0, 0x20, 0x09, 0x66, 0xbf, 0x47, 0xef, 0xdb, 0xbf, + 0xe3, 0x71, 0x19, 0xbf, 0xfd, 0x25, 0xb2, 0xbf, 0x1a, 0x4c, 0x6a, 0x3f, 0x18, 0x3f, 0x35, 0xbf, + 0xe5, 0x7d, 0x48, 0xbf, 0x36, 0xaa, 0x17, 0xbf, 0x15, 0x6f, 0xb8, 0xbf, 0xef, 0xee, 0x68, 0xbf, + 0x48, 0x75, 0x52, 0xbd, 0xa6, 0x11, 0x39, 0xc0, 0xff, 0xdf, 0x1e, 0x3f, 0xb7, 0x4b, 0x6c, 0xbf, + 0x8f, 0x53, 0xd7, 0xbe, 0xb3, 0x0b, 0x37, 0xbe, 0x16, 0x0e, 0x1d, 0xbf, 0x6d, 0x8f, 0x16, 0xc0, + 0x35, 0x2d, 0x2c, 0xbe, 0xa9, 0x68, 0x4d, 0xbf, 0x44, 0x7b, 0xd8, 0xbf, 0x92, 0x55, 0x63, 0xbf, + 0x79, 0x2b, 0x10, 0xc0, 0x71, 0x35, 0xa4, 0xbf, 0x0b, 0xbe, 0x41, 0xc0, 0x5e, 0x24, 0x89, 0xbd, + 0x45, 0x0b, 0xd0, 0xbf, 0x92, 0x1a, 0x98, 0x3e, 0xdd, 0xe5, 0x9c, 0xbd, 0x4b, 0x23, 0x21, 0xbe, + 0xed, 0x03, 0xb8, 0xbe, 0xdb, 0xff, 0xa2, 0xbf, 0x1b, 0xa2, 0x28, 0xbf, 0x30, 0x4e, 0xb7, 0x3e, + 0xd7, 0xe3, 0xad, 0xbf, 0x4a, 0x3d, 0xb2, 0xbf, 0xfe, 0x31, 0xe0, 0xbf, 0x2e, 0x57, 0x39, 0xbf, + 0x31, 0x87, 0xa5, 0xbd, 0xcc, 0xef, 0xba, 0x3f, 0x12, 0x5d, 0x63, 0xbf, 0x39, 0x6d, 0x8c, 0xbf, + 0x9b, 0x71, 0x43, 0xc0, 0x5b, 0xd7, 0x08, 0xbe, 0x19, 0x53, 0x5c, 0xbf, 0x1e, 0xc6, 0x80, 0xbf, + 0xd3, 0xc4, 0x45, 0xbf, 0x56, 0x1d, 0x18, 0xbf, 0xe4, 0x6c, 0x9d, 0xbf, 0x09, 0xa9, 0x9d, 0xbf, + 0xee, 0xa4, 0x3c, 0xbf, 0x66, 0xe7, 0x48, 0xbf, 0x70, 0x00, 0xfa, 0x3e, 0x19, 0xe9, 0x9e, 0xbf, + 0x1c, 0x48, 0xce, 0xbf, 0x70, 0x0d, 0xa0, 0xbf, 0x06, 0x78, 0xdb, 0x3d, 0xd3, 0x90, 0x17, 0xbf, + 0xe2, 0x7e, 0xcb, 0xbf, 0x1a, 0x85, 0xa7, 0xbf, 0x96, 0x21, 0xcd, 0xbf, 0xe2, 0xbe, 0x8b, 0x3e, + 0x92, 0x4e, 0xfa, 0xbf, 0x77, 0xf4, 0xe9, 0xbf, 0xde, 0x42, 0x36, 0xbf, 0x01, 0x13, 0xca, 0xbf, + 0x9b, 0x84, 0x9f, 0xbf, 0xe5, 0x9b, 0x2b, 0xbf, 0xed, 0xbd, 0x13, 0xc0, 0xcd, 0x27, 0xf9, 0xbf, + 0xc8, 0x1d, 0x27, 0xc0, 0x7f, 0x43, 0x1f, 0xc0, 0x9a, 0xad, 0x94, 0x3f, 0x0d, 0xe2, 0x8c, 0xbf, + 0x96, 0xcc, 0x18, 0xc0, 0x9d, 0x58, 0x4b, 0x3e, 0x65, 0x5d, 0xb2, 0xbf, 0xcf, 0x1f, 0x22, 0xc0, + 0x0b, 0x2f, 0x0f, 0xc0, 0x98, 0x62, 0xfc, 0xbf, 0x0a, 0xad, 0x41, 0xbf, 0xa5, 0x0f, 0x36, 0xc0, + 0xcb, 0xe7, 0x4b, 0xbf, 0x3b, 0x39, 0x96, 0xbf, 0x8b, 0x43, 0x88, 0xbf, 0x2f, 0xcb, 0xe9, 0x3f, + 0x6c, 0x67, 0x09, 0xbf, 0x53, 0xfa, 0xee, 0x3d, 0x15, 0xe2, 0x30, 0xc0, 0x76, 0x2a, 0x11, 0xc0, + 0x25, 0x2a, 0x1f, 0xc0, 0xf3, 0x50, 0xc2, 0xbf, 0x5e, 0x25, 0xc3, 0xbf, 0xc5, 0xa7, 0xce, 0xbe, + 0x4e, 0x2c, 0x5c, 0xc0, 0x9e, 0xab, 0x75, 0xbf, 0x06, 0xdb, 0x87, 0xbf, 0xe2, 0x71, 0x44, 0xbf, + 0x87, 0xa8, 0x99, 0xbf, 0xd2, 0x2d, 0xfc, 0x3e, 0x67, 0x9b, 0x2f, 0xbf, 0x1e, 0x0f, 0x35, 0xbe, + 0x31, 0xf0, 0x80, 0xbe, 0x02, 0x2b, 0x1d, 0x3f, 0x16, 0x57, 0xe2, 0xbe, 0x02, 0xcd, 0xa0, 0xbf, + 0x2c, 0xb8, 0x05, 0xbe, 0x0f, 0xc0, 0x29, 0xc0, 0x3b, 0xc7, 0x18, 0xc0, 0x4a, 0x5f, 0x33, 0x3f, + 0x47, 0x2f, 0x89, 0x3f, 0x84, 0x10, 0xc0, 0xbf, 0xc4, 0x9f, 0xa4, 0xbe, 0x23, 0xcf, 0xb0, 0xbf, + 0x3c, 0xf3, 0x93, 0xbe, 0x50, 0x97, 0xec, 0xbf, 0xc7, 0xf4, 0x5e, 0xbf, 0x62, 0xfc, 0xcb, 0xbf, + 0x8d, 0x23, 0xc6, 0x3e, 0x92, 0x90, 0xcc, 0xbf, 0x9b, 0x3b, 0xa8, 0x3e, 0x9c, 0xa7, 0x26, 0xc0, + 0xac, 0x39, 0xf0, 0xbf, 0xd6, 0xeb, 0xef, 0xbf, 0xba, 0x45, 0xd6, 0xbf, 0xb8, 0x2d, 0xbf, 0xbf, + 0x5b, 0xa1, 0x4c, 0xc0, 0x1c, 0x38, 0x41, 0xbf, 0x60, 0x00, 0x04, 0x3e, 0xd3, 0x15, 0xa2, 0xbf, + 0x6b, 0x4b, 0x3e, 0xc0, 0xd4, 0x90, 0x0c, 0xc0, 0xb5, 0x67, 0x95, 0xbf, 0xbe, 0x70, 0x08, 0xc0, + 0x5b, 0x3d, 0x1a, 0xbf, 0xce, 0x1a, 0xda, 0xbf, 0xfd, 0x2a, 0x21, 0xbf, 0xed, 0x97, 0xb5, 0xbf, + 0x77, 0xf5, 0x04, 0xbf, 0x6f, 0x8f, 0x02, 0xbf, 0x6d, 0x68, 0x07, 0xc0, 0xbb, 0xda, 0xe4, 0xbe, + 0x77, 0xe8, 0xaa, 0xbf, 0xf5, 0x59, 0x67, 0xbf, 0xde, 0x05, 0x64, 0x3f, 0xf6, 0xf2, 0x3c, 0xbf, + 0x3f, 0x79, 0xd6, 0x3e, 0x30, 0x79, 0xbd, 0xbf, 0x5b, 0x42, 0x09, 0xbf, 0x04, 0x5c, 0x8d, 0x3e, + 0xf4, 0xd6, 0xb5, 0x3f, 0xf3, 0x8f, 0xd8, 0xbe, 0x53, 0x13, 0xef, 0xbf, 0x21, 0xcb, 0xd4, 0xbf, + 0x0f, 0xcc, 0x87, 0x3d, 0x72, 0x70, 0x56, 0x3f, 0xbe, 0xb4, 0x79, 0xbf, 0x50, 0x3e, 0xce, 0xbf, + 0x9a, 0x37, 0x8d, 0xbf, 0xc0, 0x50, 0xeb, 0x3c, 0x24, 0x01, 0xb2, 0xbf, 0x76, 0xbb, 0xec, 0xbf, + 0x86, 0xb1, 0x93, 0xbf, 0x55, 0xcf, 0xae, 0xbd, 0x7d, 0x13, 0xe5, 0xbe, 0x68, 0x83, 0xaa, 0xbf, + 0x66, 0xf0, 0xdc, 0xbf, 0x37, 0x0c, 0xcd, 0xbe, 0xe4, 0x19, 0x5d, 0xbd, 0xda, 0xdf, 0xc8, 0xbf, + 0x9d, 0x8f, 0xb8, 0xbf, 0x95, 0xc7, 0x03, 0xc0, 0x8d, 0xae, 0x42, 0x3d, 0x79, 0x8a, 0x4f, 0xbf, + 0x03, 0x38, 0xbf, 0xbf, 0x1c, 0x6c, 0xad, 0xbf, 0xdd, 0xb9, 0x6a, 0x3f, 0x42, 0xed, 0xa0, 0xbf, + 0x85, 0xe8, 0x00, 0xc0, 0xf4, 0xb5, 0x10, 0xbf, 0x83, 0xbc, 0x9a, 0xbe, 0x70, 0x0f, 0x06, 0xc0, + 0x8e, 0x8c, 0xe3, 0x3c, 0x71, 0xef, 0x64, 0xbf, 0x81, 0xe6, 0xbd, 0xbf, 0x9c, 0xed, 0x66, 0xbf, + 0xcd, 0xa2, 0x0c, 0xbf, 0x2c, 0xca, 0x6e, 0x3e, 0x65, 0x76, 0x93, 0xbf, 0x0b, 0x37, 0x91, 0xbf, + 0xe0, 0x99, 0x4c, 0xc0, 0x18, 0xc8, 0x40, 0xbf, 0xef, 0x32, 0x87, 0x3f, 0x55, 0xcf, 0x3a, 0xbf, + 0x25, 0xa5, 0x06, 0xc0, 0x4a, 0x36, 0xf7, 0xbf, 0x57, 0xe6, 0xd9, 0xbe, 0x0e, 0xa0, 0xe9, 0xbf, + 0x09, 0x43, 0x6e, 0xbf, 0x52, 0xa3, 0xb0, 0xbf, 0x6e, 0x91, 0x06, 0xbe, 0xeb, 0x32, 0xda, 0xbf, + 0x08, 0x88, 0x2b, 0xbf, 0x17, 0xe5, 0xa0, 0x3e, 0x4c, 0x72, 0x20, 0xbf, 0xae, 0x4c, 0x9d, 0xbf, + 0xcb, 0x0b, 0x00, 0xc0, 0xe8, 0xb7, 0x21, 0xc0, 0xcb, 0xf4, 0x85, 0xbf, 0x12, 0x77, 0x03, 0xc0, + 0x97, 0xf7, 0xe3, 0xbf, 0xe1, 0x5b, 0x6e, 0xbf, 0x5a, 0x75, 0x90, 0xbf, 0x75, 0xdb, 0x41, 0xbf, + 0xcf, 0x1d, 0xb6, 0xbd, 0x0c, 0x2c, 0xdb, 0xbe, 0x0b, 0xa6, 0xb8, 0xbf, 0xd5, 0x02, 0xf0, 0xbf, + 0x61, 0xc1, 0x98, 0xbf, 0x9b, 0x91, 0x2a, 0xbf, 0x0a, 0x53, 0x91, 0xbf, 0xb7, 0xa7, 0x92, 0xbf, + 0xec, 0xef, 0xcc, 0xbf, 0x0a, 0xc4, 0xe2, 0xbc, 0x9a, 0x53, 0xf3, 0xbf, 0x23, 0x40, 0x78, 0x3e, + 0x82, 0xd0, 0xb4, 0xbf, 0x46, 0x67, 0x34, 0xbf, 0x09, 0x1b, 0x01, 0xc0, 0x80, 0xd1, 0x7c, 0xbf, + 0xda, 0xb8, 0xf8, 0x3e, 0xf7, 0x47, 0x82, 0xbf, 0x4a, 0x18, 0xfb, 0xbf, 0xdf, 0x5d, 0xb2, 0xbf, + 0x28, 0x26, 0x0e, 0xc0, 0xf1, 0xe3, 0x14, 0x3f, 0x76, 0x67, 0x08, 0xbf, 0x58, 0xe1, 0xa4, 0xbf, + 0x5e, 0xb7, 0xaa, 0xbf, 0x57, 0x25, 0x04, 0xbf, 0x40, 0x1e, 0x3d, 0xbd, 0x8b, 0xc2, 0xb3, 0x3f, + 0xb8, 0xfe, 0xd0, 0xbf, 0xd2, 0x63, 0x08, 0xc0, 0x80, 0x04, 0x55, 0xbf, 0xc3, 0xd4, 0x4c, 0xbe, + 0xd8, 0xad, 0x01, 0xbf, 0xc6, 0x12, 0x55, 0xbe, 0x7d, 0x09, 0x8a, 0xbe, 0x06, 0x76, 0xc7, 0xbf, + 0x13, 0x01, 0xd8, 0xbf, 0xb9, 0x87, 0x5e, 0x3e, 0xe4, 0x40, 0xbb, 0xbf, 0xbc, 0x30, 0x8e, 0xbf, + 0xd9, 0x10, 0xa2, 0xbf, 0x81, 0x62, 0xd8, 0xbe, 0x5e, 0x9c, 0xe1, 0xbf, 0x44, 0x30, 0x96, 0xbf, + 0x3e, 0xc2, 0xb7, 0xbf, 0xe2, 0x28, 0x5f, 0xbf, 0xb2, 0xbb, 0x9c, 0x3e, 0xc5, 0x4f, 0x9f, 0xbf, + 0xaa, 0xc7, 0x23, 0xbf, 0x95, 0x96, 0xfd, 0xbe, 0x2e, 0x5d, 0x0e, 0xbf, 0x12, 0xca, 0xf2, 0xbf, + 0xc8, 0x12, 0x94, 0xbf, 0x96, 0x56, 0x9d, 0xbd, 0xef, 0xd3, 0x86, 0x3d, 0x3a, 0x13, 0x1f, 0xc0, + 0x1a, 0xe5, 0x0b, 0x3f, 0x90, 0xc0, 0x15, 0xc0, 0xf2, 0x1b, 0x90, 0x3f, 0x21, 0x6b, 0x35, 0xc0, + 0x70, 0xf2, 0xe7, 0xbf, 0x04, 0xda, 0x3b, 0xc0, 0x55, 0x44, 0xe2, 0xbe, 0xf4, 0xa9, 0x10, 0xc0, + 0x4e, 0x55, 0xf8, 0xbf, 0x1e, 0x95, 0x54, 0xbf, 0xef, 0xbf, 0xa9, 0xbf, 0x4d, 0xc4, 0x45, 0x3f, + 0x7b, 0xd9, 0x04, 0x3e, 0x19, 0xd2, 0xc9, 0xbf, 0x30, 0x30, 0x01, 0x3f, 0x04, 0xb2, 0x25, 0xbf, + 0x30, 0xc8, 0x94, 0xbf, 0x0e, 0xf7, 0x1c, 0x3f, 0xe0, 0x95, 0x0d, 0xc0, 0x83, 0xdf, 0xd3, 0x3e, + 0xe1, 0xe3, 0x7a, 0xbf, 0x8d, 0xce, 0x10, 0xc0, 0x40, 0x3c, 0xb4, 0xbf, 0xfa, 0x36, 0x57, 0xbf, + 0x73, 0x88, 0x8d, 0xbc, 0x8a, 0x75, 0xd3, 0xbf, 0x21, 0x9f, 0x92, 0xbf, 0xcb, 0x35, 0x52, 0xbd, + 0x97, 0x19, 0x53, 0x3f, 0x50, 0xa3, 0x56, 0x3f, 0x7a, 0x51, 0x23, 0xc0, 0xaf, 0xde, 0x46, 0xc0, + 0xef, 0x07, 0xa5, 0xbf, 0xd7, 0xb9, 0x8b, 0xbf, 0x04, 0xec, 0x92, 0xbf, 0xc2, 0xd3, 0x85, 0xbf, + 0x63, 0xa4, 0x0a, 0xc0, 0x54, 0x8f, 0x92, 0xbf, 0x6c, 0xa1, 0x94, 0xbf, 0x8d, 0x64, 0x39, 0xbc, + 0x9a, 0xa6, 0xa4, 0xbb, 0x10, 0x6d, 0xbc, 0xbf, 0xfd, 0x28, 0xe3, 0xbf, 0xa6, 0x2f, 0x71, 0x3b, + 0x9f, 0x62, 0xf3, 0xbf, 0xf5, 0xa3, 0xf3, 0xbf, 0x3a, 0x2a, 0x8b, 0xbf, 0x81, 0x6d, 0x5d, 0xbe, + 0x9b, 0xfe, 0x09, 0xbf, 0x47, 0x6e, 0x8d, 0xbf, 0xe3, 0x13, 0x44, 0xc0, 0xb1, 0xc6, 0x96, 0xbe, + 0x08, 0xf2, 0x1d, 0xc0, 0xe4, 0x44, 0xce, 0xbf, 0x65, 0xb4, 0x5d, 0xbf, 0xfa, 0xb2, 0xd7, 0xbf, + 0xff, 0x6e, 0x4b, 0x3e, 0xea, 0x49, 0xa3, 0xbf, 0x78, 0x89, 0x53, 0xc0, 0x07, 0xa4, 0x57, 0xbf, + 0x14, 0x24, 0x03, 0xc0, 0x08, 0x87, 0x12, 0xc0, 0x54, 0xa0, 0x6e, 0x3f, 0xa7, 0xe3, 0xcb, 0xbf, + 0xfa, 0x93, 0xd1, 0xbf, 0xd9, 0xb2, 0x51, 0x3e, 0xac, 0x56, 0xbe, 0xbf, 0xec, 0x2f, 0x1a, 0xc0, + 0x98, 0x13, 0xb5, 0xbf, 0x8b, 0xef, 0xde, 0x3d, 0xfa, 0xb3, 0xd5, 0x3e, 0x8e, 0x80, 0xb3, 0xbf, + 0x8a, 0xbd, 0x8e, 0x3f, 0xa8, 0xaf, 0x0f, 0xbf, 0x03, 0xcb, 0xd3, 0xbf, 0x6b, 0xd1, 0x3b, 0x3d, + 0xd5, 0xa4, 0xe7, 0xbf, 0xcf, 0x32, 0x05, 0xc0, 0xb4, 0xfb, 0xa8, 0xbf, 0x23, 0x3c, 0xe7, 0xbf, + 0xbd, 0x26, 0xae, 0xbf, 0x8f, 0x46, 0x26, 0xc0, 0xd7, 0xa7, 0x04, 0xbf, 0xbe, 0x58, 0xb6, 0xbe, + 0x14, 0xcf, 0xcf, 0xbd, 0x6a, 0x8c, 0x0f, 0x3f, 0x04, 0xf9, 0x8b, 0xbf, 0xfd, 0x17, 0xd4, 0xbe, + 0xf0, 0x2b, 0x19, 0xc0, 0xe3, 0x52, 0x9e, 0xbf, 0x94, 0x91, 0x8e, 0xbf, 0xa2, 0xe4, 0xe5, 0x3e, + 0xaa, 0x12, 0x54, 0xbe, 0x33, 0x88, 0x9f, 0xbf, 0xf4, 0x20, 0x41, 0xc0, 0x8c, 0xca, 0xb8, 0x3e, + 0x12, 0xa2, 0x28, 0xc0, 0x51, 0x57, 0x2c, 0xc0, 0x41, 0x1d, 0x83, 0xc0, 0xaa, 0xa7, 0x64, 0xbf, + 0x0f, 0x96, 0x4e, 0xbf, 0xff, 0x71, 0x30, 0xbf, 0x3c, 0x63, 0xcd, 0x3e, 0x59, 0x7f, 0x8f, 0xbf, + 0x0f, 0x5a, 0x14, 0xbe, 0x59, 0x08, 0xdf, 0xbf, 0xaa, 0x02, 0xc4, 0xbf, 0xcc, 0x97, 0x5f, 0xbf, + 0x8c, 0xad, 0xc6, 0xbf, 0xde, 0x32, 0xca, 0xbf, 0xbb, 0xfe, 0xb7, 0xbf, 0xa5, 0xad, 0xa9, 0x3f, + 0x41, 0xbe, 0x82, 0xbe, 0x39, 0x0f, 0xbb, 0xbf, 0x29, 0xf2, 0x06, 0xc0, 0x19, 0x78, 0x92, 0xbf, + 0x1c, 0xb8, 0x93, 0xbf, 0x86, 0xb7, 0xbb, 0xbf, 0x8a, 0xb8, 0x1a, 0xc0, 0x8d, 0x98, 0xc3, 0xbf, + 0xa4, 0x2e, 0x21, 0xc0, 0x59, 0xe7, 0x12, 0xc0, 0x3f, 0x34, 0x5d, 0xbf, 0xa3, 0x5e, 0x44, 0xbf, + 0x52, 0xc8, 0xd8, 0xbf, 0x99, 0xfb, 0xc4, 0xbf, 0x8c, 0x14, 0x27, 0xbf, 0xe1, 0x3c, 0xd3, 0xbe, + 0xd2, 0x42, 0x19, 0x3f, 0xbd, 0xbc, 0x23, 0xc0, 0x16, 0x96, 0x9b, 0xbe, 0x01, 0x87, 0xd3, 0xbf, + 0x96, 0x66, 0xdd, 0xbf, 0xa6, 0x6c, 0x4c, 0xbf, 0xae, 0xfa, 0x93, 0xbf, 0xc4, 0xf3, 0x3d, 0xc0, + 0xef, 0x1e, 0xd5, 0xbd, 0x8b, 0xa4, 0x93, 0xbf, 0xe9, 0x2b, 0x24, 0x3e, 0x01, 0xdf, 0x8b, 0xbf, + 0xba, 0x83, 0x26, 0xc0, 0x6a, 0xe1, 0xa7, 0xbf, 0x55, 0x67, 0x05, 0xbe, 0x68, 0x86, 0x49, 0xbf, + 0x22, 0xa7, 0xca, 0xbf, 0x6b, 0x65, 0x10, 0xc0, 0xf5, 0xf4, 0x2d, 0xc0, 0x45, 0x45, 0xd2, 0xbe, + 0x09, 0x2f, 0xb3, 0x3e, 0x79, 0x90, 0xb2, 0x3e, 0xf7, 0x51, 0x41, 0xbe, 0xd0, 0x3d, 0xe2, 0xbf, + 0x91, 0xfa, 0x21, 0xbc, 0xd3, 0x0a, 0xd7, 0x3b, 0x19, 0x21, 0xcd, 0xbf, 0x40, 0x3c, 0x49, 0xbf, + 0x3a, 0x5a, 0xba, 0xbf, 0x84, 0x81, 0xf4, 0x3e, 0x71, 0x14, 0x17, 0xbf, 0xeb, 0x74, 0x23, 0xbf, + 0x92, 0xf4, 0x86, 0xbf, 0xb6, 0x84, 0xcf, 0xbf, 0xff, 0x58, 0x97, 0x3d, 0x99, 0x63, 0x63, 0xc0, + 0x86, 0x54, 0x45, 0x3e, 0xea, 0xae, 0x64, 0xbf, 0xd7, 0xae, 0x26, 0xc0, 0x16, 0x93, 0xc8, 0xbf, + 0x7b, 0x27, 0x52, 0xbf, 0x27, 0x2d, 0x07, 0xc0, 0x2f, 0xd5, 0x9d, 0x3e, 0xa0, 0xf2, 0x5a, 0xc0, + 0xb6, 0x82, 0x9a, 0xba, 0xe0, 0x8a, 0xc5, 0xbf, 0x03, 0x7d, 0x1a, 0xc0, 0xfa, 0x4c, 0x2d, 0xbe, + 0x02, 0x38, 0x84, 0x3e, 0x7d, 0xae, 0x2c, 0xbf, 0xd9, 0x1d, 0xaf, 0x3e, 0x75, 0x05, 0xa6, 0x3f, + 0x49, 0x99, 0xdb, 0xbf, 0x2a, 0x22, 0xee, 0xbf, 0x07, 0x4f, 0x34, 0xbf, 0x3e, 0x58, 0xa0, 0xbf, + 0xda, 0xe0, 0x8c, 0xbf, 0x34, 0xda, 0x00, 0xc0, 0xf6, 0x3d, 0x49, 0xbd, 0x63, 0x96, 0xaf, 0xbf, + 0x84, 0xf6, 0x18, 0xbe, 0x2c, 0x82, 0xfb, 0xbf, 0xaa, 0x87, 0x13, 0xbf, 0xd6, 0x99, 0x27, 0xbf, + 0x9a, 0xaf, 0x86, 0xbf, 0xc5, 0xeb, 0xbe, 0xbe, 0xdb, 0x49, 0xdf, 0xbf, 0x4c, 0x87, 0x81, 0x3e, + 0xf1, 0xa5, 0xa7, 0xbf, 0xe2, 0x0d, 0xed, 0xbf, 0x6f, 0x49, 0x37, 0xbf, 0xaf, 0x02, 0xe4, 0xbe, + 0xf7, 0xd8, 0xed, 0xbf, 0x53, 0x68, 0xa1, 0xbd, 0x59, 0x57, 0xeb, 0xbf, 0xb2, 0x22, 0xbf, 0xbf, + 0xd9, 0x07, 0x9d, 0xbf, 0x8b, 0xff, 0x0e, 0xc0, 0xc7, 0x1a, 0xc6, 0xbf, 0x1c, 0x6e, 0xe1, 0xbf, + 0xb4, 0x7c, 0xdd, 0xbd, 0xcf, 0xc4, 0x03, 0xbf, 0x0c, 0xc2, 0x3a, 0xc0, 0x51, 0xc1, 0x20, 0xbf, + 0x0e, 0x44, 0xde, 0xbf, 0x3b, 0x91, 0x81, 0xbf, 0x56, 0xb5, 0x8c, 0xbf, 0x73, 0xa8, 0x92, 0xbf, + 0x9c, 0xa3, 0x56, 0xbf, 0xa2, 0xb7, 0x2f, 0xbf, 0x65, 0xcc, 0x0e, 0xbf, 0xcf, 0xb8, 0x92, 0x3e, + 0xf7, 0x17, 0xd1, 0xbf, 0xcc, 0x61, 0xca, 0xbf, 0xad, 0x31, 0xc9, 0xbf, 0xaf, 0x0d, 0xbb, 0xbf, + 0xbc, 0x98, 0x34, 0xbf, 0x04, 0x4b, 0x1e, 0xbf, 0xa2, 0x16, 0x2d, 0x3c, 0xd4, 0x20, 0x47, 0xc0, + 0x4e, 0x01, 0x11, 0xc0, 0xed, 0xa9, 0x11, 0xbf, 0xa9, 0x18, 0xf2, 0x3e, 0xa3, 0x58, 0xb4, 0xbe, + 0xbe, 0xc5, 0x59, 0xbf, 0xff, 0xbb, 0xb4, 0xbf, 0x1d, 0xf9, 0x04, 0xc0, 0xd5, 0x42, 0x0b, 0xc0, + 0x3a, 0x43, 0xa9, 0xbf, 0xaa, 0x94, 0xe1, 0xbf, 0x3e, 0x8b, 0x0b, 0xc0, 0x34, 0xf6, 0xc6, 0xbe, + 0xc0, 0xa0, 0x40, 0xc0, 0x1b, 0x07, 0x2b, 0xc0, 0x0a, 0xba, 0x3e, 0xc0, 0x57, 0x80, 0x76, 0xbf, + 0x84, 0xa3, 0x7d, 0xbe, 0x71, 0xd3, 0xcb, 0xbf, 0xc6, 0xfa, 0xc2, 0xbe, 0x4c, 0xcf, 0x39, 0xbf, + 0x2e, 0x7c, 0xdf, 0xbe, 0xb7, 0xe1, 0x93, 0xbf, 0xdb, 0xd3, 0x96, 0xbf, 0x6b, 0x2a, 0x1c, 0x3e, + 0x0f, 0x44, 0x10, 0xc0, 0xb2, 0xb3, 0x23, 0xbf, 0x7c, 0x06, 0x3e, 0xbf, 0x4e, 0xc6, 0xa3, 0xbf, + 0x43, 0x31, 0x42, 0xbe, 0xa8, 0xda, 0x67, 0x3f, 0x5d, 0x94, 0x22, 0xbf, 0xf1, 0x7a, 0xb2, 0xbf, + 0x90, 0xf3, 0xbb, 0xbf, 0x27, 0x34, 0xb0, 0xbf, 0x3c, 0x74, 0x6d, 0xbf, 0x6b, 0xa4, 0xd7, 0xbf, + 0x50, 0x33, 0xb7, 0xbc, 0x0f, 0xc4, 0x60, 0xbf, 0x29, 0x6b, 0x88, 0xbe, 0x20, 0x12, 0xc1, 0xbf, + 0x69, 0x84, 0x8b, 0x3e, 0x8f, 0xfb, 0x2a, 0xc0, 0xeb, 0xd2, 0x25, 0xc0, 0x27, 0x62, 0x36, 0xbf, + 0xb3, 0x74, 0x94, 0xbf, 0x81, 0xee, 0x1a, 0xbf, 0xdb, 0x96, 0x02, 0xc0, 0x82, 0xf5, 0x5f, 0xbf, + 0x25, 0x95, 0x29, 0xc0, 0xd0, 0xe4, 0xa8, 0xbf, 0x50, 0x63, 0xbc, 0x3e, 0x60, 0xb3, 0x8d, 0xbf, + 0x2e, 0xbb, 0xbe, 0xbf, 0xeb, 0xe2, 0xa1, 0xbf, 0xc0, 0xb5, 0x25, 0x3c, 0x16, 0x57, 0xd3, 0x3e, + 0xe8, 0xb6, 0xf1, 0xbf, 0x6b, 0x74, 0xc1, 0xbf, 0xe7, 0x16, 0x10, 0xbf, 0xef, 0x82, 0x1d, 0xbf, + 0xc1, 0x5e, 0xf7, 0xbf, 0x17, 0xbb, 0x08, 0xc0, 0x66, 0x7d, 0xda, 0xbe, 0x2d, 0x97, 0x9f, 0xbf, + 0x7f, 0xf9, 0x5f, 0xbf, 0x86, 0x88, 0x65, 0xc0, 0x63, 0xd5, 0x2b, 0xbf, 0xb8, 0xd1, 0xaa, 0xbf, + 0x6b, 0xbe, 0x8e, 0xbf, 0x95, 0xfc, 0x84, 0x3e, 0x70, 0x0c, 0xa1, 0xbf, 0x59, 0x62, 0xa3, 0xbf, + 0xcd, 0x9e, 0x1e, 0xbf, 0x71, 0x6c, 0x28, 0xbf, 0xec, 0xb8, 0xd2, 0xbe, 0xe6, 0x30, 0xef, 0xbf, + 0xcf, 0x9b, 0x57, 0xbf, 0x15, 0x18, 0xaa, 0xbf, 0x90, 0xd7, 0xd8, 0xbf, 0xd2, 0x28, 0xef, 0xbe, + 0x91, 0x31, 0x9d, 0xbf, 0xea, 0x35, 0x1c, 0xbf, 0x8c, 0xa0, 0x4c, 0x3d, 0xee, 0xf7, 0x20, 0xbe, + 0x07, 0xd2, 0x2c, 0x3f, 0xc7, 0x8f, 0x17, 0x3f, 0x18, 0xea, 0xda, 0xbf, 0x28, 0x9b, 0x1b, 0xbf, + 0x39, 0xcf, 0x41, 0x3f, 0x96, 0x66, 0xc4, 0xbf, 0x95, 0xdb, 0x91, 0xbf, 0x83, 0x6c, 0x74, 0xbf, + 0xd1, 0x38, 0x68, 0xbf, 0xd2, 0xd4, 0x2a, 0x3d, 0x2a, 0x0c, 0x6b, 0x3d, 0x69, 0xfe, 0xdd, 0xbf, + 0xf8, 0x25, 0x0d, 0xc0, 0x61, 0xaa, 0xb7, 0xbf, 0x61, 0x87, 0xb1, 0xbe, 0x1a, 0x79, 0xcc, 0xbf, + 0xac, 0x3b, 0xf6, 0xbf, 0x17, 0x7c, 0xd2, 0xbf, 0xf9, 0x7c, 0x1e, 0xbf, 0x4b, 0xca, 0xd7, 0x3e, + 0x6e, 0x72, 0xa9, 0xbf, 0xde, 0xe2, 0x09, 0x3f, 0xb3, 0xcc, 0xd6, 0xbf, 0x02, 0x24, 0xdd, 0xbf, + 0x79, 0x64, 0x57, 0xbf, 0x39, 0xbf, 0x8b, 0x3f, 0xca, 0x75, 0xbb, 0x3c, 0x09, 0xda, 0x27, 0xc0, + 0xe7, 0xb1, 0x8c, 0xbf, 0x9b, 0x21, 0xa8, 0xbf, 0x58, 0x58, 0xc4, 0xbf, 0x3c, 0x43, 0x3f, 0x3e, + 0xda, 0x43, 0xa6, 0xbf, 0x57, 0xd1, 0x09, 0x3f, 0xd6, 0xe7, 0xd6, 0xbf, 0xd4, 0xef, 0x87, 0xbf, + 0x2b, 0x2f, 0xb0, 0xbd, 0xad, 0x5a, 0x70, 0xbf, 0x45, 0x43, 0x44, 0xbf, 0xe3, 0x54, 0x7c, 0xbe, + 0x09, 0x21, 0x87, 0xbf, 0x07, 0xaa, 0x8f, 0xbe, 0x5c, 0xc8, 0x07, 0xc0, 0x3f, 0x72, 0x85, 0xbf, + 0xfe, 0xf9, 0x35, 0xbf, 0x42, 0x36, 0xb2, 0xbf, 0xf3, 0x30, 0x0a, 0xc0, 0x8e, 0xfb, 0x24, 0xc0}; +unsigned char conv2d_winograd_fp32_ker[] = { + 0xd1, 0x12, 0xc5, 0xbf, 0x66, 0x9c, 0x03, 0xc0, 0xc3, 0x13, 0xc7, 0xbf, 0x17, 0xc4, 0x7b, 0xbf, + 0x3c, 0xae, 0xf5, 0xbf, 0xb4, 0x49, 0x25, 0xc0, 0xe7, 0x63, 0x0a, 0xc0, 0xa8, 0x35, 0xa5, 0xbf, + 0xbc, 0x40, 0x1d, 0xc0, 0x18, 0xa1, 0x9a, 0xbf, 0x59, 0x60, 0x4c, 0xbf, 0xbb, 0x2c, 0xe7, 0xbf, + 0x33, 0xe3, 0x3c, 0xc0, 0xdb, 0xfa, 0x93, 0xbf, 0x66, 0xab, 0x88, 0xc0, 0xce, 0xe3, 0x32, 0xc0, + 0x7e, 0x14, 0x3c, 0xbf, 0xa5, 0x58, 0x2a, 0xc0, 0xb3, 0x48, 0x23, 0xc0, 0x68, 0x52, 0x14, 0xc0, + 0xae, 0xa7, 0x4d, 0xc0, 0x29, 0x1b, 0xf6, 0xbf, 0xa3, 0xfe, 0x0f, 0xc0, 0xd2, 0xf9, 0xc9, 0xbe, + 0x9f, 0xac, 0x8b, 0xbf, 0xd3, 0x2f, 0x8c, 0xbf, 0x46, 0xd7, 0xe1, 0xbf, 0xec, 0x61, 0x48, 0xc0, + 0x73, 0x02, 0x1d, 0xc0, 0xcb, 0x7e, 0x30, 0xc0, 0xda, 0x50, 0xe0, 0xbf, 0xb8, 0x3f, 0x40, 0xbf, + 0x26, 0x74, 0x1d, 0xc0, 0x81, 0x63, 0x90, 0xc0, 0xa4, 0x32, 0x91, 0xbf, 0x0a, 0x12, 0x62, 0xc0, + 0x63, 0x1a, 0x19, 0xc0, 0xca, 0x35, 0x1a, 0xc0, 0x38, 0xc4, 0x20, 0xbf, 0x37, 0xa1, 0x63, 0xc0, + 0x2e, 0xa0, 0x4c, 0xc0, 0x52, 0x2f, 0x32, 0xc0, 0xfe, 0x8b, 0xdb, 0xbf, 0x84, 0x23, 0xaf, 0xbf, + 0x1d, 0x2b, 0xc5, 0xbf, 0xa9, 0x2d, 0x12, 0xc0, 0x04, 0x8c, 0xfa, 0xbf, 0xab, 0x17, 0x33, 0xc0, + 0x69, 0x7a, 0x29, 0xc0, 0xef, 0x06, 0x8d, 0xc0, 0x26, 0xb7, 0x6a, 0xc0, 0x58, 0x7a, 0xb3, 0xbf, + 0x7d, 0x57, 0x45, 0xc0, 0xd6, 0xdb, 0x4c, 0xc0, 0x5d, 0x46, 0x28, 0xbf, 0xaa, 0x37, 0xcb, 0xbf, + 0xdf, 0x07, 0x1c, 0xc0, 0x1a, 0x30, 0x94, 0xbf, 0x5a, 0x0c, 0x1d, 0xc0, 0x2e, 0x86, 0xf7, 0xbf, + 0x64, 0x2c, 0x7a, 0xc0, 0x2b, 0x33, 0xc2, 0xbf, 0x0b, 0x55, 0x3e, 0xc0, 0x5b, 0x0e, 0x7d, 0xbf, + 0x57, 0x9f, 0x01, 0xbe, 0x8c, 0x7b, 0x6c, 0xc0, 0x24, 0xc1, 0x13, 0xc0, 0xba, 0xaf, 0x5e, 0xbf, + 0xab, 0xed, 0x31, 0xc0, 0x18, 0xae, 0xe3, 0xbf, 0x09, 0xb0, 0x00, 0xc0, 0x0c, 0xd3, 0x41, 0xc0, + 0x07, 0x12, 0xea, 0xbf, 0x50, 0xff, 0xfd, 0xbf, 0x25, 0xed, 0x06, 0xc0, 0x7b, 0xd6, 0x81, 0xbf, + 0xef, 0xff, 0xb0, 0xbf, 0x0c, 0x48, 0xce, 0xbf, 0x35, 0x75, 0x15, 0xbf, 0x9f, 0xa2, 0x57, 0xc0, + 0x8e, 0xac, 0xfd, 0xbf, 0x5a, 0xf2, 0xab, 0xbf, 0xfd, 0xcb, 0x05, 0xc0, 0xd4, 0xcd, 0xa1, 0xbf, + 0xb2, 0x93, 0xdf, 0xbf, 0x25, 0x68, 0x32, 0xc0, 0x4d, 0x65, 0x2a, 0xc0, 0xdd, 0xd4, 0x8f, 0xbf, + 0x62, 0xb2, 0x06, 0xc0, 0x96, 0x0a, 0x27, 0xc0, 0x1e, 0x78, 0xf7, 0xbd, 0xd3, 0x57, 0xd7, 0xbf, + 0xb9, 0xc5, 0x3b, 0xc0, 0xc8, 0x8c, 0x94, 0xbf, 0xf8, 0x6c, 0xc6, 0xbf, 0x8d, 0xbb, 0x39, 0xc0, + 0x10, 0x02, 0x61, 0xbf, 0xdf, 0x81, 0x01, 0xc0, 0xa0, 0x34, 0x06, 0xc0, 0xc5, 0xbb, 0xb5, 0xbf, + 0x39, 0x89, 0x0c, 0xc0, 0x29, 0xdd, 0xe9, 0xbf, 0x11, 0x37, 0x32, 0xbf, 0x82, 0xb6, 0x3a, 0xc0, + 0xaa, 0xab, 0x12, 0xc0, 0x6a, 0xa4, 0xef, 0xbf, 0x8a, 0x91, 0x43, 0xc0, 0x41, 0x7a, 0xd0, 0xbf, + 0x2e, 0xe9, 0x07, 0xc0, 0xa1, 0x5e, 0xbb, 0xbf, 0x9d, 0x4a, 0xcf, 0xbf, 0x2d, 0xfb, 0x40, 0xc0, + 0x54, 0x21, 0xd4, 0xbf, 0xb1, 0xa6, 0x28, 0xbf, 0x3c, 0x40, 0xc6, 0xbd, 0xed, 0x8b, 0x08, 0xc0, + 0x90, 0xca, 0x3b, 0xc0, 0x1f, 0xf1, 0xbb, 0xbf, 0x66, 0xe0, 0xcf, 0xbf, 0x24, 0xd9, 0x74, 0xc0, + 0x88, 0xbe, 0x6c, 0xc0, 0x35, 0x92, 0x89, 0xbf, 0x8d, 0xae, 0xb7, 0xbf, 0x42, 0x4f, 0x3d, 0xc0, + 0xbc, 0x10, 0x30, 0xc0, 0x6d, 0x15, 0x0c, 0xc0, 0x5f, 0x9a, 0x10, 0xc0, 0x86, 0xa9, 0x33, 0xc0, + 0xc7, 0x8d, 0xae, 0xbf, 0xed, 0xa3, 0xce, 0xbf, 0x59, 0xf1, 0xd0, 0xbf, 0x9b, 0xf1, 0xbe, 0xbf, + 0x4d, 0x62, 0xe8, 0xbf, 0x64, 0x45, 0x3f, 0xc0, 0xa6, 0x45, 0x45, 0xc0, 0x26, 0x87, 0x70, 0xbf, + 0x69, 0xe8, 0xe5, 0xbf, 0x35, 0xf5, 0x26, 0xc0, 0xa1, 0xc5, 0x56, 0xbf, 0x56, 0x30, 0x32, 0xc0, + 0x63, 0xa3, 0x38, 0xc0, 0x25, 0xdc, 0x5d, 0xc0, 0xae, 0x18, 0x07, 0xc0, 0xe6, 0x42, 0xdc, 0xbf, + 0xae, 0x60, 0xc9, 0xbf, 0x50, 0x90, 0xa9, 0xbf, 0xf7, 0xda, 0xcb, 0xbf, 0xc9, 0x2f, 0x4c, 0xc0, + 0x07, 0x51, 0xb3, 0xbd, 0x97, 0xfb, 0xd7, 0xbf, 0xe4, 0x22, 0x4a, 0xbf, 0xfb, 0x77, 0x27, 0xc0, + 0xee, 0x24, 0x31, 0xc0, 0x34, 0x42, 0xf5, 0xbf, 0xff, 0x2a, 0xa0, 0xbf, 0x7b, 0xee, 0x47, 0xc0, + 0xb8, 0xac, 0xc0, 0xbf, 0x47, 0xba, 0x99, 0xbf, 0xf4, 0x7d, 0x18, 0xc0, 0x11, 0xb1, 0x47, 0xc0, + 0xa3, 0x73, 0x75, 0xc0, 0xb7, 0x88, 0xc3, 0xbf, 0x5f, 0x30, 0x29, 0xc0, 0x03, 0xdd, 0x01, 0xc0, + 0x38, 0x18, 0x24, 0xc0, 0x42, 0x1b, 0x51, 0xc0, 0x68, 0x8d, 0xff, 0xbf, 0xd2, 0xfb, 0x27, 0xbf, + 0xfd, 0x30, 0x9f, 0xbe, 0x0b, 0x43, 0x18, 0xbf, 0x79, 0x8b, 0xd2, 0xbf, 0x55, 0x22, 0xfc, 0xbf, + 0x22, 0xc3, 0xa6, 0xbf, 0x88, 0x8b, 0x44, 0xc0, 0x4e, 0x84, 0x30, 0xc0, 0xb0, 0xf0, 0x38, 0xc0, + 0x38, 0xf4, 0x63, 0x3e, 0xd2, 0x48, 0xe5, 0xbf, 0x82, 0x75, 0x09, 0xc0, 0xe9, 0x6a, 0xf4, 0xbf, + 0x92, 0xf7, 0x4a, 0xbf, 0x07, 0x83, 0x57, 0xbf, 0x25, 0xf1, 0x09, 0xc0, 0x98, 0xd2, 0x77, 0x3e, + 0x45, 0xa7, 0x67, 0xc0, 0x2e, 0xed, 0x0e, 0xc0, 0x07, 0xa4, 0x8c, 0xbe, 0x7d, 0x0a, 0xa3, 0xbf, + 0x66, 0x26, 0x2a, 0xc0, 0x7e, 0x8b, 0xdb, 0xbf, 0xb9, 0x1d, 0x39, 0xbf, 0x35, 0x86, 0x09, 0xc0, + 0x7e, 0xd8, 0xa2, 0x3e, 0x40, 0xc5, 0x97, 0xbf, 0x4c, 0x39, 0x26, 0xc0, 0x9c, 0x8b, 0xbc, 0xbf, + 0x33, 0xf5, 0x8a, 0xbf, 0x49, 0x27, 0x29, 0xc0, 0x87, 0x98, 0xba, 0xbf, 0x0a, 0x73, 0x5d, 0xbf, + 0x74, 0x4c, 0x20, 0xc0, 0x35, 0x16, 0x75, 0xbf, 0xfe, 0xac, 0x19, 0xc0, 0x2e, 0x66, 0x0f, 0xc0, + 0xe5, 0x29, 0x44, 0xbe, 0x28, 0x3d, 0x0c, 0xc0, 0x0b, 0x54, 0x31, 0xc0, 0x06, 0x95, 0xdc, 0xbf, + 0x47, 0xf5, 0x41, 0xc0, 0xac, 0xfa, 0x15, 0xc0, 0xf1, 0x05, 0x6e, 0xc0, 0xe1, 0x4e, 0x9a, 0xbf, + 0x01, 0xe6, 0xac, 0xbf, 0xa3, 0x29, 0x3b, 0xc0, 0x7d, 0xda, 0x49, 0xc0, 0x58, 0xd2, 0x2d, 0xc0, + 0x51, 0xc9, 0x83, 0xc0, 0x8e, 0x2e, 0x61, 0xc0, 0xe2, 0x45, 0xe8, 0xbf, 0x09, 0x05, 0x2a, 0xc0, + 0xac, 0x5c, 0xaa, 0xbf, 0x8b, 0xed, 0x6e, 0xc0, 0x23, 0xa2, 0x03, 0xc0, 0xb9, 0x25, 0x66, 0xbf, + 0xd6, 0x58, 0xda, 0xbf, 0x6c, 0xc2, 0x6c, 0xc0, 0x63, 0x6c, 0x93, 0xbf, 0x89, 0xa4, 0xe9, 0xbf, + 0x35, 0xc3, 0xcc, 0xbe, 0xbc, 0x4f, 0x40, 0xc0, 0xcb, 0xc8, 0x77, 0xc0, 0xcb, 0x3f, 0x1a, 0x3d, + 0x70, 0xfe, 0x5d, 0xc0, 0xa8, 0x2f, 0xa2, 0xbf, 0xae, 0xbe, 0x53, 0xc0, 0x2f, 0x3e, 0xbb, 0xbf, + 0xe7, 0x1b, 0xd6, 0xbf, 0x84, 0x25, 0xfb, 0xbf, 0xe2, 0x32, 0x49, 0xc0, 0x74, 0x36, 0xce, 0xbb, + 0x4b, 0x7e, 0xab, 0xbe, 0x51, 0x3c, 0x4b, 0xc0, 0x31, 0xca, 0xbe, 0xbf, 0xfa, 0x50, 0xa9, 0xbe, + 0x84, 0x47, 0xa2, 0xbf, 0xd6, 0x22, 0xc3, 0xbf, 0xb0, 0x60, 0x6e, 0xc0, 0x7c, 0xba, 0x9c, 0xbf, + 0xf2, 0x93, 0x9d, 0xbf, 0xad, 0xb4, 0xe5, 0xbf, 0x4b, 0x63, 0xc8, 0xbf, 0x13, 0x8b, 0x1f, 0xc0, + 0xc7, 0x17, 0x8d, 0xbf, 0x9e, 0x95, 0x04, 0xc0, 0xd7, 0xfa, 0x0f, 0xc0, 0xb4, 0x93, 0x11, 0xc0, + 0x44, 0x4e, 0x52, 0xbf, 0x9d, 0x4c, 0x4a, 0xbf, 0x79, 0x5f, 0x08, 0xc0, 0x37, 0x0f, 0x23, 0xbf, + 0xff, 0x89, 0x82, 0x3f, 0x46, 0x23, 0x42, 0xc0, 0xe1, 0x51, 0x54, 0xc0, 0xc0, 0x10, 0xa2, 0xbf, + 0xcf, 0x4f, 0x15, 0xc0, 0x63, 0xe6, 0x01, 0xc0, 0x05, 0x19, 0x07, 0xc0, 0x07, 0x75, 0x34, 0xc0, + 0xa0, 0x0b, 0xcb, 0xbf, 0xa4, 0x56, 0x4c, 0xc0, 0x64, 0x3f, 0x1d, 0xc0, 0x39, 0xce, 0x06, 0xc0, + 0xa7, 0xc3, 0x38, 0xc0, 0x32, 0x37, 0x84, 0xbf, 0xaf, 0xb9, 0x5e, 0xc0, 0xbc, 0x56, 0xa8, 0xbf, + 0x6e, 0xb1, 0x86, 0xbf, 0xcd, 0x4f, 0x3b, 0xc0, 0x7b, 0x60, 0x79, 0xbf, 0x03, 0xee, 0x4b, 0xbf, + 0x2a, 0x74, 0xa5, 0xbf, 0x1a, 0xf4, 0x5f, 0xc0, 0xc7, 0xfd, 0x6b, 0xbd, 0x7c, 0xeb, 0x98, 0x3d, + 0xa6, 0x33, 0x08, 0xc0, 0x61, 0xd8, 0xc5, 0xbf, 0xd2, 0x11, 0x07, 0xc0, 0x0d, 0xed, 0x7e, 0xc0, + 0xee, 0x16, 0xac, 0xbf, 0x0c, 0x68, 0x4e, 0xc0, 0x5a, 0x21, 0xe0, 0xbf, 0x68, 0x60, 0x07, 0xc0, + 0xb4, 0x9d, 0x11, 0xc0, 0x0b, 0x1a, 0xd1, 0xbd, 0x77, 0xc6, 0x04, 0xc0, 0x89, 0x58, 0xe5, 0xbf, + 0x77, 0xa7, 0x1a, 0xc0, 0x7a, 0xc2, 0x53, 0xc0, 0xb6, 0xb3, 0xce, 0xbf, 0xe1, 0xeb, 0x0c, 0xc0, + 0x0e, 0x1c, 0xf9, 0xbf, 0x6c, 0xa8, 0xcf, 0xbf, 0x00, 0x20, 0x1f, 0xc0, 0x01, 0xfc, 0x95, 0xbf, + 0x31, 0x1e, 0x48, 0xbf, 0xfd, 0x12, 0xe6, 0xbf, 0x81, 0x24, 0x23, 0xc0, 0x06, 0xc1, 0x62, 0xc0, + 0xda, 0x1a, 0x02, 0xc0, 0x53, 0xab, 0x10, 0xbe, 0xcc, 0x2b, 0x66, 0xc0, 0x68, 0xdb, 0xb5, 0xbf, + 0x3a, 0x14, 0x16, 0xc0, 0xf2, 0x92, 0xba, 0xbf, 0x50, 0xd3, 0x20, 0xc0, 0xf4, 0x46, 0xee, 0xbf, + 0xf8, 0x4d, 0x00, 0xc0, 0xb7, 0x7d, 0xf0, 0xbe, 0xe2, 0x46, 0x40, 0xc0, 0x86, 0xc4, 0x4a, 0xc0, + 0x3a, 0x21, 0x35, 0xc0, 0x6b, 0x7b, 0xef, 0xbf, 0x04, 0xc5, 0x19, 0xc0, 0x0c, 0x46, 0xe5, 0xbf, + 0xe1, 0x5f, 0x00, 0xc0, 0xba, 0xf8, 0x32, 0xc0, 0xa8, 0x34, 0x52, 0xbf, 0x58, 0x47, 0x3b, 0xbf, + 0x9c, 0x56, 0xbe, 0xbf, 0x30, 0xbd, 0x34, 0xc0, 0xa3, 0x7f, 0xfa, 0xbf, 0x79, 0xa9, 0x6c, 0xbf, + 0x82, 0x54, 0x06, 0xc0, 0x78, 0x6d, 0xe6, 0xbf, 0x49, 0x73, 0x72, 0xbf, 0x94, 0x7c, 0x0e, 0xc0, + 0x8f, 0x46, 0xf4, 0xbf, 0xc6, 0xcb, 0x5c, 0xc0, 0x08, 0x31, 0x45, 0xc0, 0x0b, 0x8a, 0x37, 0xc0, + 0xc7, 0x4e, 0x1f, 0xc0, 0xac, 0xb9, 0x27, 0xc0, 0xc1, 0x09, 0x45, 0xc0, 0xe8, 0x93, 0x54, 0xc0, + 0x8f, 0x84, 0x28, 0xbf, 0x21, 0xb7, 0x83, 0xc0, 0x6b, 0xb1, 0xe1, 0xbf, 0x38, 0xb3, 0x00, 0xc0, + 0x24, 0x03, 0x1c, 0xc0, 0x4d, 0x70, 0xf8, 0xbf, 0xc9, 0xa4, 0x84, 0x3d, 0x7f, 0x79, 0x38, 0xc0, + 0xd2, 0x7f, 0xe0, 0xbf, 0xc4, 0x1b, 0xeb, 0xbf, 0x57, 0x33, 0xc4, 0xbf, 0x17, 0x9d, 0xa7, 0xbf, + 0x25, 0xe4, 0x6d, 0xc0, 0x18, 0x23, 0x31, 0xc0, 0x2b, 0x95, 0x02, 0xc0, 0x01, 0xdc, 0x79, 0xc0, + 0x7d, 0x67, 0xba, 0xbf, 0x60, 0x75, 0x3d, 0xc0, 0xf4, 0x00, 0x45, 0xc0, 0xcd, 0x38, 0x2f, 0xc0, + 0x75, 0xea, 0xea, 0xbf, 0x64, 0x68, 0xdc, 0xbf, 0x4d, 0x11, 0x95, 0xbf, 0x56, 0x45, 0x19, 0xc0, + 0xf6, 0x40, 0xab, 0xbf, 0x84, 0x5c, 0xd5, 0xbf, 0x19, 0x07, 0xa8, 0xbf, 0x75, 0xb9, 0x21, 0xc0, + 0x9b, 0x73, 0x2b, 0xbf, 0xeb, 0xfa, 0x91, 0xbf, 0x10, 0xaf, 0x38, 0xc0, 0xf2, 0xb6, 0x3f, 0xbf, + 0x5f, 0x50, 0x03, 0xc0, 0x2b, 0x48, 0x2d, 0xc0, 0x44, 0x9a, 0x35, 0xc0, 0x8c, 0x38, 0xf1, 0xbf, + 0x09, 0x4a, 0x9e, 0xbf, 0x91, 0xdc, 0x6f, 0xc0, 0x1e, 0x90, 0x40, 0xc0, 0xc3, 0xc5, 0x17, 0xc0, + 0xc3, 0x6c, 0x07, 0xc0, 0xcd, 0x5f, 0x25, 0xc0, 0x27, 0x5f, 0x49, 0xc0, 0x64, 0x84, 0x34, 0xc0, + 0x80, 0x0a, 0x4c, 0xc0, 0xa4, 0x6b, 0xa7, 0xbf, 0x8d, 0xbb, 0xae, 0x3e, 0x66, 0x92, 0x19, 0xc0, + 0x33, 0xde, 0x37, 0xc0, 0x7f, 0x82, 0x0e, 0xc0, 0xfa, 0xdc, 0x11, 0xc0, 0xb4, 0xc3, 0x16, 0xc0, + 0xf8, 0x3a, 0x14, 0xc0, 0xec, 0x34, 0x2b, 0xc0, 0xbf, 0xd4, 0x17, 0xc0, 0x5f, 0x24, 0x9d, 0xbf, + 0x3c, 0xfb, 0xd3, 0xbf, 0xd0, 0xda, 0xcd, 0xbf, 0xa0, 0x95, 0x0a, 0xc0, 0x3e, 0x09, 0x45, 0xc0, + 0x32, 0xae, 0x80, 0xbf, 0x3f, 0x25, 0x3e, 0xc0, 0xb3, 0xd6, 0xe4, 0xbc, 0x57, 0x62, 0xcd, 0xbf, + 0x86, 0x7f, 0xa9, 0xbf, 0x31, 0x9e, 0x38, 0xbf, 0xf2, 0x00, 0xc9, 0xbf, 0x8a, 0x7d, 0x61, 0xbf, + 0x35, 0x14, 0xed, 0xbf, 0x4d, 0x2f, 0x9d, 0xbf, 0x80, 0xa3, 0x9c, 0xbf, 0x20, 0x98, 0x3b, 0xc0, + 0x26, 0x92, 0x9b, 0xc0, 0x1a, 0xa7, 0x39, 0xc0, 0xe9, 0xfe, 0x9e, 0xbc, 0xda, 0x3f, 0xb6, 0x3e, + 0x3c, 0x5e, 0x06, 0xc0, 0x27, 0x91, 0xd3, 0xbf, 0x05, 0x47, 0x07, 0xc0, 0x6f, 0x66, 0x02, 0xc0, + 0x6a, 0xce, 0x0e, 0xbf, 0xdb, 0xe7, 0x28, 0xbf, 0x67, 0xc8, 0x24, 0xc0, 0x3f, 0xb3, 0x44, 0xc0, + 0x8f, 0x11, 0x08, 0x3f, 0x36, 0x91, 0x49, 0xc0, 0x17, 0x63, 0x1f, 0x3e, 0xf6, 0x49, 0x06, 0xc0, + 0x7b, 0x05, 0xd6, 0xbe, 0x9d, 0x59, 0x95, 0xbf, 0x4f, 0xe2, 0x2f, 0xc0, 0x4a, 0x4a, 0x08, 0xc0, + 0xff, 0xba, 0xfa, 0xbf, 0xa7, 0xa2, 0x31, 0xc0, 0x14, 0x06, 0xdc, 0xbf, 0x63, 0x7d, 0x49, 0xbf, + 0x1e, 0xfa, 0xee, 0xbf, 0xb5, 0xc6, 0xc2, 0xbf, 0x2f, 0x87, 0xff, 0xbf, 0xf2, 0x7a, 0x02, 0xc0, + 0x91, 0x54, 0x0a, 0xc0, 0x55, 0x4c, 0x10, 0xc0, 0x9a, 0xfa, 0xfe, 0xbf, 0x50, 0xab, 0x1c, 0xbe, + 0x84, 0x99, 0x48, 0xc0, 0xe6, 0xdc, 0xdc, 0xbf, 0x74, 0x7d, 0x51, 0xbf, 0x94, 0xb6, 0xae, 0xbf, + 0xde, 0xcc, 0x36, 0xc0, 0xcd, 0x78, 0xb9, 0xbf, 0x2d, 0x2c, 0x16, 0xc0, 0x78, 0x0a, 0x65, 0xc0, + 0x75, 0x27, 0x61, 0xc0, 0x1e, 0x51, 0x15, 0xc0, 0x02, 0x43, 0x9b, 0xbf, 0x20, 0xc1, 0xbb, 0xbf, + 0xe4, 0x17, 0xd2, 0xbe, 0x59, 0x71, 0x5d, 0xc0, 0x81, 0x69, 0x02, 0xc0, 0x48, 0xdd, 0x4d, 0xc0, + 0x8f, 0x06, 0xa8, 0xbf, 0x09, 0x5b, 0x01, 0xc0, 0x68, 0x7d, 0x9f, 0xbf, 0xaf, 0x99, 0x07, 0xbf, + 0xa8, 0x24, 0xfe, 0xbf, 0x9b, 0xfa, 0x13, 0xc0, 0x9c, 0x95, 0x55, 0xc0, 0x1c, 0x64, 0x11, 0xc0, + 0x5c, 0x3e, 0x46, 0xc0, 0x97, 0xc2, 0x37, 0xc0, 0x5a, 0x0c, 0xf9, 0xbf, 0x10, 0x36, 0x23, 0xbf, + 0xcb, 0xe2, 0x91, 0xbf, 0xdc, 0x09, 0xa7, 0xbf, 0xf9, 0x81, 0x05, 0xc0, 0xb8, 0xdc, 0x49, 0xc0, + 0x1d, 0x37, 0x75, 0xc0, 0x8d, 0xe3, 0x2e, 0xc0, 0x18, 0x78, 0xd4, 0xbf, 0x28, 0x87, 0x45, 0xc0, + 0xad, 0xf0, 0xcc, 0xbf, 0xaa, 0x68, 0xc1, 0xbf, 0x8f, 0x27, 0x9e, 0xbf, 0x5f, 0xc9, 0x1e, 0xc0, + 0x0c, 0x12, 0xeb, 0xbe, 0xe3, 0x58, 0xe7, 0xbf, 0x7d, 0xfb, 0xfd, 0xbf, 0x3a, 0xaa, 0x6d, 0x3f, + 0x53, 0x6f, 0x7c, 0xc0, 0xbb, 0x98, 0x80, 0xc0, 0x92, 0x05, 0x47, 0xc0, 0xa5, 0xeb, 0xdb, 0xbf, + 0x34, 0x63, 0xe0, 0xbf, 0xff, 0x18, 0x51, 0xbf, 0x09, 0x73, 0x02, 0xc0, 0x7e, 0x41, 0x8a, 0xbf, + 0x37, 0xb7, 0xd0, 0xbf, 0x7d, 0x4c, 0x93, 0xc0, 0xda, 0xa1, 0xe0, 0xbf, 0x11, 0xc1, 0x72, 0xbf, + 0x76, 0x7e, 0x75, 0xbe, 0x53, 0x31, 0xe7, 0xbe, 0x39, 0x98, 0x5f, 0xc0, 0xa4, 0x83, 0xec, 0xbf, + 0x8a, 0x47, 0x47, 0xc0, 0xfc, 0x3e, 0x38, 0xc0, 0xa1, 0x64, 0x3e, 0xc0, 0xce, 0xd7, 0xee, 0xbf, + 0x74, 0xf3, 0x86, 0xbf, 0xbd, 0x5b, 0x7a, 0x3e, 0x66, 0x62, 0x19, 0xc0, 0xfe, 0xc7, 0x30, 0xc0, + 0x2f, 0x0a, 0x22, 0xc0, 0xe8, 0x25, 0x0c, 0xc0, 0x25, 0x05, 0x2c, 0xc0, 0x2c, 0x01, 0x19, 0xc0, + 0x7e, 0xd5, 0x06, 0xc0, 0xb2, 0xa4, 0x07, 0xc0, 0xcd, 0x4c, 0xb7, 0xbe, 0x64, 0x1c, 0x8c, 0xbf, + 0x74, 0x06, 0xd1, 0xbf, 0x78, 0x57, 0xdc, 0xbf, 0x6f, 0x4c, 0x2a, 0xc0, 0x63, 0xe0, 0x5f, 0xc0, + 0xf8, 0xe8, 0x36, 0xbe, 0x04, 0xef, 0x11, 0xc0, 0xab, 0x50, 0xb1, 0xbf, 0xf5, 0xd5, 0x1e, 0xc0, + 0xe4, 0xf4, 0xd6, 0xbf, 0x22, 0xf9, 0xc9, 0xbf, 0x23, 0x35, 0x43, 0xc0, 0x71, 0x0d, 0x14, 0xc0, + 0x88, 0xc5, 0xf5, 0xbf, 0x34, 0x1f, 0x3f, 0xc0, 0x3e, 0x84, 0x72, 0xc0, 0x64, 0x86, 0xbb, 0xbf, + 0xd0, 0x6a, 0xe9, 0xbf, 0x8d, 0xf8, 0xd0, 0xbf, 0x56, 0x74, 0x08, 0xc0, 0xde, 0x4d, 0x60, 0xc0, + 0x00, 0x49, 0x54, 0xc0, 0xc4, 0xb9, 0x86, 0xbf, 0x24, 0xf5, 0x68, 0xc0, 0x8c, 0x8c, 0x1f, 0x3f, + 0x50, 0x87, 0x61, 0xbf, 0xfd, 0x31, 0xae, 0xbf, 0xcb, 0xd7, 0x1b, 0xc0, 0xb1, 0xba, 0x85, 0xbf, + 0xce, 0x0a, 0xaf, 0xbf, 0xbb, 0xa7, 0x4c, 0xc0, 0xab, 0xcd, 0x81, 0xbf, 0x7a, 0x8e, 0x83, 0xbf, + 0xac, 0x36, 0x41, 0xc0, 0xd6, 0x55, 0xc4, 0xbf, 0xf0, 0x4f, 0x3c, 0xc0, 0xf7, 0xa0, 0x7b, 0xbf, + 0xf0, 0x02, 0x0b, 0xc0, 0x2e, 0xd5, 0x01, 0xc0, 0x17, 0xdf, 0x42, 0xc0, 0x8d, 0x1c, 0x30, 0xc0, + 0xf7, 0x90, 0x9f, 0xbf, 0x77, 0x89, 0x45, 0xc0, 0x81, 0x30, 0x73, 0xc0, 0x27, 0xe7, 0x20, 0xc0, + 0x12, 0xfd, 0x94, 0xbf, 0x63, 0xae, 0x29, 0xc0, 0x28, 0xcf, 0x49, 0xc0, 0xd1, 0x08, 0x56, 0xbf, + 0x0c, 0x99, 0x80, 0xbf, 0xcf, 0xf0, 0x04, 0xc0, 0xfd, 0x87, 0x90, 0xbf, 0x81, 0x05, 0x0d, 0xc0, + 0x43, 0x26, 0x52, 0xc0, 0xed, 0xb3, 0x2e, 0xbf, 0x25, 0x97, 0x0a, 0xc0, 0x5c, 0x24, 0x2c, 0xc0, + 0x72, 0x65, 0xdb, 0xbf, 0x19, 0xfc, 0x6e, 0xbf, 0x0e, 0x82, 0xa8, 0xbf, 0x6f, 0x16, 0x50, 0xc0, + 0x05, 0x09, 0x7e, 0x3d, 0x4a, 0x9d, 0xdc, 0xbf, 0x3c, 0xa1, 0x19, 0xbf, 0x4d, 0x71, 0x2b, 0xc0, + 0xc6, 0x29, 0x04, 0xc0, 0xfb, 0x57, 0x40, 0xbf, 0x57, 0x53, 0xbe, 0xbf, 0x33, 0xe4, 0x0c, 0xc0, + 0x7b, 0xcf, 0x5c, 0xbf, 0x87, 0x0c, 0x4b, 0xc0, 0xc2, 0xd2, 0x72, 0xbf, 0x8f, 0xc6, 0x54, 0xc0, + 0xc3, 0x92, 0xe8, 0xbf, 0x98, 0x6b, 0x0c, 0xc0, 0xb8, 0xd7, 0x1b, 0xc0, 0xc1, 0xae, 0x21, 0xc0, + 0xdb, 0x44, 0x2e, 0xc0, 0x6a, 0x09, 0x65, 0xc0, 0xcc, 0xd8, 0x3a, 0xc0, 0xcb, 0x9e, 0x23, 0xbf, + 0x1b, 0x8e, 0xe9, 0xbf, 0x52, 0x2a, 0xe9, 0xbf, 0x74, 0x04, 0x97, 0xbe, 0x4a, 0xaa, 0x5a, 0xc0, + 0x43, 0xf6, 0x33, 0xc0, 0x8a, 0x79, 0x4a, 0xbf, 0x1d, 0x30, 0x41, 0xc0, 0x02, 0x1d, 0x1b, 0x3e, + 0xfb, 0x68, 0x1a, 0xc0, 0xd4, 0x0a, 0xdb, 0xbf, 0xb3, 0x05, 0xc5, 0xbf, 0xfd, 0xd4, 0x0b, 0xc0, + 0xb0, 0xf2, 0x65, 0xc0, 0x9b, 0x98, 0xbe, 0xbf, 0x1d, 0xa3, 0xa7, 0xbf, 0x74, 0xea, 0x06, 0xc0, + 0x74, 0x6a, 0x1d, 0xc0, 0xae, 0x3d, 0x04, 0xc0, 0x36, 0x52, 0x3c, 0xbf, 0x02, 0x01, 0x24, 0xbf, + 0xe9, 0xb3, 0x14, 0xc0, 0xbb, 0xd6, 0xdc, 0xbf, 0x9c, 0xc7, 0x82, 0xc0, 0x38, 0xfd, 0x80, 0xbf, + 0x4a, 0x72, 0xb2, 0xbf, 0x98, 0xa1, 0x32, 0xc0, 0x97, 0x21, 0xb2, 0xbe, 0x21, 0x46, 0x35, 0xc0, + 0x91, 0x25, 0xd4, 0xbf, 0xa7, 0x53, 0x51, 0xbf, 0x24, 0x91, 0x9e, 0xbf, 0xec, 0x21, 0x3e, 0xc0, + 0x06, 0xc5, 0x28, 0xc0, 0x9e, 0xe1, 0xb1, 0xbf, 0xad, 0x1b, 0xb4, 0xbf, 0x6a, 0x3e, 0x04, 0xc0, + 0xd5, 0x36, 0x18, 0xc0, 0x62, 0xfb, 0x93, 0xbf, 0xcd, 0x83, 0x27, 0xc0, 0x48, 0x38, 0x2c, 0xc0, + 0x4f, 0x84, 0xd8, 0xbf, 0x1f, 0xc5, 0xae, 0xbf, 0x71, 0x26, 0x0d, 0xc0, 0x78, 0x1a, 0xd6, 0xbf, + 0x24, 0x39, 0x84, 0xbf, 0x9e, 0x36, 0x01, 0xc0, 0x6c, 0xcf, 0x30, 0xc0, 0xff, 0x3b, 0x8a, 0xbf, + 0xca, 0xb2, 0xef, 0xbf, 0xfb, 0xb2, 0xf5, 0xbf, 0xad, 0x89, 0x7d, 0xbf, 0x6b, 0x1f, 0x13, 0xc0, + 0xf6, 0xce, 0x21, 0xc0, 0x79, 0x7b, 0x22, 0xc0, 0x11, 0xc8, 0x20, 0xc0, 0x19, 0x12, 0xa7, 0xbf, + 0xc9, 0xc1, 0xc5, 0xbf, 0x0e, 0x2d, 0x1a, 0xc0, 0x56, 0x81, 0x8b, 0xbe, 0x5d, 0xab, 0xbf, 0xbf, + 0xea, 0x3b, 0x09, 0xbe, 0x7c, 0x69, 0x0d, 0xc0, 0xc5, 0xb6, 0x24, 0xc0, 0x6d, 0x76, 0xef, 0xbf, + 0x50, 0x27, 0x12, 0xc0, 0xe6, 0x5e, 0x89, 0xc0, 0x45, 0xf7, 0x00, 0xc0, 0x76, 0x14, 0xbe, 0xbf, + 0xa4, 0x4f, 0xeb, 0xbf, 0xee, 0x1d, 0x08, 0xc0, 0x90, 0xfb, 0xe2, 0xbf, 0x94, 0x87, 0xfa, 0xbf, + 0x9b, 0x4a, 0xda, 0xbf, 0x69, 0xd4, 0x4e, 0xc0, 0xa0, 0x65, 0x46, 0xc0, 0x7e, 0x41, 0x45, 0xbe, + 0x98, 0x04, 0xa5, 0xbf, 0xac, 0xaa, 0x2e, 0xc0, 0x6d, 0x16, 0xd4, 0xbf, 0xeb, 0xae, 0x81, 0xc0, + 0x40, 0xe7, 0x8a, 0xbf, 0xf9, 0x04, 0x4f, 0xc0, 0xaa, 0xde, 0x65, 0xc0, 0x5c, 0x52, 0xcc, 0xbf, + 0x81, 0x65, 0xdf, 0xbf, 0x2f, 0x04, 0x29, 0xbd, 0x58, 0x76, 0x12, 0xc0, 0xa4, 0xde, 0x33, 0xc0, + 0x66, 0xf7, 0x35, 0xc0, 0x2e, 0xd6, 0x0e, 0xc0, 0x5d, 0x92, 0x32, 0xc0, 0x18, 0x65, 0xff, 0xbf, + 0xc8, 0x16, 0x39, 0xc0, 0x40, 0x93, 0xdb, 0xbf, 0x4a, 0x52, 0x79, 0xbf, 0x93, 0x77, 0xc4, 0xbf, + 0x13, 0xae, 0x9f, 0xc0, 0xc5, 0x94, 0x05, 0xc0, 0xf8, 0xc6, 0x08, 0xc0, 0x16, 0x71, 0x9a, 0xc0, + 0x84, 0x76, 0x55, 0xc0, 0x32, 0x85, 0x70, 0xbf, 0xce, 0x99, 0x2c, 0xc0, 0x6e, 0xab, 0x85, 0xbe, + 0xa7, 0xb5, 0xa4, 0xbf, 0xf8, 0x96, 0x11, 0xbf, 0x64, 0x5e, 0x35, 0xc0, 0xee, 0xa6, 0x4e, 0xc0, + 0x58, 0xf5, 0xe1, 0xbf, 0x5e, 0x1c, 0x05, 0xc0, 0x3b, 0xc3, 0xfb, 0xbf, 0x8b, 0xd7, 0x4d, 0xc0, + 0x43, 0x95, 0xff, 0xbe, 0x67, 0xba, 0x2d, 0xbf, 0xd7, 0x9f, 0xcb, 0xbf, 0xbc, 0x86, 0x94, 0xc0, + 0xe1, 0xe6, 0x04, 0xc0, 0x77, 0x91, 0x58, 0xc0, 0x6d, 0x42, 0x8e, 0xbe, 0xfe, 0x66, 0x48, 0xc0, + 0xea, 0x1d, 0x5d, 0xc0, 0x61, 0xa3, 0xb6, 0xbf, 0xff, 0x6f, 0xc5, 0xbf, 0xac, 0xc0, 0x7a, 0xc0, + 0x3c, 0x3e, 0x18, 0xbf, 0xa3, 0xf6, 0x2b, 0xc0, 0xc4, 0xdb, 0x1b, 0xc0, 0x7a, 0xd8, 0xf2, 0xbf, + 0xc6, 0x2a, 0x33, 0xbf, 0xac, 0xcc, 0xa1, 0xbf, 0xea, 0x74, 0x71, 0xbf, 0x72, 0x7b, 0xc8, 0xbf, + 0x68, 0x71, 0x34, 0xc0, 0x31, 0xe2, 0xf2, 0xbf, 0x05, 0x6d, 0xf4, 0xbf, 0x3b, 0x1c, 0xe1, 0xbf, + 0xa4, 0xed, 0xe4, 0xbf, 0x3f, 0xdd, 0x35, 0xc0, 0x0b, 0x27, 0xa7, 0xbf, 0x0c, 0x2c, 0xfe, 0xbf, + 0xb0, 0x22, 0x2f, 0xc0, 0x3c, 0x82, 0xea, 0xbf, 0x63, 0x81, 0x57, 0xc0, 0xe3, 0xd4, 0xda, 0xbf, + 0x1b, 0x42, 0x8b, 0xbf, 0xa5, 0xab, 0xc9, 0xbf, 0x9c, 0xb2, 0x31, 0xbf, 0xb5, 0x41, 0x5a, 0xc0, + 0xa8, 0x1e, 0x1c, 0xc0, 0x85, 0xa0, 0x5a, 0xc0, 0x59, 0xc4, 0xef, 0xbf, 0x1b, 0x25, 0xe6, 0xbf, + 0x9e, 0x64, 0xc0, 0xbf, 0x90, 0xe9, 0xd4, 0xbf, 0xe6, 0x4b, 0x04, 0xc0, 0x4f, 0x5f, 0x07, 0xc0, + 0x3d, 0x5f, 0x9e, 0xbf, 0x8a, 0x2b, 0x27, 0xbf, 0x8e, 0x89, 0x93, 0x3f, 0x61, 0x37, 0xb1, 0xbf, + 0x7d, 0x4f, 0x25, 0xc0, 0xa3, 0x58, 0x00, 0xc0, 0xd7, 0x08, 0x11, 0xc0, 0x6e, 0xe4, 0x40, 0xc0, + 0xd8, 0x4d, 0x32, 0xc0, 0x04, 0x62, 0x78, 0xbf, 0x95, 0x10, 0xd1, 0xbf, 0x0a, 0x2d, 0x5d, 0xbf, + 0x24, 0x3f, 0xc3, 0xbf, 0xd5, 0x01, 0x2e, 0xc0, 0x65, 0x84, 0x9a, 0xbf, 0x63, 0xbb, 0x9a, 0xbf, + 0x29, 0xcf, 0xd6, 0xbf, 0x81, 0xf8, 0x03, 0xc0, 0xcd, 0x98, 0xfb, 0xbf, 0x91, 0x1d, 0x55, 0xc0, + 0xe8, 0x10, 0xf1, 0xbf, 0x50, 0xaa, 0x1c, 0xc0, 0x51, 0x3a, 0x04, 0xc0, 0xbf, 0x65, 0xfd, 0xbf, + 0x14, 0x94, 0xd5, 0xbf, 0x3f, 0x90, 0x07, 0xc0, 0xdd, 0x0a, 0x56, 0xc0, 0x9d, 0xf8, 0x1b, 0xc0, + 0x93, 0xed, 0x36, 0xc0, 0x08, 0x16, 0x75, 0xc0, 0x24, 0x6a, 0xa5, 0xbe, 0xbb, 0x01, 0x5a, 0xbe, + 0xd1, 0x83, 0xce, 0xbf, 0x14, 0x89, 0xbc, 0xbe, 0xd7, 0x8f, 0xb6, 0xbf, 0xa3, 0x8f, 0x28, 0xc0, + 0xf6, 0x99, 0x63, 0xbf, 0xba, 0xd1, 0x72, 0xc0, 0x6b, 0x68, 0x5f, 0xbf, 0x4f, 0x5f, 0x74, 0xc0, + 0x7d, 0x91, 0x30, 0xc0, 0x54, 0x01, 0x88, 0xbf, 0xd3, 0x03, 0xbc, 0xbf, 0xa8, 0xe4, 0xda, 0xbf, + 0xcc, 0x05, 0x30, 0xc0, 0x56, 0xae, 0x0e, 0xc0, 0x73, 0x4b, 0x16, 0xc0, 0x64, 0xa1, 0x4f, 0xc0, + 0xee, 0x05, 0x98, 0xbf, 0x3f, 0xcd, 0x81, 0xbf, 0x11, 0x24, 0xd7, 0xbf, 0x3e, 0xd5, 0x2e, 0xbf, + 0x4f, 0x0d, 0x58, 0xc0, 0x1f, 0x76, 0xf9, 0xbf, 0x64, 0xf7, 0x1d, 0xc0, 0x0a, 0xed, 0xc5, 0xbf, + 0xc0, 0x68, 0xd6, 0xbf, 0xdc, 0x4c, 0xc4, 0xbf, 0xf1, 0x6f, 0x13, 0xbf, 0x59, 0x8c, 0x1a, 0xc0, + 0xf1, 0x72, 0x2b, 0xc0, 0xfe, 0x08, 0x3d, 0xc0, 0x7f, 0x4d, 0x13, 0xc0, 0x51, 0x8c, 0xe0, 0xbf, + 0x4f, 0x1e, 0xe3, 0xbf, 0x6b, 0xea, 0x90, 0xbf, 0x96, 0xd7, 0x07, 0xc0, 0x89, 0x55, 0x48, 0xc0, + 0x97, 0x9b, 0x04, 0xc0, 0xe0, 0x4f, 0x77, 0xbf, 0x18, 0xe7, 0x23, 0xc0, 0xe8, 0xa5, 0xa9, 0xbf, + 0x7c, 0xd4, 0x37, 0xbf, 0xb9, 0x9e, 0x04, 0xc0, 0x2f, 0x4d, 0x04, 0xc0, 0x6a, 0xfb, 0xc6, 0xbf, + 0xa7, 0x22, 0xac, 0xbf, 0x8e, 0xa2, 0x7b, 0xbf, 0xbf, 0x19, 0x37, 0xc0, 0x71, 0x59, 0xca, 0xbf, + 0x9a, 0xd7, 0x01, 0xc0, 0x2c, 0x8b, 0x0c, 0xbf, 0xa3, 0x91, 0xc9, 0xbf, 0x7f, 0x4f, 0x5a, 0xbf, + 0xaf, 0xe8, 0x5d, 0xc0, 0xe3, 0x4a, 0x70, 0xc0, 0x78, 0xb5, 0x28, 0xbf, 0xb3, 0x32, 0x54, 0xbf, + 0x8f, 0x72, 0x0c, 0xc0, 0xf2, 0x0a, 0x42, 0xc0, 0xf5, 0x90, 0x42, 0xc0, 0x7d, 0x3e, 0x3a, 0xc0, + 0xd8, 0xbe, 0xeb, 0xbf, 0xf1, 0x43, 0xe0, 0xbf, 0xed, 0x59, 0x83, 0xbd, 0x3b, 0x3b, 0x2b, 0xc0, + 0x3a, 0x3c, 0xc8, 0xbf, 0x63, 0x20, 0x93, 0xbe, 0x2a, 0xbf, 0x4e, 0xc0, 0x0a, 0x54, 0xfe, 0x3e, + 0x92, 0xf1, 0x16, 0xc0, 0xce, 0xb9, 0x2a, 0xc0, 0x80, 0x14, 0x56, 0xbf, 0xfe, 0xae, 0xd1, 0xbf, + 0xd9, 0xb7, 0xac, 0xbf, 0xab, 0xa3, 0x08, 0xc0, 0x36, 0xe7, 0xa3, 0xbf, 0xc7, 0xf2, 0x8e, 0xbf, + 0x7d, 0x35, 0x49, 0xc0, 0x6f, 0x2b, 0x27, 0xc0, 0x8c, 0x7a, 0x19, 0xbf, 0xda, 0xb9, 0xc5, 0xbf, + 0xb5, 0x14, 0x72, 0xbf, 0x42, 0xd4, 0x41, 0xc0, 0x50, 0xa6, 0x06, 0xc0, 0xe0, 0xb4, 0x75, 0xc0, + 0x24, 0xc9, 0x50, 0xbf, 0xc1, 0xbf, 0x56, 0xc0, 0xc6, 0xf9, 0x00, 0xc0, 0x7d, 0xbf, 0xb9, 0xbf, + 0x47, 0xe0, 0x25, 0xc0, 0x69, 0x8e, 0x23, 0xc0, 0x41, 0xbf, 0x41, 0xc0, 0xa7, 0x73, 0x22, 0xc0, + 0x38, 0xc1, 0x22, 0xc0, 0x3b, 0xd9, 0x5d, 0xbf, 0xe7, 0x6b, 0x42, 0xbf, 0x30, 0xf4, 0x9a, 0xc0, + 0x5b, 0x05, 0x3f, 0xc0, 0xd5, 0xcf, 0x5f, 0xbf, 0x92, 0x2b, 0xde, 0xbf, 0x76, 0x5d, 0x77, 0xbf, + 0x4d, 0xe1, 0xec, 0xbf, 0xce, 0x94, 0x94, 0xc0, 0x4f, 0x51, 0xba, 0xbe, 0x74, 0x75, 0x53, 0xc0, + 0xc9, 0x8f, 0x1b, 0xc0, 0xc8, 0x94, 0xfc, 0xbf, 0x60, 0x90, 0x12, 0xc0, 0x1b, 0xbe, 0xa8, 0xbf, + 0x6e, 0x53, 0x3e, 0xc0, 0xb6, 0x06, 0xb4, 0xbf, 0x19, 0x99, 0x7a, 0x3e, 0x5b, 0x75, 0x71, 0xc0, + 0xf8, 0x29, 0xea, 0xbf, 0x4c, 0x1d, 0xa9, 0xbf, 0x6e, 0x49, 0xb8, 0xbe, 0x11, 0x8d, 0x59, 0xc0, + 0x1e, 0xa2, 0xe0, 0xbf, 0xfe, 0x2a, 0x0b, 0xc0, 0xff, 0x0d, 0xc3, 0xbf, 0xc7, 0x2e, 0x48, 0x3e, + 0x95, 0x66, 0x1e, 0xc0, 0x61, 0xdb, 0xe1, 0xbf, 0xa2, 0xc1, 0x5c, 0xbf, 0xb3, 0x17, 0x7b, 0xc0, + 0xd3, 0x30, 0x03, 0xc0, 0x13, 0xe5, 0x65, 0xbf, 0x08, 0x2f, 0xd7, 0xbf, 0x82, 0x82, 0x56, 0xc0, + 0xf8, 0xcb, 0x03, 0xc0, 0x62, 0x40, 0x29, 0xc0, 0xc4, 0x54, 0x34, 0x3f, 0x49, 0xa6, 0x55, 0xc0, + 0xb7, 0x46, 0x53, 0xc0, 0x17, 0xfb, 0x6a, 0xc0, 0x20, 0x97, 0x0c, 0xc0, 0x10, 0xa2, 0x78, 0xc0, + 0x77, 0x30, 0x71, 0xc0, 0x30, 0xe0, 0xd1, 0xbe, 0x76, 0x63, 0x4f, 0xc0, 0xa6, 0xf3, 0x04, 0xc0, + 0xe6, 0x2b, 0x08, 0xc0, 0x6c, 0xb7, 0x04, 0xc0, 0x41, 0xfe, 0x0f, 0xc0, 0x86, 0x4b, 0x07, 0xc0, + 0x8e, 0x01, 0x2e, 0xc0, 0x12, 0x5e, 0x2b, 0xbf, 0x17, 0x3b, 0xb3, 0xbf, 0x69, 0xd2, 0x71, 0x3e, + 0x3b, 0xaa, 0x2c, 0xc0, 0x77, 0xbe, 0x0d, 0xc0, 0x9b, 0x99, 0xc7, 0xbf, 0x9a, 0x38, 0xed, 0xbf, + 0xe7, 0x22, 0x62, 0xc0, 0x0f, 0xc5, 0x84, 0xbf, 0x2f, 0x14, 0x0c, 0xc0, 0x65, 0x42, 0xea, 0xbf, + 0x07, 0xcb, 0x3d, 0xc0, 0xb8, 0x57, 0xf3, 0xbf, 0xb3, 0xec, 0x04, 0xc0, 0x12, 0xfc, 0x1e, 0xc0, + 0x5e, 0x3e, 0xd0, 0xbf, 0xab, 0xed, 0x5d, 0xc0, 0xee, 0x61, 0xaf, 0xbf, 0xe6, 0x31, 0x58, 0xc0, + 0xea, 0xf7, 0x25, 0xc0, 0x20, 0xa9, 0x99, 0xbf, 0xc0, 0xa0, 0x14, 0xc0, 0xf5, 0x90, 0xe1, 0xbf, + 0x3d, 0xe6, 0x2e, 0xc0, 0x12, 0x1f, 0x0c, 0xbf, 0xb5, 0x92, 0x1b, 0xc0, 0xc5, 0xef, 0x02, 0xbf, + 0x0b, 0xe4, 0xac, 0xbf, 0xed, 0x08, 0x96, 0xbf, 0x58, 0xfc, 0xe9, 0xbf, 0xcc, 0xd7, 0x32, 0xc0, + 0x64, 0xdf, 0x2d, 0xc0, 0xc3, 0x59, 0xb8, 0xbe, 0x5e, 0x11, 0x69, 0xbf, 0xa5, 0x69, 0xf9, 0xbf, + 0xa6, 0x18, 0x58, 0xc0, 0xb7, 0x43, 0xf7, 0xbf, 0x4f, 0x09, 0x4c, 0xbf, 0x8a, 0x4d, 0x17, 0xbf, + 0x5b, 0xf7, 0x81, 0xc0, 0x36, 0x7a, 0x32, 0xc0, 0x92, 0xe8, 0x05, 0xc0, 0x51, 0x3e, 0x2d, 0xc0, + 0x17, 0x32, 0x21, 0xc0, 0x41, 0xc1, 0x14, 0xbf, 0x70, 0xda, 0xdc, 0xbf, 0x9b, 0x45, 0x20, 0xc0, + 0xc9, 0xe3, 0x1b, 0xc0, 0xac, 0x46, 0x19, 0xc0, 0xc8, 0x63, 0x11, 0xc0, 0x90, 0x53, 0x04, 0xc0, + 0x87, 0x78, 0x14, 0xc0, 0x59, 0x69, 0xcb, 0xbf, 0x5f, 0x58, 0x7d, 0xc0, 0x0a, 0xbc, 0x07, 0xc0, + 0x15, 0xea, 0x30, 0xc0, 0xa8, 0xc0, 0x77, 0xc0, 0x9d, 0xed, 0x46, 0xc0, 0x9a, 0xc8, 0xb0, 0xbf, + 0xec, 0x4c, 0xb0, 0xbf, 0x41, 0x32, 0x0b, 0xc0, 0x85, 0x10, 0x81, 0xbf, 0x8b, 0x1b, 0x42, 0xc0, + 0xd3, 0x31, 0x30, 0xc0, 0x1e, 0x74, 0x9a, 0xbf, 0x48, 0xf1, 0xbc, 0xbf, 0xef, 0xb7, 0xdc, 0xbf, + 0xa0, 0x99, 0x0d, 0xc0, 0x6e, 0xae, 0xa4, 0xbf, 0x1e, 0x38, 0x28, 0x3c, 0x66, 0x4a, 0x2c, 0xc0, + 0x6e, 0xb2, 0xe5, 0xbf, 0x45, 0xaa, 0x1b, 0xc0, 0xd2, 0x62, 0xea, 0xbf, 0x6c, 0x33, 0xc2, 0xbf, + 0x1a, 0x6c, 0xe6, 0xbe, 0xf2, 0x94, 0x26, 0xbf, 0xc0, 0x8b, 0x6b, 0xc0, 0x6b, 0xd7, 0x2b, 0xc0, + 0x74, 0x48, 0x46, 0xc0, 0x31, 0xa1, 0x81, 0x3e, 0x07, 0xf6, 0x14, 0xc0, 0xd1, 0x68, 0x9c, 0xbf, + 0x4a, 0x44, 0x54, 0xc0, 0x78, 0xb3, 0x24, 0xc0, 0x25, 0xed, 0x23, 0xc0, 0x1e, 0x02, 0xf4, 0xbf, + 0x84, 0x7e, 0x14, 0xc0, 0x25, 0x68, 0x0e, 0xc0, 0xf0, 0x02, 0x7d, 0xbf, 0xc9, 0x4d, 0x29, 0xc0, + 0x1b, 0x85, 0x17, 0xc0, 0xf2, 0xfc, 0x28, 0xc0, 0xd3, 0x21, 0x44, 0xc0, 0xd8, 0xb4, 0x29, 0xc0, + 0x59, 0x21, 0x93, 0xbf, 0xee, 0x0d, 0x32, 0xc0, 0x90, 0x4d, 0x22, 0xc0, 0xd4, 0x13, 0x13, 0xc0, + 0x36, 0xdd, 0x85, 0xbf, 0xab, 0x99, 0x91, 0xbf, 0x42, 0xb1, 0xc4, 0xbf, 0x4a, 0x76, 0x82, 0xbf, + 0x03, 0xcf, 0x35, 0xbf, 0x01, 0x54, 0x0c, 0xc0, 0x22, 0x39, 0x08, 0xc0, 0x1e, 0x63, 0x83, 0xc0, + 0x62, 0x52, 0x23, 0xc0, 0xe4, 0xb7, 0x61, 0xc0, 0x79, 0xa9, 0xb6, 0xbf, 0xdd, 0xc7, 0x0e, 0xc0, + 0x82, 0xd9, 0xa3, 0xbd, 0xc7, 0x93, 0xec, 0xbf, 0x36, 0xb6, 0x36, 0xbf, 0xa1, 0x10, 0xfd, 0xbf, + 0x02, 0x36, 0x02, 0xc0, 0xac, 0x1f, 0xdf, 0xbf, 0xdd, 0x3e, 0x98, 0xc0, 0xe5, 0xd5, 0x22, 0xc0, + 0x80, 0xba, 0x14, 0xc0, 0xd6, 0x78, 0x04, 0xc0, 0x13, 0x7f, 0xce, 0xbf, 0xc7, 0x59, 0xb3, 0xbf, + 0xd1, 0x07, 0x1a, 0xc0, 0xfd, 0xbe, 0xeb, 0xbf, 0x38, 0xf5, 0xfa, 0xbf, 0x66, 0x9b, 0xf0, 0xbf, + 0xbc, 0x31, 0x6c, 0xc0, 0x17, 0x36, 0x25, 0xc0, 0x9f, 0x40, 0x46, 0xc0, 0x7c, 0x78, 0x97, 0xbf, + 0xa9, 0x00, 0x0f, 0xc0, 0x9a, 0x21, 0x3c, 0x3e, 0x99, 0xd6, 0x1f, 0xc0, 0x79, 0xb9, 0xfb, 0x3c, + 0x37, 0xc1, 0x50, 0xbe, 0xca, 0x37, 0x8b, 0xbf, 0xbd, 0xe4, 0x03, 0x3f, 0xe4, 0x04, 0x42, 0xc0, + 0x62, 0x4c, 0xb7, 0xbf, 0xf9, 0x09, 0x2f, 0xc0, 0x67, 0xe3, 0x10, 0xc0, 0x01, 0x9d, 0xac, 0xbe, + 0xe6, 0x56, 0x2c, 0xc0, 0x3d, 0x83, 0x24, 0xc0, 0x96, 0x22, 0xa8, 0xbf, 0xee, 0xf4, 0x8d, 0xbe, + 0x1e, 0x78, 0x27, 0xbf, 0x86, 0xf8, 0x08, 0xc0, 0xf8, 0xf5, 0x10, 0xc0, 0x69, 0xa4, 0xc3, 0xbf, + 0xe9, 0xcc, 0x4a, 0xc0, 0xae, 0xea, 0x13, 0xc0, 0xed, 0x5c, 0x0d, 0xc0, 0xe5, 0x54, 0x1f, 0xc0, + 0x02, 0x62, 0x60, 0xc0, 0x3d, 0x2f, 0x67, 0xbe, 0x1c, 0x4e, 0x48, 0xc0, 0x55, 0x9c, 0x38, 0xc0, + 0x0b, 0x52, 0x83, 0xc0, 0x36, 0xc4, 0x16, 0xc0, 0xb5, 0xa3, 0xe7, 0xbf, 0x57, 0xe4, 0x46, 0xbf, + 0xfc, 0xb4, 0xc2, 0xbf, 0x35, 0xec, 0x41, 0xc0, 0x42, 0x8f, 0x0b, 0xc0, 0x53, 0x1c, 0xd7, 0xbf, + 0x5a, 0x2a, 0xf3, 0xbf, 0xce, 0xd0, 0x2c, 0xc0, 0xb1, 0x52, 0xdd, 0xbf, 0xc0, 0x36, 0x5e, 0xc0, + 0x67, 0xae, 0x07, 0xc0, 0x4f, 0x32, 0xfb, 0xbf, 0xa4, 0x38, 0xe5, 0xbf, 0xdc, 0xdb, 0xd1, 0xbf, + 0x0e, 0x1f, 0x6e, 0xbf, 0xd4, 0x40, 0x27, 0xc0, 0xc7, 0x0e, 0xf9, 0xbf, 0xa9, 0xa2, 0xd0, 0xbf, + 0xfc, 0xe9, 0x23, 0xc0, 0xb5, 0x10, 0x4a, 0xc0, 0xd4, 0x58, 0xbf, 0xbf, 0x5e, 0xe2, 0x27, 0xbf, + 0x0c, 0xd8, 0x66, 0xc0, 0x37, 0xe3, 0xed, 0xbf, 0x1d, 0xdc, 0xc5, 0xbf, 0xb5, 0x29, 0x0d, 0xc0, + 0xc8, 0x29, 0x1a, 0xc0, 0xd3, 0x3d, 0x00, 0xc0, 0x67, 0x30, 0x38, 0xc0, 0xd1, 0x17, 0xb1, 0xbf, + 0x9e, 0xa2, 0x92, 0xbf, 0x1a, 0x3f, 0x52, 0xc0, 0x30, 0xb1, 0x40, 0xc0, 0x2d, 0xcd, 0x65, 0xc0, + 0x65, 0x2c, 0xe7, 0xbf, 0x3e, 0xe6, 0x5d, 0xc0, 0xcb, 0x1f, 0xaf, 0xbf, 0x71, 0xbb, 0x22, 0xc0, + 0x44, 0x6b, 0xce, 0xbf, 0xe5, 0xe6, 0xdf, 0xbf, 0xc3, 0x73, 0x07, 0xc0, 0x43, 0xf0, 0xbd, 0xbf, + 0x48, 0x47, 0x04, 0xc0, 0xdd, 0xec, 0xac, 0xbf, 0x74, 0x0c, 0x64, 0xbf, 0x74, 0x7d, 0x95, 0xbf, + 0xbc, 0xfb, 0x3d, 0xbf, 0x5c, 0x1c, 0x10, 0xc0, 0xf1, 0xac, 0x29, 0xc0, 0x4a, 0x74, 0x1b, 0xc0, + 0x06, 0xcb, 0xbf, 0xbf, 0xd4, 0xee, 0x09, 0xc0, 0x51, 0x4c, 0x6d, 0xc0, 0x7f, 0x9b, 0x05, 0xc0}; +unsigned char conv2d_winograd_fp32_ker1[] = { + 0xd1, 0x12, 0xc5, 0xbf, 0x07, 0x12, 0xea, 0xbf, 0xae, 0x60, 0xc9, 0xbf, 0x51, 0xc9, 0x83, 0xc0, + 0x18, 0xa1, 0x9a, 0xbf, 0x5a, 0xf2, 0xab, 0xbf, 0x34, 0x42, 0xf5, 0xbf, 0x6c, 0xc2, 0x6c, 0xc0, + 0xb3, 0x48, 0x23, 0xc0, 0x1e, 0x78, 0xf7, 0xbd, 0x5f, 0x30, 0x29, 0xc0, 0xae, 0xbe, 0x53, 0xc0, + 0xec, 0x61, 0x48, 0xc0, 0xc5, 0xbb, 0xb5, 0xbf, 0x55, 0x22, 0xfc, 0xbf, 0xfa, 0x50, 0xa9, 0xbe, + 0x63, 0x1a, 0x19, 0xc0, 0x2e, 0xe9, 0x07, 0xc0, 0x92, 0xf7, 0x4a, 0xbf, 0xc7, 0x17, 0x8d, 0xbf, + 0xa9, 0x2d, 0x12, 0xc0, 0x1f, 0xf1, 0xbb, 0xbf, 0x7e, 0x8b, 0xdb, 0xbf, 0x46, 0x23, 0x42, 0xc0, + 0x5d, 0x46, 0x28, 0xbf, 0x5f, 0x9a, 0x10, 0xc0, 0x87, 0x98, 0xba, 0xbf, 0x64, 0x3f, 0x1d, 0xc0, + 0x5b, 0x0e, 0x7d, 0xbf, 0x26, 0x87, 0x70, 0xbf, 0x06, 0x95, 0xdc, 0xbf, 0x03, 0xee, 0x4b, 0xbf, + 0x5f, 0x46, 0x85, 0x3f, 0x2a, 0xf3, 0x42, 0x3f, 0xb6, 0xf5, 0x9d, 0x3f, 0x23, 0x36, 0xfb, 0x3f, + 0x94, 0xd1, 0xc5, 0x3f, 0xa1, 0xb6, 0x6f, 0x3f, 0x04, 0x12, 0xba, 0x3f, 0xd8, 0x0f, 0x67, 0x3f, + 0xb4, 0x4c, 0x9e, 0x3f, 0xd8, 0xcb, 0xf5, 0x3e, 0xab, 0xf9, 0xb0, 0x3f, 0x52, 0x72, 0x9f, 0x3f, + 0x4e, 0xa0, 0x05, 0x40, 0xa7, 0xe1, 0x62, 0x3f, 0x0f, 0x6f, 0xb9, 0x3f, 0xd8, 0x65, 0xa6, 0x3f, + 0xb7, 0x00, 0xda, 0x3f, 0x7e, 0xed, 0x94, 0x3f, 0xb8, 0xd9, 0x3b, 0x3e, 0x40, 0xaa, 0x9c, 0x3f, + 0x00, 0x2d, 0xb4, 0x3f, 0x44, 0x1f, 0xe7, 0x3f, 0x8c, 0x47, 0x23, 0x3f, 0xf0, 0xd8, 0xe8, 0x3f, + 0xe8, 0xd0, 0xa2, 0x3f, 0x37, 0xd4, 0xa1, 0x3f, 0x0e, 0x4b, 0x14, 0x3f, 0x6d, 0x33, 0x81, 0x3f, + 0xae, 0x61, 0x90, 0x3f, 0x7c, 0x31, 0x95, 0x3f, 0x5f, 0xfd, 0xed, 0x3f, 0xce, 0x86, 0x08, 0x3f, + 0x5e, 0xa7, 0x1a, 0x3f, 0xb0, 0x13, 0x9f, 0x3e, 0xc9, 0x50, 0x3c, 0xbe, 0x6e, 0x2a, 0x48, 0x3f, + 0x70, 0x59, 0x6f, 0x3e, 0xd0, 0xe9, 0x23, 0x3e, 0xd4, 0xdf, 0x48, 0x3f, 0x22, 0x8f, 0x39, 0x3f, + 0x35, 0xad, 0xc3, 0x3e, 0x14, 0x57, 0x12, 0xbd, 0xbf, 0x5a, 0x8e, 0xbd, 0xb4, 0x4d, 0xbf, 0x3e, + 0x72, 0x8e, 0xa7, 0x3f, 0xbc, 0xac, 0x13, 0x3f, 0x6a, 0x3d, 0x64, 0x3e, 0x4b, 0xf9, 0xb5, 0xbe, + 0xc4, 0xa4, 0xfa, 0x3d, 0x83, 0xe3, 0x34, 0xbe, 0x37, 0xff, 0x94, 0x3e, 0x3a, 0x1b, 0x5a, 0x3e, + 0x64, 0x3a, 0x6c, 0x3e, 0x5c, 0x76, 0x25, 0x3e, 0xa8, 0x77, 0x47, 0x3f, 0x70, 0x40, 0x48, 0x3f, + 0xd4, 0xe8, 0x41, 0x3f, 0x46, 0xfa, 0x0b, 0x3f, 0x82, 0x76, 0x1d, 0x3e, 0x74, 0xe0, 0x0c, 0x3f, + 0x4c, 0xb5, 0xd0, 0x3d, 0xb0, 0xee, 0x4a, 0x3f, 0xf4, 0x4e, 0x53, 0x3e, 0xa6, 0xf8, 0x01, 0x3f, + 0xc4, 0x4e, 0x0a, 0xbe, 0xeb, 0xea, 0x8c, 0xbd, 0xdd, 0xde, 0xfc, 0xbd, 0x2e, 0xea, 0x48, 0xbe, + 0x8b, 0x1d, 0x50, 0xbe, 0xae, 0x57, 0xd4, 0xbd, 0x98, 0x0e, 0x46, 0xbe, 0xb5, 0xe8, 0x45, 0xbd, + 0x88, 0xe2, 0xf4, 0xbd, 0xfe, 0x91, 0x87, 0xbd, 0x9a, 0x27, 0xed, 0xbd, 0x42, 0x12, 0xc3, 0xbd, + 0x68, 0x6d, 0x8c, 0xbe, 0x33, 0x67, 0xea, 0xbd, 0x62, 0xb2, 0x26, 0xbe, 0x38, 0x31, 0x2a, 0xbe, + 0xe6, 0x36, 0x3a, 0xbe, 0xb6, 0x5d, 0xc2, 0xbd, 0xb2, 0x1e, 0x80, 0xbc, 0x53, 0x47, 0x21, 0xbe, + 0x80, 0x0f, 0x16, 0xbe, 0x7a, 0x80, 0x6b, 0xbe, 0xb9, 0xa1, 0x9e, 0xbd, 0x55, 0xed, 0x57, 0xbe, + 0x78, 0xb9, 0x53, 0xbe, 0x95, 0x11, 0x11, 0xbe, 0x54, 0x5c, 0x3c, 0xbd, 0xb7, 0xb3, 0xc6, 0xbd, + 0x1a, 0xbc, 0x10, 0xbe, 0x4d, 0x8c, 0x3b, 0xbe, 0xcb, 0x54, 0x6d, 0xbe, 0x6c, 0x70, 0xa1, 0xbd, + 0x3c, 0x19, 0xbb, 0xbd, 0xa7, 0x5a, 0xc2, 0xbc, 0xec, 0x1c, 0x96, 0x3c, 0x15, 0x06, 0xa0, 0xbd, + 0x3e, 0x97, 0x93, 0xbd, 0xf6, 0x6a, 0xd5, 0xbc, 0xea, 0x8c, 0x01, 0xbe, 0x85, 0x35, 0xfa, 0xbc, + 0x8e, 0xc0, 0x0b, 0xbd, 0x6a, 0x7a, 0x6f, 0xbc, 0xf8, 0xf6, 0xf0, 0x3c, 0x5d, 0xd3, 0x03, 0xbc, + 0x16, 0x19, 0x49, 0xbe, 0xaa, 0x09, 0xab, 0xbd, 0xea, 0xb1, 0x24, 0xbd, 0x80, 0xe2, 0x2d, 0xba, + 0x58, 0xc1, 0xc2, 0xbc, 0x2c, 0x31, 0x20, 0x3d, 0x10, 0x3c, 0xd8, 0xbc, 0x62, 0x07, 0x67, 0xbd, + 0x84, 0x57, 0xec, 0xbc, 0xfd, 0x4c, 0x86, 0xbd, 0x02, 0x95, 0xbb, 0xbd, 0xea, 0x7f, 0xdb, 0xbd, + 0x14, 0x09, 0x1f, 0xbe, 0x70, 0x31, 0x8f, 0xbd, 0x6c, 0x3a, 0x61, 0xbb, 0x32, 0x5d, 0x51, 0xbd, + 0xe3, 0xa8, 0x1e, 0xbd, 0xca, 0x5d, 0x15, 0xbe, 0xfa, 0x23, 0x88, 0xbd, 0xe5, 0x31, 0x9c, 0xbd, + 0xe0, 0x1a, 0x6a, 0xbd, 0x57, 0xe6, 0x61, 0xbd, 0x3b, 0x2f, 0x99, 0xbd, 0x28, 0xa4, 0x07, 0xbe, + 0x23, 0xf1, 0x99, 0xbd, 0x58, 0x56, 0x63, 0xbd, 0x04, 0xf5, 0x9c, 0xbd, 0xcd, 0x08, 0xb1, 0xbd, + 0xe0, 0x47, 0xac, 0xbd, 0x5b, 0xa2, 0xa7, 0xbc, 0xdd, 0x32, 0xc6, 0xbd, 0x89, 0x08, 0xc7, 0xbd, + 0xf7, 0xb5, 0xe9, 0xbd, 0xb8, 0x84, 0x4b, 0xbd, 0x41, 0xca, 0xac, 0xbd, 0x46, 0x73, 0x70, 0xbd, + 0xd7, 0x52, 0xd1, 0xbd, 0x59, 0x5d, 0xa6, 0xbd, 0x0b, 0xce, 0x86, 0xbc, 0xa0, 0x57, 0x7c, 0xbd, + 0x5c, 0x29, 0xb4, 0xbd, 0x92, 0xa7, 0xb8, 0xbd, 0x32, 0x2a, 0x2f, 0xbd, 0x33, 0x39, 0xdf, 0xbd, + 0x7c, 0x78, 0x49, 0xbd, 0xa3, 0x38, 0xa0, 0xbd, 0xda, 0x9e, 0x34, 0xbd, 0x5a, 0x4c, 0x93, 0xbd, + 0x94, 0x85, 0x6b, 0xbd, 0x16, 0x96, 0x4a, 0xbd, 0x42, 0x55, 0xc4, 0xbd, 0x1e, 0x9b, 0xdc, 0xbc, + 0x94, 0x96, 0x10, 0xbd, 0x17, 0x92, 0x05, 0xbd, 0xc4, 0x85, 0x7f, 0xbb, 0x2c, 0x61, 0x96, 0xbd, + 0xba, 0xf9, 0x1c, 0xbc, 0xcd, 0xb2, 0x88, 0xbc, 0xac, 0xe6, 0x30, 0xbd, 0x51, 0xd5, 0x9e, 0xbd, + 0x7f, 0x8d, 0x29, 0xbd, 0x18, 0x8c, 0xae, 0x3b, 0xc7, 0x00, 0xc6, 0xbc, 0x3e, 0x79, 0x5b, 0xbd, + 0x3d, 0xf4, 0x99, 0xbd, 0x30, 0x27, 0x0c, 0xbd, 0x65, 0x11, 0xbd, 0xbc, 0xcc, 0x26, 0xc5, 0x3c, + 0x70, 0xd0, 0xbd, 0xbc, 0x98, 0x11, 0x69, 0xbc, 0xba, 0xdc, 0xb2, 0xbc, 0xaa, 0x32, 0x35, 0xbc, + 0x32, 0x93, 0xee, 0xbc, 0xb6, 0x6c, 0x02, 0xbc, 0x7c, 0x1d, 0x4c, 0xbd, 0xa6, 0x17, 0x6a, 0xbd, + 0x66, 0x2f, 0xc0, 0xbc, 0x8b, 0x7f, 0x2d, 0xbd, 0x07, 0xf5, 0xba, 0xbc, 0x96, 0x93, 0x48, 0xbd, + 0x90, 0x0e, 0xcb, 0xbb, 0x1f, 0x72, 0xfc, 0xbc, 0xa0, 0x93, 0x58, 0xbc, 0x10, 0x1e, 0xd2, 0xbc, + 0xe7, 0x63, 0x0a, 0xc0, 0x35, 0x75, 0x15, 0xbf, 0xe4, 0x22, 0x4a, 0xbf, 0x23, 0xa2, 0x03, 0xc0, + 0xce, 0xe3, 0x32, 0xc0, 0xdd, 0xd4, 0x8f, 0xbf, 0x11, 0xb1, 0x47, 0xc0, 0xcb, 0x3f, 0x1a, 0x3d, + 0x9f, 0xac, 0x8b, 0xbf, 0x10, 0x02, 0x61, 0xbf, 0xfd, 0x30, 0x9f, 0xbe, 0x4b, 0x7e, 0xab, 0xbe, + 0x81, 0x63, 0x90, 0xc0, 0x6a, 0xa4, 0xef, 0xbf, 0xd2, 0x48, 0xe5, 0xbf, 0xad, 0xb4, 0xe5, 0xbf, + 0xfe, 0x8b, 0xdb, 0xbf, 0x3c, 0x40, 0xc6, 0xbd, 0x07, 0xa4, 0x8c, 0xbe, 0x79, 0x5f, 0x08, 0xc0, + 0x58, 0x7a, 0xb3, 0xbf, 0x42, 0x4f, 0x3d, 0xc0, 0x9c, 0x8b, 0xbc, 0xbf, 0x07, 0x75, 0x34, 0xc0, + 0x64, 0x2c, 0x7a, 0xc0, 0x4d, 0x62, 0xe8, 0xbf, 0xe5, 0x29, 0x44, 0xbe, 0x6e, 0xb1, 0x86, 0xbf, + 0x18, 0xae, 0xe3, 0xbf, 0x25, 0xdc, 0x5d, 0xc0, 0xa3, 0x29, 0x3b, 0xc0, 0x61, 0xd8, 0xc5, 0xbf, + 0xf9, 0x86, 0x92, 0x3f, 0x40, 0x6d, 0xa8, 0x3f, 0x18, 0x77, 0x7f, 0x3f, 0x96, 0x6b, 0x06, 0x40, + 0xaa, 0xe2, 0x58, 0x3f, 0xd0, 0xa1, 0x85, 0x3f, 0x09, 0xf4, 0xb2, 0x3f, 0x65, 0xe8, 0xbd, 0x3f, + 0xcb, 0xe4, 0xe5, 0x3f, 0xd7, 0xbe, 0x86, 0x3f, 0xd3, 0xd7, 0xcd, 0x3f, 0x52, 0x4c, 0xb7, 0x3f, + 0x84, 0x48, 0xed, 0x3f, 0xc0, 0xd0, 0x9a, 0x3f, 0x02, 0x71, 0xb4, 0x3f, 0x0c, 0xaa, 0x31, 0x3f, + 0x54, 0x72, 0x9a, 0x3f, 0x78, 0x1b, 0x94, 0x3f, 0x44, 0x9c, 0x57, 0x3f, 0x7d, 0x45, 0x9a, 0x3f, + 0xee, 0x3d, 0xc8, 0x3f, 0xf5, 0xc7, 0xc4, 0x3f, 0x64, 0x7a, 0x82, 0x3f, 0x91, 0xa9, 0xd8, 0x3f, + 0x22, 0x34, 0x85, 0x3f, 0x59, 0xe8, 0xb6, 0x3f, 0xa8, 0x50, 0x89, 0x3f, 0x3c, 0xeb, 0xd3, 0x3f, + 0x44, 0xd2, 0x88, 0x3f, 0xfa, 0x04, 0x98, 0x3f, 0x18, 0xe1, 0xc9, 0x3f, 0x15, 0xf6, 0x9e, 0x3f, + 0x9a, 0x2f, 0x51, 0xbf, 0x72, 0x90, 0x48, 0xbf, 0x70, 0x98, 0x45, 0xbf, 0xde, 0x02, 0x8a, 0xbf, + 0x8e, 0x63, 0x68, 0xbf, 0x60, 0x2e, 0x60, 0xbf, 0x5c, 0x1c, 0x7b, 0xbf, 0xe9, 0xc8, 0x6b, 0xbf, + 0xac, 0xdb, 0x51, 0xbf, 0x04, 0x0c, 0x42, 0xbf, 0x01, 0x92, 0x46, 0xbf, 0xec, 0x50, 0x51, 0xbf, + 0xcf, 0x1f, 0x8e, 0xbf, 0xc8, 0x59, 0x62, 0xbf, 0xf8, 0xb7, 0x5e, 0xbf, 0x86, 0x9a, 0x3f, 0xbf, + 0xae, 0xad, 0x77, 0xbf, 0xea, 0x83, 0x46, 0xbf, 0xc0, 0x07, 0x2c, 0xbf, 0x45, 0xcb, 0x0b, 0xbf, + 0xc4, 0xdc, 0xa0, 0xbf, 0xc0, 0xe5, 0x84, 0xbf, 0x6b, 0x8d, 0x27, 0xbf, 0x1c, 0x19, 0x89, 0xbf, + 0xf6, 0x7f, 0x6b, 0xbf, 0x9e, 0xe5, 0x70, 0xbf, 0xec, 0xf6, 0x44, 0xbf, 0xc5, 0x7b, 0x66, 0xbf, + 0xef, 0x54, 0x5e, 0xbf, 0x96, 0x1a, 0x72, 0xbf, 0x28, 0x1e, 0x8c, 0xbf, 0x14, 0xea, 0x41, 0xbf, + 0xc7, 0xfb, 0x8c, 0xbe, 0x2f, 0x6a, 0xc6, 0xbe, 0xaa, 0x1a, 0x90, 0xbe, 0x4a, 0x9f, 0xa1, 0xbe, + 0xee, 0x66, 0xa4, 0xbd, 0x8c, 0xe0, 0x28, 0xbe, 0x3e, 0xe2, 0xf4, 0xbe, 0x70, 0x3b, 0x4f, 0xbe, + 0x6c, 0xc4, 0xbc, 0xbe, 0xe1, 0x87, 0x50, 0xbe, 0x42, 0x8f, 0x43, 0xbe, 0xad, 0x1f, 0xa0, 0xbe, + 0x6a, 0xbf, 0x1e, 0xbf, 0x95, 0x15, 0x9a, 0xbe, 0x40, 0x19, 0xab, 0xbe, 0xc3, 0x60, 0x0d, 0xbe, + 0x10, 0x8a, 0xd0, 0xbc, 0xdc, 0xdd, 0x7e, 0xbe, 0x60, 0x56, 0xf1, 0xbd, 0xc5, 0x1f, 0x26, 0xbe, + 0x42, 0x75, 0x4a, 0xbe, 0x30, 0x9e, 0xd9, 0xbe, 0x6d, 0xe4, 0x9f, 0xbe, 0x65, 0x09, 0xdd, 0xbe, + 0xde, 0x94, 0xbe, 0xbe, 0xa6, 0x2d, 0xf2, 0xbe, 0xff, 0x94, 0x5d, 0xbe, 0xb6, 0x3f, 0xa6, 0xbe, + 0xb0, 0x58, 0x8f, 0xbe, 0xff, 0x1a, 0x9b, 0xbe, 0xfb, 0x22, 0xf3, 0xbe, 0x94, 0x89, 0x0c, 0xbf, + 0x6e, 0x12, 0xc9, 0x3d, 0x4e, 0x88, 0xbe, 0x3d, 0xb4, 0xcf, 0xc5, 0x3d, 0x74, 0x2b, 0xdc, 0x3d, + 0x2b, 0x40, 0xe5, 0x3d, 0x78, 0xa2, 0xd6, 0x3d, 0x34, 0xde, 0xfe, 0x3d, 0x7e, 0x61, 0xca, 0x3d, + 0x24, 0xf9, 0xa6, 0x3d, 0x5c, 0xd8, 0xb5, 0x3d, 0xb1, 0x0e, 0x94, 0x3d, 0xf0, 0x71, 0xb9, 0x3d, + 0xea, 0x05, 0x0b, 0x3e, 0x96, 0xde, 0xdb, 0x3d, 0x26, 0x3f, 0xcd, 0x3d, 0x5c, 0xaf, 0xc4, 0x3d, + 0x0a, 0x73, 0xd9, 0x3d, 0xe6, 0xb6, 0xb8, 0x3d, 0xd1, 0x01, 0xa1, 0x3d, 0x9e, 0x2c, 0x4c, 0x3d, + 0x8a, 0xc2, 0x15, 0x3e, 0x88, 0xc3, 0x00, 0x3e, 0xfb, 0x73, 0xa3, 0x3d, 0x40, 0xd8, 0x00, 0x3e, + 0x97, 0xac, 0xf9, 0x3d, 0x14, 0xf6, 0xef, 0x3d, 0xeb, 0x47, 0xb9, 0x3d, 0x78, 0xce, 0xc4, 0x3d, + 0xfc, 0x7e, 0xde, 0x3d, 0xfc, 0x77, 0xf0, 0x3d, 0xdc, 0x9e, 0x0a, 0x3e, 0x03, 0x22, 0xcc, 0x3d, + 0xb7, 0x3b, 0x34, 0x3d, 0xd8, 0xe4, 0x5a, 0x3d, 0xd5, 0xc0, 0x42, 0x3d, 0x5b, 0x00, 0x00, 0x3d, + 0x78, 0x1e, 0xef, 0x3c, 0x5d, 0x21, 0x0a, 0x3d, 0xcf, 0xee, 0x97, 0x3d, 0x33, 0xcc, 0xdc, 0x3c, + 0xf0, 0x2f, 0x15, 0x3d, 0xa6, 0xa0, 0x08, 0x3d, 0x1c, 0x81, 0x62, 0x3c, 0x56, 0x15, 0x24, 0x3d, + 0x44, 0xa5, 0xb1, 0x3d, 0x62, 0xd8, 0x48, 0x3d, 0xbd, 0x05, 0x3f, 0x3d, 0xfc, 0x5a, 0x0f, 0x3d, + 0x24, 0x29, 0x44, 0x3c, 0x48, 0xc0, 0x19, 0x3d, 0x03, 0x11, 0xbe, 0x3c, 0x5b, 0xd0, 0x3b, 0x3c, + 0x65, 0x43, 0x25, 0x3d, 0x56, 0xf0, 0x83, 0x3d, 0x6e, 0xbc, 0x3a, 0x3d, 0x94, 0x7e, 0x7d, 0x3d, + 0xc5, 0x81, 0x89, 0x3d, 0x72, 0x1d, 0x90, 0x3d, 0x2a, 0x0d, 0x10, 0x3d, 0xaf, 0xd6, 0x1d, 0x3d, + 0x39, 0xf0, 0x4b, 0x3d, 0xd3, 0xa7, 0x59, 0x3d, 0xaa, 0x4e, 0x96, 0x3d, 0x6a, 0x6e, 0xa1, 0x3d, + 0xa4, 0x8f, 0x3d, 0x3d, 0x1e, 0x53, 0x3e, 0x3d, 0x5c, 0x45, 0x2c, 0x3d, 0x9b, 0x6a, 0x92, 0x3d, + 0xce, 0x28, 0x41, 0x3d, 0xd5, 0x55, 0x45, 0x3d, 0xae, 0x9e, 0x5d, 0x3d, 0xb9, 0x64, 0x68, 0x3d, + 0xeb, 0xa5, 0x65, 0x3d, 0xaa, 0xdd, 0x31, 0x3d, 0xae, 0x25, 0x5b, 0x3d, 0x17, 0x56, 0x4f, 0x3d, + 0xe2, 0xac, 0x84, 0x3d, 0x60, 0xf8, 0x4a, 0x3d, 0xbc, 0xc1, 0x54, 0x3d, 0x39, 0x45, 0x1b, 0x3d, + 0xdb, 0xac, 0x65, 0x3d, 0x3e, 0x5b, 0x39, 0x3d, 0x4c, 0xae, 0x1a, 0x3d, 0x2b, 0xd9, 0x1d, 0x3d, + 0x94, 0xda, 0x90, 0x3d, 0x42, 0xaa, 0x72, 0x3d, 0x6b, 0x21, 0x1a, 0x3d, 0xd0, 0xfb, 0x80, 0x3d, + 0xa1, 0x48, 0x41, 0x3d, 0xfe, 0xde, 0x59, 0x3d, 0x96, 0x55, 0x34, 0x3d, 0x48, 0xb0, 0x6a, 0x3d, + 0x2b, 0x09, 0x40, 0x3d, 0x2d, 0xd4, 0x52, 0x3d, 0x23, 0x7f, 0x7b, 0x3d, 0x3e, 0x05, 0x2d, 0x3d, + 0x22, 0x36, 0x9d, 0x3c, 0x77, 0x7a, 0xda, 0x3c, 0x24, 0xac, 0x8f, 0x3c, 0xee, 0xa9, 0x08, 0x3d, + 0x07, 0x82, 0xbd, 0x3b, 0x2b, 0x10, 0x4f, 0x3c, 0x92, 0x5e, 0xed, 0x3c, 0x8e, 0x6c, 0xaa, 0x3c, + 0xbf, 0x44, 0x09, 0x3d, 0x41, 0xab, 0x80, 0x3c, 0x40, 0xce, 0xc6, 0x3c, 0xa6, 0xdd, 0xcf, 0x3c, + 0x34, 0xf3, 0x24, 0x3d, 0xf8, 0x0b, 0xa7, 0x3c, 0xe8, 0x0a, 0xce, 0x3c, 0xd7, 0x1a, 0xf2, 0x3b, + 0xd8, 0xfb, 0x12, 0x3c, 0xf6, 0x08, 0x9b, 0x3c, 0xef, 0xc2, 0x24, 0x3c, 0xd0, 0x79, 0x9e, 0x3c, + 0x90, 0xa3, 0x91, 0x3c, 0x11, 0x27, 0xea, 0x3c, 0x4f, 0x17, 0xa8, 0x3c, 0xd2, 0x0c, 0xfe, 0x3c, + 0x9d, 0x3b, 0xa2, 0x3c, 0xb8, 0x0c, 0xf4, 0x3c, 0x80, 0x28, 0x86, 0x3c, 0x4e, 0x9a, 0xe9, 0x3c, + 0x96, 0x04, 0x8f, 0x3c, 0x34, 0x60, 0x9e, 0x3c, 0x2a, 0x20, 0xf9, 0x3c, 0xa4, 0x51, 0x02, 0x3d, + 0x35, 0x1c, 0xa8, 0x3f, 0xfa, 0xd0, 0xa8, 0x3f, 0xf2, 0x9e, 0xaf, 0x3f, 0xe2, 0x98, 0x84, 0x3f, + 0x24, 0x1d, 0xb0, 0x3f, 0x83, 0x11, 0xa6, 0x3f, 0x23, 0x4b, 0xf1, 0x3f, 0x58, 0xa2, 0x85, 0x3f, + 0xa8, 0xc1, 0x60, 0x3f, 0xe5, 0x34, 0x8e, 0x3f, 0x71, 0x19, 0x11, 0x3f, 0x88, 0x40, 0x8e, 0x3f, + 0x32, 0x8b, 0x02, 0x40, 0x64, 0x80, 0xba, 0x3f, 0x2a, 0x5c, 0xa6, 0x3f, 0x07, 0x7c, 0xa6, 0x3f, + 0xeb, 0x85, 0x83, 0x3f, 0x55, 0xe7, 0x92, 0x3f, 0x77, 0x55, 0x6f, 0x3f, 0x80, 0xe0, 0xc6, 0x3e, + 0xc6, 0xa3, 0xda, 0x3f, 0xf7, 0xa5, 0xe0, 0x3f, 0x32, 0xf5, 0x93, 0x3f, 0x36, 0x24, 0xd8, 0x3f, + 0x90, 0xef, 0xee, 0x3f, 0xa4, 0x53, 0xe0, 0x3f, 0x08, 0x97, 0x92, 0x3f, 0xd7, 0xe3, 0x8c, 0x3f, + 0x2c, 0xef, 0xc1, 0x3f, 0x1f, 0x98, 0xcf, 0x3f, 0x6e, 0x26, 0xfa, 0x3f, 0x07, 0x4c, 0xd9, 0x3f, + 0x64, 0x51, 0x6c, 0x3e, 0x01, 0x28, 0xde, 0x3e, 0xda, 0x7b, 0xd1, 0x3e, 0x93, 0x5b, 0x09, 0x3f, + 0x5b, 0x1a, 0xfc, 0x3e, 0x9c, 0x39, 0xeb, 0x3d, 0xf6, 0x88, 0x57, 0x3f, 0xc6, 0xc5, 0x78, 0x3f, + 0xa4, 0x1a, 0x44, 0x3f, 0x57, 0x26, 0x9c, 0x3e, 0x68, 0xd1, 0x34, 0x3f, 0x7a, 0x28, 0x48, 0x3f, + 0x3d, 0x70, 0x43, 0x3f, 0xf8, 0x27, 0x6f, 0x3e, 0x57, 0xa6, 0x54, 0x3f, 0xe5, 0xa9, 0x05, 0x3e, + 0xaa, 0xf7, 0x0a, 0x3e, 0xee, 0xa9, 0x01, 0x3f, 0x9c, 0xa7, 0xef, 0x3e, 0xa5, 0xac, 0x91, 0x3e, + 0x82, 0xc6, 0x31, 0x3f, 0x72, 0xc8, 0x50, 0x3f, 0xb0, 0xae, 0x32, 0x3f, 0x14, 0x78, 0x5f, 0x3e, + 0x02, 0x8a, 0xab, 0x3e, 0xba, 0xa9, 0x39, 0x3e, 0x4c, 0x35, 0x30, 0x3f, 0xf4, 0x2e, 0x38, 0x3f, + 0xbf, 0x9e, 0x81, 0x3f, 0x4a, 0x5a, 0xc7, 0x3e, 0xc8, 0xc6, 0x6b, 0x3e, 0x3e, 0xda, 0x2a, 0x3f, + 0x58, 0x0e, 0x98, 0xbe, 0x14, 0xeb, 0xf1, 0xbd, 0x4e, 0x74, 0xbf, 0xbe, 0xe8, 0xad, 0x02, 0xbf, + 0xac, 0x65, 0x24, 0xbf, 0xf4, 0x82, 0x3e, 0xbe, 0xbc, 0x46, 0xb8, 0xbe, 0xa3, 0xf3, 0x2b, 0xbe, + 0x18, 0x5e, 0x85, 0xbe, 0x19, 0x9b, 0x74, 0xbe, 0x49, 0x7f, 0xa3, 0xbe, 0x54, 0xda, 0x48, 0xbd, + 0x2a, 0x8d, 0x2e, 0xbf, 0xb7, 0x71, 0x9e, 0xbd, 0x39, 0xa4, 0x79, 0xbe, 0x8c, 0x03, 0xb2, 0xbe, + 0x45, 0xa4, 0x8e, 0xbe, 0x98, 0x50, 0x83, 0xbe, 0x54, 0x2d, 0xd8, 0xbd, 0x5b, 0x28, 0x4a, 0xbe, + 0x9f, 0xaf, 0xa5, 0xbe, 0x6d, 0x00, 0x00, 0xbf, 0xde, 0xae, 0xb7, 0xbe, 0xed, 0xc7, 0xc5, 0xbe, + 0x68, 0xe8, 0xbd, 0xbe, 0xbb, 0x54, 0x54, 0xbe, 0x0a, 0xea, 0x6b, 0xbe, 0xb1, 0xca, 0x73, 0xbd, + 0x45, 0x4c, 0x12, 0xbf, 0x8f, 0xd8, 0x91, 0xbe, 0x19, 0xc5, 0xba, 0xbe, 0xb6, 0x84, 0xdb, 0xbe, + 0x40, 0x92, 0x09, 0xbe, 0x60, 0x7a, 0x98, 0x3b, 0xfd, 0x1b, 0xcb, 0x3d, 0xd0, 0x60, 0x31, 0xbc, + 0xdc, 0x50, 0x30, 0xbd, 0x3c, 0x24, 0xd4, 0xbc, 0xf8, 0x9b, 0xc1, 0xbd, 0x2c, 0x60, 0x2f, 0xbd, + 0x3e, 0xf7, 0x81, 0xbe, 0x44, 0xd5, 0x17, 0x3c, 0xcb, 0x1b, 0x04, 0xbe, 0x6c, 0x15, 0x29, 0xbe, + 0x2e, 0x0d, 0xae, 0xbe, 0x95, 0x2f, 0x90, 0xbd, 0xb6, 0xbb, 0x8e, 0xbe, 0x80, 0xb6, 0xaa, 0x3c, + 0x3e, 0x94, 0x02, 0x3d, 0xdc, 0x9c, 0x6a, 0xbd, 0x3d, 0x63, 0x88, 0xbe, 0xa7, 0xd0, 0xc5, 0x3c, + 0x90, 0x5c, 0x0a, 0xbe, 0x0f, 0xeb, 0xca, 0xbd, 0x52, 0x63, 0x80, 0xbe, 0x12, 0x66, 0x17, 0xbe, + 0x27, 0xaa, 0x9d, 0xbe, 0xc5, 0x1f, 0x7d, 0xbd, 0x53, 0x54, 0x1b, 0xbe, 0x12, 0x6f, 0x2f, 0xbe, + 0xde, 0x81, 0x1e, 0xbe, 0xe8, 0xa6, 0x44, 0xbe, 0x3c, 0x46, 0x8a, 0x3c, 0x04, 0x6d, 0x52, 0xbe, + 0xf2, 0x78, 0x32, 0x3d, 0xd4, 0x17, 0x46, 0x3b, 0x67, 0x92, 0x19, 0x3d, 0x14, 0x97, 0x72, 0x3d, + 0x1d, 0x12, 0xa8, 0x3d, 0x8b, 0xdb, 0xcf, 0x3c, 0xbb, 0x14, 0xfb, 0x3c, 0x40, 0xe9, 0xa5, 0xbb, + 0x8a, 0xe0, 0xd6, 0x3c, 0x89, 0x73, 0xce, 0x3c, 0x3c, 0x5d, 0xfc, 0x3c, 0xae, 0x39, 0x2b, 0xbc, + 0xb4, 0x27, 0xc0, 0x3d, 0xd2, 0x83, 0xf0, 0x3b, 0x12, 0xee, 0xba, 0x3c, 0xe8, 0x86, 0x3f, 0x3d, + 0xb1, 0x1c, 0x12, 0x3d, 0x0e, 0x4f, 0xc8, 0x3c, 0x42, 0x00, 0x5e, 0x3c, 0x62, 0x01, 0x9b, 0x3c, + 0x96, 0xae, 0x03, 0x3d, 0x18, 0x25, 0x58, 0x3d, 0x44, 0x7a, 0x30, 0x3d, 0x2f, 0xd2, 0x6d, 0x3d, + 0x24, 0x36, 0x75, 0x3d, 0x76, 0x99, 0xe6, 0x3c, 0xff, 0x45, 0x9d, 0x3c, 0xd9, 0x9a, 0xd4, 0xbb, + 0x18, 0x8e, 0x74, 0x3d, 0x4f, 0x2f, 0x21, 0x3d, 0xed, 0x3a, 0x3d, 0x3d, 0xca, 0x62, 0x56, 0x3d, + 0x57, 0xb6, 0xdf, 0x3c, 0x9e, 0xa2, 0x17, 0xbc, 0x8b, 0xda, 0x20, 0xbc, 0x32, 0xc0, 0x17, 0x3c, + 0xac, 0x79, 0xb5, 0x3c, 0x34, 0x53, 0x19, 0x3c, 0xf6, 0xba, 0x86, 0x3b, 0xca, 0xf6, 0x8f, 0xbc, + 0x2e, 0x6f, 0xd1, 0x3c, 0x00, 0x33, 0xcb, 0x39, 0x38, 0xe9, 0x40, 0x3c, 0x50, 0xbd, 0x97, 0x3a, + 0xe2, 0x44, 0x74, 0x3d, 0x9c, 0xb3, 0xd9, 0x3b, 0x08, 0x97, 0xd7, 0x3c, 0x2e, 0x52, 0x22, 0x3c, + 0xca, 0x86, 0x97, 0x3b, 0xb3, 0x82, 0x94, 0x3b, 0xfb, 0xbf, 0xf2, 0x3c, 0x8a, 0x04, 0x54, 0xbb, + 0x40, 0xe9, 0x59, 0x3c, 0x15, 0xb4, 0x4f, 0x3c, 0xd5, 0x3d, 0x04, 0x3d, 0xde, 0x27, 0x0c, 0x3d, + 0xbd, 0x6a, 0x5b, 0x3d, 0x10, 0xb8, 0x5e, 0x3c, 0x72, 0x9c, 0x39, 0x3c, 0x04, 0xc0, 0x99, 0x3b, + 0x36, 0xc3, 0x93, 0x3c, 0x73, 0x56, 0xf6, 0x3c, 0x24, 0x9a, 0x03, 0x3c, 0x0f, 0xe2, 0xf5, 0x3c, + 0x9a, 0xf8, 0x59, 0x3c, 0x06, 0x03, 0x40, 0x3c, 0xf2, 0xe3, 0xb4, 0x3c, 0xca, 0xf6, 0xe4, 0x3c, + 0x4e, 0x0c, 0x02, 0x3d, 0xe2, 0x5d, 0x0c, 0x3c, 0x5a, 0xd6, 0xd7, 0x3c, 0x3c, 0xe1, 0xb3, 0x3c, + 0xc0, 0x9d, 0x9e, 0x3c, 0xf8, 0xf0, 0x67, 0x3c, 0xe0, 0x52, 0xb5, 0x3c, 0x2c, 0xd0, 0x51, 0x3c, + 0x50, 0x71, 0x0b, 0x3d, 0x80, 0xc7, 0xc1, 0x3b, 0x4f, 0xc2, 0x9f, 0x3c, 0x31, 0x87, 0x7d, 0x3c, + 0xc8, 0xc1, 0x56, 0x3c, 0xde, 0x13, 0x8e, 0x3c, 0x2a, 0x1d, 0x0a, 0x3c, 0xd7, 0xee, 0x4b, 0x3c, + 0x96, 0x63, 0xb4, 0x3c, 0x99, 0xce, 0xfe, 0x3c, 0x5b, 0xd4, 0xb3, 0x3c, 0x18, 0x53, 0x85, 0x3c, + 0xd0, 0x27, 0x80, 0x3c, 0xa6, 0x40, 0x24, 0x3c, 0x12, 0x38, 0x95, 0x3c, 0xbd, 0xf1, 0x45, 0x3c, + 0xef, 0xfb, 0x14, 0x3d, 0x93, 0x01, 0x72, 0x3c, 0x2c, 0x3a, 0x91, 0x3c, 0x30, 0xca, 0xc9, 0x3c, + 0x1a, 0x7a, 0xa9, 0x3b, 0x79, 0xdd, 0xb6, 0x3b, 0x8d, 0x51, 0xce, 0xba, 0x15, 0x7e, 0x42, 0x3b, + 0x90, 0x13, 0xeb, 0x3a, 0x10, 0xbf, 0x12, 0x3a, 0xb8, 0x46, 0x56, 0x3c, 0x00, 0xa3, 0x80, 0x3c, + 0x12, 0xe5, 0x9b, 0x3c, 0xdc, 0x51, 0xe5, 0x3a, 0x20, 0xbd, 0x4e, 0x3c, 0x6c, 0x74, 0x98, 0x3c, + 0x1a, 0xd8, 0x8a, 0x3c, 0x64, 0x5f, 0xb6, 0x3b, 0xc9, 0x16, 0xae, 0x3c, 0x23, 0x76, 0x41, 0xbb, + 0x9c, 0x57, 0x1e, 0xbb, 0xb3, 0xf2, 0xf1, 0x3b, 0x82, 0xee, 0x86, 0x3c, 0x22, 0x67, 0xb3, 0x3a, + 0xa2, 0x5e, 0x4e, 0x3c, 0x0d, 0x2d, 0x35, 0x3c, 0xec, 0x97, 0x87, 0x3c, 0x1e, 0xa3, 0x8e, 0x3b, + 0xd4, 0xb8, 0x4c, 0x3c, 0xdb, 0x0c, 0x34, 0x3b, 0x5f, 0xf8, 0x69, 0x3c, 0x3b, 0xc4, 0x90, 0x3c, + 0xc2, 0x96, 0x7e, 0x3c, 0x69, 0xf9, 0x25, 0x3c, 0x74, 0xa3, 0xb1, 0xba, 0xda, 0xb0, 0x5c, 0x3c, + 0x0e, 0x5e, 0x3d, 0x3f, 0x28, 0xe0, 0x36, 0xbe, 0x2c, 0x14, 0x56, 0x3e, 0x1d, 0xe8, 0x22, 0x3f, + 0x7a, 0x51, 0x86, 0x3f, 0x04, 0x5a, 0xb9, 0x3e, 0xec, 0xe9, 0x38, 0x3e, 0x69, 0xc6, 0xfe, 0xbe, + 0xbb, 0x4a, 0xc8, 0x3e, 0x4c, 0xb6, 0x58, 0x3e, 0xd8, 0xda, 0x9a, 0x3e, 0x72, 0x9b, 0x99, 0xbe, + 0x1a, 0x8e, 0xc4, 0x3f, 0xfc, 0x9a, 0xca, 0x3d, 0x6a, 0xb2, 0xb0, 0x3e, 0xaf, 0xd8, 0x1a, 0x3f, + 0x15, 0xbc, 0xd6, 0x3e, 0x30, 0x3b, 0x4c, 0x3e, 0x2c, 0xd1, 0xbc, 0x3e, 0xa6, 0xbd, 0xd7, 0x3d, + 0x42, 0xe6, 0xac, 0x3e, 0xa0, 0x4a, 0x08, 0x3f, 0xc5, 0x65, 0x2c, 0x3f, 0x50, 0xcc, 0x7b, 0x3f, + 0xef, 0x9f, 0x98, 0x3f, 0x65, 0x3b, 0xd9, 0x3e, 0x22, 0x77, 0x2f, 0x3e, 0xe2, 0xdf, 0x4c, 0xbe, + 0x2a, 0x17, 0x1f, 0x3f, 0x5e, 0x04, 0x2f, 0x3f, 0x1a, 0x73, 0x0d, 0x3f, 0x60, 0x78, 0x42, 0x3f, + 0x7c, 0x18, 0x07, 0xbe, 0xb2, 0xe8, 0x21, 0xbe, 0xb8, 0x0d, 0xf1, 0xbd, 0x0c, 0x82, 0x51, 0xbe, + 0xd2, 0x36, 0xe4, 0xbd, 0xfa, 0xc5, 0xf0, 0xbd, 0xd9, 0x72, 0x40, 0xbe, 0x45, 0x5f, 0x17, 0xbe, + 0x2d, 0x02, 0x64, 0xbe, 0x5f, 0x2f, 0x2d, 0xbe, 0xf0, 0xf0, 0x40, 0xbe, 0x0e, 0x0f, 0x13, 0xbe, + 0xfa, 0xf4, 0x58, 0xbe, 0x29, 0x46, 0x15, 0xbe, 0xec, 0xd2, 0x3f, 0xbe, 0xea, 0xfc, 0xcb, 0xbd, + 0x34, 0x42, 0xdd, 0xbd, 0xfc, 0x2c, 0x03, 0xbe, 0xf4, 0x89, 0xfc, 0xbd, 0x82, 0x11, 0x22, 0xbe, + 0x32, 0xe2, 0x45, 0xbe, 0xac, 0xc6, 0x63, 0xbe, 0xd0, 0xc3, 0x05, 0xbe, 0xce, 0xa0, 0x27, 0xbe, + 0xc7, 0x8f, 0x1a, 0xbe, 0x93, 0xa6, 0x17, 0xbe, 0xc6, 0x42, 0x16, 0xbe, 0x20, 0x46, 0x4f, 0xbe, + 0xf8, 0x4a, 0x36, 0xbe, 0xc8, 0x49, 0x2a, 0xbe, 0xd9, 0x38, 0x43, 0xbe, 0xc0, 0xbb, 0x45, 0xbe, + 0xbc, 0x5a, 0xd2, 0x3d, 0x50, 0xc9, 0xc8, 0x3d, 0x02, 0x29, 0xbf, 0x3d, 0xe7, 0x84, 0xf9, 0x3d, + 0x4b, 0x1f, 0xef, 0x3d, 0x1d, 0x25, 0xe0, 0x3d, 0x61, 0xf3, 0xee, 0x3d, 0x3a, 0x85, 0xee, 0x3d, + 0xa6, 0x13, 0xc2, 0x3d, 0x49, 0x8b, 0xe0, 0x3d, 0xd6, 0x98, 0xb0, 0x3d, 0x99, 0x29, 0xab, 0x3d, + 0x2c, 0x31, 0x06, 0x3e, 0x95, 0x0a, 0xdb, 0x3d, 0x18, 0x54, 0xc1, 0x3d, 0x16, 0xc8, 0xb0, 0x3d, + 0x6d, 0x79, 0xd1, 0x3d, 0x9e, 0x0d, 0xb9, 0x3d, 0xc8, 0xb8, 0xcc, 0x3d, 0xea, 0xd3, 0x50, 0x3d, + 0x7c, 0x8e, 0x21, 0x3e, 0xb2, 0xe3, 0xf6, 0x3d, 0x22, 0x42, 0xc2, 0x3d, 0x04, 0x68, 0xf4, 0x3d, + 0x06, 0xbf, 0xe9, 0x3d, 0x2a, 0x00, 0xe0, 0x3d, 0x6c, 0x67, 0xdc, 0x3d, 0x8e, 0xc4, 0xd5, 0x3d, + 0x56, 0x50, 0xf8, 0x3d, 0xc2, 0x1f, 0xf0, 0x3d, 0x76, 0xb5, 0xf6, 0x3d, 0x9e, 0x31, 0xf0, 0x3d, + 0xc5, 0x6c, 0xe4, 0x3c, 0x69, 0xb9, 0x42, 0x3d, 0x0a, 0xb9, 0x31, 0x3d, 0x4f, 0xd1, 0xb2, 0x3c, + 0x14, 0xc9, 0xd1, 0x3b, 0xa4, 0x8c, 0xa9, 0x3c, 0xa8, 0x16, 0x4e, 0x3d, 0xaa, 0x92, 0x08, 0x3c, + 0x9f, 0x53, 0x62, 0x3d, 0x3b, 0x19, 0x00, 0x3d, 0x6d, 0x3f, 0x19, 0x3d, 0x72, 0xf3, 0x2e, 0x3d, + 0xb2, 0xf1, 0x87, 0x3d, 0xa8, 0x1e, 0xf1, 0x3c, 0xcc, 0xfa, 0x67, 0x3d, 0xd6, 0x15, 0x01, 0x3d, + 0xb0, 0x23, 0xf3, 0xba, 0xe4, 0xc4, 0x3c, 0x3d, 0x90, 0x72, 0xae, 0x3c, 0xe3, 0x4b, 0x83, 0x3c, + 0xd6, 0x4c, 0xeb, 0x3c, 0x92, 0xaf, 0x81, 0x3d, 0xe5, 0xd7, 0x08, 0x3d, 0x0f, 0xb9, 0x3c, 0x3d, + 0x66, 0x58, 0x3c, 0x3d, 0x8e, 0x9d, 0x64, 0x3d, 0x20, 0x05, 0x0f, 0x3d, 0x7d, 0x73, 0x1f, 0x3d, + 0xa4, 0xcd, 0x3d, 0x3d, 0xe6, 0x73, 0xea, 0x3c, 0xfb, 0x10, 0x82, 0x3d, 0x4b, 0x07, 0x9b, 0x3d, + 0x80, 0x37, 0x4b, 0xbc, 0x7c, 0x89, 0x41, 0xbc, 0xe6, 0xa7, 0x48, 0xbc, 0x32, 0x91, 0x4d, 0xbc, + 0x90, 0x55, 0x67, 0xbc, 0x02, 0xb9, 0x5d, 0xbc, 0xbc, 0x51, 0x61, 0xbc, 0x43, 0x52, 0x54, 0xbc, + 0x8c, 0x8d, 0x1c, 0xbc, 0x0e, 0xb5, 0x4a, 0xbc, 0xfe, 0xa9, 0x0b, 0xbc, 0x2d, 0xf4, 0x21, 0xbc, + 0x38, 0x5d, 0x82, 0xbc, 0x46, 0x59, 0x4f, 0xbc, 0x84, 0x15, 0x30, 0xbc, 0xe5, 0x8e, 0x37, 0xbc, + 0xe7, 0xd8, 0x3e, 0xbc, 0x76, 0xdb, 0x3d, 0xbc, 0x69, 0xc4, 0x43, 0xbc, 0x9a, 0xf5, 0x4f, 0xbb, + 0x2c, 0xdd, 0x98, 0xbc, 0x3e, 0xa9, 0x62, 0xbc, 0x2c, 0x23, 0x3d, 0xbc, 0x66, 0xa1, 0x71, 0xbc, + 0x4b, 0xbb, 0x6b, 0xbc, 0xbc, 0xa4, 0x69, 0xbc, 0x20, 0xf3, 0x54, 0xbc, 0x91, 0xdd, 0x31, 0xbc, + 0xe1, 0xb6, 0x6e, 0xbc, 0x16, 0xc6, 0x5c, 0xbc, 0x6d, 0xf5, 0x73, 0xbc, 0x42, 0xc9, 0x74, 0xbc, + 0x89, 0x3c, 0xa1, 0xbb, 0x66, 0x98, 0xdd, 0xbb, 0x35, 0xa2, 0xed, 0xbb, 0x87, 0xdc, 0x26, 0xbb, + 0x52, 0x1a, 0x4a, 0xbb, 0x18, 0xa2, 0x98, 0xbb, 0x30, 0x30, 0xe9, 0xbb, 0x30, 0xa6, 0x0c, 0xbb, + 0xf2, 0xa4, 0xb7, 0xbb, 0x71, 0x9f, 0x94, 0xbb, 0xf8, 0xbd, 0x6e, 0xbb, 0x27, 0x02, 0xbe, 0xbb, + 0xb8, 0xc6, 0x1a, 0xbc, 0xe2, 0xad, 0xa0, 0xbb, 0x1e, 0x6d, 0xe4, 0xbb, 0xb8, 0x88, 0xbb, 0xbb, + 0x20, 0xd4, 0xa1, 0xba, 0x0c, 0xa5, 0xea, 0xbb, 0x30, 0xc2, 0x85, 0xbb, 0x05, 0x06, 0xa9, 0x39, + 0x44, 0x98, 0xbc, 0xbb, 0xf1, 0xe5, 0x04, 0xbc, 0xa5, 0xef, 0xb0, 0xbb, 0x6d, 0x30, 0xf3, 0xbb, + 0x44, 0x25, 0xf8, 0xbb, 0xa0, 0xe3, 0x11, 0xbc, 0x44, 0xab, 0xbb, 0xbb, 0x6e, 0x43, 0x8d, 0xbb, + 0x24, 0xf8, 0xe7, 0xbb, 0x50, 0x21, 0x97, 0xbb, 0x0b, 0xa5, 0x16, 0xbc, 0x66, 0xa7, 0x30, 0xbc, + 0xa9, 0xd7, 0xba, 0xbb, 0xd2, 0x59, 0xbb, 0xbb, 0x6e, 0x34, 0xa1, 0xbb, 0x1b, 0x82, 0xfc, 0xbb, + 0x48, 0xb5, 0xc9, 0xbb, 0x3a, 0x37, 0xbe, 0xbb, 0x9d, 0x66, 0xe1, 0xbb, 0x04, 0x6e, 0xdc, 0xbb, + 0xfd, 0x6a, 0xd7, 0xbb, 0x2e, 0x48, 0xd6, 0xbb, 0x10, 0x89, 0xc1, 0xbb, 0x3e, 0xa2, 0xa3, 0xbb, + 0x52, 0x8d, 0xf9, 0xbb, 0x4d, 0xfb, 0xc6, 0xbb, 0xf1, 0x1a, 0xc3, 0xbb, 0xa5, 0x8e, 0x93, 0xbb, + 0x5b, 0x72, 0xb9, 0xbb, 0x8a, 0x1d, 0xa2, 0xbb, 0x55, 0x74, 0xb5, 0xbb, 0x1e, 0x85, 0x8e, 0xbb, + 0x06, 0xd2, 0x0f, 0xbc, 0x54, 0xad, 0xf3, 0xbb, 0x41, 0x36, 0xae, 0xbb, 0x55, 0x2c, 0xd9, 0xbb, + 0x96, 0xd8, 0xcb, 0xbb, 0x51, 0x6b, 0xc0, 0xbb, 0x5b, 0x14, 0xc6, 0xbb, 0x96, 0xdc, 0xde, 0xbb, + 0x16, 0x3b, 0xe3, 0xbb, 0x14, 0x2c, 0xdf, 0xbb, 0x66, 0x21, 0xe2, 0xbb, 0x83, 0x91, 0xda, 0xbb, + 0xdc, 0x7c, 0x00, 0xbb, 0x10, 0x39, 0x51, 0xbb, 0x46, 0xbb, 0x1e, 0xbb, 0x16, 0x50, 0x31, 0xbb, + 0x69, 0x32, 0x27, 0xba, 0x12, 0x3d, 0xb3, 0xba, 0xf2, 0x59, 0x69, 0xbb, 0x32, 0x15, 0xad, 0xba, + 0xeb, 0xaf, 0x96, 0xbb, 0xb0, 0xc5, 0x2b, 0xbb, 0x1f, 0x1d, 0x63, 0xbb, 0x4a, 0x5e, 0x41, 0xbb, + 0x9a, 0x99, 0x8f, 0xbb, 0xef, 0xf1, 0x0f, 0xbb, 0xfb, 0x3b, 0x85, 0xbb, 0x74, 0x10, 0xe7, 0xba, + 0x82, 0x3f, 0xed, 0xb9, 0x34, 0x29, 0x33, 0xbb, 0x0f, 0x44, 0xd2, 0xba, 0x0f, 0x7f, 0x2a, 0xbb, + 0xac, 0x6b, 0x18, 0xbb, 0x08, 0xea, 0x95, 0xbb, 0xd2, 0x15, 0x13, 0xbb, 0x4a, 0x46, 0x42, 0xbb, + 0xdc, 0x5f, 0x38, 0xbb, 0x6a, 0x54, 0x51, 0xbb, 0xbb, 0xed, 0x1d, 0xbb, 0x78, 0x41, 0x67, 0xbb, + 0x8f, 0x00, 0x51, 0xbb, 0x4d, 0xed, 0x1b, 0xbb, 0x04, 0xd1, 0x84, 0xbb, 0xa7, 0x6f, 0x96, 0xbb, + 0x2e, 0xcc, 0x25, 0xbe, 0x12, 0x82, 0x2d, 0xbe, 0x5c, 0x7f, 0x42, 0xbe, 0xaf, 0xfc, 0xf2, 0xbd, + 0xef, 0xa7, 0x29, 0xbe, 0x52, 0x76, 0x33, 0xbe, 0xb2, 0x4b, 0x40, 0xbe, 0x3c, 0x2b, 0x08, 0xbe, + 0xef, 0x45, 0xeb, 0xbd, 0x84, 0x7b, 0x17, 0xbe, 0x5e, 0xdd, 0xb7, 0xbd, 0xae, 0xe8, 0x0f, 0xbe, + 0x92, 0xe9, 0x6d, 0xbe, 0x5d, 0xf6, 0x24, 0xbe, 0xc2, 0x28, 0x1c, 0xbe, 0xec, 0x7e, 0x29, 0xbe, + 0xc0, 0x82, 0xf1, 0xbd, 0x16, 0x31, 0x37, 0xbe, 0x20, 0x1b, 0x19, 0xbe, 0xc8, 0x9e, 0xf5, 0x3b, + 0x00, 0xcc, 0x67, 0xbe, 0x02, 0xdf, 0x43, 0xbe, 0x06, 0xc0, 0x21, 0xbe, 0x4e, 0x7c, 0x55, 0xbe, + 0xd8, 0x58, 0x56, 0xbe, 0x3b, 0xf2, 0x64, 0xbe, 0x76, 0x24, 0x32, 0xbe, 0xee, 0xcf, 0xf5, 0xbd, + 0x14, 0xd3, 0x4b, 0xbe, 0x8b, 0xca, 0x25, 0xbe, 0x66, 0xa6, 0x64, 0xbe, 0x46, 0xe4, 0x76, 0xbe, + 0xca, 0x05, 0x26, 0xbd, 0x96, 0x32, 0x8f, 0xbd, 0x2b, 0xf3, 0x70, 0xbd, 0xc4, 0x83, 0x45, 0xbd, + 0x07, 0x8c, 0x9b, 0xbd, 0x26, 0xf0, 0xc9, 0xbc, 0x01, 0x80, 0x07, 0xbe, 0xee, 0xe8, 0xc5, 0xbd, + 0xff, 0x11, 0xf5, 0xbd, 0xbc, 0x3c, 0xc1, 0xbd, 0x14, 0x30, 0xc9, 0xbd, 0x94, 0xf7, 0xa0, 0xbd, + 0x84, 0x9c, 0xd2, 0xbd, 0x6e, 0x59, 0x45, 0xbd, 0xda, 0x87, 0x04, 0xbe, 0xb6, 0x2d, 0x31, 0xbd, + 0x20, 0x01, 0x78, 0xba, 0x5d, 0x1c, 0x81, 0xbd, 0x96, 0xe9, 0xaf, 0xbd, 0x7e, 0x92, 0x87, 0xbd, + 0xb7, 0x99, 0xd9, 0xbd, 0x49, 0xdd, 0x19, 0xbe, 0xf2, 0xb5, 0xc9, 0xbd, 0x44, 0x29, 0x85, 0xbc, + 0xf2, 0x9c, 0xa4, 0xbd, 0x8a, 0x20, 0xbf, 0xbc, 0xbc, 0xc8, 0xdd, 0xbd, 0x0b, 0xd3, 0xde, 0xbd, + 0x27, 0x88, 0x30, 0xbe, 0x1e, 0x16, 0xb1, 0xbd, 0x64, 0x2f, 0x65, 0xbd, 0xc7, 0xe7, 0x0a, 0xbe, + 0xf8, 0xa7, 0x4f, 0x3d, 0xb9, 0x0d, 0x01, 0x3d, 0xc2, 0x54, 0x5b, 0x3d, 0xa4, 0x3e, 0x85, 0x3d, + 0x97, 0xba, 0xb8, 0x3d, 0x02, 0xce, 0x25, 0x3d, 0x8f, 0x8b, 0x5f, 0x3d, 0xdb, 0x90, 0x28, 0x3d, + 0x1a, 0x13, 0x1f, 0x3d, 0x94, 0x74, 0x6c, 0x3d, 0x7f, 0x47, 0x26, 0x3d, 0x32, 0x09, 0xdc, 0x3b, + 0x63, 0xa0, 0xb4, 0x3d, 0x82, 0x38, 0xd7, 0x3c, 0xea, 0x29, 0x02, 0x3d, 0x92, 0x68, 0x3d, 0x3d, + 0xc7, 0xc6, 0x08, 0x3d, 0xa6, 0x88, 0x1d, 0x3d, 0x3c, 0x6e, 0x31, 0x3d, 0x74, 0x0a, 0x84, 0x3c, + 0xfc, 0x01, 0x84, 0x3d, 0xa4, 0xa7, 0x88, 0x3d, 0x8a, 0xb0, 0x85, 0x3d, 0xd4, 0x52, 0x50, 0x3d, + 0x6c, 0x9e, 0x72, 0x3d, 0xa2, 0x7f, 0x13, 0x3d, 0xcc, 0x07, 0x5c, 0x3d, 0x53, 0x48, 0xa6, 0x3c, + 0x9a, 0x7c, 0xbb, 0x3d, 0x05, 0x8f, 0x51, 0x3d, 0xe6, 0x74, 0x42, 0x3d, 0xa4, 0xde, 0xac, 0x3d, + 0x0e, 0xd1, 0x61, 0x3c, 0x84, 0x58, 0x08, 0x3c, 0x7c, 0x8a, 0xae, 0x3b, 0x6c, 0xd1, 0x0d, 0xbc, + 0xf0, 0x96, 0x2f, 0x3b, 0xbe, 0xa5, 0xde, 0x3b, 0xfb, 0x9c, 0x43, 0x3c, 0x51, 0xb2, 0xf9, 0xbb, + 0x14, 0x49, 0x33, 0x3d, 0xc6, 0x8f, 0x23, 0x3c, 0x14, 0xbc, 0xff, 0x3c, 0x8e, 0xf8, 0xe4, 0x3c, + 0x11, 0x22, 0x1d, 0x3d, 0x78, 0x09, 0xd1, 0x3b, 0x90, 0x49, 0x51, 0x3d, 0x70, 0xfe, 0x7f, 0x3c, + 0xea, 0xa9, 0xf8, 0xbb, 0x45, 0x91, 0xdc, 0x3c, 0xff, 0x10, 0x14, 0x3d, 0x18, 0x13, 0x2b, 0xbb, + 0xe0, 0x05, 0xb8, 0x3c, 0xd4, 0xb8, 0xfb, 0x3c, 0x9d, 0x47, 0xdf, 0x3c, 0xf0, 0xe7, 0x90, 0x3c, + 0x06, 0x03, 0x22, 0x3d, 0xa4, 0xbd, 0x70, 0x3c, 0xb8, 0x09, 0xe9, 0x3c, 0x4d, 0x40, 0xc1, 0x3c, + 0x70, 0x87, 0x0a, 0x3d, 0x06, 0x9b, 0x8f, 0x3c, 0xab, 0xd6, 0x6a, 0x3c, 0x10, 0x5e, 0x29, 0x3d, + 0x10, 0x7f, 0xe3, 0xbb, 0xf2, 0x40, 0x38, 0xbb, 0xca, 0xab, 0xcf, 0xbb, 0x54, 0xef, 0xfc, 0xbb, + 0xec, 0x63, 0x36, 0xbc, 0x47, 0x2c, 0xb7, 0xbb, 0x3b, 0x48, 0x8f, 0xbb, 0x09, 0x7c, 0x28, 0xbb, + 0x4a, 0xe6, 0x83, 0xbb, 0xf3, 0x0a, 0xc5, 0xbb, 0xc6, 0x61, 0x8f, 0xbb, 0xe0, 0x95, 0x61, 0x39, + 0xf2, 0x03, 0x40, 0xbc, 0x66, 0xea, 0x2d, 0xbb, 0x3c, 0x17, 0x45, 0xbb, 0x56, 0xa4, 0xcd, 0xbb, + 0x4c, 0xaa, 0x96, 0xbb, 0x81, 0x4b, 0xa4, 0xbb, 0xc5, 0x52, 0xb2, 0xbb, 0xc0, 0x1d, 0xa9, 0xb8, + 0xd4, 0x91, 0xed, 0xbb, 0xa8, 0x31, 0xd6, 0xbb, 0x4b, 0xf7, 0x00, 0xbc, 0x79, 0x9b, 0x02, 0xbc, + 0xac, 0x13, 0x06, 0xbc, 0xb8, 0x96, 0xaf, 0xbb, 0xfa, 0x5b, 0xc0, 0xbb, 0xfe, 0xa3, 0x1c, 0xba, + 0x20, 0x8a, 0x1e, 0xbc, 0xd6, 0xbe, 0xb9, 0xbb, 0x3e, 0xb7, 0xc3, 0xbb, 0xf8, 0x3b, 0x27, 0xbc, + 0xc9, 0x11, 0x55, 0xbb, 0x94, 0x39, 0x01, 0xba, 0x1d, 0xa4, 0xc6, 0xba, 0x70, 0x13, 0xb6, 0xb9, + 0xe2, 0xfc, 0x1b, 0xbb, 0x7e, 0x96, 0x11, 0xbb, 0x00, 0x65, 0xe4, 0xb8, 0x02, 0x29, 0x17, 0x3b, + 0x78, 0x11, 0x94, 0xbb, 0xf4, 0x5c, 0xa2, 0xba, 0x04, 0x4e, 0x61, 0xbb, 0x12, 0x24, 0xfa, 0xba, + 0x1e, 0xbc, 0xdc, 0xbb, 0x88, 0x2f, 0x2e, 0xba, 0x22, 0xd8, 0xa1, 0xbb, 0xbc, 0xa0, 0x52, 0xbb, + 0x7c, 0x05, 0x03, 0xba, 0x64, 0xfd, 0x7c, 0xbb, 0xfa, 0xd4, 0x9a, 0xbb, 0x16, 0xe9, 0xea, 0x3a, + 0xcc, 0xee, 0x47, 0xbb, 0x56, 0x78, 0x40, 0xbb, 0x2a, 0x57, 0x85, 0xbb, 0xa9, 0x84, 0x98, 0xbb, + 0x06, 0xab, 0xcb, 0xbb, 0xe3, 0x79, 0x53, 0xbb, 0xa7, 0x19, 0x5b, 0xbb, 0xba, 0xf0, 0x72, 0xba, + 0xe2, 0xd8, 0x7f, 0xbb, 0xdc, 0x14, 0x17, 0xbb, 0x83, 0x3c, 0x2e, 0xbb, 0xf4, 0x5e, 0xc1, 0xbb, + 0x58, 0x93, 0x1d, 0xbb, 0xa7, 0xfc, 0x12, 0xbb, 0x09, 0x7d, 0x3f, 0xbb, 0xb7, 0xa7, 0x5b, 0xbb, + 0x16, 0x38, 0x97, 0xbb, 0xf6, 0xae, 0xf1, 0xba, 0x9d, 0x70, 0x86, 0xbb, 0x12, 0xef, 0x54, 0xbb, + 0xb6, 0x4a, 0x3e, 0xbb, 0x49, 0xc1, 0x6c, 0xbb, 0x89, 0x7e, 0x35, 0xbb, 0xd2, 0xe8, 0x9e, 0xba, + 0x5d, 0xd3, 0x94, 0xbb, 0xff, 0xee, 0xdf, 0xba, 0x59, 0x9b, 0x31, 0xbb, 0xcf, 0x17, 0x14, 0xbb, + 0xb4, 0xa1, 0xb2, 0xba, 0x97, 0x52, 0x0d, 0xbb, 0x42, 0xaa, 0x2a, 0xbb, 0x9b, 0x1c, 0xe4, 0xba, + 0xc2, 0x07, 0x80, 0xbb, 0xc3, 0x77, 0x97, 0xbb, 0xba, 0xad, 0x75, 0xbb, 0x23, 0x42, 0x00, 0xbb, + 0x6c, 0xf6, 0x48, 0xbb, 0x36, 0xe4, 0xcb, 0xba, 0xd5, 0xff, 0x62, 0xbb, 0x39, 0x66, 0x15, 0xbb, + 0x19, 0xca, 0xc0, 0xbb, 0x1c, 0xd2, 0x4d, 0xbb, 0x50, 0x0c, 0x26, 0xbb, 0x32, 0xde, 0xa0, 0xbb, + 0xb2, 0x74, 0x12, 0xba, 0xc2, 0x06, 0x8e, 0xba, 0x1e, 0xe9, 0x05, 0xba, 0x32, 0x33, 0xaf, 0x39, + 0x58, 0x6c, 0x7a, 0xb9, 0x32, 0x67, 0x27, 0xb9, 0x93, 0x2a, 0x01, 0xbb, 0x32, 0x72, 0x54, 0xba, + 0xe4, 0x75, 0x4e, 0xbb, 0x26, 0x9b, 0xa0, 0xba, 0xc5, 0xc3, 0x16, 0xbb, 0x1d, 0x0a, 0x15, 0xbb, + 0xf6, 0x5a, 0x06, 0xbb, 0xf7, 0x20, 0x3b, 0xba, 0xdd, 0xe7, 0x70, 0xbb, 0x5e, 0x0f, 0x3f, 0xba, + 0x0c, 0xe3, 0x33, 0x3a, 0x90, 0x0b, 0xcf, 0xba, 0x77, 0x2c, 0x13, 0xbb, 0x46, 0xbe, 0x52, 0xba, + 0x2a, 0xea, 0xec, 0xba, 0x07, 0xfa, 0x38, 0xbb, 0x9c, 0x2c, 0xf2, 0xba, 0xd3, 0x7e, 0x9c, 0xb9, + 0x1a, 0x7a, 0x08, 0xbb, 0x4e, 0x61, 0x00, 0xba, 0xaf, 0x30, 0x10, 0xbb, 0xd1, 0x2f, 0x20, 0xbb, + 0x63, 0x6c, 0x44, 0xbb, 0x66, 0x3b, 0xbf, 0xba, 0x4c, 0xcd, 0x65, 0xba, 0x68, 0xa3, 0x34, 0xbb, + 0xde, 0x1c, 0xd6, 0xbd, 0xfc, 0x56, 0xa1, 0xbc, 0x3d, 0xd1, 0x96, 0xbd, 0x29, 0x28, 0xa1, 0xbd, + 0x34, 0x38, 0x08, 0xbe, 0x07, 0x5b, 0xa7, 0xbd, 0x94, 0x04, 0x8e, 0xbc, 0x2c, 0x8c, 0xad, 0x3c, + 0xb4, 0x95, 0x87, 0xbd, 0xb6, 0x8d, 0x6d, 0xbd, 0x27, 0xba, 0x81, 0xbd, 0x80, 0x3c, 0x1d, 0x3a, + 0x57, 0x49, 0x3a, 0xbe, 0x9e, 0x21, 0xcf, 0xbc, 0x6c, 0xa4, 0x69, 0xbd, 0x58, 0x7e, 0xc4, 0xbd, + 0x70, 0xef, 0x69, 0xbd, 0x14, 0x2f, 0xac, 0xbd, 0x8d, 0x45, 0xbe, 0xbd, 0xe9, 0xa7, 0x12, 0x3d, + 0x0f, 0xee, 0xb6, 0xbd, 0x94, 0x56, 0x8d, 0xbd, 0x8e, 0xaf, 0xe0, 0xbd, 0xaa, 0x4a, 0x0d, 0xbe, + 0x57, 0x4c, 0x11, 0xbe, 0xca, 0xdc, 0xb9, 0xbd, 0x81, 0xd5, 0x9c, 0xbd, 0x84, 0xb1, 0x24, 0x3c, + 0x6a, 0xa0, 0xe0, 0xbd, 0xfc, 0x71, 0x8b, 0xbd, 0x31, 0x38, 0xaa, 0xbd, 0x9a, 0xd7, 0x16, 0xbe, + 0xb3, 0x8f, 0x86, 0xbd, 0xf5, 0x5d, 0x98, 0xbd, 0x6f, 0xbb, 0x6f, 0xbd, 0x02, 0x12, 0x10, 0xbe, + 0xd1, 0x63, 0x3b, 0xbd, 0xca, 0x2e, 0x76, 0xbd, 0x84, 0x38, 0x97, 0xbd, 0xd2, 0x56, 0xd7, 0xbd, + 0x1e, 0x69, 0xcd, 0xbd, 0x1c, 0x54, 0x1a, 0xbd, 0x7d, 0xa8, 0xc3, 0xbd, 0xc6, 0xe4, 0xca, 0xbd, + 0x61, 0xb2, 0xe5, 0xbd, 0x64, 0x5f, 0x87, 0xbd, 0x53, 0x3c, 0x9a, 0xbd, 0x54, 0xfb, 0xf4, 0xbc, + 0x47, 0xd9, 0xaa, 0xbd, 0x40, 0x62, 0x94, 0xbd, 0x7b, 0x86, 0x1f, 0xbd, 0xce, 0xce, 0x75, 0xbd, + 0x53, 0x54, 0xb4, 0xbd, 0xe6, 0x4e, 0x93, 0xbd, 0x88, 0xed, 0x6d, 0xbd, 0x6c, 0xf3, 0xe3, 0xbd, + 0x52, 0x90, 0x3b, 0xbd, 0x80, 0x39, 0xb6, 0xbd, 0xe0, 0x0d, 0x65, 0xbd, 0x30, 0x99, 0xc0, 0xbd, + 0xb6, 0xd1, 0x33, 0xbd, 0xfc, 0xa0, 0x62, 0xbd, 0x18, 0x0e, 0xae, 0xbd, 0xdc, 0xf6, 0x52, 0xbd, + 0xcc, 0x62, 0x35, 0x3d, 0xf7, 0x46, 0x26, 0x3d, 0x37, 0xa8, 0x37, 0x3d, 0x80, 0xba, 0x88, 0x3d, + 0xa4, 0x0e, 0x54, 0x3d, 0x1d, 0xe8, 0x3c, 0x3d, 0xf4, 0x79, 0x67, 0x3d, 0x63, 0x7d, 0x42, 0x3d, + 0x2c, 0x01, 0x45, 0x3d, 0x80, 0xf8, 0x08, 0x3d, 0xe6, 0x2d, 0x45, 0x3d, 0xff, 0x1d, 0x50, 0x3d, + 0x32, 0x16, 0x8b, 0x3d, 0xde, 0x59, 0x40, 0x3d, 0x12, 0x0c, 0x5c, 0x3d, 0x03, 0xe8, 0x39, 0x3d, + 0x6e, 0x9c, 0x7a, 0x3d, 0xe3, 0x67, 0x39, 0x3d, 0x99, 0xac, 0xdc, 0x3c, 0xbf, 0x52, 0x1d, 0x3d, + 0xae, 0x47, 0x88, 0x3d, 0x26, 0x92, 0x80, 0x3d, 0xa0, 0x68, 0xfb, 0x3c, 0x63, 0x76, 0x86, 0x3d, + 0xf4, 0x24, 0x51, 0x3d, 0xfc, 0x3a, 0x5c, 0x3d, 0x87, 0xed, 0x11, 0x3d, 0x5a, 0x26, 0x4c, 0x3d, + 0x9a, 0x24, 0x35, 0x3d, 0x5c, 0x3f, 0x52, 0x3d, 0x34, 0x32, 0x8a, 0x3d, 0xca, 0xcd, 0x02, 0x3d, + 0xa2, 0x50, 0x99, 0x3c, 0x24, 0xf2, 0xa1, 0x3c, 0xba, 0x04, 0x0f, 0x3c, 0xab, 0x9e, 0xca, 0x3c, + 0xa6, 0xa9, 0xd0, 0x3b, 0xa1, 0xbb, 0x0b, 0x3c, 0x00, 0x62, 0xf4, 0x3c, 0x09, 0xc0, 0x9c, 0x3c, + 0x98, 0x72, 0x8b, 0x3c, 0x4e, 0x85, 0xea, 0x3b, 0xf0, 0xdd, 0x8f, 0x3b, 0x1e, 0x2e, 0x82, 0x3c, + 0xf8, 0x34, 0x27, 0x3d, 0x4c, 0x00, 0xa4, 0x3c, 0xc8, 0x16, 0x51, 0x3c, 0x00, 0xa9, 0xfe, 0xb9, + 0xa4, 0x5a, 0x5e, 0x3b, 0x96, 0xe7, 0xb9, 0x3b, 0x53, 0x9f, 0xc2, 0x3b, 0x13, 0xc8, 0x22, 0x3c, + 0xf0, 0xcb, 0x1c, 0x3c, 0x8c, 0x7c, 0x8e, 0x3c, 0xf2, 0xa3, 0xb0, 0x3c, 0x78, 0x48, 0xe0, 0x3c, + 0xff, 0x3b, 0xba, 0x3c, 0x35, 0xa2, 0xd5, 0x3c, 0x99, 0xdf, 0x0f, 0x3c, 0x18, 0x57, 0x9d, 0x3c, + 0x59, 0x24, 0x28, 0x3c, 0xf2, 0xa0, 0xb5, 0x3c, 0xee, 0xc8, 0xaf, 0x3c, 0x9c, 0xeb, 0xd8, 0x3c, + 0x60, 0x8f, 0xb0, 0xbb, 0xd0, 0xa8, 0x96, 0xbb, 0x04, 0xc3, 0xaa, 0xbb, 0x54, 0xff, 0xd6, 0xbb, + 0x5f, 0xee, 0xd6, 0xbb, 0x39, 0x03, 0xaf, 0xbb, 0x45, 0xff, 0xf5, 0xbb, 0xe8, 0xe3, 0x95, 0xbb, + 0x22, 0xbe, 0x9a, 0xbb, 0xc5, 0xef, 0x86, 0xbb, 0x1d, 0x75, 0x8b, 0xbb, 0x12, 0x91, 0xa7, 0xbb, + 0xcc, 0x17, 0x0b, 0xbc, 0x3a, 0x6c, 0xbf, 0xbb, 0xdc, 0xb4, 0xca, 0xbb, 0xa4, 0xa0, 0xbd, 0xbb, + 0x3c, 0x2c, 0xd7, 0xbb, 0xe9, 0xa4, 0x98, 0xbb, 0xec, 0x30, 0x47, 0xbb, 0x6c, 0x84, 0x8b, 0xbb, + 0x73, 0x93, 0xf6, 0xbb, 0x64, 0x40, 0x01, 0xbc, 0xa4, 0x73, 0x75, 0xbb, 0x22, 0xfe, 0xf5, 0xbb, + 0xfe, 0x33, 0xee, 0xbb, 0x16, 0xd1, 0xd1, 0xbb, 0x6e, 0x6b, 0x80, 0xbb, 0x8a, 0xba, 0xad, 0xbb, + 0xe6, 0x3b, 0xba, 0xbb, 0x39, 0x30, 0xe4, 0xbb, 0xc8, 0xff, 0x08, 0xbc, 0x71, 0x17, 0x90, 0xbb, + 0xfd, 0x8d, 0x39, 0xbb, 0xfe, 0xd4, 0x24, 0xbb, 0x5a, 0xc2, 0xd1, 0xba, 0x2f, 0x8f, 0x1a, 0xbb, + 0xa4, 0x4d, 0x04, 0xbb, 0xd4, 0x55, 0xcf, 0xba, 0xb4, 0x91, 0x9e, 0xbb, 0x6e, 0xfe, 0xe3, 0xba, + 0x1f, 0xac, 0xd3, 0xba, 0x9b, 0x3f, 0xc3, 0xba, 0x80, 0x8d, 0xf9, 0x38, 0x7c, 0x94, 0xd4, 0xba, + 0xa7, 0x69, 0xbd, 0xbb, 0xe6, 0x48, 0x4e, 0xbb, 0x1e, 0xf9, 0x08, 0xbb, 0x4e, 0x3e, 0x9d, 0xba, + 0xc4, 0x69, 0x4e, 0xba, 0xb0, 0x47, 0x37, 0xba, 0x0e, 0x4e, 0x76, 0xba, 0x28, 0xdc, 0xb8, 0xba, + 0x99, 0x8c, 0xef, 0xba, 0x87, 0x91, 0x5b, 0xbb, 0x19, 0xa3, 0x39, 0xbb, 0xce, 0x21, 0x71, 0xbb, + 0xa0, 0x61, 0x91, 0xbb, 0xf7, 0x2b, 0x6e, 0xbb, 0xe6, 0xce, 0xa1, 0xba, 0x98, 0xb0, 0x12, 0xbb, + 0x60, 0xe5, 0x15, 0xbb, 0x51, 0xa4, 0x84, 0xbb, 0x9d, 0x65, 0x76, 0xbb, 0xe7, 0x6e, 0x7c, 0xbb, + 0x1e, 0x44, 0x25, 0xbb, 0x8b, 0xd5, 0x24, 0xbb, 0xcf, 0xf8, 0x26, 0xbb, 0x5f, 0xbe, 0x94, 0xbb, + 0x08, 0xfd, 0x2b, 0xbb, 0x90, 0x01, 0x2c, 0xbb, 0x40, 0x19, 0x43, 0xbb, 0xce, 0xdd, 0x57, 0xbb, + 0x71, 0xcf, 0x55, 0xbb, 0x86, 0xf2, 0xea, 0xba, 0xd9, 0x0f, 0x5b, 0xbb, 0xef, 0x0f, 0x5c, 0xbb, + 0xb6, 0xf7, 0x7f, 0xbb, 0x91, 0x35, 0x2b, 0xbb, 0x36, 0xd5, 0x4b, 0xbb, 0x1e, 0x9b, 0x10, 0xbb, + 0xe0, 0xb4, 0x6e, 0xbb, 0xe1, 0xbb, 0x39, 0xbb, 0xea, 0xc0, 0xcf, 0xba, 0xb9, 0xcd, 0x17, 0xbb, + 0x32, 0xdf, 0x7b, 0xbb, 0xcd, 0x99, 0x5a, 0xbb, 0x78, 0x69, 0xf2, 0xba, 0x0e, 0x40, 0x82, 0xbb, + 0x53, 0xe1, 0x1c, 0xbb, 0x1a, 0xf6, 0x4f, 0xbb, 0xb4, 0x71, 0x0d, 0xbb, 0x9a, 0x79, 0x51, 0xbb, + 0x00, 0x3b, 0x14, 0xbb, 0x84, 0x6b, 0x28, 0xbb, 0x12, 0x16, 0x71, 0xbb, 0x49, 0x18, 0xe2, 0xba, + 0x7a, 0xf7, 0xa2, 0xba, 0x74, 0x2e, 0xc1, 0xba, 0x88, 0x99, 0x42, 0xba, 0x03, 0xc5, 0x1f, 0xbb, + 0xd6, 0xab, 0xb9, 0xb9, 0x32, 0x4f, 0x43, 0xba, 0x5e, 0x57, 0xd7, 0xba, 0x03, 0xf3, 0xf5, 0xba, + 0xaf, 0xf8, 0xdf, 0xba, 0x28, 0xcb, 0xfa, 0xb9, 0x0d, 0x69, 0x97, 0xba, 0xf5, 0x47, 0xd3, 0xba, + 0xc6, 0x31, 0x27, 0xbb, 0x95, 0xdb, 0xa5, 0xba, 0xd2, 0x39, 0x8b, 0xba, 0x70, 0xbe, 0xb6, 0x38, + 0x72, 0x57, 0x45, 0xba, 0xb5, 0xff, 0x5f, 0xba, 0x02, 0x47, 0x0c, 0xba, 0x60, 0x01, 0x6a, 0xba, + 0xc8, 0x5d, 0x82, 0xba, 0x90, 0xc3, 0x8b, 0xba, 0xec, 0x98, 0xb6, 0xba, 0xe2, 0x12, 0x07, 0xbb, + 0xec, 0x1d, 0x80, 0xba, 0x01, 0x76, 0xea, 0xba, 0x02, 0xe8, 0x55, 0xba, 0xb9, 0x2e, 0xda, 0xba, + 0x28, 0xc7, 0x13, 0xba, 0x38, 0xbf, 0x91, 0xba, 0xa0, 0x92, 0xb4, 0xba, 0x4f, 0x58, 0xbe, 0xba, + 0xcd, 0xbc, 0x9b, 0xbd, 0x2c, 0x94, 0x7b, 0xbd, 0xb4, 0xf1, 0x7d, 0xbd, 0xdc, 0x78, 0x85, 0xbd, + 0x6c, 0x36, 0xae, 0xbd, 0x81, 0xba, 0x80, 0xbd, 0xce, 0xa7, 0xf6, 0xbd, 0x86, 0x44, 0x37, 0xbd, + 0x52, 0x51, 0x3d, 0xbd, 0x82, 0xd0, 0x5b, 0xbd, 0x6d, 0x99, 0xb9, 0xbc, 0xd2, 0xed, 0x50, 0xbd, + 0x93, 0xad, 0x07, 0xbe, 0xe0, 0x45, 0xad, 0xbd, 0xaa, 0x1f, 0x98, 0xbd, 0x32, 0xa9, 0x91, 0xbd, + 0x28, 0x72, 0x7d, 0xbd, 0x61, 0xae, 0x2c, 0xbd, 0x80, 0x78, 0x0f, 0xbd, 0xea, 0xbb, 0x47, 0xbd, + 0x4f, 0x66, 0xaa, 0xbd, 0xfd, 0x1f, 0xde, 0xbd, 0x9b, 0xa0, 0x73, 0xbd, 0xb6, 0xbf, 0xc8, 0xbd, + 0x29, 0x43, 0xf6, 0xbd, 0x18, 0xb4, 0xb9, 0xbd, 0x57, 0x36, 0x34, 0xbd, 0xe6, 0x25, 0x7b, 0xbd, + 0x8b, 0x2a, 0xa1, 0xbd, 0x51, 0x61, 0xe1, 0xbd, 0xe4, 0xc3, 0xeb, 0xbd, 0x9d, 0xb0, 0xa3, 0xbd, + 0xa5, 0xe2, 0xa3, 0xbc, 0x3a, 0x3a, 0xf8, 0xbc, 0x9a, 0x4e, 0xee, 0xbc, 0xe9, 0x02, 0x80, 0xbd, + 0x0c, 0x72, 0xe5, 0xbc, 0x66, 0x93, 0x5f, 0xbc, 0x57, 0x8b, 0x3c, 0xbd, 0x04, 0xec, 0xa2, 0xbd, + 0xe2, 0xdf, 0x47, 0xbd, 0x40, 0x0d, 0x99, 0xb9, 0x2c, 0x9f, 0x4e, 0xbd, 0x82, 0x51, 0x88, 0xbd, + 0x52, 0x17, 0x6c, 0xbd, 0x59, 0xbe, 0x8d, 0xbc, 0x82, 0xe2, 0x3d, 0xbd, 0x64, 0xf3, 0xe2, 0xba, + 0xba, 0xc0, 0xf4, 0xbc, 0xe5, 0x86, 0x23, 0xbd, 0x3c, 0xcc, 0xa5, 0xbc, 0x1e, 0xf9, 0x64, 0xbc, + 0xf9, 0x7d, 0x36, 0xbd, 0x05, 0xcb, 0x12, 0xbd, 0xda, 0x1b, 0x2c, 0xbd, 0x8b, 0xef, 0x19, 0xbd, + 0xdc, 0x36, 0x2c, 0xbc, 0xf8, 0xdb, 0xd9, 0xbc, 0x10, 0x51, 0x16, 0xbd, 0x2a, 0x79, 0x41, 0xbd, + 0x16, 0x4c, 0x28, 0xbd, 0x25, 0x8e, 0x7c, 0xbc, 0x61, 0x84, 0x90, 0xbc, 0xd6, 0x9d, 0xba, 0xbc, + 0x18, 0xb8, 0x95, 0x3c, 0x23, 0x24, 0xf0, 0x3b, 0x2b, 0x53, 0xcc, 0x3c, 0xbc, 0x2e, 0x1d, 0x3d, + 0xef, 0xa9, 0x1d, 0x3d, 0x02, 0xa8, 0x3e, 0x3c, 0xb5, 0x98, 0xd0, 0x3c, 0x5e, 0x02, 0x21, 0x3c, + 0x25, 0xee, 0xa4, 0x3c, 0x0f, 0x3c, 0xf5, 0x3b, 0x9d, 0x71, 0xcf, 0x3c, 0xe6, 0xd3, 0x4a, 0x3c, + 0x6e, 0x6a, 0x3e, 0x3d, 0x50, 0xeb, 0xd8, 0x3b, 0xde, 0x99, 0xb7, 0x3c, 0x6c, 0xa8, 0xcf, 0x3c, + 0xc8, 0x0c, 0xdb, 0x3c, 0x2f, 0x3d, 0x9e, 0x3c, 0xcb, 0x6b, 0xb5, 0xba, 0xce, 0xd6, 0xab, 0x3c, + 0xc1, 0xe8, 0xa2, 0x3c, 0x3c, 0xe8, 0x12, 0x3d, 0x71, 0x45, 0x82, 0x3c, 0x2b, 0xae, 0x00, 0x3d, + 0x4b, 0x6a, 0xc1, 0x3c, 0x46, 0xf5, 0x8b, 0x3c, 0x03, 0x28, 0x0e, 0x3c, 0x00, 0x9f, 0xff, 0x3b, + 0xbf, 0xa1, 0xf0, 0x3c, 0x38, 0xce, 0x95, 0x3c, 0xcc, 0xd2, 0xfd, 0x3c, 0x42, 0xeb, 0x7d, 0x3c, + 0x07, 0x1d, 0x3f, 0x3c, 0xa0, 0x6e, 0x27, 0x39, 0xbe, 0xca, 0x28, 0xbc, 0x4d, 0x83, 0x1b, 0x3c, + 0xd2, 0xaa, 0x93, 0x3b, 0x68, 0xc3, 0xce, 0x3a, 0x2c, 0x65, 0x2e, 0x3c, 0x26, 0xca, 0x36, 0x3c, + 0x1a, 0xd0, 0x38, 0x3c, 0xa6, 0x9f, 0x64, 0xbb, 0x7c, 0x61, 0xa9, 0x3a, 0xe5, 0x6d, 0x0b, 0x3c, + 0x9e, 0xa8, 0xdb, 0x3c, 0x4c, 0x24, 0x0b, 0x3c, 0x52, 0xb4, 0x23, 0x3c, 0xbc, 0x21, 0x0a, 0xbc, + 0xa8, 0xe6, 0x09, 0x3a, 0x6f, 0x09, 0x80, 0xbb, 0x19, 0xff, 0x5a, 0x3c, 0xb8, 0x9c, 0x21, 0x3a, + 0xfc, 0x09, 0xd3, 0x3b, 0xd8, 0x9c, 0x8f, 0x3a, 0xdb, 0x6f, 0x97, 0x3c, 0xc0, 0x06, 0x58, 0x3c, + 0xa2, 0xe6, 0x9f, 0x3c, 0x44, 0xd0, 0xb4, 0x3b, 0x22, 0xbe, 0xb5, 0x3b, 0x84, 0x07, 0x3d, 0x3c, + 0xe8, 0x2f, 0x83, 0x3b, 0x83, 0x34, 0x88, 0x3c, 0xec, 0x1a, 0x4d, 0xbb, 0x2c, 0x76, 0x18, 0x3c, + 0xd0, 0x2e, 0x2e, 0xbb, 0x08, 0xe4, 0xcb, 0xb8, 0x06, 0xe2, 0x13, 0xbb, 0xcc, 0xe5, 0x87, 0xbb, + 0xba, 0xfc, 0xa5, 0xbb, 0x68, 0x81, 0xb3, 0xba, 0x4e, 0xa3, 0x38, 0xbb, 0xd0, 0x7a, 0x22, 0x3a, + 0x75, 0x47, 0x00, 0xbb, 0x86, 0x01, 0x76, 0xba, 0x04, 0xf8, 0x0e, 0xbb, 0x88, 0xa3, 0x85, 0xb8, + 0x17, 0x79, 0xd1, 0xbb, 0x60, 0x6c, 0x5c, 0xba, 0x4e, 0xca, 0x17, 0xbb, 0xd6, 0xc7, 0x59, 0xbb, + 0xf6, 0x50, 0x47, 0xbb, 0xce, 0xbc, 0xb3, 0xba, 0x06, 0x52, 0x60, 0x39, 0x82, 0xb2, 0x31, 0xbb, + 0x1c, 0x88, 0xee, 0xba, 0x5e, 0xf0, 0x8a, 0xbb, 0xc4, 0x37, 0xfa, 0xba, 0x33, 0xf8, 0x86, 0xbb, + 0x60, 0x8c, 0x88, 0xbb, 0xdb, 0xde, 0xff, 0xba, 0xb0, 0x64, 0xba, 0xb9, 0x2a, 0xe7, 0x34, 0xb9, + 0x4e, 0x1e, 0x54, 0xbb, 0x1e, 0x93, 0x48, 0xbb, 0x83, 0xcb, 0x7f, 0xbb, 0x4c, 0x14, 0x05, 0xbb, + 0x59, 0xda, 0x02, 0xbb, 0xd7, 0x70, 0x22, 0x3a, 0x03, 0x30, 0xa6, 0x3a, 0x44, 0x04, 0xa5, 0xba, + 0xd5, 0x7d, 0xda, 0xba, 0x92, 0xa0, 0xbe, 0xb9, 0x9e, 0x09, 0xaf, 0xba, 0x1c, 0x43, 0xff, 0x39, + 0x2a, 0x85, 0x8c, 0xba, 0x5a, 0x86, 0x26, 0x39, 0x72, 0x46, 0x68, 0x39, 0x22, 0x77, 0xa9, 0x39, + 0xfe, 0x00, 0x91, 0xbb, 0xe6, 0xc1, 0x86, 0xba, 0x46, 0xc8, 0x8c, 0xba, 0x60, 0x4c, 0x66, 0x38, + 0xd0, 0x30, 0xdc, 0xb9, 0xbc, 0x51, 0x79, 0x3a, 0x9a, 0x4c, 0xa5, 0xba, 0xba, 0x10, 0x31, 0xba, + 0x90, 0x2b, 0xf9, 0xb9, 0x38, 0x07, 0x48, 0xba, 0x9e, 0x0a, 0x0e, 0xbb, 0x08, 0x76, 0x16, 0xbb, + 0x06, 0x49, 0x76, 0xbb, 0xa8, 0x86, 0x50, 0xba, 0x80, 0x40, 0x32, 0xb8, 0x02, 0x2d, 0x0f, 0xba, + 0xc4, 0x66, 0x37, 0xba, 0xc0, 0xb1, 0x3d, 0xbb, 0x1c, 0xe7, 0x00, 0xba, 0x21, 0xfe, 0xb8, 0xba, + 0x84, 0xd8, 0x64, 0xba, 0x38, 0xe8, 0x4b, 0xba, 0xcd, 0x05, 0xcb, 0xba, 0x7f, 0xce, 0x1a, 0xbb, + 0xaa, 0xff, 0xf2, 0xba, 0x4c, 0xd8, 0x28, 0xba, 0x7c, 0x46, 0xd0, 0xba, 0xa5, 0xce, 0xcb, 0xba, + 0x9e, 0x13, 0xbb, 0xba, 0xd3, 0x8d, 0xac, 0xb9, 0x1c, 0x8c, 0xe8, 0xba, 0xca, 0x4f, 0xc0, 0xba, + 0xc0, 0x3d, 0x1b, 0xbb, 0x1c, 0xb0, 0xe1, 0xb9, 0xe5, 0x0f, 0xc3, 0xba, 0xbf, 0x30, 0x8e, 0xba, + 0xe3, 0xd5, 0xc3, 0xba, 0x42, 0x8f, 0xbc, 0xba, 0x30, 0x28, 0xf8, 0xb8, 0x85, 0x9f, 0x84, 0xba, + 0xcf, 0x85, 0xb8, 0xba, 0x76, 0xc8, 0xfc, 0xba, 0xaf, 0xaa, 0x8b, 0xba, 0xb0, 0xe9, 0xd6, 0xba, + 0xcc, 0x0f, 0x56, 0xba, 0xac, 0x07, 0x85, 0xba, 0x10, 0x75, 0x63, 0xba, 0x81, 0x40, 0x75, 0xba, + 0x08, 0x94, 0xe3, 0xba, 0xf7, 0xa1, 0x40, 0xba, 0xa8, 0xf5, 0xc3, 0xba, 0x4d, 0x63, 0x5d, 0xba, + 0x96, 0x2f, 0x0e, 0xba, 0x18, 0xe3, 0xdb, 0xb9, 0xd2, 0xa1, 0x5f, 0x39, 0x12, 0xa7, 0x70, 0xba, + 0x04, 0x29, 0x22, 0xb9, 0x1d, 0xfc, 0x13, 0xb9, 0xfd, 0x4f, 0x5e, 0xba, 0xf5, 0x84, 0xd4, 0xba, + 0xbe, 0x0e, 0x81, 0xba, 0x92, 0x2a, 0x66, 0x39, 0x61, 0x1f, 0x16, 0xba, 0xca, 0xf3, 0xa6, 0xba, + 0x4e, 0x8b, 0xb5, 0xba, 0xc4, 0x63, 0x09, 0xba, 0x74, 0x53, 0x63, 0xba, 0x8f, 0x60, 0x1e, 0x3a, + 0x3b, 0x58, 0x40, 0xb9, 0xb3, 0x71, 0x91, 0xb9, 0xe1, 0x5b, 0x60, 0xba, 0x60, 0xce, 0xce, 0x36, + 0x66, 0xce, 0x40, 0xba, 0x44, 0x4c, 0x47, 0xb9, 0x6a, 0x99, 0x9c, 0xba, 0xa1, 0xde, 0x3e, 0xba, + 0x56, 0x70, 0x20, 0xba, 0xa4, 0xe7, 0xe4, 0xb9, 0xe8, 0x6d, 0x3a, 0xba, 0x0f, 0x1d, 0x93, 0xba, + 0xab, 0x3d, 0xf5, 0xb9, 0x3b, 0xdf, 0x2a, 0xba, 0xa2, 0xe0, 0x5c, 0x39, 0xd5, 0x38, 0x0c, 0xba, + 0x1c, 0xfe, 0x41, 0xbd, 0xe9, 0x68, 0x5c, 0x3c, 0x78, 0x43, 0x7c, 0xbb, 0x31, 0x1d, 0x39, 0xbd, + 0xb0, 0xc6, 0x8c, 0xbd, 0xca, 0xc6, 0x83, 0xbc, 0x5e, 0x39, 0x10, 0xbd, 0xfe, 0x74, 0x04, 0x3d, + 0x2d, 0x42, 0xb3, 0xbc, 0x5a, 0x81, 0x0e, 0xbc, 0xfd, 0xa5, 0x1a, 0xbc, 0x03, 0x3c, 0xa0, 0x3c, + 0xec, 0xba, 0xdb, 0xbd, 0xd9, 0xce, 0x88, 0xbc, 0x0e, 0x80, 0xd9, 0xbc, 0xe4, 0xd2, 0x14, 0xbd, + 0x20, 0xe7, 0x00, 0xbd, 0xb8, 0x25, 0xac, 0x3b, 0x5e, 0x3e, 0xdc, 0xbb, 0xe2, 0xc1, 0x0d, 0xbd, + 0xbe, 0x8a, 0x70, 0xbc, 0x88, 0xd8, 0x41, 0xbd, 0x1b, 0xd0, 0x10, 0xbd, 0x11, 0x8e, 0x80, 0xbd, + 0xa9, 0x36, 0xb1, 0xbd, 0x0b, 0xc1, 0xc6, 0xbc, 0xaa, 0xe8, 0x99, 0x3b, 0x86, 0x03, 0x74, 0x3b, + 0x5a, 0x4f, 0x0b, 0xbd, 0xc4, 0x4f, 0x81, 0xbd, 0x2c, 0x73, 0x38, 0xbd, 0xe2, 0x47, 0x07, 0xbd, + 0xc3, 0x13, 0xc7, 0xbf, 0x25, 0xed, 0x06, 0xc0, 0xf7, 0xda, 0xcb, 0xbf, 0xe2, 0x45, 0xe8, 0xbf, + 0xbb, 0x2c, 0xe7, 0xbf, 0xd4, 0xcd, 0xa1, 0xbf, 0x7b, 0xee, 0x47, 0xc0, 0x89, 0xa4, 0xe9, 0xbf, + 0xae, 0xa7, 0x4d, 0xc0, 0xb9, 0xc5, 0x3b, 0xc0, 0x38, 0x18, 0x24, 0xc0, 0xe7, 0x1b, 0xd6, 0xbf, + 0xcb, 0x7e, 0x30, 0xc0, 0x29, 0xdd, 0xe9, 0xbf, 0x88, 0x8b, 0x44, 0xc0, 0xd6, 0x22, 0xc3, 0xbf, + 0x38, 0xc4, 0x20, 0xbf, 0x9d, 0x4a, 0xcf, 0xbf, 0x25, 0xf1, 0x09, 0xc0, 0xd7, 0xfa, 0x0f, 0xc0, + 0xab, 0x17, 0x33, 0xc0, 0x24, 0xd9, 0x74, 0xc0, 0x35, 0x86, 0x09, 0xc0, 0xc0, 0x10, 0xa2, 0xbf, + 0xdf, 0x07, 0x1c, 0xc0, 0xc7, 0x8d, 0xae, 0xbf, 0x74, 0x4c, 0x20, 0xc0, 0xa7, 0xc3, 0x38, 0xc0, + 0x8c, 0x7b, 0x6c, 0xc0, 0x35, 0xf5, 0x26, 0xc0, 0xac, 0xfa, 0x15, 0xc0, 0x1a, 0xf4, 0x5f, 0xc0, + 0x41, 0x97, 0xbb, 0x3f, 0xfa, 0x2d, 0xa2, 0x3f, 0x34, 0x07, 0xac, 0x3f, 0xf5, 0x53, 0xce, 0x3f, + 0xce, 0x90, 0xf8, 0x3f, 0xb0, 0xed, 0xb9, 0x3f, 0xaf, 0x15, 0xc8, 0x3f, 0x9e, 0x16, 0xc6, 0x3f, + 0x5a, 0xcf, 0x98, 0x3f, 0x2e, 0xa6, 0xe1, 0x3f, 0x2f, 0x62, 0x8a, 0x3f, 0x23, 0x50, 0x34, 0x3f, + 0xc6, 0xe5, 0xf8, 0x3f, 0x2a, 0x7c, 0xa3, 0x3f, 0x1a, 0x56, 0x87, 0x3f, 0xfc, 0x49, 0x95, 0x3f, + 0x15, 0xdf, 0x8c, 0x3f, 0x3e, 0x44, 0x94, 0x3f, 0xe1, 0x73, 0xc8, 0x3f, 0x1c, 0xd7, 0xe5, 0x3e, + 0x42, 0x7b, 0x09, 0x40, 0x27, 0xe6, 0xd3, 0x3f, 0xb0, 0x2d, 0xd2, 0x3f, 0x04, 0xe0, 0xba, 0x3f, + 0xc8, 0xf1, 0xd0, 0x3f, 0xee, 0xe5, 0xa8, 0x3f, 0x14, 0xca, 0xd5, 0x3f, 0x10, 0x3c, 0x93, 0x3f, + 0xd6, 0x29, 0x07, 0x40, 0x3e, 0x36, 0xcd, 0x3f, 0x58, 0x55, 0xb6, 0x3f, 0x4a, 0xaf, 0x08, 0x40, + 0x4b, 0xac, 0xa2, 0x3e, 0x77, 0xff, 0x0c, 0x3f, 0x54, 0x12, 0x18, 0x3f, 0x40, 0xad, 0x41, 0xbd, + 0x70, 0x1f, 0x33, 0x3d, 0xcd, 0xdc, 0x89, 0x3e, 0x91, 0x12, 0x01, 0x3f, 0xb4, 0x50, 0x31, 0xbe, + 0x43, 0x5e, 0x82, 0x3f, 0xc2, 0x36, 0xf2, 0x3e, 0x8f, 0x1b, 0x4a, 0x3f, 0xd6, 0x98, 0x33, 0x3f, + 0x9c, 0xe0, 0x59, 0x3f, 0xda, 0x71, 0x84, 0x3e, 0x1c, 0x00, 0x94, 0x3f, 0x3e, 0x82, 0x1e, 0x3f, + 0xf8, 0x1e, 0x0c, 0xbe, 0xbb, 0x93, 0x5d, 0x3f, 0x54, 0xd0, 0x12, 0x3f, 0x1e, 0x13, 0xc8, 0x3d, + 0x6d, 0xa7, 0x04, 0x3f, 0x3d, 0x43, 0x82, 0x3f, 0x20, 0xb2, 0xf9, 0x3e, 0x8e, 0x93, 0x05, 0x3f, + 0x11, 0xde, 0x45, 0x3f, 0x8b, 0x11, 0x28, 0x3f, 0x9e, 0xa5, 0x2c, 0x3f, 0xb2, 0xd5, 0x10, 0x3f, + 0x28, 0x56, 0x60, 0x3f, 0x24, 0x5b, 0xa4, 0x3e, 0xae, 0xfa, 0x52, 0x3f, 0x1d, 0x4d, 0x98, 0x3f, + 0xff, 0x49, 0x3c, 0xbe, 0xb8, 0xda, 0x16, 0xbe, 0x96, 0x7c, 0x36, 0xbe, 0xae, 0x3b, 0x37, 0xbe, + 0x08, 0xdf, 0x6e, 0xbe, 0xb0, 0xc0, 0x41, 0xbe, 0x36, 0x49, 0x1f, 0xbe, 0x19, 0x89, 0x26, 0xbe, + 0x56, 0xac, 0xfb, 0xbd, 0xe8, 0xda, 0x42, 0xbe, 0xe5, 0xed, 0xed, 0xbd, 0xe3, 0x05, 0xae, 0xbd, + 0x28, 0x1f, 0x78, 0xbe, 0x13, 0x10, 0x13, 0xbe, 0x3a, 0x8f, 0xe9, 0xbd, 0xf1, 0xcf, 0x1e, 0xbe, + 0xa8, 0x9a, 0x0c, 0xbe, 0x24, 0xf5, 0x26, 0xbe, 0x3e, 0x56, 0x44, 0xbe, 0xb0, 0x75, 0x35, 0x3b, + 0x35, 0x7c, 0x82, 0xbe, 0x56, 0xcc, 0x2f, 0xbe, 0x64, 0xd6, 0x4b, 0xbe, 0x77, 0xbf, 0x4f, 0xbe, + 0x27, 0x17, 0x4f, 0xbe, 0x6b, 0xbe, 0x3d, 0xbe, 0x82, 0x95, 0x4d, 0xbe, 0x10, 0x37, 0xd6, 0xbd, + 0xae, 0x22, 0x73, 0xbe, 0x4c, 0xa5, 0x2d, 0xbe, 0xca, 0x01, 0x35, 0xbe, 0xd0, 0xc2, 0x86, 0xbe, + 0x80, 0x80, 0x8d, 0xbd, 0x0d, 0x05, 0x9b, 0xbd, 0xb5, 0x62, 0xd3, 0xbd, 0xe8, 0x42, 0x55, 0xbc, + 0xb2, 0xfc, 0x31, 0xbd, 0x31, 0x2a, 0x91, 0xbd, 0x8f, 0x63, 0x4b, 0xbd, 0xb0, 0xaa, 0x1a, 0x3c, + 0x30, 0xc4, 0xd7, 0xbd, 0x3e, 0x1d, 0x7b, 0xbd, 0xa6, 0x33, 0xb2, 0xbd, 0x3f, 0x73, 0xad, 0xbd, + 0x96, 0x27, 0x08, 0xbe, 0x0c, 0x0e, 0x2b, 0xbd, 0x72, 0xd2, 0xfd, 0xbd, 0xb3, 0x91, 0xcd, 0xbd, + 0x74, 0x51, 0x5e, 0xbc, 0xa3, 0xf9, 0x08, 0xbe, 0x89, 0x66, 0xbd, 0xbd, 0xd1, 0x32, 0x1b, 0x3d, + 0x8b, 0x1f, 0xbc, 0xbd, 0x6b, 0xfa, 0xdc, 0xbd, 0x89, 0x44, 0xab, 0xbd, 0x58, 0x5b, 0xdf, 0xbd, + 0xea, 0x29, 0xee, 0xbd, 0x95, 0xb4, 0xf3, 0xbd, 0xfc, 0x38, 0xcf, 0xbd, 0xd7, 0x03, 0x3d, 0xbd, + 0x9c, 0x37, 0xe9, 0xbd, 0xba, 0x63, 0x29, 0xbd, 0x60, 0x10, 0xef, 0xbd, 0xd9, 0xaa, 0x2c, 0xbe, + 0x13, 0x82, 0x9d, 0xbd, 0x2e, 0x2b, 0x9b, 0xbd, 0x03, 0x5f, 0x8e, 0xbd, 0x5d, 0xf2, 0xba, 0xbd, + 0x9d, 0xaa, 0xd1, 0xbd, 0xcc, 0xcf, 0x93, 0xbd, 0x13, 0xc6, 0xd5, 0xbd, 0x33, 0x97, 0xb9, 0xbd, + 0x86, 0x4b, 0xaf, 0xbd, 0x8e, 0x6c, 0xdf, 0xbd, 0x24, 0x56, 0x96, 0xbd, 0xbc, 0x75, 0x3a, 0xbd, + 0x7e, 0xae, 0xdd, 0xbd, 0x1e, 0xd5, 0x99, 0xbd, 0xfd, 0x6c, 0x9c, 0xbd, 0x84, 0x84, 0x7b, 0xbd, + 0x3c, 0xec, 0x5a, 0xbd, 0xf0, 0x28, 0x74, 0xbd, 0xc5, 0x28, 0xb3, 0xbd, 0x5a, 0x87, 0x59, 0xbd, + 0x75, 0x3a, 0xf7, 0xbd, 0x2f, 0xac, 0xe7, 0xbd, 0xf3, 0xf7, 0xba, 0xbd, 0x20, 0xfc, 0x8d, 0xbd, + 0xdc, 0xcb, 0xbc, 0xbd, 0x69, 0x14, 0x83, 0xbd, 0x65, 0x80, 0xc4, 0xbd, 0xf3, 0x65, 0xac, 0xbd, + 0x79, 0xf1, 0x04, 0xbe, 0x19, 0xf4, 0xcb, 0xbd, 0x7f, 0x4a, 0xa8, 0xbd, 0xd9, 0x00, 0xfd, 0xbd, + 0x51, 0xe1, 0x9f, 0xbc, 0xfa, 0xa5, 0x23, 0xbd, 0x90, 0x27, 0x03, 0xbd, 0xe5, 0x56, 0x08, 0xbc, + 0x1e, 0xac, 0xf4, 0xbb, 0xa1, 0x21, 0x55, 0xbc, 0x83, 0xab, 0x52, 0xbd, 0xee, 0x36, 0x96, 0xbb, + 0x74, 0x57, 0x9d, 0xbd, 0xec, 0xb1, 0x36, 0xbd, 0x07, 0xf2, 0x70, 0xbd, 0x18, 0xe3, 0x39, 0xbd, + 0xd6, 0x6d, 0x5b, 0xbd, 0x37, 0x22, 0xc6, 0xbc, 0x99, 0x8e, 0xa6, 0xbd, 0x55, 0x76, 0x0b, 0xbd, + 0x8c, 0xfb, 0x09, 0x3c, 0xf0, 0x31, 0x38, 0xbd, 0x99, 0x0b, 0x1b, 0xbd, 0x9e, 0x99, 0x11, 0xbd, + 0xa2, 0xa3, 0x20, 0xbd, 0x0f, 0x5d, 0xa6, 0xbd, 0xa7, 0x87, 0x09, 0xbd, 0x51, 0xa9, 0xb7, 0xbc, + 0x54, 0x93, 0x49, 0xbd, 0x23, 0xc1, 0xfc, 0xbc, 0xc1, 0x0e, 0x3d, 0xbd, 0xc2, 0x16, 0x61, 0xbd, + 0x12, 0x5c, 0x8b, 0xbd, 0x78, 0x4f, 0x11, 0xbd, 0xca, 0xa1, 0x55, 0xbd, 0x13, 0x26, 0x9c, 0xbd, + 0xbc, 0x40, 0x1d, 0xc0, 0x8e, 0xac, 0xfd, 0xbf, 0xee, 0x24, 0x31, 0xc0, 0xd6, 0x58, 0xda, 0xbf, + 0xa5, 0x58, 0x2a, 0xc0, 0x96, 0x0a, 0x27, 0xc0, 0xb7, 0x88, 0xc3, 0xbf, 0xa8, 0x2f, 0xa2, 0xbf, + 0x46, 0xd7, 0xe1, 0xbf, 0xa0, 0x34, 0x06, 0xc0, 0x79, 0x8b, 0xd2, 0xbf, 0x31, 0xca, 0xbe, 0xbf, + 0x0a, 0x12, 0x62, 0xc0, 0x41, 0x7a, 0xd0, 0xbf, 0xe9, 0x6a, 0xf4, 0xbf, 0x13, 0x8b, 0x1f, 0xc0, + 0x1d, 0x2b, 0xc5, 0xbf, 0x90, 0xca, 0x3b, 0xc0, 0x66, 0x26, 0x2a, 0xc0, 0xff, 0x89, 0x82, 0x3f, + 0xd6, 0xdb, 0x4c, 0xc0, 0x6d, 0x15, 0x0c, 0xc0, 0x49, 0x27, 0x29, 0xc0, 0xa4, 0x56, 0x4c, 0xc0, + 0x0b, 0x55, 0x3e, 0xc0, 0xa6, 0x45, 0x45, 0xc0, 0x0b, 0x54, 0x31, 0xc0, 0x7b, 0x60, 0x79, 0xbf, + 0x0c, 0xd3, 0x41, 0xc0, 0xe6, 0x42, 0xdc, 0xbf, 0x58, 0xd2, 0x2d, 0xc0, 0x0d, 0xed, 0x7e, 0xc0, + 0xee, 0x16, 0xac, 0xbf, 0x7d, 0x67, 0xba, 0xbf, 0x7b, 0x05, 0xd6, 0xbe, 0x76, 0x7e, 0x75, 0xbe, + 0x7a, 0xc2, 0x53, 0xc0, 0x84, 0x5c, 0xd5, 0xbf, 0xb5, 0xc6, 0xc2, 0xbf, 0xbd, 0x5b, 0x7a, 0x3e, + 0x81, 0x24, 0x23, 0xc0, 0x44, 0x9a, 0x35, 0xc0, 0x74, 0x7d, 0x51, 0xbf, 0xcd, 0x4c, 0xb7, 0xbe, + 0xf4, 0x46, 0xee, 0xbf, 0x64, 0x84, 0x34, 0xc0, 0x20, 0xc1, 0xbb, 0xbf, 0xf5, 0xd5, 0x1e, 0xc0, + 0xe1, 0x5f, 0x00, 0xc0, 0xf8, 0x3a, 0x14, 0xc0, 0xa8, 0x24, 0xfe, 0xbf, 0xd0, 0x6a, 0xe9, 0xbf, + 0x78, 0x6d, 0xe6, 0xbf, 0x3f, 0x25, 0x3e, 0xc0, 0xdc, 0x09, 0xa7, 0xbf, 0xfd, 0x31, 0xae, 0xbf, + 0xc1, 0x09, 0x45, 0xc0, 0x80, 0xa3, 0x9c, 0xbf, 0x8f, 0x27, 0x9e, 0xbf, 0xf0, 0x4f, 0x3c, 0xc0, + 0x7f, 0x79, 0x38, 0xc0, 0x6f, 0x66, 0x02, 0xc0, 0xa5, 0xeb, 0xdb, 0xbf, 0x27, 0xe7, 0x20, 0xc0, + 0xb4, 0x6b, 0x9d, 0x3f, 0xfa, 0x6c, 0x98, 0x3f, 0x1f, 0xb7, 0x72, 0x3f, 0x45, 0xff, 0x8f, 0x3f, + 0x8f, 0xcd, 0xb6, 0x3f, 0xe9, 0x87, 0x2f, 0x3f, 0x39, 0x3b, 0x5a, 0x3f, 0xb8, 0x10, 0x85, 0x3f, + 0x6b, 0x3a, 0x8f, 0x3f, 0x99, 0x81, 0xf7, 0x3f, 0xa8, 0x8f, 0xa4, 0x3f, 0x3e, 0x75, 0x00, 0x3f, + 0x08, 0xa0, 0xbf, 0x3f, 0x32, 0xdc, 0x85, 0x3f, 0xef, 0x2c, 0x9d, 0x3f, 0xb0, 0x4b, 0xf2, 0x3f, + 0xcb, 0x87, 0x85, 0x3f, 0xca, 0x64, 0xa2, 0x3f, 0x46, 0x70, 0xb0, 0x3f, 0x14, 0x19, 0xff, 0x3f, + 0x06, 0x10, 0xbb, 0x3f, 0xd7, 0x3a, 0x93, 0x3f, 0xfa, 0xe4, 0xe9, 0x3f, 0xad, 0xaf, 0x55, 0x3f, + 0xec, 0xfe, 0x08, 0x40, 0x43, 0x0a, 0xb1, 0x3f, 0xcc, 0xbf, 0xc6, 0x3f, 0x60, 0xdb, 0xb0, 0x3f, + 0x01, 0x51, 0xcc, 0x3f, 0x22, 0xc7, 0xdc, 0x3f, 0xed, 0xc7, 0xed, 0x3f, 0x46, 0x4a, 0xdc, 0x3f, + 0x0a, 0x58, 0x94, 0x3e, 0x10, 0xef, 0xd4, 0xbc, 0x00, 0xe3, 0xd7, 0x3a, 0xd3, 0x84, 0x9b, 0x3e, + 0xd8, 0x2c, 0x10, 0x3f, 0x0e, 0xa9, 0xc6, 0x3e, 0x9b, 0x83, 0xdd, 0xbd, 0x48, 0x85, 0xaf, 0xbd, + 0xe6, 0x30, 0x87, 0x3f, 0x0b, 0x2f, 0x89, 0x3e, 0x27, 0x42, 0x24, 0x3f, 0x77, 0xcd, 0x86, 0xbe, + 0xda, 0xb2, 0x25, 0x3e, 0x5e, 0x46, 0x99, 0x3f, 0xb9, 0x03, 0xa5, 0x3e, 0x3e, 0x8e, 0x09, 0x3f, + 0x6f, 0xd3, 0x37, 0x3f, 0x08, 0x1b, 0x39, 0x3f, 0xec, 0xce, 0xbc, 0x3e, 0xa8, 0xdd, 0xde, 0x3e, + 0xc6, 0xfd, 0x1c, 0x3f, 0x71, 0xcb, 0x0f, 0x3f, 0x08, 0xce, 0xfe, 0x3d, 0x65, 0x5f, 0x68, 0x3e, + 0xe8, 0x52, 0x9f, 0x3e, 0x2c, 0x3f, 0xc0, 0x3d, 0x42, 0xdb, 0x3f, 0x3f, 0xdf, 0xcc, 0xf5, 0x3e, + 0x7c, 0x3b, 0x6a, 0x3f, 0xc6, 0x9b, 0x14, 0x3f, 0xe6, 0xa6, 0x73, 0x3f, 0xe1, 0x9d, 0xa3, 0x3e, + 0x45, 0xd9, 0x1d, 0xbe, 0x48, 0xdf, 0x03, 0xbe, 0x08, 0x72, 0x03, 0xbe, 0x53, 0x2a, 0x34, 0xbe, + 0x96, 0x42, 0x07, 0xbe, 0x8f, 0x93, 0x88, 0xbd, 0xd8, 0xeb, 0x92, 0xbd, 0xba, 0xa2, 0x23, 0xbe, + 0xdc, 0xf0, 0x0a, 0xbe, 0x04, 0xdf, 0x55, 0xbe, 0x9f, 0x64, 0x4a, 0xbe, 0x84, 0x7f, 0x4d, 0xbd, + 0xcb, 0xb1, 0x2e, 0xbe, 0x27, 0x55, 0xfb, 0xbd, 0x54, 0x0b, 0x1b, 0xbe, 0xb3, 0x8d, 0x69, 0xbe, + 0x09, 0x89, 0x00, 0xbe, 0x5f, 0xd6, 0x18, 0xbe, 0xea, 0xd5, 0x22, 0xbe, 0x6b, 0x14, 0x85, 0xbe, + 0x10, 0x6d, 0x42, 0xbe, 0x33, 0x0c, 0xd0, 0xbd, 0xf6, 0x7f, 0x72, 0xbe, 0xb0, 0xc1, 0xba, 0xbd, + 0xa3, 0xa2, 0x6f, 0xbe, 0xe4, 0x7b, 0x2f, 0xbe, 0x20, 0xb2, 0x6a, 0xbe, 0x0b, 0x60, 0x08, 0xbe, + 0xc6, 0xa3, 0x41, 0xbe, 0xc0, 0x1b, 0x61, 0xbe, 0x07, 0xb6, 0x89, 0xbe, 0x1a, 0xe5, 0x42, 0xbe, + 0xdb, 0x52, 0x76, 0xbd, 0x88, 0xa3, 0xe8, 0xbb, 0xa2, 0x1c, 0x0a, 0xbd, 0x8c, 0x24, 0xc0, 0xbd, + 0x4e, 0xc0, 0x3a, 0xbd, 0x50, 0x3b, 0x17, 0xbd, 0xc6, 0x42, 0xc7, 0x3c, 0x22, 0xa1, 0x41, 0xbd, + 0xd8, 0x82, 0x04, 0xbe, 0x2d, 0x36, 0x2d, 0xbd, 0x8f, 0x72, 0x08, 0xbe, 0xe5, 0xc0, 0xd7, 0x3c, + 0x68, 0xdb, 0x17, 0xbd, 0xb7, 0x32, 0x0d, 0xbe, 0x1a, 0x3a, 0x79, 0xbd, 0x7e, 0x7a, 0xbd, 0xbd, + 0x8c, 0x7b, 0xbe, 0xbd, 0xe7, 0xed, 0xc1, 0xbd, 0x1c, 0xc9, 0x6d, 0xbd, 0x04, 0x4f, 0xd5, 0xbd, + 0x1a, 0x25, 0xd7, 0xbd, 0x02, 0x08, 0x2f, 0xbd, 0xc3, 0x3f, 0x88, 0xbd, 0xea, 0x20, 0xf9, 0xbc, + 0x2d, 0x3a, 0x51, 0xbd, 0x5e, 0xdb, 0x2d, 0xbd, 0x64, 0x70, 0x18, 0xbe, 0xda, 0x32, 0x30, 0xbd, + 0x52, 0xc2, 0xf7, 0xbd, 0xe7, 0xdb, 0xd7, 0xbd, 0x79, 0xa8, 0x36, 0xbe, 0x6e, 0x8b, 0x4d, 0xbd, + 0x01, 0xed, 0x84, 0xbd, 0x22, 0xd4, 0x8d, 0xbd, 0xc0, 0x14, 0x2e, 0xbd, 0x68, 0x9c, 0x2d, 0xbd, + 0xe7, 0x33, 0xd0, 0xbd, 0xc9, 0x3e, 0x47, 0xbd, 0x78, 0x5b, 0x70, 0xbd, 0x37, 0x60, 0x13, 0xbd, + 0x79, 0xe8, 0x91, 0xbd, 0xd1, 0x7f, 0xee, 0xbd, 0x58, 0x7c, 0x5c, 0xbd, 0x76, 0xfd, 0xe5, 0xbc, + 0x89, 0x5f, 0xae, 0xbd, 0xda, 0xef, 0x91, 0xbd, 0x57, 0x1f, 0x88, 0xbd, 0x92, 0x55, 0xd8, 0xbd, + 0xbc, 0x33, 0x82, 0xbd, 0x5b, 0xfb, 0x9d, 0xbd, 0xba, 0x31, 0xa4, 0xbd, 0xfa, 0x28, 0xcc, 0xbd, + 0x5d, 0xfc, 0x9d, 0xbd, 0x8e, 0x5e, 0xaf, 0xbd, 0xfa, 0xb1, 0xb5, 0xbd, 0x4c, 0x7f, 0x51, 0xbd, + 0x04, 0xc2, 0x02, 0xbe, 0x5e, 0x97, 0x91, 0xbd, 0x09, 0x3d, 0x8e, 0xbd, 0xe2, 0x41, 0xc2, 0xbd, + 0x96, 0x8d, 0xc5, 0xbd, 0x12, 0x27, 0xbb, 0xbd, 0xe8, 0xf2, 0xb0, 0xbd, 0xe3, 0xce, 0xd1, 0xbd, + 0xca, 0xa1, 0x92, 0xbc, 0xaf, 0xcf, 0x09, 0xbc, 0xf6, 0x07, 0x9e, 0x3b, 0xb0, 0x89, 0xad, 0xba, + 0xcb, 0x42, 0x6f, 0xbd, 0xe1, 0x48, 0x0a, 0xbd, 0xb8, 0x7b, 0x2e, 0xbc, 0x56, 0x29, 0xa6, 0x3c, + 0x75, 0x7a, 0x8b, 0xbd, 0xb2, 0xdc, 0x07, 0xbd, 0x6f, 0x30, 0xb1, 0xbc, 0xff, 0xc4, 0x26, 0x3c, + 0x62, 0x92, 0x96, 0xbc, 0xfe, 0x77, 0xa1, 0xbd, 0x28, 0x8a, 0xad, 0xbc, 0x3a, 0x0a, 0x1b, 0xbd, + 0xf2, 0xd0, 0x41, 0xbd, 0xde, 0x37, 0x4c, 0xbd, 0x5e, 0x38, 0xf3, 0xbc, 0x9a, 0x9e, 0xb2, 0xbc, + 0xb4, 0x43, 0x0e, 0xbd, 0xec, 0x34, 0x66, 0xbd, 0x90, 0x3c, 0x6a, 0xbb, 0xac, 0x0b, 0xaa, 0xbc, + 0xe0, 0x5f, 0x14, 0xbd, 0x12, 0x25, 0xd9, 0xbb, 0x32, 0xed, 0xef, 0xbc, 0x1c, 0xdd, 0x4b, 0xbd, + 0xf3, 0x95, 0x7f, 0xbd, 0x8a, 0xf2, 0x0b, 0xbd, 0xa6, 0x5e, 0x28, 0xbd, 0x48, 0x99, 0x04, 0xbd, + 0x77, 0xc6, 0x04, 0xc0, 0x4d, 0x11, 0x95, 0xbf, 0x14, 0x06, 0xdc, 0xbf, 0xa1, 0x64, 0x3e, 0xc0, + 0x01, 0xfc, 0x95, 0xbf, 0xf2, 0xb6, 0x3f, 0xbf, 0x50, 0xab, 0x1c, 0xbe, 0x2c, 0x01, 0x19, 0xc0, + 0x3a, 0x14, 0x16, 0xc0, 0xc3, 0x6c, 0x07, 0xc0, 0x75, 0x27, 0x61, 0xc0, 0xf8, 0xe8, 0x36, 0xbe, + 0x6b, 0x7b, 0xef, 0xbf, 0x7f, 0x82, 0x0e, 0xc0, 0x09, 0x5b, 0x01, 0xc0, 0x34, 0x1f, 0x3f, 0xc0, + 0xa3, 0x7f, 0xfa, 0xbf, 0xa0, 0x95, 0x0a, 0xc0, 0x5a, 0x0c, 0xf9, 0xbf, 0x24, 0xf5, 0x68, 0xc0, + 0x0b, 0x8a, 0x37, 0xc0, 0x8a, 0x7d, 0x61, 0xbf, 0x28, 0x87, 0x45, 0xc0, 0x7a, 0x8e, 0x83, 0xbf, + 0x24, 0x03, 0x1c, 0xc0, 0x3c, 0x5e, 0x06, 0xc0, 0x53, 0x6f, 0x7c, 0xc0, 0xf7, 0x90, 0x9f, 0xbf, + 0x18, 0x23, 0x31, 0xc0, 0x36, 0x91, 0x49, 0xc0, 0x7d, 0x4c, 0x93, 0xc0, 0xcf, 0xf0, 0x04, 0xc0, + 0xf9, 0xc8, 0xb3, 0x3f, 0x24, 0x2f, 0xd5, 0x3f, 0xd3, 0x7f, 0x76, 0x3f, 0xd1, 0x13, 0x6e, 0x3f, + 0x51, 0xae, 0xca, 0x3f, 0x74, 0xa1, 0x9c, 0x3f, 0x10, 0x0f, 0x9e, 0x3f, 0x2e, 0xc9, 0x8b, 0x3f, + 0x9e, 0x1c, 0xe7, 0x3f, 0x06, 0x7e, 0xa9, 0x3f, 0xad, 0x58, 0x8f, 0x3f, 0xcc, 0x89, 0x2f, 0x3f, + 0xb3, 0xab, 0x76, 0x3f, 0x89, 0x1e, 0xd0, 0x3f, 0x66, 0xd0, 0x97, 0x3f, 0xa7, 0x3e, 0xa3, 0x3f, + 0x58, 0xf4, 0x9f, 0x3f, 0xd8, 0x73, 0xd1, 0x3f, 0xcc, 0x2b, 0xd9, 0x3f, 0x74, 0xf4, 0x9e, 0x3f, + 0xf3, 0x78, 0x8d, 0x3f, 0xd2, 0xf1, 0x82, 0x3f, 0x47, 0x2c, 0xba, 0x3f, 0xf3, 0xb0, 0x89, 0x3f, + 0x93, 0xc6, 0xc8, 0x3f, 0x0a, 0x3c, 0x00, 0x40, 0xb1, 0x8d, 0x6d, 0x3f, 0x9c, 0x6f, 0xad, 0x3f, + 0xaf, 0x1f, 0xb8, 0x3f, 0x39, 0x2e, 0x39, 0x3f, 0xb5, 0xef, 0x73, 0x3f, 0xec, 0x08, 0xb4, 0x3f, + 0xa8, 0x20, 0x58, 0xbf, 0x2d, 0x3e, 0x6c, 0xbf, 0xab, 0xd5, 0x44, 0xbf, 0xd0, 0x8c, 0x62, 0xbf, + 0x5c, 0x0b, 0x56, 0xbf, 0x08, 0x9d, 0x46, 0xbf, 0xbc, 0xa1, 0x56, 0xbf, 0xd1, 0x72, 0x6f, 0xbf, + 0x10, 0xc5, 0x77, 0xbf, 0xe6, 0xf5, 0x90, 0xbf, 0xf8, 0x3c, 0x76, 0xbf, 0xc3, 0x3a, 0x3b, 0xbf, + 0x8e, 0x4c, 0x75, 0xbf, 0xc0, 0x94, 0x71, 0xbf, 0x40, 0x7e, 0x46, 0xbf, 0x9a, 0x67, 0x86, 0xbf, + 0x32, 0xcd, 0x45, 0xbf, 0xf1, 0xe1, 0x64, 0xbf, 0x4d, 0xc3, 0x77, 0xbf, 0xa0, 0x77, 0x5b, 0xbf, + 0xff, 0x35, 0x87, 0xbf, 0x76, 0xfb, 0x19, 0xbf, 0x4c, 0x89, 0x84, 0xbf, 0xb2, 0x47, 0x4a, 0xbf, + 0x91, 0x84, 0x73, 0xbf, 0x06, 0x90, 0x5c, 0xbf, 0x96, 0xf7, 0x64, 0xbf, 0xb8, 0x69, 0x8b, 0xbf, + 0x8f, 0x72, 0x89, 0xbf, 0xc6, 0x35, 0x2a, 0xbf, 0x4b, 0x9c, 0x4e, 0xbf, 0x52, 0x8a, 0x53, 0xbf, + 0xc0, 0x12, 0xcd, 0xbe, 0x6c, 0x28, 0x9a, 0xbe, 0x20, 0x4d, 0xba, 0xbd, 0x18, 0x85, 0xe2, 0xbd, + 0x67, 0xd1, 0x73, 0xbe, 0x00, 0xc4, 0x9f, 0xbe, 0x7c, 0x70, 0x52, 0xbe, 0xa7, 0x6d, 0x50, 0xbe, + 0xe2, 0xaf, 0xea, 0xbe, 0x3c, 0x82, 0x6c, 0xbe, 0xf1, 0x27, 0x6f, 0xbe, 0xa8, 0x4a, 0x40, 0x3d, + 0x94, 0x65, 0x8d, 0xbd, 0xee, 0xcc, 0xe9, 0xbe, 0xcb, 0xcf, 0x01, 0xbe, 0x54, 0x4e, 0xa9, 0xbe, + 0xb6, 0x9b, 0x8c, 0xbe, 0x00, 0x9d, 0xe6, 0xbe, 0x39, 0xa3, 0x1d, 0xbe, 0x2a, 0x00, 0xa3, 0xbd, + 0xf2, 0x27, 0x64, 0xbe, 0x5b, 0x40, 0x7a, 0xbe, 0xa6, 0x8b, 0x64, 0xbe, 0x49, 0xd9, 0x74, 0xbe, + 0x23, 0x15, 0x30, 0xbe, 0x40, 0xba, 0x1b, 0xbf, 0x98, 0x92, 0x1c, 0xbf, 0x13, 0xf2, 0xa1, 0xbe, + 0xf7, 0x18, 0xda, 0xbe, 0x03, 0x5d, 0x23, 0xbe, 0x24, 0x13, 0xad, 0xbe, 0x41, 0x8b, 0xaa, 0xbe, + 0x7c, 0x7b, 0xcc, 0x3d, 0xf3, 0x9f, 0xc8, 0x3d, 0x92, 0xc8, 0xb3, 0x3d, 0xe3, 0xb2, 0xdb, 0x3d, + 0x78, 0x23, 0xad, 0x3d, 0xbb, 0xc0, 0xba, 0x3d, 0xd4, 0x4d, 0xc2, 0x3d, 0xea, 0xa0, 0xe9, 0x3d, + 0x62, 0x00, 0xdd, 0x3d, 0x84, 0x94, 0x0c, 0x3e, 0x32, 0xf3, 0xf2, 0x3d, 0xd6, 0x0e, 0xad, 0x3d, + 0x5b, 0xa6, 0xeb, 0x3d, 0x11, 0xa9, 0xe1, 0x3d, 0x99, 0x34, 0xaa, 0x3d, 0x2e, 0xaf, 0x06, 0x3e, + 0xa8, 0x2c, 0xb4, 0x3d, 0xe2, 0x12, 0xd1, 0x3d, 0x30, 0x41, 0xc5, 0x3d, 0x1e, 0xbc, 0xba, 0x3d, + 0x57, 0xef, 0x07, 0x3e, 0x7c, 0xf7, 0x8b, 0x3d, 0x45, 0xa6, 0xf1, 0x3d, 0x1e, 0xc9, 0xc1, 0x3d, + 0xd2, 0xbf, 0xca, 0x3d, 0xc1, 0x2e, 0xbe, 0x3d, 0x61, 0x05, 0x09, 0x3e, 0x93, 0x3d, 0x09, 0x3e, + 0xa7, 0xa5, 0x09, 0x3e, 0xfa, 0x34, 0xab, 0x3d, 0x76, 0x7f, 0xd9, 0x3d, 0x48, 0xf0, 0xbf, 0x3d, + 0x20, 0x38, 0x63, 0x3d, 0x5a, 0x96, 0x12, 0x3d, 0x98, 0xc8, 0xa3, 0x3c, 0x5c, 0x71, 0xf4, 0x3c, + 0xa5, 0xaa, 0xca, 0x3c, 0x69, 0x89, 0x37, 0x3d, 0x7b, 0x5f, 0x01, 0x3d, 0xc9, 0x82, 0x27, 0x3d, + 0x2e, 0x52, 0x69, 0x3d, 0xe4, 0x0c, 0x41, 0x3d, 0xd2, 0x94, 0x3b, 0x3d, 0x48, 0x8e, 0xda, 0x3b, + 0x7c, 0x33, 0xd6, 0x3c, 0xe0, 0xd4, 0x7b, 0x3d, 0x3c, 0x7e, 0x95, 0x3c, 0x43, 0x16, 0x74, 0x3d, + 0x5f, 0x5a, 0x1c, 0x3d, 0x0e, 0x6d, 0x6c, 0x3d, 0x20, 0x93, 0x74, 0x3c, 0x42, 0xb0, 0x5b, 0x3c, + 0xc1, 0x53, 0x4a, 0x3d, 0x93, 0xa9, 0x05, 0x3d, 0xd6, 0x99, 0x16, 0x3d, 0xd7, 0xdc, 0x21, 0x3d, + 0x2d, 0x9b, 0xac, 0x3c, 0x89, 0x50, 0x8a, 0x3d, 0x5e, 0x20, 0xd8, 0x3d, 0xa7, 0x65, 0x68, 0x3d, + 0xff, 0x9d, 0x8e, 0x3d, 0x4f, 0x6c, 0x07, 0x3d, 0xf6, 0xe0, 0x72, 0x3d, 0x0c, 0xd9, 0x35, 0x3d, + 0xf9, 0x0d, 0x4d, 0x3d, 0x34, 0x07, 0x70, 0x3d, 0x28, 0x2d, 0x33, 0x3d, 0x1e, 0xc1, 0x41, 0x3d, + 0x0c, 0x55, 0x60, 0x3d, 0x54, 0xba, 0x3a, 0x3d, 0x1e, 0x8e, 0x4a, 0x3d, 0xd6, 0xcc, 0x4f, 0x3d, + 0xe9, 0x73, 0x78, 0x3d, 0xe9, 0x85, 0x7c, 0x3d, 0x8b, 0x3c, 0x54, 0x3d, 0x34, 0x8d, 0x21, 0x3d, + 0x61, 0x6c, 0x51, 0x3d, 0xc7, 0x90, 0x68, 0x3d, 0xa4, 0x92, 0x41, 0x3d, 0x3d, 0x6b, 0x67, 0x3d, + 0xdd, 0xed, 0x3d, 0x3d, 0xea, 0xf9, 0x61, 0x3d, 0xdb, 0x4c, 0x80, 0x3d, 0x9d, 0x61, 0x54, 0x3d, + 0x1c, 0x4c, 0x62, 0x3d, 0xc4, 0xb6, 0x15, 0x3d, 0x98, 0xe8, 0x76, 0x3d, 0x98, 0x8f, 0x36, 0x3d, + 0x0e, 0x63, 0x74, 0x3d, 0xbe, 0xf9, 0x6d, 0x3d, 0xd1, 0x69, 0x29, 0x3d, 0xa6, 0x99, 0x73, 0x3d, + 0x9c, 0x53, 0x71, 0x3d, 0xe0, 0x56, 0x0f, 0x3d, 0x62, 0xb5, 0x2b, 0x3d, 0x6d, 0xe3, 0x4d, 0x3d, + 0x19, 0x5d, 0xe4, 0x3c, 0xdc, 0x64, 0xe1, 0x3c, 0xf4, 0x5a, 0x21, 0x3c, 0x48, 0xaa, 0x0c, 0x3c, + 0x7c, 0xb8, 0xcb, 0x3c, 0x9c, 0x7c, 0xb7, 0x3c, 0x0e, 0xe0, 0x91, 0x3c, 0x42, 0xb5, 0x67, 0x3c, + 0x9d, 0x1c, 0x10, 0x3d, 0xa6, 0xc6, 0x87, 0x3c, 0x09, 0x4f, 0x7c, 0x3c, 0xa0, 0xd0, 0x09, 0x3a, + 0x28, 0x97, 0xda, 0x3b, 0x26, 0xd2, 0x04, 0x3d, 0x66, 0xf6, 0x72, 0x3c, 0x04, 0x30, 0xa8, 0x3c, + 0xc8, 0xdc, 0xaf, 0x3c, 0x90, 0x1d, 0x07, 0x3d, 0xd4, 0xd5, 0xb3, 0x3c, 0x1c, 0x6e, 0x54, 0x3c, + 0x34, 0x5d, 0x5e, 0x3c, 0x22, 0x28, 0x99, 0x3c, 0x7c, 0x1e, 0xa1, 0x3c, 0xca, 0x69, 0x8b, 0x3c, + 0x0f, 0x94, 0xa9, 0x3c, 0x86, 0x1b, 0x3a, 0x3d, 0xda, 0xfe, 0xde, 0x3c, 0xa8, 0xa2, 0xaa, 0x3c, + 0x9a, 0x4c, 0xd9, 0x3c, 0x3a, 0x60, 0x1f, 0x3c, 0xd0, 0x4c, 0x97, 0x3c, 0x56, 0xbf, 0xd1, 0x3c, + 0x50, 0xb6, 0xb2, 0x3f, 0x8a, 0x4d, 0x8b, 0x3f, 0x9e, 0xc6, 0x78, 0x3f, 0x3a, 0xaf, 0xa7, 0x3f, + 0xb5, 0x62, 0x55, 0x3f, 0x75, 0xad, 0x9c, 0x3f, 0x86, 0x96, 0x8e, 0x3f, 0xd4, 0x36, 0xbc, 0x3f, + 0x02, 0xa4, 0xb3, 0x3f, 0xe0, 0x2f, 0xdf, 0x3f, 0x25, 0xef, 0xc8, 0x3f, 0xec, 0xaf, 0x5a, 0x3f, + 0x8e, 0x82, 0xac, 0x3f, 0x14, 0x2c, 0xc3, 0x3f, 0x25, 0x00, 0x58, 0x3f, 0x82, 0x66, 0xea, 0x3f, + 0x16, 0xaa, 0x8d, 0x3f, 0xa7, 0xc2, 0xb1, 0x3f, 0xa2, 0xcb, 0x53, 0x3f, 0x4c, 0xbc, 0x5d, 0x3f, + 0xc7, 0xeb, 0xe2, 0x3f, 0x58, 0x56, 0x61, 0x3f, 0xec, 0x4f, 0xb0, 0x3f, 0xd0, 0xbc, 0x9e, 0x3f, + 0x04, 0x69, 0x75, 0x3f, 0x7b, 0xdb, 0xa6, 0x3f, 0x4e, 0x7a, 0x1d, 0x40, 0x6c, 0x56, 0xe7, 0x3f, + 0x1e, 0xd0, 0xf7, 0x3f, 0x8c, 0xd7, 0x90, 0x3f, 0xbe, 0xd2, 0xcf, 0x3f, 0x06, 0xe1, 0x99, 0x3f, + 0x50, 0xfd, 0xeb, 0xbc, 0x3f, 0x1b, 0xb3, 0x3e, 0xc8, 0x7c, 0xe3, 0x3e, 0x86, 0xb3, 0x3a, 0x3f, + 0x72, 0xa0, 0x5d, 0x3f, 0x42, 0xe7, 0x23, 0x3f, 0x97, 0xf6, 0xb1, 0x3e, 0xe0, 0x6d, 0xdc, 0x3c, + 0xd8, 0x6b, 0x6c, 0x3e, 0xcb, 0x21, 0xf9, 0x3e, 0x64, 0x64, 0x03, 0x3f, 0x22, 0xfb, 0x4b, 0x3e, + 0xed, 0x96, 0x94, 0x3d, 0xf6, 0xfe, 0x55, 0x3e, 0x69, 0x78, 0x80, 0x3f, 0xbc, 0x6a, 0x07, 0x3f, + 0xe0, 0x5c, 0xde, 0x3b, 0x30, 0x13, 0xe5, 0x3e, 0xbe, 0x44, 0x2b, 0x3f, 0x6a, 0x28, 0x04, 0x3f, + 0x70, 0x30, 0x2f, 0x3f, 0xff, 0x5a, 0x81, 0x3f, 0xb7, 0xff, 0x06, 0x3f, 0x60, 0xfa, 0xd5, 0xbb, + 0x3c, 0x15, 0xbd, 0x3d, 0xed, 0x6f, 0x33, 0x3f, 0x6a, 0xef, 0x32, 0xbe, 0x74, 0x09, 0x6b, 0x3f, + 0x4e, 0xb1, 0x28, 0x3f, 0x14, 0x6c, 0xf3, 0x3e, 0x57, 0xec, 0x31, 0x3e, 0xac, 0xa2, 0x63, 0x3f, + 0x56, 0x08, 0xfe, 0xbd, 0x12, 0xc3, 0x59, 0xbe, 0xf4, 0xcf, 0xc3, 0xbe, 0x06, 0x74, 0xb2, 0xbe, + 0x8c, 0xdc, 0xe0, 0xbe, 0x8a, 0x6b, 0xa9, 0xbe, 0x8a, 0x35, 0xdf, 0xbd, 0x46, 0x6f, 0x8c, 0xbe, + 0x56, 0x37, 0xf3, 0xbd, 0xff, 0xba, 0xc9, 0xbe, 0xf8, 0xd1, 0xba, 0xbe, 0x00, 0x7d, 0x0a, 0xbe, + 0xbd, 0x06, 0x57, 0xbe, 0x29, 0xb5, 0x2a, 0xbe, 0xda, 0x91, 0x96, 0xbe, 0x8f, 0x19, 0x90, 0xbe, + 0x00, 0x40, 0x84, 0xbe, 0xcc, 0x1f, 0x26, 0xbe, 0x5d, 0xba, 0xbd, 0xbe, 0x9c, 0x29, 0xdc, 0xbe, + 0x97, 0x9f, 0xc0, 0xbe, 0x64, 0x6c, 0xb0, 0xbe, 0x8b, 0x82, 0xcd, 0xbe, 0x19, 0xc3, 0x83, 0x3d, + 0xff, 0xc2, 0x83, 0xbe, 0xce, 0x56, 0xd0, 0xbe, 0x6e, 0x5c, 0x67, 0xbd, 0x72, 0xe5, 0xc5, 0xbe, + 0x35, 0xda, 0x11, 0xbf, 0x3e, 0xaa, 0xa4, 0xbe, 0x00, 0x36, 0xb5, 0xbe, 0x03, 0x6c, 0x04, 0xbf, + 0x02, 0xdf, 0x04, 0xbe, 0xb7, 0xac, 0x3e, 0x3d, 0x39, 0x4e, 0xbb, 0xbd, 0x9a, 0xc9, 0x41, 0xbe, + 0xa8, 0x83, 0x25, 0xbe, 0x75, 0x34, 0xbd, 0xbd, 0xf4, 0xdb, 0xa0, 0x3d, 0x3c, 0x79, 0xcc, 0x3c, + 0xbe, 0xad, 0xa3, 0xbe, 0x3b, 0xc2, 0xb1, 0xbd, 0x2f, 0xed, 0xcc, 0xbd, 0xee, 0x8c, 0xf0, 0x3d, + 0x74, 0x80, 0x5a, 0x3d, 0xf2, 0x58, 0x1d, 0xbe, 0x4a, 0xce, 0x8f, 0xbe, 0xee, 0xf1, 0x9b, 0xbc, + 0xda, 0x2a, 0x5d, 0xbd, 0xab, 0x36, 0x3b, 0xbd, 0xf4, 0x96, 0x2c, 0xbe, 0xec, 0x70, 0x9d, 0xbe, + 0x30, 0x8a, 0x65, 0xbe, 0x43, 0x0a, 0x05, 0xbe, 0x8c, 0x02, 0x04, 0xbe, 0xaf, 0x45, 0x85, 0xbc, + 0x8b, 0xd0, 0x31, 0x3e, 0x88, 0x43, 0x21, 0xbe, 0xfe, 0x9d, 0x29, 0xbe, 0x26, 0x45, 0x5c, 0xbe, + 0x3e, 0xe5, 0x38, 0xbe, 0xec, 0xc2, 0xd8, 0xbe, 0x23, 0x37, 0xc9, 0xbd, 0x59, 0xba, 0x41, 0xbe, + 0x2e, 0x6f, 0xd5, 0x3c, 0x38, 0xb9, 0x92, 0x3c, 0xc3, 0x0a, 0x41, 0x3d, 0xce, 0x53, 0x19, 0x3d, + 0x5b, 0xbc, 0x38, 0x3d, 0x39, 0xcf, 0x06, 0x3d, 0xa0, 0xe0, 0x5a, 0x3a, 0x67, 0xbc, 0x1f, 0x3d, + 0xb4, 0xd3, 0xd5, 0x3c, 0xcf, 0x6a, 0x41, 0x3d, 0xdf, 0x99, 0x2e, 0x3d, 0x3b, 0xb9, 0xfe, 0x3b, + 0x96, 0x5e, 0xd8, 0x3c, 0xfc, 0xb8, 0xd2, 0x3c, 0x88, 0x57, 0xca, 0x3c, 0x30, 0x31, 0xd1, 0x3c, + 0xf4, 0xd4, 0x28, 0x3d, 0x53, 0xcb, 0x3f, 0x3c, 0x2a, 0xd9, 0x2a, 0x3d, 0xf9, 0x57, 0x80, 0x3d, + 0x2e, 0x9f, 0x37, 0x3d, 0x84, 0xac, 0xc8, 0x3c, 0xb7, 0x03, 0x49, 0x3d, 0x94, 0x38, 0x0d, 0xbc, + 0x26, 0xe4, 0xdb, 0x3c, 0x50, 0x8f, 0x3a, 0x3d, 0xa3, 0xf8, 0xb8, 0x3c, 0x32, 0x2d, 0x1c, 0x3d, + 0x18, 0x87, 0x94, 0x3d, 0xd6, 0x63, 0x5b, 0x3d, 0x01, 0xda, 0x55, 0x3d, 0x12, 0x27, 0x6b, 0x3d, + 0x40, 0x1e, 0xda, 0x3c, 0xf9, 0x3d, 0x03, 0xbc, 0xe2, 0xb4, 0x93, 0x3c, 0xd9, 0x28, 0xb0, 0x3c, + 0xc2, 0x1a, 0x8e, 0x3c, 0x21, 0x75, 0x14, 0x3c, 0x94, 0xc9, 0x92, 0xbc, 0x7b, 0xa8, 0x14, 0x3c, + 0x94, 0x36, 0x3d, 0x3d, 0x51, 0x2b, 0x87, 0x3c, 0xac, 0x42, 0x84, 0x3c, 0x76, 0x54, 0x8f, 0xbc, + 0x00, 0xe7, 0xa5, 0x38, 0xd0, 0x08, 0xc8, 0x3c, 0x0a, 0x85, 0xbf, 0x3c, 0x40, 0xbd, 0x38, 0xba, + 0x14, 0x4c, 0xaa, 0x3c, 0x40, 0x82, 0x61, 0x38, 0x1c, 0x34, 0xb0, 0x3c, 0x98, 0x82, 0x4e, 0x3d, + 0x91, 0xad, 0xf2, 0x3c, 0xcc, 0xa0, 0x46, 0x3b, 0x32, 0xd2, 0xb2, 0x3c, 0x98, 0x82, 0x12, 0xba, + 0xe4, 0x2d, 0x85, 0xbc, 0xc3, 0xc9, 0xa8, 0x3c, 0x46, 0x32, 0x09, 0x3d, 0x64, 0xef, 0xab, 0x3c, + 0x59, 0xa6, 0x09, 0x3d, 0x97, 0x88, 0x82, 0x3d, 0x76, 0x40, 0xda, 0x3c, 0x96, 0x89, 0xc9, 0x3c, + 0xaf, 0x13, 0x27, 0x3b, 0xe1, 0xb6, 0x6b, 0x3c, 0x12, 0xc9, 0xa8, 0x3c, 0x66, 0x44, 0xbb, 0x3c, + 0x01, 0x91, 0xec, 0x3c, 0x5f, 0x9b, 0xb3, 0x3c, 0x60, 0xf6, 0x31, 0x3c, 0x28, 0xfb, 0x34, 0x3c, + 0xfe, 0x13, 0x8d, 0x3b, 0xe6, 0x95, 0xb2, 0x3c, 0xee, 0xeb, 0xab, 0x3c, 0xfe, 0x8a, 0x21, 0x3c, + 0xb9, 0xc0, 0x1f, 0x3c, 0x4c, 0x05, 0x00, 0x3c, 0xa0, 0x9d, 0xc6, 0x3c, 0xa3, 0xbb, 0x9c, 0x3c, + 0xa1, 0x7e, 0x15, 0x3c, 0xb7, 0x7c, 0x54, 0x3c, 0xc9, 0xeb, 0xbb, 0x3c, 0xf0, 0x17, 0xab, 0x3c, + 0xe7, 0x0d, 0xba, 0x3c, 0x78, 0x87, 0xe6, 0x3c, 0xb8, 0x1a, 0xb6, 0x3c, 0x08, 0x93, 0x32, 0xbb, + 0x57, 0xe3, 0x5a, 0x3c, 0xb1, 0x93, 0xcc, 0x3c, 0xc7, 0xbd, 0x3b, 0xbb, 0x55, 0x10, 0xde, 0x3c, + 0xb0, 0x80, 0xf6, 0x3c, 0xf6, 0xb7, 0x70, 0x3c, 0x1c, 0xc7, 0x74, 0x3c, 0x34, 0x78, 0x02, 0x3d, + 0xf9, 0xcf, 0x39, 0x3b, 0x69, 0xf5, 0xba, 0x3a, 0x02, 0x63, 0xc6, 0x3b, 0x0c, 0x0a, 0x74, 0x3c, + 0x0e, 0xc4, 0x75, 0x3c, 0xde, 0xd2, 0x23, 0x3c, 0x35, 0xae, 0xc2, 0x3a, 0xd0, 0x52, 0x80, 0xbb, + 0x73, 0x23, 0x6b, 0x3c, 0xfe, 0x02, 0xd3, 0x3b, 0x92, 0xcd, 0xfd, 0x3b, 0x1b, 0xdf, 0x35, 0xbb, + 0xd6, 0xdf, 0x5f, 0xbb, 0x40, 0xaa, 0xea, 0x3b, 0x60, 0x34, 0xc1, 0x3c, 0x58, 0x00, 0xc5, 0x3b, + 0x98, 0xf9, 0x8e, 0xba, 0x9e, 0x0f, 0xea, 0x3b, 0x5b, 0x59, 0x52, 0x3c, 0x2b, 0xd5, 0x71, 0x3c, + 0x02, 0x8b, 0x77, 0x3c, 0x44, 0x9b, 0x8e, 0x3c, 0x34, 0x00, 0x0d, 0x3c, 0x98, 0x1b, 0xab, 0x3a, + 0xb3, 0x2e, 0x06, 0xbc, 0x85, 0xd2, 0x4c, 0x3c, 0xdc, 0xf1, 0x29, 0x3b, 0xd5, 0xda, 0x97, 0x3c, + 0xb4, 0x31, 0x2e, 0x3c, 0x53, 0x09, 0xa2, 0x3c, 0x43, 0x4e, 0x0d, 0x3b, 0x42, 0x1c, 0x7d, 0x3c, + 0xb3, 0x8f, 0x19, 0x3f, 0x20, 0xfa, 0xc3, 0x3c, 0x8f, 0x39, 0x1f, 0x3f, 0xce, 0x20, 0xf6, 0x3e, + 0x76, 0xe3, 0xf8, 0x3e, 0xf4, 0xd0, 0x9f, 0x3e, 0x32, 0xe4, 0x8e, 0xbe, 0x3a, 0xb9, 0x08, 0x3f, + 0x0f, 0x70, 0x41, 0x3f, 0x1a, 0x60, 0x18, 0x3f, 0x88, 0x6a, 0x08, 0x3f, 0x70, 0x00, 0x23, 0xbe, + 0xb9, 0x4d, 0x8f, 0x3e, 0x32, 0x08, 0x03, 0x3f, 0xad, 0x76, 0x94, 0x3e, 0x1e, 0xa4, 0x16, 0x3e, + 0x4c, 0x25, 0x32, 0x3f, 0x68, 0x7f, 0xa7, 0x3c, 0xd4, 0x41, 0x0b, 0x3f, 0xb7, 0x52, 0x92, 0x3f, + 0xd5, 0xa0, 0x2a, 0x3f, 0x7c, 0x9b, 0x69, 0x3d, 0x94, 0x74, 0x2a, 0x3f, 0x20, 0x20, 0xd0, 0xbd, + 0x4c, 0x9b, 0xc4, 0x3d, 0xb7, 0xa7, 0x11, 0x3f, 0xb9, 0xad, 0x2c, 0x3f, 0x25, 0xff, 0xde, 0x3e, + 0x4e, 0xbd, 0x83, 0x3f, 0x62, 0xb2, 0x99, 0x3f, 0x2c, 0xf9, 0x57, 0x3f, 0x2c, 0x49, 0x33, 0x3f, + 0x67, 0x60, 0x28, 0xbe, 0x1a, 0x06, 0x60, 0xbe, 0x57, 0x61, 0x1c, 0xbe, 0xea, 0x00, 0x2c, 0xbe, + 0x9e, 0x9a, 0x2e, 0xbe, 0x52, 0xd7, 0x23, 0xbe, 0xd0, 0x86, 0x1b, 0xbe, 0x4c, 0x77, 0x31, 0xbe, + 0x54, 0x25, 0x4a, 0xbe, 0x01, 0x73, 0x03, 0xbe, 0xb2, 0x5c, 0x2a, 0xbe, 0xa2, 0x9a, 0xce, 0xbd, + 0x69, 0x5a, 0xb0, 0xbd, 0xca, 0x2a, 0x24, 0xbe, 0x47, 0x7f, 0x37, 0xbe, 0xc1, 0x43, 0x0a, 0xbe, + 0x1c, 0xa7, 0xf7, 0xbd, 0xbe, 0x31, 0x43, 0xbe, 0x66, 0x16, 0x63, 0xbe, 0xd0, 0xee, 0x1a, 0xbe, + 0x80, 0x5b, 0x0f, 0xbe, 0xf8, 0xed, 0xd6, 0xbd, 0x1d, 0xdd, 0x4d, 0xbe, 0xd6, 0xe2, 0xec, 0xbd, + 0x28, 0x92, 0x0c, 0xbe, 0xa4, 0xf0, 0x96, 0xbe, 0x54, 0xd2, 0xb6, 0xbd, 0x85, 0xb2, 0x1a, 0xbe, + 0xf4, 0x4c, 0x1c, 0xbe, 0xc1, 0xce, 0x83, 0xbd, 0x9c, 0x39, 0xc1, 0xbd, 0x06, 0xc1, 0x2f, 0xbe, + 0xd4, 0x18, 0xbc, 0x3d, 0x56, 0xf9, 0xdf, 0x3d, 0xd4, 0xa3, 0xd2, 0x3d, 0x94, 0xc0, 0xe6, 0x3d, + 0xa4, 0x54, 0xcc, 0x3d, 0x70, 0x69, 0xe1, 0x3d, 0x06, 0x86, 0xd2, 0x3d, 0xfb, 0x74, 0xf4, 0x3d, + 0x9d, 0x18, 0xe9, 0x3d, 0xc1, 0x1f, 0x00, 0x3e, 0x79, 0x15, 0xf5, 0x3d, 0xd2, 0x44, 0xcc, 0x3d, + 0x52, 0xa9, 0xd5, 0x3d, 0x72, 0x93, 0xeb, 0x3d, 0x92, 0x7a, 0xb8, 0x3d, 0xb8, 0x2a, 0xde, 0x3d, + 0x7a, 0x98, 0xc0, 0x3d, 0x5a, 0xa9, 0xcc, 0x3d, 0x16, 0x29, 0xf1, 0x3d, 0x4d, 0x57, 0xab, 0x3d, + 0xd0, 0xa1, 0x03, 0x3e, 0x73, 0x8a, 0x8d, 0x3d, 0x0b, 0x72, 0xea, 0x3d, 0x3a, 0x2a, 0xb3, 0x3d, + 0xe7, 0x71, 0xac, 0x3d, 0xec, 0xeb, 0xd3, 0x3d, 0x84, 0x8b, 0xae, 0x3d, 0x5f, 0xec, 0x0b, 0x3e, + 0x83, 0x9f, 0x0b, 0x3e, 0xaf, 0xdf, 0x6e, 0x3d, 0xba, 0x5b, 0x9d, 0x3d, 0x40, 0x54, 0xbd, 0x3d, + 0x8f, 0x1a, 0x69, 0x3d, 0x30, 0x01, 0x33, 0x3d, 0xc7, 0xca, 0x94, 0x3c, 0xc6, 0x7b, 0x82, 0x3c, + 0x1e, 0x00, 0xcd, 0x3c, 0xc3, 0xa9, 0x1d, 0x3d, 0x00, 0xe4, 0xf9, 0x3c, 0x85, 0xa5, 0x03, 0x3d, + 0x64, 0xe4, 0x4a, 0x3d, 0x92, 0x32, 0xf6, 0x3c, 0x78, 0xc3, 0x98, 0x3c, 0x90, 0x9c, 0x87, 0xbb, + 0xb0, 0x69, 0x16, 0x3b, 0x74, 0x89, 0x14, 0x3d, 0x9a, 0xcf, 0xb6, 0x3c, 0xea, 0xb3, 0x05, 0x3d, + 0xda, 0x7e, 0xa3, 0x3c, 0xa6, 0x5f, 0x3b, 0x3d, 0xd2, 0x80, 0x9d, 0x3c, 0x6f, 0xc8, 0x51, 0x3c, + 0x4c, 0x25, 0xc6, 0x3c, 0x01, 0x23, 0xc8, 0x3c, 0xd4, 0x8a, 0x12, 0x3d, 0x1f, 0x84, 0xee, 0x3c, + 0x52, 0xcd, 0xdc, 0x3b, 0x5a, 0x97, 0xc4, 0x3d, 0xe9, 0xaf, 0x99, 0x3d, 0x8c, 0xd7, 0x2c, 0x3d, + 0xb1, 0xc5, 0x2d, 0x3d, 0xee, 0xed, 0xd2, 0x3c, 0x24, 0xba, 0xc3, 0x3c, 0x6a, 0xc4, 0x47, 0x3d, + 0x4a, 0x89, 0x36, 0xbc, 0x1e, 0x17, 0x39, 0xbc, 0x8e, 0x3e, 0x38, 0xbc, 0x41, 0x37, 0x46, 0xbc, + 0x1f, 0x93, 0x2c, 0xbc, 0x1a, 0xa5, 0x56, 0xbc, 0x78, 0xab, 0x42, 0xbc, 0x76, 0x07, 0x61, 0xbc, + 0x93, 0x7b, 0x54, 0xbc, 0x6e, 0x01, 0x83, 0xbc, 0x6a, 0x84, 0x5a, 0xbc, 0x4e, 0xa3, 0x3a, 0xbc, + 0x10, 0x3f, 0x53, 0xbc, 0xbe, 0xd7, 0x60, 0xbc, 0x5d, 0xca, 0x0d, 0xbc, 0x84, 0x99, 0x5b, 0xbc, + 0x6e, 0x6c, 0x35, 0xbc, 0x06, 0xf7, 0x32, 0xbc, 0x93, 0x07, 0x38, 0xbc, 0xaf, 0x77, 0x05, 0xbc, + 0x1c, 0xa3, 0x81, 0xbc, 0x50, 0x8c, 0x04, 0xbc, 0x5c, 0xd9, 0x48, 0xbc, 0x10, 0xae, 0x2f, 0xbc, + 0x98, 0x7b, 0x09, 0xbc, 0x8f, 0xf0, 0x2b, 0xbc, 0x2a, 0x2d, 0x5e, 0xbc, 0x88, 0xf0, 0x8f, 0xbc, + 0xaa, 0x3e, 0x8f, 0xbc, 0xbc, 0x44, 0x01, 0xbc, 0x42, 0xe0, 0x1c, 0xbc, 0xb3, 0x69, 0x2d, 0xbc, + 0xb4, 0x99, 0xfa, 0xbb, 0xa8, 0x06, 0x9b, 0xbb, 0x9a, 0xf6, 0x35, 0xbb, 0xc8, 0xd7, 0x1e, 0xbb, + 0xea, 0x70, 0x48, 0xbb, 0xea, 0xc2, 0xc2, 0xbb, 0x4a, 0x75, 0x98, 0xbb, 0x90, 0x3e, 0xa4, 0xbb, + 0x48, 0x53, 0xd6, 0xbb, 0x25, 0x81, 0xd4, 0xbb, 0xe9, 0x02, 0x54, 0xbb, 0xbc, 0x89, 0x83, 0xba, + 0x60, 0x4f, 0x30, 0xbb, 0xed, 0x97, 0xbf, 0xbb, 0x3a, 0x28, 0xf6, 0xba, 0xce, 0xb1, 0xbe, 0xbb, + 0x47, 0x2f, 0x70, 0xbb, 0xcc, 0x5e, 0xb4, 0xbb, 0x8c, 0xd0, 0xb4, 0xba, 0xe4, 0xfb, 0x8a, 0xba, + 0x8e, 0x95, 0xb0, 0xbb, 0x04, 0x5f, 0x6d, 0xbb, 0xb4, 0xd1, 0x8f, 0xbb, 0x36, 0x1a, 0xa0, 0xbb, + 0x44, 0x12, 0x29, 0xba, 0xe7, 0xac, 0x1f, 0xbc, 0x7a, 0x7d, 0x4d, 0xbc, 0x7c, 0x23, 0x05, 0xbc, + 0xfb, 0x99, 0x04, 0xbc, 0x4c, 0xcf, 0x97, 0xbb, 0x02, 0x45, 0x8c, 0xbb, 0x88, 0xb6, 0xcb, 0xbb, + 0x41, 0x59, 0xb3, 0xbb, 0x9a, 0xf6, 0xeb, 0xbb, 0xe0, 0x58, 0xca, 0xbb, 0x95, 0xa9, 0xdf, 0xbb, + 0x71, 0x78, 0xcd, 0xbb, 0xdb, 0xdc, 0xce, 0xbb, 0x62, 0xc4, 0xc4, 0xbb, 0x76, 0x28, 0xe4, 0xbb, + 0x2c, 0x61, 0xe3, 0xbb, 0x94, 0x8b, 0xd3, 0xbb, 0x9d, 0x45, 0xe6, 0xbb, 0xd2, 0x59, 0xb3, 0xbb, + 0xc7, 0x48, 0xae, 0xbb, 0x36, 0x0c, 0xd6, 0xbb, 0x71, 0x69, 0xc7, 0xbb, 0xc7, 0xab, 0xc1, 0xbb, + 0x60, 0x6c, 0xad, 0xbb, 0xc7, 0x29, 0xd0, 0xbb, 0x9d, 0xf7, 0x00, 0xbc, 0xdf, 0x30, 0xb4, 0xbb, + 0x2f, 0x82, 0xe0, 0xbb, 0x72, 0x3b, 0x84, 0xbb, 0x9b, 0xce, 0xeb, 0xbb, 0x28, 0xbe, 0x9e, 0xbb, + 0x97, 0xad, 0xaf, 0xbb, 0x5e, 0xd6, 0xf8, 0xbb, 0xe6, 0x7b, 0x75, 0xbb, 0x09, 0x21, 0xe9, 0xbb, + 0xe3, 0x87, 0xe9, 0xbb, 0x5b, 0x87, 0x40, 0xbb, 0xc2, 0x27, 0x88, 0xbb, 0x30, 0x5b, 0xbb, 0xbb, + 0xa2, 0x39, 0x74, 0xbb, 0xd0, 0x62, 0x80, 0xbb, 0xde, 0x5f, 0xfe, 0xba, 0x8b, 0x50, 0x02, 0xbb, + 0x1a, 0x03, 0x26, 0xbb, 0x6c, 0x32, 0x33, 0xbb, 0x20, 0xa7, 0x1c, 0xbb, 0x92, 0x80, 0x2a, 0xbb, + 0x7a, 0x1e, 0x74, 0xbb, 0x27, 0x25, 0xdf, 0xba, 0xdd, 0x83, 0x01, 0xbb, 0xba, 0x7b, 0x92, 0xb9, + 0xdc, 0xb1, 0x63, 0xb9, 0xde, 0x00, 0x2a, 0xbb, 0x36, 0xc8, 0x30, 0xbb, 0x55, 0xd6, 0x0a, 0xbb, + 0x0d, 0x2f, 0xd0, 0xba, 0x4e, 0xc4, 0x6e, 0xbb, 0x71, 0x03, 0x41, 0xbb, 0x59, 0x31, 0x00, 0xbb, + 0xf3, 0x1a, 0xd6, 0xba, 0x8b, 0x1b, 0xec, 0xba, 0x33, 0xbc, 0x55, 0xbb, 0xc9, 0x74, 0xfc, 0xba, + 0x8c, 0x0c, 0xc3, 0xba, 0xb6, 0x92, 0xec, 0xbb, 0x87, 0x1c, 0x54, 0xbb, 0xec, 0xc6, 0x1c, 0xbb, + 0x16, 0x49, 0x1f, 0xbb, 0x5b, 0x9a, 0xab, 0xba, 0x07, 0xa8, 0xc5, 0xba, 0x82, 0x99, 0x67, 0xbb, + 0x77, 0x5a, 0x2e, 0xbe, 0x11, 0xa3, 0x00, 0xbe, 0xfe, 0xdf, 0xf4, 0xbd, 0x1a, 0x95, 0xf8, 0xbd, + 0x46, 0xd9, 0xe1, 0xbd, 0xc8, 0x6e, 0x32, 0xbe, 0x18, 0x98, 0x17, 0xbe, 0x6a, 0x99, 0x2b, 0xbe, + 0xd5, 0x36, 0x2e, 0xbe, 0xaa, 0x12, 0x62, 0xbe, 0x70, 0x52, 0x14, 0xbe, 0x32, 0xee, 0xe9, 0xbd, + 0xfe, 0x7a, 0x1d, 0xbe, 0x68, 0x68, 0x38, 0xbe, 0x01, 0xea, 0x96, 0xbd, 0x82, 0xe1, 0x3a, 0xbe, + 0xa9, 0xd3, 0x0a, 0xbe, 0x94, 0x72, 0x0c, 0xbe, 0x1b, 0x08, 0xb1, 0xbd, 0x2d, 0xa7, 0x86, 0xbd, + 0x13, 0x8b, 0x50, 0xbe, 0x3a, 0x1d, 0xd8, 0xbd, 0x46, 0x51, 0x0c, 0xbe, 0x3f, 0x33, 0x16, 0xbe, + 0xd5, 0xe8, 0x89, 0xbd, 0x65, 0xb2, 0x1d, 0xbe, 0xd8, 0xec, 0x8a, 0xbe, 0x48, 0xad, 0x80, 0xbe, + 0x26, 0x99, 0x7f, 0xbe, 0xba, 0x92, 0xff, 0xbd, 0xbd, 0x76, 0x07, 0xbe, 0x3c, 0x9c, 0x15, 0xbe, + 0x10, 0xca, 0xac, 0xbc, 0x77, 0x98, 0xb2, 0xbd, 0x55, 0x8e, 0xce, 0xbd, 0xff, 0x73, 0x17, 0xbe, + 0x48, 0x38, 0xca, 0xbd, 0x22, 0x32, 0xd0, 0xbd, 0x2a, 0x58, 0x81, 0xbd, 0x0b, 0xc9, 0x88, 0xbd, + 0x16, 0x98, 0x23, 0xbd, 0x2a, 0xba, 0x36, 0xbd, 0xd3, 0x7b, 0xd8, 0xbd, 0x70, 0xf0, 0x55, 0xbd, + 0x80, 0xfb, 0x43, 0x3b, 0x04, 0x92, 0x98, 0xbc, 0x7c, 0xd2, 0x24, 0xbe, 0xb4, 0x57, 0x77, 0xbd, + 0xb0, 0x4e, 0x5c, 0x3b, 0x04, 0xe5, 0x92, 0xbd, 0x4f, 0xb7, 0xf3, 0xbd, 0xa1, 0x43, 0xa1, 0xbd, + 0xa2, 0x82, 0xc8, 0xbd, 0x0c, 0x63, 0xd4, 0xbd, 0x8e, 0xd9, 0xdd, 0xbd, 0x60, 0xe2, 0xf3, 0xbb, + 0x20, 0x87, 0x29, 0x3c, 0xca, 0x7a, 0x28, 0xbe, 0x73, 0x08, 0xac, 0x3c, 0x3b, 0xed, 0xdb, 0xbd, + 0x0e, 0xf5, 0x98, 0xbd, 0x6a, 0x10, 0x22, 0xbd, 0x70, 0xa4, 0x86, 0xbc, 0x83, 0x8f, 0xf5, 0xbd, + 0x8b, 0x64, 0xa2, 0x3c, 0x69, 0x10, 0x1d, 0x3d, 0x46, 0x7f, 0x83, 0x3d, 0x78, 0xc9, 0x71, 0x3d, + 0x5a, 0x14, 0x76, 0x3d, 0x07, 0x4a, 0x86, 0x3d, 0x65, 0x8e, 0xf4, 0x3c, 0xe0, 0x24, 0x5a, 0x3d, + 0xce, 0xce, 0xec, 0x3c, 0xba, 0x01, 0x52, 0x3d, 0x92, 0xa4, 0x75, 0x3d, 0x9e, 0x5d, 0x24, 0x3d, + 0xb3, 0xb5, 0xf1, 0x3c, 0xc1, 0xe7, 0x18, 0x3d, 0x04, 0xd3, 0x2b, 0x3d, 0xc4, 0x84, 0x01, 0x3d, + 0x3e, 0x82, 0x2e, 0x3d, 0x0a, 0x1f, 0xdb, 0x3c, 0xc8, 0xae, 0x6d, 0x3d, 0x16, 0xaa, 0x27, 0x3d, + 0xbf, 0xf3, 0x77, 0x3d, 0x45, 0xd9, 0x31, 0x3d, 0xc2, 0x2e, 0x51, 0x3d, 0x3a, 0x45, 0x05, 0x3b, + 0xc0, 0xae, 0x72, 0x3c, 0x3e, 0x9d, 0x6d, 0x3d, 0x20, 0x1e, 0xbc, 0x3a, 0xc7, 0xf1, 0x87, 0x3d, + 0x18, 0x03, 0xb0, 0x3d, 0x14, 0x8a, 0xc4, 0x3c, 0xc8, 0x1b, 0x01, 0x3d, 0x98, 0x11, 0x7c, 0x3d, + 0x1e, 0x63, 0xf4, 0x3c, 0x83, 0x71, 0x12, 0x3c, 0x9e, 0x31, 0x95, 0x3c, 0xd4, 0xe7, 0xc2, 0x3c, + 0x86, 0x5b, 0x8e, 0x3c, 0x1c, 0x62, 0x87, 0x3c, 0x55, 0x90, 0x09, 0x3b, 0xbf, 0x34, 0x18, 0x3c, + 0xe2, 0x15, 0x12, 0x3d, 0x14, 0x18, 0x80, 0x3c, 0x2a, 0xa3, 0xad, 0x3b, 0x5b, 0x54, 0x37, 0xbc, + 0x3e, 0xea, 0x22, 0xbc, 0xe0, 0x7b, 0xc3, 0x3b, 0x84, 0x86, 0x1a, 0x3d, 0xa4, 0xc4, 0xc1, 0x3a, + 0x10, 0xf4, 0x09, 0xbb, 0xb9, 0xd5, 0xac, 0x3b, 0x02, 0x77, 0xa9, 0x3c, 0x69, 0xcc, 0x11, 0x3d, + 0xb4, 0x40, 0xc7, 0x3c, 0x3e, 0xbc, 0x54, 0x3c, 0x2e, 0xdb, 0xd7, 0x3c, 0x72, 0xdb, 0xdf, 0x3b, + 0xd2, 0xea, 0xe3, 0xbc, 0x1c, 0x86, 0x50, 0x3d, 0xef, 0x68, 0xf9, 0x3c, 0xc1, 0x64, 0x03, 0x3d, + 0x35, 0x81, 0x92, 0x3c, 0x4c, 0x87, 0x55, 0x3d, 0x00, 0xd5, 0xb4, 0xb8, 0xbf, 0xd2, 0x0c, 0x3d, + 0x8c, 0x8c, 0x76, 0xbb, 0x97, 0xbd, 0x57, 0xbb, 0xce, 0x44, 0xeb, 0xbb, 0x54, 0x93, 0xa7, 0xbb, + 0x94, 0xea, 0xd7, 0xbb, 0x38, 0x55, 0xee, 0xbb, 0xda, 0x60, 0x22, 0xbb, 0x92, 0x0a, 0xcc, 0xbb, + 0xd0, 0xe4, 0x9c, 0xbb, 0x51, 0xe6, 0xe4, 0xbb, 0xce, 0xfe, 0xbb, 0xbb, 0xf8, 0xb5, 0x73, 0xbb, + 0x30, 0x00, 0x84, 0xbb, 0xf8, 0xea, 0xac, 0xbb, 0xc2, 0x60, 0x38, 0xbb, 0xd8, 0xc5, 0x37, 0xbb, + 0x2b, 0xa2, 0xd1, 0xbb, 0x36, 0x15, 0xf7, 0xba, 0x60, 0x21, 0xbd, 0xbb, 0x55, 0x82, 0xad, 0xbb, + 0x4d, 0x74, 0xe6, 0xbb, 0x83, 0xd6, 0x72, 0xbb, 0xda, 0xde, 0xaf, 0xbb, 0x4a, 0xf0, 0xd0, 0xb9, + 0xbd, 0x17, 0x9a, 0xba, 0xc2, 0xd5, 0xbc, 0xbb, 0x8e, 0xbf, 0x1f, 0xbb, 0xd2, 0xc3, 0x02, 0xbc, + 0x8e, 0x13, 0x39, 0xbc, 0x31, 0x1e, 0xa0, 0xbb, 0x2e, 0xd6, 0x88, 0xbb, 0x9a, 0xd8, 0xe3, 0xbb, + 0x81, 0x12, 0x9c, 0xbb, 0x80, 0x1f, 0xb8, 0xb9, 0x06, 0x1a, 0x29, 0xbb, 0xc2, 0x6e, 0xd0, 0xba, + 0xcf, 0xfd, 0x17, 0xbb, 0x3c, 0x3f, 0x1b, 0xbb, 0x12, 0x15, 0x9c, 0x39, 0x7a, 0xde, 0xef, 0xba, + 0x00, 0x0a, 0xb3, 0xbb, 0x87, 0x43, 0x60, 0xbb, 0x30, 0x84, 0x8d, 0xb9, 0x50, 0x36, 0xb9, 0x3a, + 0x00, 0xf7, 0xae, 0xb8, 0xb3, 0x48, 0x0c, 0xbb, 0x27, 0xb3, 0x1c, 0xbb, 0xf8, 0x4b, 0x5c, 0x39, + 0xe0, 0x83, 0xfc, 0xba, 0x41, 0x82, 0x89, 0x39, 0x4e, 0x8a, 0x05, 0xbb, 0x31, 0x04, 0x9c, 0xbb, + 0x5e, 0x96, 0x5f, 0xbb, 0x1a, 0x7f, 0x2d, 0xba, 0xd4, 0xbb, 0x3d, 0xbb, 0x36, 0x3f, 0x66, 0xba, + 0x4a, 0x5c, 0x4a, 0x3b, 0x0d, 0x90, 0xa5, 0xbb, 0x66, 0xef, 0xae, 0xbb, 0x34, 0x22, 0x95, 0xbb, + 0xa3, 0x22, 0x93, 0xbb, 0x33, 0x53, 0xfc, 0xbb, 0xb0, 0x11, 0x85, 0xba, 0x86, 0xd9, 0x8a, 0xbb, + 0x4e, 0x45, 0x3d, 0xba, 0xd2, 0x2a, 0x36, 0xbb, 0x58, 0x0a, 0x7d, 0xbb, 0xd8, 0x90, 0x8f, 0xbb, + 0x35, 0x43, 0x71, 0xbb, 0xbf, 0x52, 0x81, 0xbb, 0x89, 0xa1, 0x0c, 0xbb, 0x72, 0x3a, 0x44, 0xbb, + 0x93, 0xe6, 0xaa, 0xba, 0x02, 0xec, 0x21, 0xbb, 0x5a, 0x40, 0x81, 0xbb, 0x1b, 0xd6, 0x24, 0xbb, + 0xdc, 0xc8, 0x9a, 0xba, 0xca, 0x65, 0xd6, 0xba, 0xf6, 0xff, 0x77, 0xbb, 0x49, 0x75, 0x0e, 0xbb, + 0x0a, 0x84, 0xcd, 0xba, 0x61, 0xc2, 0x0b, 0xbb, 0x22, 0xa5, 0x7f, 0xbb, 0x4e, 0x72, 0x1d, 0xbb, + 0xeb, 0xc8, 0x6b, 0xbb, 0x71, 0x58, 0x51, 0xbb, 0x6f, 0x3e, 0x5e, 0xbb, 0x60, 0xfb, 0xf0, 0xb8, + 0xad, 0x91, 0x3f, 0xba, 0xa6, 0x49, 0x8c, 0xbb, 0xf3, 0x67, 0x31, 0x3a, 0x1a, 0x26, 0x7e, 0xbb, + 0x14, 0x12, 0x8b, 0xbb, 0x90, 0xe4, 0x6d, 0xba, 0xe2, 0xf9, 0xbe, 0xba, 0xd0, 0x0a, 0x7e, 0xbb, + 0x1d, 0x3b, 0xa0, 0xba, 0xfc, 0x9b, 0xab, 0xba, 0x1b, 0xa5, 0xcc, 0xba, 0x0d, 0xaa, 0x2b, 0xbb, + 0x13, 0xaf, 0xca, 0xba, 0xc9, 0xdf, 0xc3, 0xba, 0x2c, 0xbf, 0x46, 0xba, 0xfc, 0x9d, 0x50, 0xba, + 0xf2, 0x30, 0xd7, 0xba, 0xd2, 0x9d, 0x34, 0xba, 0x52, 0xb4, 0x9e, 0xba, 0x5e, 0x9d, 0x54, 0x38, + 0x97, 0x7f, 0x4f, 0x3a, 0xd0, 0x88, 0x8d, 0xb8, 0x28, 0x29, 0x6a, 0xbb, 0xf4, 0xbf, 0x2e, 0xba, + 0xb9, 0xfc, 0x2e, 0x3a, 0xdf, 0xc9, 0x8a, 0xba, 0xe9, 0x48, 0x05, 0xbb, 0x2a, 0xf4, 0x0b, 0xbb, + 0x9a, 0x3f, 0xea, 0xba, 0x26, 0x3a, 0xdb, 0xba, 0x7f, 0x3d, 0x0d, 0xbb, 0xe9, 0x05, 0xba, 0xb9, + 0x51, 0x9f, 0xb7, 0x3a, 0xcc, 0xa6, 0x80, 0xbb, 0x86, 0xd6, 0x4a, 0xba, 0xaa, 0xc0, 0x0d, 0xbb, + 0xc0, 0x7e, 0x5c, 0xba, 0x26, 0xae, 0x17, 0xbb, 0x04, 0x4d, 0x01, 0x39, 0xbc, 0x0b, 0x25, 0xbb, + 0xcb, 0x9d, 0xb9, 0xbd, 0xae, 0x27, 0x9d, 0xbc, 0xf1, 0x3b, 0xad, 0xbd, 0x1f, 0x7a, 0x1d, 0xbd, + 0x18, 0xb2, 0x9a, 0xbd, 0x9e, 0x1b, 0xaa, 0xbd, 0xa8, 0x20, 0x0f, 0xbc, 0x47, 0x6f, 0x97, 0xbd, + 0xe7, 0xc0, 0xd7, 0xbd, 0x68, 0xf2, 0xd8, 0xbd, 0x86, 0x90, 0x28, 0xbd, 0xfc, 0x0a, 0x53, 0xbc, + 0x5e, 0x88, 0x40, 0xbd, 0xa4, 0x5d, 0xa1, 0xbd, 0x06, 0xff, 0x95, 0xbc, 0x60, 0xcb, 0x66, 0xbc, + 0xd2, 0x81, 0xc1, 0xbd, 0x00, 0x80, 0xaa, 0xb9, 0x86, 0x00, 0x6e, 0xbd, 0xae, 0x61, 0xbf, 0xbd, + 0xfa, 0x83, 0xbe, 0xbd, 0x4a, 0x19, 0xbe, 0xbc, 0x56, 0xe6, 0x86, 0xbd, 0xa0, 0xdc, 0x4c, 0xbc, + 0x31, 0x08, 0x9b, 0x3c, 0x31, 0xf2, 0xa4, 0xbd, 0x32, 0xea, 0xbd, 0xbd, 0x1e, 0xc4, 0xe9, 0xbd, + 0x4b, 0xbd, 0x22, 0xbe, 0xd6, 0xde, 0x06, 0xbe, 0xd9, 0x60, 0x5e, 0xbd, 0x5e, 0x71, 0xc4, 0xbd, + 0xc2, 0x7f, 0x9a, 0xbd, 0xb9, 0xa9, 0xa8, 0xbd, 0xe2, 0xaf, 0x19, 0xbd, 0xb3, 0xbe, 0xf3, 0xbc, + 0x66, 0x62, 0xd4, 0xbd, 0x04, 0x7c, 0x86, 0xbd, 0xa1, 0xe0, 0x89, 0xbd, 0xc0, 0xac, 0x15, 0xbd, + 0x13, 0xc4, 0xdb, 0xbd, 0xef, 0x17, 0xba, 0xbd, 0xec, 0x99, 0x49, 0xbd, 0xba, 0x20, 0xef, 0xbc, + 0x37, 0xaf, 0x87, 0xbd, 0x29, 0xcc, 0xd7, 0xbd, 0xa6, 0xec, 0x66, 0xbd, 0x19, 0x1e, 0xa9, 0xbd, + 0xef, 0x43, 0xa4, 0xbd, 0x21, 0x46, 0xc1, 0xbd, 0x83, 0xf2, 0xb4, 0xbd, 0xd9, 0x66, 0x90, 0xbd, + 0xda, 0xcf, 0x80, 0xbd, 0x4a, 0x1b, 0x9a, 0xbd, 0x5c, 0xbf, 0x8e, 0xbd, 0xe5, 0x3b, 0x81, 0xbd, + 0x5a, 0x30, 0xdf, 0xbd, 0x40, 0xb5, 0xb1, 0xbd, 0xf4, 0xd3, 0x6b, 0xbd, 0xb8, 0xfd, 0xb4, 0xbd, + 0xd0, 0xfd, 0xbf, 0xbd, 0x0e, 0xdc, 0x61, 0xbd, 0xb0, 0xc1, 0x7e, 0xbd, 0x80, 0x12, 0xab, 0xbd, + 0x90, 0x48, 0x4f, 0x3d, 0xbe, 0x26, 0x54, 0x3d, 0x60, 0x3e, 0x23, 0x3d, 0x1a, 0xe2, 0x42, 0x3d, + 0x74, 0x6f, 0x4b, 0x3d, 0xc6, 0x08, 0x15, 0x3d, 0xd3, 0x64, 0x35, 0x3d, 0x04, 0x98, 0x48, 0x3d, + 0x46, 0xf8, 0x5a, 0x3d, 0x37, 0xe4, 0x8e, 0x3d, 0xab, 0xd3, 0x58, 0x3d, 0x35, 0xab, 0x0b, 0x3d, + 0xd0, 0xa0, 0x6e, 0x3d, 0x2a, 0xce, 0x4f, 0x3d, 0xa9, 0xdd, 0x3b, 0x3d, 0xcd, 0x0c, 0x8a, 0x3d, + 0x12, 0xac, 0x30, 0x3d, 0x21, 0xa2, 0x57, 0x3d, 0x17, 0xc9, 0x5f, 0x3d, 0x63, 0xb7, 0x75, 0x3d, + 0xea, 0xc1, 0x72, 0x3d, 0x05, 0x45, 0x19, 0x3d, 0x40, 0x9e, 0x83, 0x3d, 0x24, 0xa5, 0x35, 0x3d, + 0x1d, 0x88, 0x8b, 0x3d, 0xce, 0xec, 0x4d, 0x3d, 0xe3, 0x7c, 0x70, 0x3d, 0x63, 0xd4, 0x71, 0x3d, + 0x3c, 0xbc, 0x74, 0x3d, 0x62, 0x68, 0x4b, 0x3d, 0x68, 0xa9, 0x68, 0x3d, 0x8a, 0xed, 0x59, 0x3d, + 0xb2, 0x50, 0x95, 0x3c, 0xe8, 0x6c, 0x3b, 0x3c, 0x34, 0x70, 0x16, 0x3b, 0xa0, 0x48, 0xe0, 0x3b, + 0xc4, 0x7c, 0x85, 0x3c, 0x0e, 0x60, 0x8b, 0x3c, 0xf2, 0xa0, 0xe1, 0x3b, 0x42, 0x88, 0xd5, 0x3b, + 0xbe, 0x47, 0xfc, 0x3c, 0x46, 0x4b, 0x45, 0x3c, 0xe0, 0x1a, 0x95, 0x3c, 0xe8, 0x5d, 0x99, 0xbb, + 0x9c, 0x0f, 0xc3, 0x3b, 0xf6, 0x04, 0x10, 0x3d, 0xae, 0xbe, 0xd8, 0x3b, 0x4f, 0xff, 0xac, 0x3c, + 0x50, 0xe0, 0xb0, 0x3c, 0x1c, 0x69, 0xe8, 0x3c, 0x28, 0xc3, 0x20, 0x3c, 0x34, 0x92, 0xd1, 0x3b, + 0xb6, 0x38, 0x81, 0x3c, 0x3e, 0x76, 0x8a, 0x3c, 0x07, 0xb3, 0x10, 0x3c, 0x1f, 0x8b, 0x4d, 0x3c, + 0x6c, 0x2f, 0x62, 0x3c, 0x28, 0x90, 0xb7, 0x3c, 0xdc, 0x9f, 0x08, 0x3d, 0xe5, 0xaf, 0x8c, 0x3c, + 0xfc, 0x4a, 0xed, 0x3c, 0x45, 0xd4, 0x29, 0x3c, 0x58, 0xbb, 0xdf, 0x3c, 0x5e, 0x2b, 0x7d, 0x3c, + 0x25, 0x34, 0xc4, 0xbb, 0xdc, 0x5e, 0xb7, 0xbb, 0x8d, 0xab, 0x9e, 0xbb, 0x20, 0x93, 0xd4, 0xbb, + 0x5b, 0x8c, 0x9d, 0xbb, 0xbb, 0xfd, 0x86, 0xbb, 0x5f, 0x6b, 0x9b, 0xbb, 0xd5, 0x79, 0xd3, 0xbb, + 0xe8, 0x02, 0xc4, 0xbb, 0xa4, 0xb5, 0x03, 0xbc, 0xbb, 0x40, 0xec, 0xbb, 0xec, 0x0c, 0x80, 0xbb, + 0xe8, 0xbd, 0xdf, 0xbb, 0xd6, 0xe1, 0xbf, 0xbb, 0x4a, 0xb2, 0xae, 0xbb, 0x99, 0xdc, 0x09, 0xbc, + 0x0d, 0xc6, 0x9f, 0xbb, 0xa5, 0x29, 0xca, 0xbb, 0x15, 0x1d, 0xbc, 0xbb, 0xed, 0x52, 0xe4, 0xbb, + 0x1f, 0x74, 0xf8, 0xbb, 0xfc, 0x6c, 0x81, 0xbb, 0xb4, 0x38, 0xfe, 0xbb, 0x8a, 0x9a, 0xa9, 0xbb, + 0xe6, 0x75, 0xee, 0xbb, 0x88, 0x0b, 0xbd, 0xbb, 0x82, 0x2c, 0x0c, 0xbc, 0xed, 0xcd, 0xdd, 0xbb, + 0xd9, 0xbe, 0xee, 0xbb, 0xcd, 0xd7, 0xc8, 0xbb, 0xb0, 0x00, 0x00, 0xbc, 0x85, 0x97, 0xc3, 0xbb, + 0xbe, 0x34, 0x34, 0xbb, 0xcc, 0x89, 0xcc, 0xba, 0x74, 0x63, 0x8e, 0xba, 0x69, 0x31, 0x1e, 0xbb, + 0x96, 0xc7, 0xc0, 0xba, 0x11, 0x07, 0x0f, 0xbb, 0x04, 0x78, 0x83, 0xba, 0x4a, 0xb5, 0x10, 0xbb, + 0xc3, 0x7e, 0x73, 0xbb, 0xc6, 0x80, 0x14, 0xbb, 0xe4, 0xdd, 0x74, 0xbb, 0x40, 0x91, 0xfa, 0xb7, + 0xab, 0x61, 0xd1, 0xba, 0x79, 0xda, 0x8c, 0xbb, 0x1d, 0x50, 0xb8, 0xba, 0xad, 0x15, 0x78, 0xbb, + 0x71, 0x5f, 0x32, 0xbb, 0x92, 0x3d, 0x75, 0xbb, 0x60, 0xf3, 0xa4, 0xba, 0xf0, 0xd4, 0xd2, 0xba, + 0x58, 0xdf, 0x53, 0xbb, 0x78, 0xc7, 0xf8, 0xba, 0x07, 0x25, 0x11, 0xbb, 0xe7, 0xca, 0x02, 0xbb, + 0x96, 0xc9, 0xf1, 0xba, 0xe6, 0x75, 0x43, 0xbb, 0xcc, 0x41, 0xc5, 0xbb, 0x26, 0x3b, 0x29, 0xbb, + 0xda, 0xdf, 0x89, 0xbb, 0x4c, 0x2a, 0x10, 0xbb, 0x62, 0x5e, 0x9f, 0xbb, 0x52, 0xc4, 0x0f, 0xbb, + 0x8a, 0xd6, 0x3f, 0xbb, 0x84, 0xdf, 0x4f, 0xbb, 0x86, 0x0c, 0x09, 0xbb, 0x79, 0xd2, 0x0f, 0xbb, + 0xf2, 0xfb, 0x5d, 0xbb, 0x44, 0x35, 0x13, 0xbb, 0xf1, 0xba, 0x30, 0xbb, 0x68, 0xee, 0x1a, 0xbb, + 0xc4, 0x5c, 0x5f, 0xbb, 0x6e, 0x6d, 0x82, 0xbb, 0x28, 0xf2, 0x28, 0xbb, 0xce, 0x09, 0xef, 0xba, + 0x59, 0xaa, 0x52, 0xbb, 0xa9, 0xf8, 0x52, 0xbb, 0x78, 0xe1, 0x28, 0xbb, 0x6a, 0x8e, 0x6e, 0xbb, + 0x82, 0xec, 0x2f, 0xbb, 0xe8, 0x37, 0x51, 0xbb, 0x08, 0xbd, 0x5e, 0xbb, 0x6e, 0x34, 0x5b, 0xbb, + 0x9a, 0x6e, 0x49, 0xbb, 0x46, 0x4e, 0x20, 0xbb, 0xbd, 0xd6, 0x62, 0xbb, 0x78, 0xa2, 0x27, 0xbb, + 0xd8, 0x94, 0x89, 0xbb, 0xef, 0xf9, 0x47, 0xbb, 0xf3, 0xda, 0x33, 0xbb, 0xc2, 0x8e, 0x63, 0xbb, + 0xe1, 0x81, 0x61, 0xbb, 0x4c, 0xc8, 0x2d, 0xbb, 0xb1, 0x28, 0x39, 0xbb, 0x3a, 0x56, 0x51, 0xbb, + 0x89, 0x79, 0xab, 0xba, 0x37, 0x46, 0x97, 0xba, 0x60, 0x9d, 0x5f, 0xb9, 0xd4, 0x80, 0x25, 0xb9, + 0xf8, 0x42, 0xe1, 0xba, 0x24, 0x76, 0xa7, 0xba, 0x4d, 0xb6, 0x58, 0xba, 0x74, 0xf3, 0x7c, 0xb9, + 0x3e, 0x19, 0x15, 0xbb, 0xf0, 0x5f, 0x8f, 0xba, 0x81, 0x81, 0x5c, 0xba, 0x84, 0xae, 0xf1, 0x38, + 0x6c, 0x13, 0x1d, 0xba, 0x4d, 0xf1, 0x1f, 0xbb, 0xd6, 0x0c, 0x21, 0xba, 0x1d, 0xc0, 0xad, 0xba, + 0x5c, 0xac, 0xd2, 0xba, 0x0c, 0xad, 0x01, 0xbb, 0x98, 0xb9, 0x97, 0xba, 0xf6, 0x5a, 0x2e, 0xba, + 0x9a, 0xa8, 0x6b, 0xba, 0x50, 0x26, 0xba, 0xba, 0x2f, 0xc2, 0x34, 0xba, 0x8c, 0xb5, 0x7d, 0xba, + 0x61, 0x4c, 0xc2, 0xba, 0xb4, 0x52, 0xd9, 0xba, 0x73, 0x87, 0xc1, 0xba, 0xd0, 0xbc, 0xb4, 0xba, + 0xc6, 0x45, 0xf9, 0xba, 0x94, 0x16, 0x34, 0xba, 0x6a, 0x0b, 0xb1, 0xba, 0xba, 0x41, 0xab, 0xba, + 0x44, 0xaf, 0xa2, 0xbd, 0xfb, 0x70, 0x75, 0xbd, 0x3c, 0xc4, 0x6a, 0xbd, 0xe6, 0xd8, 0xbd, 0xbd, + 0x58, 0x22, 0x37, 0xbd, 0xc5, 0x27, 0x5f, 0xbd, 0xdd, 0xd6, 0x43, 0xbd, 0xce, 0xdb, 0xb4, 0xbd, + 0x96, 0x7b, 0xa8, 0xbd, 0xb8, 0xe6, 0xbe, 0xbd, 0x47, 0x00, 0xe3, 0xbd, 0x71, 0x8e, 0x17, 0xbd, + 0xe5, 0x33, 0xa0, 0xbd, 0x5c, 0x01, 0xb4, 0xbd, 0xb9, 0xbb, 0x7c, 0xbd, 0x50, 0xce, 0xee, 0xbd, + 0xd3, 0xfb, 0x85, 0xbd, 0x64, 0x0b, 0xb4, 0xbd, 0x34, 0x0d, 0x68, 0xbd, 0x06, 0x80, 0xa1, 0xbd, + 0x34, 0xfa, 0xd8, 0xbd, 0xbd, 0x69, 0x40, 0xbd, 0x0c, 0x17, 0xc2, 0xbd, 0x04, 0xed, 0x84, 0xbd, + 0x3e, 0x5f, 0x9a, 0xbd, 0x3e, 0x36, 0x9d, 0xbd, 0x2e, 0x2b, 0x19, 0xbe, 0x28, 0x34, 0xaa, 0xbd, + 0x21, 0xd0, 0xd8, 0xbd, 0x19, 0xab, 0xa3, 0xbd, 0x8f, 0x1b, 0x02, 0xbe, 0xee, 0x4c, 0x91, 0xbd, + 0x98, 0x13, 0x77, 0xbb, 0x6b, 0xbf, 0x87, 0xbc, 0x22, 0xee, 0x3d, 0xbc, 0x07, 0x8b, 0xa1, 0xbc, + 0xec, 0xe3, 0x8a, 0xbd, 0x85, 0x7b, 0x15, 0xbd, 0x98, 0x17, 0xbc, 0xbc, 0x98, 0xf1, 0x88, 0x3c, + 0x14, 0x13, 0xea, 0xbc, 0xf3, 0xa6, 0x48, 0xbd, 0xb4, 0xb8, 0x9a, 0xbc, 0x9a, 0x6f, 0x9f, 0xbb, + 0x4f, 0xc8, 0xb1, 0xbc, 0x3f, 0x67, 0x0d, 0xbd, 0x11, 0x93, 0x41, 0xbd, 0x89, 0x60, 0x39, 0xbd, + 0x9b, 0xfc, 0x93, 0xbc, 0xca, 0x0d, 0x0f, 0xbd, 0x8a, 0x6f, 0x17, 0xbd, 0xb4, 0x33, 0x0c, 0xbd, + 0x55, 0x6b, 0x2b, 0xbd, 0xd4, 0xd5, 0x98, 0xbd, 0x16, 0x3c, 0xbf, 0xbc, 0x6e, 0x4c, 0x13, 0xbc, + 0x7f, 0x0b, 0x10, 0xbd, 0x0f, 0x3b, 0xb1, 0xbc, 0x37, 0xf8, 0x1f, 0xbb, 0xd0, 0x41, 0x88, 0xbd, + 0xc5, 0x56, 0x60, 0xbd, 0x82, 0x15, 0x2f, 0xbd, 0x60, 0x62, 0xbe, 0xbc, 0x76, 0x32, 0x6c, 0xbd, + 0x7c, 0x23, 0x6f, 0x3c, 0x3a, 0x6b, 0x85, 0x3c, 0xa4, 0x33, 0xa8, 0x3c, 0x82, 0x0c, 0xaa, 0x3c, + 0xfb, 0x49, 0xf4, 0x3c, 0x76, 0xa5, 0x67, 0x3c, 0x96, 0x09, 0x00, 0x3c, 0xf1, 0x6a, 0x82, 0x3c, + 0x71, 0x4d, 0x34, 0x3c, 0xcb, 0x89, 0x06, 0x3d, 0xf5, 0x20, 0xbd, 0x3c, 0x8e, 0xa9, 0x8c, 0x3b, + 0xd6, 0xc9, 0xaa, 0x3c, 0x63, 0xba, 0x42, 0x3c, 0x35, 0x99, 0xb2, 0x3c, 0x86, 0x62, 0xed, 0x3c, + 0x6f, 0xa9, 0x8e, 0x3c, 0x14, 0x81, 0x83, 0x3c, 0xca, 0xee, 0xca, 0x3c, 0x22, 0x35, 0x1e, 0x3d, + 0x52, 0xf0, 0xce, 0x3c, 0x69, 0x4e, 0xc9, 0x3c, 0xd5, 0x61, 0x05, 0x3d, 0x6c, 0xa2, 0xd4, 0x3a, + 0x2c, 0xf4, 0x08, 0x3d, 0x06, 0x9f, 0xe1, 0x3c, 0xaf, 0xc3, 0x89, 0x3c, 0xd6, 0xda, 0xc3, 0x3c, + 0x4e, 0x80, 0x0d, 0x3d, 0x0e, 0x1b, 0x05, 0x3d, 0x92, 0xdb, 0x0b, 0x3d, 0x17, 0xa2, 0x1a, 0x3d, + 0xca, 0x9e, 0x99, 0x3b, 0x6d, 0xb9, 0xc2, 0xbb, 0x90, 0x0b, 0x18, 0x3b, 0x5c, 0x90, 0x30, 0x3c, + 0xf2, 0x54, 0x4c, 0x3c, 0x69, 0x9d, 0xc5, 0x3b, 0xfd, 0xc2, 0xef, 0xbb, 0x10, 0xd9, 0xa0, 0xbb, + 0x3b, 0x79, 0xc3, 0x3c, 0x90, 0x61, 0x9e, 0x3b, 0x0f, 0xdb, 0x3c, 0x3c, 0xff, 0x71, 0x06, 0xbc, + 0x20, 0xa2, 0x2e, 0xb9, 0xf6, 0xef, 0xa3, 0x3c, 0xc8, 0x9c, 0x6a, 0x3c, 0x24, 0x66, 0xb5, 0x3b, + 0x45, 0x03, 0x2d, 0x3c, 0x5e, 0x48, 0x05, 0x3c, 0x5a, 0xb9, 0x2c, 0x3c, 0xda, 0xbe, 0x91, 0x3c, + 0x68, 0xc6, 0x81, 0x3c, 0x9f, 0x27, 0x37, 0x3c, 0x12, 0xf1, 0x86, 0x3b, 0x06, 0xef, 0xb7, 0x3a, + 0xfc, 0xba, 0x63, 0xbb, 0x40, 0x37, 0x0c, 0xb9, 0xb9, 0x28, 0x35, 0x3c, 0x31, 0x7a, 0x46, 0x3c, + 0xe6, 0xc5, 0x88, 0x3c, 0x75, 0xfa, 0xc0, 0x3c, 0xb7, 0x07, 0x7b, 0x3c, 0x09, 0x48, 0x07, 0x3c, + 0xfc, 0x9b, 0x16, 0xbb, 0x92, 0xdc, 0xd0, 0xba, 0xcc, 0x1e, 0x38, 0xbb, 0x13, 0x4b, 0x44, 0xbb, + 0x0d, 0xdd, 0x39, 0xbb, 0xf3, 0x0c, 0x9e, 0xba, 0x90, 0x0e, 0x2c, 0xb8, 0x50, 0xef, 0x30, 0xbb, + 0xf2, 0xe6, 0xf7, 0xba, 0x28, 0xb7, 0x67, 0xbb, 0x71, 0x77, 0x5f, 0xbb, 0x50, 0xc7, 0x95, 0xb8, + 0xe2, 0xfd, 0x1c, 0xbb, 0x4d, 0xfe, 0xd5, 0xba, 0x3c, 0x00, 0x1e, 0xbb, 0x4a, 0x22, 0x4c, 0xbb, + 0x6a, 0x55, 0x26, 0xbb, 0x17, 0x55, 0xd8, 0xba, 0x66, 0x4c, 0x45, 0xbb, 0x68, 0x9c, 0xb5, 0xbb, + 0x1a, 0xd0, 0x50, 0xbb, 0x34, 0x59, 0xe6, 0xba, 0xc6, 0x4e, 0x8d, 0xbb, 0x70, 0xf7, 0x67, 0xb7, + 0x88, 0x7a, 0x70, 0xbb, 0xb5, 0x0b, 0x5f, 0xbb, 0x7e, 0x37, 0x44, 0xbb, 0x5f, 0x61, 0x01, 0xbb, + 0x12, 0x55, 0x89, 0xbb, 0x28, 0xa2, 0x97, 0xbb, 0x35, 0x8c, 0xa7, 0xbb, 0x1f, 0x1a, 0x88, 0xbb, + 0x19, 0x5b, 0xab, 0xba, 0x22, 0xff, 0x24, 0x3a, 0xe2, 0x86, 0x81, 0xba, 0x36, 0xe1, 0x02, 0xbb, + 0x7e, 0x54, 0x90, 0xba, 0x2b, 0x1e, 0xcf, 0xb9, 0x36, 0xf5, 0xc0, 0x3a, 0x66, 0xb9, 0x21, 0xba, + 0x7b, 0x35, 0x50, 0xbb, 0x96, 0x85, 0x40, 0xba, 0xb3, 0xb4, 0x13, 0xbb, 0xc3, 0x75, 0x9a, 0x3a, + 0xfc, 0x27, 0x9a, 0xb9, 0x5e, 0x41, 0x20, 0xbb, 0x2a, 0xef, 0xd9, 0xba, 0x01, 0x06, 0x4a, 0xba, + 0x8d, 0xd1, 0xf2, 0xba, 0x87, 0x1a, 0x61, 0xba, 0x6a, 0x15, 0xd0, 0xba, 0xaf, 0xaf, 0x62, 0xbb, + 0xf9, 0x14, 0x13, 0xbb, 0xef, 0x20, 0xdb, 0xb9, 0x75, 0x62, 0xc0, 0xba, 0x40, 0x67, 0x07, 0x37, + 0xc0, 0xd1, 0xb5, 0x37, 0x94, 0xb0, 0x26, 0xba, 0x09, 0x78, 0x1e, 0xbb, 0x86, 0x59, 0x50, 0xba, + 0x61, 0xae, 0x1d, 0xbb, 0x31, 0xae, 0x74, 0xbb, 0x30, 0xbc, 0x53, 0xbb, 0xa0, 0xce, 0x9d, 0xba, + 0xc9, 0x97, 0x10, 0xba, 0x4e, 0xf9, 0x7b, 0xba, 0x4b, 0xe3, 0x74, 0xba, 0xf4, 0xe6, 0x7e, 0xba, + 0x1b, 0x25, 0x09, 0xbb, 0x50, 0x56, 0x8b, 0xba, 0x4d, 0x1d, 0x49, 0xba, 0x4d, 0x19, 0xc7, 0xb9, + 0xf3, 0xd4, 0x1a, 0xba, 0x4a, 0x45, 0x02, 0xbb, 0xca, 0xd9, 0x87, 0xba, 0x18, 0xb1, 0xb4, 0xb9, + 0x92, 0x27, 0x96, 0xba, 0x94, 0x17, 0x4a, 0xba, 0x05, 0xf0, 0xba, 0xba, 0x8f, 0x3a, 0xe8, 0xba, + 0x98, 0x84, 0x57, 0xba, 0xa2, 0xde, 0x8d, 0xba, 0xc3, 0x40, 0xb9, 0xba, 0x6e, 0x79, 0xeb, 0xba, + 0xc0, 0xa1, 0xbd, 0xba, 0x0d, 0xbf, 0x04, 0xbb, 0xb3, 0x4e, 0xcc, 0xba, 0xf4, 0x83, 0x4a, 0xb9, + 0x9d, 0xdc, 0xf6, 0xba, 0x73, 0xda, 0xb6, 0xba, 0x06, 0xc0, 0x0b, 0xba, 0x8d, 0x01, 0xf3, 0xba, + 0x10, 0x0c, 0x03, 0xbb, 0x61, 0x82, 0xd6, 0xba, 0xd1, 0x7e, 0xc1, 0xba, 0x72, 0x00, 0x15, 0xbb, + 0xa0, 0xae, 0x6b, 0xb8, 0x51, 0x8b, 0x1d, 0x39, 0x98, 0x92, 0xc5, 0xb7, 0x75, 0x26, 0xf8, 0xb9, + 0x68, 0x97, 0xa0, 0xba, 0x38, 0x67, 0x2c, 0xba, 0xa8, 0xe8, 0x31, 0xb7, 0x48, 0x75, 0x2d, 0x3a, + 0x7c, 0xac, 0xa1, 0xba, 0xa6, 0xe9, 0x19, 0xba, 0x31, 0x5c, 0xf0, 0xb9, 0x58, 0xf3, 0x92, 0x39, + 0x02, 0xeb, 0xc6, 0xb8, 0x01, 0x4e, 0x9a, 0xba, 0x5d, 0xe7, 0x89, 0xba, 0x8b, 0x33, 0x1d, 0xba, + 0xa0, 0x56, 0xfb, 0xb9, 0x6f, 0xf5, 0x33, 0xba, 0x24, 0xfe, 0x37, 0xba, 0x9a, 0xe0, 0x45, 0xba, + 0x40, 0xcd, 0x7f, 0xba, 0x9f, 0xb5, 0xb1, 0xba, 0x14, 0x13, 0x0f, 0xb9, 0xfe, 0x08, 0x3f, 0xb9, + 0x1c, 0xce, 0x1e, 0xb8, 0x90, 0x71, 0x3d, 0xb7, 0x38, 0x82, 0x80, 0xb9, 0x8f, 0xb6, 0xa5, 0xba, + 0x5e, 0x1c, 0x91, 0xba, 0x42, 0xec, 0x9b, 0xba, 0x2c, 0x45, 0x0c, 0xba, 0xea, 0x67, 0x51, 0xba, + 0x65, 0x47, 0x22, 0xbd, 0x27, 0xa6, 0xdb, 0xbb, 0x30, 0x20, 0x23, 0xbd, 0xc2, 0xd9, 0x51, 0xbd, + 0x56, 0xf6, 0xdf, 0xbc, 0x4b, 0x97, 0x11, 0xbc, 0x7e, 0xea, 0xb2, 0x3c, 0x0e, 0xf4, 0x29, 0xbd, + 0x68, 0x4a, 0x4c, 0xbd, 0x86, 0x9a, 0x12, 0xbd, 0xf2, 0xa3, 0x71, 0xbd, 0x89, 0xf9, 0x5f, 0x3c, + 0xf8, 0x6b, 0xcb, 0xbc, 0x8d, 0x8f, 0x18, 0xbd, 0x7b, 0x51, 0x0b, 0xbd, 0x08, 0xb3, 0x04, 0xbd, + 0x26, 0xd2, 0x37, 0xbd, 0x42, 0xb8, 0x9f, 0xbc, 0x5c, 0x05, 0x2e, 0xbd, 0x46, 0xdd, 0xbd, 0xbd, + 0x3c, 0x62, 0x4f, 0xbd, 0xb7, 0x96, 0xbe, 0xbb, 0xe0, 0x71, 0x72, 0xbd, 0x18, 0x0e, 0x0f, 0x3b, + 0x99, 0x13, 0x04, 0xbd, 0xe3, 0xf9, 0x23, 0xbd, 0x89, 0xe3, 0x76, 0xbd, 0x2c, 0xdd, 0x6d, 0xbc, + 0x8c, 0xe8, 0x77, 0xbd, 0x91, 0xc0, 0xaa, 0xbd, 0x98, 0x58, 0xb4, 0xbd, 0xc2, 0xd2, 0x3b, 0xbd, + 0x5a, 0x21, 0xe0, 0xbf, 0xf4, 0x00, 0x45, 0xc0, 0x4f, 0xe2, 0x2f, 0xc0, 0x39, 0x98, 0x5f, 0xc0, + 0xe1, 0xeb, 0x0c, 0xc0, 0x75, 0xb9, 0x21, 0xc0, 0xf2, 0x7a, 0x02, 0xc0, 0xfe, 0xc7, 0x30, 0xc0, + 0xda, 0x1a, 0x02, 0xc0, 0x09, 0x4a, 0x9e, 0xbf, 0xde, 0xcc, 0x36, 0xc0, 0x74, 0x06, 0xd1, 0xbf, + 0xb7, 0x7d, 0xf0, 0xbe, 0xa4, 0x6b, 0xa7, 0xbf, 0x59, 0x71, 0x5d, 0xc0, 0x22, 0xf9, 0xc9, 0xbf, + 0xa8, 0x34, 0x52, 0xbf, 0xbf, 0xd4, 0x17, 0xc0, 0x9c, 0x95, 0x55, 0xc0, 0x56, 0x74, 0x08, 0xc0, + 0x94, 0x7c, 0x0e, 0xc0, 0x57, 0x62, 0xcd, 0xbf, 0xb8, 0xdc, 0x49, 0xc0, 0xb1, 0xba, 0x85, 0xbf, + 0x8f, 0x84, 0x28, 0xbf, 0x26, 0x92, 0x9b, 0xc0, 0x0c, 0x12, 0xeb, 0xbe, 0xf0, 0x02, 0x0b, 0xc0, + 0xc4, 0x1b, 0xeb, 0xbf, 0xdb, 0xe7, 0x28, 0xbf, 0xff, 0x18, 0x51, 0xbf, 0x63, 0xae, 0x29, 0xc0, + 0x68, 0xe4, 0x72, 0x3f, 0xd6, 0x97, 0xae, 0x3f, 0xc9, 0x39, 0xd2, 0x3f, 0x6a, 0x40, 0xd3, 0x3f, + 0x47, 0x7b, 0xb8, 0x3f, 0x32, 0xf9, 0xe6, 0x3f, 0xde, 0xbb, 0xa3, 0x3f, 0x0a, 0x4f, 0xd7, 0x3f, + 0x0e, 0xb7, 0xa9, 0x3f, 0xdc, 0x20, 0xc0, 0x3f, 0x0a, 0x8b, 0xd9, 0x3f, 0xa4, 0x5a, 0xb9, 0x3f, + 0xfc, 0xcf, 0x90, 0x3f, 0x1a, 0xee, 0xb9, 0x3f, 0x21, 0xd3, 0x96, 0x3f, 0xca, 0x2b, 0x8d, 0x3f, + 0x0c, 0x63, 0xa3, 0x3f, 0x55, 0xd2, 0x8d, 0x3f, 0x44, 0x04, 0xd1, 0x3f, 0xd2, 0x49, 0x67, 0x3f, + 0x3d, 0x83, 0xe1, 0x3f, 0x0a, 0x7a, 0x7a, 0x3f, 0x60, 0xe9, 0xb3, 0x3f, 0xc1, 0x62, 0x4c, 0x3f, + 0xf7, 0x29, 0x14, 0x3f, 0x98, 0x48, 0xbc, 0x3f, 0x76, 0x36, 0x16, 0x3f, 0x8e, 0x23, 0xf8, 0x3f, + 0x1e, 0x83, 0x06, 0x40, 0x82, 0x54, 0xfd, 0x3e, 0x8e, 0x2c, 0x41, 0x3f, 0xda, 0xaa, 0xa6, 0x3f, + 0x84, 0x46, 0x67, 0x3f, 0xa8, 0x44, 0x19, 0x3f, 0x7f, 0x4f, 0xd1, 0x3e, 0xd5, 0xe8, 0xbd, 0x3e, + 0xff, 0x25, 0xac, 0x3e, 0xba, 0x9e, 0x05, 0x3f, 0xba, 0xa3, 0xc9, 0x3e, 0xa3, 0x9b, 0xf9, 0x3e, + 0x86, 0xc7, 0x31, 0x3f, 0xba, 0xe0, 0xe4, 0x3e, 0x34, 0x38, 0xdf, 0x3d, 0x34, 0x5b, 0xda, 0xbd, + 0xd6, 0x88, 0x01, 0xbe, 0x62, 0xef, 0x33, 0x3e, 0x1a, 0x4b, 0x18, 0x3f, 0x8a, 0x83, 0x7f, 0x3e, + 0x60, 0xb8, 0xae, 0x3c, 0x8e, 0x52, 0xc9, 0x3e, 0x84, 0xb2, 0xb6, 0x3e, 0x80, 0x10, 0xdf, 0x3e, + 0xd6, 0xec, 0xc8, 0x3e, 0x10, 0x9d, 0x8f, 0x3e, 0x3a, 0xf6, 0x2a, 0x3f, 0x8b, 0x02, 0xb2, 0x3e, + 0xbd, 0x45, 0xa1, 0xbe, 0x94, 0x88, 0xd0, 0x3f, 0x74, 0xd7, 0x7f, 0x3f, 0xca, 0x30, 0x37, 0x3f, + 0x1b, 0x43, 0xe6, 0x3e, 0xd6, 0x23, 0x3b, 0x3f, 0x80, 0xc4, 0xf4, 0x3c, 0x8c, 0x06, 0x5b, 0x3f, + 0xeb, 0x38, 0x04, 0xbe, 0xc2, 0x18, 0x07, 0xbe, 0xaa, 0x65, 0x33, 0xbe, 0x10, 0x3e, 0x19, 0xbe, + 0x44, 0x6f, 0x23, 0xbe, 0x9a, 0xa3, 0x59, 0xbe, 0x1c, 0x0e, 0x13, 0xbe, 0x47, 0x0c, 0x3d, 0xbe, + 0x1d, 0xd3, 0x29, 0xbe, 0x3a, 0x3b, 0x53, 0xbe, 0x86, 0x23, 0x29, 0xbe, 0xd6, 0x39, 0x21, 0xbe, + 0xf5, 0x43, 0x17, 0xbe, 0xa6, 0x77, 0x3b, 0xbe, 0xec, 0xb0, 0xba, 0xbd, 0x4a, 0x52, 0x00, 0xbe, + 0xfe, 0x20, 0x29, 0xbe, 0xeb, 0xae, 0xda, 0xbd, 0x94, 0x2d, 0x1b, 0xbe, 0x86, 0x9b, 0xb0, 0xbd, + 0xfe, 0xb6, 0x56, 0xbe, 0xae, 0xc1, 0xdb, 0xbd, 0x2f, 0x6d, 0x0e, 0xbe, 0xe9, 0x8a, 0xd1, 0xbd, + 0x52, 0x35, 0x49, 0xbd, 0x52, 0x69, 0x0f, 0xbe, 0xcb, 0x3f, 0xfb, 0xbd, 0xf6, 0x21, 0x82, 0xbe, + 0x76, 0x94, 0x8d, 0xbe, 0x03, 0xd0, 0xb5, 0xbd, 0xec, 0x1b, 0xb3, 0xbd, 0x31, 0x4f, 0x19, 0xbe, + 0xba, 0x26, 0xff, 0xbd, 0x03, 0xeb, 0x62, 0xbd, 0x88, 0x50, 0x54, 0xbd, 0xc2, 0xc8, 0xb1, 0xbc, + 0xc4, 0x1d, 0x49, 0xbd, 0xdf, 0x9d, 0xac, 0xbd, 0x70, 0x95, 0x61, 0xbd, 0xf4, 0x71, 0x85, 0xbd, + 0x29, 0x54, 0xd2, 0xbd, 0xc6, 0x9b, 0xce, 0xbd, 0xd0, 0x7c, 0xc8, 0xbb, 0x20, 0x97, 0x01, 0xbb, + 0x48, 0xb2, 0xb3, 0xbc, 0x0a, 0xde, 0x62, 0xbd, 0x95, 0x69, 0x06, 0xbd, 0x73, 0xbe, 0x23, 0xbd, + 0xd4, 0x69, 0x22, 0xbd, 0x6a, 0x98, 0x10, 0xbd, 0x90, 0x08, 0xc4, 0xbc, 0xf0, 0x9a, 0x21, 0xbd, + 0xbd, 0xfa, 0x94, 0xbd, 0x26, 0xa4, 0x19, 0xbd, 0x8a, 0xc3, 0x85, 0xbd, 0xd9, 0x79, 0x6a, 0xbd, + 0x38, 0xdf, 0x24, 0x3d, 0x82, 0x9c, 0x1f, 0xbe, 0x4b, 0xe0, 0x27, 0xbe, 0xcc, 0x07, 0x07, 0xbe, + 0x4f, 0xfc, 0xe3, 0xbd, 0x47, 0x31, 0xe6, 0xbd, 0x20, 0x83, 0x75, 0xbc, 0xdc, 0x2b, 0xd7, 0xbd, + 0x31, 0x04, 0x5b, 0xbd, 0x69, 0x7f, 0xc2, 0xbd, 0x88, 0x79, 0xd1, 0xbd, 0x88, 0x81, 0xec, 0xbd, + 0x56, 0x3d, 0xb1, 0xbd, 0xa0, 0x79, 0xd3, 0xbd, 0x71, 0xbf, 0x9d, 0xbd, 0xf8, 0xfc, 0xd2, 0xbd, + 0xbc, 0x70, 0x99, 0xbd, 0x28, 0x0b, 0x92, 0xbd, 0xa7, 0x3a, 0xe1, 0xbd, 0x95, 0xae, 0xa9, 0xbd, + 0x2f, 0x51, 0x54, 0xbd, 0x83, 0xb4, 0x97, 0xbd, 0x4a, 0x5e, 0xc1, 0xbd, 0x9f, 0x2c, 0x84, 0xbd, + 0xf2, 0x06, 0x7b, 0xbd, 0xdf, 0x00, 0x9c, 0xbd, 0xd3, 0x2f, 0xe6, 0xbd, 0x4d, 0x02, 0x83, 0xbd, + 0x13, 0x41, 0xc9, 0xbd, 0x7f, 0x76, 0x75, 0xbd, 0xb9, 0x82, 0xc6, 0xbd, 0x1a, 0x27, 0x30, 0xbd, + 0xb4, 0xf6, 0x15, 0xbd, 0xaa, 0x34, 0xed, 0xbd, 0xa6, 0x9a, 0x8c, 0xbc, 0x27, 0xb4, 0xcc, 0xbd, + 0xdc, 0x98, 0xd4, 0xbd, 0xa0, 0x39, 0xa7, 0xbc, 0x4e, 0x22, 0x2a, 0xbd, 0x32, 0x98, 0xa8, 0xbd, + 0x15, 0xb9, 0x51, 0xbd, 0xcf, 0x42, 0x68, 0xbd, 0xff, 0x4f, 0x26, 0xbd, 0x1f, 0xf9, 0x52, 0xbd, + 0x07, 0x2b, 0x00, 0xbd, 0xe9, 0x49, 0x20, 0xbd, 0x62, 0x2d, 0x06, 0xbd, 0x55, 0x53, 0x31, 0xbd, + 0x67, 0x8f, 0x31, 0xbd, 0x41, 0x77, 0x98, 0xbc, 0x1d, 0x6c, 0xf9, 0xbc, 0xae, 0xb1, 0xa7, 0xbb, + 0xa3, 0x28, 0x35, 0x3c, 0xf9, 0xa2, 0x27, 0xbc, 0x39, 0xa0, 0x85, 0xbd, 0xc6, 0x27, 0xb3, 0xbc, + 0xe0, 0xc3, 0xc0, 0x3a, 0x08, 0x9f, 0x25, 0xbd, 0xa2, 0x06, 0x47, 0xbd, 0x8c, 0x36, 0x26, 0xbd, + 0xcf, 0x1d, 0xf4, 0xbc, 0xca, 0x0d, 0xcd, 0xbc, 0x9e, 0xee, 0x75, 0xbd, 0x3a, 0xb2, 0xa7, 0xbc, + 0x45, 0x4e, 0x04, 0x3c, 0xda, 0x67, 0xfd, 0xbd, 0x1e, 0xce, 0x1a, 0xbd, 0x0e, 0xf0, 0x1e, 0xbd, + 0x5c, 0xb8, 0xad, 0xbc, 0x14, 0xfe, 0x03, 0xbd, 0x30, 0xb6, 0xad, 0xbb, 0xde, 0xbd, 0x75, 0xbd, + 0x77, 0xa7, 0x1a, 0xc0, 0xf6, 0x40, 0xab, 0xbf, 0x1e, 0xfa, 0xee, 0xbf, 0x74, 0xf3, 0x86, 0xbf, + 0xfd, 0x12, 0xe6, 0xbf, 0x2b, 0x48, 0x2d, 0xc0, 0xe6, 0xdc, 0xdc, 0xbf, 0xb2, 0xa4, 0x07, 0xc0, + 0x50, 0xd3, 0x20, 0xc0, 0x27, 0x5f, 0x49, 0xc0, 0x02, 0x43, 0x9b, 0xbf, 0xab, 0x50, 0xb1, 0xbf, + 0x0c, 0x46, 0xe5, 0xbf, 0xb4, 0xc3, 0x16, 0xc0, 0xaf, 0x99, 0x07, 0xbf, 0x64, 0x86, 0xbb, 0xbf, + 0x82, 0x54, 0x06, 0xc0, 0x32, 0xae, 0x80, 0xbf, 0xcb, 0xe2, 0x91, 0xbf, 0x50, 0x87, 0x61, 0xbf, + 0xac, 0xb9, 0x27, 0xc0, 0x4d, 0x2f, 0x9d, 0xbf, 0xaa, 0x68, 0xc1, 0xbf, 0xd6, 0x55, 0xc4, 0xbf, + 0xc9, 0xa4, 0x84, 0x3d, 0x05, 0x47, 0x07, 0xc0, 0x92, 0x05, 0x47, 0xc0, 0x81, 0x30, 0x73, 0xc0, + 0x01, 0xdc, 0x79, 0xc0, 0xf6, 0x49, 0x06, 0xc0, 0x11, 0xc1, 0x72, 0xbf, 0x81, 0x05, 0x0d, 0xc0, + 0x43, 0x26, 0x52, 0xc0, 0x24, 0x39, 0x84, 0xbf, 0xe1, 0xe6, 0x04, 0xc0, 0xd1, 0x83, 0xce, 0xbf, + 0x4a, 0x9d, 0xdc, 0xbf, 0x79, 0x7b, 0x22, 0xc0, 0xa3, 0xf6, 0x2b, 0xc0, 0x54, 0x01, 0x88, 0xbf, + 0xc2, 0xd2, 0x72, 0xbf, 0xc5, 0xb6, 0x24, 0xc0, 0x05, 0x6d, 0xf4, 0xbf, 0x11, 0x24, 0xd7, 0xbf, + 0xcb, 0x9e, 0x23, 0xbf, 0x94, 0x87, 0xfa, 0xbf, 0xe3, 0xd4, 0xda, 0xbf, 0x59, 0x8c, 0x1a, 0xc0, + 0xfb, 0x68, 0x1a, 0xc0, 0x40, 0xe7, 0x8a, 0xbf, 0x9e, 0x64, 0xc0, 0xbf, 0x97, 0x9b, 0x04, 0xc0, + 0xae, 0x3d, 0x04, 0xc0, 0x2e, 0xd6, 0x0e, 0xc0, 0xa3, 0x58, 0x00, 0xc0, 0x8e, 0xa2, 0x7b, 0xbf, + 0x97, 0x21, 0xb2, 0xbe, 0xf8, 0xc6, 0x08, 0xc0, 0x65, 0x84, 0x9a, 0xbf, 0x78, 0xb5, 0x28, 0xbf, + 0x6a, 0x3e, 0x04, 0xc0, 0xee, 0xa6, 0x4e, 0xc0, 0xbf, 0x65, 0xfd, 0xbf, 0x3b, 0x3b, 0x2b, 0xc0, + 0x82, 0x5a, 0xcf, 0x3f, 0x8b, 0x8b, 0x30, 0x3f, 0x9c, 0x02, 0xc0, 0x3f, 0xa0, 0xa1, 0x91, 0x3f, + 0x0e, 0x62, 0xaa, 0x3f, 0xe9, 0xc0, 0x9e, 0x3f, 0x23, 0xe3, 0x8c, 0x3f, 0x60, 0xbc, 0xc8, 0x3f, + 0x9e, 0xd7, 0xa6, 0x3f, 0xd6, 0x9a, 0xf7, 0x3f, 0xc1, 0xfb, 0xd4, 0x3f, 0x36, 0xe4, 0x96, 0x3f, + 0xb2, 0x44, 0xc4, 0x3e, 0xb4, 0x7a, 0xdd, 0x3f, 0x7f, 0x8a, 0xa5, 0x3f, 0x06, 0x5c, 0xa6, 0x3f, + 0xe6, 0x06, 0xa8, 0x3f, 0xd8, 0x5d, 0x8d, 0x3f, 0x4e, 0x44, 0x0c, 0x3f, 0xe1, 0x6f, 0x9b, 0x3f, + 0x34, 0x87, 0x99, 0x3f, 0x71, 0x67, 0xbd, 0x3f, 0x38, 0xdd, 0xa0, 0x3f, 0xfe, 0xd8, 0x5b, 0x3f, + 0xad, 0x53, 0x58, 0x3f, 0xa4, 0x3b, 0x78, 0x3f, 0xa0, 0x8f, 0x92, 0x3f, 0xe2, 0x5f, 0x9d, 0x3f, + 0x59, 0x10, 0xac, 0x3f, 0x83, 0x18, 0xa7, 0x3f, 0x10, 0x2f, 0x02, 0x40, 0x50, 0xde, 0xf3, 0x3f, + 0x8a, 0x5a, 0xd9, 0x3e, 0x31, 0xae, 0x56, 0x3e, 0x2c, 0xff, 0xdd, 0x3d, 0x74, 0x6c, 0x06, 0xbd, + 0x59, 0x9e, 0xd3, 0x3e, 0x1f, 0xb9, 0x0d, 0x3f, 0x0a, 0x25, 0x4a, 0x3f, 0xca, 0x15, 0xb1, 0x3e, + 0xce, 0x18, 0xa8, 0x3e, 0x30, 0xfa, 0xd8, 0x3c, 0x22, 0x4e, 0xcd, 0x3e, 0x2c, 0x14, 0xa0, 0x3e, + 0x45, 0x26, 0x81, 0x3e, 0x7d, 0x81, 0xb4, 0x3e, 0xf2, 0x1a, 0x7c, 0x3f, 0xe3, 0xb1, 0x8d, 0x3e, + 0x6c, 0xed, 0xae, 0x3e, 0x4a, 0x3a, 0xca, 0x3e, 0x45, 0xca, 0xc8, 0xbe, 0x65, 0x13, 0x20, 0x3f, + 0x74, 0xc8, 0x2a, 0x3e, 0x54, 0x0b, 0x47, 0x3e, 0xb0, 0xc8, 0x97, 0x3c, 0x28, 0xba, 0x2f, 0xbd, + 0xe2, 0x95, 0xf6, 0x3e, 0xc6, 0x55, 0x0d, 0x3f, 0xa2, 0x07, 0x6a, 0x3e, 0x00, 0xba, 0xf1, 0xbd, + 0x4a, 0x4b, 0x39, 0x3e, 0x12, 0xce, 0xdc, 0x3e, 0x26, 0x37, 0x0c, 0x3f, 0xc6, 0x5f, 0xf0, 0x3e, + 0xdd, 0x7a, 0x1e, 0xbe, 0x99, 0xcd, 0xa2, 0xbd, 0xc6, 0x14, 0x25, 0xbe, 0x6a, 0x03, 0xec, 0xbd, + 0x7d, 0xca, 0x26, 0xbe, 0x10, 0x31, 0x04, 0xbe, 0x42, 0x8f, 0xeb, 0xbd, 0xd0, 0x52, 0x5e, 0xbe, + 0xda, 0xa4, 0x38, 0xbe, 0x6d, 0xa2, 0x52, 0xbe, 0x2e, 0xee, 0x52, 0xbe, 0xeb, 0xb4, 0x0b, 0xbe, + 0xeb, 0xea, 0x47, 0xbd, 0x93, 0x04, 0x59, 0xbe, 0x14, 0xb7, 0x3e, 0xbe, 0x08, 0x60, 0x03, 0xbe, + 0xe6, 0xc5, 0x08, 0xbe, 0x60, 0xd2, 0x18, 0xbe, 0x90, 0x6b, 0x4c, 0xbc, 0xc1, 0xd0, 0x13, 0xbe, + 0x22, 0x7d, 0xf4, 0xbd, 0xbc, 0x0e, 0x21, 0xbe, 0x8e, 0x11, 0xfb, 0xbd, 0x41, 0x52, 0xc0, 0xbd, + 0x4a, 0x94, 0x0e, 0xbe, 0x94, 0x84, 0xd0, 0xbd, 0xc2, 0x5e, 0x12, 0xbe, 0xd0, 0x4f, 0x20, 0xbe, + 0x17, 0x36, 0x11, 0xbe, 0x0c, 0xc3, 0xe0, 0xbd, 0xb0, 0x74, 0x88, 0xbe, 0x29, 0x5b, 0x61, 0xbe, + 0x72, 0x45, 0x10, 0xbd, 0x9a, 0x01, 0x01, 0xbd, 0x2c, 0x95, 0xb4, 0xbc, 0x80, 0xf3, 0xed, 0x3a, + 0xd5, 0x9d, 0x91, 0xbd, 0x24, 0x83, 0x77, 0xbd, 0x12, 0xdb, 0xab, 0xbd, 0xbe, 0x4d, 0xc2, 0xbd, + 0x3d, 0x94, 0xa9, 0xbd, 0x80, 0x34, 0x74, 0xbc, 0x9c, 0x35, 0xa3, 0xbd, 0x89, 0x09, 0x4c, 0xbd, + 0xfa, 0x38, 0x12, 0xbd, 0x04, 0xdf, 0x97, 0xbd, 0xdc, 0x1f, 0x1f, 0xbe, 0x1e, 0x03, 0xd5, 0xbc, + 0x0e, 0x59, 0x15, 0xbd, 0x1e, 0x59, 0xa0, 0xbd, 0x1b, 0xfa, 0xa6, 0x3d, 0x04, 0xfe, 0xae, 0xbd, + 0x6d, 0x00, 0x84, 0xbc, 0x84, 0x82, 0xeb, 0xbc, 0xc0, 0x3e, 0xa1, 0x3a, 0xe0, 0x04, 0x75, 0xbb, + 0x30, 0xbb, 0xd2, 0xbd, 0xc6, 0xff, 0x75, 0xbd, 0xe4, 0x18, 0x52, 0xbd, 0x42, 0x9b, 0xb2, 0xbc, + 0x8a, 0xb7, 0xd0, 0xbc, 0x3c, 0xec, 0xb6, 0xbc, 0x45, 0x68, 0xf1, 0xbd, 0x22, 0xac, 0x9c, 0xbd, + 0xc9, 0x9f, 0xe1, 0xbd, 0x4b, 0xb6, 0x25, 0xbd, 0xe9, 0x4f, 0xb7, 0xbd, 0x23, 0x4a, 0x8f, 0xbd, + 0xba, 0x7a, 0x96, 0xbd, 0xb0, 0xbc, 0xa7, 0xbd, 0x1e, 0x08, 0x9c, 0xbd, 0xf6, 0xda, 0x93, 0xbd, + 0x65, 0x27, 0x78, 0xbd, 0x78, 0xbb, 0xeb, 0xbd, 0x7c, 0xb4, 0xb6, 0xbd, 0xde, 0xe6, 0x8b, 0xbd, + 0xfc, 0x31, 0xb3, 0xbc, 0x92, 0xab, 0xbe, 0xbd, 0x1b, 0x78, 0x84, 0xbd, 0x1b, 0x28, 0xaf, 0xbd, + 0xfe, 0x6b, 0xae, 0xbd, 0xb8, 0x7f, 0x5f, 0xbd, 0x46, 0xd9, 0x4e, 0xbd, 0x92, 0xfa, 0x93, 0xbd, + 0x34, 0x60, 0x9e, 0xbd, 0x8c, 0xd8, 0xb8, 0xbd, 0xd9, 0x7e, 0xa4, 0xbd, 0x34, 0x35, 0x49, 0xbd, + 0x6e, 0xe3, 0x00, 0xbd, 0xc4, 0x48, 0x85, 0xbd, 0xb7, 0x94, 0x76, 0xbd, 0xb4, 0xe1, 0x6f, 0xbd, + 0xd0, 0x22, 0xa9, 0xbd, 0x80, 0x6e, 0xc7, 0xbd, 0x8e, 0xc5, 0xd1, 0xbd, 0x30, 0xa0, 0xe1, 0xbd, + 0x93, 0x6c, 0x4e, 0xbd, 0xfe, 0xd2, 0x86, 0xbc, 0x21, 0x37, 0xa3, 0xbc, 0x35, 0x64, 0x3b, 0xbc, + 0x9c, 0xfc, 0xe1, 0xbc, 0xd0, 0xd8, 0x42, 0xbd, 0x0c, 0x5c, 0x78, 0xbd, 0x1e, 0x78, 0x35, 0xbc, + 0xb7, 0xc7, 0x41, 0xbc, 0x4a, 0x71, 0xa1, 0xbc, 0x70, 0x84, 0xd5, 0xbc, 0x57, 0xd1, 0xcc, 0xbc, + 0x18, 0x00, 0x7b, 0xbc, 0x04, 0x5a, 0xc6, 0xbc, 0xc6, 0xc1, 0x49, 0xbd, 0xee, 0xd0, 0x0c, 0xbd, + 0xb6, 0xf8, 0x15, 0xbd, 0x2b, 0x68, 0x9c, 0xbc, 0x90, 0x1b, 0x65, 0xbb, 0xa6, 0x51, 0x2f, 0xbd, + 0xc4, 0x86, 0xd2, 0xbc, 0x7f, 0xe8, 0xd4, 0xbc, 0x50, 0xce, 0x96, 0xbc, 0xd0, 0x58, 0x84, 0xbb, + 0x2a, 0xd8, 0x59, 0xbc, 0xd6, 0x0c, 0x35, 0xbd, 0x8e, 0x8e, 0x6b, 0xbc, 0x72, 0x5c, 0x10, 0x3c, + 0xa8, 0x0e, 0xc8, 0xbc, 0x04, 0xd5, 0x5b, 0xbd, 0x46, 0x41, 0xe6, 0xbc, 0x30, 0x36, 0x1d, 0xbd, + 0x0e, 0x82, 0xa8, 0xbf, 0xad, 0x89, 0x7d, 0xbf, 0xff, 0x6f, 0xc5, 0xbf, 0x6b, 0x68, 0x5f, 0xbf, + 0x33, 0xe4, 0x0c, 0xc0, 0x5d, 0xab, 0xbf, 0xbf, 0x72, 0x7b, 0xc8, 0xbf, 0x64, 0xa1, 0x4f, 0xc0, + 0xdb, 0x44, 0x2e, 0xc0, 0xa4, 0x4f, 0xeb, 0xbf, 0xb0, 0x22, 0x2f, 0xc0, 0xc0, 0x68, 0xd6, 0xbf, + 0x8a, 0x79, 0x4a, 0xbf, 0xac, 0xaa, 0x2e, 0xc0, 0x85, 0xa0, 0x5a, 0xc0, 0x6b, 0xea, 0x90, 0xbf, + 0x1d, 0xa3, 0xa7, 0xbf, 0x58, 0x76, 0x12, 0xc0, 0x8e, 0x89, 0x93, 0x3f, 0x2f, 0x4d, 0x04, 0xc0, + 0x38, 0xfd, 0x80, 0xbf, 0x93, 0x77, 0xc4, 0xbf, 0x0a, 0x2d, 0x5d, 0xbf, 0x7f, 0x4f, 0x5a, 0xbf, + 0x06, 0xc5, 0x28, 0xc0, 0xa7, 0xb5, 0xa4, 0xbf, 0xe8, 0x10, 0xf1, 0xbf, 0xd8, 0xbe, 0xeb, 0xbf, + 0x1f, 0xc5, 0xae, 0xbf, 0x67, 0xba, 0x2d, 0xbf, 0x08, 0x16, 0x75, 0xc0, 0xce, 0xb9, 0x2a, 0xc0, + 0x2c, 0x68, 0xae, 0x3f, 0xb7, 0x64, 0xa5, 0x3f, 0x2d, 0x39, 0xa3, 0x3f, 0x14, 0xdf, 0x41, 0x3f, + 0xc8, 0x4a, 0x8e, 0x3f, 0xb5, 0xcc, 0xb4, 0x3f, 0x10, 0xaa, 0xc7, 0x3f, 0x78, 0x4b, 0x71, 0x3f, + 0xf2, 0x3a, 0xad, 0x3f, 0x94, 0x60, 0xbf, 0x3f, 0xdd, 0x36, 0x9b, 0x3f, 0x12, 0x42, 0xa3, 0x3f, + 0x97, 0xca, 0x73, 0x3f, 0x56, 0x1b, 0xc4, 0x3f, 0x48, 0xc8, 0x78, 0x3f, 0x3d, 0xe7, 0xe4, 0x3f, + 0xe3, 0x15, 0xa1, 0x3f, 0x56, 0x0a, 0xe1, 0x3f, 0xc6, 0xdd, 0x94, 0x3f, 0xf5, 0x42, 0x9f, 0x3f, + 0x8a, 0xd7, 0x43, 0x3f, 0x43, 0x9a, 0xc7, 0x3f, 0x84, 0x3b, 0xcf, 0x3f, 0xd3, 0x4d, 0x9a, 0x3f, + 0x2a, 0x9b, 0x89, 0x3f, 0xd0, 0x78, 0x12, 0x40, 0x32, 0xea, 0x68, 0x3f, 0xfa, 0x7c, 0x51, 0x3f, + 0x87, 0x4f, 0x9f, 0x3f, 0xfc, 0x37, 0xc9, 0x3f, 0xd9, 0x05, 0xa4, 0x3f, 0xf3, 0xc5, 0x80, 0x3f, + 0x53, 0xf3, 0x49, 0xbf, 0x32, 0xa5, 0x50, 0xbf, 0x08, 0x65, 0x7a, 0xbf, 0x80, 0xc3, 0x65, 0xbf, + 0x80, 0x7c, 0x44, 0xbf, 0x06, 0x58, 0x36, 0xbf, 0x78, 0xec, 0x4c, 0xbf, 0x56, 0x1c, 0x57, 0xbf, + 0xf0, 0x02, 0x8e, 0xbf, 0x6b, 0x0f, 0x80, 0xbf, 0x62, 0xd8, 0x76, 0xbf, 0x25, 0xcb, 0x43, 0xbf, + 0xe5, 0xe3, 0x36, 0xbf, 0x68, 0xc3, 0x7b, 0xbf, 0x93, 0x9b, 0x63, 0xbf, 0x4a, 0x14, 0x80, 0xbf, + 0x19, 0xc3, 0x6d, 0xbf, 0x20, 0x30, 0x73, 0xbf, 0x9c, 0x62, 0x18, 0xbf, 0x9d, 0x99, 0x39, 0xbf, + 0x33, 0x05, 0x54, 0xbf, 0xa5, 0x12, 0x86, 0xbf, 0xd4, 0xd0, 0x60, 0xbf, 0xdb, 0x27, 0x5f, 0xbf, + 0x56, 0x68, 0x41, 0xbf, 0xa0, 0xc0, 0x6e, 0xbf, 0xbc, 0xa5, 0x61, 0xbf, 0x20, 0x59, 0x4e, 0xbf, + 0x02, 0x96, 0x61, 0xbf, 0x54, 0xa2, 0x78, 0xbf, 0x54, 0x4f, 0x6d, 0xbf, 0xd9, 0x9c, 0x3a, 0xbf, + 0x6c, 0x01, 0x86, 0xbe, 0xdf, 0xea, 0xaa, 0xbe, 0xe2, 0x2a, 0x3f, 0xbe, 0x3a, 0x0e, 0x33, 0xbe, + 0x02, 0x5d, 0xaf, 0xbe, 0x10, 0xf7, 0x96, 0xbe, 0x94, 0x6b, 0x03, 0xbf, 0x82, 0x9e, 0xee, 0xbd, + 0x5e, 0x36, 0xce, 0xbe, 0x8c, 0x56, 0x6c, 0xbe, 0xc9, 0x8b, 0xb7, 0xbe, 0x9b, 0x0e, 0x34, 0xbe, + 0x10, 0x9a, 0x8f, 0xbd, 0xe8, 0xea, 0x07, 0xbf, 0x40, 0x9f, 0x78, 0xbe, 0x59, 0x7e, 0xd9, 0xbe, + 0x3a, 0x1c, 0x58, 0xbe, 0x3a, 0xa8, 0x1d, 0xbf, 0x72, 0x71, 0x4c, 0xbe, 0x93, 0x14, 0xa3, 0xbe, + 0xa8, 0x34, 0xca, 0xbc, 0xc1, 0x0d, 0xfe, 0xbe, 0xae, 0x17, 0xb1, 0xbe, 0x7e, 0x53, 0xec, 0xbe, + 0x40, 0x98, 0x84, 0xbe, 0x5a, 0x35, 0x0c, 0xbf, 0xbe, 0x79, 0x1e, 0xbe, 0xd6, 0x08, 0xa6, 0x3d, + 0x74, 0x67, 0x42, 0xbe, 0x4e, 0xec, 0xd1, 0xbe, 0x18, 0xe9, 0x96, 0xbd, 0x08, 0x73, 0x67, 0xbe, + 0xea, 0x1f, 0xb0, 0x3d, 0x18, 0x59, 0xc4, 0x3d, 0xb4, 0x89, 0xe8, 0x3d, 0x46, 0xec, 0xf1, 0x3d, + 0x96, 0xf7, 0xc2, 0x3d, 0x44, 0x94, 0x98, 0x3d, 0x29, 0xfd, 0xbf, 0x3d, 0xf0, 0xb7, 0xcd, 0x3d, + 0x1d, 0xd7, 0x10, 0x3e, 0x18, 0xe9, 0xe4, 0x3d, 0x7d, 0x24, 0xfa, 0x3d, 0x10, 0xe3, 0xa5, 0x3d, + 0x27, 0xa3, 0xa1, 0x3d, 0x8a, 0xe4, 0xfb, 0x3d, 0x45, 0xa6, 0xe7, 0x3d, 0xa4, 0xce, 0xe4, 0x3d, + 0x68, 0x03, 0xdd, 0x3d, 0xef, 0xdd, 0xea, 0x3d, 0x86, 0xd2, 0x77, 0x3d, 0x8a, 0x65, 0xaa, 0x3d, + 0x38, 0xba, 0xcc, 0x3d, 0xbe, 0x10, 0x05, 0x3e, 0x64, 0xac, 0xc2, 0x3d, 0xaf, 0xc1, 0xe8, 0x3d, + 0xd4, 0x37, 0xb9, 0x3d, 0x7d, 0x59, 0xba, 0x3d, 0x8a, 0x83, 0xe0, 0x3d, 0xda, 0x73, 0xb7, 0x3d, + 0x8b, 0x2d, 0xcd, 0x3d, 0x66, 0x07, 0xe9, 0x3d, 0xab, 0xd6, 0xcc, 0x3d, 0xae, 0x66, 0xb2, 0x3d, + 0x40, 0x55, 0x08, 0x3d, 0xf9, 0x98, 0x43, 0x3d, 0x4e, 0xe9, 0x0c, 0x3d, 0xa2, 0xd8, 0x3b, 0x3d, + 0xc6, 0xd8, 0x57, 0x3d, 0x25, 0x2e, 0x06, 0x3d, 0xa5, 0x2f, 0x85, 0x3d, 0xac, 0xf7, 0xe5, 0x3c, + 0x78, 0xf2, 0x90, 0x3d, 0x42, 0x90, 0x0e, 0x3d, 0x66, 0x2b, 0x7c, 0x3d, 0xde, 0x0d, 0xb5, 0x3c, + 0x88, 0x7b, 0x6d, 0x3c, 0x57, 0x37, 0x9f, 0x3d, 0x52, 0x93, 0x46, 0x3d, 0x6e, 0xc1, 0x5d, 0x3d, + 0x23, 0x0d, 0x14, 0x3d, 0x36, 0x71, 0xa6, 0x3d, 0x0e, 0x91, 0xab, 0x3c, 0x28, 0x4c, 0x2e, 0x3d, + 0x4e, 0xaa, 0xa0, 0x3c, 0x5c, 0x3c, 0x99, 0x3d, 0xcb, 0x50, 0x2b, 0x3d, 0x33, 0xc3, 0x94, 0x3d, + 0x53, 0x0f, 0x27, 0x3d, 0x57, 0x07, 0x57, 0x3d, 0xcc, 0x61, 0x17, 0x3d, 0xc0, 0xa1, 0x62, 0x3a, + 0x1b, 0x5c, 0xfe, 0x3c, 0x84, 0x2e, 0x6c, 0x3d, 0x48, 0xa7, 0x70, 0x3c, 0x38, 0xcd, 0x16, 0x3d, + 0x69, 0xc9, 0x48, 0x3d, 0x25, 0x4e, 0x44, 0x3d, 0x1b, 0xbb, 0x63, 0x3d, 0xbf, 0x9a, 0x34, 0x3d, + 0x9e, 0x3e, 0x30, 0x3d, 0x7c, 0xbc, 0x3e, 0x3d, 0x66, 0x28, 0x4b, 0x3d, 0x3b, 0x72, 0x3b, 0x3d, + 0x98, 0x2e, 0x72, 0x3d, 0x5a, 0xec, 0x73, 0x3d, 0x94, 0x88, 0x54, 0x3d, 0x54, 0x8b, 0x43, 0x3d, + 0xb6, 0x27, 0x2b, 0x3d, 0xfb, 0x76, 0x64, 0x3d, 0xb9, 0x79, 0x3e, 0x3d, 0x0a, 0x07, 0x7e, 0x3d, + 0x98, 0xc3, 0x59, 0x3d, 0x25, 0x67, 0x6a, 0x3d, 0xb4, 0x9a, 0x20, 0x3d, 0xd3, 0xfd, 0x33, 0x3d, + 0x45, 0x2c, 0x32, 0x3d, 0xce, 0xef, 0x71, 0x3d, 0x47, 0xb4, 0x63, 0x3d, 0x67, 0x8e, 0x40, 0x3d, + 0xcc, 0x2c, 0x30, 0x3d, 0x27, 0xb9, 0x87, 0x3d, 0x46, 0x93, 0x3d, 0x3d, 0xc2, 0xd2, 0x37, 0x3d, + 0xd9, 0x98, 0x52, 0x3d, 0x3a, 0x81, 0x6b, 0x3d, 0x8f, 0x17, 0x62, 0x3d, 0xff, 0x12, 0x29, 0x3d, + 0x3e, 0xa8, 0xb9, 0x3c, 0x14, 0x83, 0xc3, 0x3c, 0x1c, 0x4c, 0x83, 0x3c, 0x27, 0x6b, 0x02, 0x3c, + 0xd6, 0x66, 0xb2, 0x3c, 0x96, 0x7e, 0xd2, 0x3c, 0xe2, 0x5a, 0x10, 0x3d, 0xda, 0xe0, 0x1c, 0x3c, + 0xad, 0xe5, 0xc2, 0x3c, 0xc9, 0x96, 0xac, 0x3c, 0x94, 0xf3, 0xb0, 0x3c, 0x77, 0xd7, 0x95, 0x3c, + 0xfe, 0xcf, 0x1c, 0x3c, 0xc7, 0xc9, 0x07, 0x3d, 0x75, 0x74, 0x68, 0x3c, 0x1e, 0x19, 0x08, 0x3d, + 0x82, 0x8d, 0x8d, 0x3c, 0x6d, 0xfa, 0x25, 0x3d, 0x6a, 0x2b, 0x9f, 0x3c, 0xb9, 0x7c, 0xc1, 0x3c, + 0x00, 0xca, 0x59, 0x3b, 0xad, 0x0a, 0x01, 0x3d, 0x8f, 0x60, 0xed, 0x3c, 0xd6, 0x1f, 0xd9, 0x3c, + 0x42, 0xf9, 0x94, 0x3c, 0x7c, 0x9c, 0x40, 0x3d, 0x89, 0x02, 0x23, 0x3c, 0xf8, 0x0a, 0x09, 0x3a, + 0xaa, 0x04, 0x8a, 0x3c, 0x2c, 0x22, 0xf1, 0x3c, 0x34, 0x57, 0x4d, 0x3c, 0xda, 0x25, 0x84, 0x3c, + 0x5e, 0x2a, 0x80, 0x3f, 0x3e, 0x79, 0xa5, 0x3f, 0x8c, 0x3c, 0xac, 0x3f, 0x67, 0xe8, 0xd3, 0x3f, + 0x99, 0x65, 0xb1, 0x3f, 0x96, 0x82, 0x5a, 0x3f, 0xfd, 0xb8, 0xb2, 0x3f, 0x6f, 0xe8, 0x9a, 0x3f, + 0x20, 0x25, 0x03, 0x40, 0x76, 0x3a, 0xa3, 0x3f, 0x32, 0xbb, 0xe1, 0x3f, 0x25, 0x4d, 0x57, 0x3f, + 0x80, 0x19, 0x50, 0x3f, 0xc4, 0x08, 0xf0, 0x3f, 0xab, 0x97, 0xc9, 0x3f, 0x7c, 0x9d, 0xb5, 0x3f, + 0x8a, 0x2d, 0xa7, 0x3f, 0x10, 0xe9, 0xe1, 0x3f, 0x22, 0x22, 0x20, 0x3f, 0x6e, 0x45, 0x8d, 0x3f, + 0xee, 0xb5, 0x93, 0x3f, 0x6c, 0xf7, 0xf4, 0x3f, 0xbd, 0x4c, 0x91, 0x3f, 0xfa, 0xad, 0xe5, 0x3f, + 0xda, 0x8f, 0x9a, 0x3f, 0x18, 0x63, 0x85, 0x3f, 0x98, 0xf7, 0xb5, 0x3f, 0x12, 0x19, 0x50, 0x3f, + 0x54, 0x26, 0x95, 0x3f, 0x8e, 0x93, 0xc4, 0x3f, 0x5c, 0x58, 0x78, 0x3f, 0xdb, 0x42, 0x92, 0x3f, + 0x85, 0x95, 0x87, 0x3f, 0x00, 0x26, 0xca, 0x3e, 0x00, 0x41, 0x6a, 0xbe, 0x81, 0xf9, 0x17, 0x3f, + 0xe6, 0x4d, 0x58, 0x3f, 0xeb, 0x87, 0x97, 0x3e, 0x6c, 0x7e, 0xf4, 0x3e, 0x42, 0x57, 0x94, 0x3e, + 0x40, 0x75, 0xfe, 0xbd, 0x1b, 0xe6, 0x29, 0x3f, 0x45, 0xa9, 0xdc, 0x3e, 0x08, 0xd0, 0x78, 0x3f, + 0x50, 0xbf, 0x10, 0x3e, 0x3e, 0x2d, 0x46, 0x3f, 0x9a, 0xfe, 0xf9, 0x3e, 0x21, 0x02, 0x19, 0x3f, + 0xf6, 0xee, 0xfe, 0x3e, 0xab, 0x17, 0xa4, 0x3e, 0x50, 0xf4, 0xd8, 0x3e, 0x4d, 0x9b, 0x50, 0x3f, + 0x91, 0x49, 0xe0, 0x3e, 0x18, 0x7d, 0xa3, 0x3e, 0x34, 0xa0, 0x1c, 0x3f, 0xa0, 0x3b, 0x87, 0xbd, + 0xdc, 0x35, 0x3c, 0xbe, 0xc0, 0x0b, 0x13, 0x3e, 0x26, 0xc0, 0xbe, 0x3e, 0x1d, 0x5b, 0xe6, 0x3e, + 0xc0, 0x12, 0x40, 0x3e, 0xe4, 0x95, 0x49, 0x3f, 0xbd, 0x32, 0x0a, 0x3f, 0x84, 0x1e, 0x9f, 0x3e, + 0x80, 0x9a, 0x6c, 0xbe, 0xb6, 0x82, 0x50, 0xbe, 0x58, 0xa5, 0xac, 0x3d, 0x2d, 0xdd, 0xca, 0xbe, + 0xf6, 0x81, 0x0c, 0xbf, 0x59, 0x1c, 0x5a, 0xbe, 0xf7, 0x69, 0x20, 0xbe, 0x57, 0x26, 0xb7, 0xbe, + 0x3a, 0xa6, 0x3f, 0xbe, 0xb6, 0x30, 0xd0, 0xbe, 0xb9, 0x13, 0xf6, 0xbe, 0x42, 0xb0, 0x9a, 0xbe, + 0xc6, 0x9c, 0xcf, 0x3d, 0x30, 0xe1, 0x21, 0xbf, 0x00, 0x07, 0x85, 0xbe, 0x08, 0xcf, 0xb4, 0xbe, + 0x2c, 0x67, 0x49, 0xbe, 0x5f, 0xdc, 0x38, 0xbe, 0x50, 0x28, 0x30, 0xbe, 0x11, 0x6d, 0xcf, 0xbe, + 0x5d, 0x19, 0xe5, 0xbe, 0xe3, 0x78, 0xe0, 0xbd, 0xac, 0xd6, 0xd0, 0xbe, 0x96, 0x15, 0x45, 0xbe, + 0x42, 0x56, 0x5d, 0xbe, 0xf5, 0xa0, 0x08, 0xbe, 0x22, 0xfc, 0xa6, 0xbe, 0x5b, 0x73, 0x80, 0xbe, + 0x1b, 0x9b, 0x26, 0xbe, 0x79, 0xea, 0xa4, 0xbe, 0x5e, 0xab, 0xfa, 0xbe, 0xcb, 0x79, 0x0a, 0xbf, + 0x4e, 0x0d, 0x2e, 0xbd, 0x18, 0x18, 0xbd, 0xbd, 0x44, 0xe5, 0x45, 0x3e, 0x1d, 0x70, 0x1a, 0x3e, + 0xc2, 0x64, 0x8b, 0xbe, 0x0c, 0xf8, 0x8a, 0xbe, 0x92, 0x2a, 0xf4, 0xbd, 0xa7, 0x37, 0x98, 0xbd, + 0x7c, 0xe9, 0x1d, 0x3d, 0xa6, 0x54, 0x0f, 0xbd, 0xea, 0x01, 0x08, 0xbe, 0xfd, 0x82, 0x4d, 0xbe, + 0xc6, 0xf3, 0x90, 0x3d, 0x94, 0x19, 0x5f, 0xbe, 0xbc, 0x05, 0x93, 0xbe, 0xb6, 0x9f, 0xfd, 0xbd, + 0xd0, 0x36, 0x41, 0xbe, 0x4e, 0xc2, 0x43, 0xbe, 0x3a, 0x9d, 0xaa, 0xbc, 0x6a, 0xdc, 0x0f, 0xbe, + 0x88, 0x0a, 0x7a, 0x3c, 0x19, 0x83, 0xd2, 0x3d, 0x59, 0xb4, 0x89, 0xbd, 0xb9, 0xad, 0xdf, 0x3d, + 0x94, 0x1d, 0x12, 0x3d, 0x6a, 0xfa, 0x8f, 0xbe, 0xc0, 0xf1, 0x48, 0x3c, 0x38, 0x55, 0x12, 0x3d, + 0x60, 0xd1, 0x3d, 0xbb, 0x18, 0xda, 0xca, 0xbe, 0x33, 0x00, 0xf1, 0xbd, 0x8c, 0x4b, 0x76, 0x3d, + 0xa0, 0x55, 0xc1, 0x3a, 0x13, 0x38, 0xb4, 0x3c, 0x5c, 0x8e, 0x70, 0xbc, 0xfd, 0x7e, 0x03, 0x3d, + 0x8a, 0xce, 0x8a, 0x3d, 0xb7, 0x0b, 0x12, 0x3d, 0xd4, 0xdd, 0x5d, 0x3c, 0xa2, 0x71, 0x43, 0x3d, + 0xa0, 0x1d, 0xf8, 0x3c, 0xd4, 0x61, 0x26, 0x3d, 0x52, 0xd3, 0x83, 0x3d, 0x96, 0x0e, 0xbc, 0x3c, + 0x3c, 0x28, 0xc0, 0xbc, 0xec, 0xb7, 0xa3, 0x3d, 0xcc, 0xc6, 0x17, 0x3d, 0x79, 0xba, 0x20, 0x3d, + 0x38, 0x01, 0xb7, 0x3c, 0xe2, 0x9e, 0xd4, 0x3c, 0xe0, 0x6b, 0x50, 0x3c, 0x34, 0x6d, 0x26, 0x3d, + 0xc2, 0xfb, 0x53, 0x3d, 0x10, 0x62, 0x2c, 0x3a, 0xcc, 0xd7, 0x34, 0x3d, 0xe8, 0xcb, 0xd1, 0x3c, + 0x53, 0x97, 0x16, 0x3d, 0xbe, 0xec, 0xef, 0x3c, 0xd4, 0x00, 0x13, 0x3d, 0xac, 0xca, 0xaa, 0x3c, + 0xd6, 0xe4, 0x95, 0x3c, 0x10, 0x87, 0x2b, 0x3d, 0x9b, 0x32, 0x7b, 0x3d, 0x32, 0xcc, 0x8a, 0x3d, + 0x95, 0x63, 0x8e, 0xbc, 0xae, 0x18, 0x32, 0x3c, 0x42, 0x89, 0xd1, 0xbc, 0x01, 0x24, 0xb9, 0xbc, + 0xf0, 0x50, 0x24, 0x3d, 0x04, 0xfa, 0x29, 0x3d, 0x8b, 0x89, 0x20, 0x3c, 0x31, 0xbc, 0x9e, 0x3c, + 0xfc, 0xda, 0xfc, 0x3b, 0x98, 0x9b, 0x62, 0x3b, 0x78, 0x62, 0xf2, 0x3c, 0xea, 0xed, 0x51, 0x3c, + 0xd6, 0x17, 0xa7, 0xbc, 0x5f, 0xab, 0x1d, 0x3d, 0xfd, 0xf8, 0x22, 0x3d, 0xc7, 0x9c, 0x85, 0x3c, + 0x22, 0x74, 0xb0, 0x3c, 0xd4, 0x56, 0xdd, 0x3c, 0x21, 0x3b, 0x1d, 0xbb, 0xdb, 0x1e, 0x68, 0x3c, + 0x00, 0xa0, 0xb3, 0x3b, 0x54, 0x9b, 0xa8, 0xbc, 0x85, 0x40, 0x25, 0x3c, 0x9a, 0x5e, 0x95, 0xbb, + 0x74, 0xc7, 0x3d, 0x3c, 0x52, 0x7e, 0x34, 0x3d, 0x7c, 0x44, 0x06, 0x3b, 0xc6, 0xfb, 0xff, 0xbb, + 0xee, 0xda, 0x17, 0x3b, 0x5c, 0xe0, 0x49, 0x3d, 0x80, 0xb9, 0xc5, 0x3c, 0x88, 0x3c, 0xfb, 0x3b, + 0xb5, 0x40, 0xd3, 0x3c, 0x6b, 0x02, 0x56, 0x3c, 0x5e, 0x5d, 0x95, 0xbb, 0x1f, 0x37, 0xda, 0x3c, + 0xe0, 0x0b, 0x00, 0x3d, 0x6a, 0x18, 0x1c, 0x3c, 0x7b, 0x3a, 0x4a, 0x3c, 0x28, 0xd6, 0x8d, 0x3c, + 0x38, 0xea, 0xb1, 0x3b, 0x92, 0xf1, 0xd3, 0x3c, 0x73, 0xe3, 0xc0, 0x3c, 0x46, 0xf6, 0xcc, 0x3c, + 0x00, 0x07, 0x13, 0xba, 0xe7, 0xd1, 0x0a, 0x3d, 0x7e, 0x27, 0x6a, 0x3c, 0x92, 0x68, 0xb1, 0x3c, + 0xdb, 0x29, 0x5a, 0x3c, 0xda, 0x47, 0x1f, 0x3c, 0x5c, 0x23, 0x59, 0x3c, 0x8e, 0x53, 0xdd, 0x3c, + 0x0a, 0xd5, 0xc6, 0x3c, 0x3a, 0x96, 0x2f, 0x3c, 0xa0, 0xf3, 0xc9, 0x3c, 0xc0, 0x2c, 0xf5, 0x3b, + 0x46, 0xd6, 0xb3, 0x3b, 0x8f, 0x64, 0x7e, 0x3b, 0x4d, 0x50, 0x98, 0x3c, 0x2c, 0x28, 0x8e, 0x3c, + 0x02, 0x97, 0x16, 0x3c, 0x35, 0xe4, 0xa5, 0x3c, 0xac, 0x0e, 0xd4, 0x3c, 0x94, 0xc2, 0xd6, 0x3c, + 0x3d, 0x04, 0x86, 0x3c, 0x5e, 0xad, 0xf5, 0x3b, 0xc3, 0xf0, 0x23, 0xbc, 0xd6, 0x3b, 0x3b, 0xba, + 0x9e, 0xcb, 0x8e, 0x3c, 0x03, 0xf5, 0x4b, 0x3c, 0x57, 0x90, 0x2b, 0x3c, 0xef, 0x14, 0x4e, 0x3b, + 0x89, 0xe3, 0xbf, 0xbb, 0xda, 0xe5, 0xee, 0x3b, 0x26, 0xb8, 0xc9, 0x3b, 0x76, 0x6a, 0xa3, 0x3c, + 0x68, 0xff, 0x7d, 0x3a, 0xa8, 0xbe, 0x57, 0x3c, 0xef, 0x45, 0x80, 0x3c, 0xf8, 0xf8, 0x26, 0x3c, + 0xc5, 0x9c, 0x53, 0x3c, 0xcd, 0xff, 0x27, 0x3c, 0x10, 0x8c, 0xba, 0x3b, 0x25, 0xdc, 0x61, 0x3c, + 0x7a, 0xd4, 0x94, 0x3a, 0xa0, 0xea, 0xcb, 0x38, 0xd6, 0xaf, 0xf9, 0x3b, 0x5e, 0x1a, 0xf9, 0xbb, + 0x92, 0xbf, 0xe8, 0xbb, 0x0a, 0xa9, 0x38, 0x3c, 0xf6, 0x3b, 0xdb, 0x3a, 0xf2, 0x1a, 0x46, 0x3b, + 0x54, 0x6c, 0x9d, 0x3a, 0x81, 0x3d, 0xc4, 0x3c, 0x46, 0xe3, 0xee, 0x3b, 0xf4, 0x71, 0x7b, 0xbb, + 0x87, 0x34, 0xe3, 0xbe, 0x9a, 0xca, 0x8a, 0x3e, 0x6e, 0x9e, 0xca, 0xbe, 0xf0, 0xfb, 0x29, 0xbd, + 0xf0, 0x53, 0x80, 0x3f, 0x0a, 0x43, 0x4b, 0x3f, 0x78, 0xa1, 0x12, 0x3e, 0x27, 0xaf, 0x2e, 0x3f, + 0xa6, 0xce, 0xea, 0x3e, 0x32, 0xf1, 0xa8, 0x3e, 0xa1, 0x02, 0x73, 0x3f, 0xc2, 0x3f, 0x23, 0x3e, + 0x7b, 0x58, 0x07, 0xbf, 0xe9, 0xc5, 0x91, 0x3f, 0x06, 0x0f, 0x3e, 0x3f, 0x65, 0x77, 0xf3, 0x3e, + 0xc5, 0x02, 0xbd, 0x3e, 0x6c, 0x0d, 0x04, 0x3f, 0xc0, 0xe4, 0x26, 0x3c, 0xc3, 0x56, 0xd3, 0x3e, + 0x62, 0xcd, 0x08, 0x3f, 0xd6, 0xa2, 0x9b, 0xbe, 0x0b, 0x18, 0xea, 0x3e, 0x78, 0xb5, 0x81, 0x3e, + 0xd3, 0x01, 0x17, 0x3f, 0x50, 0x11, 0x4a, 0x3f, 0x26, 0xd6, 0xaa, 0x3e, 0xbb, 0x03, 0x0c, 0x3d, + 0x98, 0x76, 0x3d, 0x3e, 0x3f, 0x27, 0x54, 0x3f, 0x1c, 0x96, 0x53, 0x3f, 0x28, 0x60, 0x45, 0x3f, + 0x36, 0x71, 0x17, 0xbe, 0x6c, 0x6d, 0x37, 0xbe, 0x74, 0x8c, 0xe2, 0xbd, 0x54, 0x4b, 0xb7, 0xbd, + 0x4e, 0x2e, 0x1b, 0xbe, 0x96, 0x74, 0x11, 0xbe, 0x2e, 0x55, 0x2c, 0xbe, 0x93, 0xaf, 0xf6, 0xbd, + 0x70, 0x23, 0x29, 0xbe, 0xf8, 0xc8, 0x2f, 0xbe, 0xa0, 0x24, 0x0f, 0xbe, 0x24, 0x4d, 0x3c, 0xbe, + 0x36, 0xb2, 0x03, 0xbe, 0xd4, 0x27, 0x50, 0xbe, 0xdb, 0xd3, 0xe7, 0xbd, 0x20, 0xdc, 0x5e, 0xbe, + 0xa3, 0x71, 0x08, 0xbe, 0xb6, 0x6a, 0x79, 0xbe, 0xba, 0x07, 0x15, 0xbe, 0x6e, 0x1c, 0x22, 0xbe, + 0x92, 0xcf, 0x8a, 0xbd, 0x43, 0xb1, 0x33, 0xbe, 0x84, 0x8d, 0x53, 0xbe, 0x8f, 0x3b, 0x14, 0xbe, + 0x65, 0xd7, 0x0f, 0xbe, 0xf2, 0xf7, 0x8e, 0xbe, 0x1f, 0x3d, 0xeb, 0xbd, 0xcf, 0x77, 0xfc, 0xbd, + 0x16, 0x3f, 0x02, 0xbe, 0xaa, 0x90, 0x2b, 0xbe, 0xf8, 0xe4, 0x1c, 0xbe, 0x9c, 0x3b, 0x9e, 0xbd, + 0x90, 0x69, 0x9b, 0x3d, 0x8f, 0x25, 0xe0, 0x3d, 0x94, 0x6f, 0xbd, 0x3d, 0x82, 0x9e, 0xee, 0x3d, + 0xa0, 0x1d, 0xc9, 0x3d, 0x83, 0xf4, 0x9b, 0x3d, 0x5b, 0xcf, 0xba, 0x3d, 0x24, 0xb4, 0xbb, 0x3d, + 0x47, 0x81, 0x07, 0x3e, 0x3d, 0xed, 0xd8, 0x3d, 0xab, 0xd6, 0xe7, 0x3d, 0x1d, 0x6a, 0xb9, 0x3d, + 0x24, 0xeb, 0xb6, 0x3d, 0xda, 0xbe, 0xf8, 0x3d, 0x3a, 0x72, 0xd3, 0x3d, 0xb0, 0xd3, 0xfe, 0x3d, + 0x28, 0xd7, 0xd7, 0x3d, 0x8c, 0xea, 0xea, 0x3d, 0xd0, 0x12, 0xa3, 0x3d, 0x37, 0x4e, 0xb5, 0x3d, + 0xcf, 0x5c, 0xda, 0x3d, 0x9f, 0xfb, 0xe7, 0x3d, 0x9e, 0xc0, 0xe1, 0x3d, 0x86, 0xde, 0xe4, 0x3d, + 0x1c, 0x89, 0xc4, 0x3d, 0x69, 0xf8, 0xe9, 0x3d, 0x0a, 0x02, 0xe2, 0x3d, 0xf3, 0x5f, 0xbd, 0x3d, + 0xac, 0x98, 0xc3, 0x3d, 0xf2, 0x39, 0xf2, 0x3d, 0x03, 0x0b, 0xc4, 0x3d, 0x2c, 0x43, 0x95, 0x3d, + 0xba, 0x18, 0xdf, 0x3c, 0xe6, 0x62, 0x43, 0x3d, 0x5c, 0x49, 0x71, 0x3c, 0x98, 0x0d, 0xa2, 0x3c, + 0x03, 0xc3, 0x51, 0x3d, 0xf8, 0x28, 0x21, 0x3d, 0xc2, 0x03, 0x68, 0x3d, 0xd0, 0x56, 0x1e, 0x3c, + 0xc3, 0xbb, 0x42, 0x3d, 0x64, 0x5a, 0x11, 0x3d, 0x6e, 0xb5, 0x40, 0x3d, 0xd3, 0xe5, 0xd4, 0x3c, + 0x98, 0x58, 0xbb, 0xba, 0xfa, 0x57, 0xa1, 0x3d, 0x5a, 0x0d, 0x93, 0x3c, 0xbf, 0x91, 0x78, 0x3d, + 0xc6, 0x52, 0xf3, 0x3c, 0x00, 0xcd, 0xb5, 0x3d, 0xac, 0x79, 0x34, 0x3d, 0x4a, 0x18, 0x0b, 0x3d, + 0xd0, 0xb2, 0x2b, 0xbb, 0x14, 0xa3, 0x80, 0x3d, 0x70, 0xc0, 0x5f, 0x3d, 0x61, 0x8a, 0x85, 0x3d, + 0x7f, 0x1a, 0xac, 0x3c, 0x3c, 0x5b, 0x9f, 0x3d, 0x6a, 0x7a, 0x75, 0x3c, 0x2b, 0x0c, 0x24, 0xbc, + 0x41, 0x0f, 0xb9, 0x3c, 0x64, 0x15, 0x89, 0x3d, 0xb0, 0x10, 0xad, 0xba, 0x04, 0xd2, 0x79, 0x3c, + 0x0a, 0x09, 0x00, 0xbc, 0xb1, 0x39, 0x52, 0xbc, 0x72, 0xf9, 0x32, 0xbc, 0x7a, 0xab, 0x7d, 0xbc, + 0x31, 0x87, 0x48, 0xbc, 0x4b, 0xcd, 0x0d, 0xbc, 0x04, 0xaa, 0x32, 0xbc, 0xe4, 0x60, 0x27, 0xbc, + 0xca, 0xf9, 0x88, 0xbc, 0x72, 0xa1, 0x43, 0xbc, 0xb0, 0x67, 0x70, 0xbc, 0x1c, 0x5c, 0x0f, 0xbc, + 0xe6, 0x17, 0x14, 0xbc, 0x0a, 0x04, 0x7c, 0xbc, 0xb6, 0x9e, 0x4e, 0xbc, 0xc8, 0xa5, 0x6c, 0xbc, + 0x20, 0x92, 0x52, 0xbc, 0xf0, 0x98, 0x5d, 0xbc, 0xed, 0x4c, 0x18, 0xbc, 0x28, 0xed, 0x1e, 0xbc, + 0x62, 0xdf, 0x5e, 0xbc, 0x92, 0xff, 0x69, 0xbc, 0x38, 0xd3, 0x4a, 0xbc, 0x7b, 0x00, 0x79, 0xbc, + 0x70, 0x56, 0x30, 0xbc, 0xd8, 0x01, 0x40, 0xbc, 0xce, 0xc0, 0x5c, 0xbc, 0xfc, 0xb8, 0x17, 0xbc, + 0xc1, 0xc1, 0x37, 0xbc, 0xf0, 0x00, 0x7e, 0xbc, 0x31, 0x7e, 0x16, 0xbc, 0x14, 0x69, 0x15, 0xbc, + 0x88, 0x4c, 0x41, 0xbb, 0x02, 0x20, 0xda, 0xbb, 0xa9, 0x37, 0x4e, 0xbb, 0xc9, 0x5e, 0xbe, 0xbb, + 0x96, 0x14, 0xf7, 0xbb, 0xf2, 0x00, 0xa3, 0xbb, 0x46, 0x0b, 0xf4, 0xbb, 0xa3, 0x32, 0x04, 0xbb, + 0x72, 0x09, 0x07, 0xbc, 0x06, 0x76, 0xa0, 0xbb, 0x87, 0x04, 0x04, 0xbc, 0x55, 0x6e, 0x16, 0xbb, + 0x40, 0x51, 0x15, 0x38, 0x24, 0x18, 0x36, 0xbc, 0x33, 0xbf, 0x85, 0xbb, 0xba, 0x36, 0x04, 0xbc, + 0x4f, 0x20, 0xad, 0xbb, 0xe7, 0x1a, 0x33, 0xbc, 0x48, 0x10, 0xbc, 0xbb, 0x33, 0x0a, 0x8b, 0xbb, + 0x74, 0x8f, 0x2f, 0xbb, 0x55, 0x52, 0x17, 0xbc, 0x99, 0x72, 0xdf, 0xbb, 0x2a, 0xbd, 0x2c, 0xbc, + 0x9a, 0x1e, 0x56, 0xbb, 0xe7, 0x50, 0x04, 0xbc, 0x3a, 0xfd, 0x80, 0xbb, 0xbe, 0x74, 0x81, 0x3a, + 0x56, 0x95, 0x80, 0xbb, 0xb2, 0xe3, 0x29, 0xbc, 0xa0, 0x13, 0x08, 0x3a, 0x2c, 0xee, 0x5b, 0xbb, + 0x9e, 0xdc, 0xa2, 0xbb, 0x01, 0xc7, 0xd4, 0xbb, 0xd6, 0x0a, 0xa8, 0xbb, 0x82, 0xf2, 0xb7, 0xbb, + 0xd6, 0x74, 0xb6, 0xbb, 0x86, 0x08, 0x9b, 0xbb, 0x9a, 0xcb, 0xb4, 0xbb, 0x55, 0x72, 0xae, 0xbb, + 0xfe, 0x56, 0xe9, 0xbb, 0x6c, 0x5f, 0xd1, 0xbb, 0xdc, 0x2c, 0xc4, 0xbb, 0xfd, 0x07, 0xc9, 0xbb, + 0x13, 0x93, 0xb4, 0xbb, 0xb1, 0xfe, 0xe3, 0xbb, 0xf6, 0x55, 0xb5, 0xbb, 0xd8, 0xe5, 0xf6, 0xbb, + 0xb2, 0x1b, 0xbe, 0xbb, 0xa3, 0x9c, 0xed, 0xbb, 0x82, 0x47, 0x9f, 0xbb, 0x44, 0x55, 0xb5, 0xbb, + 0x5b, 0x1b, 0xa9, 0xbb, 0x5e, 0x4c, 0xd1, 0xbb, 0x89, 0xe7, 0xe1, 0xbb, 0xbd, 0xab, 0xbd, 0xbb, + 0x16, 0x08, 0xba, 0xbb, 0x5f, 0x81, 0x02, 0xbc, 0xbb, 0x32, 0xc0, 0xbb, 0x06, 0x0d, 0xb9, 0xbb, + 0xd1, 0x91, 0xb1, 0xbb, 0xa5, 0x6b, 0xd1, 0xbb, 0xfa, 0xd7, 0xc9, 0xbb, 0xc4, 0x03, 0x7b, 0xbb, + 0x6b, 0x4d, 0x26, 0xbb, 0xa2, 0x3a, 0x5f, 0xbb, 0x3a, 0x7d, 0xa2, 0xba, 0x66, 0xb3, 0x4b, 0xba, + 0xe1, 0xef, 0x52, 0xbb, 0x68, 0x77, 0x3d, 0xbb, 0x71, 0x4e, 0x78, 0xbb, 0x6b, 0x78, 0xa0, 0xba, + 0xb4, 0xd9, 0x3c, 0xbb, 0xf9, 0xf1, 0x3b, 0xbb, 0x64, 0x93, 0x2f, 0xbb, 0xea, 0x8e, 0x3e, 0xbb, + 0xd7, 0x2e, 0x7f, 0xba, 0xcb, 0x12, 0x9e, 0xbb, 0x6a, 0x5b, 0xa6, 0xba, 0xca, 0x76, 0x8e, 0xbb, + 0x73, 0x33, 0x04, 0xbb, 0x9b, 0x1e, 0xc3, 0xbb, 0x74, 0x05, 0x4a, 0xbb, 0x6b, 0xda, 0x37, 0xbb, + 0x9c, 0x02, 0x9e, 0x39, 0x42, 0x3e, 0x7d, 0xbb, 0x9d, 0xcd, 0x86, 0xbb, 0xd8, 0xd0, 0x62, 0xbb, + 0x30, 0xe5, 0xfc, 0xba, 0xce, 0x51, 0xc9, 0xbb, 0x28, 0xc2, 0x8f, 0xba, 0x93, 0x2b, 0x09, 0xba, + 0xef, 0x6a, 0xe8, 0xba, 0xce, 0x9c, 0x7a, 0xbb, 0x3b, 0x62, 0xab, 0xba, 0xc5, 0xa2, 0x78, 0xba, + 0x02, 0x49, 0xac, 0xbd, 0x76, 0xa4, 0x32, 0xbe, 0x9e, 0xc5, 0x05, 0xbe, 0x7a, 0x60, 0x5e, 0xbe, + 0xb8, 0x10, 0x3d, 0xbe, 0x10, 0x4b, 0xf1, 0xbd, 0x36, 0x56, 0x28, 0xbe, 0x28, 0x2e, 0xdc, 0xbd, + 0x0c, 0x49, 0x75, 0xbe, 0xc4, 0x04, 0x16, 0xbe, 0xee, 0x12, 0x62, 0xbe, 0xc0, 0x55, 0xa0, 0xbd, + 0x87, 0x96, 0x8d, 0xbd, 0xdb, 0x31, 0x7d, 0xbe, 0x54, 0x52, 0x23, 0xbe, 0x02, 0xa4, 0x4b, 0xbe, + 0xb2, 0xcf, 0x2e, 0xbe, 0xc6, 0x63, 0x5b, 0xbe, 0xe1, 0xf1, 0x07, 0xbe, 0x75, 0x32, 0xf0, 0xbd, + 0x56, 0x37, 0x2a, 0xbe, 0x46, 0x01, 0x62, 0xbe, 0x6c, 0x47, 0x28, 0xbe, 0x3a, 0xbd, 0x81, 0xbe, + 0x9e, 0x54, 0xfb, 0xbd, 0x34, 0x8e, 0x1c, 0xbe, 0xf3, 0x28, 0x2b, 0xbe, 0x52, 0xf9, 0x7e, 0xbd, + 0xf6, 0xd8, 0x0d, 0xbe, 0x97, 0x28, 0x7f, 0xbe, 0x7d, 0x72, 0x72, 0xbd, 0x98, 0xde, 0xf7, 0xbd, + 0x95, 0xc4, 0xf0, 0xbd, 0xb2, 0x15, 0xb7, 0xbd, 0x44, 0xeb, 0x22, 0x3d, 0xde, 0xc6, 0x95, 0xbd, + 0xe2, 0xbc, 0xff, 0xbd, 0xa8, 0xf9, 0xf8, 0xbc, 0x0a, 0x00, 0x76, 0xbd, 0x00, 0xf9, 0x61, 0xbd, + 0x88, 0xa6, 0x8e, 0xbc, 0x1a, 0x49, 0xb5, 0xbd, 0x5c, 0x6a, 0x7c, 0xbd, 0x4c, 0x38, 0x1d, 0xbe, + 0x08, 0x9e, 0x42, 0xbd, 0x75, 0x8a, 0x02, 0xbe, 0xab, 0xcc, 0x84, 0xbd, 0x95, 0xe1, 0xc9, 0xbd, + 0xae, 0x3f, 0x6a, 0xbd, 0xf2, 0x67, 0xcc, 0xbd, 0xf2, 0xa7, 0x92, 0xbd, 0xf8, 0x49, 0xec, 0xbd, + 0x54, 0xb4, 0x0f, 0xbd, 0x14, 0xd5, 0x52, 0xbd, 0x93, 0xd5, 0xd8, 0xbd, 0xf0, 0x38, 0x90, 0xbc, + 0xb0, 0xf7, 0x6e, 0xbc, 0xbc, 0x92, 0x84, 0xbd, 0x0c, 0x6a, 0x7a, 0xbd, 0x12, 0x05, 0xb1, 0xbd, + 0xb0, 0x0a, 0xb0, 0xbc, 0xde, 0x72, 0xb6, 0xbd, 0x2b, 0xe9, 0xa1, 0xbd, 0x46, 0x7e, 0xfd, 0xbb, + 0xd4, 0xb0, 0xa4, 0x3c, 0x7e, 0xdd, 0x45, 0x3d, 0xd7, 0x27, 0x61, 0xbc, 0x2d, 0xf4, 0x87, 0x3d, + 0x32, 0x55, 0x9c, 0x3d, 0x0d, 0xcf, 0xd6, 0x3c, 0xb0, 0xd0, 0xdb, 0x3c, 0x01, 0xc0, 0x31, 0x3d, + 0x2e, 0x3e, 0x24, 0x3d, 0xe6, 0x9c, 0x3e, 0x3d, 0x0e, 0xcb, 0x84, 0x3d, 0x68, 0x4f, 0x35, 0x3d, + 0x90, 0x85, 0xfd, 0x3b, 0xae, 0xd6, 0xb0, 0x3d, 0x54, 0x24, 0x25, 0x3d, 0x7a, 0x72, 0x74, 0x3d, + 0x9f, 0xd2, 0x03, 0x3d, 0x07, 0xad, 0x1a, 0x3d, 0x2e, 0xcb, 0x18, 0x3d, 0x80, 0x97, 0x67, 0x3d, + 0xcb, 0x62, 0x8c, 0x3d, 0x84, 0xa7, 0x9f, 0x3c, 0x6b, 0x6f, 0x81, 0x3d, 0xe9, 0x85, 0x33, 0x3d, + 0x65, 0x27, 0x2c, 0x3d, 0x99, 0x96, 0x0c, 0x3d, 0x6a, 0x91, 0x60, 0x3d, 0x62, 0x5a, 0x17, 0x3d, + 0x88, 0xcb, 0xc1, 0x3c, 0x26, 0x92, 0x5a, 0x3d, 0xfb, 0xec, 0x54, 0x3d, 0x74, 0x81, 0x5d, 0x3d, + 0x16, 0xfe, 0xad, 0x3b, 0x3c, 0xf1, 0xc0, 0x3c, 0x76, 0x9b, 0xbe, 0xbc, 0xc5, 0x7b, 0x51, 0xbc, + 0x6b, 0xfc, 0x34, 0x3d, 0x28, 0x90, 0x17, 0x3d, 0xb2, 0x26, 0x8d, 0x3c, 0x7c, 0x6f, 0xb2, 0x3b, + 0x8c, 0x8e, 0x5f, 0x3b, 0x46, 0x4d, 0x82, 0x3c, 0xbc, 0x8c, 0xc8, 0x3c, 0xef, 0x42, 0xe9, 0x3c, + 0xf1, 0x42, 0x7e, 0xbc, 0x89, 0x75, 0x42, 0x3d, 0xea, 0x63, 0xb7, 0x3c, 0x3a, 0x99, 0xfa, 0x3c, + 0xa4, 0x01, 0xe1, 0x3c, 0x5c, 0xa7, 0x3d, 0x3d, 0x1b, 0x75, 0xd6, 0x3c, 0x9b, 0x59, 0x84, 0x3c, + 0x54, 0xbd, 0xd8, 0xbb, 0xac, 0xdb, 0x3e, 0x3b, 0xb9, 0x3c, 0xdb, 0x3c, 0x48, 0x23, 0x05, 0x3c, + 0xd5, 0x8a, 0x0a, 0xbc, 0xd6, 0x8f, 0x51, 0x3d, 0xd0, 0x6a, 0xe1, 0xba, 0x96, 0x81, 0xb3, 0xbb, + 0x9a, 0x45, 0x7f, 0x3b, 0x4e, 0x41, 0x86, 0x3d, 0x7c, 0xc1, 0x49, 0x3b, 0x40, 0x04, 0x5b, 0xbc, + 0xf4, 0x29, 0x27, 0x3a, 0xaa, 0x61, 0xb2, 0xbb, 0x6d, 0xe4, 0xf8, 0x3a, 0xb6, 0x75, 0xe1, 0xbb, + 0x8e, 0x99, 0x1b, 0xbc, 0x57, 0x50, 0x9c, 0xbb, 0xe0, 0x09, 0x3d, 0xbb, 0x62, 0xf6, 0xa1, 0xbb, + 0x0a, 0xa9, 0xb7, 0xbb, 0x36, 0x1c, 0x9e, 0xbb, 0x5b, 0xc0, 0x11, 0xbc, 0xea, 0x0b, 0x41, 0xbb, + 0xef, 0x2e, 0x9d, 0x3a, 0xf2, 0x79, 0x35, 0xbc, 0x9e, 0x05, 0xa4, 0xbb, 0x50, 0xc9, 0xeb, 0xbb, + 0xe4, 0xb8, 0x8c, 0xbb, 0xbe, 0x74, 0x98, 0xbb, 0x01, 0x07, 0x94, 0xbb, 0x02, 0x5c, 0xb2, 0xbb, + 0x4e, 0xe2, 0x0f, 0xbc, 0x51, 0x5e, 0xb1, 0xba, 0xd8, 0xd8, 0xee, 0xbb, 0x08, 0x83, 0xd1, 0xbb, + 0x9c, 0xcc, 0xb0, 0xbb, 0x32, 0xd3, 0xb5, 0xbb, 0x04, 0xe3, 0xc7, 0xbb, 0xc2, 0x79, 0x1c, 0xbb, + 0x8a, 0x5f, 0x46, 0xbb, 0x0c, 0x31, 0x08, 0xbc, 0xcb, 0x33, 0xae, 0xbb, 0x94, 0x74, 0xeb, 0xbb, + 0xbd, 0xbe, 0x0a, 0x3b, 0x55, 0x88, 0x42, 0xbb, 0xa4, 0xde, 0x3a, 0x3b, 0x18, 0xda, 0x87, 0x3a, + 0x20, 0xdb, 0xcd, 0xbb, 0x0b, 0xa4, 0xbf, 0xbb, 0xf6, 0x36, 0xfc, 0xba, 0x82, 0x6c, 0x96, 0xba, + 0x7f, 0xc6, 0xfd, 0xba, 0x69, 0xf6, 0xe6, 0xba, 0xeb, 0x40, 0x9f, 0xbb, 0x69, 0x1e, 0xb3, 0xba, + 0x28, 0x00, 0x67, 0x3b, 0x09, 0x94, 0xeb, 0xbb, 0x73, 0x87, 0x52, 0xbb, 0xd2, 0x77, 0x8c, 0xbb, + 0x81, 0x88, 0x7a, 0xbb, 0x34, 0x70, 0xb4, 0xbb, 0x9b, 0x26, 0x5f, 0xbb, 0x95, 0x1a, 0xb8, 0xba, + 0x0c, 0xe8, 0xa5, 0xba, 0xb2, 0xa0, 0x9f, 0x39, 0x21, 0xe4, 0x6e, 0xbb, 0xb6, 0x0a, 0x39, 0xbb, + 0x18, 0xe2, 0xb5, 0xb9, 0xfc, 0x00, 0xed, 0xbb, 0x08, 0x8d, 0xe9, 0xb9, 0x5c, 0x2e, 0xf3, 0x3a, + 0x8b, 0xba, 0x89, 0xba, 0xd5, 0x2a, 0x1c, 0xbc, 0x4c, 0x31, 0xdf, 0xb9, 0x10, 0x29, 0xe7, 0xb9, + 0x8b, 0x77, 0x2d, 0xbb, 0xf2, 0x11, 0x45, 0xbb, 0x79, 0xa1, 0x68, 0x3a, 0xed, 0x7f, 0x7d, 0xbb, + 0xd8, 0xe5, 0x8f, 0xbb, 0x47, 0xa7, 0x86, 0xba, 0xe5, 0xdb, 0xea, 0xba, 0xd1, 0x72, 0x22, 0xbb, + 0xee, 0x8a, 0xe5, 0xba, 0xef, 0x05, 0x46, 0xbb, 0x8e, 0x9b, 0x4e, 0xbb, 0x45, 0x29, 0x7d, 0xbb, + 0x14, 0x40, 0xa7, 0xba, 0x6f, 0x47, 0x9c, 0xbb, 0x90, 0x92, 0x17, 0xbb, 0x47, 0x47, 0x65, 0xbb, + 0x61, 0x1e, 0xee, 0xba, 0x2e, 0xe4, 0x23, 0xbb, 0x70, 0x72, 0x14, 0xbb, 0xa8, 0xbd, 0x7b, 0xbb, + 0xe7, 0x33, 0x54, 0xbb, 0x0f, 0x77, 0xcb, 0xba, 0x24, 0x0b, 0x79, 0xbb, 0xc0, 0x92, 0xf0, 0xba, + 0x1d, 0x4d, 0xfc, 0xba, 0xe6, 0x4a, 0xe2, 0xba, 0x42, 0xed, 0x4a, 0xbb, 0xc7, 0xb5, 0x3d, 0xbb, + 0xf6, 0xd6, 0x9c, 0xba, 0x19, 0xbf, 0x2e, 0xbb, 0xfe, 0x95, 0x53, 0xbb, 0xd4, 0x4d, 0x18, 0xbb, + 0xd6, 0xfa, 0xf9, 0xba, 0xe3, 0xe8, 0xe7, 0xba, 0x2a, 0xbd, 0xb2, 0x3a, 0xd4, 0x89, 0xbd, 0x38, + 0xb7, 0x73, 0x36, 0xbb, 0xb0, 0x4e, 0xcd, 0xba, 0x7f, 0xed, 0xab, 0xba, 0x3e, 0x5e, 0x18, 0xba, + 0x84, 0x3e, 0xae, 0x38, 0xa5, 0x4e, 0xc3, 0xba, 0x86, 0xb7, 0x94, 0xba, 0xeb, 0x6a, 0x49, 0xbb, + 0x38, 0x76, 0xed, 0xb8, 0x01, 0x2f, 0x39, 0xbb, 0x55, 0xa1, 0xb9, 0xba, 0xc9, 0xf5, 0x05, 0xbb, + 0x19, 0x35, 0xcf, 0xba, 0xa6, 0xdf, 0x3f, 0xbb, 0x78, 0xfd, 0xdf, 0xba, 0x99, 0xd0, 0xee, 0xba, + 0xc9, 0x5a, 0x08, 0x3a, 0xa0, 0xa7, 0x3d, 0xba, 0x5c, 0xa4, 0x01, 0xbb, 0xd0, 0xec, 0x52, 0xb8, + 0xb2, 0x1f, 0x1d, 0x3a, 0x3d, 0x53, 0x28, 0xbb, 0x7f, 0x18, 0x8d, 0xb9, 0x3b, 0xb5, 0x4a, 0xba, + 0x96, 0xa5, 0x5a, 0xb9, 0xab, 0xb2, 0x56, 0xbb, 0x22, 0x55, 0x4d, 0xba, 0x5f, 0x68, 0x89, 0x3a, + 0xc7, 0x52, 0x77, 0x3d, 0x2d, 0x0b, 0x94, 0xbd, 0x09, 0x2a, 0x32, 0x3d, 0x6c, 0x67, 0x42, 0xbd, + 0x64, 0xcf, 0x15, 0xbe, 0x3a, 0x18, 0xe5, 0xbd, 0x42, 0x16, 0x20, 0xbd, 0x5f, 0x26, 0x60, 0xbd, + 0x57, 0xd4, 0xa4, 0xbd, 0xc0, 0xe5, 0x54, 0xbd, 0x6d, 0xb1, 0x0e, 0xbe, 0x14, 0xf3, 0x25, 0xbc, + 0x0f, 0x2b, 0x85, 0x3d, 0x2c, 0xc9, 0x31, 0xbe, 0x67, 0x24, 0x9c, 0xbd, 0x66, 0x15, 0xd6, 0xbd, + 0x08, 0xbe, 0x9d, 0xbd, 0xfd, 0xf6, 0xb6, 0xbd, 0x82, 0xde, 0x91, 0xbd, 0xbe, 0x65, 0x45, 0xbd, + 0x7e, 0x89, 0xd5, 0xbd, 0x80, 0x37, 0x8c, 0x3a, 0x46, 0xb7, 0xc5, 0xbd, 0x56, 0x5a, 0xcb, 0xbd, + 0xa0, 0xac, 0x79, 0xbd, 0x40, 0xac, 0x02, 0xbe, 0x9f, 0x03, 0x6f, 0xbd, 0xe2, 0xec, 0x7f, 0x3c, + 0x6b, 0xe5, 0x25, 0xbd, 0x3f, 0xc2, 0x36, 0xbe, 0x15, 0xa2, 0x37, 0xbd, 0x8d, 0xc0, 0xab, 0xbd, + 0x68, 0x9b, 0xbd, 0xbd, 0xbb, 0xc6, 0x78, 0xbd, 0xde, 0xab, 0xae, 0xbd, 0xb7, 0x11, 0x44, 0xbd, + 0x94, 0x26, 0x75, 0xbd, 0x7b, 0x8f, 0xbb, 0xbd, 0xa8, 0x49, 0xc7, 0xbd, 0x9a, 0x7a, 0x4a, 0xbd, + 0xce, 0x7e, 0x8b, 0xbd, 0x14, 0xaf, 0xb9, 0xbd, 0x98, 0x46, 0x93, 0xbd, 0xca, 0x71, 0x82, 0xbd, + 0x67, 0xab, 0x36, 0xbd, 0x80, 0xa7, 0xa4, 0xbd, 0xea, 0xfa, 0x70, 0xbd, 0x84, 0x77, 0xcc, 0xbd, + 0xb5, 0x41, 0xa6, 0xbd, 0x4a, 0xdc, 0xa3, 0xbd, 0xe2, 0xc6, 0x81, 0xbd, 0xa0, 0x6b, 0x91, 0xbd, + 0x29, 0x22, 0x6c, 0xbd, 0x44, 0xc4, 0xbb, 0xbd, 0xd4, 0x20, 0xb1, 0xbd, 0xf6, 0xa5, 0x7f, 0xbd, + 0xe6, 0x49, 0x46, 0xbd, 0xbc, 0x09, 0xf5, 0xbd, 0x3f, 0x15, 0x4b, 0xbd, 0x78, 0xa5, 0x13, 0xbd, + 0x89, 0x50, 0xa1, 0xbd, 0x80, 0xc9, 0xd2, 0xbd, 0xa3, 0x2a, 0x98, 0xbd, 0xa3, 0xa1, 0xa0, 0xbd, + 0xbe, 0xed, 0x5b, 0x3d, 0x20, 0xd7, 0x21, 0x3d, 0xac, 0x7a, 0x80, 0x3d, 0xea, 0x3d, 0x43, 0x3d, + 0x45, 0xa9, 0x34, 0x3d, 0x5a, 0x2b, 0x37, 0x3d, 0xca, 0x12, 0x3e, 0x3d, 0x82, 0xcf, 0x59, 0x3d, + 0x58, 0xd0, 0x79, 0x3d, 0x60, 0x74, 0x84, 0x3d, 0x32, 0x17, 0x6d, 0x3d, 0x93, 0x7b, 0x36, 0x3d, + 0xf3, 0x17, 0x0d, 0x3d, 0xec, 0xcd, 0x6c, 0x3d, 0xe8, 0x8c, 0x53, 0x3d, 0xdc, 0x56, 0x60, 0x3d, + 0x95, 0x33, 0x5e, 0x3d, 0xb6, 0x10, 0x54, 0x3d, 0x33, 0x0e, 0xf0, 0x3c, 0x66, 0x84, 0x2d, 0x3d, + 0x17, 0x2f, 0x3a, 0x3d, 0x43, 0x25, 0x80, 0x3d, 0xc0, 0xcc, 0x47, 0x3d, 0x4d, 0xaa, 0x36, 0x3d, + 0x34, 0xaa, 0x22, 0x3d, 0x91, 0x9d, 0x4a, 0x3d, 0x18, 0xdb, 0x44, 0x3d, 0x9c, 0xe5, 0x42, 0x3d, + 0xe6, 0xc9, 0x5a, 0x3d, 0x7d, 0xce, 0x5d, 0x3d, 0x43, 0x17, 0x80, 0x3d, 0xeb, 0x55, 0x56, 0x3d, + 0x54, 0x03, 0x86, 0x3c, 0x8e, 0x70, 0x73, 0x3c, 0x12, 0xe1, 0x37, 0x3c, 0x01, 0x5c, 0x01, 0x3c, + 0x58, 0xc5, 0x85, 0x3c, 0xf6, 0x5e, 0x8a, 0x3c, 0x72, 0x4e, 0xfd, 0x3c, 0x36, 0xcf, 0x16, 0x3c, + 0x48, 0x80, 0xac, 0x3c, 0x6e, 0xde, 0x0d, 0x3c, 0x63, 0x46, 0x97, 0x3c, 0xa7, 0x4e, 0x18, 0x3c, + 0x38, 0xe9, 0xfc, 0x3b, 0x10, 0xed, 0xbc, 0x3c, 0x66, 0xa3, 0xb3, 0x3c, 0xa8, 0x99, 0x9b, 0x3c, + 0x65, 0x51, 0x38, 0x3c, 0x41, 0x8a, 0xdf, 0x3c, 0xd7, 0xff, 0x34, 0x3a, 0xfb, 0xd5, 0xa8, 0x3c, + 0x1c, 0x55, 0x7f, 0x3b, 0x41, 0x7f, 0xbd, 0x3c, 0xcd, 0xcc, 0x4b, 0x3c, 0x3e, 0x1f, 0x93, 0x3c, + 0x42, 0xf7, 0x94, 0x3c, 0x4a, 0xd5, 0xd6, 0x3c, 0x4e, 0x01, 0x20, 0x3c, 0xa6, 0x72, 0x95, 0xbb, + 0x06, 0x76, 0x25, 0x3c, 0xc8, 0x43, 0x90, 0x3c, 0x78, 0xbc, 0x2a, 0x3c, 0x24, 0xb1, 0x8b, 0x3c, + 0xdf, 0x97, 0xbd, 0xbb, 0x48, 0x36, 0x98, 0xbb, 0x76, 0x94, 0xe9, 0xbb, 0xe8, 0xf0, 0xc2, 0xbb, + 0x93, 0x2c, 0xb2, 0xbb, 0xf4, 0x71, 0x93, 0xbb, 0x6b, 0x76, 0xac, 0xbb, 0x92, 0x75, 0xde, 0xbb, + 0x8e, 0xf0, 0x01, 0xbc, 0xe6, 0x0b, 0xe9, 0xbb, 0x76, 0x37, 0xec, 0xbb, 0xbc, 0xa3, 0xa4, 0xbb, + 0xf6, 0x3e, 0x85, 0xbb, 0x84, 0x22, 0xea, 0xbb, 0xdc, 0x86, 0xe1, 0xbb, 0x39, 0x47, 0xbf, 0xbb, + 0x42, 0x67, 0xc4, 0xbb, 0x48, 0xcb, 0xd3, 0xbb, 0x7f, 0xe6, 0x17, 0xbb, 0x24, 0x6e, 0xa4, 0xbb, + 0xcb, 0xd5, 0xa6, 0xbb, 0x78, 0x4e, 0xf5, 0xbb, 0xe0, 0xab, 0xa5, 0xbb, 0x27, 0x73, 0xb4, 0xbb, + 0x30, 0x1d, 0xac, 0xbb, 0x14, 0x6b, 0x9b, 0xbb, 0x73, 0x12, 0xc6, 0xbb, 0x14, 0x08, 0xbb, 0xbb, + 0xde, 0x0e, 0xc1, 0xbb, 0xe1, 0x99, 0xb6, 0xbb, 0x13, 0x56, 0xf3, 0xbb, 0x96, 0x78, 0xc7, 0xbb, + 0xd2, 0x82, 0x06, 0xbb, 0xfb, 0xda, 0x0e, 0xbb, 0x00, 0x94, 0x01, 0xbb, 0x27, 0x3d, 0x01, 0xbb, + 0x97, 0x4e, 0x2e, 0xbb, 0x3e, 0x08, 0xe1, 0xba, 0xef, 0x73, 0x73, 0xbb, 0xd1, 0xbe, 0x1c, 0xbb, + 0x58, 0x07, 0x81, 0xbb, 0xbf, 0xf8, 0xc5, 0xba, 0xb8, 0x1b, 0x56, 0xbb, 0x9e, 0x75, 0xc4, 0xba, + 0x17, 0xa6, 0xb6, 0xba, 0x03, 0x86, 0x70, 0xbb, 0x4c, 0x24, 0x80, 0xbb, 0xfe, 0x17, 0x14, 0xbb, + 0xe6, 0x05, 0xde, 0xba, 0xd1, 0x8e, 0x83, 0xbb, 0xde, 0x79, 0x0e, 0x3a, 0x3a, 0x4d, 0x3a, 0xbb, + 0x98, 0x3c, 0x75, 0xba, 0x1c, 0x25, 0x68, 0xbb, 0xb9, 0x5d, 0xba, 0xba, 0x38, 0x55, 0x3a, 0xbb, + 0x0f, 0x23, 0x4b, 0xbb, 0xae, 0x84, 0x1e, 0xbb, 0x43, 0x2d, 0x11, 0xbb, 0x94, 0x5a, 0x01, 0xba, + 0x31, 0x7a, 0xcc, 0xba, 0x65, 0x72, 0xfb, 0xba, 0x7e, 0x13, 0x11, 0xbb, 0x38, 0xc2, 0x27, 0xbb, + 0xfe, 0xad, 0x5b, 0xbb, 0xc7, 0x45, 0x17, 0xbb, 0x9e, 0x23, 0x6e, 0xbb, 0x57, 0xf6, 0x22, 0xbb, + 0xa6, 0x64, 0x20, 0xbb, 0xc5, 0x18, 0x44, 0xbb, 0x0c, 0x00, 0x43, 0xbb, 0xe7, 0x23, 0x31, 0xbb, + 0x25, 0x09, 0x4f, 0xbb, 0x2b, 0xe2, 0x7a, 0xbb, 0x88, 0xac, 0x4d, 0xbb, 0xd5, 0x71, 0x2b, 0xbb, + 0x29, 0x17, 0xfe, 0xba, 0x72, 0x01, 0x53, 0xbb, 0x50, 0x3a, 0x2d, 0xbb, 0xb1, 0x17, 0x64, 0xbb, + 0x75, 0xc6, 0x54, 0xbb, 0x05, 0x47, 0x40, 0xbb, 0x55, 0xaa, 0x0c, 0xbb, 0xc6, 0xc8, 0x24, 0xbb, + 0xd6, 0x7c, 0x2b, 0xbb, 0x99, 0xf7, 0x6a, 0xbb, 0x85, 0x28, 0x4c, 0xbb, 0x50, 0x0f, 0x23, 0xbb, + 0x4d, 0x94, 0x07, 0xbb, 0xde, 0xc5, 0x66, 0xbb, 0xcc, 0x44, 0x24, 0xbb, 0xf5, 0x0b, 0x20, 0xbb, + 0xdc, 0x08, 0x51, 0xbb, 0xc9, 0x13, 0x67, 0xbb, 0xe7, 0x53, 0x62, 0xbb, 0x60, 0x4d, 0x48, 0xbb, + 0x11, 0xaf, 0xc2, 0xba, 0xfa, 0xf9, 0x8c, 0xba, 0x50, 0xb2, 0x8a, 0xba, 0x08, 0x90, 0x02, 0xba, + 0xbe, 0xbe, 0x8a, 0xba, 0xc1, 0xd1, 0xd1, 0xba, 0x98, 0x43, 0x10, 0xbb, 0xf8, 0x36, 0x04, 0xba, + 0xc2, 0x5e, 0x98, 0xba, 0xea, 0xa8, 0x86, 0xba, 0xdb, 0x05, 0x99, 0xba, 0x05, 0xae, 0x5f, 0xba, + 0x92, 0xd8, 0x1d, 0xba, 0xdf, 0x43, 0xc2, 0xba, 0x80, 0xaf, 0x97, 0xba, 0xee, 0xb8, 0xdd, 0xba, + 0x58, 0xc1, 0x8f, 0xba, 0x1c, 0x15, 0xe0, 0xba, 0x67, 0x9f, 0x3b, 0xba, 0x7f, 0x02, 0xbb, 0xba, + 0x76, 0x3a, 0x0d, 0xba, 0x5e, 0x77, 0xd3, 0xba, 0x27, 0x28, 0xaa, 0xba, 0x8a, 0x8d, 0x97, 0xba, + 0x4a, 0x11, 0x82, 0xba, 0x22, 0x9d, 0x1a, 0xbb, 0xe7, 0x23, 0x1b, 0xba, 0xba, 0x0c, 0x2d, 0x39, + 0x16, 0x31, 0x86, 0xba, 0x02, 0xad, 0xde, 0xba, 0x4a, 0x1e, 0x5e, 0xba, 0xcc, 0x6b, 0xa9, 0xba, + 0xed, 0x31, 0x85, 0xbd, 0x9d, 0x4c, 0x7c, 0xbd, 0x73, 0x1f, 0xa6, 0xbd, 0xac, 0xfe, 0x9d, 0xbd, + 0x26, 0xea, 0x9b, 0xbd, 0x68, 0xad, 0x40, 0xbd, 0x9b, 0x07, 0x9d, 0xbd, 0x6c, 0x36, 0xba, 0xbd, + 0xbe, 0x93, 0xee, 0xbd, 0x52, 0x3d, 0x98, 0xbd, 0x17, 0x8b, 0xcc, 0xbd, 0xb3, 0x5e, 0x6b, 0xbd, + 0x13, 0xec, 0x4d, 0xbd, 0x81, 0x05, 0xd0, 0xbd, 0xfa, 0x8c, 0xda, 0xbd, 0xa2, 0x70, 0x87, 0xbd, + 0x37, 0x8f, 0x87, 0xbd, 0x47, 0x74, 0xc8, 0xbd, 0x9e, 0xf9, 0x86, 0xbb, 0xaf, 0xc1, 0x90, 0xbd, + 0xc2, 0xaf, 0x5a, 0xbd, 0x27, 0x27, 0xcf, 0xbd, 0x3b, 0xee, 0x51, 0xbd, 0x2e, 0x6e, 0xa0, 0xbd, + 0x9c, 0xa5, 0xa7, 0xbd, 0x00, 0x7f, 0x4f, 0xbd, 0x3a, 0xec, 0xa4, 0xbd, 0xe9, 0xd6, 0x78, 0xbd, + 0xcc, 0x5b, 0x83, 0xbd, 0xda, 0xc9, 0x6f, 0xbd, 0xb7, 0x0e, 0xb8, 0xbd, 0xa9, 0x12, 0x9f, 0xbd, + 0x7c, 0x8c, 0x9e, 0xbd, 0x2e, 0x03, 0x82, 0xbc, 0x97, 0x56, 0x25, 0xbc, 0x41, 0x8d, 0x22, 0xbd, + 0xda, 0x86, 0x3e, 0xbd, 0x33, 0x74, 0x12, 0xbd, 0xfa, 0xe8, 0x30, 0xbd, 0x0d, 0x8f, 0x89, 0xbc, + 0x1a, 0xfd, 0xbc, 0x3b, 0x53, 0x15, 0x49, 0xbd, 0x1e, 0x79, 0x06, 0xbd, 0xe4, 0xb9, 0x46, 0xbd, + 0xea, 0xaf, 0x84, 0xbb, 0x41, 0x14, 0x2e, 0xbd, 0xba, 0xf3, 0x0d, 0xbd, 0x5c, 0x18, 0x25, 0xbd, + 0xfc, 0xbf, 0x30, 0xbd, 0x66, 0x2c, 0x05, 0xbc, 0x80, 0x4c, 0xd8, 0xbc, 0x5d, 0xe8, 0x4a, 0xbd, + 0xc1, 0x2c, 0x29, 0xbd, 0x18, 0x21, 0xf3, 0xbc, 0x32, 0xfc, 0x13, 0xbd, 0x78, 0x5d, 0x98, 0x3a, + 0xb9, 0x17, 0x6e, 0x3c, 0xaa, 0x19, 0x4b, 0xbc, 0x4c, 0x1a, 0xba, 0xbc, 0x76, 0x65, 0x90, 0xbc, + 0x27, 0x4b, 0xd4, 0xbc, 0x44, 0x72, 0x82, 0xbd, 0x80, 0x74, 0x18, 0xbd, 0x8e, 0xdf, 0x32, 0xbd, + 0xc4, 0x60, 0xd3, 0x3c, 0x41, 0x81, 0x12, 0x3c, 0x23, 0x83, 0x1d, 0x3c, 0x2b, 0x27, 0xb9, 0x3c, + 0xd7, 0xe0, 0x07, 0x3d, 0x34, 0xd5, 0xa1, 0x3c, 0x6c, 0xde, 0x68, 0x3c, 0xbe, 0xf6, 0xed, 0x3c, + 0x8c, 0xb3, 0x73, 0x3c, 0xf6, 0x49, 0x0f, 0x3d, 0x94, 0x0b, 0x0a, 0x3d, 0x52, 0x72, 0xaf, 0x3c, + 0xc2, 0xd6, 0xcf, 0xbb, 0xbe, 0xe5, 0x24, 0x3d, 0xb3, 0x59, 0xa5, 0x3c, 0xd1, 0x78, 0xb7, 0x3c, + 0x79, 0x8b, 0x90, 0x3c, 0xb9, 0xf2, 0x59, 0x3c, 0x7d, 0x67, 0x05, 0x3c, 0xde, 0x03, 0xd8, 0x3c, + 0x28, 0x6a, 0xd8, 0x3c, 0x8f, 0x71, 0x80, 0x3c, 0x1a, 0xf7, 0xce, 0x3c, 0xee, 0x3a, 0x2e, 0x3c, + 0x26, 0xd3, 0x50, 0x3c, 0xd4, 0xc1, 0x1b, 0x3c, 0x85, 0x43, 0xa6, 0x3c, 0xb4, 0x65, 0xa2, 0x3c, + 0x3b, 0x48, 0x8f, 0x3c, 0x3a, 0xbb, 0xb1, 0x3c, 0x02, 0x9a, 0x26, 0x3d, 0x7a, 0xd3, 0x2f, 0x3d, + 0xe6, 0xda, 0xb0, 0x3b, 0xfc, 0x6f, 0x36, 0x3b, 0x1a, 0xbe, 0xfe, 0xbb, 0x7c, 0x6f, 0x09, 0xbc, + 0x80, 0xfd, 0x51, 0x3c, 0x26, 0xc6, 0x80, 0x3c, 0x14, 0xbc, 0x37, 0x3c, 0x58, 0x7f, 0xe8, 0x3b, + 0x38, 0x29, 0x84, 0xba, 0x9c, 0x48, 0x94, 0xba, 0x47, 0x5d, 0xeb, 0x3b, 0xc2, 0xab, 0x2c, 0x3c, + 0xb8, 0x8a, 0x30, 0x3a, 0x90, 0xca, 0xf2, 0x3b, 0xae, 0xce, 0xc5, 0x3c, 0x1b, 0x52, 0x81, 0x3b, + 0x44, 0x00, 0x26, 0x3c, 0x75, 0x5e, 0xc6, 0x3b, 0x3d, 0x2e, 0x07, 0xbc, 0xfa, 0xd4, 0x3f, 0x3c, + 0x40, 0xe2, 0xf8, 0x3a, 0xb0, 0x64, 0xdf, 0xbb, 0xce, 0xba, 0xc3, 0xba, 0x64, 0x59, 0x2d, 0xbc, + 0x6c, 0x3a, 0x62, 0x3b, 0x52, 0x5d, 0x53, 0x3c, 0x50, 0x6b, 0xb9, 0x3a, 0x88, 0x4e, 0x16, 0xbb, + 0xb4, 0xf7, 0x44, 0x3a, 0x9d, 0x9b, 0x8a, 0x3c, 0xb3, 0xc5, 0x4e, 0x3c, 0x94, 0xdc, 0x33, 0x3b, + 0xda, 0x70, 0xcc, 0xba, 0x5a, 0x6b, 0x79, 0xba, 0xb6, 0xf3, 0x3b, 0xba, 0x25, 0x01, 0xd8, 0xba, + 0xee, 0x3e, 0x85, 0xbb, 0xa9, 0x99, 0x27, 0xbb, 0x7f, 0x3b, 0xa4, 0xba, 0xe0, 0x0f, 0x88, 0xbb, + 0x1a, 0x84, 0x1b, 0xbb, 0x1f, 0xf1, 0x68, 0xbb, 0x20, 0x90, 0x8d, 0xbb, 0x32, 0x15, 0x0b, 0xbb, + 0x3e, 0xfb, 0x89, 0x3a, 0x6a, 0x98, 0xa3, 0xbb, 0xed, 0xd5, 0x49, 0xbb, 0x32, 0x0c, 0x11, 0xbb, + 0x54, 0xc6, 0xe0, 0xba, 0x6d, 0xd9, 0x04, 0xbb, 0x50, 0x8b, 0x95, 0x38, 0xfa, 0x5d, 0x3d, 0xbb, + 0x70, 0xb1, 0x2f, 0xbb, 0xc1, 0xea, 0x85, 0xba, 0x26, 0x8a, 0x24, 0xbb, 0xbd, 0xd0, 0x90, 0xba, + 0x1a, 0x5a, 0x28, 0xbb, 0x58, 0x4b, 0xd9, 0xba, 0x4c, 0x86, 0x1a, 0xbb, 0x10, 0xdb, 0x14, 0xbb, + 0xff, 0x93, 0xeb, 0xba, 0xf8, 0x93, 0x01, 0xbb, 0xc8, 0xf1, 0xb3, 0xbb, 0xcd, 0xc9, 0xa7, 0xbb, + 0x72, 0x16, 0xfc, 0x39, 0x13, 0xf9, 0xaf, 0xb9, 0x4d, 0xee, 0x85, 0x3a, 0x51, 0x30, 0xbe, 0x3a, + 0x1d, 0x15, 0x05, 0xbb, 0x38, 0x27, 0x0d, 0xbb, 0xa4, 0xd9, 0x79, 0xba, 0x9f, 0x40, 0x00, 0xbb, + 0xb4, 0xb5, 0x4d, 0xba, 0x00, 0x03, 0x6f, 0x38, 0x2b, 0xa7, 0xda, 0xba, 0x7c, 0x96, 0x87, 0xba, + 0x36, 0x13, 0xb8, 0x39, 0x84, 0xd3, 0xdf, 0xba, 0x1e, 0xcd, 0x63, 0xbb, 0xec, 0x12, 0xc1, 0xb9, + 0x92, 0x9b, 0x7c, 0xba, 0x0f, 0xb0, 0xaa, 0xba, 0xe4, 0x36, 0xe0, 0x3a, 0x58, 0x93, 0xba, 0xba, + 0xd8, 0x16, 0x70, 0xb9, 0x38, 0xf3, 0xa0, 0x3a, 0xc8, 0x1a, 0x2d, 0x39, 0x85, 0x3f, 0x85, 0x3a, + 0xc6, 0xe3, 0xd6, 0xba, 0xde, 0xe3, 0x02, 0xbb, 0xc2, 0x46, 0xf6, 0xb9, 0xb0, 0x0f, 0x7a, 0xb8, + 0x80, 0x66, 0x01, 0xb9, 0xf4, 0x8e, 0xc4, 0xba, 0x07, 0x09, 0x30, 0xbb, 0x67, 0x7b, 0x90, 0xba, + 0x26, 0x8d, 0x0f, 0xbb, 0xfc, 0x1e, 0x14, 0xba, 0x62, 0x12, 0x24, 0xba, 0x97, 0x87, 0xd3, 0xba, + 0x50, 0xab, 0xf3, 0xba, 0x21, 0x74, 0x95, 0xba, 0xd7, 0xd6, 0x91, 0xba, 0xf5, 0xd5, 0xa7, 0xba, + 0x16, 0xa7, 0xf8, 0xb9, 0xb7, 0x85, 0x0c, 0xbb, 0xe3, 0x9a, 0xe1, 0xba, 0xf5, 0xfc, 0xc1, 0xba, + 0xa0, 0x77, 0x3b, 0x39, 0x2e, 0x4a, 0x0b, 0xbb, 0x46, 0x5b, 0x87, 0xba, 0xfa, 0xaa, 0xbf, 0xba, + 0xb7, 0x64, 0xa4, 0xba, 0xe5, 0x71, 0x12, 0xba, 0x25, 0x70, 0x5e, 0xba, 0xaf, 0xa4, 0xda, 0xba, + 0xdd, 0xf6, 0xd8, 0xba, 0xff, 0x11, 0x99, 0xba, 0x1a, 0x8d, 0xcd, 0xba, 0x0d, 0x21, 0x06, 0xba, + 0xe4, 0xf7, 0x49, 0xb9, 0xd2, 0xae, 0xd2, 0xb9, 0x63, 0x11, 0x93, 0xba, 0xa4, 0xb3, 0x8b, 0xba, + 0x90, 0x59, 0x8d, 0xba, 0x5d, 0x0f, 0xda, 0xba, 0xec, 0x46, 0x02, 0xbb, 0x69, 0xf2, 0x16, 0xbb, + 0x12, 0x5f, 0x99, 0xba, 0x27, 0xcf, 0x86, 0xb9, 0x13, 0x57, 0x7f, 0x39, 0xb6, 0xed, 0x86, 0xb8, + 0x22, 0x85, 0x5c, 0xba, 0x60, 0x03, 0x76, 0xba, 0x02, 0x5f, 0x7c, 0xba, 0xac, 0xb6, 0x3f, 0xb9, + 0xa0, 0xb3, 0xa7, 0x39, 0x11, 0x79, 0xb1, 0xb9, 0xe7, 0x38, 0xcf, 0xb9, 0x02, 0x66, 0x75, 0xba, + 0x23, 0x62, 0x34, 0xb9, 0x29, 0x35, 0xfd, 0xb9, 0x76, 0x52, 0xa1, 0xba, 0x4a, 0x82, 0x0d, 0xba, + 0xe4, 0x50, 0x66, 0xba, 0x66, 0xbc, 0x4d, 0xb9, 0xa8, 0x3e, 0xf2, 0xb7, 0xc2, 0x20, 0x75, 0xba, + 0x6d, 0x1b, 0xe1, 0xb9, 0x5b, 0x60, 0x34, 0xb8, 0x3a, 0x8a, 0x71, 0xb9, 0x35, 0xef, 0x0f, 0x3a, + 0xec, 0xa4, 0x8e, 0x39, 0xcd, 0xd3, 0x15, 0xba, 0x78, 0x9f, 0x3a, 0xb9, 0x92, 0xe5, 0x27, 0x38, + 0xc6, 0x2f, 0x7d, 0xb9, 0xdf, 0xc2, 0xba, 0xba, 0x9e, 0x66, 0x19, 0xba, 0xac, 0x3b, 0x9a, 0xb9, + 0x57, 0x74, 0xec, 0x3b, 0x74, 0x3b, 0x2c, 0xbc, 0xcd, 0x0a, 0xc3, 0x3b, 0x30, 0x7f, 0xfc, 0x3b, + 0xa1, 0x51, 0x69, 0xbd, 0x92, 0x7a, 0x34, 0xbd, 0xb7, 0xb7, 0x65, 0xbc, 0xf0, 0x28, 0x84, 0xbd, + 0xbc, 0x69, 0x17, 0xbd, 0xf8, 0xc7, 0xdd, 0xbc, 0x2a, 0x53, 0x72, 0xbd, 0xab, 0x8e, 0xbf, 0xbc, + 0x06, 0xab, 0x89, 0x3c, 0xd1, 0x9c, 0x84, 0xbd, 0xda, 0x4c, 0x85, 0xbd, 0x4c, 0x5d, 0x9b, 0xbc, + 0x26, 0x7a, 0x9e, 0xbc, 0xff, 0x17, 0x11, 0xbd, 0x18, 0x4c, 0xda, 0x3c, 0xc9, 0x03, 0x14, 0xbd, + 0x26, 0x95, 0xb7, 0xbc, 0x74, 0x94, 0x1f, 0x3c, 0x52, 0x2d, 0x8e, 0xbc, 0x20, 0x39, 0xa8, 0xba, + 0xea, 0xcc, 0x50, 0xbd, 0x1a, 0xbb, 0x1b, 0xbd, 0xb7, 0x10, 0xd6, 0xbc, 0x49, 0xb9, 0xb2, 0xbc, + 0x8e, 0xe1, 0x77, 0xbc, 0x55, 0xfa, 0xbd, 0xbc, 0x9d, 0x5a, 0xa9, 0xbd, 0x47, 0x07, 0x72, 0xbd, + 0x25, 0x97, 0x0a, 0xc0, 0x6c, 0xcf, 0x30, 0xc0, 0x6d, 0x42, 0x8e, 0xbe, 0xd7, 0x8f, 0xb6, 0xbf, + 0x4d, 0x71, 0x2b, 0xc0, 0x19, 0x12, 0xa7, 0xbf, 0x7a, 0xd8, 0xf2, 0xbf, 0xa8, 0xe4, 0xda, 0xbf, + 0xc3, 0x92, 0xe8, 0xbf, 0x50, 0x27, 0x12, 0xc0, 0xa4, 0xed, 0xe4, 0xbf, 0x4f, 0x0d, 0x58, 0xc0, + 0x52, 0x2a, 0xe9, 0xbf, 0x69, 0xd4, 0x4e, 0xc0, 0xa5, 0xab, 0xc9, 0xbf, 0xfe, 0x08, 0x3d, 0xc0, + 0xb3, 0x05, 0xc5, 0xbf, 0xaa, 0xde, 0x65, 0xc0, 0xe6, 0x4b, 0x04, 0xc0, 0x18, 0xe7, 0x23, 0xc0, + 0x02, 0x01, 0x24, 0xbf, 0x18, 0x65, 0xff, 0xbf, 0x6e, 0xe4, 0x40, 0xc0, 0x71, 0x59, 0xca, 0xbf, + 0x91, 0x25, 0xd4, 0xbf, 0x84, 0x76, 0x55, 0xc0, 0x29, 0xcf, 0xd6, 0xbf, 0x8f, 0x72, 0x0c, 0xc0, + 0x62, 0xfb, 0x93, 0xbf, 0x5e, 0x1c, 0x05, 0xc0, 0x3f, 0x90, 0x07, 0xc0, 0x63, 0x20, 0x93, 0xbe, + 0x5b, 0xc5, 0x2c, 0x3f, 0xd6, 0x18, 0xcd, 0x3f, 0x9a, 0xcf, 0x02, 0x3f, 0xba, 0xf6, 0xe2, 0x3f, + 0x39, 0xbc, 0xd0, 0x3f, 0x2e, 0x74, 0x57, 0x3f, 0xb2, 0xc4, 0x86, 0x3f, 0x11, 0x49, 0x90, 0x3f, + 0xc0, 0x95, 0xce, 0x3f, 0x41, 0xa3, 0x9d, 0x3f, 0xc6, 0x22, 0xcb, 0x3f, 0x7a, 0x63, 0x9c, 0x3f, + 0x52, 0xfa, 0x7e, 0x3f, 0x76, 0xde, 0xf3, 0x3f, 0x76, 0x58, 0xa5, 0x3f, 0x14, 0x86, 0xdf, 0x3f, + 0x9a, 0x19, 0x9c, 0x3f, 0x47, 0x36, 0xb8, 0x3f, 0x1f, 0xd8, 0x96, 0x3f, 0x49, 0x0a, 0xaa, 0x3f, + 0xea, 0xdc, 0xd5, 0x3f, 0x82, 0xd3, 0x8f, 0x3f, 0x78, 0x86, 0xd1, 0x3f, 0x69, 0x8e, 0xc4, 0x3f, + 0xcc, 0xab, 0xab, 0x3f, 0x26, 0xe8, 0xb6, 0x3f, 0xa6, 0x38, 0xc9, 0x3f, 0x45, 0x05, 0x93, 0x3f, + 0x09, 0x94, 0x80, 0x3f, 0x00, 0x62, 0xcd, 0x3f, 0x86, 0x9b, 0x93, 0x3f, 0x54, 0xb6, 0x73, 0x3f, + 0x69, 0x1c, 0x85, 0x3e, 0x76, 0xcb, 0x3f, 0x3f, 0xe0, 0x45, 0xfc, 0xbd, 0xc0, 0xfd, 0xb1, 0x3d, + 0xce, 0x4a, 0x78, 0x3f, 0xe2, 0x73, 0x38, 0x3f, 0xfc, 0x38, 0x22, 0x3f, 0xb2, 0x0a, 0xab, 0x3d, + 0xde, 0x77, 0xfb, 0x3e, 0xc4, 0x50, 0x12, 0x3f, 0x46, 0x57, 0x34, 0x3f, 0xce, 0xd7, 0x08, 0x3f, + 0xbd, 0xe8, 0x81, 0xbe, 0x17, 0x87, 0xaa, 0x3f, 0x54, 0x94, 0x54, 0x3e, 0x1c, 0x2a, 0x75, 0x3f, + 0x00, 0xc8, 0x0e, 0x3f, 0x55, 0xdd, 0xb5, 0x3f, 0x01, 0x64, 0x63, 0x3f, 0x77, 0x9f, 0xd0, 0x3e, + 0x1c, 0x13, 0x15, 0xbe, 0xc6, 0xd7, 0x30, 0x3f, 0x16, 0x37, 0x69, 0x3f, 0xb4, 0xf0, 0x55, 0x3f, + 0xc0, 0x85, 0xaa, 0x3c, 0xfd, 0x0d, 0xa8, 0x3f, 0x94, 0x90, 0xbc, 0x3d, 0xdf, 0x3c, 0x14, 0xbe, + 0xed, 0x64, 0x81, 0x3e, 0x15, 0xfc, 0xb0, 0x3f, 0x3d, 0xec, 0xef, 0xbd, 0x68, 0x0e, 0xc5, 0xbd, + 0x2f, 0x3c, 0x2c, 0xbd, 0x33, 0x2d, 0x3e, 0xbe, 0x79, 0xc8, 0x7a, 0xbd, 0x86, 0x1f, 0x64, 0xbe, + 0x66, 0xb4, 0x50, 0xbe, 0xa6, 0x38, 0xf3, 0xbd, 0x5f, 0x68, 0x01, 0xbe, 0x14, 0x18, 0xee, 0xbd, + 0x7b, 0x07, 0x53, 0xbe, 0xd4, 0x7a, 0x0c, 0xbe, 0x9c, 0xc8, 0x5a, 0xbe, 0x42, 0x9d, 0xc7, 0xbd, + 0xff, 0xa2, 0x9b, 0xbd, 0x70, 0x8b, 0x7a, 0xbe, 0xcc, 0x43, 0x1b, 0xbe, 0x53, 0x71, 0x58, 0xbe, + 0x45, 0x56, 0x23, 0xbe, 0xfb, 0x33, 0x2b, 0xbe, 0xde, 0xee, 0x1b, 0xbe, 0xe8, 0x7e, 0x09, 0xbe, + 0x8c, 0x50, 0x63, 0xbe, 0xce, 0xda, 0x0b, 0xbe, 0x8d, 0x32, 0x43, 0xbe, 0x1e, 0xb2, 0x60, 0xbe, + 0xa4, 0x7f, 0x16, 0xbe, 0xf8, 0xdc, 0x2c, 0xbe, 0xb0, 0xe5, 0x3c, 0xbe, 0x08, 0x37, 0xbc, 0xbd, + 0x01, 0x8b, 0xff, 0xbd, 0xc4, 0x42, 0x76, 0xbe, 0x30, 0xa9, 0xc5, 0xbd, 0x14, 0x7e, 0x03, 0xbe, + 0x70, 0x95, 0x12, 0xba, 0x3c, 0xa2, 0xcd, 0xbd, 0x08, 0x81, 0x0f, 0x3b, 0x86, 0xce, 0x5d, 0xbd, + 0x8b, 0x08, 0x0d, 0xbe, 0x9d, 0x6b, 0xda, 0xbd, 0xd1, 0xf6, 0xac, 0xbd, 0x84, 0xbb, 0x42, 0xbc, + 0x1c, 0x1c, 0xc0, 0xbd, 0xa8, 0xca, 0x91, 0xbd, 0x4e, 0x69, 0x00, 0xbe, 0xc6, 0x77, 0xeb, 0xbc, + 0x4f, 0x9e, 0x48, 0x3d, 0x24, 0xdf, 0x3f, 0xbe, 0x3f, 0xfc, 0x30, 0xbd, 0x81, 0xb0, 0x07, 0xbe, + 0x93, 0x23, 0xbf, 0xbd, 0x39, 0x53, 0x29, 0xbe, 0x8b, 0x6d, 0xfc, 0xbd, 0xae, 0xc0, 0x2c, 0xbd, + 0x66, 0x44, 0x25, 0xbd, 0xd1, 0x0f, 0xbf, 0xbd, 0x05, 0xba, 0xf1, 0xbd, 0xdf, 0x06, 0x19, 0xbe, + 0x30, 0xb1, 0x74, 0xbc, 0x3e, 0xfb, 0x20, 0xbe, 0xc6, 0x64, 0x15, 0xbd, 0xbf, 0x54, 0x19, 0x3d, + 0xa2, 0x26, 0x4b, 0xbd, 0xd5, 0x8a, 0x5f, 0xbe, 0x82, 0x03, 0xfa, 0x3c, 0x02, 0x3d, 0xc1, 0xbc, + 0x66, 0x2e, 0x6e, 0xbd, 0x72, 0x2d, 0xc6, 0xbd, 0xcc, 0x85, 0xcf, 0xbc, 0xf4, 0xa2, 0xb6, 0xbd, + 0x14, 0xd9, 0xbf, 0xbd, 0x74, 0x7c, 0x35, 0xbd, 0xc7, 0x65, 0x81, 0xbd, 0x04, 0xce, 0x8e, 0xbd, + 0xa9, 0x65, 0xad, 0xbd, 0x50, 0xe6, 0x9d, 0xbd, 0x1b, 0xe5, 0xa4, 0xbd, 0x67, 0x60, 0xc3, 0xbd, + 0x8d, 0x2a, 0x91, 0xbd, 0x69, 0x86, 0xde, 0xbd, 0xf0, 0xab, 0x93, 0xbd, 0xa3, 0x6a, 0xd2, 0xbd, + 0xd0, 0xeb, 0x83, 0xbd, 0x64, 0xc0, 0xc4, 0xbd, 0x5a, 0x53, 0x8a, 0xbd, 0xfb, 0x01, 0xb3, 0xbd, + 0x6e, 0xe9, 0x99, 0xbd, 0x7c, 0xb7, 0x88, 0xbd, 0x00, 0x45, 0xcd, 0xbd, 0xe6, 0x7f, 0x96, 0xbd, + 0xd0, 0x0a, 0x9f, 0xbd, 0xe7, 0xfa, 0xbd, 0xbd, 0x7e, 0xa8, 0xaf, 0xbd, 0xda, 0xd2, 0xa6, 0xbd, + 0x3a, 0x47, 0x5c, 0xbd, 0x3f, 0xd3, 0x9e, 0xbd, 0x34, 0xdd, 0xa3, 0xbd, 0x47, 0xc5, 0x2a, 0xbd, + 0x7a, 0x35, 0x19, 0xbd, 0xba, 0xa2, 0x5d, 0xbd, 0xf9, 0xea, 0xd0, 0x3b, 0xf8, 0x70, 0x9f, 0xbb, + 0x73, 0x5a, 0x78, 0xbd, 0x6a, 0xaf, 0x1c, 0xbd, 0xa3, 0xf1, 0x2c, 0xbd, 0xc8, 0xb6, 0x8f, 0xbc, + 0xef, 0xb0, 0xe9, 0xbc, 0xa0, 0xa1, 0x34, 0xbd, 0x9a, 0x0b, 0x15, 0xbd, 0x7e, 0x01, 0x7a, 0xbd, + 0xcd, 0x8b, 0x09, 0xbc, 0x1d, 0xda, 0xa3, 0xbd, 0xd2, 0x9c, 0x92, 0xbc, 0xd1, 0xa9, 0x81, 0xbd, + 0xaa, 0x4e, 0x00, 0xbd, 0xa2, 0xdf, 0xc2, 0xbd, 0x83, 0x36, 0x59, 0xbd, 0x7e, 0x66, 0x29, 0xbd, + 0x10, 0x58, 0x80, 0x3c, 0x2e, 0xc9, 0x38, 0xbd, 0x76, 0xef, 0x82, 0xbd, 0x4b, 0xa9, 0x1d, 0xbd, + 0xf6, 0xb1, 0x3e, 0xbc, 0x2d, 0x19, 0xb2, 0xbd, 0x00, 0xe0, 0x40, 0xbc, 0x96, 0x88, 0x89, 0xbc, + 0x12, 0x9f, 0x84, 0xbc, 0x50, 0x1b, 0x88, 0xbd, 0xad, 0x20, 0x87, 0xbc, 0x83, 0x9e, 0x2f, 0x3c, + 0x05, 0x09, 0x7e, 0x3d, 0xf6, 0xce, 0x21, 0xc0, 0x3c, 0x3e, 0x18, 0xbf, 0x7d, 0x91, 0x30, 0xc0, + 0x87, 0x0c, 0x4b, 0xc0, 0x7c, 0x69, 0x0d, 0xc0, 0x31, 0xe2, 0xf2, 0xbf, 0x3f, 0xcd, 0x81, 0xbf, + 0xcc, 0xd8, 0x3a, 0xc0, 0x90, 0xfb, 0xe2, 0xbf, 0x63, 0x81, 0x57, 0xc0, 0xf1, 0x6f, 0x13, 0xbf, + 0x02, 0x1d, 0x1b, 0x3e, 0xeb, 0xae, 0x81, 0xc0, 0x1b, 0x25, 0xe6, 0xbf, 0x89, 0x55, 0x48, 0xc0, + 0x74, 0x6a, 0x1d, 0xc0, 0x66, 0xf7, 0x35, 0xc0, 0x7d, 0x4f, 0x25, 0xc0, 0xa7, 0x22, 0xac, 0xbf, + 0x98, 0xa1, 0x32, 0xc0, 0xc5, 0x94, 0x05, 0xc0, 0xd5, 0x01, 0x2e, 0xc0, 0xe3, 0x4a, 0x70, 0xc0, + 0xad, 0x1b, 0xb4, 0xbf, 0x64, 0x5e, 0x35, 0xc0, 0x51, 0x3a, 0x04, 0xc0, 0xed, 0x59, 0x83, 0xbd, + 0x78, 0x1a, 0xd6, 0xbf, 0xbc, 0x86, 0x94, 0xc0, 0xbb, 0x01, 0x5a, 0xbe, 0xfe, 0xae, 0xd1, 0xbf, + 0xd9, 0xb7, 0xac, 0xbf, 0x8e, 0x01, 0x2e, 0xc0, 0x6e, 0xb2, 0xe5, 0xbf, 0xe6, 0x56, 0x2c, 0xc0, + 0x42, 0xd4, 0x41, 0xc0, 0x0f, 0xc5, 0x84, 0xbf, 0x31, 0xa1, 0x81, 0x3e, 0xae, 0xea, 0x13, 0xc0, + 0x41, 0xbf, 0x41, 0xc0, 0xee, 0x61, 0xaf, 0xbf, 0xf0, 0x02, 0x7d, 0xbf, 0xb5, 0xa3, 0xe7, 0xbf, + 0x76, 0x5d, 0x77, 0xbf, 0xc5, 0xef, 0x02, 0xbf, 0xd4, 0x13, 0x13, 0xc0, 0xc0, 0x36, 0x5e, 0xc0, + 0x6e, 0x53, 0x3e, 0xc0, 0xa6, 0x18, 0x58, 0xc0, 0x62, 0x52, 0x23, 0xc0, 0xfc, 0xe9, 0x23, 0xc0, + 0xfe, 0x2a, 0x0b, 0xc0, 0x41, 0xc1, 0x14, 0xbf, 0xac, 0x1f, 0xdf, 0xbf, 0xd3, 0x3d, 0x00, 0xc0, + 0x08, 0x2f, 0xd7, 0xbf, 0x5f, 0x58, 0x7d, 0xc0, 0x38, 0xf5, 0xfa, 0xbf, 0xcb, 0x1f, 0xaf, 0xbf, + 0x10, 0xa2, 0x78, 0xc0, 0x8b, 0x1b, 0x42, 0xc0, 0x79, 0xb9, 0xfb, 0x3c, 0x74, 0x7d, 0x95, 0xbf, + 0x12, 0x67, 0x2e, 0x3f, 0xda, 0xf2, 0x65, 0x3f, 0xf8, 0xe2, 0xc6, 0x3f, 0xaa, 0xe8, 0x94, 0x3f, + 0x3d, 0x9f, 0x96, 0x3f, 0x48, 0x84, 0xb8, 0x3f, 0xfa, 0x5c, 0x8d, 0x3f, 0x02, 0x84, 0xf7, 0x3f, + 0x68, 0xa8, 0xc3, 0x3f, 0x90, 0xda, 0x96, 0x3f, 0xf0, 0xe9, 0x87, 0x3f, 0x37, 0xb3, 0xbf, 0x3f, + 0x27, 0xee, 0x3b, 0x3f, 0xf6, 0x92, 0x19, 0x3f, 0xfc, 0x71, 0xab, 0x3f, 0xd8, 0x08, 0xe0, 0x3f, + 0x6e, 0x24, 0xca, 0x3f, 0x7e, 0x5e, 0xac, 0x3f, 0xcc, 0x58, 0x9c, 0x3f, 0x2c, 0x79, 0x87, 0x3f, + 0x74, 0xd9, 0xf3, 0x3f, 0xd9, 0x9f, 0x90, 0x3f, 0x53, 0x8a, 0x9b, 0x3f, 0x40, 0xb7, 0xbf, 0x3f, + 0x2a, 0xf1, 0xd8, 0x3f, 0xaa, 0xf1, 0x02, 0x40, 0x9b, 0xc0, 0xc0, 0x3f, 0x80, 0x76, 0x93, 0x3f, + 0xa8, 0xd4, 0x02, 0x40, 0x66, 0xda, 0xa4, 0x3f, 0x9a, 0x10, 0xf9, 0x3e, 0x54, 0xef, 0xa9, 0x3f, + 0x80, 0x59, 0x3d, 0x3e, 0xac, 0x68, 0x80, 0x3f, 0x72, 0x26, 0x61, 0x3f, 0xc8, 0x22, 0x85, 0x3f, + 0x4e, 0x73, 0x50, 0x3f, 0xf4, 0xfc, 0xfc, 0x3d, 0x02, 0x46, 0xbd, 0xbe, 0xc8, 0x41, 0xc0, 0x3e, + 0xb0, 0x5b, 0x92, 0x3f, 0xcb, 0x1e, 0x25, 0x3f, 0xd4, 0xc3, 0xe4, 0xbd, 0x42, 0x98, 0x1a, 0x3e, + 0xc0, 0x86, 0x12, 0x3f, 0x24, 0xa7, 0x59, 0xbe, 0x84, 0x0d, 0x28, 0x3f, 0x1e, 0x51, 0x74, 0x3f, + 0x72, 0xcc, 0xc7, 0xbd, 0x0a, 0xbf, 0x8a, 0x3f, 0x0a, 0x72, 0x6b, 0x3e, 0xd8, 0x54, 0x44, 0x3f, + 0xfd, 0x18, 0x4e, 0x3f, 0x68, 0xc8, 0x41, 0x3d, 0x94, 0xb2, 0x3a, 0x3e, 0xd6, 0x16, 0x7d, 0x3f, + 0x17, 0xfe, 0x04, 0x3f, 0x6d, 0xa7, 0xa6, 0x3e, 0x6f, 0x97, 0xb7, 0x3e, 0xbf, 0xcd, 0xbf, 0x3e, + 0xcc, 0xa1, 0x1a, 0x3f, 0xe2, 0xc1, 0x21, 0x3f, 0xda, 0x26, 0x37, 0x3f, 0xbe, 0xe5, 0x18, 0x3e, + 0x0c, 0x1a, 0x88, 0xbd, 0x19, 0x0f, 0xc1, 0xbd, 0x01, 0x71, 0x5e, 0xbe, 0xd3, 0x07, 0x0c, 0xbe, + 0x38, 0x1f, 0xee, 0xbd, 0xea, 0x56, 0x40, 0xbe, 0xb8, 0x59, 0x1f, 0xbe, 0x07, 0x5b, 0x6d, 0xbe, + 0x3b, 0x00, 0x3e, 0xbe, 0xa7, 0x47, 0x27, 0xbe, 0x02, 0x49, 0xf7, 0xbd, 0xd2, 0xff, 0x2f, 0xbe, + 0xb1, 0x29, 0xda, 0xbd, 0xe0, 0x36, 0x7f, 0xbd, 0x4f, 0xe7, 0x20, 0xbe, 0xd6, 0x30, 0x47, 0xbe, + 0x93, 0x11, 0x08, 0xbe, 0x0f, 0x17, 0x13, 0xbe, 0x2e, 0x91, 0xe0, 0xbd, 0xb2, 0xd3, 0xe4, 0xbd, + 0xd7, 0xce, 0x81, 0xbe, 0x3a, 0x23, 0x1c, 0xbe, 0xce, 0x7b, 0x08, 0xbe, 0x07, 0x48, 0x54, 0xbe, + 0xf0, 0x8b, 0x65, 0xbe, 0x46, 0xd0, 0x43, 0xbe, 0x6a, 0xbd, 0x36, 0xbe, 0xa2, 0x6f, 0x15, 0xbe, + 0xd5, 0x50, 0x54, 0xbe, 0x66, 0x4d, 0xfd, 0xbd, 0x1e, 0xcc, 0xe0, 0xbd, 0xe6, 0xb3, 0x2b, 0xbe, + 0x5e, 0xcc, 0x89, 0xbc, 0xe4, 0x8d, 0xd6, 0xbd, 0x69, 0x64, 0x19, 0xbe, 0x0a, 0xd3, 0xfe, 0xbd, + 0x14, 0xe3, 0xa3, 0xbd, 0x8a, 0x80, 0x65, 0xbd, 0xc0, 0x7b, 0x06, 0xbc, 0xc0, 0x96, 0x9b, 0xbd, + 0xa8, 0x8f, 0x16, 0xbe, 0x3d, 0x4a, 0xe1, 0xbd, 0x70, 0x4a, 0x5e, 0xbb, 0x15, 0x65, 0x18, 0xbd, + 0x2b, 0x0a, 0xb9, 0xbd, 0x74, 0x21, 0x9b, 0x3c, 0xdb, 0xef, 0xb5, 0xbd, 0x38, 0x61, 0xeb, 0xbd, + 0x5c, 0x8a, 0x0e, 0x3d, 0x31, 0x62, 0xf0, 0xbd, 0x74, 0x02, 0x2c, 0xbc, 0x80, 0x22, 0xa9, 0xbd, + 0xb6, 0xf9, 0x12, 0xbe, 0x6a, 0x21, 0x35, 0xbd, 0xd2, 0xc4, 0xf5, 0xbc, 0xf7, 0x24, 0x20, 0xbe, + 0x48, 0x61, 0xda, 0xbd, 0x58, 0x12, 0x9d, 0xbc, 0x3a, 0x83, 0x82, 0xbd, 0x28, 0xa7, 0x8b, 0xbd, + 0xce, 0xad, 0x81, 0xbd, 0xef, 0xe2, 0x6d, 0xbd, 0xae, 0xd8, 0x07, 0xbe, 0x9f, 0x2d, 0x4c, 0xbd, + 0x5a, 0x4a, 0x3b, 0xbd, 0x27, 0x1b, 0x88, 0xbd, 0xc9, 0x11, 0x9e, 0xbd, 0xec, 0xa1, 0x9a, 0xbd, + 0x5a, 0xe7, 0xac, 0xbd, 0xf4, 0xf3, 0x8e, 0xbd, 0x7c, 0x47, 0x2b, 0xbd, 0x14, 0xc9, 0xd9, 0xbd, + 0x71, 0x74, 0xbf, 0xbd, 0xaf, 0x89, 0x71, 0xbd, 0x12, 0x49, 0x6c, 0xbd, 0xfa, 0xec, 0xac, 0xbd, + 0xbb, 0x6a, 0x15, 0xbd, 0x8a, 0xf1, 0x09, 0xbd, 0xd0, 0x80, 0xa4, 0xbd, 0xcc, 0x80, 0xe4, 0xbd, + 0x3e, 0x4a, 0xe1, 0xbd, 0x6f, 0xec, 0xbe, 0xbd, 0xe7, 0x06, 0xaf, 0xbd, 0x58, 0x12, 0x95, 0xbd, + 0x3c, 0xec, 0xc7, 0xbd, 0x85, 0xcb, 0x52, 0xbd, 0x0b, 0x28, 0x94, 0xbd, 0xb5, 0x1d, 0x9e, 0xbd, + 0x90, 0x3f, 0xae, 0xbd, 0x72, 0xf5, 0x0d, 0xbe, 0xda, 0x64, 0xad, 0xbd, 0xb7, 0x2c, 0x7b, 0xbd, + 0xa8, 0x11, 0x09, 0xbe, 0x9c, 0x42, 0xba, 0xbd, 0x32, 0xf7, 0x0f, 0xbc, 0xa5, 0x00, 0x8a, 0xbd, + 0xcc, 0x46, 0xab, 0xbc, 0x8c, 0xda, 0x92, 0xbd, 0x61, 0x0a, 0x32, 0xbd, 0x9e, 0x03, 0x8e, 0xbd, + 0x48, 0xc9, 0x87, 0xbd, 0xb8, 0xa5, 0x7f, 0xbb, 0x7c, 0x9c, 0x02, 0x3d, 0xb7, 0xe5, 0xe8, 0xbc, + 0xde, 0x03, 0x98, 0xbd, 0x9d, 0x44, 0x04, 0xbd, 0x84, 0xb3, 0x81, 0x3a, 0xba, 0x19, 0x8c, 0xbc, + 0x6c, 0x96, 0xe8, 0xbc, 0x13, 0x92, 0xe3, 0x3b, 0xdf, 0x22, 0x3d, 0xbd, 0x92, 0x00, 0x93, 0xbd, + 0x51, 0x58, 0xd6, 0xbc, 0x79, 0x06, 0xa4, 0xbd, 0xed, 0xfc, 0x12, 0xbd, 0x7e, 0x73, 0x6e, 0xbd, + 0x88, 0x90, 0x2e, 0xbd, 0x98, 0xa3, 0x2e, 0x3b, 0x5d, 0x93, 0xa9, 0xbc, 0x48, 0xf5, 0x53, 0xbd, + 0x13, 0x91, 0xd7, 0xbc, 0xd2, 0x79, 0x57, 0xbd, 0x34, 0xa4, 0xdf, 0xbc, 0x3a, 0xe9, 0xb7, 0xbc, + 0xc2, 0x52, 0x7d, 0xbd, 0x49, 0x29, 0x6e, 0xbd, 0x14, 0xc6, 0xa5, 0xbc, 0x36, 0xc1, 0x0a, 0xbc, + 0x8c, 0x7a, 0x19, 0xbf, 0x9b, 0x99, 0xc7, 0xbf, 0xc0, 0x8b, 0x6b, 0xc0, 0xf8, 0xf5, 0x10, 0xc0, + 0x7d, 0xbf, 0xb9, 0xbf, 0x12, 0xfc, 0x1e, 0xc0, 0x1e, 0x02, 0xf4, 0xbf, 0x55, 0x9c, 0x38, 0xc0, + 0x5b, 0x05, 0x3f, 0xc0, 0x3d, 0xe6, 0x2e, 0xc0, 0x59, 0x21, 0x93, 0xbf, 0x5a, 0x2a, 0xf3, 0xbf, + 0xc8, 0x94, 0xfc, 0xbf, 0xc3, 0x59, 0xb8, 0xbe, 0x01, 0x54, 0x0c, 0xc0, 0xd4, 0x40, 0x27, 0xc0, + 0x6e, 0x49, 0xb8, 0xbe, 0x92, 0xe8, 0x05, 0xc0, 0x36, 0xb6, 0x36, 0xbf, 0x1d, 0xdc, 0xc5, 0xbf, + 0xb3, 0x17, 0x7b, 0xc0, 0x90, 0x53, 0x04, 0xc0, 0xc7, 0x59, 0xb3, 0xbf, 0x2d, 0xcd, 0x65, 0xc0, + 0xb7, 0x46, 0x53, 0xc0, 0xec, 0x4c, 0xb0, 0xbf, 0xa9, 0x00, 0x0f, 0xc0, 0x48, 0x47, 0x04, 0xc0, + 0x6c, 0xb7, 0x04, 0xc0, 0x6e, 0xae, 0xa4, 0xbf, 0xf9, 0x09, 0x2f, 0xc0, 0xd4, 0xee, 0x09, 0xc0, + 0x88, 0x88, 0x87, 0x3f, 0xb8, 0x34, 0x88, 0x3f, 0x66, 0x50, 0xac, 0x3f, 0x4e, 0x13, 0xbb, 0x3f, + 0x88, 0x31, 0xff, 0x3f, 0xbd, 0xd1, 0x8f, 0x3f, 0x96, 0x85, 0x3b, 0x3f, 0x00, 0x62, 0xc7, 0x3f, + 0x64, 0xa5, 0xe6, 0x3f, 0x04, 0xd3, 0xd0, 0x3f, 0x64, 0xb3, 0xaa, 0x3f, 0x7e, 0xaf, 0x69, 0x3f, + 0x80, 0x32, 0xd4, 0x3f, 0x9a, 0x9e, 0x2c, 0x3f, 0x21, 0xf1, 0x7e, 0x3f, 0xce, 0xe2, 0xd6, 0x3f, + 0xa2, 0x44, 0x6b, 0x3f, 0x42, 0xa9, 0xad, 0x3f, 0x8f, 0x7f, 0xd5, 0x3f, 0xd4, 0x2d, 0xcd, 0x3f, + 0x48, 0x46, 0x47, 0x3f, 0xba, 0xd6, 0x88, 0x3f, 0x53, 0xa4, 0x00, 0x40, 0x48, 0x36, 0xb2, 0x3f, + 0x8d, 0xbb, 0xc9, 0x3f, 0x8e, 0x8d, 0xfb, 0x3f, 0x3e, 0x36, 0xd6, 0x3f, 0xe0, 0x1c, 0x9d, 0x3f, + 0xcc, 0x5b, 0xe5, 0x3f, 0xdb, 0xe6, 0xc6, 0x3f, 0xb4, 0x72, 0x8f, 0x3e, 0xcc, 0xc1, 0x6c, 0x3f, + 0x10, 0x2f, 0x3a, 0xbf, 0xbd, 0x77, 0x4f, 0xbf, 0x94, 0x51, 0x65, 0xbf, 0xf8, 0xfd, 0x51, 0xbf, + 0xb3, 0x85, 0x89, 0xbf, 0x74, 0x82, 0x77, 0xbf, 0x8a, 0x13, 0x66, 0xbf, 0x0c, 0xbe, 0x92, 0xbf, + 0x8c, 0x02, 0x7f, 0xbf, 0x6e, 0xd2, 0x67, 0xbf, 0x1a, 0xa3, 0x83, 0xbf, 0x7c, 0x0f, 0x5b, 0xbf, + 0xaa, 0xf8, 0x71, 0xbf, 0x28, 0xd6, 0x2b, 0xbf, 0x1c, 0x7a, 0x4c, 0xbf, 0x30, 0xdf, 0x64, 0xbf, + 0x40, 0x84, 0x51, 0xbf, 0x2f, 0xe2, 0x83, 0xbf, 0xd4, 0x34, 0x4f, 0xbf, 0xf0, 0x73, 0x76, 0xbf, + 0x70, 0xa5, 0x43, 0xbf, 0x82, 0x29, 0x62, 0xbf, 0x1d, 0xdb, 0x82, 0xbf, 0x32, 0xab, 0x8e, 0xbf, + 0x8c, 0x27, 0x88, 0xbf, 0x2a, 0x93, 0x89, 0xbf, 0x5d, 0x75, 0x6f, 0xbf, 0x43, 0xed, 0x3f, 0xbf, + 0x29, 0xb9, 0x8a, 0xbf, 0xd1, 0xff, 0x4e, 0xbf, 0xb0, 0x4f, 0x05, 0xbf, 0x28, 0x4a, 0x6c, 0xbf, + 0x28, 0xa3, 0x46, 0xbd, 0x95, 0x74, 0xb2, 0xbe, 0x68, 0x20, 0x23, 0xbf, 0x6b, 0x54, 0x04, 0xbf, + 0x30, 0x43, 0xed, 0xbe, 0xa1, 0xe5, 0x8f, 0xbe, 0x28, 0x1e, 0x83, 0xbd, 0x3d, 0xff, 0xef, 0xbe, + 0xa8, 0xed, 0xb6, 0xbe, 0x3b, 0x64, 0xc4, 0xbe, 0x56, 0xf0, 0x4f, 0xbe, 0x28, 0x4b, 0x33, 0xbe, + 0xa9, 0x8c, 0xaf, 0xbe, 0xe0, 0xa2, 0x5b, 0x3d, 0x82, 0xcb, 0xf3, 0xbe, 0x5c, 0x3e, 0xed, 0xbe, + 0xdc, 0xdc, 0x09, 0xbe, 0xce, 0x3f, 0x97, 0xbe, 0x0b, 0x17, 0xcc, 0xbe, 0x30, 0x5e, 0xb7, 0xbe, + 0x80, 0x4d, 0x85, 0xbe, 0xa5, 0x56, 0x3a, 0xbe, 0x67, 0xa9, 0xdb, 0xbe, 0xab, 0x5f, 0xc2, 0xbe, + 0x92, 0xd9, 0x0a, 0xbf, 0x59, 0x0d, 0x7f, 0xbe, 0x3c, 0xe5, 0x83, 0xbe, 0x9b, 0xac, 0x62, 0xbe, + 0xd9, 0x63, 0xb2, 0xbe, 0x8e, 0x46, 0x8c, 0xbe, 0xbc, 0xec, 0x05, 0xbe, 0xa3, 0xea, 0x8b, 0xbe, + 0x23, 0x1a, 0x9c, 0x3d, 0x5a, 0x02, 0xd4, 0x3d, 0x3f, 0x88, 0xf8, 0x3d, 0x18, 0x26, 0xcd, 0x3d, + 0x18, 0x67, 0xf1, 0x3d, 0x58, 0x16, 0xf9, 0x3d, 0x44, 0xa4, 0xe8, 0x3d, 0xfb, 0xeb, 0x12, 0x3e, + 0x1d, 0x96, 0xdb, 0x3d, 0x7c, 0x17, 0xce, 0x3d, 0xbe, 0xae, 0xf5, 0x3d, 0x5e, 0x7c, 0xda, 0x3d, + 0xba, 0x4d, 0xd4, 0x3d, 0x5a, 0xae, 0x9a, 0x3d, 0x99, 0x25, 0xe2, 0x3d, 0x33, 0x7d, 0xcf, 0x3d, + 0xd2, 0x77, 0xca, 0x3d, 0xab, 0x26, 0xfe, 0x3d, 0x92, 0x99, 0xaf, 0x3d, 0xfc, 0xfc, 0xde, 0x3d, + 0xd2, 0x4b, 0xd0, 0x3d, 0x3a, 0x0c, 0xd9, 0x3d, 0xbe, 0xc4, 0xdc, 0x3d, 0xdc, 0x1d, 0x0f, 0x3e, + 0x28, 0x5c, 0x09, 0x3e, 0xf8, 0x85, 0xdd, 0x3d, 0x9f, 0x7b, 0xc7, 0x3d, 0xa4, 0x2f, 0xa9, 0x3d, + 0x10, 0x4a, 0xf6, 0x3d, 0x90, 0x5f, 0xaa, 0x3d, 0x57, 0x3d, 0x9a, 0x3d, 0x4e, 0x65, 0xf8, 0x3d, + 0xf4, 0xb1, 0x08, 0x3c, 0x97, 0xd5, 0x6a, 0x3d, 0x1c, 0x94, 0xc3, 0x3d, 0xdb, 0x04, 0x8f, 0x3d, + 0x4e, 0x8b, 0x68, 0x3d, 0xde, 0x46, 0x59, 0x3d, 0x00, 0xc5, 0xf6, 0x3c, 0xfb, 0x0d, 0x9b, 0x3d, + 0xe0, 0x7f, 0x31, 0x3d, 0xab, 0x61, 0x46, 0x3d, 0x49, 0x4d, 0x19, 0x3d, 0x38, 0x31, 0x1e, 0x3d, + 0x84, 0xe4, 0x31, 0x3d, 0x58, 0x5c, 0x47, 0x3b, 0x50, 0x15, 0xa0, 0x3d, 0xfe, 0x93, 0x6e, 0x3d, + 0x8e, 0xb5, 0xf9, 0x3c, 0x30, 0x46, 0x4f, 0x3d, 0x0e, 0xf1, 0x36, 0x3d, 0x9f, 0x58, 0x46, 0x3d, + 0x56, 0x33, 0x52, 0x3d, 0x7d, 0xc5, 0x12, 0x3d, 0x0a, 0x87, 0x46, 0x3d, 0x79, 0xb6, 0x87, 0x3d, + 0xe5, 0xf3, 0xa7, 0x3d, 0xe5, 0xa7, 0xd1, 0x3c, 0x7c, 0xb2, 0xf2, 0x3c, 0x04, 0xeb, 0xf3, 0x3c, + 0x48, 0x60, 0x3f, 0x3d, 0x57, 0x89, 0xf3, 0x3c, 0x46, 0xc0, 0x14, 0x3d, 0x43, 0xa9, 0x66, 0x3d, + 0x04, 0x83, 0x35, 0x3d, 0x3c, 0x47, 0x33, 0x3d, 0xe1, 0xdd, 0x44, 0x3d, 0x87, 0x4d, 0x47, 0x3d, + 0xac, 0xb3, 0x8a, 0x3d, 0xd7, 0xd1, 0x52, 0x3d, 0x0f, 0xc6, 0x38, 0x3d, 0xde, 0x36, 0x81, 0x3d, + 0xb8, 0xdc, 0x80, 0x3d, 0xb5, 0xd3, 0x66, 0x3d, 0xa6, 0x9b, 0x6e, 0x3d, 0xaa, 0xc9, 0x38, 0x3d, + 0xbc, 0xcc, 0x70, 0x3d, 0x17, 0xe1, 0x17, 0x3d, 0x8c, 0x4a, 0x26, 0x3d, 0x60, 0x34, 0x64, 0x3d, + 0x14, 0x8e, 0x35, 0x3d, 0x2e, 0xc1, 0x6b, 0x3d, 0xd2, 0xc7, 0x59, 0x3d, 0xb7, 0xf6, 0x6e, 0x3d, + 0x86, 0x32, 0x1d, 0x3d, 0x6e, 0x5c, 0x47, 0x3d, 0xb4, 0x23, 0x88, 0x3d, 0xc0, 0xcf, 0x76, 0x3d, + 0xc6, 0x34, 0x73, 0x3d, 0xd9, 0x3f, 0x8f, 0x3d, 0x5c, 0xbb, 0x74, 0x3d, 0xaa, 0x9d, 0x3b, 0x3d, + 0x0d, 0x7a, 0x87, 0x3d, 0xc1, 0x40, 0x58, 0x3d, 0x79, 0xd5, 0xb8, 0x3c, 0xd8, 0xea, 0x3e, 0x3d, + 0x7f, 0xfc, 0x29, 0x3c, 0x5a, 0x5f, 0xa9, 0x3c, 0xbe, 0xe9, 0x0f, 0x3d, 0x49, 0x2c, 0x09, 0x3d, + 0xe5, 0x45, 0x18, 0x3d, 0xda, 0xbd, 0x8c, 0x3c, 0xb0, 0x30, 0x5d, 0x3b, 0x82, 0x47, 0xef, 0x3c, + 0x88, 0xc6, 0xfd, 0x3c, 0x1f, 0xda, 0xf7, 0x3c, 0x1a, 0x27, 0x8b, 0x3c, 0xa2, 0x97, 0x35, 0x3c, + 0x88, 0xe2, 0xea, 0x3c, 0xc0, 0x67, 0x5b, 0x3a, 0x87, 0x74, 0xc8, 0x3c, 0x2c, 0x01, 0x0c, 0x3d, + 0x96, 0x0e, 0x26, 0x3c, 0x36, 0x7b, 0xaa, 0x3c, 0xc8, 0xa6, 0x05, 0x3d, 0x16, 0x4c, 0xe6, 0x3c, + 0x7d, 0x01, 0x58, 0x3c, 0xcc, 0xcb, 0x5e, 0x3c, 0x30, 0xc6, 0x16, 0x3d, 0x03, 0x95, 0xc0, 0x3c, + 0x5a, 0x70, 0x08, 0x3d, 0x65, 0xc7, 0xea, 0x3c, 0xb9, 0xd8, 0xd3, 0x3c, 0x8c, 0xd1, 0x9e, 0x3c, + 0x5c, 0xb4, 0xf0, 0x3c, 0x0e, 0x87, 0xd5, 0x3c, 0x8a, 0xd8, 0x48, 0x3b, 0xb0, 0x68, 0x67, 0x3c, + 0xc4, 0xc7, 0x2f, 0x3f, 0x90, 0x93, 0xc5, 0x3f, 0xeb, 0x97, 0x06, 0x40, 0x5f, 0x09, 0xc6, 0x3f, + 0x14, 0xb1, 0xbb, 0x3f, 0x30, 0x92, 0xd7, 0x3f, 0x6e, 0x83, 0xb7, 0x3f, 0x96, 0xe4, 0x04, 0x40, + 0x28, 0x23, 0x9f, 0x3f, 0x18, 0x72, 0xa2, 0x3f, 0x2f, 0xf7, 0xb7, 0x3f, 0xd0, 0x06, 0xb4, 0x3f, + 0x5d, 0xc4, 0x9e, 0x3f, 0x58, 0x20, 0x37, 0x3f, 0x47, 0xb3, 0xef, 0x3f, 0x5b, 0x0b, 0xb0, 0x3f, + 0x95, 0xd8, 0x9c, 0x3f, 0x8c, 0x27, 0xd0, 0x3f, 0xd5, 0x68, 0x86, 0x3f, 0x94, 0x39, 0xaf, 0x3f, + 0x93, 0x72, 0xc3, 0x3f, 0x57, 0x00, 0xaa, 0x3f, 0xa5, 0xb3, 0xa0, 0x3f, 0xb8, 0x20, 0xfc, 0x3f, + 0x19, 0x69, 0x02, 0x40, 0x52, 0xb9, 0x81, 0x3f, 0xbc, 0x5e, 0x81, 0x3f, 0x2e, 0x1d, 0x75, 0x3f, + 0xfa, 0x1c, 0xb7, 0x3f, 0x3e, 0xc1, 0x61, 0x3f, 0x8e, 0xc7, 0x97, 0x3f, 0x82, 0x26, 0xe2, 0x3f, + 0xe8, 0x37, 0xe1, 0x3d, 0xa2, 0x3f, 0x44, 0x3f, 0xbf, 0xc7, 0x87, 0x3e, 0x24, 0x5e, 0xa3, 0x3e, + 0x41, 0x81, 0x87, 0x3f, 0x28, 0x70, 0x1a, 0x3e, 0x7a, 0x98, 0x9a, 0xbe, 0x22, 0x74, 0x13, 0x3f, + 0x2d, 0x7d, 0x2c, 0x3f, 0x42, 0x35, 0x05, 0x3e, 0xe0, 0xaa, 0x21, 0x3e, 0x02, 0x4a, 0x11, 0x3f, + 0x9f, 0xd5, 0x55, 0x3f, 0xbc, 0x82, 0x97, 0x3d, 0x9b, 0xf3, 0x07, 0x3f, 0x8e, 0x8f, 0x3c, 0x3f, + 0x58, 0x7d, 0x96, 0x3e, 0x36, 0x10, 0xff, 0x3e, 0xa4, 0xc1, 0xcd, 0x3d, 0x04, 0x86, 0x4c, 0x3e, + 0x10, 0x24, 0xcf, 0x3d, 0x22, 0xba, 0x9a, 0x3e, 0xf8, 0xff, 0xd5, 0xbd, 0x94, 0xd0, 0xe7, 0x3d, + 0x84, 0xea, 0xb0, 0x3d, 0x68, 0xe6, 0x82, 0x3f, 0x05, 0x8d, 0x56, 0x3f, 0x34, 0x64, 0xc7, 0x3d, + 0x4c, 0x7d, 0xef, 0x3d, 0x29, 0x23, 0xa9, 0x3e, 0xc2, 0x1d, 0x42, 0x3e, 0xea, 0x51, 0x18, 0x3f, + 0xb1, 0x7e, 0x5a, 0xbd, 0x05, 0xf1, 0x96, 0xbe, 0xe2, 0x1b, 0xb1, 0xbe, 0xb0, 0xa0, 0xb3, 0xbe, + 0x56, 0xf5, 0x8e, 0xbe, 0x65, 0xef, 0xcd, 0xbe, 0xf6, 0xfb, 0x3d, 0xbe, 0x82, 0x2b, 0x00, 0xbf, + 0x44, 0x86, 0x15, 0xbf, 0xe8, 0xb5, 0x93, 0xbe, 0x79, 0x3f, 0x42, 0xbe, 0xc6, 0x04, 0x98, 0xbe, + 0xda, 0xf4, 0x56, 0xbe, 0x86, 0x0c, 0x30, 0xbe, 0x4c, 0xe5, 0xc4, 0xbe, 0x02, 0x3d, 0xa9, 0xbe, + 0x4c, 0x05, 0x27, 0xbe, 0x85, 0x2c, 0x2f, 0xbe, 0xa1, 0x0b, 0x84, 0xbe, 0x7f, 0x45, 0x9d, 0xbd, + 0xfa, 0xb2, 0x72, 0xbe, 0x0a, 0x50, 0x7d, 0xbe, 0xdf, 0xdc, 0xe1, 0xbd, 0xc2, 0xb1, 0xa7, 0xbe, + 0xf6, 0x15, 0xe1, 0xbe, 0x6f, 0xe4, 0xaf, 0xbe, 0x42, 0x82, 0xec, 0xbe, 0xb6, 0xde, 0x1f, 0xbe, + 0xcd, 0x80, 0x91, 0xbe, 0xd4, 0x1a, 0xbc, 0xbe, 0xa2, 0x70, 0xb7, 0x3c, 0x32, 0x75, 0x79, 0xbe, + 0xa9, 0x0b, 0x91, 0x3b, 0x8f, 0x50, 0xbb, 0xbe, 0x06, 0x7b, 0x34, 0xbe, 0x73, 0xed, 0x34, 0xbe, + 0xff, 0x7c, 0xa9, 0xbe, 0xb4, 0x55, 0xbc, 0xbd, 0xb7, 0x39, 0x10, 0x3e, 0x56, 0x99, 0x07, 0x3e, + 0x44, 0x10, 0xc3, 0xbd, 0x68, 0xa7, 0x66, 0xbe, 0x56, 0x49, 0xf8, 0x3c, 0x3d, 0xaa, 0x40, 0xbd, + 0xfb, 0x3d, 0x85, 0xbe, 0x80, 0x6d, 0x79, 0x3a, 0x5b, 0xd1, 0x86, 0xbe, 0xf6, 0x1a, 0x65, 0xbe, + 0x68, 0x25, 0x23, 0x3e, 0x7c, 0x1e, 0x74, 0xbe, 0x30, 0x5f, 0x0c, 0x3e, 0x16, 0x24, 0x3d, 0xbe, + 0x3e, 0x42, 0xa7, 0xbd, 0xe4, 0x71, 0x9b, 0xbc, 0xbf, 0xd4, 0x97, 0x3d, 0xde, 0x44, 0x77, 0xbe, + 0x07, 0x26, 0x62, 0x3e, 0xfe, 0xf2, 0x06, 0xbe, 0x70, 0xc6, 0xca, 0xbe, 0x3a, 0xbb, 0x3b, 0xbd, + 0x28, 0x3a, 0x27, 0x3d, 0xae, 0x8d, 0x31, 0xbe, 0x20, 0xf2, 0x3e, 0xbe, 0x8e, 0xe3, 0x96, 0xbd, + 0x3b, 0xd3, 0x86, 0x3b, 0xc7, 0x63, 0x17, 0x3d, 0x32, 0x6b, 0x54, 0x3d, 0x8a, 0x2c, 0x50, 0x3d, + 0x10, 0x9d, 0xbd, 0x3c, 0x28, 0x5d, 0x75, 0x3d, 0xec, 0xbd, 0xfc, 0x3c, 0xa0, 0xd8, 0x49, 0x3d, + 0x7c, 0x2f, 0x8f, 0x3d, 0x55, 0x9f, 0x4d, 0x3d, 0x68, 0x92, 0xb1, 0x3c, 0xe0, 0x22, 0xe5, 0x3c, + 0x8c, 0x73, 0x88, 0x3c, 0xe6, 0xaa, 0xbe, 0x3c, 0xc6, 0xb1, 0x59, 0x3d, 0x60, 0x57, 0x14, 0x3d, + 0xf4, 0x9e, 0xdb, 0x3b, 0x93, 0xd2, 0xab, 0x3c, 0x7a, 0x53, 0xe9, 0x3c, 0x97, 0xf5, 0x66, 0x3c, + 0x35, 0x89, 0x14, 0x3d, 0xf0, 0x39, 0xe5, 0x3c, 0x4e, 0xaf, 0x85, 0x3c, 0x94, 0x3b, 0x6b, 0x3d, + 0x2e, 0x14, 0x55, 0x3d, 0x2d, 0xde, 0xc4, 0x3c, 0x58, 0x7e, 0x7a, 0x3d, 0x16, 0x08, 0xb8, 0x3c, + 0xc4, 0x46, 0x16, 0x3d, 0x17, 0x22, 0x58, 0x3d, 0x80, 0xba, 0xb9, 0xb9, 0x6f, 0x0d, 0xa7, 0x3c, + 0xc1, 0xe6, 0xd9, 0xba, 0x02, 0x7d, 0x34, 0x3d, 0x80, 0xec, 0x0e, 0x3d, 0xc6, 0xd7, 0x08, 0x3d, + 0xb8, 0x0f, 0xe8, 0x3c, 0xc3, 0x90, 0xec, 0x3c, 0x80, 0x03, 0xea, 0xba, 0x84, 0xdc, 0x65, 0xbc, + 0xff, 0x49, 0xac, 0x3c, 0x2c, 0xb7, 0x33, 0x3d, 0x60, 0xed, 0xa9, 0xb9, 0xde, 0x83, 0x43, 0x3b, + 0x70, 0xac, 0xb1, 0x3c, 0x22, 0x32, 0xc4, 0x3b, 0x39, 0x08, 0x28, 0x3d, 0x80, 0x2f, 0xd1, 0x3c, + 0x6d, 0x3a, 0xd1, 0xbc, 0x8c, 0xfa, 0xe2, 0x3c, 0xf7, 0x7c, 0x34, 0xbc, 0x44, 0xe2, 0xcb, 0x3c, + 0xee, 0xd0, 0xa9, 0x3c, 0x92, 0x7b, 0xa8, 0x3b, 0x5c, 0x24, 0x0b, 0xbb, 0x1e, 0xfc, 0x47, 0x3d, + 0xfe, 0xcb, 0x65, 0xbc, 0x2d, 0x32, 0x3b, 0x3b, 0xb0, 0x81, 0x5f, 0x3d, 0x1f, 0x5d, 0x3b, 0x3c, + 0xb2, 0x35, 0x89, 0x3b, 0xb2, 0xab, 0x08, 0x3d, 0xdb, 0x32, 0xa8, 0x3c, 0x00, 0xbc, 0x5e, 0x3b, + 0xed, 0xd6, 0x7e, 0x3b, 0x92, 0xc4, 0x9d, 0x3c, 0xc9, 0xd9, 0x78, 0x3c, 0xa4, 0x3f, 0x85, 0x3c, + 0x79, 0x6a, 0xc4, 0x3c, 0x71, 0xbc, 0x86, 0x3c, 0xdc, 0x89, 0x71, 0x3b, 0x7c, 0x01, 0xf6, 0x3c, + 0xcd, 0xd5, 0x02, 0x3d, 0xe0, 0xa4, 0x26, 0x3c, 0x05, 0xde, 0x25, 0x3c, 0xd1, 0xe1, 0xa3, 0x3c, + 0x31, 0xe2, 0x97, 0x3c, 0x22, 0x0d, 0xfc, 0x3b, 0x6e, 0xae, 0xa3, 0x3c, 0xbe, 0x32, 0xb3, 0x3c, + 0x4f, 0x1a, 0x55, 0x3c, 0x76, 0x50, 0x40, 0x3c, 0x8c, 0xf6, 0x55, 0x3c, 0x47, 0x92, 0x7f, 0x3b, + 0x2c, 0x83, 0x1c, 0x3c, 0x62, 0x9f, 0x66, 0x3c, 0x6e, 0xa2, 0x59, 0x3b, 0x26, 0xb0, 0x36, 0x3c, + 0xf3, 0x75, 0xa9, 0x3c, 0xcc, 0xaa, 0xe7, 0x3c, 0x03, 0xc7, 0xd7, 0x3c, 0xec, 0x3d, 0xe2, 0x3b, + 0xa4, 0xe5, 0x56, 0x3c, 0xf9, 0x35, 0x8c, 0x3c, 0x80, 0x38, 0x3b, 0xb7, 0xbc, 0x87, 0x94, 0x3c, + 0x03, 0x14, 0x83, 0x3a, 0xce, 0xdd, 0xba, 0x3c, 0xca, 0xb8, 0xdb, 0x3b, 0x79, 0xab, 0xf7, 0x3b, + 0xcd, 0xa3, 0xd9, 0x3c, 0x40, 0xf5, 0x74, 0x3a, 0xad, 0xfb, 0x4e, 0xbc, 0x42, 0xe4, 0xd4, 0xba, + 0x82, 0xc6, 0xf5, 0x3b, 0x19, 0xa9, 0xe5, 0x3b, 0x82, 0xc1, 0x65, 0xba, 0x78, 0x22, 0xf6, 0x3b, + 0xa3, 0x7e, 0xac, 0x3c, 0xc0, 0x7c, 0x79, 0xba, 0xc0, 0x09, 0x64, 0x3c, 0x1e, 0x73, 0x87, 0x3c, + 0x6b, 0x1f, 0x4c, 0xbb, 0x70, 0x78, 0x77, 0x3c, 0xd5, 0x36, 0xdb, 0xbb, 0x0a, 0x4c, 0x18, 0x3c, + 0x7c, 0x0d, 0xea, 0x3a, 0x5a, 0x11, 0x2e, 0x3b, 0x7c, 0x56, 0xc1, 0xbb, 0x74, 0x62, 0xe0, 0x3b, + 0x77, 0x22, 0x4a, 0xbc, 0xd8, 0xee, 0x90, 0x3c, 0x5b, 0xca, 0xbc, 0x3c, 0x7c, 0x2b, 0xb6, 0x3a, + 0xe1, 0x69, 0x4d, 0xbb, 0x52, 0xfe, 0xf2, 0x3b, 0xf7, 0xea, 0x2a, 0x3c, 0x8a, 0xd9, 0x1d, 0x3c, + 0x00, 0xf0, 0xc3, 0x38, 0x24, 0x4a, 0x38, 0x3f, 0xb2, 0xe0, 0x68, 0x3f, 0x44, 0x2b, 0x5e, 0x3f, + 0x3b, 0xfc, 0xa0, 0x3e, 0x62, 0x09, 0x76, 0x3f, 0x01, 0x13, 0xce, 0x3e, 0x08, 0x4f, 0x81, 0x3e, + 0x7f, 0xcd, 0x5a, 0x3f, 0xbf, 0x4e, 0x83, 0x3f, 0x52, 0x8f, 0x4d, 0x3e, 0x36, 0x4d, 0x53, 0x3e, + 0x5a, 0xe7, 0x63, 0x3e, 0xcd, 0x14, 0x9f, 0x3e, 0xe0, 0x39, 0x6d, 0x3f, 0x3e, 0xb4, 0x02, 0x3f, + 0x79, 0x21, 0x92, 0xbe, 0x2b, 0xa4, 0xd8, 0x3e, 0x52, 0x7d, 0x2f, 0x3e, 0xae, 0xfc, 0xc6, 0x3e, + 0xcc, 0xaa, 0x1d, 0x3f, 0xed, 0x1b, 0x98, 0x3e, 0x20, 0x49, 0x3e, 0x3e, 0x52, 0x62, 0x95, 0x3f, + 0x01, 0xcc, 0xcf, 0x3e, 0xc0, 0xcd, 0x3f, 0x3d, 0x62, 0xd2, 0x8b, 0x3f, 0x28, 0xce, 0xb6, 0x3e, + 0x1f, 0x7a, 0xdc, 0x3e, 0x4c, 0xec, 0x62, 0x3f, 0x5a, 0xeb, 0x37, 0x3e, 0x0c, 0x80, 0xf3, 0x3d, + 0xb0, 0x6c, 0xf4, 0xbd, 0x85, 0xc9, 0xdb, 0xbd, 0x34, 0x1a, 0x1f, 0xbe, 0xb3, 0xe9, 0x14, 0xbe, + 0x80, 0x87, 0x80, 0xbe, 0x58, 0xe6, 0x10, 0xbe, 0x1a, 0x64, 0xd3, 0xbd, 0x77, 0xdc, 0x3d, 0xbe, + 0xf8, 0xf0, 0x4f, 0xbe, 0x1a, 0x7b, 0x52, 0xbe, 0x7c, 0x30, 0x33, 0xbe, 0x4a, 0xff, 0xd6, 0xbd, + 0xf7, 0x35, 0x84, 0xbe, 0x74, 0xcd, 0xb3, 0xbd, 0xa6, 0x6d, 0xcb, 0xbd, 0xae, 0x0f, 0x31, 0xbe, + 0x8d, 0xcf, 0x5a, 0xbd, 0xfa, 0x4b, 0xed, 0xbd, 0xa5, 0x3c, 0x2e, 0xbe, 0x2a, 0x05, 0x29, 0xbe, + 0xd5, 0x14, 0x4a, 0xbd, 0x63, 0xd8, 0x1f, 0xbe, 0x10, 0xe4, 0x73, 0xbe, 0x1d, 0x41, 0x17, 0xbe, + 0xa6, 0x1e, 0x3d, 0xbe, 0x29, 0x1b, 0x5b, 0xbe, 0xd6, 0x0a, 0x69, 0xbe, 0x03, 0xd2, 0x12, 0xbe, + 0xff, 0x9c, 0x14, 0xbe, 0x64, 0x12, 0x18, 0xbe, 0x06, 0x28, 0x57, 0xbd, 0x79, 0xa7, 0x04, 0xbe, + 0x28, 0x60, 0xb6, 0x3d, 0x77, 0xd4, 0xd9, 0x3d, 0x04, 0x88, 0xcc, 0x3d, 0x7a, 0x7f, 0xd0, 0x3d, + 0x1c, 0x28, 0x0b, 0x3e, 0x24, 0xca, 0xef, 0x3d, 0x41, 0xb2, 0xdb, 0x3d, 0x7d, 0xb8, 0x07, 0x3e, + 0x46, 0xbc, 0x02, 0x3e, 0xee, 0x45, 0xe3, 0x3d, 0xea, 0x6e, 0x03, 0x3e, 0x4a, 0x09, 0xbf, 0x3d, + 0x8e, 0xdf, 0x02, 0x3e, 0x0d, 0xdb, 0xb6, 0x3d, 0x66, 0x50, 0xc1, 0x3d, 0x3e, 0x02, 0xbd, 0x3d, + 0x11, 0x50, 0xa0, 0x3d, 0x92, 0x1b, 0xf2, 0x3d, 0xf6, 0xab, 0xbf, 0x3d, 0xc4, 0x5a, 0xe7, 0x3d, + 0x0d, 0xfe, 0x80, 0x3d, 0xc4, 0x97, 0xdb, 0x3d, 0x7a, 0x64, 0xf2, 0x3d, 0x03, 0xd9, 0x08, 0x3e, + 0xf0, 0x0a, 0x00, 0x3e, 0x3d, 0xaf, 0xe1, 0x3d, 0xc4, 0xd9, 0xe7, 0x3d, 0xfc, 0xa6, 0xa7, 0x3d, + 0xda, 0x7b, 0xde, 0x3d, 0x32, 0x19, 0xc6, 0x3d, 0x9c, 0xf1, 0x78, 0x3d, 0x87, 0xdc, 0xd9, 0x3d, + 0xe0, 0x1d, 0x69, 0x3a, 0x54, 0x1a, 0x18, 0x3d, 0x8a, 0xc2, 0x99, 0x3d, 0xec, 0xc3, 0x53, 0x3d, + 0x32, 0x56, 0x71, 0x3d, 0xdc, 0xa4, 0x2e, 0x3d, 0x54, 0xda, 0x73, 0x3c, 0xab, 0x9b, 0x51, 0x3d, + 0x5a, 0xcf, 0xa5, 0x3c, 0x89, 0xbc, 0x41, 0x3d, 0xa0, 0xce, 0x05, 0x3d, 0x8e, 0x31, 0xc1, 0x3c, + 0x0d, 0xcf, 0x39, 0x3d, 0x80, 0xd6, 0x83, 0xba, 0x2b, 0x6d, 0x80, 0x3d, 0x06, 0x36, 0x48, 0x3d, + 0x1e, 0xa2, 0x7d, 0x3c, 0x62, 0xa7, 0xa4, 0x3c, 0xe2, 0x70, 0x39, 0x3d, 0xdb, 0x28, 0x19, 0x3d, + 0x78, 0x8a, 0x85, 0x3c, 0xa2, 0x75, 0xda, 0x3c, 0x97, 0x84, 0x5f, 0x3d, 0xa8, 0xb8, 0x13, 0x3d, + 0xa5, 0x14, 0x59, 0x3d, 0x7f, 0x94, 0x07, 0x3d, 0x46, 0x6d, 0x3e, 0x3d, 0xa8, 0x7c, 0xbc, 0x3c, + 0xf8, 0x78, 0xf6, 0x3c, 0x44, 0x24, 0xeb, 0x3c, 0x9c, 0xe7, 0xa6, 0x3b, 0x38, 0x9a, 0x22, 0x3d, + 0x3c, 0x66, 0x1a, 0xbc, 0xe2, 0x34, 0x69, 0xbc, 0x43, 0x16, 0x5e, 0xbc, 0x62, 0x21, 0x55, 0xbc, + 0x3b, 0x25, 0x75, 0xbc, 0x7e, 0x65, 0x75, 0xbc, 0x10, 0x75, 0x5b, 0xbc, 0x2a, 0x32, 0x85, 0xbc, + 0xac, 0x71, 0x5b, 0xbc, 0xf8, 0x37, 0x47, 0xbc, 0x32, 0xa2, 0x76, 0xbc, 0xf6, 0x3a, 0x3f, 0xbc, + 0x94, 0x3b, 0x52, 0xbc, 0xcd, 0xa7, 0x2a, 0xbc, 0x93, 0x19, 0x65, 0xbc, 0x88, 0x6b, 0x2c, 0xbc, + 0xd8, 0xe1, 0x2f, 0xbc, 0x2a, 0xb7, 0x73, 0xbc, 0xfc, 0x2a, 0x2e, 0xbc, 0x02, 0x1d, 0x5a, 0xbc, + 0x66, 0x33, 0x0d, 0xbc, 0x46, 0x1b, 0x48, 0xbc, 0xdc, 0x7f, 0x4d, 0xbc, 0xa6, 0xa7, 0x8a, 0xbc, + 0xd8, 0xda, 0x79, 0xbc, 0xb4, 0x14, 0x35, 0xbc, 0x54, 0x04, 0x40, 0xbc, 0x2d, 0xba, 0x0d, 0xbc, + 0x0e, 0x5e, 0x54, 0xbc, 0x97, 0x20, 0x34, 0xbc, 0xe8, 0xb3, 0xf9, 0xbb, 0x8e, 0x34, 0x5f, 0xbc, + 0x1c, 0x4b, 0x1f, 0xba, 0x49, 0x91, 0xef, 0xbb, 0x47, 0x78, 0x35, 0xbc, 0xc4, 0x09, 0x03, 0xbc, + 0x12, 0x0f, 0xee, 0xbb, 0x3f, 0xd8, 0xf6, 0xbb, 0xc8, 0x2b, 0x88, 0xbb, 0x03, 0x15, 0x05, 0xbc, + 0xa0, 0xb7, 0x2d, 0xbb, 0x14, 0xca, 0xbd, 0xbb, 0xf8, 0xb9, 0xb3, 0xbb, 0x7a, 0x14, 0x9a, 0xbb, + 0x6d, 0x51, 0x96, 0xbb, 0x9e, 0xc8, 0xb5, 0xba, 0x97, 0x30, 0x31, 0xbc, 0x4c, 0x98, 0xca, 0xbb, + 0x9d, 0xfd, 0x91, 0xbb, 0x91, 0xeb, 0xa5, 0xbb, 0x8a, 0x03, 0xbe, 0xbb, 0x44, 0x96, 0xbc, 0xbb, + 0x1a, 0x6e, 0x81, 0xbb, 0x96, 0x3f, 0x88, 0xbb, 0x39, 0xfc, 0xc9, 0xbb, 0x4a, 0xe2, 0xea, 0xbb, + 0x99, 0xd1, 0x03, 0xbc, 0xa1, 0x0f, 0x5b, 0xbb, 0x0e, 0x6a, 0xa5, 0xbb, 0x12, 0x36, 0x35, 0xbb, + 0x89, 0x59, 0xa7, 0xbb, 0x91, 0x5a, 0x89, 0xbb, 0xf5, 0x79, 0x06, 0xbb, 0x0c, 0xea, 0xe3, 0xbb, + 0xb3, 0x3d, 0xae, 0xbb, 0xfc, 0x18, 0xae, 0xbb, 0x48, 0xd2, 0xb0, 0xbb, 0xaa, 0x57, 0xb7, 0xbb, + 0xe4, 0xf9, 0x0b, 0xbc, 0x74, 0xb0, 0xcb, 0xbb, 0xa8, 0x17, 0xb6, 0xbb, 0xcd, 0x29, 0xf3, 0xbb, + 0x3d, 0xbf, 0x01, 0xbc, 0x29, 0x3f, 0xe5, 0xbb, 0x29, 0xe8, 0xef, 0xbb, 0x71, 0x4b, 0xa2, 0xbb, + 0xf6, 0xbe, 0x0b, 0xbc, 0xa0, 0x12, 0x9e, 0xbb, 0xfb, 0x8c, 0x90, 0xbb, 0x1c, 0xd1, 0xbb, 0xbb, + 0x0e, 0x0b, 0x6e, 0xbb, 0xa2, 0x4f, 0xc8, 0xbb, 0xa2, 0x62, 0xbd, 0xbb, 0x10, 0xa4, 0xd5, 0xbb, + 0x3c, 0x53, 0x43, 0xbb, 0x62, 0x15, 0xce, 0xbb, 0xaa, 0x28, 0xfd, 0xbb, 0x36, 0x1d, 0xe6, 0xbb, + 0x9d, 0xce, 0xe8, 0xbb, 0x95, 0xb5, 0xee, 0xbb, 0x61, 0x5b, 0xf4, 0xbb, 0xa2, 0x78, 0xa9, 0xbb, + 0x54, 0x6d, 0xc8, 0xbb, 0xfc, 0x34, 0xbc, 0xbb, 0x96, 0x91, 0x4a, 0xbb, 0x9d, 0x34, 0xb9, 0xbb, + 0x45, 0x61, 0x6e, 0xba, 0xf9, 0xb2, 0xf2, 0xba, 0x4c, 0x34, 0x88, 0xbb, 0x1b, 0x80, 0x4a, 0xbb, + 0x15, 0xd6, 0x99, 0xbb, 0x29, 0x6e, 0x23, 0xbb, 0xe0, 0xc3, 0x75, 0xba, 0xfa, 0xb4, 0x5b, 0xbb, + 0x6b, 0xf5, 0x26, 0xbb, 0x76, 0xd8, 0x79, 0xbb, 0xe6, 0x45, 0x26, 0xbb, 0xe0, 0x6a, 0xc0, 0xba, + 0x0f, 0x6b, 0x90, 0xbb, 0x66, 0x3e, 0xa2, 0xb9, 0xfe, 0x47, 0x39, 0xbb, 0x72, 0x63, 0x69, 0xbb, + 0xe6, 0x13, 0x01, 0xba, 0x05, 0x39, 0x9e, 0xba, 0xd6, 0x72, 0x5c, 0xbb, 0x62, 0xa4, 0x33, 0xbb, + 0x2b, 0x6a, 0x29, 0xba, 0xcc, 0x33, 0x14, 0xbb, 0xea, 0xa6, 0x94, 0xbb, 0x1b, 0x7e, 0x0c, 0xbb, + 0xba, 0x8a, 0x65, 0xbb, 0x92, 0xc9, 0x60, 0xbb, 0x13, 0x0c, 0x87, 0xbb, 0xf3, 0x17, 0x12, 0xbb, + 0x16, 0x78, 0x0f, 0xbb, 0x5a, 0x83, 0x19, 0xbb, 0x3e, 0xd5, 0xa0, 0xb9, 0x29, 0xea, 0x17, 0xbb, + 0xb2, 0x04, 0xaa, 0xbd, 0x12, 0xb9, 0x5c, 0xbe, 0xac, 0xf9, 0x73, 0xbe, 0xea, 0xc3, 0x4c, 0xbe, + 0xb9, 0xcb, 0x3f, 0xbe, 0xcc, 0x19, 0x5f, 0xbe, 0x30, 0xc1, 0x2f, 0xbe, 0x32, 0x6a, 0x69, 0xbe, + 0xf6, 0xd8, 0x04, 0xbe, 0x9e, 0x2d, 0x1a, 0xbe, 0x4c, 0xcd, 0x3f, 0xbe, 0xbe, 0xc0, 0x21, 0xbe, + 0x88, 0x8f, 0x06, 0xbe, 0xde, 0xfc, 0xe2, 0xbd, 0x38, 0x1f, 0x82, 0xbe, 0x3b, 0x31, 0x14, 0xbe, + 0xfc, 0x50, 0x21, 0xbe, 0x1a, 0x08, 0x48, 0xbe, 0x4e, 0xb4, 0x11, 0xbe, 0xee, 0x67, 0x31, 0xbe, + 0x8b, 0x27, 0x04, 0xbe, 0x6f, 0xa3, 0x14, 0xbe, 0x8e, 0x87, 0x1a, 0xbe, 0x08, 0xbf, 0x6f, 0xbe, + 0x95, 0x15, 0x5d, 0xbe, 0x07, 0x1b, 0xde, 0xbd, 0x9e, 0xe7, 0x06, 0xbe, 0xd0, 0x99, 0xbd, 0xbd, + 0x5c, 0x00, 0x2b, 0xbe, 0x28, 0xec, 0x08, 0xbe, 0x5f, 0xf4, 0xc3, 0xbd, 0x5e, 0xe7, 0x4b, 0xbe, + 0x4e, 0x5f, 0xc8, 0xbc, 0x46, 0xdb, 0x9e, 0xbd, 0xef, 0xa0, 0x41, 0xbd, 0xab, 0x68, 0xff, 0xbc, + 0xc8, 0x4e, 0x21, 0xbe, 0x13, 0x27, 0x35, 0xbd, 0x00, 0xeb, 0x6d, 0x39, 0x3c, 0xac, 0xb2, 0xbd, + 0x0e, 0xd7, 0xb8, 0xbd, 0x0e, 0xf8, 0x62, 0xbd, 0xa4, 0x2e, 0x6b, 0xbd, 0xb3, 0x47, 0x90, 0xbd, + 0x94, 0x32, 0x34, 0xbe, 0x94, 0x6e, 0xe3, 0xbc, 0xdc, 0x78, 0x58, 0xbd, 0x50, 0x27, 0xa1, 0xbd, + 0xfb, 0xe7, 0x14, 0x3c, 0x5e, 0xdb, 0xf5, 0xbc, 0x00, 0x9e, 0x5b, 0xbc, 0x74, 0xa2, 0xca, 0xbc, + 0xd2, 0xa8, 0x96, 0x3c, 0xaa, 0xa3, 0xa2, 0xbd, 0x30, 0xe9, 0xda, 0xbc, 0x22, 0x34, 0xa2, 0xbc, + 0x56, 0x51, 0x12, 0xbd, 0xe4, 0x2a, 0xf5, 0xbd, 0x40, 0x7e, 0x13, 0xbe, 0x28, 0xcc, 0xf8, 0xbc, + 0x74, 0x31, 0xb7, 0x3c, 0x14, 0x39, 0xd6, 0xbc, 0x5d, 0x0b, 0x32, 0xbd, 0x3d, 0xc2, 0xc5, 0xbd, + 0xc4, 0x69, 0xb1, 0x3c, 0xf4, 0x76, 0x60, 0x3d, 0x06, 0xd7, 0x37, 0x3d, 0xf6, 0xb5, 0x60, 0x3d, + 0x10, 0xec, 0x66, 0x3d, 0xe0, 0x4f, 0x78, 0x3d, 0xa1, 0x43, 0x13, 0x3d, 0x83, 0x30, 0x8b, 0x3d, + 0x1e, 0x15, 0xb1, 0x3d, 0xb2, 0xcc, 0x49, 0x3d, 0x83, 0x31, 0x36, 0x3d, 0xd2, 0x30, 0x19, 0x3d, + 0x4a, 0x52, 0x5e, 0x3d, 0x42, 0x31, 0x21, 0x3d, 0x42, 0xfb, 0x58, 0x3d, 0x98, 0x36, 0x13, 0x3d, + 0x31, 0xd4, 0x50, 0x3c, 0xf4, 0x41, 0x04, 0x3d, 0x18, 0x73, 0x1d, 0x3d, 0x7e, 0xad, 0xc7, 0x3c, + 0x22, 0x1f, 0x28, 0x3c, 0xba, 0xa6, 0x32, 0x3d, 0xef, 0x6f, 0xde, 0x3c, 0xa0, 0x01, 0x61, 0x3d, + 0xcc, 0x45, 0x80, 0x3d, 0xe7, 0xd7, 0x17, 0x3d, 0x2e, 0xe3, 0x86, 0x3d, 0xdf, 0x56, 0xb8, 0x3c, + 0x3f, 0xe9, 0xea, 0x3c, 0x5a, 0x7b, 0x57, 0x3d, 0x34, 0xca, 0xd3, 0x3b, 0x4a, 0x71, 0x1d, 0x3d, + 0x5a, 0x46, 0x90, 0xbb, 0xb6, 0x30, 0x1f, 0x3d, 0x46, 0x6b, 0xed, 0x3c, 0x45, 0xd1, 0x90, 0x3c, + 0x0a, 0x1e, 0x3b, 0x3d, 0xff, 0x62, 0xc2, 0x3c, 0xc0, 0x9a, 0xb7, 0xbb, 0x30, 0x84, 0x12, 0xbc, + 0x54, 0x66, 0xc3, 0xbb, 0x82, 0xe2, 0x00, 0x3d, 0x7c, 0xde, 0x18, 0x3c, 0x58, 0x95, 0x30, 0x3c, + 0x83, 0xf6, 0x17, 0x3d, 0xbc, 0xa1, 0x8b, 0x3b, 0xd1, 0xab, 0x29, 0x3d, 0xd8, 0x1d, 0xcc, 0x3c, + 0x1d, 0xfb, 0x63, 0xbc, 0x8c, 0xe6, 0x6b, 0x3c, 0x57, 0xeb, 0x07, 0xbc, 0x48, 0x3e, 0xa4, 0x3c, + 0xc0, 0x70, 0xcd, 0xba, 0xd6, 0xdd, 0x29, 0x3c, 0xfc, 0x60, 0x8b, 0x3b, 0x8c, 0x42, 0xb6, 0x3c, + 0xc2, 0x13, 0xbf, 0xbc, 0xe9, 0x13, 0xaf, 0x3c, 0x70, 0x21, 0x77, 0x3d, 0x6d, 0xc6, 0xb2, 0x3b, + 0x3a, 0xcb, 0x10, 0xbc, 0x51, 0xbe, 0x98, 0x3c, 0xa0, 0xaf, 0x2e, 0x3c, 0xa4, 0xb1, 0xa1, 0x3c, + 0x23, 0x09, 0x11, 0xbb, 0x6c, 0x4f, 0xf8, 0xbb, 0x1a, 0x75, 0xd8, 0xbb, 0xec, 0x47, 0x04, 0xbc, + 0x54, 0xe4, 0xb3, 0xbb, 0xae, 0x45, 0x10, 0xbc, 0xde, 0xc8, 0xa7, 0xbb, 0x6a, 0x1c, 0xe0, 0xbb, + 0xc6, 0x52, 0x1e, 0xbc, 0x3a, 0x30, 0xe9, 0xbb, 0x28, 0x34, 0xab, 0xbb, 0x5c, 0x13, 0x79, 0xbb, + 0x34, 0x5c, 0x87, 0xbb, 0x97, 0x16, 0xaa, 0xbb, 0x9b, 0x42, 0x07, 0xbc, 0xd8, 0x87, 0x83, 0xbb, + 0x51, 0x1c, 0xc7, 0xba, 0xae, 0x84, 0x95, 0xbb, 0xd8, 0xb4, 0xa0, 0xbb, 0x51, 0x46, 0x7b, 0xbb, + 0xae, 0xe9, 0x07, 0xbb, 0x92, 0xa0, 0x90, 0xbb, 0x80, 0x7c, 0x5e, 0xbb, 0x8e, 0x6a, 0x0e, 0xbc, + 0xf0, 0x9e, 0xe6, 0xbb, 0x72, 0xf1, 0x2c, 0xbb, 0xc5, 0xa0, 0x04, 0xbc, 0x1e, 0xbd, 0x2c, 0xbb, + 0x9f, 0xe4, 0x96, 0xbb, 0x2c, 0x48, 0x02, 0xbc, 0x34, 0x19, 0x61, 0xb8, 0xd2, 0x98, 0x67, 0xbb, + 0x96, 0xff, 0xcd, 0x39, 0x3a, 0x17, 0xc4, 0xbb, 0x64, 0x5a, 0xa4, 0xbb, 0x64, 0xb8, 0x8e, 0xbb, + 0x1c, 0xd9, 0x90, 0xbb, 0x42, 0xa6, 0xa7, 0xbb, 0x14, 0xe7, 0x7c, 0xba, 0xec, 0x29, 0x5f, 0x3a, + 0x10, 0x65, 0xdc, 0xb9, 0x46, 0xdb, 0xae, 0xbb, 0x62, 0x17, 0xe0, 0xba, 0x2b, 0x35, 0x95, 0xba, + 0x91, 0x25, 0x1e, 0xbb, 0xd1, 0x63, 0xdc, 0xba, 0x0f, 0xac, 0xe8, 0xbb, 0x38, 0xd0, 0x3e, 0xbb, + 0x55, 0x56, 0x96, 0x3a, 0xdc, 0xc8, 0x35, 0xbb, 0x30, 0x03, 0x72, 0xb9, 0x59, 0xed, 0x5e, 0xbb, + 0x8f, 0x91, 0x69, 0xba, 0xd0, 0xb7, 0x8e, 0xba, 0x1a, 0xd3, 0x90, 0xba, 0x3a, 0xbb, 0xb1, 0xbb, + 0x5c, 0x2f, 0xcc, 0x3a, 0xa8, 0x1c, 0x8c, 0xba, 0x00, 0x24, 0xf7, 0xbb, 0xd0, 0x16, 0x74, 0xba, + 0x34, 0xc2, 0x5f, 0xba, 0x97, 0x46, 0x95, 0xbb, 0xd6, 0x44, 0xf8, 0xb9, 0x8c, 0x16, 0xda, 0xba, + 0x21, 0x18, 0xa7, 0xba, 0x72, 0x90, 0x3b, 0xbb, 0x44, 0x32, 0x0a, 0xbb, 0x34, 0x66, 0x1a, 0xbb, + 0x80, 0x98, 0x88, 0xbb, 0x1c, 0xdb, 0x31, 0xbb, 0xce, 0x66, 0xb9, 0xba, 0x9c, 0xd2, 0x86, 0xbb, + 0x91, 0xfb, 0x9d, 0xbb, 0xee, 0xd3, 0x1b, 0xbb, 0x49, 0x63, 0x24, 0xbb, 0xde, 0x08, 0x20, 0xbb, + 0x03, 0x9b, 0x93, 0xbb, 0x0c, 0x19, 0xf7, 0xba, 0x06, 0xfb, 0x19, 0xbb, 0x68, 0xdd, 0x19, 0xbb, + 0x69, 0x2f, 0x03, 0xba, 0x52, 0xae, 0xc8, 0xba, 0x51, 0xd0, 0xe7, 0xba, 0x6d, 0xfe, 0x8c, 0xba, + 0x18, 0x42, 0xcd, 0xb8, 0x3b, 0x70, 0x39, 0xbb, 0xa0, 0xde, 0xb8, 0xba, 0xac, 0x67, 0x0a, 0xbb, + 0x8d, 0x0d, 0x54, 0xbb, 0xb2, 0x5a, 0x4c, 0xbb, 0xe6, 0xe1, 0x86, 0xbb, 0xc7, 0x02, 0xa8, 0xba, + 0x2c, 0x26, 0x55, 0xba, 0xec, 0x93, 0x0e, 0xbb, 0x86, 0x83, 0x5a, 0xba, 0x78, 0xc4, 0x37, 0xbb, + 0x6c, 0x3d, 0xec, 0x37, 0x40, 0x58, 0x07, 0xbb, 0x1e, 0x2f, 0xac, 0xba, 0xfb, 0x3a, 0x02, 0xba, + 0xc7, 0x25, 0x6e, 0xbb, 0x0a, 0xd8, 0x63, 0xba, 0x50, 0xe2, 0x2d, 0x3a, 0x7b, 0x1c, 0x8d, 0xb9, + 0x7c, 0x7e, 0x51, 0xb9, 0xf7, 0xfd, 0xc2, 0xba, 0xe7, 0xd3, 0x44, 0xba, 0xf4, 0x98, 0x91, 0xba, + 0x9b, 0xec, 0x6e, 0xbb, 0x38, 0xee, 0x08, 0xb9, 0xbe, 0x43, 0xe8, 0xba, 0x56, 0x7b, 0xeb, 0xba, + 0x3c, 0x43, 0x5a, 0x3a, 0xa0, 0xdb, 0x26, 0xba, 0x5a, 0xf2, 0x14, 0x3a, 0xeb, 0x4a, 0x61, 0xba, + 0x52, 0x62, 0x01, 0x3a, 0x3c, 0xfb, 0x98, 0xba, 0xb6, 0x2e, 0x8b, 0xb9, 0x54, 0x6e, 0xfa, 0xb9, + 0x71, 0x3a, 0x8b, 0x3a, 0x22, 0xe9, 0x18, 0xbb, 0x43, 0xa6, 0x7b, 0xbb, 0x76, 0x2d, 0xe1, 0xb9, + 0x9b, 0x45, 0x8b, 0x3a, 0x5a, 0x51, 0xfa, 0xb9, 0xf9, 0xc5, 0x88, 0xba, 0x65, 0xfb, 0xf4, 0xba, + 0x32, 0x4e, 0x6b, 0xbc, 0xad, 0x60, 0x08, 0xbe, 0xc6, 0x8d, 0xf3, 0xbd, 0x20, 0x34, 0x07, 0xbe, + 0xce, 0xad, 0x93, 0xbd, 0x00, 0x0f, 0x15, 0xbe, 0x48, 0x51, 0x8c, 0xbd, 0x79, 0x97, 0x3a, 0xbd, + 0x13, 0x1e, 0xba, 0xbd, 0x8a, 0x44, 0x01, 0xbe, 0xf4, 0x5e, 0x82, 0xbd, 0x74, 0x72, 0x1b, 0xbd, + 0xfe, 0xb2, 0xf2, 0xbc, 0x84, 0x1e, 0x90, 0xbd, 0xc4, 0x5f, 0x23, 0xbe, 0xc7, 0x8d, 0x6e, 0xbd, + 0xd0, 0xa0, 0xd3, 0xbb, 0x32, 0xac, 0x99, 0xbd, 0x14, 0xe7, 0x5e, 0xbd, 0x02, 0x0c, 0x9a, 0xbd, + 0xff, 0x73, 0x1b, 0xbd, 0x97, 0x3c, 0x2c, 0xbd, 0x9b, 0xfc, 0x33, 0xbd, 0x1d, 0x8d, 0x1d, 0xbe, + 0x84, 0xf2, 0x57, 0xbd, 0x94, 0x19, 0x41, 0xbc, 0x22, 0x44, 0x0f, 0xbe, 0x76, 0x43, 0x05, 0xbd, + 0x68, 0x36, 0x89, 0xbd, 0xc7, 0x63, 0x09, 0xbe, 0x9c, 0xbd, 0xa1, 0x3b, 0x42, 0xa2, 0x0c, 0xbd, + 0x3a, 0x20, 0x79, 0xbd, 0xe6, 0xe6, 0x9a, 0xbd, 0x1a, 0xda, 0x9d, 0xbd, 0xc1, 0xfd, 0xc3, 0xbd, + 0x01, 0x64, 0xe5, 0xbd, 0xfa, 0xab, 0x6b, 0xbd, 0x9e, 0x60, 0xdd, 0xbc, 0xf4, 0xc5, 0xb7, 0xbd, + 0x74, 0x78, 0xe0, 0xbd, 0xa8, 0xbb, 0xa8, 0xbd, 0x16, 0x1d, 0x84, 0xbd, 0xce, 0xb1, 0x6a, 0xbd, + 0xbc, 0xea, 0x8a, 0xbd, 0x4c, 0xb3, 0x06, 0xbd, 0x27, 0x54, 0x8d, 0xbd, 0xee, 0x96, 0xe4, 0xbd, + 0x28, 0x90, 0xa4, 0xbd, 0xe5, 0xb1, 0xce, 0xbd, 0xc9, 0xa1, 0xd4, 0xbd, 0x45, 0x6a, 0xcd, 0xbd, + 0x7f, 0x6a, 0x83, 0xbd, 0xdd, 0x66, 0x3c, 0xbd, 0x80, 0x8b, 0xd8, 0xbd, 0xce, 0x6c, 0xac, 0xbd, + 0xe0, 0x35, 0xb0, 0xbd, 0x42, 0xef, 0x01, 0xbe, 0x81, 0xfe, 0xad, 0xbd, 0x9e, 0x79, 0x8a, 0xbd, + 0xe9, 0x9c, 0x05, 0xbe, 0x83, 0x68, 0xd6, 0xbd, 0xe5, 0xd8, 0xf9, 0xbb, 0x78, 0x51, 0x3f, 0xbd, + 0x8a, 0x7c, 0x1b, 0x3d, 0xc4, 0xb4, 0x2a, 0x3d, 0x02, 0xf4, 0x61, 0x3d, 0x54, 0x73, 0x3b, 0x3d, + 0x08, 0x10, 0x67, 0x3d, 0x37, 0x52, 0x62, 0x3d, 0xb8, 0xf0, 0x4b, 0x3d, 0x8c, 0x3b, 0x8d, 0x3d, + 0xb2, 0x73, 0x63, 0x3d, 0x08, 0xc7, 0x4c, 0x3d, 0x50, 0xbd, 0x5c, 0x3d, 0x76, 0x79, 0x5a, 0x3d, + 0xbb, 0x8c, 0x38, 0x3d, 0xa9, 0x40, 0x07, 0x3d, 0x20, 0x6f, 0x42, 0x3d, 0x66, 0xe2, 0x6f, 0x3d, + 0x0d, 0x2b, 0x61, 0x3d, 0x86, 0x6e, 0x71, 0x3d, 0x18, 0x81, 0x42, 0x3d, 0x3f, 0x31, 0x58, 0x3d, + 0x4c, 0x4e, 0x6b, 0x3d, 0x29, 0x2a, 0x48, 0x3d, 0xdb, 0x55, 0x6a, 0x3d, 0x3a, 0x57, 0x80, 0x3d, + 0x24, 0x4a, 0x80, 0x3d, 0x4d, 0x96, 0x8f, 0x3d, 0x51, 0xd3, 0x5e, 0x3d, 0x3a, 0xcf, 0x39, 0x3d, + 0x36, 0xbc, 0x91, 0x3d, 0x8d, 0x4f, 0x41, 0x3d, 0x10, 0xaf, 0xe4, 0x3c, 0xd0, 0xee, 0x5b, 0x3d, + 0x08, 0x64, 0xa2, 0x3b, 0x6a, 0x94, 0xce, 0x3c, 0x59, 0xe4, 0x14, 0x3d, 0x76, 0x5b, 0x0d, 0x3d, + 0x24, 0xa2, 0xdb, 0x3c, 0x1d, 0x93, 0x3b, 0x3c, 0xc0, 0x0f, 0x03, 0xbb, 0x9e, 0xd3, 0xd0, 0x3c, + 0x42, 0x0f, 0x00, 0x3d, 0x6c, 0xd5, 0xb6, 0x3c, 0xdc, 0xef, 0xc8, 0x3b, 0x11, 0x2b, 0x0b, 0x3c, + 0x22, 0xd4, 0x9c, 0x3c, 0x31, 0x95, 0xb5, 0xbb, 0x0a, 0xf1, 0xd0, 0x3c, 0xeb, 0xbc, 0xf9, 0x3c, + 0x06, 0x6a, 0xb1, 0x3b, 0x6a, 0x55, 0xd6, 0x3c, 0x49, 0xe2, 0xa7, 0x3c, 0x16, 0xd1, 0xc3, 0x3c, + 0x95, 0x89, 0xb5, 0x3c, 0xee, 0xab, 0xf1, 0x3b, 0x16, 0x48, 0xa4, 0x3c, 0xd3, 0xe0, 0xe2, 0x3c, + 0x00, 0x15, 0x02, 0x3d, 0x6d, 0xec, 0x56, 0x3c, 0x79, 0x14, 0x2f, 0x3c, 0xcd, 0xe9, 0x64, 0x3c, + 0x65, 0xaf, 0xc1, 0x3c, 0x3c, 0xae, 0x98, 0x3c, 0x10, 0x87, 0x6d, 0x3c, 0x65, 0x95, 0x41, 0x3c, + 0x0e, 0x45, 0x80, 0xbb, 0x86, 0x8c, 0xa3, 0xbb, 0x4b, 0x83, 0xf6, 0xbb, 0x71, 0xf4, 0xb0, 0xbb, + 0x32, 0xdc, 0xc6, 0xbb, 0x3e, 0x7f, 0xe3, 0xbb, 0x51, 0xef, 0xd3, 0xbb, 0x08, 0x5b, 0x0d, 0xbc, + 0xd3, 0x72, 0xcc, 0xbb, 0x0a, 0x4f, 0xc0, 0xbb, 0x42, 0x79, 0xcc, 0xbb, 0x3f, 0x80, 0xd5, 0xbb, + 0x82, 0xbc, 0xb2, 0xbb, 0x93, 0x9b, 0x6a, 0xbb, 0xe1, 0xbb, 0xc7, 0xbb, 0xf7, 0xe4, 0xd7, 0xbb, + 0x36, 0x4e, 0xbf, 0xbb, 0xf9, 0x58, 0xde, 0xbb, 0x05, 0x94, 0x99, 0xbb, 0x1a, 0x0a, 0xbd, 0xbb, + 0x02, 0x96, 0xf8, 0xbb, 0x6c, 0x0a, 0xca, 0xbb, 0x72, 0x91, 0xc6, 0xbb, 0x7a, 0xb7, 0x02, 0xbc, + 0x78, 0xf9, 0x04, 0xbc, 0x28, 0x81, 0xe3, 0xbb, 0x07, 0x1a, 0xc0, 0xbb, 0x63, 0x02, 0xac, 0xbb, + 0x24, 0xfb, 0xf5, 0xbb, 0x6f, 0x28, 0x94, 0xbb, 0x84, 0xa2, 0x98, 0xbb, 0x74, 0x42, 0xe8, 0xbb, + 0x02, 0xf5, 0x20, 0xba, 0x5b, 0x3b, 0x5b, 0xbb, 0x2a, 0xdd, 0xb8, 0xbb, 0x8c, 0x14, 0x8c, 0xbb, + 0x0e, 0xba, 0x4b, 0xbb, 0xfd, 0xe8, 0x27, 0xbb, 0x08, 0xeb, 0xa8, 0xba, 0x09, 0x45, 0x8c, 0xbb, + 0x8e, 0xde, 0x79, 0xbb, 0x91, 0x3d, 0x4b, 0xbb, 0x29, 0xe7, 0xbf, 0xba, 0x2e, 0x1c, 0x05, 0xbb, + 0xf5, 0xa7, 0x3b, 0xbb, 0x60, 0xe7, 0x90, 0x39, 0xca, 0x86, 0x7f, 0xbb, 0xa0, 0xc3, 0x77, 0xbb, + 0x96, 0x4c, 0x67, 0xba, 0x08, 0xdf, 0x65, 0xbb, 0xb6, 0x41, 0x02, 0xbb, 0x14, 0xd3, 0x3c, 0xbb, + 0xce, 0xf4, 0x84, 0xbb, 0xfb, 0x26, 0x04, 0xbb, 0xfc, 0x9f, 0x19, 0xbb, 0xea, 0xd6, 0x92, 0xbb, + 0x50, 0xc0, 0xa4, 0xbb, 0xd7, 0xff, 0xa2, 0xba, 0xe0, 0x6d, 0xc3, 0xba, 0xf0, 0x49, 0x0a, 0xbb, + 0xba, 0x8e, 0x34, 0xbb, 0x8e, 0x53, 0xda, 0xba, 0x68, 0x55, 0x59, 0xbb, 0x5e, 0x0f, 0x3e, 0xbb, + 0x6b, 0xaa, 0x1b, 0xbb, 0x4d, 0x61, 0x24, 0xbb, 0x30, 0xfb, 0x3d, 0xbb, 0x3e, 0xba, 0x3c, 0xbb, + 0x14, 0x04, 0x6f, 0xbb, 0x80, 0x42, 0x3d, 0xbb, 0xf9, 0x84, 0x19, 0xbb, 0xdf, 0x9b, 0x76, 0xbb, + 0xac, 0x93, 0x66, 0xbb, 0xf6, 0x92, 0x42, 0xbb, 0x82, 0xe1, 0x45, 0xbb, 0x24, 0xb6, 0x3a, 0xbb, + 0xd1, 0x22, 0x29, 0xbb, 0xf2, 0x38, 0xf3, 0xba, 0x41, 0x24, 0x2b, 0xbb, 0x0a, 0xdf, 0x70, 0xbb, + 0xba, 0x6e, 0x5a, 0xbb, 0xb9, 0xa9, 0x69, 0xbb, 0x62, 0x51, 0x55, 0xbb, 0x40, 0xb0, 0x5b, 0xbb, + 0x49, 0x1e, 0x41, 0xbb, 0x29, 0x4c, 0x24, 0xbb, 0xe8, 0xa9, 0x6f, 0xbb, 0xf4, 0x47, 0x5f, 0xbb, + 0xf8, 0xe8, 0x5d, 0xbb, 0xe0, 0x13, 0x96, 0xbb, 0xf8, 0xbc, 0x59, 0xbb, 0xb6, 0xe4, 0x2d, 0xbb, + 0x60, 0x04, 0x95, 0xbb, 0xc8, 0x11, 0x57, 0xbb, 0x31, 0x1d, 0x82, 0xba, 0x79, 0x4c, 0x2d, 0xbb, + 0xf4, 0x0f, 0x3e, 0xba, 0xe7, 0xe4, 0xdc, 0xba, 0x10, 0x55, 0x00, 0xbb, 0x58, 0xda, 0x17, 0xbb, + 0xe8, 0x04, 0x0e, 0xbb, 0x01, 0xdf, 0x36, 0xba, 0xac, 0x7c, 0x81, 0x39, 0xb0, 0x55, 0xd0, 0xba, + 0x20, 0x10, 0x17, 0xbb, 0x68, 0xc5, 0xcf, 0xba, 0x27, 0x88, 0x25, 0xba, 0xee, 0x0f, 0x1f, 0xba, + 0x92, 0x74, 0xa8, 0xba, 0xd0, 0xf8, 0x97, 0x38, 0x8a, 0x57, 0xc6, 0xba, 0xe4, 0xdb, 0x14, 0xbb, + 0x5f, 0x28, 0x60, 0xba, 0x87, 0x80, 0xfc, 0xba, 0x6f, 0xbc, 0xf9, 0xba, 0x5f, 0x1f, 0xfa, 0xba, + 0x29, 0xfa, 0x9a, 0xba, 0xd2, 0xa9, 0xe2, 0xb9, 0xe8, 0xd0, 0xeb, 0xba, 0xd7, 0x5f, 0xd9, 0xba, + 0xb2, 0x6c, 0xf1, 0xba, 0x1c, 0xcd, 0xe2, 0xba, 0xd2, 0xfc, 0x94, 0xba, 0x94, 0x0e, 0x8e, 0xba, + 0xf9, 0x54, 0x0e, 0xbb, 0x78, 0xfc, 0xf2, 0xba, 0x44, 0xb6, 0xa8, 0xb9, 0xd2, 0x46, 0x10, 0xba, + 0x1e, 0x64, 0x12, 0xbd, 0xf3, 0x57, 0x99, 0xbd, 0xa6, 0xec, 0x03, 0xbe, 0xe1, 0xea, 0xad, 0xbd, + 0x3c, 0x19, 0x9a, 0xbd, 0xe1, 0x87, 0xbd, 0xbd, 0x8b, 0xdf, 0xa4, 0xbd, 0x10, 0x77, 0xfb, 0xbd, + 0xdd, 0x7a, 0xaf, 0xbd, 0x4e, 0x7c, 0xa4, 0xbd, 0x9f, 0x79, 0x90, 0xbd, 0xda, 0x93, 0xa7, 0xbd, + 0xe9, 0xea, 0x9c, 0xbd, 0x74, 0x18, 0xed, 0xbc, 0x73, 0xf0, 0xc2, 0xbd, 0x08, 0xc2, 0xb5, 0xbd, + 0x5e, 0x66, 0x63, 0xbd, 0x7e, 0x7a, 0xb9, 0xbd, 0x76, 0x3d, 0x49, 0xbd, 0xbe, 0xf2, 0x93, 0xbd, + 0xf9, 0x6a, 0xeb, 0xbd, 0x2d, 0xf8, 0xa4, 0xbd, 0xa2, 0x7d, 0x8b, 0xbd, 0xed, 0xf5, 0xf3, 0xbd, + 0x48, 0x64, 0x01, 0xbe, 0x4f, 0x4c, 0x77, 0xbd, 0xf9, 0xd9, 0x7b, 0xbd, 0x44, 0xf1, 0x86, 0xbd, + 0x4e, 0xa0, 0xa9, 0xbd, 0xfb, 0xe5, 0x31, 0xbd, 0xe7, 0xd2, 0xb3, 0xbd, 0xf2, 0x35, 0xce, 0xbd, + 0x76, 0x2d, 0x5b, 0xbc, 0x8c, 0xdf, 0x78, 0xbd, 0x85, 0xa0, 0xbc, 0xbc, 0x48, 0x02, 0x1e, 0xbd, + 0xc8, 0xa3, 0x85, 0xbd, 0x4d, 0xcb, 0x11, 0xbc, 0x82, 0x55, 0xca, 0x3c, 0x35, 0x7f, 0x26, 0xbd, + 0x06, 0xe6, 0x59, 0xbd, 0x2c, 0xe9, 0xef, 0xbb, 0x32, 0x84, 0xbb, 0xbb, 0x38, 0xfa, 0x23, 0xbd, + 0x87, 0xc5, 0xda, 0xbc, 0xd6, 0x17, 0x3c, 0xbb, 0x17, 0x77, 0x3b, 0xbd, 0xe8, 0x1a, 0x84, 0xbd, + 0x89, 0x1b, 0x49, 0xbd, 0xa7, 0x8e, 0x6d, 0xbd, 0x14, 0x7c, 0xd0, 0xbc, 0xa6, 0xe5, 0xf6, 0xbc, + 0xbe, 0x40, 0xf8, 0xbc, 0x00, 0xce, 0xfa, 0xbb, 0x00, 0xac, 0x12, 0xb7, 0xd3, 0xc8, 0xa5, 0xbc, + 0x78, 0x5c, 0x3d, 0xbc, 0xcc, 0x58, 0xa3, 0xbd, 0xd4, 0xe3, 0x30, 0xbd, 0x03, 0x09, 0x36, 0xbc, + 0x48, 0x6d, 0x3f, 0xbd, 0x86, 0x3a, 0x32, 0xbd, 0x80, 0xcc, 0x4a, 0xbb, 0x87, 0x89, 0xf7, 0xbc, + 0x95, 0xb6, 0x8b, 0x3b, 0x92, 0x37, 0x82, 0x3c, 0x00, 0xaf, 0xe2, 0x3c, 0xaa, 0x9d, 0xb6, 0x3c, + 0xb2, 0x6b, 0x88, 0x3c, 0x06, 0x60, 0xdd, 0x3c, 0x19, 0x81, 0x67, 0x3c, 0xa1, 0x36, 0x16, 0x3d, + 0x44, 0x10, 0x0f, 0x3d, 0xe8, 0xce, 0x9c, 0x3c, 0xfb, 0xe0, 0x43, 0x3c, 0x2c, 0x11, 0xd0, 0x3c, + 0x13, 0xdb, 0x07, 0x3c, 0xf4, 0xf8, 0x03, 0x3c, 0xb8, 0x38, 0xdb, 0x3c, 0xe6, 0xf6, 0xf8, 0x3c, + 0x02, 0xeb, 0xb5, 0x3c, 0xde, 0xe7, 0x82, 0x3c, 0x5e, 0x1d, 0xa3, 0x3c, 0x67, 0x07, 0x0b, 0x3c, + 0x44, 0xa8, 0xfe, 0x3c, 0x84, 0xcb, 0x8b, 0x3c, 0x70, 0x35, 0x3e, 0x3c, 0x80, 0xfa, 0xbe, 0x3c, + 0x30, 0xc4, 0x00, 0x3d, 0x4f, 0x69, 0x09, 0x3d, 0x74, 0xb9, 0xfb, 0x3c, 0xe9, 0x7b, 0x80, 0x3c, + 0xc0, 0x6d, 0xff, 0x3c, 0x12, 0xe8, 0xcd, 0x3c, 0x50, 0xd7, 0x46, 0x3a, 0xd9, 0x95, 0xa1, 0x3c, + 0xf6, 0x5d, 0x17, 0x3b, 0xcc, 0xaa, 0xd5, 0x3c, 0x82, 0xf2, 0x59, 0x3c, 0xa2, 0x5b, 0x8f, 0x3c, + 0xfc, 0x69, 0xa5, 0x3c, 0x84, 0xb1, 0x02, 0x3b, 0xc8, 0x97, 0x48, 0xbc, 0x69, 0xa4, 0x95, 0xbb, + 0x0c, 0xca, 0x94, 0x3c, 0xcc, 0xf6, 0x6b, 0x3c, 0x28, 0x6c, 0xb5, 0xbb, 0xb4, 0x10, 0x09, 0x3b, + 0x30, 0xf7, 0x75, 0x3c, 0x02, 0x93, 0x3e, 0xbb, 0x0a, 0x85, 0x73, 0x3c, 0xd0, 0x95, 0x97, 0x3c, + 0x9b, 0x19, 0x18, 0xbc, 0x5c, 0xfb, 0xbe, 0x3c, 0xbc, 0x25, 0xce, 0xbb, 0xbe, 0x8e, 0x79, 0x3c, + 0xa6, 0xb1, 0x58, 0x3c, 0x38, 0x0c, 0x4b, 0xba, 0x89, 0x41, 0xa7, 0xbb, 0x71, 0x49, 0xaa, 0x3c, + 0x1c, 0xd2, 0xd1, 0xbb, 0xb4, 0xae, 0xed, 0x3b, 0x65, 0x3e, 0x90, 0x3c, 0x84, 0x3d, 0xaa, 0x3b, + 0x64, 0x00, 0x89, 0x3b, 0x88, 0xf6, 0x5e, 0x3c, 0x7e, 0x92, 0x8d, 0x3c, 0xd4, 0x94, 0xf0, 0x3a, + 0xb0, 0x11, 0xa2, 0xb9, 0x6b, 0xfd, 0xd9, 0xba, 0x95, 0xa7, 0x84, 0xbb, 0xf8, 0x35, 0x40, 0xbb, + 0x86, 0xa4, 0xae, 0xba, 0xdc, 0xbe, 0x7c, 0xbb, 0x5f, 0xbd, 0x18, 0xbb, 0xf2, 0x5f, 0x80, 0xbb, + 0xed, 0x53, 0x8f, 0xbb, 0x5e, 0x5e, 0x5b, 0xbb, 0x96, 0xc4, 0xad, 0xba, 0x42, 0x14, 0x29, 0xbb, + 0x26, 0xbc, 0x90, 0xba, 0x3e, 0x8a, 0x7d, 0xba, 0xd3, 0xc9, 0x53, 0xbb, 0xb4, 0x7a, 0x5a, 0xbb, + 0x9c, 0x3f, 0xa1, 0xba, 0x9a, 0x90, 0xd5, 0xba, 0xd4, 0x0d, 0xef, 0xba, 0xb3, 0xfa, 0x86, 0xba, + 0x06, 0x5d, 0x8d, 0xbb, 0xde, 0x7e, 0x14, 0xbb, 0xd6, 0xc2, 0xc2, 0xba, 0x6d, 0x14, 0x7d, 0xbb, + 0x8f, 0x67, 0x83, 0xbb, 0xa0, 0x51, 0x33, 0xbb, 0x77, 0xb6, 0x84, 0xbb, 0xfa, 0xee, 0x12, 0xbb, + 0xca, 0x1e, 0x5a, 0xbb, 0x0b, 0xa0, 0x44, 0xbb, 0x49, 0x4d, 0x65, 0xba, 0x33, 0xe8, 0x05, 0xbb, + 0x7e, 0x7c, 0xee, 0xb8, 0x4b, 0xc1, 0x2f, 0xbb, 0x2a, 0x24, 0x2b, 0xbb, 0xf2, 0xcd, 0x20, 0xbb, + 0x31, 0x08, 0xdd, 0xba, 0xff, 0x6d, 0xb1, 0xba, 0x10, 0xcb, 0xa0, 0x39, 0x38, 0x53, 0x58, 0x39, + 0x10, 0xc9, 0x30, 0xbb, 0xc3, 0x4e, 0x3c, 0xbb, 0x3e, 0x04, 0xde, 0x39, 0xe8, 0xa7, 0x82, 0xb9, + 0xa4, 0xd2, 0xe8, 0xba, 0xf2, 0x61, 0x07, 0x39, 0xab, 0xd1, 0x05, 0xbb, 0x6e, 0x93, 0x0c, 0xbb, + 0x18, 0x80, 0xfb, 0x3a, 0xe4, 0xd7, 0x1a, 0xbb, 0xe8, 0xc6, 0x50, 0x3a, 0xfa, 0x66, 0xdf, 0xba, + 0xb2, 0xad, 0x25, 0xbb, 0xc6, 0xaa, 0xfc, 0xb9, 0x64, 0x77, 0x42, 0x39, 0xc8, 0x86, 0x6c, 0xbb, + 0x78, 0x19, 0x6d, 0xb9, 0x98, 0xe8, 0xdf, 0xb8, 0xe1, 0x70, 0x33, 0xbb, 0x80, 0x63, 0x9c, 0xba, + 0xdd, 0xb5, 0x24, 0xba, 0x9a, 0x2b, 0xf2, 0xba, 0xbb, 0x9c, 0x25, 0xbb, 0xb2, 0x57, 0x85, 0xb9, + 0x00, 0x96, 0xb9, 0xb9, 0x12, 0x21, 0xa8, 0xba, 0x16, 0x84, 0xa4, 0xba, 0x5b, 0x2b, 0xa5, 0xba, + 0x96, 0x51, 0xbf, 0xba, 0x37, 0xff, 0x94, 0xba, 0x28, 0x38, 0x8e, 0xb9, 0xad, 0x44, 0x0a, 0xbb, + 0xc4, 0x10, 0x01, 0xbb, 0x16, 0xf7, 0x2c, 0xba, 0xf6, 0xab, 0x20, 0xba, 0x45, 0xde, 0xd0, 0xba, + 0x9f, 0xff, 0x14, 0xba, 0xe3, 0x80, 0xca, 0xb9, 0x94, 0x29, 0xcf, 0xba, 0xa3, 0xfc, 0x01, 0xbb, + 0x41, 0x9e, 0xe7, 0xba, 0x55, 0x0b, 0xa7, 0xba, 0xaa, 0xc5, 0xa3, 0xba, 0x2c, 0x2f, 0x25, 0xba, + 0x64, 0x53, 0xc5, 0xba, 0x93, 0xbb, 0x4b, 0xba, 0x98, 0xe3, 0x02, 0xba, 0x55, 0x7a, 0x79, 0xba, + 0x43, 0x42, 0xbe, 0xba, 0x82, 0x9f, 0x23, 0xbb, 0xf8, 0x86, 0xd8, 0xba, 0x14, 0xb3, 0x39, 0xba, + 0x61, 0x80, 0xf8, 0xba, 0x88, 0x4d, 0xc4, 0xba, 0x14, 0x9c, 0x0e, 0x39, 0x31, 0xf4, 0x9f, 0xba, + 0xb8, 0x5c, 0x86, 0xb9, 0xa8, 0xe3, 0xea, 0xba, 0x2e, 0xb2, 0x0c, 0xba, 0x54, 0xc3, 0x85, 0xba, + 0x6c, 0x83, 0xd6, 0xba, 0x60, 0x8a, 0xf0, 0x38, 0xb7, 0x48, 0x89, 0x3a, 0x58, 0x88, 0x48, 0xb8, + 0xbc, 0x42, 0x94, 0xba, 0xbd, 0xaf, 0xdd, 0xb9, 0x5d, 0x33, 0x89, 0x39, 0x00, 0x7c, 0xe0, 0xb9, + 0x1d, 0x16, 0x6d, 0xba, 0xad, 0xc3, 0x29, 0x39, 0x6c, 0x31, 0x81, 0xba, 0x02, 0x12, 0xb6, 0xba, + 0x38, 0xf3, 0x49, 0xb9, 0xed, 0x1a, 0xd7, 0xba, 0x4c, 0x5f, 0x7e, 0x38, 0x72, 0x9b, 0x7d, 0xba, + 0x14, 0x8e, 0x20, 0xba, 0xba, 0xbb, 0xf0, 0x38, 0x57, 0x5c, 0xb0, 0x39, 0x0a, 0x5f, 0x58, 0xba, + 0x0c, 0xed, 0xe6, 0x39, 0xaa, 0xec, 0x9a, 0xba, 0xec, 0x8a, 0x82, 0xba, 0x82, 0xe2, 0x40, 0xb9, + 0x1d, 0x1e, 0x0f, 0xba, 0x96, 0x86, 0x71, 0xba, 0xcc, 0xeb, 0x34, 0xba, 0xd1, 0xdb, 0xaa, 0xb9, + 0x5c, 0x26, 0xb9, 0xba, 0x1d, 0xff, 0x09, 0xbd, 0x83, 0xa6, 0x8d, 0xbd, 0x2e, 0xb6, 0x50, 0xbd, + 0xa8, 0x91, 0x90, 0xbc, 0x2a, 0xfa, 0x66, 0xbd, 0xfc, 0x1b, 0xed, 0xbc, 0x77, 0x65, 0x01, 0xbd, + 0xf1, 0xb0, 0x87, 0xbd, 0x39, 0x93, 0x8b, 0xbd, 0x6e, 0xde, 0x0e, 0xbc, 0xe4, 0xbe, 0xb2, 0xbc, + 0x04, 0xc7, 0xd2, 0xbc, 0x7e, 0xb7, 0x0e, 0xbc, 0x8e, 0x23, 0x44, 0xbd, 0x7c, 0x68, 0x3a, 0xbd, + 0x1c, 0x03, 0xa4, 0x3c, 0xf2, 0x21, 0xf9, 0xbc, 0x18, 0x43, 0xaa, 0xbb, 0x44, 0x43, 0xbe, 0xbc, + 0x6d, 0x20, 0x8e, 0xbd, 0x8d, 0x8f, 0xed, 0xbc, 0xd6, 0xa9, 0x6f, 0xbc, 0x03, 0xc4, 0xa1, 0xbd, + 0x38, 0x5f, 0x37, 0xbd, 0xeb, 0x51, 0x45, 0xbc, 0x80, 0x49, 0x86, 0xbd, 0x72, 0xea, 0x12, 0xbd, + 0x4c, 0x76, 0x06, 0xbd, 0x3a, 0xd5, 0x32, 0xbd, 0x97, 0x95, 0x19, 0xbd, 0x90, 0xdc, 0x95, 0xbc, + 0x36, 0xe7, 0xa3, 0xbf, 0x17, 0x3b, 0xb3, 0xbf, 0xd2, 0x62, 0xea, 0xbf, 0x96, 0x22, 0xa8, 0xbf, + 0xe0, 0xb4, 0x75, 0xc0, 0x65, 0x42, 0xea, 0xbf, 0xd1, 0x68, 0x9c, 0xbf, 0xe5, 0x54, 0x1f, 0xc0, + 0x38, 0xc1, 0x22, 0xc0, 0xea, 0xf7, 0x25, 0xc0, 0x1b, 0x85, 0x17, 0xc0, 0xfc, 0xb4, 0xc2, 0xbf, + 0xce, 0x94, 0x94, 0xc0, 0xed, 0x08, 0x96, 0xbf, 0xab, 0x99, 0x91, 0xbf, 0x4f, 0x32, 0xfb, 0xbf, + 0x19, 0x99, 0x7a, 0x3e, 0x4f, 0x09, 0x4c, 0xbf, 0x79, 0xa9, 0xb6, 0xbf, 0xd4, 0x58, 0xbf, 0xbf, + 0xc7, 0x2e, 0x48, 0x3e, 0x9b, 0x45, 0x20, 0xc0, 0xe5, 0xd5, 0x22, 0xc0, 0xd1, 0x17, 0xb1, 0xbf, + 0xf8, 0xcb, 0x03, 0xc0, 0x15, 0xea, 0x30, 0xc0, 0xbc, 0x31, 0x6c, 0xc0, 0x44, 0x6b, 0xce, 0xbf, + 0x30, 0xe0, 0xd1, 0xbe, 0x1e, 0x74, 0x9a, 0xbf, 0xca, 0x37, 0x8b, 0xbf, 0x5c, 0x1c, 0x10, 0xc0, + 0x52, 0x9e, 0x89, 0x3f, 0xda, 0x54, 0xcb, 0x3f, 0x7d, 0xb8, 0x9e, 0x3f, 0x6f, 0x5f, 0xbc, 0x3f, + 0x96, 0x37, 0xef, 0x3f, 0x24, 0xc5, 0xd1, 0x3f, 0xde, 0xe7, 0xaa, 0x3f, 0x8b, 0xd8, 0xe2, 0x3f, + 0xc0, 0xb8, 0x01, 0x40, 0x7e, 0x08, 0xc1, 0x3f, 0xe5, 0xe6, 0xd6, 0x3f, 0xe4, 0x40, 0x8c, 0x3f, + 0x88, 0xb5, 0xee, 0x3f, 0xf7, 0x0a, 0xa6, 0x3f, 0x5e, 0x58, 0xa9, 0x3f, 0x8e, 0xa4, 0x80, 0x3f, + 0xae, 0x13, 0x21, 0x3f, 0xf2, 0xa2, 0xad, 0x3f, 0x2b, 0x09, 0x97, 0x3f, 0x06, 0xe7, 0xa3, 0x3f, + 0x02, 0x0c, 0xb2, 0x3e, 0x5d, 0x0d, 0xb5, 0x3f, 0xbf, 0xa5, 0xaa, 0x3f, 0xee, 0x9d, 0xdf, 0x3f, + 0x2b, 0x04, 0xd8, 0x3f, 0x4a, 0x98, 0x92, 0x3f, 0xb3, 0xac, 0xd1, 0x3f, 0x1d, 0xd6, 0x62, 0x3f, + 0xc1, 0x4f, 0x84, 0x3f, 0x7b, 0xd4, 0xad, 0x3f, 0x14, 0x85, 0x22, 0x3f, 0x0d, 0x0d, 0xa6, 0x3f, + 0x14, 0xa1, 0xaf, 0xbd, 0x6e, 0xac, 0x1a, 0x3f, 0xab, 0x67, 0x73, 0x3f, 0xf0, 0x3d, 0x05, 0x3f, + 0xe7, 0x24, 0x79, 0x3f, 0xd1, 0x3a, 0x37, 0x3f, 0x1e, 0x8c, 0x49, 0x3e, 0xfe, 0x2f, 0xc3, 0x3e, + 0xc8, 0x3d, 0x00, 0xbe, 0x21, 0x90, 0x39, 0x3f, 0x83, 0x1a, 0x00, 0x3f, 0x50, 0x98, 0xb2, 0x3e, + 0x38, 0xdd, 0x48, 0x3f, 0x98, 0xd4, 0xb7, 0x3d, 0x79, 0xf1, 0x80, 0x3f, 0x32, 0x74, 0x17, 0x3f, + 0x00, 0x0c, 0x2c, 0x3d, 0xd0, 0xd6, 0xef, 0x3d, 0xbe, 0x8f, 0xb7, 0x3e, 0xd3, 0xb2, 0xe8, 0x3e, + 0xac, 0x72, 0x11, 0xbd, 0x72, 0x44, 0xcf, 0x3e, 0x18, 0xbe, 0x1d, 0x3f, 0x0b, 0x57, 0xd1, 0x3e, + 0x34, 0xd8, 0x50, 0x3e, 0x7c, 0x0c, 0x08, 0x3f, 0xb5, 0x5a, 0x8e, 0x3f, 0x31, 0x02, 0x68, 0x3e, + 0xd8, 0x52, 0xb4, 0x3d, 0x5e, 0xda, 0xbf, 0x3e, 0xc0, 0xf8, 0xd2, 0xba, 0x86, 0x9e, 0x21, 0x3f, + 0x76, 0x16, 0xea, 0xbd, 0x37, 0x23, 0x63, 0xbe, 0x92, 0xa4, 0x30, 0xbe, 0x16, 0xdc, 0x4f, 0xbe, + 0x8c, 0xd8, 0x4d, 0xbe, 0xa0, 0xe6, 0x61, 0xbe, 0x7e, 0x74, 0x2d, 0xbe, 0xc6, 0xc1, 0x4e, 0xbe, + 0x11, 0x1e, 0x5a, 0xbe, 0x64, 0x3c, 0x34, 0xbe, 0x40, 0xb1, 0x4a, 0xbe, 0xe7, 0x3d, 0x06, 0xbe, + 0x18, 0x23, 0x28, 0xbe, 0x7e, 0xd6, 0x23, 0xbe, 0xfc, 0xf6, 0x57, 0xbe, 0x50, 0x5a, 0xeb, 0xbd, + 0xca, 0x4c, 0xd6, 0xbd, 0xaa, 0x27, 0x3b, 0xbe, 0xa7, 0xe3, 0x16, 0xbe, 0x4a, 0xed, 0x28, 0xbe, + 0x2d, 0x13, 0x69, 0xbd, 0xd2, 0x82, 0x18, 0xbe, 0xd6, 0x7a, 0x15, 0xbe, 0xab, 0x0c, 0x72, 0xbe, + 0xc4, 0x5e, 0x47, 0xbe, 0xdc, 0x89, 0xd9, 0xbd, 0xbb, 0x94, 0x36, 0xbe, 0x7a, 0x50, 0xb9, 0xbd, + 0x4c, 0x4a, 0x15, 0xbe, 0xfc, 0x97, 0x3a, 0xbe, 0x06, 0x38, 0x71, 0xbd, 0xd8, 0xb9, 0x1a, 0xbe, + 0xc0, 0x89, 0xea, 0x3a, 0x03, 0xaf, 0xfc, 0xbd, 0x3f, 0x07, 0x13, 0xbe, 0xa0, 0xea, 0xdc, 0xbd, + 0x7a, 0x42, 0xe4, 0xbd, 0x71, 0x60, 0x03, 0xbe, 0x73, 0x8a, 0x63, 0xbd, 0xad, 0xa2, 0x80, 0xbd, + 0x80, 0xba, 0x10, 0x3b, 0xb4, 0x11, 0xc8, 0xbd, 0x48, 0x06, 0xa4, 0xbd, 0x36, 0x08, 0x67, 0xbd, + 0xd3, 0x0f, 0x66, 0xbd, 0xcd, 0xc7, 0x20, 0xbd, 0xac, 0xa4, 0x37, 0xbe, 0xfb, 0xaf, 0x96, 0xbd, + 0xe2, 0x14, 0x3c, 0xbd, 0x63, 0xf3, 0x70, 0xbd, 0xbb, 0x8b, 0x85, 0xbd, 0x79, 0xb0, 0xa8, 0xbd, + 0x68, 0x2f, 0x98, 0xbc, 0xaf, 0x7d, 0x44, 0xbd, 0x26, 0x1e, 0x98, 0xbd, 0x44, 0x0c, 0xd2, 0xbd, + 0x32, 0xc4, 0x3d, 0xbd, 0xc5, 0xa6, 0x37, 0xbd, 0x89, 0xb9, 0x00, 0xbe, 0xe7, 0xfc, 0xc8, 0xbc, + 0x25, 0xd4, 0x51, 0xbd, 0xf2, 0xcc, 0xab, 0xbd, 0x04, 0xc7, 0x9b, 0x3b, 0xd3, 0x10, 0xad, 0xbd, + 0x54, 0x79, 0x80, 0xbd, 0x8f, 0x53, 0x9a, 0xbd, 0x14, 0x65, 0x85, 0xbd, 0x17, 0x89, 0x90, 0xbd, + 0xcb, 0xa8, 0xf9, 0xbd, 0x1b, 0x02, 0xaa, 0xbd, 0xc5, 0x36, 0x8b, 0xbd, 0x34, 0x53, 0xd3, 0xbd, + 0xb3, 0xac, 0xf5, 0xbd, 0xb4, 0xc7, 0xb9, 0xbd, 0x03, 0xf1, 0xc4, 0xbd, 0xd4, 0x49, 0x7e, 0xbd, + 0x18, 0xd0, 0x0c, 0xbe, 0x4e, 0x56, 0x89, 0xbd, 0x58, 0x4a, 0x65, 0xbd, 0xaa, 0x06, 0x81, 0xbd, + 0xf8, 0x93, 0x9b, 0xbc, 0x87, 0x63, 0x80, 0xbd, 0x3e, 0x27, 0x82, 0xbd, 0x7f, 0xbb, 0x89, 0xbd, + 0xef, 0x8b, 0x27, 0xbc, 0x0d, 0xbf, 0xb7, 0xbd, 0xb7, 0x75, 0xad, 0xbd, 0x8e, 0xed, 0xaa, 0xbd, + 0x26, 0xae, 0xc2, 0xbd, 0xd5, 0xb4, 0xa8, 0xbd, 0xdf, 0x4d, 0xdf, 0xbd, 0x70, 0x97, 0x6a, 0xbd, + 0xca, 0x3f, 0x36, 0xbd, 0x9a, 0xe4, 0x87, 0xbd, 0x09, 0xd8, 0x2b, 0xbd, 0xe7, 0x53, 0xa0, 0xbd, + 0x4f, 0x90, 0x99, 0xbb, 0x66, 0x1f, 0xd6, 0xbc, 0x82, 0x8f, 0x4f, 0xbd, 0x45, 0x89, 0xbc, 0xbc, + 0x7d, 0xf1, 0x9d, 0xbd, 0xd8, 0xf7, 0x16, 0xbd, 0x19, 0x27, 0x35, 0xbc, 0x8a, 0xc5, 0x09, 0xbd, + 0xbe, 0x5d, 0x4a, 0xbc, 0x54, 0x28, 0x53, 0xbd, 0xcc, 0x85, 0x18, 0xbd, 0x43, 0xa4, 0xca, 0xbc, + 0x0d, 0x01, 0xab, 0xbd, 0x22, 0x1c, 0xdb, 0xbb, 0xb6, 0xa5, 0x24, 0xbd, 0xfe, 0x62, 0x2d, 0xbd, + 0x71, 0xe1, 0x29, 0x3c, 0x70, 0x5a, 0x3c, 0xba, 0xd2, 0x25, 0xb8, 0xbc, 0xc2, 0x99, 0xd4, 0xbc, + 0x02, 0x6b, 0x12, 0x3c, 0x4e, 0xb7, 0x20, 0xbd, 0xe8, 0x13, 0x48, 0xbd, 0x16, 0x9c, 0x87, 0xbc, + 0xba, 0x01, 0xab, 0xbc, 0x33, 0xb3, 0x53, 0xbd, 0xad, 0x72, 0xa9, 0xbd, 0x5c, 0x0c, 0xc7, 0xbc, + 0xb0, 0xab, 0x36, 0x3b, 0x5d, 0xcc, 0x8c, 0xbc, 0x55, 0xfe, 0x25, 0xbc, 0xf2, 0x44, 0x38, 0xbd, + 0xb5, 0x14, 0x72, 0xbf, 0xe7, 0x22, 0x62, 0xc0, 0x74, 0x48, 0x46, 0xc0, 0xe9, 0xcc, 0x4a, 0xc0, + 0x69, 0x8e, 0x23, 0xc0, 0xab, 0xed, 0x5d, 0xc0, 0x25, 0x68, 0x0e, 0xc0, 0x36, 0xc4, 0x16, 0xc0, + 0x92, 0x2b, 0xde, 0xbf, 0xb5, 0x92, 0x1b, 0xc0, 0x90, 0x4d, 0x22, 0xc0, 0xb1, 0x52, 0xdd, 0xbf, + 0x1b, 0xbe, 0xa8, 0xbf, 0xa5, 0x69, 0xf9, 0xbf, 0x1e, 0x63, 0x83, 0xc0, 0xa9, 0xa2, 0xd0, 0xbf, + 0x1e, 0xa2, 0xe0, 0xbf, 0x17, 0x32, 0x21, 0xc0, 0x02, 0x36, 0x02, 0xc0, 0xc8, 0x29, 0x1a, 0xc0, + 0x13, 0xe5, 0x65, 0xbf, 0x59, 0x69, 0xcb, 0xbf, 0xfd, 0xbe, 0xeb, 0xbf, 0x3e, 0xe6, 0x5d, 0xc0, + 0x20, 0x97, 0x0c, 0xc0, 0x85, 0x10, 0x81, 0xbf, 0x99, 0xd6, 0x1f, 0xc0, 0x74, 0x0c, 0x64, 0xbf, + 0x86, 0x4b, 0x07, 0xc0, 0x66, 0x4a, 0x2c, 0xc0, 0x01, 0x9d, 0xac, 0xbe, 0x7f, 0x9b, 0x05, 0xc0}; +unsigned char conv2d_winograd_fp32_bias[] = { + 0x94, 0xcb, 0xde, 0x3f, 0x6f, 0x1d, 0xf0, 0x3f, 0x61, 0xfb, 0x8f, 0x40, 0x24, 0xce, 0xdb, 0x3f, + 0x55, 0x18, 0xf2, 0x40, 0x38, 0xa5, 0x64, 0x41, 0x87, 0x80, 0x94, 0xc0, 0xee, 0x19, 0x40, 0x40, + 0x28, 0x08, 0x8a, 0x40, 0x99, 0x24, 0x8c, 0xc0, 0x05, 0x80, 0x41, 0x40, 0xd4, 0x8a, 0xb3, 0x41, + 0x24, 0xe3, 0x2e, 0x41, 0x3c, 0xe6, 0xf7, 0x40, 0xa3, 0x0f, 0xdf, 0xc0, 0x6c, 0xd6, 0xdf, 0x40}; +unsigned char conv2d_winograd_fp32_out[] = { + 0xd3, 0xab, 0x56, 0x42, 0xf0, 0xb2, 0xa1, 0x42, 0xc4, 0x6b, 0xac, 0x42, 0x9c, 0x19, 0xbd, 0x42, + 0x3b, 0xac, 0xcf, 0x42, 0xc7, 0x8f, 0xc6, 0x42, 0x62, 0x76, 0xe7, 0x42, 0xed, 0x1f, 0xc5, 0x42, + 0xf6, 0x91, 0xcf, 0x42, 0xfa, 0x2c, 0x9b, 0x42, 0x5e, 0x2a, 0xcd, 0x42, 0xad, 0x6c, 0xb6, 0x42, + 0xf2, 0xd6, 0xd9, 0x42, 0xc9, 0x6c, 0x41, 0x42, 0x77, 0xc0, 0xa9, 0x42, 0x5c, 0xd0, 0xf6, 0x42, + 0x86, 0x25, 0xb6, 0x42, 0x18, 0x6e, 0xcf, 0x42, 0xf2, 0x6b, 0x19, 0x43, 0xe8, 0x8d, 0xf1, 0x42, + 0x95, 0xa8, 0x3e, 0x43, 0x1d, 0xd9, 0x16, 0x43, 0xce, 0x47, 0x3f, 0x43, 0x8c, 0x4f, 0xf0, 0x42, + 0x1e, 0x75, 0x27, 0x43, 0xa5, 0xbf, 0x0f, 0x43, 0x64, 0xbe, 0x21, 0x43, 0x72, 0xd6, 0xb4, 0x42, + 0x26, 0xf0, 0xb9, 0x42, 0x5e, 0x17, 0x02, 0x43, 0x7b, 0x2b, 0xeb, 0x42, 0xdd, 0x00, 0x0c, 0x43, + 0x0d, 0x07, 0x2c, 0x43, 0xef, 0xf1, 0x1f, 0x43, 0xc8, 0xe6, 0x3e, 0x43, 0x27, 0x94, 0x41, 0x43, + 0x1d, 0x29, 0x42, 0x43, 0xd7, 0xa9, 0x1d, 0x43, 0x9b, 0x9b, 0x32, 0x43, 0x5b, 0x4f, 0x26, 0x43, + 0xf1, 0xb6, 0x21, 0x43, 0x4e, 0xc5, 0xc5, 0x42, 0xb5, 0x89, 0xcd, 0x42, 0xca, 0xb4, 0xf2, 0x42, + 0x27, 0xbb, 0xe3, 0x42, 0xcb, 0xa9, 0x02, 0x43, 0xe8, 0xb7, 0x00, 0x43, 0x69, 0xbd, 0x18, 0x43, + 0x97, 0x31, 0x3c, 0x43, 0x8e, 0xb8, 0x41, 0x43, 0x9a, 0x24, 0x42, 0x43, 0x80, 0x71, 0x1a, 0x43, + 0xe9, 0x22, 0x2d, 0x43, 0xcf, 0x2f, 0x1c, 0x43, 0x64, 0x93, 0x1b, 0x43, 0xe6, 0x73, 0xad, 0x42, + 0x22, 0x21, 0xb0, 0x42, 0x3e, 0xfd, 0xf8, 0x42, 0x78, 0xa9, 0xf0, 0x42, 0xfd, 0x66, 0x14, 0x43, + 0x4a, 0xcd, 0x18, 0x43, 0x6f, 0x6b, 0x21, 0x43, 0x46, 0x57, 0x3c, 0x43, 0x61, 0x26, 0x42, 0x43, + 0xf7, 0x97, 0x37, 0x43, 0xe7, 0xf9, 0x1f, 0x43, 0x59, 0x44, 0x27, 0x43, 0xe3, 0xe2, 0x12, 0x43, + 0x1e, 0x8f, 0xee, 0x42, 0x04, 0xca, 0xa9, 0x42, 0xbe, 0x76, 0xd4, 0x42, 0x61, 0x6f, 0x22, 0x43, + 0x95, 0x55, 0x0b, 0x43, 0xdd, 0xef, 0x12, 0x43, 0xf5, 0x95, 0x1d, 0x43, 0x21, 0xab, 0x24, 0x43, + 0xbe, 0x0f, 0x47, 0x43, 0x07, 0xf5, 0x51, 0x43, 0xe2, 0x6c, 0x3c, 0x43, 0x45, 0xa5, 0x1b, 0x43, + 0x14, 0x27, 0x1f, 0x43, 0x9b, 0x6a, 0x10, 0x43, 0x63, 0x9f, 0x0e, 0x43, 0x6a, 0x11, 0x96, 0x42, + 0xd4, 0x1b, 0xe6, 0x42, 0x4f, 0xa2, 0x1c, 0x43, 0x9e, 0x1e, 0x04, 0x43, 0x83, 0x21, 0x12, 0x43, + 0x3a, 0x68, 0x14, 0x43, 0xc8, 0x9a, 0x2d, 0x43, 0x78, 0x8a, 0x41, 0x43, 0xd4, 0xaf, 0x33, 0x43, + 0xfd, 0xfc, 0x1c, 0x43, 0x12, 0x47, 0x04, 0x43, 0x79, 0x1b, 0x04, 0x43, 0x60, 0x5d, 0x0d, 0x43, + 0xf9, 0xd9, 0x26, 0x43, 0x0c, 0xad, 0xb2, 0x42, 0x99, 0x79, 0xcd, 0x42, 0x89, 0x7c, 0x16, 0x43, + 0x12, 0x19, 0x02, 0x43, 0x87, 0x31, 0x09, 0x43, 0xd2, 0x5e, 0x18, 0x43, 0xb1, 0x9d, 0x22, 0x43, + 0xa3, 0x85, 0x29, 0x43, 0x16, 0xef, 0x23, 0x43, 0xbb, 0xe4, 0x02, 0x43, 0x6f, 0x04, 0xe1, 0x42, + 0x7e, 0xe6, 0xeb, 0x42, 0x8e, 0x77, 0x0d, 0x43, 0xd9, 0x88, 0x19, 0x43, 0xc1, 0xb4, 0xcc, 0x42, + 0xa1, 0xe3, 0xc3, 0x42, 0x4f, 0x4c, 0x1b, 0x43, 0x83, 0x64, 0x12, 0x43, 0x39, 0x24, 0x23, 0x43, + 0x86, 0xb3, 0x17, 0x43, 0xcd, 0x1f, 0x28, 0x43, 0x6b, 0xe6, 0x29, 0x43, 0xe9, 0xc4, 0x26, 0x43, + 0xf2, 0x3a, 0x0a, 0x43, 0xd5, 0xe0, 0x01, 0x43, 0xde, 0x28, 0x0d, 0x43, 0x59, 0xeb, 0x01, 0x43, + 0xa3, 0x0c, 0x22, 0x43, 0x6c, 0x75, 0xb1, 0x42, 0x52, 0x6a, 0xba, 0x42, 0x1a, 0xbb, 0x25, 0x43, + 0xed, 0x1c, 0x1c, 0x43, 0x89, 0xa2, 0x2e, 0x43, 0x71, 0xc3, 0x14, 0x43, 0x5b, 0x24, 0x2c, 0x43, + 0x4d, 0x07, 0x29, 0x43, 0xe6, 0x9b, 0x35, 0x43, 0x79, 0x11, 0x24, 0x43, 0xe7, 0xdd, 0x13, 0x43, + 0x77, 0x57, 0x15, 0x43, 0xd5, 0xe5, 0x19, 0x43, 0xc3, 0x05, 0x3e, 0x43, 0xa9, 0xb0, 0xea, 0x42, + 0xcd, 0x58, 0xae, 0x42, 0xae, 0xa7, 0x26, 0x43, 0xf3, 0xf5, 0x29, 0x43, 0x40, 0x73, 0x1c, 0x43, + 0xe3, 0xf0, 0xfe, 0x42, 0x60, 0xb4, 0x25, 0x43, 0xc7, 0xf9, 0x15, 0x43, 0xb8, 0x11, 0x30, 0x43, + 0xa7, 0x2f, 0x2d, 0x43, 0x05, 0x68, 0x1c, 0x43, 0xe9, 0xfc, 0x2a, 0x43, 0x2f, 0x5f, 0x34, 0x43, + 0xcf, 0xcb, 0x45, 0x43, 0xf2, 0x4d, 0xec, 0x42, 0x43, 0x6f, 0xb8, 0x42, 0x66, 0x50, 0x0c, 0x43, + 0xb5, 0x48, 0x0a, 0x43, 0x58, 0x80, 0x0a, 0x43, 0x6f, 0xb9, 0x03, 0x43, 0xee, 0x18, 0x12, 0x43, + 0x69, 0x67, 0x14, 0x43, 0xc9, 0x6e, 0x2a, 0x43, 0x93, 0xa2, 0x1d, 0x43, 0x37, 0xcf, 0x40, 0x43, + 0x2a, 0x44, 0x38, 0x43, 0x3b, 0x79, 0x3e, 0x43, 0x9f, 0xbb, 0x1d, 0x43, 0x2a, 0xd4, 0xb3, 0x42, + 0xe2, 0x4d, 0xa8, 0x42, 0xd6, 0x40, 0xe4, 0x42, 0x33, 0xf8, 0xf5, 0x42, 0xfc, 0xe7, 0xef, 0x42, + 0x71, 0xab, 0x04, 0x43, 0x9f, 0x94, 0x00, 0x43, 0xfb, 0x6e, 0x02, 0x43, 0x10, 0x52, 0x31, 0x43, + 0x2c, 0x32, 0x2e, 0x43, 0xad, 0xb6, 0x49, 0x43, 0x77, 0xc1, 0x26, 0x43, 0xc3, 0xa6, 0x27, 0x43, + 0xe9, 0x8b, 0x08, 0x43, 0x60, 0xcc, 0xa6, 0x42, 0x3d, 0x16, 0x50, 0x42, 0x82, 0x11, 0x9b, 0x42, + 0xaf, 0xef, 0x9c, 0x42, 0x2a, 0x4e, 0xb4, 0x42, 0xd9, 0xce, 0xad, 0x42, 0x78, 0x21, 0xa5, 0x42, + 0x8c, 0x99, 0xc2, 0x42, 0xe0, 0xf9, 0xf1, 0x42, 0x46, 0x8c, 0xeb, 0x42, 0xdd, 0x72, 0x0f, 0x43, + 0x90, 0x5d, 0xba, 0x42, 0x19, 0x3a, 0xb8, 0x42, 0x1e, 0x50, 0x81, 0x42, 0xfd, 0xef, 0x6c, 0x42, + 0xeb, 0xa1, 0x40, 0x42, 0x1b, 0x04, 0x97, 0x42, 0x48, 0x55, 0x78, 0x42, 0x48, 0x02, 0xa2, 0x42, + 0x50, 0xe0, 0xc7, 0x42, 0xd2, 0xd3, 0xb7, 0x42, 0x7c, 0x93, 0xc5, 0x42, 0xd1, 0x6c, 0xcf, 0x42, + 0x2a, 0x2e, 0xba, 0x42, 0x32, 0x9f, 0x9c, 0x42, 0xe9, 0xe6, 0xb8, 0x42, 0xf3, 0x43, 0xaa, 0x42, + 0x82, 0xb9, 0xb4, 0x42, 0x09, 0x54, 0x42, 0x42, 0x0a, 0x0e, 0xb8, 0x42, 0xbb, 0x96, 0xd5, 0x42, + 0xdc, 0xda, 0xca, 0x42, 0x71, 0x6f, 0xdf, 0x42, 0x0c, 0x81, 0xfd, 0x42, 0xd3, 0x7f, 0xf6, 0x42, + 0xa8, 0x50, 0x20, 0x43, 0xff, 0x1f, 0x26, 0x43, 0xd1, 0x51, 0x1c, 0x43, 0xef, 0xae, 0xef, 0x42, + 0x85, 0x76, 0x07, 0x43, 0x91, 0x3e, 0x16, 0x43, 0x25, 0x58, 0x0c, 0x43, 0x57, 0x0a, 0x9b, 0x42, + 0x50, 0xe7, 0xc5, 0x42, 0x6a, 0x76, 0xea, 0x42, 0x5a, 0x31, 0xcd, 0x42, 0x1e, 0xdb, 0xed, 0x42, + 0xe5, 0x92, 0x07, 0x43, 0x45, 0x45, 0x19, 0x43, 0x07, 0x27, 0x24, 0x43, 0xfd, 0xb5, 0x26, 0x43, + 0x15, 0x32, 0x21, 0x43, 0xdb, 0x0b, 0x11, 0x43, 0x74, 0x6e, 0x1a, 0x43, 0xc3, 0x08, 0x1b, 0x43, + 0xab, 0x72, 0x1c, 0x43, 0x11, 0x1b, 0xbe, 0x42, 0x08, 0x69, 0xd9, 0x42, 0xf6, 0x0e, 0xf6, 0x42, + 0x8a, 0x0c, 0xc2, 0x42, 0x89, 0x99, 0x01, 0x43, 0xd2, 0xb7, 0xf0, 0x42, 0x5c, 0xba, 0x07, 0x43, + 0xfb, 0xac, 0x28, 0x43, 0x3d, 0xfc, 0x31, 0x43, 0xc2, 0x51, 0x2e, 0x43, 0xb7, 0x06, 0x23, 0x43, + 0x01, 0xdd, 0x14, 0x43, 0x22, 0x6a, 0x18, 0x43, 0xa1, 0x21, 0x07, 0x43, 0x06, 0x45, 0x9f, 0x42, + 0xf1, 0x8d, 0xbc, 0x42, 0x4a, 0x57, 0xe2, 0x42, 0x8d, 0x38, 0xea, 0x42, 0xbb, 0x86, 0x11, 0x43, + 0x16, 0xdf, 0x0a, 0x43, 0xaf, 0x1c, 0x1c, 0x43, 0x79, 0x0b, 0x2d, 0x43, 0x92, 0x90, 0x37, 0x43, + 0x0f, 0x4a, 0x27, 0x43, 0x90, 0x82, 0x15, 0x43, 0x90, 0x8c, 0x07, 0x43, 0xb4, 0x2e, 0x0c, 0x43, + 0xbe, 0xde, 0xfb, 0x42, 0xf8, 0x42, 0x98, 0x42, 0x3a, 0x9e, 0xd5, 0x42, 0x63, 0x07, 0x06, 0x43, + 0x67, 0x8e, 0x02, 0x43, 0x7a, 0x3c, 0xff, 0x42, 0x77, 0x1b, 0xf4, 0x42, 0xdd, 0x00, 0x20, 0x43, + 0x3c, 0x94, 0x4b, 0x43, 0xd7, 0x51, 0x3f, 0x43, 0x27, 0xe9, 0x38, 0x43, 0x71, 0xfb, 0x06, 0x43, + 0xd3, 0x7e, 0xfe, 0x42, 0x26, 0xcb, 0xf5, 0x42, 0x21, 0x06, 0x0a, 0x43, 0x92, 0xe1, 0x9f, 0x42, + 0xe4, 0x92, 0xda, 0x42, 0x3b, 0x6b, 0x11, 0x43, 0x56, 0x8f, 0xff, 0x42, 0xff, 0x32, 0xf9, 0x42, + 0x08, 0x31, 0x10, 0x43, 0xdf, 0xe4, 0x1a, 0x43, 0x16, 0x29, 0x31, 0x43, 0x91, 0x73, 0x0e, 0x43, + 0x7f, 0x5d, 0x11, 0x43, 0x88, 0xf6, 0xee, 0x42, 0x2a, 0x71, 0x02, 0x43, 0x74, 0x04, 0xfe, 0x42, + 0x15, 0xe0, 0x0c, 0x43, 0x04, 0xb5, 0xc5, 0x42, 0x98, 0x8b, 0xd3, 0x42, 0xfd, 0xa6, 0x04, 0x43, + 0xbe, 0xdf, 0xdf, 0x42, 0xc1, 0xaf, 0x0b, 0x43, 0x98, 0xf1, 0x0a, 0x43, 0xbb, 0x4e, 0x13, 0x43, + 0x3f, 0x60, 0x2f, 0x43, 0x43, 0x2c, 0x19, 0x43, 0xb5, 0xa3, 0x05, 0x43, 0xaf, 0xc0, 0xe4, 0x42, + 0x78, 0x4b, 0xdc, 0x42, 0x02, 0x9b, 0xfb, 0x42, 0xf0, 0xe5, 0x0c, 0x43, 0x04, 0x1b, 0xc4, 0x42, + 0x8f, 0x2d, 0xd0, 0x42, 0xe2, 0x72, 0x0f, 0x43, 0xd7, 0x3c, 0x03, 0x43, 0x16, 0x85, 0x07, 0x43, + 0x24, 0x00, 0x19, 0x43, 0xa6, 0x01, 0x15, 0x43, 0xa7, 0x10, 0x1b, 0x43, 0x6b, 0x13, 0x0e, 0x43, + 0xcf, 0x1d, 0x03, 0x43, 0x85, 0x41, 0xe5, 0x42, 0x94, 0x53, 0xf0, 0x42, 0x3f, 0x5e, 0x05, 0x43, + 0xb7, 0xff, 0x0f, 0x43, 0xb2, 0x43, 0xbd, 0x42, 0xaa, 0x50, 0xd3, 0x42, 0x54, 0x9b, 0x14, 0x43, + 0x58, 0xc1, 0x1c, 0x43, 0x9d, 0xe0, 0x19, 0x43, 0xa4, 0x79, 0x12, 0x43, 0x3f, 0x71, 0x17, 0x43, + 0xf5, 0x90, 0x0b, 0x43, 0xb5, 0x3c, 0x24, 0x43, 0xa5, 0xbe, 0x18, 0x43, 0x34, 0xb1, 0xfa, 0x42, + 0x95, 0xd5, 0x06, 0x43, 0xc1, 0x17, 0x1a, 0x43, 0xbf, 0xf2, 0x20, 0x43, 0x09, 0xb8, 0xd1, 0x42, + 0x7c, 0xb9, 0xd1, 0x42, 0x15, 0x7c, 0x0d, 0x43, 0x38, 0x95, 0x1c, 0x43, 0x0e, 0xa1, 0x11, 0x43, + 0x31, 0x34, 0x09, 0x43, 0xd5, 0x82, 0x0b, 0x43, 0xca, 0xf4, 0x0e, 0x43, 0x5c, 0xa3, 0x1a, 0x43, + 0xbc, 0x2d, 0x11, 0x43, 0x49, 0x76, 0x10, 0x43, 0x70, 0xdf, 0x1f, 0x43, 0xce, 0x47, 0x1b, 0x43, + 0xf7, 0x49, 0x29, 0x43, 0xbc, 0x7f, 0xd8, 0x42, 0x8e, 0xc5, 0xbc, 0x42, 0xe8, 0x4e, 0xf7, 0x42, + 0x92, 0xa7, 0xf0, 0x42, 0x24, 0xc6, 0x05, 0x43, 0x85, 0x5c, 0xfa, 0x42, 0x75, 0x7d, 0xf8, 0x42, + 0x95, 0x28, 0x0d, 0x43, 0x74, 0x25, 0x1f, 0x43, 0x3d, 0x31, 0x1a, 0x43, 0xbe, 0xe4, 0x24, 0x43, + 0xa6, 0x3a, 0x2b, 0x43, 0x3d, 0x67, 0x2a, 0x43, 0xbf, 0x5c, 0x10, 0x43, 0x56, 0x2b, 0xad, 0x42, + 0xdf, 0x90, 0xb1, 0x42, 0x35, 0x38, 0xdf, 0x42, 0x94, 0xa3, 0xd9, 0x42, 0x43, 0xf1, 0xee, 0x42, + 0x32, 0xbe, 0xe6, 0x42, 0xb5, 0xe3, 0xe2, 0x42, 0x8a, 0x26, 0xf9, 0x42, 0xae, 0xf9, 0x10, 0x43, + 0x04, 0x96, 0x1c, 0x43, 0xb4, 0xf5, 0x34, 0x43, 0x4d, 0x9f, 0x1c, 0x43, 0xe8, 0xcb, 0x0b, 0x43, + 0x7a, 0xe9, 0x05, 0x43, 0x73, 0xf3, 0xa3, 0x42, 0x55, 0x3f, 0x61, 0x42, 0x89, 0xee, 0x83, 0x42, + 0x91, 0x9f, 0x82, 0x42, 0xf6, 0xbf, 0x92, 0x42, 0x3f, 0x8f, 0xa0, 0x42, 0x9c, 0x06, 0xab, 0x42, + 0x02, 0x90, 0xae, 0x42, 0xec, 0x3c, 0xc3, 0x42, 0xb6, 0xaa, 0xd7, 0x42, 0xe7, 0xfc, 0xf4, 0x42, + 0x1f, 0xb0, 0xcd, 0x42, 0x3e, 0xfa, 0xb4, 0x42, 0x2f, 0x68, 0x62, 0x42, 0x45, 0x9f, 0x33, 0x42, + 0xdd, 0xd2, 0x4a, 0x42, 0x06, 0xbd, 0x77, 0x42, 0x8a, 0xdd, 0x72, 0x42, 0x75, 0x3a, 0x93, 0x42, + 0x4c, 0x5e, 0xb1, 0x42, 0x46, 0x09, 0xa2, 0x42, 0x22, 0x31, 0xcc, 0x42, 0x6e, 0xae, 0x9b, 0x42, + 0xde, 0x88, 0xc0, 0x42, 0x66, 0xf0, 0x8b, 0x42, 0xeb, 0xc9, 0xb4, 0x42, 0xf5, 0x8d, 0xb5, 0x42, + 0x8c, 0x1f, 0x9f, 0x42, 0x2e, 0x8b, 0xe3, 0x41, 0xc9, 0x9b, 0xa3, 0x42, 0xee, 0x59, 0xc5, 0x42, + 0x87, 0x9e, 0xc9, 0x42, 0x38, 0x93, 0xdc, 0x42, 0x60, 0x2b, 0xf5, 0x42, 0x88, 0x9e, 0xfa, 0x42, + 0x21, 0xb0, 0x15, 0x43, 0x5e, 0xb2, 0x11, 0x43, 0x9a, 0x24, 0x15, 0x43, 0x1f, 0x5d, 0x01, 0x43, + 0x5b, 0x45, 0x17, 0x43, 0x51, 0x3f, 0x09, 0x43, 0xff, 0xd5, 0x0d, 0x43, 0x93, 0x95, 0x9e, 0x42, + 0x0a, 0x99, 0xaf, 0x42, 0xaf, 0x0a, 0xc8, 0x42, 0x2a, 0x68, 0xd2, 0x42, 0x84, 0x88, 0x0b, 0x43, + 0x6a, 0xde, 0xf8, 0x42, 0x5b, 0xeb, 0x01, 0x43, 0x10, 0xbb, 0x27, 0x43, 0x82, 0x2b, 0x22, 0x43, + 0x62, 0x67, 0x0f, 0x43, 0x13, 0xc4, 0xeb, 0x42, 0x78, 0xd3, 0x08, 0x43, 0x20, 0x2a, 0x11, 0x43, + 0xcc, 0x61, 0x02, 0x43, 0x43, 0x30, 0xa2, 0x42, 0xf2, 0xd5, 0xa7, 0x42, 0xd7, 0x1d, 0xe5, 0x42, + 0x59, 0xc6, 0xe8, 0x42, 0x68, 0x99, 0xe8, 0x42, 0x18, 0x1a, 0xfe, 0x42, 0xdd, 0x52, 0x0a, 0x43, + 0x91, 0xcd, 0x2b, 0x43, 0xa0, 0xa7, 0x21, 0x43, 0xd1, 0x2a, 0x28, 0x43, 0x7f, 0xb7, 0x01, 0x43, + 0x21, 0x1c, 0x13, 0x43, 0x2f, 0x43, 0x0a, 0x43, 0xb7, 0xda, 0x01, 0x43, 0x36, 0x7b, 0xa2, 0x42, + 0xf1, 0xe7, 0xa6, 0x42, 0x20, 0xec, 0xff, 0x42, 0xc2, 0x7c, 0xff, 0x42, 0x29, 0x9a, 0xf8, 0x42, + 0x17, 0xa9, 0x09, 0x43, 0xb0, 0xdc, 0x14, 0x43, 0x95, 0xfc, 0x34, 0x43, 0x0b, 0x40, 0x25, 0x43, + 0xc5, 0x6d, 0x23, 0x43, 0xb8, 0x09, 0x14, 0x43, 0x10, 0xea, 0xfe, 0x42, 0xf9, 0x97, 0x03, 0x43, + 0x2c, 0xc5, 0xe0, 0x42, 0x32, 0x5a, 0x8c, 0x42, 0x3a, 0xd3, 0xc3, 0x42, 0x92, 0xdf, 0x01, 0x43, + 0x8d, 0x11, 0xe9, 0x42, 0x36, 0x42, 0x19, 0x43, 0xb5, 0x01, 0xee, 0x42, 0xbd, 0x8f, 0x09, 0x43, + 0x60, 0x29, 0x3b, 0x43, 0x17, 0x93, 0x46, 0x43, 0xf2, 0x9b, 0x2f, 0x43, 0xfe, 0x9e, 0x09, 0x43, + 0xab, 0x43, 0xf8, 0x42, 0xaf, 0x19, 0xe1, 0x42, 0x16, 0x06, 0xe6, 0x42, 0x48, 0x21, 0x8c, 0x42, + 0x93, 0x0f, 0xd7, 0x42, 0x96, 0xaa, 0xfb, 0x42, 0x14, 0xed, 0xeb, 0x42, 0xde, 0x34, 0xef, 0x42, + 0xbc, 0xe5, 0x08, 0x43, 0x82, 0x47, 0x0d, 0x43, 0x6b, 0x34, 0x24, 0x43, 0x84, 0x0f, 0x28, 0x43, + 0xf3, 0xa2, 0x1a, 0x43, 0x0a, 0x20, 0xce, 0x42, 0x6c, 0x11, 0xdd, 0x42, 0xa0, 0xd5, 0xf5, 0x42, + 0xd9, 0xe1, 0x05, 0x43, 0x9c, 0x1c, 0xa8, 0x42, 0xfc, 0xd6, 0xc6, 0x42, 0x25, 0xaa, 0x13, 0x43, + 0xb7, 0x4d, 0xe6, 0x42, 0x30, 0x76, 0xe7, 0x42, 0xbf, 0x08, 0x11, 0x43, 0x87, 0x69, 0x15, 0x43, + 0x44, 0xd2, 0x14, 0x43, 0xf5, 0x04, 0x07, 0x43, 0x90, 0xf3, 0x02, 0x43, 0x04, 0xf7, 0xc0, 0x42, + 0x42, 0x9a, 0xd5, 0x42, 0x6a, 0x3e, 0x08, 0x43, 0x14, 0xde, 0x0f, 0x43, 0x2c, 0xd8, 0xc4, 0x42, + 0x29, 0xee, 0xb0, 0x42, 0x54, 0x07, 0x1d, 0x43, 0x47, 0x34, 0x03, 0x43, 0xe4, 0xc0, 0x04, 0x43, + 0xb0, 0x5c, 0x0f, 0x43, 0xb2, 0x46, 0x0a, 0x43, 0xe4, 0x39, 0x19, 0x43, 0x09, 0x52, 0x05, 0x43, + 0xde, 0x55, 0xdf, 0x42, 0x52, 0x08, 0xf6, 0x42, 0x1a, 0x45, 0xfb, 0x42, 0xbe, 0xc2, 0xe6, 0x42, + 0x0b, 0x48, 0x07, 0x43, 0x79, 0x3f, 0xb9, 0x42, 0x54, 0xfe, 0xd1, 0x42, 0x31, 0xfc, 0x0d, 0x43, + 0x6a, 0x5d, 0x09, 0x43, 0x72, 0x8a, 0x16, 0x43, 0x0c, 0x88, 0x19, 0x43, 0xf1, 0xe6, 0x0f, 0x43, + 0x8a, 0x30, 0x08, 0x43, 0x7f, 0x11, 0x0e, 0x43, 0x47, 0x85, 0xfb, 0x42, 0x9e, 0xf1, 0x10, 0x43, + 0x2a, 0x3b, 0xf1, 0x42, 0x86, 0x5a, 0x0a, 0x43, 0x4b, 0xa1, 0x2c, 0x43, 0x6c, 0x79, 0xcc, 0x42, + 0xe0, 0x36, 0xcb, 0x42, 0xa5, 0xff, 0x20, 0x43, 0xa6, 0xd7, 0x0e, 0x43, 0x63, 0xf4, 0x06, 0x43, + 0x4e, 0xed, 0xed, 0x42, 0xd5, 0xb1, 0x0b, 0x43, 0x70, 0xb7, 0x19, 0x43, 0x85, 0xe2, 0x15, 0x43, + 0x70, 0x6c, 0x0c, 0x43, 0xb7, 0xe7, 0xef, 0x42, 0xb8, 0xe7, 0x1c, 0x43, 0xe7, 0x8d, 0x20, 0x43, + 0x19, 0x1b, 0x36, 0x43, 0x3c, 0x8e, 0xa7, 0x42, 0x58, 0x2f, 0xb4, 0x42, 0x99, 0x9d, 0xfe, 0x42, + 0x92, 0x54, 0xcd, 0x42, 0x78, 0xae, 0x07, 0x43, 0x7c, 0xb1, 0xe2, 0x42, 0x50, 0xfd, 0xf4, 0x42, + 0xdc, 0x2d, 0xea, 0x42, 0x09, 0xe8, 0x19, 0x43, 0xc8, 0xba, 0x08, 0x43, 0x9f, 0x3f, 0x24, 0x43, + 0xc5, 0x00, 0x22, 0x43, 0xcd, 0xc2, 0x1d, 0x43, 0xc6, 0xcc, 0xf9, 0x42, 0xd6, 0xf1, 0xb3, 0x42, + 0xd4, 0xe3, 0xa2, 0x42, 0x14, 0x3e, 0xd2, 0x42, 0x4c, 0x3b, 0xc7, 0x42, 0x8d, 0x73, 0xe3, 0x42, + 0x31, 0x64, 0xd4, 0x42, 0x41, 0x46, 0xfa, 0x42, 0xe9, 0x09, 0xf1, 0x42, 0xb8, 0x4a, 0x0a, 0x43, + 0x85, 0x85, 0x25, 0x43, 0x72, 0xc8, 0x25, 0x43, 0x30, 0xad, 0x19, 0x43, 0xa5, 0x26, 0x0b, 0x43, + 0x69, 0x7e, 0x07, 0x43, 0x6a, 0x5b, 0x87, 0x42, 0xfa, 0x4d, 0x42, 0x42, 0x69, 0x27, 0x8e, 0x42, + 0xa2, 0x41, 0x8e, 0x42, 0x93, 0xe2, 0x99, 0x42, 0x76, 0x0d, 0x9c, 0x42, 0xaa, 0x22, 0x71, 0x42, + 0x70, 0x35, 0xac, 0x42, 0x32, 0x72, 0xdb, 0x42, 0x51, 0x46, 0xc5, 0x42, 0x1c, 0xa6, 0xe3, 0x42, + 0x62, 0x7e, 0xb4, 0x42, 0x20, 0x49, 0x97, 0x42, 0x26, 0xc8, 0x85, 0x42, 0x70, 0xf0, 0x51, 0x42, + 0xf9, 0x0c, 0x28, 0x42, 0x71, 0xb7, 0x84, 0x42, 0x9b, 0xed, 0x7f, 0x42, 0x82, 0x61, 0x83, 0x42, + 0x2d, 0x0b, 0x9c, 0x42, 0xd2, 0xb0, 0x95, 0x42, 0xee, 0x4a, 0xb5, 0x42, 0x82, 0x8f, 0xa8, 0x42, + 0x8d, 0x76, 0xd1, 0x42, 0x33, 0x2f, 0x7b, 0x42, 0x1f, 0x4d, 0x92, 0x42, 0x29, 0x30, 0xbc, 0x42, + 0x1c, 0xa4, 0x8d, 0x42, 0x91, 0x0c, 0x2c, 0x42, 0x87, 0x35, 0xc9, 0x42, 0x0a, 0x01, 0xdf, 0x42, + 0x0e, 0x98, 0xa0, 0x42, 0x53, 0xdb, 0xcb, 0x42, 0x91, 0x12, 0x0a, 0x43, 0xc0, 0x39, 0x06, 0x43, + 0x8b, 0xe9, 0x07, 0x43, 0x3d, 0x64, 0x00, 0x43, 0x06, 0xba, 0x11, 0x43, 0x40, 0xd4, 0x0e, 0x43, + 0xa1, 0xc9, 0x00, 0x43, 0xb2, 0xf3, 0x03, 0x43, 0x54, 0xaa, 0x0e, 0x43, 0x3b, 0x6f, 0xd1, 0x42, + 0xa1, 0x9a, 0x9f, 0x42, 0x00, 0xd3, 0xff, 0x42, 0x92, 0x6e, 0xd1, 0x42, 0x85, 0x6b, 0xfa, 0x42, + 0xe9, 0xaa, 0xfb, 0x42, 0x74, 0xd0, 0x09, 0x43, 0xc6, 0x3b, 0x1f, 0x43, 0xa2, 0xd1, 0x20, 0x43, + 0x92, 0xd2, 0x1b, 0x43, 0x29, 0x0a, 0x04, 0x43, 0xbb, 0x7f, 0x0e, 0x43, 0xdb, 0x50, 0x16, 0x43, + 0xb3, 0x0d, 0x15, 0x43, 0x79, 0xcc, 0xb2, 0x42, 0xb4, 0xdb, 0xbd, 0x42, 0xe2, 0xad, 0xfb, 0x42, + 0xab, 0xed, 0xdd, 0x42, 0x91, 0x1c, 0x00, 0x43, 0x6f, 0x47, 0x06, 0x43, 0xe5, 0x5f, 0xf2, 0x42, + 0x5e, 0xb6, 0x2d, 0x43, 0xd0, 0xd3, 0x2e, 0x43, 0x03, 0x5a, 0x39, 0x43, 0xe3, 0x42, 0xe7, 0x42, + 0xcc, 0xa5, 0x1e, 0x43, 0x1e, 0xd5, 0x15, 0x43, 0xbe, 0x72, 0x16, 0x43, 0x84, 0x09, 0xa7, 0x42, + 0x36, 0xcf, 0xb2, 0x42, 0x98, 0x87, 0xe7, 0x42, 0x63, 0xd3, 0xd8, 0x42, 0xca, 0x1a, 0xf8, 0x42, + 0xba, 0xf3, 0x04, 0x43, 0x4b, 0x0c, 0x08, 0x43, 0xb2, 0x6d, 0x3d, 0x43, 0xa3, 0x8c, 0x34, 0x43, + 0x7c, 0x80, 0x26, 0x43, 0x05, 0x15, 0xf7, 0x42, 0x63, 0xa1, 0x13, 0x43, 0xfe, 0x4d, 0x1a, 0x43, + 0xa8, 0x79, 0x02, 0x43, 0x2c, 0x88, 0x94, 0x42, 0x25, 0x7a, 0xc0, 0x42, 0xe8, 0x0d, 0x03, 0x43, + 0x6b, 0x0c, 0xcb, 0x42, 0x7f, 0x29, 0xfa, 0x42, 0xf6, 0x99, 0xf9, 0x42, 0x4c, 0xec, 0x08, 0x43, + 0x33, 0x44, 0x2f, 0x43, 0xe6, 0x9f, 0x2d, 0x43, 0xb8, 0xa9, 0x2b, 0x43, 0x16, 0x06, 0x05, 0x43, + 0x8f, 0x45, 0x0e, 0x43, 0x94, 0x41, 0x07, 0x43, 0x63, 0x85, 0xf9, 0x42, 0xe3, 0x46, 0xaf, 0x42, + 0x15, 0x1b, 0xcf, 0x42, 0x0e, 0x81, 0x0b, 0x43, 0xb1, 0x0c, 0xf2, 0x42, 0xbf, 0x90, 0xf7, 0x42, + 0x74, 0x1b, 0xf7, 0x42, 0x45, 0xf6, 0x21, 0x43, 0xd4, 0x1f, 0x36, 0x43, 0x75, 0xbb, 0x2d, 0x43, + 0xd8, 0x8d, 0x18, 0x43, 0xd9, 0x94, 0xe6, 0x42, 0xb4, 0x9c, 0xfd, 0x42, 0x73, 0x68, 0xef, 0x42, + 0x2a, 0xa1, 0x07, 0x43, 0x61, 0xff, 0xb3, 0x42, 0xb1, 0x27, 0xc7, 0x42, 0xf3, 0x17, 0x04, 0x43, + 0x23, 0xf9, 0xd1, 0x42, 0xfc, 0x13, 0xde, 0x42, 0xed, 0x10, 0x1a, 0x43, 0x24, 0x1a, 0x0d, 0x43, + 0x5b, 0xe3, 0x1c, 0x43, 0x62, 0x8c, 0x1f, 0x43, 0x20, 0xc3, 0xfd, 0x42, 0x21, 0x8b, 0xc9, 0x42, + 0x6e, 0xd4, 0xfe, 0x42, 0x64, 0xba, 0x02, 0x43, 0x64, 0xd9, 0x04, 0x43, 0x51, 0x5e, 0xb9, 0x42, + 0x0d, 0xa3, 0xd7, 0x42, 0xf9, 0x50, 0x08, 0x43, 0x09, 0x9c, 0x0c, 0x43, 0xcf, 0x1e, 0x02, 0x43, + 0x87, 0xfa, 0x05, 0x43, 0x45, 0xb9, 0xf1, 0x42, 0x34, 0x9b, 0x0c, 0x43, 0xa2, 0x3b, 0x13, 0x43, + 0x30, 0x44, 0xec, 0x42, 0xd0, 0xd2, 0xc9, 0x42, 0xd0, 0xb9, 0xd6, 0x42, 0x58, 0x42, 0x08, 0x43, + 0x86, 0xc7, 0x08, 0x43, 0x59, 0x14, 0xb4, 0x42, 0x36, 0x6c, 0xd1, 0x42, 0xd6, 0xed, 0x0a, 0x43, + 0x73, 0xb5, 0x1c, 0x43, 0x04, 0x9e, 0x2b, 0x43, 0x0a, 0xd6, 0x00, 0x43, 0x94, 0xd0, 0x11, 0x43, + 0x62, 0xd9, 0x03, 0x43, 0xa8, 0x01, 0x12, 0x43, 0x5c, 0x9c, 0x0f, 0x43, 0x29, 0xac, 0x13, 0x43, + 0x9e, 0x06, 0xed, 0x42, 0x9e, 0xe6, 0xf3, 0x42, 0x8c, 0x5d, 0x22, 0x43, 0x56, 0x3a, 0xdd, 0x42, + 0x63, 0x97, 0xa0, 0x42, 0x63, 0xa8, 0x16, 0x43, 0x62, 0xac, 0x19, 0x43, 0x58, 0x5b, 0x25, 0x43, + 0xf4, 0x25, 0xff, 0x42, 0x32, 0x04, 0x17, 0x43, 0x5a, 0x67, 0x1a, 0x43, 0x02, 0x75, 0x17, 0x43, + 0xd5, 0x6a, 0x14, 0x43, 0x60, 0x44, 0x06, 0x43, 0x81, 0xf5, 0x25, 0x43, 0x96, 0x17, 0x25, 0x43, + 0x70, 0x61, 0x2c, 0x43, 0xdf, 0xcb, 0xd1, 0x42, 0xf9, 0x9c, 0xb0, 0x42, 0xf4, 0x2e, 0x0a, 0x43, + 0xaf, 0x0e, 0xd0, 0x42, 0x3a, 0x38, 0x01, 0x43, 0x10, 0xb6, 0xea, 0x42, 0x3e, 0x69, 0x05, 0x43, + 0x37, 0x9f, 0xf8, 0x42, 0x2b, 0x84, 0x16, 0x43, 0x5a, 0x22, 0x06, 0x43, 0x2f, 0xae, 0x1c, 0x43, + 0x32, 0x7e, 0x1f, 0x43, 0x6e, 0x54, 0x29, 0x43, 0x99, 0xf0, 0x18, 0x43, 0xb0, 0xd4, 0xe7, 0x42, + 0x74, 0x96, 0xa1, 0x42, 0x92, 0x06, 0xe8, 0x42, 0x3d, 0xc4, 0xd5, 0x42, 0x81, 0x8c, 0xda, 0x42, + 0x0a, 0x31, 0xcf, 0x42, 0xfd, 0x1b, 0xee, 0x42, 0x96, 0xdd, 0xec, 0x42, 0x70, 0xcc, 0x11, 0x43, + 0x5f, 0x09, 0x17, 0x43, 0xea, 0xdf, 0x2b, 0x43, 0xeb, 0x0e, 0x1e, 0x43, 0xea, 0xab, 0x1f, 0x43, + 0x59, 0xf1, 0xf9, 0x42, 0xf3, 0x5f, 0xbe, 0x42, 0x3f, 0xb9, 0x4f, 0x42, 0x7e, 0x74, 0xae, 0x42, + 0x8f, 0x9e, 0xa0, 0x42, 0xa4, 0x7e, 0xac, 0x42, 0xe5, 0x59, 0xa4, 0x42, 0x99, 0xe1, 0x8d, 0x42, + 0x1c, 0x35, 0xbb, 0x42, 0x1c, 0x02, 0xe1, 0x42, 0xe1, 0xcc, 0xe9, 0x42, 0xd1, 0xcb, 0x00, 0x43, + 0xe4, 0xe0, 0xcb, 0x42, 0xcd, 0xc2, 0xc5, 0x42, 0x73, 0x0d, 0x88, 0x42, 0x46, 0xdc, 0x24, 0x42, + 0xcb, 0xe2, 0x50, 0x42, 0x89, 0x2e, 0xa3, 0x42, 0xb7, 0x8a, 0x94, 0x42, 0x4d, 0x4e, 0xa8, 0x42, + 0x6d, 0x30, 0xbd, 0x42, 0xe3, 0x45, 0xca, 0x42, 0xef, 0xf9, 0xdf, 0x42, 0xd2, 0x71, 0xd3, 0x42, + 0x47, 0x08, 0xd2, 0x42, 0xef, 0xdc, 0xb4, 0x42, 0xe1, 0x3b, 0xd6, 0x42, 0xcb, 0x03, 0xc4, 0x42, + 0x6b, 0x20, 0xc6, 0x42, 0xa1, 0xd5, 0x60, 0x42, 0xd5, 0x5f, 0x9d, 0x42, 0xf2, 0x11, 0x05, 0x43, + 0xb5, 0xc1, 0xeb, 0x42, 0xa2, 0x87, 0x02, 0x43, 0x49, 0x2e, 0x0f, 0x43, 0x7e, 0x2a, 0x12, 0x43, + 0xa1, 0x35, 0x25, 0x43, 0xf2, 0x36, 0x1a, 0x43, 0xfc, 0xb0, 0x36, 0x43, 0x0c, 0x54, 0xfa, 0x42, + 0xd2, 0x74, 0x1f, 0x43, 0x55, 0xdb, 0x18, 0x43, 0xa9, 0x01, 0x28, 0x43, 0x3e, 0xa5, 0xc6, 0x42, + 0xdf, 0x25, 0xd5, 0x42, 0x09, 0x24, 0x05, 0x43, 0x1a, 0xd2, 0xbe, 0x42, 0xd8, 0xe1, 0x01, 0x43, + 0xfa, 0x7d, 0x19, 0x43, 0x4d, 0x0d, 0x1c, 0x43, 0xf8, 0x44, 0x38, 0x43, 0xe1, 0xa1, 0x30, 0x43, + 0x85, 0x73, 0x32, 0x43, 0x2a, 0x53, 0x1d, 0x43, 0xb3, 0x09, 0x32, 0x43, 0xa2, 0x2f, 0x1a, 0x43, + 0xd3, 0x67, 0x28, 0x43, 0xc9, 0xcf, 0xd2, 0x42, 0x42, 0xe2, 0xca, 0x42, 0x2b, 0xcf, 0x08, 0x43, + 0x6d, 0x71, 0xea, 0x42, 0xb2, 0xd6, 0x19, 0x43, 0x33, 0x65, 0x13, 0x43, 0x9f, 0xab, 0x11, 0x43, + 0xc5, 0x0b, 0x32, 0x43, 0xbd, 0x93, 0x3f, 0x43, 0x5f, 0x2e, 0x32, 0x43, 0xd8, 0x30, 0x26, 0x43, + 0xf2, 0xd3, 0x2e, 0x43, 0xfe, 0x6d, 0x1f, 0x43, 0x99, 0xb9, 0x21, 0x43, 0xde, 0x4f, 0xdb, 0x42, + 0xfb, 0x46, 0xd9, 0x42, 0xed, 0xc1, 0x0a, 0x43, 0xe6, 0xbd, 0xfb, 0x42, 0xa2, 0xf0, 0x10, 0x43, + 0x97, 0xa9, 0x0c, 0x43, 0x9e, 0x3d, 0x1c, 0x43, 0x3b, 0xb2, 0x3c, 0x43, 0xf3, 0x04, 0x4e, 0x43, + 0xd7, 0x24, 0x40, 0x43, 0x79, 0x1c, 0x24, 0x43, 0x24, 0x3b, 0x27, 0x43, 0x68, 0xaf, 0x07, 0x43, + 0x03, 0x44, 0x11, 0x43, 0x4b, 0x14, 0xc6, 0x42, 0x39, 0xcd, 0xd2, 0x42, 0x05, 0x7c, 0x15, 0x43, + 0x98, 0xe0, 0x00, 0x43, 0x55, 0xa8, 0x1c, 0x43, 0x15, 0xe6, 0x09, 0x43, 0xcf, 0x2e, 0x16, 0x43, + 0x16, 0xb4, 0x48, 0x43, 0x0e, 0x33, 0x4f, 0x43, 0xb7, 0x9b, 0x47, 0x43, 0xf3, 0x4d, 0x24, 0x43, + 0x80, 0x97, 0x12, 0x43, 0x11, 0x30, 0x0f, 0x43, 0x55, 0x78, 0x11, 0x43, 0xcb, 0xb4, 0xdd, 0x42, + 0xd2, 0xd8, 0xfa, 0x42, 0x75, 0xe7, 0x1d, 0x43, 0x95, 0xfa, 0x0b, 0x43, 0xe6, 0x7d, 0x17, 0x43, + 0xe5, 0x54, 0x18, 0x43, 0xba, 0xc6, 0x1d, 0x43, 0x76, 0x6a, 0x44, 0x43, 0x85, 0xf0, 0x41, 0x43, + 0x3b, 0xee, 0x20, 0x43, 0x6d, 0x49, 0x0d, 0x43, 0x55, 0x9d, 0x05, 0x43, 0x62, 0x36, 0x06, 0x43, + 0x05, 0x0b, 0x1a, 0x43, 0xb9, 0x06, 0xca, 0x42, 0x7a, 0x0a, 0xdf, 0x42, 0x7a, 0x01, 0x13, 0x43, + 0xba, 0x30, 0x06, 0x43, 0x0e, 0xfa, 0x16, 0x43, 0x4c, 0x14, 0x1f, 0x43, 0x05, 0xa5, 0x10, 0x43, + 0x94, 0x27, 0x2a, 0x43, 0x81, 0x83, 0x30, 0x43, 0x3c, 0xfd, 0x0c, 0x43, 0xcb, 0x09, 0x08, 0x43, + 0xf6, 0x56, 0xf6, 0x42, 0x73, 0x90, 0x11, 0x43, 0xf3, 0xab, 0x30, 0x43, 0xd9, 0x89, 0xee, 0x42, + 0x1d, 0xbf, 0xce, 0x42, 0xc5, 0x12, 0x13, 0x43, 0xed, 0x7f, 0x19, 0x43, 0xfb, 0xda, 0x0f, 0x43, + 0x18, 0xfd, 0x11, 0x43, 0xc8, 0xbf, 0x26, 0x43, 0x5b, 0xa8, 0x27, 0x43, 0xf2, 0xbf, 0x1c, 0x43, + 0xf5, 0xa2, 0x0d, 0x43, 0x73, 0xa5, 0x08, 0x43, 0x80, 0x39, 0x05, 0x43, 0x05, 0x12, 0x12, 0x43, + 0xcb, 0x6b, 0x23, 0x43, 0x46, 0x10, 0xd4, 0x42, 0x35, 0x30, 0xce, 0x42, 0x93, 0x17, 0x3d, 0x43, + 0x6b, 0xac, 0x2b, 0x43, 0x1d, 0xa9, 0x32, 0x43, 0x71, 0x82, 0x14, 0x43, 0x84, 0x93, 0x29, 0x43, + 0xe3, 0x91, 0x21, 0x43, 0x35, 0x12, 0x29, 0x43, 0x1b, 0xaf, 0x21, 0x43, 0xd9, 0xb9, 0x18, 0x43, + 0xa0, 0x54, 0x0d, 0x43, 0x9e, 0xe4, 0x10, 0x43, 0x67, 0x1f, 0x2e, 0x43, 0x73, 0xe2, 0xf4, 0x42, + 0xcd, 0xe6, 0xd0, 0x42, 0xa7, 0xd5, 0x26, 0x43, 0xf3, 0xd9, 0x28, 0x43, 0x22, 0x97, 0x25, 0x43, + 0xfb, 0x22, 0x11, 0x43, 0x57, 0x03, 0x2b, 0x43, 0x07, 0x57, 0x18, 0x43, 0x5a, 0xf6, 0x2a, 0x43, + 0xcb, 0xc6, 0x21, 0x43, 0xcd, 0xd5, 0x21, 0x43, 0xbd, 0x9c, 0x27, 0x43, 0x73, 0x85, 0x31, 0x43, + 0x11, 0xa6, 0x3f, 0x43, 0xa6, 0x67, 0xf4, 0x42, 0x75, 0x46, 0xb9, 0x42, 0x28, 0x3c, 0x0b, 0x43, + 0x45, 0x9b, 0x0d, 0x43, 0x80, 0x23, 0x07, 0x43, 0x7a, 0x05, 0x11, 0x43, 0x44, 0x96, 0x1b, 0x43, + 0x15, 0x7d, 0x14, 0x43, 0x8b, 0x6c, 0x23, 0x43, 0xa3, 0xa5, 0x23, 0x43, 0x1b, 0x40, 0x2c, 0x43, + 0x91, 0x0a, 0x41, 0x43, 0xca, 0xa0, 0x41, 0x43, 0x75, 0x1a, 0x2a, 0x43, 0xb5, 0xd4, 0xe1, 0x42, + 0xba, 0x35, 0xb6, 0x42, 0x47, 0xc1, 0xf1, 0x42, 0xb0, 0x87, 0x06, 0x43, 0x6b, 0xd8, 0xdb, 0x42, + 0x39, 0x4a, 0xf9, 0x42, 0xad, 0x71, 0x00, 0x43, 0x5c, 0x4a, 0x0c, 0x43, 0xc3, 0xfb, 0x2c, 0x43, + 0xce, 0x20, 0x2b, 0x43, 0x7b, 0xd9, 0x3e, 0x43, 0xa3, 0x84, 0x29, 0x43, 0xa3, 0x7e, 0x33, 0x43, + 0xb5, 0x19, 0xf9, 0x42, 0x78, 0xfe, 0xbd, 0x42, 0x1f, 0x05, 0x88, 0x42, 0xc7, 0xea, 0x9f, 0x42, + 0xb8, 0xd3, 0xa1, 0x42, 0x63, 0xfe, 0xb6, 0x42, 0xb8, 0xe3, 0xba, 0x42, 0x3d, 0x8c, 0xc1, 0x42, + 0xfd, 0x7c, 0xc3, 0x42, 0xf0, 0xbd, 0xee, 0x42, 0xf2, 0x24, 0xeb, 0x42, 0xac, 0xe5, 0x0b, 0x43, + 0x79, 0xd6, 0xf6, 0x42, 0x9f, 0x33, 0xd6, 0x42, 0x85, 0x8c, 0xae, 0x42, 0x05, 0x1f, 0x56, 0x42, + 0xfc, 0xf8, 0x45, 0x42, 0x2d, 0x44, 0x80, 0x42, 0xb6, 0x40, 0x81, 0x42, 0x15, 0xf5, 0xab, 0x42, + 0x7a, 0x10, 0xb7, 0x42, 0x64, 0x7c, 0xc9, 0x42, 0x7f, 0x59, 0xcc, 0x42, 0xfe, 0x04, 0xd3, 0x42, + 0x6f, 0x8e, 0xd8, 0x42, 0xf8, 0x43, 0x97, 0x42, 0x5d, 0x88, 0xdb, 0x42, 0x23, 0x6d, 0xa4, 0x42, + 0x0d, 0x82, 0xa0, 0x42, 0xa1, 0x11, 0x73, 0x42, 0x1d, 0x1d, 0xbc, 0x42, 0x55, 0x0f, 0xd6, 0x42, + 0xbb, 0x1d, 0xbc, 0x42, 0x05, 0xcd, 0xf9, 0x42, 0xe9, 0xd3, 0x0c, 0x43, 0x32, 0xaf, 0xf1, 0x42, + 0xd6, 0xe5, 0x0f, 0x43, 0x70, 0x58, 0x20, 0x43, 0xb2, 0xea, 0x1c, 0x43, 0xcc, 0x61, 0xf1, 0x42, + 0x82, 0x89, 0x13, 0x43, 0x1a, 0x58, 0x1d, 0x43, 0xc8, 0xa4, 0x14, 0x43, 0xa2, 0xbb, 0xaa, 0x42, + 0x4d, 0x92, 0xd0, 0x42, 0xa1, 0xf8, 0xdc, 0x42, 0x19, 0x3e, 0xe0, 0x42, 0x81, 0xc7, 0xfb, 0x42, + 0x06, 0xf0, 0x15, 0x43, 0x3a, 0x91, 0x23, 0x43, 0x84, 0x89, 0x27, 0x43, 0xf5, 0x80, 0x0a, 0x43, + 0xf4, 0xdb, 0x15, 0x43, 0x85, 0x53, 0xfa, 0x42, 0x44, 0xf5, 0x18, 0x43, 0x96, 0xc6, 0x13, 0x43, + 0x0a, 0xac, 0x1a, 0x43, 0x80, 0xc8, 0xe1, 0x42, 0xf3, 0x5e, 0xc9, 0x42, 0x3a, 0x03, 0x07, 0x43, + 0x66, 0x58, 0x04, 0x43, 0xe7, 0xde, 0xfc, 0x42, 0x7e, 0x1f, 0x09, 0x43, 0x4e, 0x3e, 0x06, 0x43, + 0x24, 0xf3, 0x3a, 0x43, 0xe8, 0x34, 0x3b, 0x43, 0xa6, 0x57, 0x27, 0x43, 0xda, 0x29, 0x17, 0x43, + 0x1e, 0x05, 0x1a, 0x43, 0xfc, 0x6c, 0x1d, 0x43, 0x5a, 0x36, 0x0d, 0x43, 0x5d, 0x21, 0xad, 0x42, + 0x1b, 0xbc, 0xc5, 0x42, 0x3a, 0xf2, 0x06, 0x43, 0xe3, 0xa1, 0xe5, 0x42, 0x26, 0x4d, 0x0e, 0x43, + 0x87, 0xf9, 0x09, 0x43, 0x06, 0x17, 0x22, 0x43, 0x32, 0xb5, 0x16, 0x43, 0x8e, 0xfb, 0x3a, 0x43, + 0xac, 0x56, 0x2d, 0x43, 0x6a, 0xa4, 0x21, 0x43, 0xb8, 0xce, 0x17, 0x43, 0xfc, 0xb6, 0x16, 0x43, + 0x21, 0x43, 0xfa, 0x42, 0xf2, 0x0e, 0xc1, 0x42, 0xb7, 0x78, 0xd5, 0x42, 0xbc, 0x63, 0x18, 0x43, + 0x24, 0x7f, 0xf8, 0x42, 0x4c, 0xe5, 0xfa, 0x42, 0xcb, 0xea, 0xf9, 0x42, 0x10, 0x9b, 0x1d, 0x43, + 0xae, 0xab, 0x3b, 0x43, 0xf6, 0x37, 0x48, 0x43, 0x5c, 0x32, 0x4a, 0x43, 0xd8, 0x00, 0x1b, 0x43, + 0xb2, 0x6a, 0x0e, 0x43, 0xba, 0x72, 0x10, 0x43, 0xe4, 0x44, 0x0f, 0x43, 0x7b, 0x01, 0xbb, 0x42, + 0xae, 0x87, 0xc8, 0x42, 0x8a, 0x44, 0x0e, 0x43, 0x72, 0x14, 0x0b, 0x43, 0x81, 0xd5, 0xf5, 0x42, + 0xda, 0xa7, 0x0f, 0x43, 0xa2, 0xd3, 0x18, 0x43, 0x12, 0x9d, 0x38, 0x43, 0x02, 0xec, 0x1a, 0x43, + 0xe0, 0x18, 0x0f, 0x43, 0xd6, 0xf2, 0xfd, 0x42, 0x80, 0x18, 0x0d, 0x43, 0xd8, 0xb7, 0x03, 0x43, + 0x0a, 0xb9, 0x16, 0x43, 0x21, 0xe3, 0xd6, 0x42, 0x1a, 0xb3, 0xbe, 0x42, 0x92, 0x98, 0x1d, 0x43, + 0xbd, 0x89, 0x0b, 0x43, 0x28, 0x2e, 0x07, 0x43, 0x92, 0x68, 0x0e, 0x43, 0x76, 0x9d, 0x2b, 0x43, + 0xe0, 0xaa, 0x2f, 0x43, 0xa4, 0xde, 0x20, 0x43, 0x56, 0x2c, 0x1c, 0x43, 0x93, 0xff, 0xe9, 0x42, + 0x93, 0x4f, 0xf3, 0x42, 0x96, 0x8f, 0x02, 0x43, 0xe4, 0xe2, 0x0f, 0x43, 0xa9, 0xac, 0xdb, 0x42, + 0x95, 0x97, 0xbf, 0x42, 0xc4, 0x2c, 0x25, 0x43, 0x92, 0x06, 0x17, 0x43, 0x40, 0x91, 0x08, 0x43, + 0x54, 0x83, 0x1d, 0x43, 0x84, 0x6d, 0x1c, 0x43, 0xa6, 0xc6, 0x1e, 0x43, 0x4a, 0xc9, 0x09, 0x43, + 0x88, 0x73, 0xfb, 0x42, 0xe4, 0x34, 0x12, 0x43, 0x36, 0xba, 0x16, 0x43, 0x12, 0xd1, 0x06, 0x43, + 0x42, 0xa3, 0x10, 0x43, 0xef, 0x33, 0xd8, 0x42, 0x88, 0x37, 0xd4, 0x42, 0xf6, 0x01, 0x28, 0x43, + 0x98, 0xe0, 0x0e, 0x43, 0xfa, 0xd4, 0x20, 0x43, 0x7a, 0xc9, 0x10, 0x43, 0xd4, 0x22, 0x29, 0x43, + 0x08, 0x45, 0x21, 0x43, 0x14, 0x40, 0x30, 0x43, 0xa6, 0x71, 0x22, 0x43, 0xea, 0x06, 0x10, 0x43, + 0xe4, 0xfc, 0x08, 0x43, 0x50, 0xb9, 0x14, 0x43, 0xba, 0x24, 0x2e, 0x43, 0x8f, 0xa3, 0xf1, 0x42, + 0xe9, 0x0f, 0xb3, 0x42, 0x8c, 0x78, 0x1a, 0x43, 0x5e, 0x49, 0x2e, 0x43, 0x0c, 0x1f, 0x30, 0x43, + 0x7c, 0x12, 0x09, 0x43, 0x4a, 0x21, 0x18, 0x43, 0x6a, 0x02, 0x1c, 0x43, 0xde, 0x87, 0x1a, 0x43, + 0xae, 0x69, 0x20, 0x43, 0xd2, 0xf4, 0x06, 0x43, 0xd2, 0x50, 0x22, 0x43, 0xfe, 0x1e, 0x2f, 0x43, + 0xac, 0x57, 0x28, 0x43, 0x55, 0xb9, 0xce, 0x42, 0x9a, 0x05, 0xc5, 0x42, 0xa1, 0x81, 0xf7, 0x42, + 0xf6, 0x4e, 0xeb, 0x42, 0xbc, 0xf8, 0x18, 0x43, 0xe2, 0x01, 0x02, 0x43, 0xe6, 0xb1, 0x19, 0x43, + 0x92, 0x84, 0x16, 0x43, 0xa4, 0x0d, 0x24, 0x43, 0x72, 0xa6, 0x1a, 0x43, 0x4c, 0x4b, 0x26, 0x43, + 0x40, 0x68, 0x34, 0x43, 0xb0, 0x77, 0x45, 0x43, 0xc2, 0xaa, 0x16, 0x43, 0x2c, 0x45, 0xc2, 0x42, + 0xc7, 0x6d, 0xc5, 0x42, 0x02, 0x48, 0xdd, 0x42, 0xcb, 0xa9, 0xf2, 0x42, 0xc3, 0xc1, 0xef, 0x42, + 0x3e, 0x4e, 0xff, 0x42, 0x87, 0x27, 0xde, 0x42, 0xb6, 0x7f, 0x00, 0x43, 0x36, 0x5b, 0x2a, 0x43, + 0xd8, 0x7b, 0x20, 0x43, 0x64, 0xa4, 0x2e, 0x43, 0xfe, 0xcf, 0x20, 0x43, 0xfe, 0x62, 0x16, 0x43, + 0x06, 0x1d, 0x20, 0x43, 0x87, 0xce, 0xa6, 0x42, 0x9c, 0x57, 0x7c, 0x42, 0x65, 0xa3, 0x9a, 0x42, + 0xe5, 0x96, 0xa5, 0x42, 0xf1, 0x25, 0xbc, 0x42, 0x6b, 0x38, 0xc8, 0x42, 0x3b, 0x7c, 0xaa, 0x42, + 0x99, 0x9e, 0xc9, 0x42, 0xd9, 0x41, 0xee, 0x42, 0xc6, 0x2c, 0x01, 0x43, 0xd3, 0x25, 0x0d, 0x43, + 0xcc, 0x93, 0xdd, 0x42, 0xf9, 0xa5, 0xa9, 0x42, 0x6d, 0x3b, 0x8b, 0x42, 0xff, 0xb0, 0x80, 0x42, + 0x17, 0x80, 0x36, 0x42, 0x79, 0x25, 0x87, 0x42, 0x12, 0xc8, 0x64, 0x42, 0x21, 0x02, 0x9a, 0x42, + 0x68, 0xc2, 0xba, 0x42, 0x36, 0x67, 0xb2, 0x42, 0x86, 0xd6, 0xb8, 0x42, 0xbf, 0xcc, 0xab, 0x42, + 0xba, 0xad, 0xb7, 0x42, 0x25, 0x9f, 0x87, 0x42, 0xf6, 0xe1, 0x95, 0x42, 0xc6, 0x1a, 0xbd, 0x42, + 0xa6, 0xce, 0x9f, 0x42, 0x4a, 0xa0, 0x4d, 0x42, 0x4f, 0xf0, 0x93, 0x42, 0xcf, 0x5b, 0xc6, 0x42, + 0xae, 0x87, 0xc7, 0x42, 0x99, 0xb9, 0xd9, 0x42, 0xda, 0xbf, 0xfd, 0x42, 0x58, 0x8a, 0xe9, 0x42, + 0x2e, 0x11, 0x0d, 0x43, 0x89, 0xbe, 0x13, 0x43, 0xbb, 0x88, 0x15, 0x43, 0x7b, 0x9e, 0xea, 0x42, + 0x0b, 0xf5, 0x0d, 0x43, 0xed, 0x16, 0x10, 0x43, 0x3a, 0x7b, 0x10, 0x43, 0x62, 0xdb, 0xbb, 0x42, + 0xdc, 0x1b, 0xaa, 0x42, 0x36, 0x29, 0xe1, 0x42, 0x8a, 0xaf, 0x9b, 0x42, 0xe0, 0x69, 0xe3, 0x42, + 0x38, 0xe8, 0xf7, 0x42, 0xc1, 0x3e, 0x09, 0x43, 0x98, 0xa9, 0x1f, 0x43, 0x41, 0x1d, 0x1e, 0x43, + 0x40, 0x7d, 0x0f, 0x43, 0x90, 0x94, 0x08, 0x43, 0x1e, 0xf8, 0x01, 0x43, 0x16, 0x53, 0x16, 0x43, + 0x3e, 0xc2, 0x15, 0x43, 0x10, 0x86, 0xb0, 0x42, 0x4b, 0x74, 0xb3, 0x42, 0x40, 0x30, 0xea, 0x42, + 0x30, 0x20, 0xc0, 0x42, 0xce, 0xe8, 0xfa, 0x42, 0xf2, 0xbc, 0xe7, 0x42, 0xa0, 0xf9, 0x02, 0x43, + 0x9c, 0xb5, 0x2a, 0x43, 0x56, 0xa6, 0x2f, 0x43, 0xf4, 0xf8, 0x35, 0x43, 0x42, 0x97, 0x0c, 0x43, + 0x61, 0x64, 0x05, 0x43, 0xa9, 0x61, 0x18, 0x43, 0xf1, 0x9e, 0x04, 0x43, 0x9f, 0xfe, 0xa1, 0x42, + 0x8f, 0xb6, 0x8a, 0x42, 0x3c, 0x0d, 0xde, 0x42, 0xff, 0x42, 0xde, 0x42, 0x72, 0x2a, 0xf4, 0x42, + 0x45, 0xea, 0x0b, 0x43, 0x9c, 0xc5, 0x04, 0x43, 0xa6, 0x39, 0x21, 0x43, 0x01, 0x34, 0x2e, 0x43, + 0xbd, 0x9d, 0x29, 0x43, 0x19, 0xed, 0x10, 0x43, 0x64, 0x2a, 0x11, 0x43, 0xcc, 0xbe, 0x06, 0x43, + 0xa2, 0x46, 0xeb, 0x42, 0xc8, 0xbc, 0x9a, 0x42, 0x7e, 0x67, 0xb1, 0x42, 0x8b, 0xcf, 0x0a, 0x43, + 0xe7, 0x1c, 0xe4, 0x42, 0x58, 0xc5, 0xfb, 0x42, 0xea, 0xac, 0xee, 0x42, 0x8b, 0x84, 0x17, 0x43, + 0xdd, 0xf4, 0x2e, 0x43, 0xfb, 0xe5, 0x29, 0x43, 0x3e, 0xb2, 0x3c, 0x43, 0x3e, 0x98, 0x0b, 0x43, + 0xd6, 0x37, 0x04, 0x43, 0x79, 0x5b, 0xc5, 0x42, 0xb6, 0xcb, 0x00, 0x43, 0x10, 0x06, 0xae, 0x42, + 0x69, 0xdc, 0xbe, 0x42, 0x77, 0x58, 0x13, 0x43, 0x78, 0x2d, 0x00, 0x43, 0xc2, 0x60, 0xdc, 0x42, + 0x66, 0xd8, 0x03, 0x43, 0xc2, 0xc5, 0x04, 0x43, 0xa7, 0x16, 0x25, 0x43, 0x57, 0x57, 0x11, 0x43, + 0x9e, 0x08, 0x1a, 0x43, 0x82, 0x7f, 0xe4, 0x42, 0x94, 0x6f, 0xe5, 0x42, 0x7b, 0x52, 0x02, 0x43, + 0x70, 0xeb, 0x08, 0x43, 0x89, 0x11, 0xb7, 0x42, 0xd4, 0xe4, 0xba, 0x42, 0x6b, 0x95, 0x0d, 0x43, + 0x4e, 0x94, 0xea, 0x42, 0x53, 0x8b, 0xf3, 0x42, 0x9a, 0x28, 0x06, 0x43, 0xb2, 0x4f, 0x0f, 0x43, + 0x6d, 0x68, 0x25, 0x43, 0x15, 0x43, 0xf5, 0x42, 0x6e, 0xe4, 0xf9, 0x42, 0x8e, 0x17, 0xdc, 0x42, + 0x59, 0x7c, 0xb3, 0x42, 0xb9, 0xa7, 0xe4, 0x42, 0xe8, 0x6a, 0xf5, 0x42, 0xf4, 0x10, 0xc2, 0x42, + 0xb3, 0x62, 0xa1, 0x42, 0xa7, 0xba, 0x08, 0x43, 0xc6, 0xa0, 0x03, 0x43, 0x8f, 0x90, 0x1c, 0x43, + 0xa9, 0x37, 0x23, 0x43, 0x64, 0x8f, 0x14, 0x43, 0x76, 0xd0, 0x0a, 0x43, 0xf2, 0x51, 0xfd, 0x42, + 0x6c, 0x57, 0xe2, 0x42, 0xdf, 0x0a, 0xe3, 0x42, 0x9c, 0xe8, 0xed, 0x42, 0x8e, 0xdf, 0xea, 0x42, + 0x0c, 0x31, 0x0e, 0x43, 0x26, 0xa4, 0xc6, 0x42, 0x97, 0x38, 0xab, 0x42, 0xe4, 0x88, 0x0a, 0x43, + 0x47, 0xda, 0x0c, 0x43, 0x7a, 0x9f, 0x10, 0x43, 0xb6, 0x4b, 0x09, 0x43, 0x38, 0x22, 0x16, 0x43, + 0x9b, 0x5a, 0x1d, 0x43, 0x38, 0x48, 0x1b, 0x43, 0x2d, 0x96, 0x16, 0x43, 0xa8, 0x66, 0xf8, 0x42, + 0x43, 0xbd, 0x03, 0x43, 0xa7, 0xbd, 0x17, 0x43, 0xba, 0x24, 0x18, 0x43, 0xa3, 0x1c, 0xce, 0x42, + 0xea, 0x34, 0xbe, 0x42, 0x35, 0x42, 0x16, 0x43, 0xff, 0xbd, 0x0b, 0x43, 0x35, 0x47, 0x14, 0x43, + 0x5e, 0xd8, 0x06, 0x43, 0xc2, 0xf2, 0x02, 0x43, 0xfe, 0x70, 0x0e, 0x43, 0x22, 0x89, 0x1a, 0x43, + 0x92, 0x81, 0x07, 0x43, 0x82, 0xd0, 0x01, 0x43, 0xf7, 0x5c, 0x1b, 0x43, 0x7b, 0x8f, 0x11, 0x43, + 0xc0, 0xc5, 0x29, 0x43, 0xd0, 0x5c, 0xe9, 0x42, 0x05, 0x59, 0x92, 0x42, 0x16, 0x05, 0x03, 0x43, + 0x64, 0xc1, 0xd2, 0x42, 0xc0, 0x81, 0x05, 0x43, 0xc8, 0x5d, 0xf5, 0x42, 0xa4, 0x46, 0xf0, 0x42, + 0x29, 0x7d, 0xe9, 0x42, 0x51, 0x7d, 0x14, 0x43, 0xbc, 0xcd, 0x10, 0x43, 0x04, 0x53, 0x13, 0x43, + 0x92, 0x86, 0x1d, 0x43, 0x46, 0x7f, 0x33, 0x43, 0x30, 0xd8, 0x09, 0x43, 0xf4, 0x71, 0xb4, 0x42, + 0x28, 0x02, 0x8c, 0x42, 0xd9, 0x85, 0xf5, 0x42, 0xae, 0x08, 0xc8, 0x42, 0xe7, 0x09, 0xc2, 0x42, + 0x9a, 0x44, 0xc9, 0x42, 0x54, 0x82, 0xea, 0x42, 0x9b, 0x2e, 0xef, 0x42, 0x60, 0xf8, 0x13, 0x43, + 0x0b, 0x08, 0x0e, 0x43, 0x80, 0x73, 0x1f, 0x43, 0x45, 0x7f, 0x30, 0x43, 0xcc, 0xab, 0x14, 0x43, + 0xc0, 0xd6, 0xf3, 0x42, 0x58, 0x7d, 0xa7, 0x42, 0x13, 0x6f, 0x39, 0x42, 0x0a, 0x75, 0x82, 0x42, + 0x7d, 0x01, 0x89, 0x42, 0xc0, 0xdf, 0x89, 0x42, 0x26, 0xf9, 0x9b, 0x42, 0x29, 0x72, 0xa4, 0x42, + 0xce, 0xab, 0xa5, 0x42, 0x74, 0xc7, 0xc5, 0x42, 0x11, 0xf7, 0xcd, 0x42, 0xc2, 0x37, 0xf1, 0x42, + 0x0b, 0xcf, 0xaf, 0x42, 0xb1, 0x5d, 0xa2, 0x42, 0xc7, 0xa3, 0x24, 0x42, 0x51, 0x2e, 0x2e, 0x42, + 0x71, 0xa7, 0x5f, 0x42, 0x3e, 0x43, 0x96, 0x42, 0xfe, 0x56, 0x8e, 0x42, 0x9e, 0xc3, 0xa9, 0x42, + 0x9d, 0x94, 0xd4, 0x42, 0xed, 0x4e, 0xb8, 0x42, 0xda, 0x74, 0xd7, 0x42, 0xeb, 0xca, 0xc0, 0x42, + 0xaf, 0xc7, 0xec, 0x42, 0xd9, 0x2c, 0x8e, 0x42, 0x32, 0x60, 0xab, 0x42, 0xba, 0xfd, 0xce, 0x42, + 0xbc, 0x9a, 0xb7, 0x42, 0x45, 0x35, 0x49, 0x42, 0x6b, 0xb2, 0xbb, 0x42, 0xc8, 0xae, 0x02, 0x43, + 0x77, 0x74, 0xac, 0x42, 0x03, 0x77, 0xdc, 0x42, 0x5f, 0xa8, 0x01, 0x43, 0xef, 0x79, 0xde, 0x42, + 0x71, 0xee, 0x1b, 0x43, 0x69, 0xcf, 0x20, 0x43, 0xf4, 0xbf, 0x30, 0x43, 0x1f, 0x66, 0xfb, 0x42, + 0xf1, 0xae, 0x1c, 0x43, 0x66, 0x6e, 0x0f, 0x43, 0x00, 0x98, 0x13, 0x43, 0xd1, 0xfa, 0xc1, 0x42, + 0xd7, 0x67, 0xc3, 0x42, 0xc7, 0x1a, 0xe0, 0x42, 0xf1, 0xfe, 0xbd, 0x42, 0xd7, 0xdc, 0x08, 0x43, + 0x58, 0x72, 0x15, 0x43, 0x58, 0xd5, 0x11, 0x43, 0x92, 0x57, 0x23, 0x43, 0xc2, 0x9f, 0x27, 0x43, + 0x1e, 0xca, 0x29, 0x43, 0xe2, 0xbf, 0x07, 0x43, 0x05, 0x82, 0x1a, 0x43, 0x0c, 0x67, 0x1c, 0x43, + 0xae, 0xa2, 0x1a, 0x43, 0x8c, 0xb9, 0xbf, 0x42, 0x73, 0xf9, 0xcf, 0x42, 0x0c, 0x0b, 0x02, 0x43, + 0x46, 0xb0, 0xe3, 0x42, 0xbd, 0xdc, 0xde, 0x42, 0xf5, 0x1e, 0x03, 0x43, 0x3c, 0xf4, 0x09, 0x43, + 0x7e, 0x74, 0x47, 0x43, 0x02, 0x44, 0x37, 0x43, 0x56, 0x50, 0x33, 0x43, 0xbf, 0x77, 0x16, 0x43, + 0xeb, 0x9a, 0x1f, 0x43, 0x8a, 0x9f, 0x1f, 0x43, 0x8d, 0xbb, 0x0f, 0x43, 0x98, 0x19, 0xb4, 0x42, + 0x0b, 0x1c, 0xb0, 0x42, 0x3b, 0xf9, 0xf0, 0x42, 0x70, 0xbc, 0xe4, 0x42, 0xfc, 0x5f, 0x06, 0x43, + 0xb7, 0x5f, 0x03, 0x43, 0x8a, 0xf0, 0x15, 0x43, 0x58, 0xc6, 0x43, 0x43, 0x06, 0x20, 0x3a, 0x43, + 0x23, 0xe3, 0x1b, 0x43, 0x21, 0xba, 0x21, 0x43, 0x00, 0xbd, 0x22, 0x43, 0x41, 0x5e, 0x12, 0x43, + 0x0b, 0x07, 0x05, 0x43, 0x25, 0xa7, 0xa0, 0x42, 0xb5, 0xd0, 0xce, 0x42, 0xf2, 0x04, 0x0a, 0x43, + 0x88, 0xe8, 0xfd, 0x42, 0xf0, 0xab, 0x10, 0x43, 0x4e, 0x2e, 0x05, 0x43, 0x20, 0xfa, 0x23, 0x43, + 0x75, 0x3b, 0x3b, 0x43, 0x5a, 0x30, 0x4e, 0x43, 0x5a, 0xd4, 0x3a, 0x43, 0xdb, 0x30, 0x11, 0x43, + 0xa7, 0x31, 0x11, 0x43, 0x5f, 0xdf, 0x04, 0x43, 0x3b, 0xcb, 0xe7, 0x42, 0xdb, 0x76, 0xaa, 0x42, + 0x82, 0xbd, 0xe0, 0x42, 0xc1, 0xfc, 0x10, 0x43, 0x13, 0x5d, 0xfd, 0x42, 0xcd, 0x26, 0x02, 0x43, + 0x2e, 0x8b, 0x15, 0x43, 0xc3, 0x45, 0x20, 0x43, 0x51, 0x07, 0x30, 0x43, 0x5a, 0xb6, 0x40, 0x43, + 0x02, 0xca, 0x19, 0x43, 0x40, 0xfc, 0xf1, 0x42, 0x57, 0xcd, 0xee, 0x42, 0x5e, 0x1f, 0x0d, 0x43, + 0x2a, 0x26, 0x0e, 0x43, 0x1b, 0x02, 0xcf, 0x42, 0x43, 0xfc, 0xd3, 0x42, 0xc8, 0xca, 0x0d, 0x43, + 0x33, 0xb2, 0xf6, 0x42, 0x23, 0xc6, 0xfe, 0x42, 0x56, 0x6f, 0x04, 0x43, 0x24, 0xdf, 0x2d, 0x43, + 0x8d, 0xf3, 0x27, 0x43, 0x6b, 0xec, 0x15, 0x43, 0x9a, 0x97, 0xfe, 0x42, 0x89, 0x20, 0xe2, 0x42, + 0x0a, 0x93, 0xdd, 0x42, 0xcf, 0xb1, 0xfe, 0x42, 0x16, 0xa4, 0x10, 0x43, 0x4c, 0x28, 0xcf, 0x42, + 0x5c, 0x01, 0xbe, 0x42, 0xed, 0xc5, 0x07, 0x43, 0x55, 0x13, 0x1c, 0x43, 0x75, 0xca, 0x18, 0x43, + 0x3e, 0x35, 0x0f, 0x43, 0x4d, 0xab, 0x14, 0x43, 0xf5, 0xaa, 0x15, 0x43, 0x36, 0x75, 0x14, 0x43, + 0x4b, 0xeb, 0x0a, 0x43, 0x46, 0x27, 0x0e, 0x43, 0xee, 0xfe, 0x00, 0x43, 0xc0, 0x58, 0x01, 0x43, + 0xe4, 0xcd, 0x0d, 0x43, 0x46, 0x63, 0xc1, 0x42, 0x85, 0xc6, 0xd2, 0x42, 0x8e, 0x4b, 0x14, 0x43, + 0xa1, 0x69, 0x18, 0x43, 0x45, 0xbd, 0x22, 0x43, 0xa0, 0x62, 0x15, 0x43, 0x7e, 0x3c, 0x22, 0x43, + 0x5e, 0xd7, 0x1b, 0x43, 0xe0, 0x18, 0x2c, 0x43, 0x6a, 0x9b, 0x22, 0x43, 0xc0, 0xbf, 0x12, 0x43, + 0xf4, 0xbd, 0x0d, 0x43, 0x98, 0x54, 0x1b, 0x43, 0xdc, 0x3a, 0x23, 0x43, 0x86, 0xbb, 0xe2, 0x42, + 0x6f, 0x8e, 0xc7, 0x42, 0x71, 0x56, 0x1f, 0x43, 0xba, 0xe9, 0x13, 0x43, 0x62, 0xb3, 0x1f, 0x43, + 0xee, 0xae, 0x1b, 0x43, 0xe6, 0x36, 0x1e, 0x43, 0xfa, 0x59, 0x15, 0x43, 0x44, 0xe1, 0x1f, 0x43, + 0x96, 0x33, 0x18, 0x43, 0xc0, 0x35, 0x18, 0x43, 0x81, 0x48, 0x20, 0x43, 0xc0, 0xd3, 0x1b, 0x43, + 0xfe, 0x3f, 0x42, 0x43, 0x8f, 0xf9, 0xf7, 0x42, 0x16, 0xd7, 0xa6, 0x42, 0xca, 0x49, 0x07, 0x43, + 0x6d, 0x59, 0xde, 0x42, 0x4b, 0x50, 0x0d, 0x43, 0xa6, 0x80, 0xf4, 0x42, 0x34, 0xac, 0xe7, 0x42, + 0x50, 0x0b, 0x08, 0x43, 0x22, 0x74, 0x1b, 0x43, 0x9a, 0xee, 0x1f, 0x43, 0x3a, 0x1f, 0x2b, 0x43, + 0x2f, 0x6f, 0x27, 0x43, 0x48, 0x7b, 0x3d, 0x43, 0x73, 0x5c, 0x18, 0x43, 0xe3, 0xd0, 0xc1, 0x42, + 0xa9, 0x29, 0xc3, 0x42, 0x31, 0x61, 0xe6, 0x42, 0xc1, 0x8d, 0xa6, 0x42, 0xb4, 0x30, 0xf4, 0x42, + 0xe3, 0x90, 0x02, 0x43, 0x18, 0x53, 0x04, 0x43, 0xc5, 0x3f, 0xfe, 0x42, 0x78, 0x89, 0x16, 0x43, + 0x9d, 0x49, 0x25, 0x43, 0x49, 0xe9, 0x39, 0x43, 0xea, 0x85, 0x40, 0x43, 0xaa, 0x0e, 0x22, 0x43, + 0xf3, 0x35, 0xe8, 0x42, 0x89, 0x36, 0xa6, 0x42, 0xf3, 0x0a, 0x72, 0x42, 0xc9, 0x7e, 0x8b, 0x42, + 0x89, 0x25, 0x99, 0x42, 0xa2, 0xd7, 0x9a, 0x42, 0x3f, 0x01, 0xb6, 0x42, 0x0d, 0x75, 0xb9, 0x42, + 0x41, 0xe7, 0xb4, 0x42, 0x95, 0xf9, 0xd2, 0x42, 0xf1, 0x91, 0xe3, 0x42, 0xb6, 0x0d, 0x06, 0x43, + 0x99, 0xc3, 0xcd, 0x42, 0x93, 0x43, 0xa1, 0x42, 0xeb, 0x50, 0x76, 0x42, 0xe3, 0x82, 0x6d, 0x42, + 0x92, 0x15, 0x36, 0x42, 0x70, 0x82, 0x8a, 0x42, 0x9f, 0x24, 0x7f, 0x42, 0xda, 0x5f, 0x9f, 0x42, + 0xd0, 0x1c, 0xc9, 0x42, 0x92, 0x36, 0xc4, 0x42, 0x86, 0x27, 0xc1, 0x42, 0x2a, 0xac, 0xbc, 0x42, + 0x58, 0xc1, 0xc3, 0x42, 0x62, 0x7d, 0x88, 0x42, 0x3c, 0x6a, 0xd6, 0x42, 0xdc, 0xda, 0xa9, 0x42, + 0x52, 0xbb, 0xab, 0x42, 0x09, 0x51, 0x34, 0x42, 0x06, 0x65, 0x9f, 0x42, 0xda, 0x70, 0xcd, 0x42, + 0x40, 0x31, 0xd5, 0x42, 0x48, 0x53, 0xfc, 0x42, 0xc2, 0x32, 0x0b, 0x43, 0x52, 0x85, 0xfb, 0x42, + 0x4b, 0xc0, 0x17, 0x43, 0x1b, 0xfc, 0x11, 0x43, 0x64, 0xe7, 0x19, 0x43, 0xc4, 0xd5, 0xd7, 0x42, + 0xba, 0x06, 0x19, 0x43, 0x63, 0xa7, 0x05, 0x43, 0xa7, 0xf8, 0x18, 0x43, 0xf8, 0x9e, 0xaa, 0x42, + 0x32, 0xbf, 0xba, 0x42, 0x50, 0x7d, 0xb7, 0x42, 0x16, 0xd3, 0xbd, 0x42, 0xcc, 0xcc, 0x00, 0x43, + 0xd3, 0xd6, 0x09, 0x43, 0x71, 0xca, 0x06, 0x43, 0x87, 0x8c, 0x20, 0x43, 0xf3, 0x21, 0x23, 0x43, + 0xa7, 0x0c, 0x13, 0x43, 0xa0, 0xd4, 0x01, 0x43, 0x97, 0x68, 0x0d, 0x43, 0x66, 0xdd, 0x07, 0x43, + 0xca, 0x1d, 0x0f, 0x43, 0xc0, 0xdd, 0xc4, 0x42, 0xb8, 0xf1, 0xa0, 0x42, 0x1e, 0x48, 0xf6, 0x42, + 0x3e, 0x9f, 0xd9, 0x42, 0x32, 0xfe, 0x06, 0x43, 0x38, 0x3e, 0xfa, 0x42, 0x49, 0x11, 0x15, 0x43, + 0xab, 0x3f, 0x1b, 0x43, 0xc7, 0xfd, 0x27, 0x43, 0x21, 0xfc, 0x1f, 0x43, 0x50, 0xaf, 0x1d, 0x43, + 0x29, 0xad, 0x02, 0x43, 0x49, 0xe3, 0x16, 0x43, 0xe0, 0x1a, 0xfb, 0x42, 0xa6, 0x32, 0xbd, 0x42, + 0x90, 0xd9, 0xcd, 0x42, 0xce, 0x5a, 0xea, 0x42, 0xe4, 0xbb, 0xd2, 0x42, 0xf4, 0x73, 0x01, 0x43, + 0x26, 0x9a, 0xda, 0x42, 0x7a, 0x81, 0x17, 0x43, 0x7b, 0x8d, 0x28, 0x43, 0xf1, 0x59, 0x23, 0x43, + 0x51, 0xf3, 0x28, 0x43, 0xdf, 0x50, 0x19, 0x43, 0x73, 0xae, 0x09, 0x43, 0x9a, 0x7c, 0xf8, 0x42, + 0x66, 0x04, 0xf2, 0x42, 0x20, 0x5b, 0x9f, 0x42, 0xec, 0x3c, 0xdb, 0x42, 0x0d, 0xc4, 0x04, 0x43, + 0x8c, 0xac, 0xeb, 0x42, 0x72, 0x47, 0x0b, 0x43, 0x2c, 0xba, 0xf5, 0x42, 0x73, 0xd7, 0x06, 0x43, + 0x15, 0x6a, 0x36, 0x43, 0xdd, 0xb7, 0x35, 0x43, 0x57, 0x89, 0x33, 0x43, 0x6f, 0xf0, 0x0c, 0x43, + 0xd1, 0x77, 0x16, 0x43, 0x3c, 0x21, 0x00, 0x43, 0xe3, 0x6a, 0x09, 0x43, 0xaa, 0xb1, 0xa8, 0x42, + 0x18, 0x9c, 0xd8, 0x42, 0x9f, 0xe6, 0x0b, 0x43, 0xea, 0x77, 0xe7, 0x42, 0xa8, 0xc4, 0xfb, 0x42, + 0x35, 0xb3, 0x0f, 0x43, 0xe8, 0xc9, 0x12, 0x43, 0x5b, 0x2d, 0x33, 0x43, 0x51, 0xfc, 0x1e, 0x43, + 0xeb, 0x43, 0x03, 0x43, 0x06, 0x11, 0xcf, 0x42, 0x62, 0x1a, 0xed, 0x42, 0xa2, 0xe5, 0x02, 0x43, + 0xa0, 0x6b, 0x0d, 0x43, 0x32, 0x25, 0xa3, 0x42, 0x58, 0x7b, 0xcd, 0x42, 0x3b, 0x7e, 0x12, 0x43, + 0xb4, 0x6a, 0xdc, 0x42, 0x20, 0x02, 0xf6, 0x42, 0x9e, 0x4d, 0xfc, 0x42, 0x94, 0xab, 0x20, 0x43, + 0xcb, 0xdb, 0x1d, 0x43, 0x0c, 0x19, 0x13, 0x43, 0xc7, 0xd8, 0x00, 0x43, 0xe6, 0xc5, 0xd9, 0x42, + 0xe2, 0xae, 0xc9, 0x42, 0x28, 0x70, 0x01, 0x43, 0x93, 0x22, 0x0e, 0x43, 0xf2, 0xbc, 0xb7, 0x42, + 0xba, 0x29, 0xaa, 0x42, 0xe1, 0x49, 0x1a, 0x43, 0xa0, 0xde, 0x00, 0x43, 0xac, 0x00, 0x02, 0x43, + 0x59, 0x3f, 0x01, 0x43, 0x25, 0x1f, 0x20, 0x43, 0x38, 0x32, 0x1c, 0x43, 0x55, 0x7b, 0x05, 0x43, + 0x6a, 0x15, 0x06, 0x43, 0x9b, 0xa0, 0x05, 0x43, 0x5c, 0x86, 0xf0, 0x42, 0xaa, 0xa6, 0xfa, 0x42, + 0x69, 0x51, 0x16, 0x43, 0x54, 0xb6, 0xc9, 0x42, 0x94, 0x73, 0xc5, 0x42, 0x31, 0x68, 0x19, 0x43, + 0x4c, 0xf1, 0x20, 0x43, 0xd8, 0xda, 0x16, 0x43, 0x19, 0x29, 0x0b, 0x43, 0xf1, 0x45, 0x21, 0x43, + 0x38, 0x2f, 0x0c, 0x43, 0xcd, 0xa2, 0x20, 0x43, 0xab, 0xb1, 0x0f, 0x43, 0x02, 0xf4, 0x01, 0x43, + 0x27, 0x9e, 0x02, 0x43, 0x2b, 0x67, 0x12, 0x43, 0x7b, 0x2d, 0x1f, 0x43, 0xfc, 0x3a, 0xde, 0x42, + 0xdc, 0xca, 0xd8, 0x42, 0x52, 0x88, 0x00, 0x43, 0x42, 0x53, 0x22, 0x43, 0x5f, 0xd1, 0x09, 0x43, + 0x9c, 0x0b, 0x07, 0x43, 0x54, 0x98, 0x0c, 0x43, 0xa1, 0xe0, 0x07, 0x43, 0x23, 0x25, 0x26, 0x43, + 0x33, 0x1c, 0x0b, 0x43, 0x3b, 0x39, 0x04, 0x43, 0xd1, 0xcc, 0x11, 0x43, 0x70, 0xae, 0x17, 0x43, + 0x09, 0x5e, 0x2c, 0x43, 0x4a, 0x81, 0xbf, 0x42, 0x52, 0x5f, 0xad, 0x42, 0xc0, 0x89, 0xe5, 0x42, + 0xea, 0xf0, 0x0a, 0x43, 0x9e, 0x70, 0xfc, 0x42, 0xc8, 0x95, 0xe3, 0x42, 0xf8, 0x98, 0xf5, 0x42, + 0xb1, 0xcc, 0x09, 0x43, 0x47, 0x10, 0x11, 0x43, 0x64, 0xd6, 0x0d, 0x43, 0x18, 0x19, 0x19, 0x43, + 0x80, 0xb2, 0x2a, 0x43, 0x2f, 0x18, 0x2b, 0x43, 0xe6, 0xcd, 0x13, 0x43, 0xd0, 0x9f, 0xa5, 0x42, + 0xd4, 0x99, 0xaa, 0x42, 0x7a, 0x76, 0xc2, 0x42, 0xd6, 0xe5, 0xe2, 0x42, 0x5c, 0x4a, 0x03, 0x43, + 0x14, 0x51, 0xc9, 0x42, 0x0c, 0xf1, 0xce, 0x42, 0xa9, 0x85, 0x09, 0x43, 0x12, 0xd6, 0x1d, 0x43, + 0xa2, 0x30, 0x15, 0x43, 0xdd, 0xe0, 0x2e, 0x43, 0x5f, 0x78, 0x13, 0x43, 0x35, 0x50, 0x08, 0x43, + 0xa4, 0x61, 0xfc, 0x42, 0x8c, 0x96, 0x97, 0x42, 0x79, 0x23, 0x61, 0x42, 0xfe, 0x55, 0x87, 0x42, + 0x94, 0xa3, 0x8b, 0x42, 0x06, 0xf9, 0xb2, 0x42, 0xba, 0xb3, 0xb1, 0x42, 0xde, 0x1a, 0x8c, 0x42, + 0xba, 0x0b, 0xa1, 0x42, 0x5c, 0xab, 0xd3, 0x42, 0x64, 0x98, 0xed, 0x42, 0x10, 0x97, 0xfd, 0x42, + 0x66, 0xfd, 0xc9, 0x42, 0x9c, 0xbc, 0x8a, 0x42, 0xea, 0xed, 0x97, 0x42, 0x17, 0xcd, 0x4c, 0x42, + 0x32, 0xcb, 0xb6, 0x41, 0xb5, 0x7d, 0x60, 0x42, 0x23, 0xc4, 0x86, 0x42, 0x4c, 0xb5, 0x92, 0x42, + 0xd3, 0xf7, 0xab, 0x42, 0x90, 0x26, 0x9e, 0x42, 0x82, 0x0f, 0xbd, 0x42, 0x0a, 0x00, 0xa7, 0x42, + 0x08, 0x96, 0xc0, 0x42, 0xc5, 0x33, 0x8c, 0x42, 0x04, 0xcc, 0xa6, 0x42, 0xf6, 0x85, 0x92, 0x42, + 0xae, 0x54, 0xb9, 0x42, 0xb5, 0x5c, 0x37, 0x42, 0xc3, 0x69, 0xb1, 0x42, 0x73, 0x78, 0xd0, 0x42, + 0x16, 0xc4, 0xa6, 0x42, 0x8c, 0x65, 0xd0, 0x42, 0x3c, 0x2d, 0x0f, 0x43, 0x42, 0x7c, 0xf1, 0x42, + 0x63, 0x70, 0x1c, 0x43, 0xb5, 0xec, 0x10, 0x43, 0x9f, 0x30, 0x19, 0x43, 0x53, 0xf2, 0xed, 0x42, + 0x0b, 0xc2, 0x0d, 0x43, 0x9b, 0x83, 0x1b, 0x43, 0xf6, 0xc6, 0x0a, 0x43, 0x68, 0xc9, 0x97, 0x42, + 0x31, 0xc0, 0xb8, 0x42, 0x3a, 0xd1, 0xd1, 0x42, 0x57, 0x5f, 0xe1, 0x42, 0x44, 0x6e, 0xf5, 0x42, + 0x32, 0x3b, 0x1a, 0x43, 0xee, 0x35, 0x19, 0x43, 0x4d, 0x67, 0x1e, 0x43, 0x87, 0xd1, 0x23, 0x43, + 0x5f, 0x47, 0x14, 0x43, 0x22, 0xff, 0x0a, 0x43, 0x87, 0x46, 0x18, 0x43, 0x2f, 0xbb, 0x0f, 0x43, + 0xdf, 0xa4, 0x12, 0x43, 0xaf, 0xf7, 0xbc, 0x42, 0xb2, 0x53, 0xdb, 0x42, 0x59, 0xd2, 0xe8, 0x42, + 0x38, 0xdd, 0xc4, 0x42, 0x00, 0xdb, 0xe4, 0x42, 0x7b, 0x9f, 0x01, 0x43, 0x02, 0x67, 0x01, 0x43, + 0x90, 0x79, 0x3f, 0x43, 0xa4, 0x6e, 0x33, 0x43, 0x3f, 0x2f, 0x34, 0x43, 0x7e, 0x67, 0x11, 0x43, + 0x69, 0x0b, 0x1e, 0x43, 0x15, 0x70, 0x20, 0x43, 0x4f, 0xc7, 0x06, 0x43, 0x7c, 0x5c, 0xaa, 0x42, + 0x6c, 0x80, 0xad, 0x42, 0x00, 0x1f, 0xe4, 0x42, 0x56, 0x69, 0xf4, 0x42, 0xcb, 0xbb, 0xf6, 0x42, + 0x61, 0x45, 0x06, 0x43, 0x40, 0x83, 0x1b, 0x43, 0x8a, 0xbe, 0x1d, 0x43, 0x23, 0xd9, 0x40, 0x43, + 0xca, 0xbd, 0x29, 0x43, 0x53, 0x64, 0x10, 0x43, 0x7d, 0x59, 0x14, 0x43, 0x2f, 0x9e, 0x19, 0x43, + 0x7e, 0xb4, 0xfc, 0x42, 0x96, 0x91, 0x96, 0x42, 0x6f, 0xf6, 0xcf, 0x42, 0xf5, 0x17, 0x13, 0x43, + 0x65, 0x53, 0xe8, 0x42, 0x40, 0xf5, 0xfc, 0x42, 0x67, 0xc2, 0x08, 0x43, 0xc9, 0x39, 0x0a, 0x43, + 0x5d, 0x71, 0x36, 0x43, 0xe3, 0xd0, 0x4b, 0x43, 0x45, 0x41, 0x3c, 0x43, 0xee, 0xfd, 0x12, 0x43, + 0x67, 0xaf, 0x0d, 0x43, 0xe7, 0xfe, 0x05, 0x43, 0x6d, 0xfe, 0x00, 0x43, 0x6c, 0xf7, 0xa4, 0x42, + 0xc9, 0x10, 0xd0, 0x42, 0x2b, 0xf1, 0x0f, 0x43, 0xfe, 0x3d, 0xfd, 0x42, 0xdc, 0xc8, 0xfa, 0x42, + 0xdf, 0xa4, 0x0f, 0x43, 0x54, 0x08, 0x16, 0x43, 0x2f, 0x0a, 0x2a, 0x43, 0x3e, 0x13, 0x2c, 0x43, + 0xd8, 0x7f, 0x19, 0x43, 0x25, 0x04, 0xf3, 0x42, 0x27, 0x86, 0xe1, 0x42, 0x51, 0xb9, 0xf3, 0x42, + 0xf5, 0x35, 0x18, 0x43, 0x74, 0xb9, 0xb0, 0x42, 0x34, 0x2e, 0xc8, 0x42, 0xdc, 0x39, 0x05, 0x43, + 0x50, 0x0b, 0xf5, 0x42, 0x5c, 0x63, 0x0b, 0x43, 0x1c, 0x45, 0xf9, 0x42, 0x03, 0x4b, 0x1c, 0x43, + 0x8c, 0xf5, 0x2c, 0x43, 0xfc, 0x67, 0x29, 0x43, 0xff, 0x60, 0x21, 0x43, 0xe6, 0x4b, 0xcb, 0x42, + 0x1f, 0x99, 0xcb, 0x42, 0xb0, 0x24, 0x0f, 0x43, 0x7b, 0x9b, 0x1c, 0x43, 0x83, 0x6f, 0xb7, 0x42, + 0x51, 0xd7, 0xc8, 0x42, 0x79, 0xd8, 0x23, 0x43, 0x3e, 0x5c, 0x0e, 0x43, 0x3b, 0x82, 0xf0, 0x42, + 0x77, 0x13, 0x03, 0x43, 0x7f, 0x8e, 0x12, 0x43, 0xe7, 0x62, 0x11, 0x43, 0x72, 0xa1, 0x07, 0x43, + 0x11, 0xdd, 0x16, 0x43, 0x8f, 0x6f, 0xef, 0x42, 0x19, 0x29, 0x05, 0x43, 0x4e, 0x2f, 0xe8, 0x42, + 0x9b, 0x32, 0x16, 0x43, 0x33, 0x9c, 0xd7, 0x42, 0xee, 0x05, 0xb7, 0x42, 0x83, 0x9b, 0x20, 0x43, + 0x34, 0xe0, 0x12, 0x43, 0xb4, 0xc2, 0x23, 0x43, 0xe3, 0x37, 0x1e, 0x43, 0xa3, 0xc0, 0x09, 0x43, + 0x39, 0xf4, 0x17, 0x43, 0x05, 0xf9, 0x1f, 0x43, 0xf5, 0xad, 0x17, 0x43, 0xf4, 0xed, 0x15, 0x43, + 0x78, 0x60, 0xfa, 0x42, 0xb5, 0x9c, 0x07, 0x43, 0x49, 0xa8, 0x26, 0x43, 0x59, 0xa4, 0xe6, 0x42, + 0xb4, 0x29, 0xa6, 0x42, 0xca, 0x81, 0x1c, 0x43, 0x50, 0x63, 0x18, 0x43, 0xef, 0x23, 0x1b, 0x43, + 0x47, 0x01, 0x1b, 0x43, 0x11, 0x17, 0x19, 0x43, 0x2d, 0xfc, 0x18, 0x43, 0x33, 0x66, 0x10, 0x43, + 0x81, 0x5e, 0x0e, 0x43, 0xbc, 0xb7, 0x09, 0x43, 0xac, 0x63, 0x25, 0x43, 0xec, 0xf6, 0x20, 0x43, + 0xbf, 0xb5, 0x1f, 0x43, 0x56, 0xcf, 0xd7, 0x42, 0x80, 0xb3, 0x98, 0x42, 0x66, 0x90, 0x0d, 0x43, + 0xf8, 0x0f, 0xf9, 0x42, 0x9f, 0x7a, 0x05, 0x43, 0x34, 0x07, 0xed, 0x42, 0xb3, 0x1f, 0x05, 0x43, + 0xc6, 0x38, 0x17, 0x43, 0x5c, 0x1c, 0x2d, 0x43, 0xe1, 0xf8, 0x0b, 0x43, 0x9f, 0xfe, 0x25, 0x43, + 0xb6, 0xb7, 0x1d, 0x43, 0x1b, 0xb5, 0x39, 0x43, 0xdf, 0xde, 0x1c, 0x43, 0x1b, 0x7f, 0xc4, 0x42, + 0xaf, 0x61, 0xa9, 0x42, 0xd2, 0x23, 0xdd, 0x42, 0x06, 0x1a, 0xe6, 0x42, 0x72, 0xd4, 0xf6, 0x42, + 0x01, 0x1f, 0xcb, 0x42, 0xd8, 0x79, 0xdd, 0x42, 0x3d, 0x05, 0xdc, 0x42, 0xac, 0xdb, 0x28, 0x43, + 0x55, 0x02, 0x24, 0x43, 0xb9, 0xdd, 0x2c, 0x43, 0x51, 0xbc, 0x1c, 0x43, 0x99, 0xc3, 0x1c, 0x43, + 0x70, 0x4d, 0x05, 0x43, 0xf2, 0xd9, 0xac, 0x42, 0xfd, 0xac, 0x2a, 0x42, 0x19, 0x32, 0x9c, 0x42, + 0xa4, 0x19, 0x85, 0x42, 0xc3, 0xe3, 0x98, 0x42, 0xb2, 0xa7, 0xb1, 0x42, 0x36, 0xac, 0x8c, 0x42, + 0x15, 0x0b, 0xa6, 0x42, 0xdd, 0xdf, 0xcd, 0x42, 0xcc, 0x82, 0xed, 0x42, 0x08, 0x66, 0x05, 0x43, + 0x21, 0xf0, 0xd2, 0x42, 0xa3, 0x24, 0xa7, 0x42, 0xb5, 0xf1, 0x45, 0x42, 0xdc, 0x76, 0x52, 0x42, + 0x66, 0x8a, 0x49, 0x42, 0x56, 0x70, 0x9b, 0x42, 0x66, 0x61, 0x60, 0x42, 0xb6, 0xa1, 0xa5, 0x42, + 0x5b, 0x5f, 0xbe, 0x42, 0xc9, 0x3a, 0xc3, 0x42, 0xc4, 0x26, 0xc9, 0x42, 0x5e, 0x81, 0xb2, 0x42, + 0x0b, 0x47, 0xd4, 0x42, 0x6b, 0xd2, 0xae, 0x42, 0x4f, 0x8a, 0xb5, 0x42, 0x22, 0x7a, 0xa8, 0x42, + 0x97, 0xc9, 0xa2, 0x42, 0x85, 0xb0, 0x23, 0x42, 0xea, 0xe8, 0xb0, 0x42, 0xe8, 0xa0, 0xcc, 0x42, + 0x49, 0x0f, 0xd2, 0x42, 0x5c, 0xd2, 0xfd, 0x42, 0xb2, 0xc0, 0xef, 0x42, 0xe8, 0x3a, 0xf4, 0x42, + 0xf7, 0x51, 0x0d, 0x43, 0x76, 0x03, 0x0f, 0x43, 0xae, 0xfc, 0x18, 0x43, 0xba, 0x21, 0xdc, 0x42, + 0x2f, 0x93, 0x08, 0x43, 0x90, 0x30, 0x18, 0x43, 0xce, 0x79, 0x15, 0x43, 0x86, 0x70, 0xb2, 0x42, + 0x04, 0xa4, 0x99, 0x42, 0xfe, 0xf0, 0xe0, 0x42, 0x20, 0xbc, 0xe0, 0x42, 0x5e, 0x23, 0xdc, 0x42, + 0x22, 0xd9, 0x08, 0x43, 0xb2, 0x79, 0x08, 0x43, 0x89, 0xc7, 0x1d, 0x43, 0x94, 0x98, 0x1d, 0x43, + 0xd8, 0xc3, 0x1a, 0x43, 0x04, 0x0a, 0xf2, 0x42, 0x5c, 0xcf, 0x15, 0x43, 0x92, 0x8e, 0x11, 0x43, + 0x22, 0xd0, 0x1b, 0x43, 0x24, 0x30, 0xbe, 0x42, 0x3a, 0x9b, 0xbb, 0x42, 0xf9, 0xaa, 0x04, 0x43, + 0xdb, 0x74, 0xf4, 0x42, 0x43, 0xc3, 0x01, 0x43, 0x71, 0xfe, 0x00, 0x43, 0xfe, 0x2b, 0x0e, 0x43, + 0x56, 0xf6, 0x1b, 0x43, 0xc3, 0xf5, 0x3a, 0x43, 0xe7, 0xa6, 0x31, 0x43, 0x24, 0xd0, 0x24, 0x43, + 0x21, 0x67, 0x17, 0x43, 0x49, 0x04, 0x17, 0x43, 0x1f, 0xb0, 0x0b, 0x43, 0x1c, 0x32, 0x9f, 0x42, + 0x56, 0x49, 0xb4, 0x42, 0xa8, 0x62, 0xe6, 0x42, 0x14, 0xb4, 0xd8, 0x42, 0x2c, 0xa1, 0xe9, 0x42, + 0x6f, 0x3e, 0x01, 0x43, 0x91, 0x47, 0x14, 0x43, 0xbb, 0x17, 0x21, 0x43, 0x6a, 0x13, 0x3d, 0x43, + 0x4b, 0x56, 0x2e, 0x43, 0x34, 0x5a, 0x1d, 0x43, 0x2c, 0xed, 0x0b, 0x43, 0xa2, 0xf6, 0x0d, 0x43, + 0xa0, 0xb7, 0xfb, 0x42, 0xbe, 0x88, 0xb2, 0x42, 0x24, 0x91, 0xba, 0x42, 0x16, 0xc2, 0xf8, 0x42, + 0xe0, 0xf1, 0xfb, 0x42, 0x6f, 0x7c, 0x0b, 0x43, 0x18, 0xcb, 0xea, 0x42, 0xad, 0xf4, 0x14, 0x43, + 0x3a, 0xeb, 0x3e, 0x43, 0xf5, 0x76, 0x40, 0x43, 0x6c, 0xf9, 0x42, 0x43, 0x15, 0x36, 0x17, 0x43, + 0x92, 0x62, 0x02, 0x43, 0x47, 0xc6, 0xf7, 0x42, 0xc9, 0xcc, 0x03, 0x43, 0x7a, 0x56, 0xa8, 0x42, + 0x9e, 0x52, 0xd5, 0x42, 0x75, 0x8a, 0x09, 0x43, 0x75, 0x17, 0xfc, 0x42, 0x57, 0x17, 0xfe, 0x42, + 0x98, 0x84, 0x05, 0x43, 0xf0, 0x43, 0x19, 0x43, 0xe4, 0xc1, 0x27, 0x43, 0x40, 0xd8, 0x11, 0x43, + 0x47, 0x72, 0x18, 0x43, 0x86, 0xcb, 0xea, 0x42, 0x55, 0x31, 0x05, 0x43, 0xac, 0xf4, 0xfa, 0x42, + 0xa0, 0x09, 0x06, 0x43, 0x6d, 0x81, 0xc6, 0x42, 0x98, 0x56, 0xca, 0x42, 0xdb, 0x4b, 0x10, 0x43, + 0x0e, 0xa3, 0xf4, 0x42, 0x1c, 0x0d, 0x00, 0x43, 0x68, 0xb6, 0x05, 0x43, 0x71, 0xc2, 0x08, 0x43, + 0x09, 0xf1, 0x2b, 0x43, 0x0d, 0x1f, 0x10, 0x43, 0x46, 0x21, 0x0a, 0x43, 0x08, 0x5c, 0xea, 0x42, + 0xe3, 0x2b, 0xf8, 0x42, 0x3c, 0x26, 0x04, 0x43, 0xd4, 0x43, 0x04, 0x43, 0xba, 0x6a, 0xce, 0x42, + 0x64, 0xd2, 0xc2, 0x42, 0x96, 0xde, 0x14, 0x43, 0x81, 0xee, 0x01, 0x43, 0x48, 0xe2, 0xf2, 0x42, + 0xd6, 0x50, 0x12, 0x43, 0xc1, 0x08, 0x0a, 0x43, 0xc1, 0x63, 0x1e, 0x43, 0x98, 0xe2, 0x06, 0x43, + 0x03, 0x86, 0xee, 0x42, 0xf6, 0x4e, 0xff, 0x42, 0x84, 0x5e, 0xf7, 0x42, 0xc6, 0x54, 0xfe, 0x42, + 0x16, 0xde, 0x19, 0x43, 0x00, 0x73, 0xc5, 0x42, 0x58, 0xab, 0xb0, 0x42, 0x19, 0x32, 0x20, 0x43, + 0x64, 0xa9, 0x1c, 0x43, 0xd8, 0xcb, 0x1e, 0x43, 0x58, 0x6e, 0x1c, 0x43, 0x1e, 0x82, 0x21, 0x43, + 0xdf, 0x4e, 0x1e, 0x43, 0xea, 0x0d, 0x1e, 0x43, 0x48, 0x71, 0x13, 0x43, 0x02, 0xb8, 0xfb, 0x42, + 0xa8, 0xaa, 0xfd, 0x42, 0x25, 0x6d, 0x1a, 0x43, 0xc0, 0xb9, 0x28, 0x43, 0x27, 0xd9, 0xc6, 0x42, + 0xca, 0x69, 0xb3, 0x42, 0x1a, 0xa5, 0x19, 0x43, 0x64, 0xa7, 0x17, 0x43, 0xe0, 0xcf, 0x0c, 0x43, + 0x45, 0xb3, 0xfc, 0x42, 0xbe, 0x6c, 0x0d, 0x43, 0x24, 0xcf, 0x11, 0x43, 0xfe, 0x89, 0x1a, 0x43, + 0xf6, 0x27, 0x13, 0x43, 0xbb, 0xd7, 0x06, 0x43, 0x3c, 0xc5, 0x1c, 0x43, 0xa4, 0x8c, 0x1a, 0x43, + 0x60, 0x6c, 0x2e, 0x43, 0x5a, 0x77, 0xdd, 0x42, 0x8d, 0x46, 0x9e, 0x42, 0xe8, 0xd5, 0xfa, 0x42, + 0x81, 0x60, 0xe8, 0x42, 0x25, 0xa3, 0x04, 0x43, 0xbc, 0x0f, 0xf9, 0x42, 0x74, 0x4f, 0x04, 0x43, + 0xf1, 0x3c, 0x03, 0x43, 0x56, 0xe8, 0x16, 0x43, 0xcc, 0x1c, 0x10, 0x43, 0xb5, 0xb0, 0x1c, 0x43, + 0x8e, 0x8e, 0x19, 0x43, 0x28, 0xd0, 0x32, 0x43, 0x30, 0x71, 0x19, 0x43, 0xb7, 0xf4, 0xbe, 0x42, + 0x67, 0x0f, 0x99, 0x42, 0x23, 0x3b, 0xeb, 0x42, 0xd8, 0x80, 0xec, 0x42, 0x85, 0xb6, 0xdf, 0x42, + 0x4b, 0x7d, 0xf9, 0x42, 0x21, 0x00, 0xde, 0x42, 0xe4, 0x7f, 0xfb, 0x42, 0x01, 0xc9, 0x17, 0x43, + 0x5c, 0x6f, 0x1d, 0x43, 0xfc, 0x28, 0x32, 0x43, 0x47, 0xc3, 0x1d, 0x43, 0xc4, 0xdb, 0x0f, 0x43, + 0x16, 0x01, 0x06, 0x43, 0xfa, 0x3f, 0xa3, 0x42, 0xe2, 0x2d, 0x6d, 0x42, 0x83, 0x79, 0x94, 0x42, + 0xc2, 0x7f, 0x96, 0x42, 0xf1, 0x10, 0xa1, 0x42, 0x9b, 0xea, 0xa0, 0x42, 0xb4, 0x79, 0x97, 0x42, + 0x2c, 0xf8, 0xa1, 0x42, 0xac, 0x97, 0xd0, 0x42, 0x2e, 0xba, 0xdb, 0x42, 0xb6, 0x0b, 0xfc, 0x42, + 0xd6, 0x52, 0xd2, 0x42, 0x0c, 0xfd, 0xb2, 0x42, 0x6c, 0xa5, 0x83, 0x42, 0x65, 0x4b, 0x69, 0x42, + 0xe1, 0x3f, 0x7a, 0x42, 0x59, 0x6c, 0xbf, 0x42, 0x1c, 0xd6, 0x9c, 0x42, 0x13, 0x33, 0xb5, 0x42, + 0xbc, 0x23, 0xe1, 0x42, 0x31, 0x9f, 0xbf, 0x42, 0x7a, 0x37, 0x03, 0x43, 0xd6, 0xb9, 0xd1, 0x42, + 0xfb, 0x0f, 0xed, 0x42, 0x43, 0x14, 0xc0, 0x42, 0x8d, 0xb0, 0xde, 0x42, 0xdf, 0x7f, 0xc9, 0x42, + 0x6f, 0x4e, 0xf5, 0x42, 0x10, 0xb4, 0x68, 0x42, 0xb5, 0x8f, 0xe9, 0x42, 0x0f, 0x35, 0xf9, 0x42, + 0xf0, 0xd9, 0xbc, 0x42, 0xd3, 0x00, 0x03, 0x43, 0xf8, 0x67, 0x0a, 0x43, 0x2e, 0xa5, 0x07, 0x43, + 0x20, 0x2c, 0x2c, 0x43, 0x9c, 0x88, 0x20, 0x43, 0xf2, 0xfb, 0x27, 0x43, 0x9c, 0x95, 0x0a, 0x43, + 0xaa, 0xbb, 0x1f, 0x43, 0x5a, 0xe4, 0x17, 0x43, 0x9a, 0x18, 0x13, 0x43, 0x29, 0xd3, 0xb6, 0x42, + 0xb8, 0xed, 0xbe, 0x42, 0xb0, 0x31, 0xff, 0x42, 0xcb, 0x76, 0xf5, 0x42, 0x82, 0x45, 0x15, 0x43, + 0x6a, 0xd2, 0x18, 0x43, 0x6a, 0xe0, 0x14, 0x43, 0xb6, 0xe4, 0x3a, 0x43, 0x3a, 0x8b, 0x28, 0x43, + 0x5c, 0x85, 0x33, 0x43, 0x6c, 0x5d, 0x2a, 0x43, 0x6c, 0x7a, 0x1e, 0x43, 0x7a, 0x63, 0x22, 0x43, + 0x10, 0x9d, 0x22, 0x43, 0x1b, 0x21, 0xe5, 0x42, 0xe8, 0xfd, 0xde, 0x42, 0xb5, 0xec, 0xfb, 0x42, + 0x31, 0x8a, 0xdc, 0x42, 0xe4, 0x1a, 0x05, 0x43, 0xbe, 0x56, 0x01, 0x43, 0xbe, 0x10, 0x13, 0x43, + 0x14, 0xef, 0x31, 0x43, 0x48, 0xf0, 0x26, 0x43, 0xac, 0x62, 0x43, 0x43, 0xd2, 0x8f, 0x23, 0x43, + 0x8a, 0x5e, 0x1a, 0x43, 0xa0, 0x5d, 0x1d, 0x43, 0xa0, 0x9b, 0x0f, 0x43, 0x20, 0x4a, 0xd9, 0x42, + 0x19, 0x1c, 0xbb, 0x42, 0x02, 0xc3, 0x05, 0x43, 0x96, 0xe1, 0x12, 0x43, 0x4a, 0x5e, 0x06, 0x43, + 0x8e, 0x0b, 0x17, 0x43, 0x4c, 0xb0, 0x27, 0x43, 0xd0, 0x6e, 0x3f, 0x43, 0xb0, 0x07, 0x3c, 0x43, + 0x36, 0xfe, 0x45, 0x43, 0x5a, 0x42, 0x2e, 0x43, 0xea, 0x02, 0x25, 0x43, 0xaa, 0x46, 0x10, 0x43, + 0x52, 0xa2, 0x15, 0x43, 0x2e, 0xd2, 0xab, 0x42, 0xed, 0xa2, 0xcd, 0x42, 0x58, 0x5d, 0x14, 0x43, + 0xa2, 0x6c, 0x07, 0x43, 0x68, 0xfd, 0x18, 0x43, 0x42, 0x0b, 0x15, 0x43, 0xc0, 0x6f, 0x26, 0x43, + 0x94, 0xb5, 0x4a, 0x43, 0x4e, 0xd8, 0x4f, 0x43, 0xc8, 0x9b, 0x3c, 0x43, 0x96, 0x73, 0x2a, 0x43, + 0xe4, 0xab, 0x0c, 0x43, 0x3b, 0x9e, 0xf5, 0x42, 0xb0, 0x32, 0x0c, 0x43, 0x2d, 0x40, 0xcf, 0x42, + 0xdf, 0x27, 0xd2, 0x42, 0x2e, 0x88, 0x1c, 0x43, 0xb0, 0xeb, 0x12, 0x43, 0x32, 0xa2, 0x0d, 0x43, + 0x0a, 0xdf, 0x02, 0x43, 0x6e, 0x9c, 0x2c, 0x43, 0x84, 0xf5, 0x40, 0x43, 0xf0, 0x02, 0x30, 0x43, + 0x10, 0x90, 0x28, 0x43, 0xe0, 0xc6, 0x03, 0x43, 0x9a, 0x4a, 0xfd, 0x42, 0x57, 0x6b, 0x0e, 0x43, + 0x4a, 0xb9, 0x14, 0x43, 0x8a, 0x3b, 0xcc, 0x42, 0xc1, 0x8e, 0xc6, 0x42, 0x20, 0xa5, 0x23, 0x43, + 0xf8, 0x72, 0x11, 0x43, 0x2a, 0x55, 0x0a, 0x43, 0xda, 0xfa, 0x1a, 0x43, 0xf8, 0xfa, 0x1f, 0x43, + 0x98, 0x66, 0x2c, 0x43, 0x94, 0xf9, 0x14, 0x43, 0xde, 0x7e, 0x12, 0x43, 0x2c, 0x09, 0x00, 0x43, + 0x9d, 0x8b, 0xfc, 0x42, 0xa8, 0x33, 0x21, 0x43, 0xbc, 0x1e, 0x18, 0x43, 0x39, 0xe4, 0xe2, 0x42, + 0xf1, 0xa2, 0xdb, 0x42, 0xb6, 0x59, 0x25, 0x43, 0xce, 0x1a, 0x19, 0x43, 0x98, 0xa5, 0x0d, 0x43, + 0x46, 0x00, 0x15, 0x43, 0xfe, 0x60, 0x29, 0x43, 0xca, 0xe4, 0x20, 0x43, 0x9a, 0x55, 0x1f, 0x43, + 0xc0, 0x08, 0x17, 0x43, 0xfc, 0xdf, 0x0e, 0x43, 0x1b, 0x68, 0x05, 0x43, 0xb2, 0xa4, 0x05, 0x43, + 0xa8, 0x1a, 0x17, 0x43, 0x7b, 0x8d, 0xdb, 0x42, 0xff, 0xd6, 0xe0, 0x42, 0xde, 0x18, 0x1b, 0x43, + 0xae, 0xa5, 0x24, 0x43, 0x84, 0x65, 0x2b, 0x43, 0x9c, 0xa0, 0x2b, 0x43, 0x8c, 0x2f, 0x34, 0x43, + 0x96, 0xe9, 0x24, 0x43, 0x14, 0xbb, 0x3a, 0x43, 0x16, 0x17, 0x1a, 0x43, 0x10, 0xea, 0x06, 0x43, + 0x48, 0xe0, 0x0c, 0x43, 0xe2, 0xd6, 0x1d, 0x43, 0xc4, 0x66, 0x3a, 0x43, 0x37, 0xe4, 0xe4, 0x42, + 0x6a, 0xda, 0xc7, 0x42, 0x02, 0x0e, 0x27, 0x43, 0x40, 0x04, 0x18, 0x43, 0xb8, 0x61, 0x29, 0x43, + 0x9c, 0x9c, 0x0b, 0x43, 0x98, 0xb9, 0x12, 0x43, 0x76, 0x90, 0x22, 0x43, 0xe6, 0x16, 0x27, 0x43, + 0xaa, 0x13, 0x1c, 0x43, 0xf0, 0x33, 0x23, 0x43, 0xd0, 0x45, 0x31, 0x43, 0x18, 0xe3, 0x38, 0x43, + 0x20, 0x7b, 0x3f, 0x43, 0xe9, 0xb7, 0xe6, 0x42, 0x97, 0x1c, 0xc0, 0x42, 0x7f, 0x5b, 0x11, 0x43, + 0x24, 0x17, 0xff, 0x42, 0xf4, 0x04, 0x1b, 0x43, 0xfa, 0xc2, 0x0b, 0x43, 0x02, 0xf7, 0x0a, 0x43, + 0xb8, 0x9a, 0x17, 0x43, 0x8e, 0x15, 0x28, 0x43, 0xd0, 0x45, 0x2e, 0x43, 0xac, 0x1d, 0x2a, 0x43, + 0x80, 0x82, 0x2d, 0x43, 0x0e, 0x65, 0x42, 0x43, 0xbe, 0x63, 0x1c, 0x43, 0x78, 0x4c, 0xdd, 0x42, + 0xea, 0x8f, 0xa9, 0x42, 0xfd, 0x2b, 0xfb, 0x42, 0x73, 0x23, 0xf5, 0x42, 0xc0, 0xbd, 0x06, 0x43, + 0x30, 0x12, 0xfe, 0x42, 0x04, 0x8c, 0x09, 0x43, 0x1a, 0x72, 0x09, 0x43, 0x30, 0x6d, 0x26, 0x43, + 0xec, 0x79, 0x33, 0x43, 0x1c, 0x9e, 0x4b, 0x43, 0xac, 0xcf, 0x25, 0x43, 0xa4, 0x4b, 0x1a, 0x43, + 0xf0, 0x0d, 0x03, 0x43, 0xd1, 0x08, 0xbe, 0x42, 0x05, 0x5e, 0x85, 0x42, 0x7b, 0xe3, 0xb3, 0x42, + 0x95, 0xdc, 0xb0, 0x42, 0x03, 0x35, 0xbb, 0x42, 0x8e, 0x2b, 0xcc, 0x42, 0x0a, 0xdc, 0xd2, 0x42, + 0x3b, 0xd8, 0xc2, 0x42, 0x62, 0xef, 0xf1, 0x42, 0x9f, 0x54, 0xea, 0x42, 0x58, 0x1e, 0x0c, 0x43, + 0xba, 0x43, 0xd6, 0x42, 0x9e, 0xa3, 0xd4, 0x42, 0x8d, 0xb0, 0xa8, 0x42, 0x6b, 0xd7, 0x84, 0x42, + 0xde, 0xe2, 0x4b, 0x42, 0x1e, 0x3e, 0x99, 0x42, 0xa7, 0x7e, 0x93, 0x42, 0x28, 0x5f, 0xd2, 0x42, + 0x98, 0x53, 0xdf, 0x42, 0x52, 0x91, 0xd4, 0x42, 0xb6, 0x76, 0xd9, 0x42, 0x82, 0x53, 0xe4, 0x42, + 0x5a, 0xf1, 0xca, 0x42, 0x6a, 0x8d, 0xa7, 0x42, 0x86, 0x4d, 0xc1, 0x42, 0x50, 0x34, 0xd2, 0x42, + 0xe2, 0x53, 0xaa, 0x42, 0x3e, 0xa7, 0x6d, 0x42, 0x36, 0xc4, 0xcd, 0x42, 0x58, 0x28, 0xce, 0x42, + 0x12, 0xb9, 0xca, 0x42, 0xdf, 0xb4, 0x00, 0x43, 0x57, 0xa2, 0x12, 0x43, 0x4f, 0xa9, 0x13, 0x43, + 0x1a, 0x74, 0x25, 0x43, 0xe5, 0xa9, 0x3d, 0x43, 0x66, 0x7b, 0x44, 0x43, 0x1e, 0xbd, 0x07, 0x43, + 0x97, 0xfc, 0x20, 0x43, 0x27, 0xd6, 0x24, 0x43, 0xbc, 0xc5, 0x23, 0x43, 0x82, 0x03, 0xc2, 0x42, + 0x28, 0x4e, 0xe9, 0x42, 0xf4, 0xab, 0xea, 0x42, 0x58, 0xb6, 0xbf, 0x42, 0xfc, 0xa4, 0xf5, 0x42, + 0x26, 0x8a, 0x25, 0x43, 0x0d, 0xd5, 0x0e, 0x43, 0xc0, 0xd6, 0x3b, 0x43, 0xed, 0x5a, 0x39, 0x43, + 0x86, 0x54, 0x39, 0x43, 0x82, 0x6a, 0x12, 0x43, 0x2a, 0xb5, 0x22, 0x43, 0x4a, 0x7e, 0x23, 0x43, + 0xc0, 0x1b, 0x29, 0x43, 0xb8, 0x23, 0xe0, 0x42, 0x7a, 0x0e, 0xcc, 0x42, 0x36, 0xcf, 0x13, 0x43, + 0xf0, 0x80, 0x04, 0x43, 0x58, 0xd9, 0xfc, 0x42, 0xf6, 0xfe, 0x0e, 0x43, 0x23, 0x9f, 0x1d, 0x43, + 0x55, 0x6d, 0x27, 0x43, 0xcc, 0xa1, 0x46, 0x43, 0x60, 0x15, 0x3a, 0x43, 0x3c, 0x48, 0x28, 0x43, + 0xd2, 0xc9, 0x23, 0x43, 0xce, 0x45, 0x2f, 0x43, 0xe2, 0x4c, 0x26, 0x43, 0x2a, 0xce, 0xd9, 0x42, + 0x58, 0x8b, 0xe3, 0x42, 0x58, 0x5f, 0xfe, 0x42, 0x10, 0x99, 0x0a, 0x43, 0xf7, 0x2a, 0x08, 0x43, + 0xd1, 0x73, 0x1e, 0x43, 0x60, 0xf6, 0x33, 0x43, 0xf1, 0x15, 0x30, 0x43, 0x43, 0x73, 0x47, 0x43, + 0x1b, 0x43, 0x38, 0x43, 0x1f, 0x86, 0x20, 0x43, 0xaf, 0x93, 0x15, 0x43, 0x58, 0xc0, 0x22, 0x43, + 0x06, 0x8b, 0x08, 0x43, 0xda, 0x45, 0xc3, 0x42, 0x72, 0x8c, 0xf3, 0x42, 0x3f, 0x76, 0x2e, 0x43, + 0x2f, 0x7f, 0x10, 0x43, 0x7d, 0xbf, 0x19, 0x43, 0x7c, 0x17, 0x17, 0x43, 0xb4, 0x29, 0x47, 0x43, + 0xe0, 0x5e, 0x55, 0x43, 0xd6, 0xa5, 0x4f, 0x43, 0xce, 0x52, 0x58, 0x43, 0x11, 0xb4, 0x1d, 0x43, + 0x88, 0x41, 0x12, 0x43, 0x9e, 0x67, 0x0b, 0x43, 0xd5, 0xee, 0x11, 0x43, 0x78, 0xea, 0xd2, 0x42, + 0xac, 0x5d, 0xc6, 0x42, 0xc6, 0x1e, 0x24, 0x43, 0x1e, 0xad, 0x17, 0x43, 0x46, 0x47, 0x06, 0x43, + 0x09, 0x0a, 0x18, 0x43, 0x43, 0x85, 0x3a, 0x43, 0x7c, 0xfe, 0x3f, 0x43, 0xc6, 0x58, 0x36, 0x43, + 0x70, 0x11, 0x30, 0x43, 0x00, 0x37, 0xf7, 0x42, 0xec, 0x34, 0x06, 0x43, 0x81, 0xc5, 0x0a, 0x43, + 0x56, 0x86, 0x1f, 0x43, 0x02, 0xf3, 0xee, 0x42, 0x1a, 0xf9, 0xee, 0x42, 0xd0, 0x32, 0x1c, 0x43, + 0xd2, 0xa8, 0x02, 0x43, 0xb7, 0x09, 0x09, 0x43, 0x54, 0x5e, 0x1f, 0x43, 0x02, 0x66, 0x2b, 0x43, + 0x5e, 0xb6, 0x42, 0x43, 0x76, 0x34, 0x23, 0x43, 0x2c, 0x69, 0x1b, 0x43, 0xae, 0xce, 0x0b, 0x43, + 0x36, 0xfd, 0xe9, 0x42, 0x9b, 0x59, 0x07, 0x43, 0x7e, 0x19, 0x1c, 0x43, 0x08, 0xea, 0xfc, 0x42, + 0x5e, 0x3f, 0xdd, 0x42, 0x1d, 0x9b, 0x22, 0x43, 0xe8, 0xfc, 0x20, 0x43, 0xeb, 0xaf, 0x19, 0x43, + 0xfb, 0x23, 0x28, 0x43, 0x79, 0x8b, 0x2f, 0x43, 0x5a, 0xd6, 0x22, 0x43, 0xb8, 0x21, 0x29, 0x43, + 0x13, 0x94, 0x15, 0x43, 0x15, 0x5c, 0x04, 0x43, 0x97, 0x2e, 0x11, 0x43, 0x2e, 0xe1, 0x11, 0x43, + 0x72, 0x05, 0x2c, 0x43, 0x12, 0xde, 0xf4, 0x42, 0xca, 0x5a, 0xcf, 0x42, 0x94, 0x19, 0x3b, 0x43, + 0x67, 0x2e, 0x1d, 0x43, 0xa1, 0x30, 0x1b, 0x43, 0xb7, 0xc9, 0x22, 0x43, 0xca, 0x8b, 0x35, 0x43, + 0x3d, 0x4f, 0x2b, 0x43, 0x72, 0x5f, 0x34, 0x43, 0x72, 0x71, 0x2d, 0x43, 0x05, 0xec, 0x18, 0x43, + 0x1c, 0x64, 0x1d, 0x43, 0x17, 0x42, 0x17, 0x43, 0x72, 0x3f, 0x2b, 0x43, 0xc6, 0x09, 0x0d, 0x43, + 0x78, 0xf5, 0xe1, 0x42, 0xe0, 0xae, 0x20, 0x43, 0x12, 0x35, 0x2a, 0x43, 0xa0, 0x21, 0x41, 0x43, + 0x0b, 0x8a, 0x1c, 0x43, 0xdf, 0xd8, 0x13, 0x43, 0x2a, 0x9d, 0x20, 0x43, 0x04, 0xa8, 0x2e, 0x43, + 0xe1, 0x5f, 0x28, 0x43, 0x4a, 0xf3, 0x16, 0x43, 0x31, 0x5d, 0x2c, 0x43, 0xe6, 0x4d, 0x3b, 0x43, + 0x06, 0x91, 0x2c, 0x43, 0x04, 0xd7, 0xfe, 0x42, 0xba, 0xf8, 0xa7, 0x42, 0xe4, 0x72, 0x0d, 0x43, + 0x21, 0x8d, 0x0f, 0x43, 0xa4, 0x09, 0x21, 0x43, 0x9f, 0x6e, 0x0f, 0x43, 0xbc, 0xac, 0x0e, 0x43, + 0xbe, 0x5d, 0x1b, 0x43, 0xf5, 0xc6, 0x1e, 0x43, 0xca, 0x01, 0x2e, 0x43, 0xe7, 0x60, 0x2c, 0x43, + 0xd2, 0x74, 0x36, 0x43, 0x74, 0xca, 0x41, 0x43, 0x4e, 0x0a, 0x2c, 0x43, 0x28, 0x39, 0xb1, 0x42, + 0x46, 0x1f, 0xaa, 0x42, 0x1a, 0xc1, 0xed, 0x42, 0x4a, 0x9c, 0x00, 0x43, 0xb0, 0x02, 0x0e, 0x43, + 0x08, 0x4e, 0xf3, 0x42, 0x42, 0xb7, 0xfc, 0x42, 0xc7, 0x6f, 0x1c, 0x43, 0x5d, 0xda, 0x31, 0x43, + 0xc6, 0xe6, 0x27, 0x43, 0x0a, 0x88, 0x41, 0x43, 0x52, 0x92, 0x37, 0x43, 0x74, 0xf5, 0x30, 0x43, + 0x52, 0xba, 0x0f, 0x43, 0xcc, 0x93, 0xd8, 0x42, 0x4c, 0xd6, 0x94, 0x42, 0xc4, 0x73, 0x89, 0x42, + 0xe2, 0x7c, 0xad, 0x42, 0xf8, 0x99, 0xc9, 0x42, 0x96, 0xe8, 0xdc, 0x42, 0xc6, 0xaf, 0xb9, 0x42, + 0xf6, 0x6f, 0x95, 0x42, 0x4e, 0xda, 0xf0, 0x42, 0x1b, 0x91, 0x0b, 0x43, 0x79, 0x6b, 0x0c, 0x43, + 0x5c, 0xc4, 0xea, 0x42, 0x4c, 0x44, 0xbe, 0x42, 0x48, 0x19, 0xa9, 0x42, 0xdd, 0x92, 0x51, 0x42, + 0xb2, 0x13, 0x6d, 0x42, 0xd6, 0x6a, 0x98, 0x42, 0x65, 0x83, 0x8e, 0x42, 0x31, 0x08, 0x93, 0x42, + 0x7c, 0x98, 0xbc, 0x42, 0x88, 0x63, 0xbc, 0x42, 0x65, 0x26, 0xd5, 0x42, 0x90, 0xb9, 0xcd, 0x42, + 0x08, 0x86, 0xaf, 0x42, 0x05, 0x15, 0x93, 0x42, 0x86, 0xc6, 0xc7, 0x42, 0x96, 0x1b, 0xac, 0x42, + 0x8c, 0xaa, 0xc5, 0x42, 0xa8, 0xb0, 0x5b, 0x42, 0xc7, 0x70, 0xac, 0x42, 0xac, 0x19, 0xef, 0x42, + 0xac, 0xd8, 0xd2, 0x42, 0x03, 0x6d, 0x07, 0x43, 0x1a, 0x11, 0x16, 0x43, 0xe2, 0x8b, 0x14, 0x43, + 0xa0, 0x84, 0x30, 0x43, 0xac, 0xec, 0x22, 0x43, 0xbf, 0x23, 0x27, 0x43, 0x40, 0xb5, 0xf4, 0x42, + 0x62, 0x2c, 0x15, 0x43, 0x26, 0x41, 0x17, 0x43, 0x2e, 0x1d, 0x1f, 0x43, 0x34, 0x7d, 0x9b, 0x42, + 0x5e, 0x56, 0xd9, 0x42, 0x1e, 0xca, 0xd7, 0x42, 0x9d, 0xab, 0xd7, 0x42, 0x19, 0xaa, 0x06, 0x43, + 0xf1, 0xca, 0x07, 0x43, 0xb1, 0x86, 0x11, 0x43, 0xd5, 0xf5, 0x35, 0x43, 0x90, 0xae, 0x30, 0x43, + 0x8c, 0x4a, 0x2a, 0x43, 0x50, 0xa3, 0x0f, 0x43, 0x7c, 0x6e, 0x17, 0x43, 0xd2, 0xfe, 0x24, 0x43, + 0x74, 0x80, 0x1d, 0x43, 0x74, 0x30, 0xd1, 0x42, 0xda, 0x22, 0xc9, 0x42, 0x58, 0x48, 0xfa, 0x42, + 0x4d, 0x77, 0xc6, 0x42, 0x64, 0xce, 0x0c, 0x43, 0xaf, 0x03, 0x17, 0x43, 0x5b, 0x88, 0x0b, 0x43, + 0xaf, 0x6d, 0x3c, 0x43, 0x55, 0xb1, 0x27, 0x43, 0x62, 0x4f, 0x31, 0x43, 0xdc, 0x4e, 0x22, 0x43, + 0x1a, 0x95, 0x1a, 0x43, 0x1c, 0x9e, 0x23, 0x43, 0xda, 0x91, 0x12, 0x43, 0x0a, 0x8e, 0xdc, 0x42, + 0x42, 0xfc, 0xb5, 0x42, 0xf9, 0x91, 0xf7, 0x42, 0xf9, 0x19, 0xf7, 0x42, 0xf3, 0x07, 0x09, 0x43, + 0x09, 0x88, 0x0f, 0x43, 0xea, 0xa2, 0x22, 0x43, 0xb8, 0x65, 0x1f, 0x43, 0xdb, 0xbb, 0x3f, 0x43, + 0xf3, 0x0f, 0x2d, 0x43, 0xf2, 0x99, 0x1c, 0x43, 0xd0, 0xc8, 0x1c, 0x43, 0x8b, 0xd3, 0x04, 0x43, + 0x38, 0x8b, 0x07, 0x43, 0x9e, 0x73, 0x9a, 0x42, 0x97, 0xe3, 0xd0, 0x42, 0xf8, 0xe2, 0x0e, 0x43, + 0x33, 0xeb, 0x04, 0x43, 0x61, 0x16, 0x0b, 0x43, 0x86, 0x59, 0x05, 0x43, 0x85, 0xd0, 0x1b, 0x43, + 0x9b, 0x56, 0x3f, 0x43, 0x34, 0x66, 0x43, 0x43, 0xaa, 0xf8, 0x49, 0x43, 0xe9, 0xa0, 0x1c, 0x43, + 0xed, 0xa6, 0x02, 0x43, 0x38, 0x92, 0xfd, 0x42, 0xc2, 0x98, 0x13, 0x43, 0x55, 0x05, 0xc7, 0x42, + 0x10, 0x44, 0xe0, 0x42, 0x0c, 0xa2, 0x1f, 0x43, 0x3e, 0x2d, 0x07, 0x43, 0x24, 0xae, 0x10, 0x43, + 0x22, 0x02, 0x1b, 0x43, 0x01, 0xaf, 0x24, 0x43, 0x50, 0x77, 0x4c, 0x43, 0x3f, 0x08, 0x33, 0x43, + 0x83, 0xd2, 0x11, 0x43, 0x5e, 0xc0, 0x01, 0x43, 0xfa, 0x51, 0xe8, 0x42, 0x28, 0xcc, 0x01, 0x43, + 0xbc, 0x87, 0x17, 0x43, 0x98, 0x72, 0xb9, 0x42, 0x30, 0xda, 0xd7, 0x42, 0x50, 0x31, 0x16, 0x43, + 0x8e, 0xb6, 0x09, 0x43, 0xc9, 0xba, 0x12, 0x43, 0x37, 0x7b, 0x1a, 0x43, 0x07, 0xe9, 0x24, 0x43, + 0xae, 0x60, 0x1f, 0x43, 0x54, 0xd8, 0x1f, 0x43, 0x9c, 0xf8, 0x0b, 0x43, 0xd1, 0xc1, 0xe7, 0x42, + 0xce, 0xa8, 0xe8, 0x42, 0x3c, 0x87, 0x08, 0x43, 0x24, 0xce, 0x17, 0x43, 0xc9, 0xfb, 0xdc, 0x42, + 0x48, 0xb2, 0xdb, 0x42, 0xad, 0x32, 0x1d, 0x43, 0x66, 0x5c, 0x11, 0x43, 0xfd, 0x61, 0x02, 0x43, + 0xac, 0x2b, 0x15, 0x43, 0x19, 0x8a, 0x1d, 0x43, 0x97, 0x4e, 0x23, 0x43, 0xb0, 0x0d, 0x20, 0x43, + 0xa4, 0x22, 0x07, 0x43, 0x56, 0x9c, 0xfe, 0x42, 0xeb, 0x67, 0x03, 0x43, 0x24, 0xa6, 0x0a, 0x43, + 0x18, 0x8c, 0x1f, 0x43, 0x6c, 0x6b, 0xcd, 0x42, 0xd4, 0x5d, 0xd1, 0x42, 0x38, 0x8a, 0x2e, 0x43, + 0xa4, 0xf0, 0x25, 0x43, 0xa8, 0x11, 0x21, 0x43, 0x23, 0x07, 0x29, 0x43, 0x42, 0xd7, 0x2f, 0x43, + 0xd1, 0x58, 0x20, 0x43, 0xb9, 0x00, 0x26, 0x43, 0x1d, 0xe4, 0x18, 0x43, 0x79, 0x6a, 0x0b, 0x43, + 0xf6, 0x6e, 0x0c, 0x43, 0x65, 0x9a, 0x12, 0x43, 0x3e, 0xe5, 0x2c, 0x43, 0x42, 0x17, 0xf9, 0x42, + 0x31, 0xc0, 0xd4, 0x42, 0x86, 0xeb, 0x27, 0x43, 0x60, 0x37, 0x28, 0x43, 0xfc, 0xae, 0x28, 0x43, + 0x66, 0xbb, 0x07, 0x43, 0x76, 0x2f, 0x1f, 0x43, 0xcd, 0x3b, 0x11, 0x43, 0xfe, 0xaa, 0x2f, 0x43, + 0xad, 0xf9, 0x08, 0x43, 0x1f, 0x6c, 0x13, 0x43, 0xd1, 0x14, 0x25, 0x43, 0x0e, 0x63, 0x33, 0x43, + 0x06, 0xa7, 0x33, 0x43, 0xa2, 0x74, 0xf7, 0x42, 0x80, 0xd2, 0xaf, 0x42, 0xa2, 0x42, 0x0e, 0x43, + 0xf1, 0x57, 0x0c, 0x43, 0x70, 0x43, 0x0f, 0x43, 0x7f, 0xe2, 0xef, 0x42, 0xcc, 0x11, 0x05, 0x43, + 0x67, 0xaa, 0x15, 0x43, 0x20, 0xfd, 0x1d, 0x43, 0x89, 0xfd, 0x25, 0x43, 0x14, 0xa5, 0x22, 0x43, + 0xea, 0x28, 0x30, 0x43, 0x78, 0xec, 0x40, 0x43, 0x34, 0xc3, 0x21, 0x43, 0x88, 0xd9, 0xcd, 0x42, + 0xda, 0xb0, 0xa9, 0x42, 0x16, 0x3b, 0xe1, 0x42, 0xf8, 0x5c, 0x05, 0x43, 0x2f, 0x39, 0xf7, 0x42, + 0xae, 0x31, 0xf0, 0x42, 0x9a, 0xbd, 0xf2, 0x42, 0x04, 0xb2, 0x0a, 0x43, 0x69, 0xb0, 0x1e, 0x43, + 0xdf, 0xc4, 0x30, 0x43, 0x8c, 0x7f, 0x35, 0x43, 0x79, 0x5a, 0x2c, 0x43, 0x40, 0x43, 0x1b, 0x43, + 0x12, 0xf9, 0xed, 0x42, 0xcb, 0xde, 0xa6, 0x42, 0xa4, 0x2c, 0x82, 0x42, 0xfc, 0xfe, 0x99, 0x42, + 0xd0, 0x83, 0xaa, 0x42, 0xf4, 0xc4, 0xb7, 0x42, 0x8f, 0xb3, 0xb1, 0x42, 0xd6, 0x0c, 0xb9, 0x42, + 0x6a, 0x1a, 0xc4, 0x42, 0x56, 0x75, 0xe0, 0x42, 0x94, 0x2b, 0xf7, 0x42, 0xe0, 0xeb, 0x08, 0x43, + 0xf3, 0xf5, 0xd0, 0x42, 0xc6, 0x78, 0xc6, 0x42, 0x2c, 0xf4, 0xa0, 0x42, 0x7a, 0x33, 0x5d, 0x42, + 0xee, 0xf4, 0x13, 0x42, 0x30, 0xb3, 0x66, 0x42, 0x3e, 0x45, 0x61, 0x42, 0xf4, 0x84, 0x7f, 0x42, + 0xe1, 0x9a, 0x8c, 0x42, 0x8d, 0x34, 0x99, 0x42, 0x5e, 0x82, 0xa5, 0x42, 0x3c, 0x22, 0xbf, 0x42, + 0x1b, 0xaf, 0x9f, 0x42, 0xd2, 0xc8, 0x9b, 0x42, 0x63, 0x54, 0x90, 0x42, 0x52, 0x0c, 0x9b, 0x42, + 0x56, 0x22, 0xb4, 0x42, 0x66, 0x13, 0x1b, 0x42, 0xf8, 0xde, 0x9c, 0x42, 0x68, 0x3a, 0xc9, 0x42, + 0xba, 0x72, 0xb4, 0x42, 0xb5, 0x35, 0xb9, 0x42, 0xd5, 0x9a, 0xe9, 0x42, 0x19, 0xe7, 0xd2, 0x42, + 0x11, 0xd2, 0x11, 0x43, 0x29, 0xd3, 0xef, 0x42, 0xb4, 0x54, 0x10, 0x43, 0xdc, 0x52, 0xc2, 0x42, + 0x76, 0xcd, 0xdc, 0x42, 0xcb, 0x23, 0x0e, 0x43, 0xc6, 0x9f, 0xfb, 0x42, 0x42, 0xce, 0x96, 0x42, + 0x8c, 0xaa, 0xa0, 0x42, 0x2a, 0x2b, 0xed, 0x42, 0xfb, 0x73, 0xdf, 0x42, 0x26, 0x9a, 0xde, 0x42, + 0x57, 0xee, 0x0e, 0x43, 0xcb, 0xf6, 0x0c, 0x43, 0xa1, 0x8e, 0x11, 0x43, 0xe6, 0x30, 0x0c, 0x43, + 0x6b, 0x76, 0x18, 0x43, 0x28, 0xb9, 0xfe, 0x42, 0x69, 0xb6, 0x13, 0x43, 0xa4, 0xa7, 0x10, 0x43, + 0xc3, 0x30, 0x10, 0x43, 0x89, 0xc7, 0xde, 0x42, 0x3a, 0x2d, 0xc4, 0x42, 0xef, 0x50, 0xce, 0x42, + 0x66, 0xc9, 0x9c, 0x42, 0xd5, 0x94, 0xe3, 0x42, 0x60, 0xd3, 0x08, 0x43, 0x59, 0x9c, 0xe8, 0x42, + 0x0f, 0x4a, 0x1c, 0x43, 0x68, 0x81, 0x25, 0x43, 0x72, 0x47, 0x2f, 0x43, 0x6d, 0x1b, 0x0a, 0x43, + 0xf5, 0x62, 0x09, 0x43, 0xb3, 0x11, 0x08, 0x43, 0x21, 0x7f, 0x02, 0x43, 0x86, 0xd0, 0x8b, 0x42, + 0x9c, 0xe1, 0x83, 0x42, 0x5c, 0x77, 0xc4, 0x42, 0xaa, 0xb4, 0xcd, 0x42, 0x12, 0xcf, 0xe0, 0x42, + 0x96, 0x16, 0xf9, 0x42, 0xbc, 0xe0, 0x07, 0x43, 0x3d, 0xb8, 0x19, 0x43, 0x5c, 0x3f, 0x35, 0x43, + 0x05, 0xab, 0x22, 0x43, 0x37, 0x42, 0x06, 0x43, 0x82, 0x68, 0x04, 0x43, 0xdd, 0x20, 0x01, 0x43, + 0xaa, 0x28, 0xd8, 0x42, 0xd1, 0x67, 0x94, 0x42, 0x84, 0xe7, 0xa9, 0x42, 0xde, 0x15, 0xdd, 0x42, + 0x21, 0x0f, 0xd0, 0x42, 0x2e, 0x8f, 0xc6, 0x42, 0x37, 0x33, 0xe6, 0x42, 0x46, 0x04, 0xf6, 0x42, + 0xac, 0x0e, 0x33, 0x43, 0xe5, 0x7a, 0x3d, 0x43, 0x5f, 0x95, 0x1d, 0x43, 0xa5, 0xb1, 0xf0, 0x42, + 0xd7, 0xc1, 0x05, 0x43, 0xd0, 0xc9, 0xe8, 0x42, 0xce, 0x14, 0xea, 0x42, 0xea, 0xe0, 0x8c, 0x42, + 0xe4, 0x08, 0xb9, 0x42, 0xa8, 0xf4, 0x07, 0x43, 0xbb, 0x58, 0xc8, 0x42, 0x7b, 0x74, 0xf0, 0x42, + 0xd7, 0x37, 0x04, 0x43, 0x76, 0xd3, 0x0b, 0x43, 0x37, 0x43, 0x21, 0x43, 0x96, 0x7e, 0x06, 0x43, + 0x46, 0xf6, 0xf5, 0x42, 0x5c, 0xca, 0xe0, 0x42, 0xce, 0xf2, 0xfa, 0x42, 0xa4, 0x95, 0x07, 0x43, + 0x5a, 0x7d, 0xfb, 0x42, 0x46, 0x4d, 0xa6, 0x42, 0x73, 0xbd, 0xd3, 0x42, 0x52, 0x21, 0x01, 0x43, + 0xf7, 0x35, 0xcc, 0x42, 0x18, 0xa8, 0xe8, 0x42, 0x39, 0x93, 0x07, 0x43, 0x83, 0x4c, 0x16, 0x43, + 0x01, 0xf1, 0x12, 0x43, 0x88, 0x2c, 0x15, 0x43, 0x5e, 0x23, 0xf2, 0x42, 0xa8, 0x52, 0xbf, 0x42, + 0x6b, 0xc7, 0xbf, 0x42, 0x2e, 0x86, 0xfb, 0x42, 0xf9, 0x63, 0x08, 0x43, 0xfd, 0xbc, 0xb8, 0x42, + 0x82, 0x25, 0xc1, 0x42, 0xaf, 0xd3, 0x0b, 0x43, 0x15, 0x3a, 0xe9, 0x42, 0x60, 0x46, 0xeb, 0x42, + 0xcb, 0xe0, 0xec, 0x42, 0x12, 0x9a, 0x0e, 0x43, 0x2f, 0xb5, 0x0d, 0x43, 0x1b, 0x7d, 0x12, 0x43, + 0xde, 0x97, 0xe3, 0x42, 0x79, 0xf5, 0xc7, 0x42, 0x79, 0xb0, 0xe4, 0x42, 0xa2, 0xd2, 0xcf, 0x42, + 0xfa, 0x3c, 0xf3, 0x42, 0xef, 0x01, 0x9e, 0x42, 0x0e, 0x25, 0xb0, 0x42, 0xd9, 0xbe, 0x05, 0x43, + 0x00, 0x72, 0x0f, 0x43, 0xf8, 0x72, 0x29, 0x43, 0xfe, 0x3c, 0x0e, 0x43, 0xd3, 0x8a, 0x08, 0x43, + 0x17, 0xd0, 0x08, 0x43, 0xc7, 0xe0, 0x15, 0x43, 0x74, 0xb8, 0x0a, 0x43, 0x90, 0xf5, 0xda, 0x42, + 0xfb, 0xd2, 0xf1, 0x42, 0x1d, 0x9a, 0x10, 0x43, 0xef, 0x9c, 0x1e, 0x43, 0x42, 0x6e, 0xbd, 0x42, + 0xb9, 0xa0, 0x85, 0x42, 0xdf, 0x9c, 0x10, 0x43, 0xad, 0x00, 0x0d, 0x43, 0xcd, 0x01, 0x12, 0x43, + 0xf0, 0x9e, 0xc2, 0x42, 0x34, 0x3f, 0x06, 0x43, 0x8f, 0x46, 0x0c, 0x43, 0xe7, 0x58, 0x07, 0x43, + 0x82, 0x24, 0x00, 0x43, 0xc0, 0xa3, 0x04, 0x43, 0xef, 0x84, 0x1a, 0x43, 0x94, 0xf3, 0x1e, 0x43, + 0x39, 0xc6, 0x16, 0x43, 0x0b, 0x1c, 0xe3, 0x42, 0x13, 0xc2, 0x9f, 0x42, 0x46, 0x36, 0xe7, 0x42, + 0xb2, 0xe7, 0xe3, 0x42, 0x49, 0xd1, 0xea, 0x42, 0x57, 0x47, 0xd8, 0x42, 0xde, 0xdc, 0xf3, 0x42, + 0xaa, 0x16, 0xf5, 0x42, 0x03, 0x47, 0x19, 0x43, 0xa9, 0xb3, 0x16, 0x43, 0x02, 0x3a, 0x1e, 0x43, + 0xa6, 0x2d, 0x1c, 0x43, 0x9b, 0xdf, 0x21, 0x43, 0x7e, 0xc3, 0x15, 0x43, 0x78, 0x93, 0xb7, 0x42, + 0xb0, 0xf2, 0x9b, 0x42, 0xad, 0xdd, 0xdc, 0x42, 0xe2, 0x68, 0xdd, 0x42, 0xc2, 0x61, 0xc7, 0x42, + 0x24, 0xb6, 0xc8, 0x42, 0x56, 0xf7, 0xc9, 0x42, 0x96, 0xc0, 0xd4, 0x42, 0x78, 0x58, 0x04, 0x43, + 0x33, 0x0e, 0x0f, 0x43, 0x81, 0x82, 0x21, 0x43, 0x1f, 0x59, 0x0c, 0x43, 0xf4, 0xdd, 0x01, 0x43, + 0x52, 0xe7, 0xee, 0x42, 0x04, 0xc8, 0x86, 0x42, 0xa1, 0x7e, 0x54, 0x42, 0x68, 0x63, 0x6f, 0x42, + 0x3c, 0xf8, 0x63, 0x42, 0xf8, 0xd5, 0x7b, 0x42, 0xf2, 0x8e, 0x84, 0x42, 0x4a, 0x7b, 0x96, 0x42, + 0x5d, 0x49, 0xac, 0x42, 0xb6, 0x7c, 0xc0, 0x42, 0xa9, 0x8f, 0xbe, 0x42, 0xae, 0x9e, 0xcf, 0x42, + 0x44, 0x57, 0xb2, 0x42, 0x39, 0xef, 0xaf, 0x42, 0xec, 0xa4, 0x4a, 0x42, 0x96, 0x71, 0x46, 0x42, + 0x38, 0xf8, 0x70, 0x42, 0xb1, 0x2c, 0x86, 0x42, 0x9a, 0xde, 0xa0, 0x42, 0x19, 0x05, 0xae, 0x42, + 0x70, 0x85, 0xc3, 0x42, 0x1a, 0xa9, 0xc7, 0x42, 0x8e, 0x52, 0xda, 0x42, 0x6d, 0x50, 0xda, 0x42, + 0x49, 0x6d, 0xd4, 0x42, 0xc0, 0x4f, 0xaa, 0x42, 0x99, 0x3e, 0xcd, 0x42, 0x23, 0x8b, 0xd6, 0x42, + 0x12, 0x8e, 0xbf, 0x42, 0x7c, 0x70, 0x6b, 0x42, 0x9f, 0xe3, 0xc5, 0x42, 0xdf, 0xdb, 0xf8, 0x42, + 0xcf, 0xce, 0xe3, 0x42, 0x1b, 0x12, 0xf3, 0x42, 0xad, 0xd0, 0x14, 0x43, 0x37, 0xea, 0x0c, 0x43, + 0x23, 0x92, 0x2a, 0x43, 0x5e, 0x19, 0x1d, 0x43, 0xdd, 0x1b, 0x2a, 0x43, 0xf6, 0x06, 0x0b, 0x43, + 0xa7, 0xfc, 0x26, 0x43, 0x55, 0xf6, 0x11, 0x43, 0x63, 0x49, 0x36, 0x43, 0xf6, 0xca, 0xc8, 0x42, + 0xeb, 0x08, 0xc8, 0x42, 0x1e, 0x9f, 0x03, 0x43, 0xf0, 0xbf, 0xd9, 0x42, 0x88, 0x0c, 0x0d, 0x43, + 0xac, 0x0d, 0x1f, 0x43, 0x6f, 0xa2, 0x1f, 0x43, 0xdb, 0xa2, 0x47, 0x43, 0x6f, 0x62, 0x37, 0x43, + 0x2c, 0x63, 0x2b, 0x43, 0x59, 0x79, 0x0b, 0x43, 0x17, 0xa5, 0x22, 0x43, 0x20, 0xc9, 0x24, 0x43, + 0xc5, 0x1b, 0x20, 0x43, 0x12, 0x48, 0xdd, 0x42, 0x24, 0x5d, 0xd0, 0x42, 0xec, 0x10, 0x04, 0x43, + 0xdb, 0xa9, 0xda, 0x42, 0x92, 0xd8, 0x06, 0x43, 0xc3, 0x22, 0x19, 0x43, 0xa7, 0xe5, 0x11, 0x43, + 0xdc, 0xd1, 0x2f, 0x43, 0x17, 0x6f, 0x51, 0x43, 0xe9, 0xa6, 0x4e, 0x43, 0x80, 0x3b, 0x1d, 0x43, + 0x13, 0xa0, 0x1f, 0x43, 0xf3, 0xb5, 0x1c, 0x43, 0xb6, 0x5a, 0x0f, 0x43, 0xbd, 0xbc, 0xb8, 0x42, + 0x3d, 0x79, 0xc9, 0x42, 0x56, 0xfd, 0x07, 0x43, 0x24, 0x9e, 0x02, 0x43, 0x64, 0xed, 0x12, 0x43, + 0xfa, 0xb7, 0x1d, 0x43, 0x2c, 0x40, 0x1a, 0x43, 0xa5, 0x37, 0x42, 0x43, 0x1e, 0xed, 0x3f, 0x43, + 0x3b, 0x4a, 0x45, 0x43, 0x4d, 0x09, 0x1f, 0x43, 0x73, 0x3d, 0x1c, 0x43, 0x8c, 0xaa, 0x14, 0x43, + 0x29, 0xe6, 0xf6, 0x42, 0x57, 0x51, 0xc9, 0x42, 0x4b, 0x59, 0xcd, 0x42, 0x41, 0x39, 0x1f, 0x43, + 0x75, 0x0b, 0x0b, 0x43, 0xd5, 0x1c, 0x17, 0x43, 0xad, 0x94, 0x11, 0x43, 0xb8, 0x07, 0x24, 0x43, + 0xe5, 0xe9, 0x49, 0x43, 0x3b, 0xdf, 0x5e, 0x43, 0x7b, 0x7f, 0x42, 0x43, 0xd8, 0x40, 0x1b, 0x43, + 0xea, 0x7a, 0x1d, 0x43, 0x93, 0xf5, 0x0a, 0x43, 0x41, 0x91, 0x15, 0x43, 0x35, 0xe8, 0xb2, 0x42, + 0x4f, 0x39, 0xe8, 0x42, 0xff, 0xcb, 0x1c, 0x43, 0xc9, 0x3d, 0x01, 0x43, 0xb1, 0x85, 0x10, 0x43, + 0xde, 0x62, 0x26, 0x43, 0xe1, 0x97, 0x23, 0x43, 0x51, 0x37, 0x3a, 0x43, 0xf7, 0xac, 0x31, 0x43, + 0x68, 0x02, 0x11, 0x43, 0xf1, 0xcf, 0xec, 0x42, 0x9a, 0xc5, 0x00, 0x43, 0xc5, 0x20, 0x06, 0x43, + 0x9b, 0x91, 0x21, 0x43, 0x3f, 0xbc, 0xd4, 0x42, 0x7d, 0x29, 0xe0, 0x42, 0xf9, 0x72, 0x22, 0x43, + 0x15, 0xe9, 0xfd, 0x42, 0x8c, 0x7f, 0x11, 0x43, 0x76, 0x23, 0x23, 0x43, 0xdd, 0x70, 0x29, 0x43, + 0x4f, 0x92, 0x2c, 0x43, 0x8f, 0x2e, 0x2a, 0x43, 0x27, 0xcf, 0x1b, 0x43, 0xa3, 0x60, 0xfe, 0x42, + 0x3e, 0xee, 0xe1, 0x42, 0xd9, 0x41, 0x08, 0x43, 0x2f, 0xb5, 0x1b, 0x43, 0xaa, 0x6e, 0xee, 0x42, + 0x10, 0x4b, 0xc5, 0x42, 0x93, 0x46, 0x22, 0x43, 0xb8, 0xa2, 0x14, 0x43, 0x14, 0xe8, 0x22, 0x43, + 0x83, 0x2e, 0x19, 0x43, 0x41, 0x0d, 0x2a, 0x43, 0x3d, 0x94, 0x28, 0x43, 0x7f, 0x7a, 0x26, 0x43, + 0xcd, 0x1c, 0x07, 0x43, 0xdf, 0x39, 0x05, 0x43, 0x57, 0xda, 0x04, 0x43, 0xa3, 0x98, 0x0a, 0x43, + 0xdb, 0x40, 0x1a, 0x43, 0xdd, 0x43, 0xd7, 0x42, 0x9a, 0xd0, 0xce, 0x42, 0x2d, 0x1f, 0x23, 0x43, + 0x0a, 0x7e, 0x23, 0x43, 0x86, 0x54, 0x37, 0x43, 0x0b, 0x35, 0x2b, 0x43, 0x68, 0xf0, 0x2b, 0x43, + 0x6b, 0xdf, 0x1e, 0x43, 0x27, 0x4e, 0x1f, 0x43, 0x06, 0x74, 0x19, 0x43, 0x74, 0x45, 0x0e, 0x43, + 0x5d, 0x68, 0x13, 0x43, 0x8d, 0xf2, 0x16, 0x43, 0x41, 0x7d, 0x3c, 0x43, 0x8f, 0xa1, 0x0a, 0x43, + 0xab, 0xd3, 0xc5, 0x42, 0x6c, 0x88, 0x23, 0x43, 0xed, 0xed, 0x2a, 0x43, 0x94, 0x0c, 0x18, 0x43, + 0x24, 0x68, 0x08, 0x43, 0xd7, 0x70, 0x1b, 0x43, 0xed, 0x30, 0x20, 0x43, 0x30, 0x0f, 0x34, 0x43, + 0xf8, 0x3a, 0x14, 0x43, 0x77, 0x0f, 0x14, 0x43, 0x9a, 0xf1, 0x30, 0x43, 0x1d, 0xd3, 0x33, 0x43, + 0x45, 0x35, 0x3b, 0x43, 0x4f, 0xe5, 0xe6, 0x42, 0x72, 0x58, 0xc6, 0x42, 0x21, 0xff, 0x13, 0x43, + 0xd0, 0xe1, 0x04, 0x43, 0x32, 0x02, 0x0e, 0x43, 0x65, 0x72, 0xf6, 0x42, 0x09, 0xe2, 0x0e, 0x43, + 0xf1, 0xe4, 0x14, 0x43, 0xc5, 0x4b, 0x33, 0x43, 0x99, 0xde, 0x29, 0x43, 0xf7, 0x6c, 0x37, 0x43, + 0x9f, 0xde, 0x31, 0x43, 0xbc, 0xf7, 0x40, 0x43, 0x5e, 0x4a, 0x29, 0x43, 0x6b, 0x14, 0xe5, 0x42, + 0xb3, 0x32, 0xb9, 0x42, 0x50, 0xd7, 0x03, 0x43, 0x95, 0xca, 0xf0, 0x42, 0xbe, 0xf0, 0x00, 0x43, + 0xf3, 0x62, 0xfe, 0x42, 0x82, 0xdd, 0x00, 0x43, 0xf3, 0x07, 0x08, 0x43, 0xa3, 0x5e, 0x28, 0x43, + 0xc3, 0xfd, 0x32, 0x43, 0x20, 0xff, 0x39, 0x43, 0xc0, 0xc6, 0x28, 0x43, 0xec, 0x59, 0x1c, 0x43, + 0xde, 0xfa, 0x12, 0x43, 0x0e, 0x75, 0xbe, 0x42, 0x1a, 0xe3, 0x64, 0x42, 0x3d, 0x9c, 0x9d, 0x42, + 0xc9, 0xd9, 0x98, 0x42, 0x3b, 0x1a, 0xa0, 0x42, 0xd6, 0x79, 0xaf, 0x42, 0xd0, 0xfa, 0xa1, 0x42, + 0xb9, 0x9c, 0xc7, 0x42, 0xf9, 0xea, 0xe3, 0x42, 0x96, 0xd9, 0xf2, 0x42, 0x13, 0x88, 0x07, 0x43, + 0xc5, 0x59, 0xc8, 0x42, 0x70, 0xd9, 0xc1, 0x42, 0xaf, 0xd3, 0x98, 0x42, 0xe0, 0xae, 0x85, 0x42}; + +unsigned char conv2d_winograd_fp16_in[] = { + 0x3a, 0xb9, 0xc0, 0x30, 0x28, 0xbc, 0x72, 0xc1, 0x3c, 0xbe, 0xee, 0xc0, 0x1b, 0x3d, 0xf5, 0xbf, + 0x77, 0xbd, 0x05, 0xbd, 0x12, 0x2b, 0x5f, 0xb8, 0x73, 0xa2, 0xac, 0xbc, 0x19, 0xbf, 0x62, 0xc2, + 0xc5, 0xb7, 0x84, 0x3a, 0x70, 0xb4, 0xe9, 0xbd, 0xcf, 0xb9, 0x9b, 0xbe, 0xad, 0xb8, 0x4c, 0x39, + 0xaa, 0xc1, 0x50, 0xad, 0x4c, 0xbf, 0x8b, 0xb9, 0x9e, 0xbe, 0xbe, 0xb8, 0x05, 0xbf, 0x1c, 0xbc, + 0x7c, 0xbb, 0xce, 0xb3, 0x8a, 0x2c, 0xe7, 0xc1, 0xca, 0xb4, 0xde, 0x38, 0xe0, 0xbc, 0x46, 0xb9, + 0x37, 0xbf, 0xe0, 0x36, 0xef, 0xbd, 0xe9, 0xc0, 0x97, 0xc0, 0x5e, 0xbd, 0x5b, 0xbb, 0xf9, 0x2a, + 0x23, 0xb8, 0x6c, 0xbe, 0x09, 0xba, 0xd4, 0xbc, 0x39, 0xc0, 0x9d, 0xbd, 0xf8, 0xba, 0x7c, 0xb2, + 0x05, 0xc0, 0x14, 0xb5, 0xd0, 0x2e, 0x67, 0xb5, 0x20, 0xb9, 0x91, 0xb9, 0x3e, 0xa6, 0x78, 0xc0, + 0xcc, 0xbc, 0x10, 0xc1, 0x2f, 0xbd, 0x4a, 0xc1, 0x38, 0xbe, 0x2f, 0xb3, 0x01, 0xbc, 0x8d, 0x3b, + 0xcb, 0xc0, 0xa2, 0xbc, 0xb4, 0x22, 0x7c, 0xbe, 0x82, 0xbf, 0xa7, 0xbb, 0xf6, 0xbd, 0xd8, 0xbf, + 0x30, 0xb2, 0xb4, 0xb8, 0xe2, 0xbb, 0x5a, 0xbc, 0x93, 0xab, 0xb1, 0x3a, 0x08, 0xb8, 0x92, 0xbd, + 0xa7, 0xbc, 0x1a, 0xb8, 0x6f, 0xbe, 0xc8, 0xc1, 0xac, 0xbd, 0x32, 0xc0, 0x42, 0xbb, 0x60, 0x3c, + 0x3f, 0x34, 0x04, 0xbe, 0xed, 0xbe, 0x3e, 0x33, 0xbb, 0xbc, 0x4e, 0xbf, 0x48, 0xba, 0xaf, 0xbd, + 0x89, 0xb9, 0x06, 0x2b, 0x49, 0x38, 0x2d, 0xb9, 0x4f, 0xc0, 0xc7, 0xbd, 0xeb, 0x30, 0x47, 0x34, + 0x03, 0xbe, 0x47, 0xbe, 0x6d, 0xbf, 0x9a, 0xbe, 0x33, 0xbe, 0x89, 0xbf, 0x3b, 0x3a, 0xbc, 0x37, + 0xfb, 0xbd, 0xe4, 0xb9, 0x80, 0xb9, 0xd4, 0xbc, 0xe4, 0xc1, 0x63, 0xbb, 0xe6, 0x39, 0x0c, 0xc1, + 0x16, 0xbd, 0xdc, 0xaa, 0x06, 0xb5, 0x3b, 0xc0, 0xd4, 0xc4, 0x85, 0x28, 0x5c, 0xbf, 0x36, 0xbb, + 0x10, 0xbc, 0x3b, 0xbc, 0x28, 0x35, 0xe0, 0xb6, 0x99, 0xc0, 0x6f, 0xbe, 0xae, 0xbc, 0xe2, 0xac, + 0x21, 0xc0, 0x52, 0xc0, 0x7e, 0xb6, 0x0f, 0xc0, 0x9c, 0xb7, 0x44, 0xba, 0xb0, 0xb9, 0xd9, 0xc0, + 0xb9, 0xc0, 0x9f, 0xb9, 0x99, 0xaf, 0x71, 0xbd, 0x32, 0xc0, 0x53, 0x3b, 0x19, 0xc0, 0x78, 0x3a, + 0x6f, 0xb9, 0x43, 0xb9, 0x67, 0xbb, 0x20, 0xba, 0xf3, 0xb8, 0x1a, 0xb0, 0x45, 0xc2, 0x38, 0xaf, + 0x03, 0xbe, 0xbf, 0xb9, 0xae, 0xba, 0xc9, 0xb2, 0xb3, 0xbc, 0x1f, 0xbc, 0x35, 0xbc, 0x39, 0xc0, + 0x2a, 0xbe, 0x2f, 0xbd, 0x8c, 0xc0, 0xd4, 0xc1, 0x4e, 0x38, 0x13, 0xc1, 0x4c, 0xba, 0x31, 0xb9, + 0xa7, 0xbe, 0x7e, 0xc0, 0x1e, 0xb8, 0x86, 0xb4, 0xce, 0xbc, 0x51, 0xb7, 0x9d, 0xb0, 0xd7, 0xc1, + 0x89, 0xb4, 0xc4, 0x39, 0x55, 0xbc, 0x44, 0x33, 0x84, 0x3a, 0x29, 0xb9, 0x61, 0xb5, 0x8e, 0xbd, + 0xe2, 0xb2, 0x54, 0xa1, 0x46, 0xb5, 0xb5, 0x34, 0x4b, 0xc0, 0x84, 0xb8, 0x0d, 0x38, 0x31, 0xc4, + 0xe1, 0xbe, 0x40, 0x34, 0x47, 0xc0, 0xf4, 0xba, 0x4a, 0x39, 0x92, 0x2d, 0x62, 0x38, 0x44, 0xbd, + 0x72, 0xbc, 0xf1, 0xbc, 0x01, 0xbf, 0xed, 0xbb, 0xbd, 0x40, 0xa6, 0xc1, 0x2c, 0x40, 0xec, 0x2f, + 0x5f, 0xc1, 0x96, 0xbc, 0xfc, 0xba, 0xef, 0xbc, 0x3f, 0xbd, 0x0f, 0xbc, 0x9d, 0xba, 0x2b, 0xc2, + 0xda, 0xbd, 0x9c, 0xc2, 0x39, 0xb1, 0xd3, 0xbf, 0x59, 0xc1, 0xac, 0xc0, 0x01, 0xb4, 0x32, 0xb8, + 0xac, 0xb4, 0xfa, 0xbb, 0x44, 0xbd, 0xa8, 0xb5, 0x8a, 0xbd, 0x10, 0xbb, 0x34, 0xb8, 0x0c, 0x3d, + 0xfd, 0xac, 0x69, 0xbc, 0xd8, 0xc0, 0x60, 0xbc, 0x1c, 0x33, 0x16, 0xb7, 0x58, 0xc0, 0xad, 0xb8, + 0x35, 0xc3, 0xba, 0xbe, 0xec, 0xb5, 0x95, 0xc2, 0xeb, 0xbd, 0x72, 0xb5, 0x97, 0x38, 0x24, 0x30, + 0xc8, 0xba, 0xab, 0x3a, 0x4c, 0xbf, 0xef, 0xba, 0xe9, 0xb6, 0xa2, 0xb8, 0x64, 0xbe, 0x0e, 0xc0, + 0xfb, 0xbd, 0x06, 0x32, 0xd2, 0xbe, 0x65, 0xb8, 0xd4, 0x3a, 0xa4, 0xbb, 0x0d, 0x39, 0x7a, 0xbc, + 0x9d, 0x2a, 0x92, 0xb3, 0x02, 0xc0, 0x54, 0xbe, 0x12, 0x2e, 0x84, 0xc0, 0x44, 0xc3, 0x8a, 0xbc, + 0xfb, 0xbc, 0x8b, 0xba, 0x91, 0xbc, 0x74, 0xba, 0x25, 0xab, 0xb3, 0xba, 0xd0, 0xbc, 0x8e, 0x3a, + 0xb9, 0xb8, 0x6f, 0x22, 0x92, 0xbc, 0xdc, 0xc1, 0x58, 0xc1, 0xea, 0xba, 0xbf, 0xa4, 0xaf, 0x40, + 0x10, 0xbb, 0x93, 0xbf, 0x33, 0xb5, 0x8b, 0xbe, 0xbe, 0xc1, 0x3b, 0xb9, 0x1e, 0xbe, 0xb0, 0x37, + 0x7e, 0xc1, 0x5c, 0xb9, 0x26, 0xc0, 0x0c, 0xbd, 0x18, 0xbe, 0x37, 0x3c, 0xdb, 0x2d, 0xea, 0xb4, + 0x18, 0xbc, 0x09, 0xba, 0xee, 0xb2, 0xc0, 0xc0, 0xae, 0xbd, 0x73, 0xbc, 0x12, 0xc0, 0x69, 0x3b, + 0x14, 0xbc, 0x46, 0xc0, 0x8d, 0x38, 0xd8, 0xbb, 0x31, 0xbb, 0x88, 0xbc, 0x2e, 0x39, 0x22, 0xc0, + 0x67, 0xba, 0x14, 0x32, 0x24, 0xb7, 0x20, 0xc1, 0x72, 0xc0, 0xc8, 0x33, 0x0e, 0xbe, 0xab, 0x3a, + 0x95, 0xbd, 0x93, 0xb4, 0xf1, 0xb8, 0x72, 0xc0, 0x13, 0xc0, 0x2e, 0xc0, 0x2c, 0xbd, 0x4b, 0xc1, + 0x0a, 0x31, 0x34, 0xb3, 0x13, 0xb5, 0x4c, 0xb9, 0x45, 0xbe, 0x5d, 0xba, 0x4d, 0xbe, 0x15, 0x36, + 0xcb, 0xbe, 0x55, 0xc0, 0x53, 0xbd, 0x48, 0xb4, 0x39, 0xbc, 0xbd, 0xbc, 0x9a, 0x2d, 0x2c, 0xbc, + 0x84, 0x3b, 0xb4, 0xba, 0x32, 0xb2, 0x9b, 0xba, 0xba, 0xbc, 0x9f, 0xbc, 0xca, 0xb6, 0x32, 0xbe, + 0x36, 0x37, 0x3f, 0xbe, 0xe9, 0xbb, 0x51, 0xbc, 0x96, 0xb8, 0xb0, 0xbc, 0x4c, 0xbf, 0xad, 0xbc, + 0x03, 0xb6, 0x9d, 0xbe, 0xcc, 0xbf, 0x62, 0x29, 0x59, 0xbe, 0xaa, 0xb6, 0xcb, 0xbf, 0x1c, 0xb8, + 0x59, 0x3c, 0x8e, 0xb4, 0x2d, 0xb6, 0xb7, 0xac, 0x0b, 0xba, 0x91, 0xbe, 0x3a, 0xb5, 0xd7, 0xbe, + 0xea, 0xbe, 0x92, 0xb5, 0x40, 0xaf, 0x90, 0xb9, 0xa2, 0xbe, 0xab, 0x35, 0x22, 0xbc, 0xa0, 0xb8, + 0x10, 0x2e, 0xce, 0xbb, 0xd6, 0xbe, 0x2e, 0x32, 0x64, 0x32, 0x52, 0xb4, 0xe2, 0xc0, 0x95, 0xbd, + 0xb5, 0xc0, 0x33, 0xbe, 0x52, 0xb4, 0x5b, 0xbd, 0x77, 0x38, 0xe1, 0xbf, 0x2f, 0xbd, 0x94, 0xb9, + 0xd0, 0xb8, 0x47, 0xbc, 0xc2, 0xb5, 0xa0, 0x39, 0x0b, 0x42, 0xb1, 0xbc, 0x35, 0xbb, 0xd7, 0xb3, + 0xc1, 0xbe, 0xe7, 0xc0, 0x27, 0xb7, 0x7c, 0xb6, 0x57, 0x35, 0x93, 0xbd, 0x23, 0xb6, 0x5f, 0xbe, + 0xa7, 0xbc, 0x49, 0xb9, 0x5b, 0xb8, 0x36, 0xb6, 0xb8, 0xba, 0xc3, 0x33, 0x24, 0xb3, 0xef, 0xb8, + 0xba, 0xc0, 0x57, 0x39, 0x9c, 0xb6, 0xcf, 0xbe, 0x4c, 0xba, 0x4e, 0x34, 0x55, 0xbc, 0xaa, 0xb9, + 0xd8, 0xbe, 0xfc, 0x3a, 0xb9, 0xc1, 0x7b, 0x30, 0xb2, 0xbc, 0x0e, 0xa9, 0xb0, 0xb7, 0x31, 0xbc, + 0x13, 0xb1, 0x15, 0x3a, 0xbf, 0x32, 0x2f, 0x39, 0xb9, 0xc2, 0xb9, 0xbf, 0x04, 0xba, 0xf7, 0xbd, + 0x61, 0x37, 0x99, 0xbe, 0x8d, 0xb8, 0x5c, 0xb5, 0xc3, 0xc2, 0xb8, 0x32, 0xc5, 0xb4, 0xb1, 0xb6, + 0xe2, 0x2e, 0xb9, 0xbb, 0x95, 0x39, 0xc9, 0xbf, 0x58, 0xb4, 0xa3, 0xb9, 0xeb, 0xb5, 0x09, 0xc0, + 0x9f, 0xc1, 0x10, 0xba, 0x28, 0xbf, 0x09, 0xc0, 0x64, 0xb9, 0xd7, 0x3d, 0xad, 0xbc, 0xf6, 0xb8, + 0xa5, 0xba, 0x16, 0xbe, 0xec, 0x3c, 0xf8, 0xbb, 0x42, 0xbe, 0x90, 0xb8, 0x89, 0xb8, 0x91, 0xb8, + 0xa5, 0xbd, 0x63, 0xbb, 0xe8, 0xb3, 0x22, 0xb8, 0x8c, 0xba, 0x17, 0xbd, 0xc4, 0xba, 0x84, 0xbc, + 0x2f, 0xbf, 0xb2, 0xbc, 0x2c, 0xb6, 0xfe, 0xbc, 0x0b, 0xb9, 0xb7, 0xb3, 0x8f, 0xbe, 0xe9, 0xbd, + 0xe7, 0xbe, 0x78, 0xb8, 0x3c, 0x3d, 0xf8, 0xba, 0x7c, 0xb0, 0x3d, 0xbd, 0x62, 0xc0, 0xdf, 0xbc, + 0xc7, 0xb8, 0x5c, 0xc1, 0x3b, 0xbe, 0x9d, 0xb8, 0x63, 0xba, 0x26, 0xbb, 0x3c, 0xbf, 0x24, 0xbf, + 0x83, 0xbd, 0xb3, 0xc0, 0x89, 0x34, 0xf5, 0xb0, 0xf1, 0x32, 0xa0, 0xbb, 0xaf, 0xbf, 0x31, 0xbe, + 0xe3, 0x2f, 0x56, 0x36, 0x3d, 0xb4, 0x7a, 0x9b, 0x77, 0xbd, 0x9f, 0x31, 0xf1, 0xb8, 0xb3, 0x34, + 0xc4, 0xbe, 0xbd, 0x2d, 0xfc, 0xbb, 0xbb, 0xba, 0xc5, 0xbc, 0xa4, 0xb5, 0xd7, 0xb9, 0x1b, 0xbc, + 0x8b, 0xbd, 0x0e, 0xb8, 0x18, 0xbe, 0x6b, 0xb6, 0xee, 0x2d, 0xd2, 0xb1, 0xbf, 0xba, 0x36, 0xbf, + 0xc3, 0xba, 0xa7, 0x3b, 0x9f, 0xbd, 0x91, 0xbf, 0x3e, 0x2f, 0x55, 0xb9, 0x24, 0xbe, 0xb4, 0xbe, + 0x2d, 0x32, 0x42, 0xbe, 0x7a, 0x3d, 0x5b, 0xbf, 0x97, 0xc0, 0x69, 0xbc, 0xf9, 0xb2, 0xd5, 0xbf, + 0xe8, 0x39, 0xb4, 0xb3, 0xbb, 0xbe, 0xc9, 0xb7, 0x62, 0xbc, 0xd2, 0xbc, 0x1c, 0x38, 0xac, 0x3b, + 0xd2, 0x34, 0x58, 0xaf, 0x8c, 0xbc, 0xda, 0xbf, 0xb6, 0xb1, 0x21, 0xbf, 0x77, 0xb9, 0x70, 0xbe, + 0xbe, 0x38, 0xc3, 0x35, 0xe2, 0xbc, 0xa4, 0xb8, 0x7c, 0xb9, 0xad, 0xbc, 0x50, 0xc0, 0xcd, 0xba, + 0x3c, 0x35, 0x4e, 0xbf, 0x3f, 0xc0, 0xd2, 0xbe, 0xaa, 0xbc, 0x2e, 0xb9, 0x57, 0xb9, 0x04, 0xb3, + 0x47, 0xc0, 0x46, 0x30, 0xa6, 0x3e, 0x52, 0x39, 0x13, 0x3e, 0x4f, 0x36, 0x99, 0xbd, 0xf9, 0xbc, + 0x61, 0x38, 0x8a, 0xbc, 0xf6, 0xbb, 0x07, 0xaa, 0x27, 0xb3, 0x26, 0xbe, 0xfa, 0xbd, 0x8a, 0xbb, + 0xb1, 0xb0, 0x44, 0xc3, 0x71, 0xb6, 0x34, 0xc0, 0xfe, 0xbd, 0x23, 0xc0, 0xde, 0x2e, 0x68, 0xc0, + 0x74, 0xbd, 0xeb, 0xb2, 0x9e, 0xbb, 0xd7, 0xb3, 0x44, 0xbe, 0x8b, 0xc1, 0x35, 0xba, 0xfd, 0x30, + 0xc0, 0xbd, 0x7f, 0xc0, 0xb7, 0xc1, 0xb7, 0xbe, 0x25, 0xb9, 0xd0, 0xc0, 0xcb, 0xbd, 0x41, 0xc0, + 0x2e, 0x3b, 0x01, 0xbe, 0x72, 0xbc, 0xf4, 0x2f, 0x56, 0xb2, 0xc9, 0xbe, 0xfa, 0x3d, 0xc6, 0xba, + 0x33, 0xc0, 0xdf, 0xaa, 0xf8, 0xb9, 0xe0, 0xc0, 0x7e, 0xbc, 0x5a, 0x3a, 0xbd, 0xc0, 0x06, 0xbe, + 0xe0, 0xbe, 0x6b, 0xbb, 0x2a, 0xc0, 0xee, 0xbe, 0x88, 0xb2, 0x7c, 0xb2, 0xb7, 0xbe, 0xea, 0xc0, + 0x2d, 0xb3, 0x97, 0xb9, 0xf1, 0xb9, 0x5c, 0x28, 0xc7, 0xbc, 0x4d, 0xbd, 0x63, 0xb5, 0x51, 0xb1, + 0x6b, 0xbf, 0xf9, 0xbf, 0x36, 0xbb, 0xad, 0xab, 0x8d, 0xbd, 0xe5, 0xbc, 0x9e, 0xbd, 0x14, 0xc0, + 0x05, 0xba, 0xbe, 0xbf, 0xfe, 0xad, 0xfd, 0xbe, 0x3e, 0x2f, 0x03, 0x37, 0x78, 0x38, 0xc6, 0xb9, + 0xd3, 0x35, 0x6f, 0xbe, 0x55, 0xbb, 0x61, 0xbe, 0xa8, 0xb3, 0xdf, 0xbf, 0x63, 0xbd, 0x28, 0xbb, + 0xda, 0xbe, 0xf2, 0xbc, 0x15, 0xa1, 0xfd, 0xb8, 0x0d, 0xbe, 0x0e, 0x2e, 0x91, 0x38, 0x75, 0xbc, + 0x64, 0xb2, 0x32, 0xbe, 0x10, 0xc4, 0x6b, 0xbe, 0xa9, 0x39, 0x18, 0xbe, 0x26, 0xaf, 0xc5, 0xb4, + 0x58, 0xc2, 0xe6, 0x3c, 0xaa, 0xbe, 0x15, 0xbe, 0xab, 0xbe, 0xda, 0xbe, 0x95, 0xbc, 0x38, 0xc0, + 0x27, 0xc0, 0x6d, 0xbc, 0x27, 0xbb, 0x59, 0xba, 0x7c, 0xb9, 0xd1, 0xba, 0x8a, 0xbf, 0xa5, 0x40, + 0x07, 0x3c, 0x53, 0xbf, 0x9f, 0xc2, 0x6a, 0x39, 0x6e, 0xc0, 0x81, 0xbf, 0x73, 0xbd, 0x37, 0xbf, + 0x50, 0x24, 0xfc, 0xbe, 0x1f, 0xc1, 0x07, 0x32, 0x42, 0xb0, 0xa8, 0x39, 0x73, 0x39, 0x07, 0xb9, + 0xce, 0xc0, 0xb4, 0xbc, 0xfd, 0xbd, 0xa6, 0x30, 0xb7, 0xbf, 0xf7, 0xbb, 0x64, 0xc1, 0x6f, 0x39, + 0xf2, 0xbe, 0x9a, 0x3a, 0xc5, 0xbe, 0x8d, 0xb4, 0xd3, 0x35, 0x67, 0xbf, 0x40, 0xb9, 0xcf, 0xbc, + 0x7c, 0xbd, 0x2b, 0x32, 0x4c, 0xbe, 0xaa, 0xbe, 0xea, 0xc0, 0x9c, 0xb2, 0xa6, 0x34, 0x1b, 0x9b, + 0xde, 0xbc, 0x30, 0xbc, 0x52, 0xbc, 0x7b, 0xbc, 0x11, 0xc0, 0x03, 0xbb, 0x65, 0xbb, 0x8e, 0x3a, + 0x85, 0xba, 0x3f, 0x41, 0x84, 0xbd, 0xe0, 0xbf, 0x73, 0x35, 0xce, 0xb9, 0xac, 0x33, 0xcb, 0x3a, + 0x28, 0xb5, 0xd9, 0xbb, 0x7e, 0xbc, 0xe9, 0xbf, 0x33, 0xbc, 0x3c, 0xbf, 0x04, 0x36, 0xd4, 0xa0, + 0x76, 0xbe, 0x3c, 0x2d, 0x1e, 0xc0, 0x28, 0xbe, 0xcb, 0xc0, 0x41, 0x36, 0xcd, 0xba, 0x0d, 0xc0, + 0x6e, 0xc0, 0x58, 0xb8, 0x2b, 0xc0, 0x4d, 0xc4, 0x98, 0xbd, 0xa6, 0xbd, 0x16, 0x38, 0x6d, 0xb8, + 0x07, 0xbd, 0xd5, 0x3d, 0x2f, 0xbd, 0x0a, 0xba, 0x23, 0xba, 0x11, 0xb5, 0xf9, 0xbd, 0x67, 0xb6, + 0x60, 0xbc, 0x0e, 0xc0, 0xa9, 0xbc, 0x13, 0xba, 0xd1, 0xb4, 0xc4, 0xbe, 0xd1, 0xb1, 0x0e, 0xc0, + 0xa5, 0x2d, 0xd6, 0xb4, 0x68, 0xbb, 0xa3, 0xb9, 0x3d, 0xbd, 0x31, 0xbc, 0x11, 0xb4, 0xba, 0xb7, + 0xf2, 0x37, 0x91, 0xb6, 0x20, 0xbf, 0x0b, 0xc0, 0xd4, 0xbb, 0x0e, 0xb8, 0xad, 0xc1, 0x59, 0xbd, + 0xf9, 0xb7, 0x45, 0xc0, 0xe2, 0xba, 0x8f, 0xbf, 0xd1, 0x3a, 0xe2, 0xb9, 0x5b, 0xbc, 0x4d, 0xbe, + 0x75, 0xbd, 0x2e, 0xbc, 0xa2, 0x30, 0x4f, 0x28, 0xe3, 0xbf, 0x06, 0xb9, 0xd6, 0xbf, 0x18, 0xb8, + 0x2e, 0xc0, 0xc2, 0x38, 0x42, 0xb7, 0x08, 0xc1, 0xb3, 0xb8, 0xa7, 0xba, 0xc4, 0xb8, 0x31, 0xa6, + 0xbe, 0xc1, 0x79, 0xb4, 0x52, 0xb0, 0x43, 0xbb, 0x76, 0xba, 0x08, 0xba, 0x05, 0xc1, 0xfb, 0xc2, + 0x25, 0xc0, 0x9b, 0x3b, 0x49, 0x34, 0xda, 0x2d, 0xfd, 0xb9, 0xa8, 0x32, 0x05, 0x34, 0x59, 0xb8, + 0x5b, 0x33, 0x8f, 0xba, 0xd4, 0xb4, 0x60, 0xbd, 0x28, 0xc2, 0x31, 0xbb, 0xdf, 0xc0, 0x1c, 0xbf, + 0x23, 0xb6, 0x3a, 0xbd, 0x76, 0xb9, 0x43, 0xb9, 0xe8, 0xb7, 0x84, 0xbf, 0x8f, 0x34, 0xbf, 0xbb, + 0x4c, 0xc0, 0xfb, 0x3c, 0x6e, 0xbf, 0x82, 0xbd, 0xe1, 0xbd, 0x6d, 0xc1, 0x08, 0xbe, 0x01, 0xbc, + 0x28, 0xbc, 0xf4, 0xba, 0x77, 0xba, 0xa0, 0xc1, 0x64, 0xb8, 0xcc, 0xbc, 0x74, 0xc2, 0xed, 0xaf, + 0x26, 0xc0, 0x21, 0xbe, 0x07, 0xbd, 0x7b, 0xc1, 0xba, 0xba, 0x38, 0x39, 0xf7, 0xbc, 0xc1, 0xb4, + 0xc6, 0xc0, 0x92, 0xc0, 0x30, 0xbb, 0xdf, 0xbe, 0xcb, 0xb8, 0x91, 0xbd, 0x52, 0x3b, 0xa9, 0xb9, + 0x43, 0xba, 0xbd, 0xb8, 0xc3, 0xbd, 0x47, 0xbb, 0x93, 0xaa, 0xc8, 0xc1, 0xf6, 0x38, 0x62, 0xbb, + 0xba, 0xb6, 0xb8, 0xb1, 0xe8, 0xb8, 0xb4, 0xc0, 0x61, 0xb1, 0x6b, 0xba, 0xc3, 0xbe, 0x1a, 0xbb, + 0x81, 0xc0, 0x21, 0xbd, 0x0d, 0xc2, 0x49, 0xac, 0x80, 0xbe, 0xc0, 0x34, 0xe7, 0xac, 0x09, 0xb1, + 0xc0, 0xb5, 0x17, 0xbd, 0x45, 0xb9, 0xba, 0x35, 0x6f, 0xbd, 0x91, 0xbd, 0x01, 0xbf, 0xca, 0xb9, + 0x2c, 0xad, 0xd7, 0x3d, 0x1a, 0xbb, 0x63, 0xbc, 0x1b, 0xc2, 0x46, 0xb0, 0xe2, 0xba, 0x06, 0xbc, + 0x2e, 0xba, 0xc0, 0xb8, 0xeb, 0xbc, 0xed, 0xbc, 0xe5, 0xb9, 0x47, 0xba, 0xd0, 0x37, 0xf7, 0xbc, + 0x72, 0xbe, 0x00, 0xbd, 0xdb, 0x2e, 0xbc, 0xb8, 0x5b, 0xbe, 0x3c, 0xbd, 0x69, 0xbe, 0x5d, 0x34, + 0xd2, 0xbf, 0x4f, 0xbf, 0xb2, 0xb9, 0x50, 0xbe, 0xfc, 0xbc, 0x5c, 0xb9, 0x9d, 0xc0, 0xc9, 0xbf, + 0x38, 0xc1, 0xfa, 0xc0, 0xa5, 0x3c, 0x67, 0xbc, 0xc6, 0xc0, 0x5a, 0x32, 0x92, 0xbd, 0x10, 0xc1, + 0x79, 0xc0, 0xe3, 0xbf, 0x0d, 0xba, 0xb0, 0xc1, 0x5f, 0xba, 0xb1, 0xbc, 0x42, 0xbc, 0x4e, 0x3f, + 0x4b, 0xb8, 0x77, 0x2f, 0x87, 0xc1, 0x89, 0xc0, 0xf9, 0xc0, 0x12, 0xbe, 0x19, 0xbe, 0x75, 0xb6, + 0xe1, 0xc2, 0xad, 0xbb, 0x3e, 0xbc, 0x23, 0xba, 0xcd, 0xbc, 0xe1, 0x37, 0x7c, 0xb9, 0xa8, 0xb1, + 0x07, 0xb4, 0xe9, 0x38, 0x12, 0xb7, 0x06, 0xbd, 0x2d, 0xb0, 0x4e, 0xc1, 0xc6, 0xc0, 0x9a, 0x39, + 0x49, 0x3c, 0x00, 0xbe, 0x24, 0xb5, 0x86, 0xbd, 0x9f, 0xb4, 0x64, 0xbf, 0xf7, 0xba, 0x5f, 0xbe, + 0x31, 0x36, 0x64, 0xbe, 0x41, 0x35, 0x35, 0xc1, 0x81, 0xbf, 0x7f, 0xbf, 0xb2, 0xbe, 0xf9, 0xbd, + 0x65, 0xc2, 0x09, 0xba, 0x20, 0x30, 0x10, 0xbd, 0xf2, 0xc1, 0x64, 0xc0, 0xab, 0xbc, 0x43, 0xc0, + 0xd1, 0xb8, 0xd0, 0xbe, 0x09, 0xb9, 0xac, 0xbd, 0x27, 0xb8, 0x14, 0xb8, 0x3b, 0xc0, 0x26, 0xb7, + 0x57, 0xbd, 0x3a, 0xbb, 0x20, 0x3b, 0xe7, 0xb9, 0xb3, 0x36, 0xeb, 0xbd, 0x4a, 0xb8, 0x6a, 0x34, + 0xae, 0x3d, 0xc4, 0xb6, 0x78, 0xbf, 0xa6, 0xbe, 0x3e, 0x2c, 0xb3, 0x3a, 0xcd, 0xbb, 0x71, 0xbe, + 0x69, 0xbc, 0x5a, 0x27, 0x90, 0xbd, 0x65, 0xbf, 0x9d, 0xbc, 0x76, 0xad, 0x28, 0xb7, 0x54, 0xbd, + 0xe7, 0xbe, 0x68, 0xb6, 0xe8, 0xaa, 0x46, 0xbe, 0xc4, 0xbd, 0x1e, 0xc0, 0x15, 0x2a, 0x7c, 0xba, + 0xf9, 0xbd, 0x6b, 0xbd, 0x55, 0x3b, 0x07, 0xbd, 0x07, 0xc0, 0x85, 0xb8, 0xd5, 0xb4, 0x30, 0xc0, + 0x1c, 0x27, 0x27, 0xbb, 0xef, 0xbd, 0x37, 0xbb, 0x65, 0xb8, 0x76, 0x33, 0x9b, 0xbc, 0x89, 0xbc, + 0x64, 0xc2, 0x06, 0xba, 0x39, 0x3c, 0xd6, 0xb9, 0x35, 0xc0, 0xb9, 0xbf, 0xcf, 0xb6, 0x4d, 0xbf, + 0x72, 0xbb, 0x85, 0xbd, 0x34, 0xb0, 0xd1, 0xbe, 0x5c, 0xb9, 0x07, 0x35, 0x03, 0xb9, 0xea, 0xbc, + 0x00, 0xc0, 0x0d, 0xc1, 0x2f, 0xbc, 0x1b, 0xc0, 0x1f, 0xbf, 0x72, 0xbb, 0x83, 0xbc, 0x0e, 0xba, + 0xb0, 0xad, 0xd9, 0xb6, 0xc5, 0xbd, 0x80, 0xbf, 0xc6, 0xbc, 0x54, 0xb9, 0x8a, 0xbc, 0x95, 0xbc, + 0x67, 0xbe, 0x16, 0xa7, 0x9a, 0xbf, 0xc2, 0x33, 0xa6, 0xbd, 0xa3, 0xb9, 0x08, 0xc0, 0xe6, 0xbb, + 0xc5, 0x37, 0x12, 0xbc, 0xd8, 0xbf, 0x92, 0xbd, 0x71, 0xc0, 0xa7, 0x38, 0x43, 0xb8, 0x27, 0xbd, + 0x55, 0xbd, 0x21, 0xb8, 0xe8, 0xa9, 0x9e, 0x3d, 0x87, 0xbe, 0x43, 0xc0, 0xa8, 0xba, 0x66, 0xb2, + 0x0d, 0xb8, 0xa8, 0xb2, 0x50, 0xb4, 0x3b, 0xbe, 0xc0, 0xbe, 0xf4, 0x32, 0xda, 0xbd, 0x71, 0xbc, + 0x10, 0xbd, 0xc3, 0xb6, 0x0c, 0xbf, 0xb1, 0xbc, 0xbe, 0xbd, 0xf9, 0xba, 0xe5, 0x34, 0xfa, 0xbc, + 0x1e, 0xb9, 0xec, 0xb7, 0x72, 0xb8, 0x96, 0xbf, 0xa0, 0xbc, 0xea, 0xac, 0x36, 0x2c, 0xf8, 0xc0, + 0x5f, 0x38, 0xae, 0xc0, 0x80, 0x3c, 0xab, 0xc1, 0x3f, 0xbf, 0xde, 0xc1, 0x12, 0xb7, 0x85, 0xc0, + 0xc2, 0xbf, 0xa4, 0xba, 0x4d, 0xbd, 0x2e, 0x3a, 0x26, 0x30, 0x4e, 0xbe, 0x09, 0x38, 0x2d, 0xb9, + 0xa6, 0xbc, 0xe7, 0x38, 0x6c, 0xc0, 0x9e, 0x36, 0xd7, 0xbb, 0x86, 0xc0, 0xa1, 0xbd, 0xb9, 0xba, + 0x6c, 0xa4, 0x9b, 0xbe, 0x94, 0xbc, 0x91, 0xaa, 0x98, 0x3a, 0xb5, 0x3a, 0x1a, 0xc1, 0x36, 0xc2, + 0x28, 0xbd, 0x5d, 0xbc, 0x97, 0xbc, 0x2e, 0xbc, 0x55, 0xc0, 0x94, 0xbc, 0xa5, 0xbc, 0xcb, 0xa1, + 0x25, 0x9d, 0xe3, 0xbd, 0x19, 0xbf, 0x89, 0x1b, 0x9b, 0xbf, 0x9d, 0xbf, 0x59, 0xbc, 0xeb, 0xb2, + 0x4f, 0xb8, 0x6b, 0xbc, 0x20, 0xc2, 0xb6, 0xb4, 0xef, 0xc0, 0x72, 0xbe, 0xed, 0xba, 0xbd, 0xbe, + 0x5b, 0x32, 0x1a, 0xbd, 0x9c, 0xc2, 0xbd, 0xba, 0x19, 0xc0, 0x94, 0xc0, 0x75, 0x3b, 0x5f, 0xbe, + 0x8c, 0xbe, 0x8d, 0x32, 0xf2, 0xbd, 0xd1, 0xc0, 0xa8, 0xbd, 0xf7, 0x2e, 0xad, 0x36, 0x9c, 0xbd, + 0x75, 0x3c, 0x7d, 0xb8, 0x9e, 0xbe, 0xde, 0x29, 0x3d, 0xbf, 0x29, 0xc0, 0x47, 0xbd, 0x39, 0xbf, + 0x71, 0xbd, 0x32, 0xc1, 0x25, 0xb8, 0xb2, 0xb5, 0x7e, 0xae, 0x7c, 0x38, 0x5f, 0xbc, 0xa0, 0xb6, + 0xc9, 0xc0, 0xf2, 0xbc, 0x74, 0xbc, 0x2f, 0x37, 0xa0, 0xb2, 0xfc, 0xbc, 0x09, 0xc2, 0xc6, 0x35, + 0x45, 0xc1, 0x62, 0xc1, 0x18, 0xc4, 0x25, 0xbb, 0x74, 0xba, 0x83, 0xb9, 0x6b, 0x36, 0x7b, 0xbc, + 0xa2, 0xb0, 0xf8, 0xbe, 0x20, 0xbe, 0xfc, 0xba, 0x35, 0xbe, 0x51, 0xbe, 0xbf, 0xbd, 0x4d, 0x3d, + 0x15, 0xb4, 0xd8, 0xbd, 0x37, 0xc0, 0x93, 0xbc, 0x9d, 0xbc, 0xdd, 0xbd, 0xd5, 0xc0, 0x1c, 0xbe, + 0x09, 0xc1, 0x97, 0xc0, 0xe9, 0xba, 0x22, 0xba, 0xc6, 0xbe, 0x27, 0xbe, 0x38, 0xb9, 0x99, 0xb6, + 0xca, 0x38, 0x1d, 0xc1, 0xdc, 0xb4, 0x9c, 0xbe, 0xeb, 0xbe, 0x63, 0xba, 0x9f, 0xbc, 0xef, 0xc1, + 0xa8, 0xae, 0x9d, 0xbc, 0x21, 0x31, 0x5e, 0xbc, 0x34, 0xc1, 0x3f, 0xbd, 0x2b, 0xb0, 0x4c, 0xba, + 0x55, 0xbe, 0x83, 0xc0, 0x6f, 0xc1, 0x92, 0xb6, 0x99, 0x35, 0x94, 0x35, 0x0a, 0xb2, 0x11, 0xbf, + 0x0f, 0xa1, 0xb8, 0x1e, 0x69, 0xbe, 0x49, 0xba, 0xd2, 0xbd, 0xa4, 0x37, 0xb8, 0xb8, 0x1b, 0xb9, + 0x37, 0xbc, 0x7c, 0xbe, 0xba, 0x2c, 0x1b, 0xc3, 0x2a, 0x32, 0x25, 0xbb, 0x35, 0xc1, 0x44, 0xbe, + 0x91, 0xba, 0x39, 0xc0, 0xee, 0x34, 0xd7, 0xc2, 0xd4, 0x94, 0x2c, 0xbe, 0xd3, 0xc0, 0x6a, 0xb1, + 0x21, 0x34, 0x65, 0xb9, 0x78, 0x35, 0x30, 0x3d, 0xdc, 0xbe, 0x71, 0xbf, 0xa2, 0xb9, 0x02, 0xbd, + 0x67, 0xbc, 0x06, 0xc0, 0x49, 0xaa, 0x7c, 0xbd, 0xc7, 0xb0, 0xdc, 0xbf, 0x9c, 0xb8, 0x3c, 0xb9, + 0x35, 0xbc, 0xf7, 0xb5, 0xfa, 0xbe, 0x0c, 0x34, 0x3d, 0xbd, 0x68, 0xbf, 0xba, 0xb9, 0x20, 0xb7, + 0x6e, 0xbf, 0x0b, 0xad, 0x5a, 0xbf, 0xf9, 0xbd, 0xe8, 0xbc, 0x77, 0xc0, 0x30, 0xbe, 0x0b, 0xbf, + 0xeb, 0xae, 0x1e, 0xb8, 0xd6, 0xc1, 0x06, 0xb9, 0xf2, 0xbe, 0x0c, 0xbc, 0x65, 0xbc, 0x95, 0xbc, + 0xb5, 0xba, 0x7d, 0xb9, 0x76, 0xb8, 0x95, 0x34, 0x88, 0xbe, 0x53, 0xbe, 0x49, 0xbe, 0xd8, 0xbd, + 0xa4, 0xb9, 0xf2, 0xb8, 0x68, 0x21, 0x39, 0xc2, 0x88, 0xc0, 0x8d, 0xb8, 0x90, 0x37, 0xa2, 0xb5, + 0xce, 0xba, 0xa5, 0xbd, 0x27, 0xc0, 0x5a, 0xc0, 0x4a, 0xbd, 0x0c, 0xbf, 0x5c, 0xc0, 0x37, 0xb6, + 0x05, 0xc2, 0x58, 0xc1, 0xf5, 0xc1, 0xb4, 0xbb, 0xed, 0xb3, 0x5e, 0xbe, 0x17, 0xb6, 0xce, 0xb9, + 0xfb, 0xb6, 0x9f, 0xbc, 0xb6, 0xbc, 0xe1, 0x30, 0x82, 0xc0, 0x1d, 0xb9, 0xf0, 0xb9, 0x1e, 0xbd, + 0x11, 0xb2, 0x3e, 0x3b, 0x14, 0xb9, 0x93, 0xbd, 0xdf, 0xbd, 0x81, 0xbd, 0x6b, 0xbb, 0xbd, 0xbe, + 0xb9, 0xa5, 0x06, 0xbb, 0x43, 0xb4, 0x08, 0xbe, 0x5c, 0x34, 0x57, 0xc1, 0x2e, 0xc1, 0xb3, 0xb9, + 0xa3, 0xbc, 0xd7, 0xb8, 0x14, 0xc0, 0xff, 0xba, 0x4c, 0xc1, 0x47, 0xbd, 0xe3, 0x35, 0x6d, 0xbc, + 0xf5, 0xbd, 0x0f, 0xbd, 0x2d, 0x21, 0x9a, 0x36, 0x8d, 0xbf, 0x0b, 0xbe, 0x80, 0xb8, 0xec, 0xb8, + 0xba, 0xbf, 0x45, 0xc0, 0xd3, 0xb6, 0xfc, 0xbc, 0xff, 0xba, 0x2c, 0xc3, 0x5e, 0xb9, 0x56, 0xbd, + 0x75, 0xbc, 0x27, 0x34, 0x08, 0xbd, 0x1b, 0xbd, 0xf4, 0xb8, 0x43, 0xb9, 0x95, 0xb6, 0x79, 0xbf, + 0xbc, 0xba, 0x50, 0xbd, 0xc6, 0xbe, 0x79, 0xb7, 0xe9, 0xbc, 0xe1, 0xb8, 0x65, 0x2a, 0x07, 0xb1, + 0x66, 0x39, 0xbc, 0x38, 0xd7, 0xbe, 0xdc, 0xb8, 0x0e, 0x3a, 0x23, 0xbe, 0x8e, 0xbc, 0xa3, 0xbb, + 0x41, 0xbb, 0x56, 0x29, 0x58, 0x2b, 0xef, 0xbe, 0x69, 0xc0, 0xbd, 0xbd, 0x8c, 0xb5, 0x63, 0xbe, + 0xb1, 0xbf, 0x93, 0xbe, 0xf3, 0xb8, 0xbe, 0x36, 0x4b, 0xbd, 0x4f, 0x38, 0xb6, 0xbe, 0xe9, 0xbe, + 0xbb, 0xba, 0x5d, 0x3c, 0xdb, 0x25, 0x3e, 0xc1, 0x65, 0xbc, 0x41, 0xbd, 0x22, 0xbe, 0xfa, 0x31, + 0x32, 0xbd, 0x4e, 0x38, 0xb7, 0xbe, 0x3f, 0xbc, 0x81, 0xad, 0x82, 0xbb, 0x22, 0xba, 0xe2, 0xb3, + 0x39, 0xbc, 0x7d, 0xb4, 0x3e, 0xc0, 0x2b, 0xbc, 0xaf, 0xb9, 0x91, 0xbd, 0x51, 0xc0, 0x27, 0xc1}; +unsigned char conv2d_winograd_fp16_ker[] = { + 0x28, 0xbe, 0x1c, 0xc0, 0x38, 0xbe, 0xde, 0xbb, 0xad, 0xbf, 0x2a, 0xc1, 0x53, 0xc0, 0x29, 0xbd, + 0xea, 0xc0, 0xd5, 0xbc, 0x63, 0xba, 0x39, 0xbf, 0xe7, 0xc1, 0x9f, 0xbc, 0x45, 0xc4, 0x97, 0xc1, + 0xe0, 0xb9, 0x52, 0xc1, 0x1a, 0xc1, 0xa2, 0xc0, 0x6d, 0xc2, 0xb0, 0xbf, 0x7f, 0xc0, 0x4f, 0xb6, + 0x5d, 0xbc, 0x61, 0xbc, 0x0e, 0xbf, 0x43, 0xc2, 0xe8, 0xc0, 0x83, 0xc1, 0x02, 0xbf, 0x01, 0xba, + 0xeb, 0xc0, 0x83, 0xc4, 0x89, 0xbc, 0x10, 0xc3, 0xc8, 0xc0, 0xd1, 0xc0, 0x06, 0xb9, 0x1d, 0xc3, + 0x65, 0xc2, 0x91, 0xc1, 0xdc, 0xbe, 0x79, 0xbd, 0x29, 0xbe, 0x91, 0xc0, 0xd4, 0xbf, 0x98, 0xc1, + 0x4b, 0xc1, 0x68, 0xc4, 0x55, 0xc3, 0x9b, 0xbd, 0x2a, 0xc2, 0x66, 0xc2, 0x42, 0xb9, 0x59, 0xbe, + 0xe0, 0xc0, 0xa1, 0xbc, 0xe8, 0xc0, 0xbc, 0xbf, 0xd1, 0xc3, 0x11, 0xbe, 0xf2, 0xc1, 0xe8, 0xbb, + 0x0c, 0xb0, 0x63, 0xc3, 0x9e, 0xc0, 0xf5, 0xba, 0x8f, 0xc1, 0x1d, 0xbf, 0x05, 0xc0, 0x0e, 0xc2, + 0x50, 0xbf, 0xef, 0xbf, 0x37, 0xc0, 0x0e, 0xbc, 0x87, 0xbd, 0x72, 0xbe, 0xab, 0xb8, 0xbd, 0xc2, + 0xed, 0xbf, 0x5f, 0xbd, 0x2e, 0xc0, 0x0e, 0xbd, 0xfc, 0xbe, 0x93, 0xc1, 0x53, 0xc1, 0x7e, 0xbc, + 0x35, 0xc0, 0x38, 0xc1, 0xbb, 0xaf, 0xba, 0xbe, 0xde, 0xc1, 0xa4, 0xbc, 0x33, 0xbe, 0xcd, 0xc1, + 0x08, 0xbb, 0x0c, 0xc0, 0x31, 0xc0, 0xad, 0xbd, 0x64, 0xc0, 0x4e, 0xbf, 0x91, 0xb9, 0xd5, 0xc1, + 0x95, 0xc0, 0x7d, 0xbf, 0x1c, 0xc2, 0x83, 0xbe, 0x3f, 0xc0, 0xda, 0xbd, 0x7a, 0xbe, 0x07, 0xc2, + 0xa1, 0xbe, 0x45, 0xb9, 0x32, 0xae, 0x44, 0xc0, 0xde, 0xc1, 0xdf, 0xbd, 0x7f, 0xbe, 0xa6, 0xc3, + 0x65, 0xc3, 0x4c, 0xbc, 0xbd, 0xbd, 0xea, 0xc1, 0x80, 0xc1, 0x60, 0xc0, 0x84, 0xc0, 0x9d, 0xc1, + 0x74, 0xbd, 0x75, 0xbe, 0x87, 0xbe, 0xf7, 0xbd, 0x43, 0xbf, 0xfa, 0xc1, 0x2a, 0xc2, 0x84, 0xbb, + 0x2f, 0xbf, 0x37, 0xc1, 0xb6, 0xba, 0x91, 0xc1, 0xc5, 0xc1, 0xee, 0xc2, 0x38, 0xc0, 0xe2, 0xbe, + 0x4b, 0xbe, 0x4c, 0xbd, 0x5e, 0xbe, 0x61, 0xc2, 0x9a, 0xad, 0xbf, 0xbe, 0x51, 0xba, 0x3b, 0xc1, + 0x89, 0xc1, 0xaa, 0xbf, 0x01, 0xbd, 0x3f, 0xc2, 0x05, 0xbe, 0xcd, 0xbc, 0xc3, 0xc0, 0x3d, 0xc2, + 0xab, 0xc3, 0x1c, 0xbe, 0x49, 0xc1, 0x0e, 0xc0, 0x20, 0xc1, 0x88, 0xc2, 0xfc, 0xbf, 0x3f, 0xb9, + 0xf9, 0xb4, 0xc2, 0xb8, 0x94, 0xbe, 0xe1, 0xbf, 0x36, 0xbd, 0x24, 0xc2, 0x84, 0xc1, 0xc7, 0xc1, + 0x1f, 0x33, 0x2a, 0xbf, 0x4b, 0xc0, 0xa3, 0xbf, 0x57, 0xba, 0xbc, 0xba, 0x4f, 0xc0, 0xbe, 0x33, + 0x3d, 0xc3, 0x77, 0xc0, 0x65, 0xb4, 0x18, 0xbd, 0x51, 0xc1, 0xdc, 0xbe, 0xc8, 0xb9, 0x4c, 0xc0, + 0x16, 0x35, 0xbe, 0xbc, 0x31, 0xc1, 0xe4, 0xbd, 0x57, 0xbc, 0x49, 0xc1, 0xd4, 0xbd, 0xeb, 0xba, + 0x02, 0xc1, 0xa8, 0xbb, 0xcd, 0xc0, 0x7b, 0xc0, 0x21, 0xb2, 0x61, 0xc0, 0x8a, 0xc1, 0xe4, 0xbe, + 0x0f, 0xc2, 0xaf, 0xc0, 0x70, 0xc3, 0xd2, 0xbc, 0x67, 0xbd, 0xd9, 0xc1, 0x4e, 0xc2, 0x6e, 0xc1, + 0x1e, 0xc4, 0x09, 0xc3, 0x42, 0xbf, 0x50, 0xc1, 0x52, 0xbd, 0x77, 0xc3, 0x1d, 0xc0, 0x31, 0xbb, + 0xd2, 0xbe, 0x66, 0xc3, 0x9b, 0xbc, 0x4d, 0xbf, 0x66, 0xb6, 0x02, 0xc2, 0xbe, 0xc3, 0xd1, 0x28, + 0xef, 0xc2, 0x11, 0xbd, 0x9d, 0xc2, 0xd9, 0xbd, 0xb0, 0xbe, 0xd9, 0xbf, 0x49, 0xc2, 0x71, 0x9e, + 0x5b, 0xb5, 0x59, 0xc2, 0xf6, 0xbd, 0x4a, 0xb5, 0x12, 0xbd, 0x19, 0xbe, 0x73, 0xc3, 0xe5, 0xbc, + 0xec, 0xbc, 0x2d, 0xbf, 0x43, 0xbe, 0xfc, 0xc0, 0x68, 0xbc, 0x24, 0xc0, 0x7f, 0xc0, 0x8c, 0xc0, + 0x92, 0xba, 0x52, 0xba, 0x42, 0xc0, 0x18, 0xb9, 0x14, 0x3c, 0x11, 0xc2, 0xa2, 0xc2, 0x10, 0xbd, + 0xaa, 0xc0, 0x0f, 0xc0, 0x38, 0xc0, 0xa3, 0xc1, 0x58, 0xbe, 0x62, 0xc2, 0xe9, 0xc0, 0x36, 0xc0, + 0xc6, 0xc1, 0x21, 0xbc, 0xf5, 0xc2, 0x42, 0xbd, 0x35, 0xbc, 0xda, 0xc1, 0xcb, 0xbb, 0x5f, 0xba, + 0x2b, 0xbd, 0xff, 0xc2, 0x5f, 0xab, 0xc7, 0x2c, 0x41, 0xc0, 0x2e, 0xbe, 0x38, 0xc0, 0xf7, 0xc3, + 0x60, 0xbd, 0x73, 0xc2, 0x01, 0xbf, 0x3b, 0xc0, 0x8c, 0xc0, 0x88, 0xae, 0x26, 0xc0, 0x2a, 0xbf, + 0xd5, 0xc0, 0x9e, 0xc2, 0x75, 0xbe, 0x67, 0xc0, 0xc8, 0xbf, 0x7d, 0xbe, 0xf9, 0xc0, 0xaf, 0xbc, + 0x40, 0xba, 0x30, 0xbf, 0x19, 0xc1, 0x16, 0xc3, 0x10, 0xc0, 0x85, 0xb0, 0x31, 0xc3, 0xae, 0xbd, + 0xb0, 0xc0, 0xd4, 0xbd, 0x06, 0xc1, 0x72, 0xbf, 0x02, 0xc0, 0x83, 0xb7, 0x02, 0xc2, 0x56, 0xc2, + 0xa9, 0xc1, 0x7b, 0xbf, 0xce, 0xc0, 0x2a, 0xbf, 0x02, 0xc0, 0x97, 0xc1, 0x91, 0xba, 0xda, 0xb9, + 0xf2, 0xbd, 0xa5, 0xc1, 0xd3, 0xbf, 0x65, 0xbb, 0x32, 0xc0, 0x33, 0xbf, 0x93, 0xbb, 0x73, 0xc0, + 0xa2, 0xbf, 0xe6, 0xc2, 0x29, 0xc2, 0xbc, 0xc1, 0xfa, 0xc0, 0x3d, 0xc1, 0x28, 0xc2, 0xa4, 0xc2, + 0x44, 0xb9, 0x1d, 0xc4, 0x0d, 0xbf, 0x05, 0xc0, 0xe0, 0xc0, 0xc3, 0xbf, 0x25, 0x2c, 0xc3, 0xc1, + 0x03, 0xbf, 0x58, 0xbf, 0x21, 0xbe, 0x3c, 0xbd, 0x6f, 0xc3, 0x89, 0xc1, 0x14, 0xc0, 0xce, 0xc3, + 0xd3, 0xbd, 0xeb, 0xc1, 0x28, 0xc2, 0x79, 0xc1, 0x57, 0xbf, 0xe3, 0xbe, 0xa8, 0xbc, 0xca, 0xc0, + 0x5a, 0xbd, 0xaa, 0xbe, 0x40, 0xbd, 0x0d, 0xc1, 0x5b, 0xb9, 0x8f, 0xbc, 0xc5, 0xc1, 0xfd, 0xb9, + 0x1a, 0xc0, 0x6a, 0xc1, 0xac, 0xc1, 0x89, 0xbf, 0xf2, 0xbc, 0x7e, 0xc3, 0x04, 0xc2, 0xbe, 0xc0, + 0x3b, 0xc0, 0x2a, 0xc1, 0x4a, 0xc2, 0xa4, 0xc1, 0x60, 0xc2, 0x3b, 0xbd, 0x75, 0x35, 0xcc, 0xc0, + 0xbe, 0xc1, 0x74, 0xc0, 0x8e, 0xc0, 0xb6, 0xc0, 0xa1, 0xc0, 0x59, 0xc1, 0xbe, 0xc0, 0xe9, 0xbc, + 0x9f, 0xbe, 0x6e, 0xbe, 0x54, 0xc0, 0x28, 0xc2, 0x05, 0xbc, 0xf1, 0xc1, 0x26, 0xa7, 0x6b, 0xbe, + 0x4b, 0xbd, 0xc4, 0xb9, 0x48, 0xbe, 0x0b, 0xbb, 0x68, 0xbf, 0xe9, 0xbc, 0xe5, 0xbc, 0xdc, 0xc1, + 0xdc, 0xc4, 0xcd, 0xc1, 0xf7, 0xa4, 0xb1, 0x35, 0x32, 0xc0, 0x9c, 0xbe, 0x3a, 0xc0, 0x13, 0xc0, + 0x76, 0xb8, 0x47, 0xb9, 0x26, 0xc1, 0x25, 0xc2, 0x40, 0x38, 0x4c, 0xc2, 0xfb, 0x30, 0x32, 0xc0, + 0xb0, 0xb6, 0xaa, 0xbc, 0x7f, 0xc1, 0x42, 0xc0, 0xd5, 0xbf, 0x8d, 0xc1, 0xe0, 0xbe, 0x4b, 0xba, + 0x77, 0xbf, 0x16, 0xbe, 0xfc, 0xbf, 0x13, 0xc0, 0x52, 0xc0, 0x82, 0xc0, 0xf7, 0xbf, 0xe5, 0xb0, + 0x44, 0xc2, 0xe6, 0xbe, 0x8b, 0xba, 0x75, 0xbd, 0xb6, 0xc1, 0xcb, 0xbd, 0xb1, 0xc0, 0x28, 0xc3, + 0x09, 0xc3, 0xaa, 0xc0, 0xda, 0xbc, 0xde, 0xbd, 0x90, 0xb6, 0xeb, 0xc2, 0x13, 0xc0, 0x6e, 0xc2, + 0x40, 0xbd, 0x0a, 0xc0, 0xfb, 0xbc, 0x3c, 0xb8, 0xf1, 0xbf, 0x9f, 0xc0, 0xac, 0xc2, 0x8b, 0xc0, + 0x31, 0xc2, 0xbe, 0xc1, 0xc8, 0xbf, 0x19, 0xb9, 0x8f, 0xbc, 0x38, 0xbd, 0x2c, 0xc0, 0x4e, 0xc2, + 0xa9, 0xc3, 0x77, 0xc1, 0xa3, 0xbe, 0x2c, 0xc2, 0x67, 0xbe, 0x0b, 0xbe, 0xf1, 0xbc, 0xf6, 0xc0, + 0x58, 0xb7, 0x3a, 0xbf, 0xef, 0xbf, 0x6d, 0x3b, 0xe3, 0xc3, 0x04, 0xc4, 0x38, 0xc2, 0xdf, 0xbe, + 0x03, 0xbf, 0x88, 0xba, 0x13, 0xc0, 0x52, 0xbc, 0x85, 0xbe, 0x9a, 0xc4, 0x05, 0xbf, 0x96, 0xbb, + 0xab, 0xb3, 0x39, 0xb7, 0xfc, 0xc2, 0x64, 0xbf, 0x3a, 0xc2, 0xc1, 0xc1, 0xf3, 0xc1, 0x76, 0xbf, + 0x37, 0xbc, 0xd2, 0x33, 0xcb, 0xc0, 0x86, 0xc1, 0x10, 0xc1, 0x61, 0xc0, 0x60, 0xc1, 0xc8, 0xc0, + 0x36, 0xc0, 0x3d, 0xc0, 0xba, 0xb5, 0x60, 0xbc, 0x88, 0xbe, 0xe2, 0xbe, 0x52, 0xc1, 0xff, 0xc2, + 0xb7, 0xb1, 0x8f, 0xc0, 0x8a, 0xbd, 0xf6, 0xc0, 0xb7, 0xbe, 0x4f, 0xbe, 0x19, 0xc2, 0xa0, 0xc0, + 0xae, 0xbf, 0xf8, 0xc1, 0x94, 0xc3, 0xdc, 0xbd, 0x4b, 0xbf, 0x87, 0xbe, 0x43, 0xc0, 0x02, 0xc3, + 0xa2, 0xc2, 0x35, 0xbc, 0x47, 0xc3, 0xfc, 0x38, 0x0c, 0xbb, 0x71, 0xbd, 0xde, 0xc0, 0x2d, 0xbc, + 0x78, 0xbd, 0x65, 0xc2, 0x0e, 0xbc, 0x1c, 0xbc, 0x09, 0xc2, 0x22, 0xbe, 0xe2, 0xc1, 0xdd, 0xbb, + 0x58, 0xc0, 0x0e, 0xc0, 0x16, 0xc2, 0x80, 0xc1, 0xfc, 0xbc, 0x2c, 0xc2, 0x99, 0xc3, 0x07, 0xc1, + 0xa7, 0xbc, 0x4d, 0xc1, 0x4e, 0xc2, 0xb0, 0xba, 0x04, 0xbc, 0x27, 0xc0, 0x84, 0xbc, 0x68, 0xc0, + 0x91, 0xc2, 0x75, 0xb9, 0x54, 0xc0, 0x61, 0xc1, 0xdb, 0xbe, 0x77, 0xbb, 0x44, 0xbd, 0x80, 0xc2, + 0xf0, 0x2b, 0xe4, 0xbe, 0xcd, 0xb8, 0x5b, 0xc1, 0x21, 0xc0, 0x02, 0xba, 0xf2, 0xbd, 0x67, 0xc0, + 0xe6, 0xba, 0x58, 0xc2, 0x96, 0xbb, 0xa6, 0xc2, 0x44, 0xbf, 0x63, 0xc0, 0xde, 0xc0, 0x0d, 0xc1, + 0x72, 0xc1, 0x28, 0xc3, 0xd6, 0xc1, 0x1c, 0xb9, 0x4c, 0xbf, 0x49, 0xbf, 0xb8, 0xb4, 0xd5, 0xc2, + 0x9f, 0xc1, 0x53, 0xba, 0x09, 0xc2, 0xd8, 0x30, 0xd3, 0xc0, 0xd8, 0xbe, 0x28, 0xbe, 0x5e, 0xc0, + 0x2f, 0xc3, 0xf4, 0xbd, 0x3d, 0xbd, 0x37, 0xc0, 0xeb, 0xc0, 0x21, 0xc0, 0xe2, 0xb9, 0x20, 0xb9, + 0xa5, 0xc0, 0xe6, 0xbe, 0x16, 0xc4, 0x07, 0xbc, 0x93, 0xbd, 0x95, 0xc1, 0x91, 0xb5, 0xaa, 0xc1, + 0xa1, 0xbe, 0x8a, 0xba, 0xf4, 0xbc, 0xf1, 0xc1, 0x46, 0xc1, 0x8f, 0xbd, 0xa0, 0xbd, 0x21, 0xc0, + 0xc1, 0xc0, 0x9f, 0xbc, 0x3c, 0xc1, 0x61, 0xc1, 0xc4, 0xbe, 0x76, 0xbd, 0x69, 0xc0, 0xb0, 0xbe, + 0x21, 0xbc, 0x09, 0xc0, 0x86, 0xc1, 0x51, 0xbc, 0x7d, 0xbf, 0xad, 0xbf, 0xec, 0xbb, 0x98, 0xc0, + 0x0e, 0xc1, 0x13, 0xc1, 0x06, 0xc1, 0x38, 0xbd, 0x2e, 0xbe, 0xd1, 0xc0, 0x5c, 0xb4, 0xfd, 0xbd, + 0x49, 0xb0, 0x6b, 0xc0, 0x25, 0xc1, 0x7b, 0xbf, 0x91, 0xc0, 0x4a, 0xc4, 0x07, 0xc0, 0xf0, 0xbd, + 0x5a, 0xbf, 0x40, 0xc0, 0x17, 0xbf, 0xd4, 0xbf, 0xd2, 0xbe, 0x76, 0xc2, 0x33, 0xc2, 0x2a, 0xb2, + 0x28, 0xbd, 0x75, 0xc1, 0xa0, 0xbe, 0x0d, 0xc4, 0x57, 0xbc, 0x78, 0xc2, 0x2e, 0xc3, 0x62, 0xbe, + 0xfb, 0xbe, 0x48, 0xa9, 0x93, 0xc0, 0x9e, 0xc1, 0xaf, 0xc1, 0x76, 0xc0, 0x94, 0xc1, 0xfb, 0xbf, + 0xc8, 0xc1, 0xdc, 0xbe, 0xca, 0xbb, 0x23, 0xbe, 0xfd, 0xc4, 0x2c, 0xc0, 0x46, 0xc0, 0xd3, 0xc4, + 0xab, 0xc2, 0x84, 0xbb, 0x64, 0xc1, 0x2d, 0xb4, 0x25, 0xbd, 0x8c, 0xb8, 0xaa, 0xc1, 0x75, 0xc2, + 0x0f, 0xbf, 0x28, 0xc0, 0xde, 0xbf, 0x6e, 0xc2, 0xfc, 0xb7, 0x6d, 0xb9, 0x5c, 0xbe, 0xa4, 0xc4, + 0x27, 0xc0, 0xc4, 0xc2, 0x72, 0xb4, 0x43, 0xc2, 0xe8, 0xc2, 0xb5, 0xbd, 0x2b, 0xbe, 0xd6, 0xc3, + 0xc1, 0xb8, 0x5f, 0xc1, 0xde, 0xc0, 0x96, 0xbf, 0x99, 0xb9, 0x0e, 0xbd, 0x8b, 0xbb, 0x43, 0xbe, + 0xa3, 0xc1, 0x97, 0xbf, 0xa3, 0xbf, 0x08, 0xbf, 0x27, 0xbf, 0xae, 0xc1, 0x39, 0xbd, 0xf1, 0xbf, + 0x79, 0xc1, 0x54, 0xbf, 0xbc, 0xc2, 0xd6, 0xbe, 0x5a, 0xbc, 0x4d, 0xbe, 0x8d, 0xb9, 0xd2, 0xc2, + 0xe0, 0xc0, 0xd5, 0xc2, 0x7e, 0xbf, 0x31, 0xbf, 0x03, 0xbe, 0xa7, 0xbe, 0x22, 0xc0, 0x3a, 0xc0, + 0xf2, 0xbc, 0x39, 0xb9, 0x9c, 0x3c, 0x89, 0xbd, 0x2a, 0xc1, 0x02, 0xc0, 0x88, 0xc0, 0x07, 0xc2, + 0x92, 0xc1, 0xc3, 0xbb, 0x88, 0xbe, 0xe9, 0xba, 0x19, 0xbe, 0x70, 0xc1, 0xd4, 0xbc, 0xd5, 0xbc, + 0xb6, 0xbe, 0x1f, 0xc0, 0xdc, 0xbf, 0xa8, 0xc2, 0x88, 0xbf, 0xe5, 0xc0, 0x21, 0xc0, 0xeb, 0xbf, + 0xac, 0xbe, 0x3c, 0xc0, 0xb0, 0xc2, 0xdf, 0xc0, 0xb7, 0xc1, 0xa8, 0xc3, 0x2b, 0xb5, 0xd0, 0xb2, + 0x74, 0xbe, 0xe4, 0xb5, 0xb4, 0xbd, 0x44, 0xc1, 0x1c, 0xbb, 0x96, 0xc3, 0xfb, 0xba, 0xa2, 0xc3, + 0x84, 0xc1, 0x40, 0xbc, 0xe0, 0xbd, 0xd7, 0xbe, 0x80, 0xc1, 0x75, 0xc0, 0xb2, 0xc0, 0x7d, 0xc2, + 0xc0, 0xbc, 0x0e, 0xbc, 0xb9, 0xbe, 0x76, 0xb9, 0xc0, 0xc2, 0xcb, 0xbf, 0xef, 0xc0, 0x2f, 0xbe, + 0xb3, 0xbe, 0x22, 0xbe, 0x9b, 0xb8, 0xd4, 0xc0, 0x5b, 0xc1, 0xe8, 0xc1, 0x9a, 0xc0, 0x04, 0xbf, + 0x18, 0xbf, 0x87, 0xbc, 0x3e, 0xc0, 0x42, 0xc2, 0x24, 0xc0, 0xba, 0xbb, 0x1f, 0xc1, 0x4d, 0xbd, + 0xbe, 0xb9, 0x24, 0xc0, 0x22, 0xc0, 0x37, 0xbe, 0x61, 0xbd, 0xdd, 0xbb, 0xb8, 0xc1, 0x52, 0xbe, + 0x0e, 0xc0, 0x64, 0xb8, 0x4c, 0xbe, 0xd2, 0xba, 0xef, 0xc2, 0x82, 0xc3, 0x45, 0xb9, 0xa1, 0xba, + 0x63, 0xc0, 0x10, 0xc2, 0x14, 0xc2, 0xd1, 0xc1, 0x5d, 0xbf, 0x02, 0xbf, 0x1a, 0xac, 0x59, 0xc1, + 0x41, 0xbe, 0x99, 0xb4, 0x75, 0xc2, 0xf2, 0x37, 0xb7, 0xc0, 0x55, 0xc1, 0xb0, 0xba, 0x8d, 0xbe, + 0x65, 0xbd, 0x45, 0xc0, 0x1f, 0xbd, 0x77, 0xbc, 0x49, 0xc2, 0x39, 0xc1, 0xcb, 0xb8, 0x2d, 0xbe, + 0x90, 0xbb, 0x0e, 0xc2, 0x35, 0xc0, 0xad, 0xc3, 0x86, 0xba, 0xb5, 0xc2, 0x07, 0xc0, 0xcd, 0xbd, + 0x2f, 0xc1, 0x1c, 0xc1, 0x0d, 0xc2, 0x13, 0xc1, 0x16, 0xc1, 0xee, 0xba, 0x13, 0xba, 0xd7, 0xc4, + 0xf8, 0xc1, 0xfe, 0xba, 0xf1, 0xbe, 0xba, 0xbb, 0x67, 0xbf, 0xa4, 0xc4, 0xd2, 0xb5, 0x9b, 0xc2, + 0xdc, 0xc0, 0xe4, 0xbf, 0x94, 0xc0, 0x45, 0xbd, 0xf2, 0xc1, 0xa0, 0xbd, 0xd4, 0x33, 0x8b, 0xc3, + 0x51, 0xbf, 0x48, 0xbd, 0xc2, 0xb5, 0xcc, 0xc2, 0x05, 0xbf, 0x59, 0xc0, 0x18, 0xbe, 0x41, 0x32, + 0xf3, 0xc0, 0x0e, 0xbf, 0xe6, 0xba, 0xd8, 0xc3, 0x19, 0xc0, 0x2f, 0xbb, 0xb9, 0xbe, 0xb4, 0xc2, + 0x1e, 0xc0, 0x4a, 0xc1, 0xa2, 0x39, 0xad, 0xc2, 0x9a, 0xc2, 0x57, 0xc3, 0x64, 0xc0, 0xc5, 0xc3, + 0x89, 0xc3, 0x8f, 0xb6, 0x7b, 0xc2, 0x27, 0xc0, 0x41, 0xc0, 0x25, 0xc0, 0x7f, 0xc0, 0x3a, 0xc0, + 0x70, 0xc1, 0x5a, 0xb9, 0x99, 0xbd, 0x8e, 0x33, 0x65, 0xc1, 0x6d, 0xc0, 0x3c, 0xbe, 0x69, 0xbf, + 0x11, 0xc3, 0x26, 0xbc, 0x60, 0xc0, 0x52, 0xbf, 0xee, 0xc1, 0x9a, 0xbf, 0x27, 0xc0, 0xf7, 0xc0, + 0x81, 0xbe, 0xef, 0xc2, 0x7b, 0xbd, 0xc1, 0xc2, 0x2f, 0xc1, 0xcd, 0xbc, 0xa5, 0xc0, 0x0c, 0xbf, + 0x77, 0xc1, 0x60, 0xb8, 0xdc, 0xc0, 0x17, 0xb8, 0x67, 0xbd, 0xb0, 0xbc, 0x4f, 0xbf, 0x96, 0xc1, + 0x6e, 0xc1, 0xc2, 0xb5, 0x48, 0xbb, 0xcb, 0xbf, 0xc0, 0xc2, 0xba, 0xbf, 0x60, 0xba, 0xba, 0xb8, + 0x0f, 0xc4, 0x93, 0xc1, 0x2f, 0xc0, 0x69, 0xc1, 0x09, 0xc1, 0xa6, 0xb8, 0xe6, 0xbe, 0x02, 0xc1, + 0xdf, 0xc0, 0xca, 0xc0, 0x8b, 0xc0, 0x22, 0xc0, 0xa3, 0xc0, 0x5b, 0xbe, 0xea, 0xc3, 0x3d, 0xc0, + 0x87, 0xc1, 0xbe, 0xc3, 0x37, 0xc2, 0x86, 0xbd, 0x82, 0xbd, 0x59, 0xc0, 0x08, 0xbc, 0x10, 0xc2, + 0x81, 0xc1, 0xd3, 0xbc, 0xe7, 0xbd, 0xe5, 0xbe, 0x6c, 0xc0, 0x25, 0xbd, 0x41, 0x21, 0x62, 0xc1, + 0x2d, 0xbf, 0xdd, 0xc0, 0x53, 0xbf, 0x11, 0xbe, 0x33, 0xb7, 0x34, 0xb9, 0x5c, 0xc3, 0x5e, 0xc1, + 0x32, 0xc2, 0x0d, 0x34, 0xa7, 0xc0, 0xe3, 0xbc, 0xa2, 0xc2, 0x25, 0xc1, 0x1f, 0xc1, 0xa0, 0xbf, + 0xa3, 0xc0, 0x73, 0xc0, 0xe8, 0xbb, 0x4a, 0xc1, 0xbc, 0xc0, 0x47, 0xc1, 0x21, 0xc2, 0x4d, 0xc1, + 0x99, 0xbc, 0x90, 0xc1, 0x12, 0xc1, 0x98, 0xc0, 0x2e, 0xbc, 0x8c, 0xbc, 0x25, 0xbe, 0x13, 0xbc, + 0xae, 0xb9, 0x62, 0xc0, 0x41, 0xc0, 0x1b, 0xc4, 0x1a, 0xc1, 0x0d, 0xc3, 0xb5, 0xbd, 0x76, 0xc0, + 0x1e, 0xad, 0x64, 0xbf, 0xb5, 0xb9, 0xe8, 0xbf, 0x11, 0xc0, 0xf8, 0xbe, 0xc1, 0xc4, 0x16, 0xc1, + 0xa5, 0xc0, 0x23, 0xc0, 0x73, 0xbe, 0x9a, 0xbd, 0xd0, 0xc0, 0x5d, 0xbf, 0xd7, 0xbf, 0x84, 0xbf, + 0x61, 0xc3, 0x29, 0xc1, 0x32, 0xc2, 0xbb, 0xbc, 0x78, 0xc0, 0xe1, 0x31, 0xfe, 0xc0, 0xdd, 0x27, + 0x86, 0xb2, 0x59, 0xbc, 0x1f, 0x38, 0x10, 0xc2, 0xba, 0xbd, 0x78, 0xc1, 0x87, 0xc0, 0x64, 0xb5, + 0x62, 0xc1, 0x24, 0xc1, 0x41, 0xbd, 0x6f, 0xb4, 0x3b, 0xb9, 0x47, 0xc0, 0x87, 0xc0, 0x1d, 0xbe, + 0x56, 0xc2, 0x9f, 0xc0, 0x6a, 0xc0, 0xfa, 0xc0, 0x03, 0xc3, 0x39, 0xb3, 0x42, 0xc2, 0xc4, 0xc1, + 0x1a, 0xc4, 0xb6, 0xc0, 0x3d, 0xbf, 0x37, 0xba, 0x15, 0xbe, 0x0f, 0xc2, 0x5c, 0xc0, 0xb8, 0xbe, + 0x99, 0xbf, 0x66, 0xc1, 0xea, 0xbe, 0xf1, 0xc2, 0x3d, 0xc0, 0xd9, 0xbf, 0x29, 0xbf, 0x8e, 0xbe, + 0x70, 0xbb, 0x3a, 0xc1, 0xc8, 0xbf, 0x85, 0xbe, 0x1f, 0xc1, 0x50, 0xc2, 0xfa, 0xbd, 0x3f, 0xb9, + 0x36, 0xc3, 0x6f, 0xbf, 0x2e, 0xbe, 0x69, 0xc0, 0xd1, 0xc0, 0x01, 0xc0, 0xc1, 0xc1, 0x88, 0xbd, + 0x95, 0xbc, 0x91, 0xc2, 0x05, 0xc2, 0x2e, 0xc3, 0x39, 0xbf, 0xef, 0xc2, 0x78, 0xbd, 0x15, 0xc1, + 0x73, 0xbe, 0xff, 0xbe, 0x3b, 0xc0, 0xef, 0xbd, 0x22, 0xc0, 0x67, 0xbd, 0x20, 0xbb, 0xab, 0xbc, + 0xef, 0xb9, 0x80, 0xc0, 0x4d, 0xc1, 0xdb, 0xc0, 0xfe, 0xbd, 0x4f, 0xc0, 0x6a, 0xc3, 0x2c, 0xc0}; +unsigned char conv2d_winograd_fp16_ker1[] = { + 0x28, 0xbe, 0x50, 0xbf, 0x4b, 0xbe, 0x1e, 0xc4, 0x60, 0xbd, 0xd3, 0xbd, 0xb0, 0xb6, 0xab, 0xb3, + 0xd5, 0xbc, 0x5f, 0xbd, 0xaa, 0xbf, 0x66, 0xc3, 0x9e, 0xc2, 0xaa, 0xbe, 0x16, 0xbe, 0xd2, 0x33, + 0x1a, 0xc1, 0xbb, 0xaf, 0x49, 0xc1, 0x9d, 0xc2, 0x19, 0xc1, 0xac, 0xc1, 0x8b, 0xba, 0xba, 0xb5, + 0x43, 0xc2, 0xad, 0xbd, 0xe1, 0xbf, 0x4a, 0xb5, 0x72, 0xbf, 0xa4, 0xc1, 0xde, 0xbd, 0xf6, 0xc0, + 0xc8, 0xc0, 0x3f, 0xc0, 0x57, 0xba, 0x68, 0xbc, 0x02, 0xc0, 0xa1, 0xc0, 0xf1, 0xbf, 0x4b, 0xbf, + 0x91, 0xc0, 0xdf, 0xbd, 0xdc, 0xbe, 0x11, 0xc2, 0x33, 0xbf, 0xf1, 0xc1, 0x38, 0xbd, 0x71, 0xbd, + 0x42, 0xb9, 0x84, 0xc0, 0xd4, 0xbd, 0xe9, 0xc0, 0x28, 0xc2, 0xe5, 0xbc, 0xf1, 0xbc, 0xe2, 0xc1, + 0xe8, 0xbb, 0x84, 0xbb, 0xe4, 0xbe, 0x5f, 0xba, 0xc3, 0xc1, 0x13, 0xc0, 0xdf, 0xbe, 0x07, 0xc1, + 0x2a, 0x3c, 0x16, 0x3a, 0xf0, 0x3c, 0xd9, 0x3f, 0xeb, 0x3c, 0xc3, 0x3c, 0x95, 0x3b, 0x7f, 0x3c, + 0x2e, 0x3e, 0x7d, 0x3b, 0xd0, 0x3d, 0x38, 0x3b, 0xb6, 0x3d, 0x7a, 0x39, 0xd2, 0x3a, 0x28, 0x3c, + 0xf2, 0x3c, 0xae, 0x37, 0x87, 0x3d, 0xfb, 0x3c, 0x79, 0x3c, 0xba, 0x3f, 0x24, 0x3d, 0x03, 0x38, + 0x2c, 0x40, 0x16, 0x3b, 0xcc, 0x3d, 0x32, 0x3d, 0xfc, 0x3d, 0x2e, 0x3c, 0xe8, 0x3c, 0x91, 0x3f, + 0xcf, 0x3e, 0xa6, 0x3c, 0xde, 0x31, 0xe4, 0x3c, 0x2c, 0x3c, 0x12, 0x3d, 0x84, 0x3d, 0xf8, 0x3f, + 0xa1, 0x3d, 0x38, 0x3f, 0x1a, 0x39, 0x45, 0x3f, 0xd8, 0x3d, 0x99, 0x3c, 0x4e, 0x3f, 0xac, 0x3a, + 0x16, 0x3d, 0x0e, 0x3d, 0xa1, 0x38, 0x09, 0x3c, 0x47, 0x40, 0x88, 0x3d, 0x35, 0x3e, 0x86, 0x3d, + 0x82, 0x3c, 0xa9, 0x3c, 0x6f, 0x3f, 0x44, 0x38, 0x62, 0x3e, 0xe6, 0x3e, 0x6d, 0x3f, 0xe1, 0x3e, + 0xd5, 0x38, 0xf8, 0x34, 0xdf, 0xb1, 0x40, 0x3a, 0xa2, 0x34, 0xa0, 0xa6, 0x00, 0x17, 0xdb, 0x34, + 0x7a, 0x33, 0x1e, 0x31, 0x46, 0x3a, 0xcc, 0x39, 0x81, 0x38, 0x34, 0x36, 0xe7, 0xae, 0x78, 0xad, + 0x1e, 0x36, 0x90, 0xa8, 0x75, 0xac, 0xfa, 0x35, 0x39, 0x3c, 0x49, 0x34, 0x21, 0x39, 0x36, 0xb4, + 0x3c, 0x3d, 0x9d, 0x38, 0x20, 0x33, 0xb2, 0xb5, 0x2c, 0x31, 0xca, 0x3c, 0x27, 0x35, 0x4c, 0x38, + 0xd4, 0x2f, 0xa4, 0xb1, 0xa7, 0x34, 0xce, 0x32, 0xbd, 0x39, 0xc7, 0x39, 0xe5, 0x35, 0xf7, 0x36, + 0x62, 0x33, 0x2c, 0x31, 0x3b, 0x3a, 0x41, 0x3a, 0xe8, 0x38, 0x7e, 0x38, 0xf0, 0x2f, 0x42, 0x33, + 0x0e, 0x3a, 0x5e, 0x38, 0xea, 0x30, 0x66, 0x38, 0xfc, 0x34, 0xfc, 0x2d, 0xfe, 0x39, 0xad, 0x37, + 0x88, 0x2e, 0x57, 0x3a, 0x98, 0x32, 0x0f, 0x38, 0x51, 0x3b, 0xa5, 0x38, 0x9c, 0x3b, 0x1d, 0x35, + 0x52, 0xb0, 0x67, 0xac, 0xe6, 0xaf, 0x46, 0xb2, 0xee, 0xb0, 0x1e, 0xb0, 0x1b, 0xb0, 0xa1, 0xb1, + 0x80, 0xb2, 0xa2, 0xae, 0x30, 0xb2, 0x2f, 0xaa, 0x39, 0xb0, 0x44, 0xac, 0x97, 0xac, 0x1c, 0xb1, + 0xa6, 0xaf, 0x3c, 0xac, 0x68, 0xaf, 0x18, 0xae, 0x57, 0xb0, 0xae, 0xb2, 0x52, 0xb2, 0x6b, 0xaa, + 0x63, 0xb4, 0x52, 0xaf, 0x35, 0xb1, 0x51, 0xb1, 0x74, 0xb1, 0xda, 0xaf, 0xd7, 0xb0, 0x4b, 0xb3, + 0xd1, 0xb1, 0x12, 0xae, 0x01, 0xa4, 0x09, 0xb1, 0x04, 0xb0, 0xc6, 0xb0, 0x16, 0xb1, 0x28, 0xb4, + 0xb0, 0xb0, 0x5a, 0xb3, 0xf4, 0xac, 0xbe, 0xb2, 0x13, 0xb2, 0x7f, 0xae, 0x93, 0xb3, 0xd6, 0xad, + 0x9e, 0xb2, 0x88, 0xb0, 0xe2, 0xa9, 0x34, 0xae, 0x7b, 0xb3, 0x7b, 0xb1, 0x54, 0xb3, 0x42, 0xb0, + 0x86, 0xb0, 0xdb, 0xb1, 0x6a, 0xb3, 0x0b, 0xad, 0x0c, 0xb2, 0x08, 0xb3, 0x4d, 0xb4, 0x16, 0xb2, + 0xd8, 0xad, 0x12, 0xa6, 0xb0, 0x24, 0x00, 0xad, 0xb1, 0xab, 0x48, 0x9f, 0x50, 0xa8, 0x01, 0xae, + 0x9d, 0xac, 0xaa, 0xa6, 0x0b, 0xb0, 0xd2, 0xa7, 0xd5, 0xa9, 0xb9, 0xa8, 0x38, 0x26, 0x0c, 0xaa, + 0x5e, 0xa8, 0x7e, 0xa3, 0x87, 0x27, 0x1d, 0xa0, 0x23, 0xb0, 0x68, 0xa9, 0x43, 0xb0, 0xbe, 0x26, + 0x48, 0xb2, 0x58, 0xad, 0x25, 0xa9, 0x00, 0x91, 0xbe, 0xa8, 0x69, 0xb0, 0xc7, 0xab, 0xea, 0xad, + 0x10, 0xa6, 0x00, 0x29, 0xc1, 0xa6, 0x36, 0xab, 0xf2, 0xad, 0x0e, 0xae, 0x6c, 0xab, 0xa9, 0xae, + 0x60, 0xa7, 0x31, 0xac, 0xdc, 0xad, 0xdb, 0xae, 0xb9, 0xae, 0x78, 0xa9, 0x42, 0xac, 0xc8, 0xa7, + 0xf8, 0xb0, 0x7a, 0xac, 0x0c, 0x9b, 0x89, 0xaa, 0x8a, 0xaa, 0x6c, 0xa9, 0xc3, 0xb0, 0x81, 0xa9, + 0xf5, 0xa8, 0xaa, 0xb0, 0x40, 0xac, 0xe1, 0xac, 0xbe, 0xaf, 0xbe, 0xae, 0xb5, 0xb1, 0x6b, 0xaa, + 0x50, 0xab, 0x0e, 0xab, 0xc9, 0xac, 0x3d, 0xb0, 0x27, 0xac, 0x6e, 0xac, 0x70, 0xa9, 0x6c, 0xa9, + 0xcf, 0xac, 0x19, 0xab, 0xe7, 0xac, 0x89, 0xad, 0x81, 0xae, 0x39, 0xaa, 0x82, 0xab, 0x9a, 0xa8, + 0x61, 0xad, 0x3c, 0xa5, 0x30, 0xae, 0x37, 0xae, 0x8f, 0xac, 0x72, 0xaf, 0xe2, 0xaa, 0x2f, 0xa7, + 0x4e, 0xaf, 0x5b, 0xaa, 0x66, 0xad, 0x84, 0xab, 0x72, 0xad, 0x90, 0xac, 0x41, 0xac, 0xc2, 0xae, + 0x8a, 0xae, 0x33, 0xad, 0x36, 0xa4, 0xe2, 0xab, 0x10, 0xac, 0xef, 0xac, 0x21, 0xad, 0x60, 0xae, + 0xa0, 0xad, 0xc5, 0xad, 0x78, 0xa9, 0xf8, 0xae, 0xef, 0xac, 0x7a, 0xad, 0xad, 0xad, 0x8b, 0xaa, + 0x4c, 0xaa, 0x01, 0xad, 0xa4, 0xa9, 0x99, 0xac, 0x15, 0xb0, 0x8c, 0xac, 0x71, 0xac, 0x11, 0xae, + 0x5c, 0xab, 0x54, 0xaa, 0x22, 0xae, 0xe4, 0xa6, 0x2c, 0xae, 0xd8, 0xad, 0x87, 0xad, 0x8d, 0xae, + 0x84, 0xa8, 0x2c, 0xa8, 0xfc, 0x9b, 0xb3, 0xac, 0x93, 0xa4, 0x50, 0xa0, 0xf0, 0x1c, 0x70, 0x95, + 0xe9, 0xa0, 0x45, 0xa4, 0x86, 0xa9, 0xf7, 0xac, 0x79, 0xab, 0x52, 0xa8, 0x75, 0xa1, 0x30, 0x25, + 0x4c, 0xa9, 0x72, 0x1d, 0x2f, 0xa6, 0xdb, 0xaa, 0x5c, 0xac, 0x3d, 0xa8, 0x89, 0xa5, 0x36, 0x21, + 0xd0, 0xac, 0x61, 0xa8, 0xe8, 0xa5, 0x29, 0x26, 0xb4, 0xa4, 0x0c, 0xad, 0x6c, 0xa5, 0xd7, 0xa8, + 0xea, 0xa5, 0x4a, 0xa3, 0x96, 0xa5, 0xa8, 0xa1, 0x0d, 0xaa, 0x60, 0xaa, 0x98, 0xa7, 0x94, 0xa5, + 0x73, 0xa7, 0x14, 0xa0, 0x60, 0xaa, 0x50, 0xab, 0x72, 0xa8, 0x30, 0xab, 0x58, 0x9b, 0x50, 0xa5, + 0x02, 0xa6, 0x6a, 0xa9, 0xd8, 0xa5, 0x42, 0xaa, 0xa2, 0xa8, 0xc6, 0x9e, 0x7f, 0xa7, 0x5f, 0xaa, + 0x56, 0x9e, 0xe2, 0xa7, 0xc0, 0xa2, 0x90, 0xa6, 0xfc, 0xab, 0x5f, 0xa8, 0x43, 0xa9, 0x25, 0xa8, + 0x53, 0xc0, 0xab, 0xb8, 0x51, 0xba, 0x1d, 0xc0, 0x26, 0xc0, 0xa8, 0xbc, 0xe0, 0xbe, 0xf3, 0xc1, + 0x97, 0xc1, 0x7e, 0xbc, 0x3d, 0xc2, 0xd1, 0x28, 0xaf, 0xbc, 0xfd, 0xb9, 0xe5, 0xb0, 0xc8, 0xc0, + 0x5d, 0xbc, 0x08, 0xbb, 0xf9, 0xb4, 0x5b, 0xb5, 0xb0, 0xc0, 0x3b, 0xc0, 0x09, 0xc3, 0xb7, 0xb1, + 0x83, 0xc4, 0x7d, 0xbf, 0x2a, 0xbf, 0x2d, 0xbf, 0x7b, 0xbf, 0x74, 0xc0, 0x0a, 0xc0, 0xf8, 0xc1, + 0xdc, 0xbe, 0x32, 0xae, 0x65, 0xb4, 0x42, 0xc0, 0xd3, 0xbf, 0x54, 0xc0, 0xc8, 0xbf, 0x47, 0xc3, + 0x9b, 0xbd, 0xea, 0xc1, 0xe4, 0xbd, 0xa3, 0xc1, 0xbc, 0xc1, 0x0b, 0xbb, 0x2c, 0xc2, 0x1c, 0xbc, + 0xd1, 0xc3, 0x43, 0xbf, 0x21, 0xb2, 0x35, 0xbc, 0xe0, 0xc0, 0x32, 0xc0, 0xe3, 0xc3, 0xfc, 0xbc, + 0x1d, 0xbf, 0xee, 0xc2, 0xd9, 0xc1, 0x2e, 0xbe, 0x89, 0xc1, 0x4c, 0xc2, 0x9a, 0xc4, 0x27, 0xc0, + 0x94, 0x3c, 0x42, 0x3d, 0xfa, 0x3b, 0x32, 0x40, 0x9d, 0x3d, 0xa8, 0x3e, 0xb2, 0x3b, 0x70, 0x3b, + 0xc6, 0x3a, 0x2c, 0x3c, 0x97, 0x3d, 0xef, 0x3d, 0x55, 0x3e, 0xe4, 0x3c, 0xf0, 0x3c, 0x5e, 0x3c, + 0x2f, 0x3f, 0x36, 0x3c, 0x6d, 0x3e, 0xb9, 0x3d, 0x38, 0x3f, 0x4b, 0x3d, 0x7a, 0x3c, 0x7c, 0x39, + 0x69, 0x3f, 0xd6, 0x3c, 0xa2, 0x3d, 0x8c, 0x39, 0xb5, 0x3b, 0x80, 0x3e, 0xbe, 0x3c, 0x19, 0x3d, + 0xd3, 0x3c, 0xa0, 0x3c, 0xbc, 0x3a, 0xd1, 0x3c, 0xff, 0x3c, 0x8a, 0x3e, 0xc8, 0x3e, 0xf7, 0x3c, + 0x42, 0x3e, 0x26, 0x3e, 0x13, 0x3c, 0xc4, 0x3e, 0x6b, 0x3c, 0x18, 0x3c, 0xd0, 0x3d, 0x4c, 0x3c, + 0x29, 0x3c, 0xb6, 0x3d, 0x4a, 0x3c, 0x9e, 0x3e, 0x46, 0x3e, 0x02, 0x40, 0x6c, 0x3b, 0x6a, 0x3d, + 0x46, 0x3c, 0xbf, 0x3c, 0x4e, 0x3e, 0xf7, 0x3c, 0xc0, 0x3d, 0xc9, 0x39, 0x9e, 0x3b, 0xa0, 0x3d, + 0x89, 0xba, 0x43, 0xba, 0x2c, 0xba, 0x4f, 0xbc, 0xbf, 0xba, 0x61, 0xbb, 0x26, 0xba, 0x14, 0xbb, + 0x42, 0xbb, 0x00, 0xbb, 0xd8, 0xbb, 0x5c, 0xbb, 0xaf, 0xba, 0x34, 0xba, 0xb4, 0xba, 0x7a, 0xbb, + 0x8e, 0xba, 0x0f, 0xba, 0x33, 0xba, 0x89, 0xba, 0xbc, 0xbb, 0x86, 0xbc, 0xb0, 0xbb, 0xd8, 0xb9, + 0x70, 0xbc, 0x10, 0xbb, 0xf3, 0xba, 0xfc, 0xb9, 0xa9, 0xbb, 0x8b, 0xbb, 0x34, 0xba, 0x32, 0xbc, + 0xbc, 0xbb, 0x32, 0xba, 0x5f, 0xb9, 0x5d, 0xb8, 0x2d, 0xba, 0x26, 0xbb, 0xbc, 0xbb, 0xdb, 0xba, + 0x06, 0xbd, 0x26, 0xbc, 0x3c, 0xb9, 0x48, 0xbc, 0x38, 0xbc, 0xcf, 0xb8, 0x23, 0xbc, 0x51, 0xba, + 0x5a, 0xbb, 0x85, 0xbb, 0x27, 0xba, 0x32, 0xbb, 0x9a, 0xbb, 0xe4, 0xba, 0x26, 0xbb, 0x5a, 0xbc, + 0xf0, 0xba, 0x90, 0xbb, 0x60, 0xbc, 0x0e, 0xba, 0x4b, 0xbc, 0x50, 0xb9, 0x74, 0xba, 0x9a, 0xba, + 0x67, 0xb4, 0x32, 0xb6, 0x80, 0xb4, 0x0a, 0xb5, 0x68, 0xb6, 0xcf, 0xb4, 0xce, 0xad, 0x14, 0xaf, + 0x1e, 0xad, 0x46, 0xb1, 0xa8, 0xb7, 0x78, 0xb2, 0x9e, 0xb3, 0xfe, 0xb4, 0x90, 0xb2, 0x81, 0xb2, + 0xe4, 0xb5, 0x85, 0xb2, 0x1b, 0xb2, 0x00, 0xb5, 0x54, 0xb7, 0x60, 0xb3, 0x77, 0xb3, 0xfc, 0x29, + 0xf6, 0xb8, 0xd0, 0xb4, 0x57, 0xb5, 0x6a, 0xb0, 0x6a, 0xac, 0x4d, 0xb7, 0x0d, 0xb0, 0x48, 0xb5, + 0xa0, 0xa6, 0xf6, 0xb3, 0x8a, 0xaf, 0x2e, 0xb1, 0x64, 0xb4, 0x34, 0xb7, 0xeb, 0xb0, 0x18, 0xad, + 0x56, 0xb2, 0xcd, 0xb6, 0xfe, 0xb4, 0xe7, 0xb6, 0x22, 0xb3, 0xd3, 0xb3, 0x22, 0xb3, 0xa3, 0xb3, + 0xf2, 0xb5, 0x8f, 0xb7, 0xec, 0xb2, 0x32, 0xb5, 0x82, 0xb1, 0xde, 0xb8, 0xe4, 0xb8, 0x0e, 0xb5, + 0x78, 0xb4, 0xd8, 0xb4, 0x97, 0xb7, 0x64, 0xb8, 0xcf, 0xb6, 0x1a, 0xb1, 0x68, 0xb5, 0x54, 0xb5, + 0x48, 0x2e, 0xf3, 0x2d, 0x2d, 0x2e, 0xe0, 0x2e, 0x62, 0x2e, 0x44, 0x2e, 0x9d, 0x2d, 0xdc, 0x2e, + 0x28, 0x2f, 0xb4, 0x2e, 0xf6, 0x2f, 0x52, 0x2e, 0x68, 0x2d, 0xd5, 0x2d, 0x12, 0x2e, 0x4c, 0x2f, + 0x36, 0x2d, 0xae, 0x2d, 0x9f, 0x2c, 0xca, 0x2d, 0xe6, 0x2e, 0x64, 0x30, 0x96, 0x2f, 0x68, 0x2d, + 0x57, 0x30, 0xde, 0x2e, 0x68, 0x2e, 0x24, 0x2e, 0x5c, 0x2f, 0x0b, 0x2f, 0x51, 0x2d, 0x34, 0x30, + 0xca, 0x2e, 0xc4, 0x2d, 0x08, 0x2d, 0x60, 0x2a, 0xa0, 0x2d, 0x88, 0x2e, 0x29, 0x2e, 0xd4, 0x2d, + 0xad, 0x30, 0x05, 0x30, 0x1a, 0x2d, 0x06, 0x30, 0x3e, 0x30, 0x5f, 0x2c, 0x8c, 0x2f, 0x0c, 0x2e, + 0xcc, 0x2f, 0x7e, 0x2f, 0xc9, 0x2d, 0x25, 0x2e, 0x55, 0x2e, 0xf0, 0x2d, 0x47, 0x30, 0x49, 0x30, + 0xf2, 0x2e, 0x82, 0x2f, 0x54, 0x30, 0x60, 0x2e, 0x4c, 0x30, 0x58, 0x2d, 0xcb, 0x2e, 0xfe, 0x2d, + 0xa0, 0x29, 0xd6, 0x2a, 0x14, 0x2a, 0xfd, 0x27, 0x19, 0x2b, 0x94, 0x28, 0x1c, 0x25, 0xa3, 0x27, + 0x76, 0x27, 0x51, 0x28, 0xbf, 0x2c, 0xe4, 0x26, 0x54, 0x26, 0xbc, 0x29, 0x09, 0x28, 0x3a, 0x29, + 0xa8, 0x28, 0x45, 0x28, 0x10, 0x23, 0x20, 0x29, 0x49, 0x2b, 0x06, 0x2a, 0xdb, 0x29, 0xd8, 0x1e, + 0x8c, 0x2d, 0x45, 0x2a, 0xf7, 0x29, 0x7a, 0x28, 0xb2, 0x26, 0xdc, 0x2b, 0xab, 0x24, 0x9e, 0x2b, + 0x28, 0x22, 0xce, 0x28, 0xf1, 0x25, 0xd9, 0x21, 0xe2, 0x28, 0x62, 0x2b, 0xa0, 0x23, 0xdc, 0x22, + 0x2a, 0x29, 0x1f, 0x2c, 0xd5, 0x29, 0xea, 0x2b, 0x52, 0x2a, 0x2d, 0x28, 0xb5, 0x28, 0x0d, 0x29, + 0x4b, 0x2c, 0x80, 0x2c, 0x7f, 0x28, 0xee, 0x28, 0x68, 0x25, 0x52, 0x2c, 0xc0, 0x2e, 0x42, 0x2b, + 0x5d, 0x2a, 0xcc, 0x2a, 0xb2, 0x2c, 0x0b, 0x2d, 0x74, 0x2c, 0x3b, 0x28, 0x96, 0x2b, 0xae, 0x29, + 0xeb, 0x29, 0xf1, 0x29, 0x60, 0x29, 0x92, 0x2c, 0x66, 0x2a, 0x7e, 0x2b, 0x99, 0x29, 0x0d, 0x2a, + 0x08, 0x2a, 0x29, 0x2a, 0xeb, 0x2a, 0x42, 0x2b, 0x02, 0x2b, 0xd5, 0x29, 0x54, 0x2a, 0x7e, 0x2a, + 0x2c, 0x2b, 0x8e, 0x29, 0xd7, 0x2a, 0x79, 0x2a, 0xc2, 0x2b, 0xe3, 0x2b, 0xa0, 0x2a, 0x0b, 0x29, + 0x24, 0x2c, 0x57, 0x2a, 0xa3, 0x2a, 0xd9, 0x28, 0x8b, 0x2a, 0x43, 0x2b, 0x0c, 0x2a, 0x3a, 0x2b, + 0x2b, 0x2b, 0xca, 0x29, 0xd4, 0x28, 0xee, 0x28, 0xee, 0x29, 0x0e, 0x2b, 0x01, 0x2c, 0xa2, 0x2a, + 0x86, 0x2c, 0x93, 0x2b, 0xd0, 0x28, 0x06, 0x2c, 0x10, 0x2b, 0xad, 0x28, 0xb5, 0x2b, 0xb2, 0x29, + 0x08, 0x2a, 0xcd, 0x2a, 0xa1, 0x29, 0x53, 0x2b, 0xa2, 0x2b, 0x6f, 0x2b, 0x4a, 0x29, 0x9b, 0x2b, + 0x00, 0x2a, 0x95, 0x2a, 0xda, 0x2b, 0x67, 0x29, 0x88, 0x2b, 0x7a, 0x28, 0x5c, 0x29, 0x6f, 0x2a, + 0xe9, 0x24, 0xd2, 0x26, 0x7c, 0x24, 0x43, 0x28, 0x21, 0x27, 0x08, 0x27, 0x09, 0x21, 0x66, 0x20, + 0xea, 0x1d, 0x78, 0x22, 0x6b, 0x27, 0x53, 0x25, 0x5c, 0x26, 0xba, 0x25, 0x8d, 0x24, 0x3c, 0x23, + 0x4a, 0x28, 0x06, 0x24, 0x33, 0x26, 0x7e, 0x26, 0x80, 0x28, 0x3d, 0x24, 0xe0, 0x23, 0x58, 0x10, + 0x27, 0x29, 0x37, 0x25, 0x6f, 0x26, 0x8d, 0x1f, 0xd6, 0x1e, 0x26, 0x28, 0x96, 0x23, 0x40, 0x25, + 0x9a, 0x20, 0xd8, 0x24, 0x26, 0x21, 0xf1, 0x24, 0x7f, 0x25, 0x38, 0x28, 0x9c, 0x25, 0xa2, 0x22, + 0x8e, 0x24, 0x52, 0x27, 0x40, 0x25, 0xee, 0x27, 0xf2, 0x22, 0xca, 0x24, 0x08, 0x25, 0x59, 0x24, + 0x10, 0x25, 0x9e, 0x27, 0x30, 0x24, 0x4a, 0x27, 0x4e, 0x25, 0xd1, 0x29, 0xf7, 0x26, 0x54, 0x25, + 0x77, 0x24, 0xf2, 0x24, 0xc7, 0x27, 0x12, 0x28, 0xc8, 0x26, 0xfc, 0x20, 0xb9, 0x24, 0x8e, 0x26, + 0x40, 0x3d, 0x46, 0x3d, 0x7c, 0x3d, 0x24, 0x3c, 0x95, 0x3d, 0x5a, 0x3c, 0xc5, 0x3b, 0x3d, 0x3d, + 0x80, 0x3d, 0x30, 0x3d, 0x8a, 0x3f, 0x2c, 0x3c, 0xaa, 0x3a, 0xe5, 0x3c, 0x74, 0x3c, 0xe1, 0x3d, + 0x04, 0x3b, 0x71, 0x3c, 0x88, 0x38, 0x71, 0x3c, 0x9c, 0x3d, 0xf8, 0x3e, 0x46, 0x3e, 0xd4, 0x3a, + 0x14, 0x40, 0xd3, 0x3d, 0x32, 0x3d, 0x33, 0x3d, 0x64, 0x3d, 0x18, 0x3e, 0xbf, 0x3a, 0x52, 0x3f, + 0x1c, 0x3c, 0x97, 0x3c, 0x7a, 0x3b, 0x34, 0x36, 0x6c, 0x3c, 0x8e, 0x3d, 0x9e, 0x3a, 0xed, 0x3a, + 0xd4, 0x3e, 0x04, 0x3f, 0x9f, 0x3c, 0xc0, 0x3e, 0x16, 0x3f, 0x0a, 0x3b, 0x82, 0x3d, 0xf5, 0x3c, + 0x76, 0x3f, 0x02, 0x3f, 0x94, 0x3c, 0x67, 0x3c, 0xab, 0x3b, 0x36, 0x3d, 0xeb, 0x40, 0x3a, 0x3f, + 0x0e, 0x3e, 0x7c, 0x3e, 0xd0, 0x3f, 0xca, 0x3e, 0xbe, 0x3f, 0x86, 0x3c, 0x7e, 0x3e, 0xce, 0x3c, + 0x64, 0x33, 0xf1, 0x36, 0x8c, 0x36, 0x4a, 0x38, 0x60, 0xa7, 0x9b, 0x35, 0x1b, 0x37, 0xd5, 0x39, + 0xe0, 0x37, 0x58, 0x2f, 0xbc, 0x3a, 0xc6, 0x3b, 0xec, 0x3a, 0x1e, 0x39, 0x8f, 0x35, 0x00, 0x27, + 0x21, 0x3a, 0xe2, 0x34, 0xa6, 0x39, 0x40, 0x3a, 0x60, 0x33, 0xc7, 0x37, 0x1b, 0x38, 0x60, 0x32, + 0x1b, 0x3a, 0x76, 0x33, 0xa4, 0x3a, 0x2e, 0x30, 0xa5, 0x2c, 0xb0, 0x32, 0x04, 0x3c, 0x3a, 0x38, + 0x57, 0x30, 0x0d, 0x38, 0x7b, 0x37, 0x8c, 0x34, 0xc0, 0x1e, 0x26, 0x37, 0x5a, 0x39, 0x20, 0x38, + 0x8e, 0x39, 0x85, 0x3a, 0x95, 0x39, 0xfc, 0x32, 0x78, 0x39, 0x0a, 0x3c, 0x36, 0x38, 0x80, 0x9e, + 0x5c, 0x35, 0xca, 0x31, 0x80, 0x39, 0xc0, 0x39, 0xec, 0x2d, 0x9c, 0x39, 0x98, 0xb1, 0x57, 0x3b, + 0x0c, 0x3c, 0x39, 0x36, 0x60, 0x33, 0x56, 0x39, 0x45, 0x39, 0x9a, 0x37, 0x8e, 0x31, 0x1d, 0x3b, + 0xc0, 0xb4, 0x8c, 0xaf, 0xfa, 0xb5, 0x15, 0xb8, 0xf1, 0xaf, 0xcd, 0xb2, 0x1d, 0xb6, 0x92, 0xb5, + 0x22, 0xb9, 0xf3, 0xb1, 0xc1, 0xb5, 0x60, 0xb1, 0x06, 0xb7, 0x4a, 0xb5, 0xfa, 0xae, 0x64, 0xb4, + 0x2a, 0xb4, 0xa5, 0xb3, 0x1b, 0xb5, 0x46, 0xaa, 0x95, 0xaf, 0x4c, 0xb6, 0xd6, 0xb5, 0x54, 0xb0, + 0x74, 0xb9, 0xf0, 0xac, 0xce, 0xb3, 0x90, 0xb5, 0xb8, 0xb2, 0x56, 0xb1, 0xb4, 0xb4, 0x80, 0xb4, + 0x74, 0xb4, 0x1a, 0xb4, 0xbe, 0xae, 0x4e, 0xb2, 0x20, 0xb4, 0x2e, 0xb1, 0xed, 0xb5, 0xe0, 0xb6, + 0x2c, 0xb5, 0xfe, 0xb7, 0xbc, 0xb5, 0x2c, 0xb6, 0x04, 0xb6, 0x82, 0xb5, 0x6a, 0xb6, 0x1d, 0x2c, + 0xee, 0xb5, 0xa0, 0xb2, 0x5e, 0xb3, 0x99, 0xab, 0x1d, 0xb4, 0x81, 0xb6, 0x3c, 0xab, 0x2d, 0xb6, + 0x91, 0xb8, 0x8e, 0xb4, 0xd6, 0xb5, 0xdb, 0xb6, 0x8e, 0xb8, 0x24, 0xb5, 0xa9, 0xb5, 0x22, 0xb8, + 0x4c, 0xb0, 0xe8, 0x1c, 0x58, 0x2e, 0x80, 0xa1, 0x25, 0xb0, 0xf3, 0x29, 0xd8, 0xad, 0x0e, 0xb2, + 0x84, 0xa9, 0xa0, 0xa6, 0x0e, 0xae, 0x80, 0xa9, 0x2b, 0xb1, 0xe8, 0xad, 0x03, 0x2d, 0x58, 0x26, + 0x10, 0xb4, 0xbc, 0x20, 0x21, 0xb0, 0x48, 0xb1, 0x1c, 0xb5, 0x8b, 0xad, 0x67, 0xae, 0x84, 0x2f, + 0x70, 0xb5, 0x80, 0xac, 0x75, 0xb4, 0x58, 0x25, 0xd7, 0x2a, 0xeb, 0xb0, 0x7e, 0xb4, 0xd4, 0xa4, + 0x10, 0x28, 0x56, 0xab, 0x42, 0xb4, 0x2f, 0x26, 0xe6, 0xaa, 0xd0, 0xa9, 0x64, 0xb1, 0xeb, 0xb4, + 0x54, 0xb0, 0x57, 0xae, 0x02, 0xb4, 0xb9, 0xb0, 0x2b, 0xb3, 0x27, 0xb0, 0x1e, 0xb0, 0x2f, 0xa4, + 0xec, 0xb4, 0xe3, 0xab, 0xd8, 0xb0, 0x7a, 0xb1, 0x8c, 0x31, 0x09, 0xb1, 0x4c, 0xb1, 0xe2, 0xb2, + 0xf2, 0xb0, 0x23, 0xb2, 0x48, 0x24, 0x92, 0xb2, 0xc8, 0xb1, 0xc4, 0xb6, 0x4c, 0xae, 0x0d, 0xb2, + 0x94, 0x29, 0x28, 0x1a, 0xcb, 0x28, 0x94, 0x2b, 0xab, 0x26, 0x94, 0x24, 0x07, 0x2a, 0xca, 0x28, + 0x40, 0x2d, 0x7e, 0x26, 0xd8, 0x27, 0x28, 0x9d, 0xc4, 0x29, 0x36, 0x28, 0xf0, 0x12, 0xfe, 0x28, + 0xb6, 0x26, 0x72, 0x26, 0xe2, 0x27, 0x58, 0xa1, 0xab, 0x26, 0x0a, 0x2a, 0x74, 0x29, 0xf6, 0x1f, + 0x00, 0x2e, 0x80, 0x1f, 0xd8, 0x25, 0xfb, 0x29, 0xc2, 0x26, 0x97, 0x26, 0x52, 0x26, 0x87, 0x26, + 0x90, 0x28, 0x42, 0x26, 0xec, 0x22, 0xd6, 0x24, 0x45, 0x29, 0xfa, 0x21, 0x56, 0x29, 0x02, 0x2c, + 0x1c, 0x28, 0xc0, 0x2a, 0x82, 0x29, 0x6c, 0x2b, 0xbc, 0x29, 0x44, 0x26, 0x46, 0x2a, 0x69, 0xa0, + 0xa8, 0x2b, 0x32, 0x27, 0xea, 0x24, 0xa5, 0x9e, 0xdc, 0x26, 0xd4, 0x29, 0xc8, 0x25, 0xe0, 0x28, + 0xa2, 0x2b, 0x0a, 0x29, 0xe9, 0x29, 0xb2, 0x2a, 0xa4, 0x2c, 0xd9, 0x2a, 0xae, 0x2a, 0x58, 0x2b, + 0xfd, 0x26, 0xc0, 0xa0, 0x03, 0xa1, 0xbc, 0x20, 0xcf, 0x26, 0x1c, 0xa0, 0x9d, 0x24, 0x80, 0x25, + 0xaa, 0x25, 0xca, 0x20, 0x38, 0x1c, 0x7d, 0xa4, 0x70, 0x24, 0xa4, 0x20, 0x94, 0xa4, 0xa7, 0x20, + 0x8a, 0x26, 0x40, 0x0e, 0x08, 0x22, 0xc0, 0x14, 0xe8, 0x29, 0x38, 0x24, 0x21, 0x24, 0x7a, 0xa4, + 0xa2, 0x2b, 0xce, 0x1e, 0xbc, 0x26, 0x10, 0x21, 0x80, 0x04, 0x42, 0x26, 0xfc, 0x25, 0xe8, 0x91, + 0xc0, 0x1c, 0xa6, 0x1c, 0x94, 0x27, 0xa2, 0x9a, 0x52, 0x25, 0x70, 0x01, 0x81, 0x25, 0x74, 0x2a, + 0xd0, 0x22, 0x7f, 0x22, 0x21, 0x28, 0x5f, 0x28, 0x95, 0x27, 0x2d, 0x1a, 0x95, 0x25, 0x80, 0x90, + 0xda, 0x2a, 0xf2, 0x22, 0xcb, 0x21, 0xcc, 0x1c, 0x28, 0xa4, 0x44, 0x25, 0x4a, 0x28, 0x5e, 0x25, + 0x9e, 0x24, 0xb1, 0x27, 0x1c, 0x20, 0xae, 0x27, 0x4e, 0x28, 0x13, 0x2c, 0xd2, 0x26, 0x4c, 0x26, + 0xd0, 0x22, 0x00, 0x22, 0xa6, 0x25, 0x27, 0x27, 0x3b, 0x19, 0x5e, 0x23, 0x45, 0x25, 0xd9, 0x25, + 0x10, 0x28, 0x62, 0x20, 0xbe, 0x26, 0x9f, 0x25, 0x63, 0x27, 0x9c, 0x25, 0x8e, 0x21, 0xa9, 0x21, + 0xf4, 0x24, 0x3f, 0x23, 0xaa, 0x25, 0x8e, 0x22, 0x64, 0x1c, 0x92, 0x25, 0x5f, 0x25, 0x0c, 0x21, + 0x5a, 0x28, 0x0b, 0x1e, 0xfe, 0x24, 0xeb, 0x23, 0xfe, 0x20, 0x00, 0x20, 0x36, 0x26, 0xe3, 0x24, + 0xb4, 0x22, 0x70, 0x24, 0x4e, 0x20, 0x5c, 0x22, 0xaa, 0x20, 0xa1, 0x22, 0xdf, 0x25, 0x57, 0x25, + 0xa2, 0x25, 0xf5, 0x27, 0x9e, 0x25, 0x2a, 0x24, 0xcf, 0x25, 0x33, 0x27, 0xae, 0x25, 0x93, 0x99, + 0x01, 0x24, 0x20, 0x21, 0xa8, 0x24, 0x2c, 0x22, 0xd6, 0x22, 0x65, 0x26, 0xdd, 0x99, 0xee, 0x26, + 0xa7, 0x28, 0x90, 0x23, 0x8a, 0x24, 0x4c, 0x26, 0xb3, 0x27, 0x84, 0x23, 0xa4, 0x23, 0x13, 0x28, + 0x4c, 0x1d, 0xb4, 0x1d, 0x5f, 0x96, 0x0d, 0x1a, 0xca, 0x19, 0xe0, 0x15, 0x32, 0x1e, 0x9f, 0x23, + 0x54, 0x17, 0x94, 0x10, 0xb2, 0x22, 0x05, 0x24, 0xae, 0x23, 0x1d, 0x21, 0x1a, 0x16, 0x00, 0x9c, + 0xde, 0x24, 0x2f, 0x17, 0x75, 0x22, 0xc3, 0x24, 0x58, 0x23, 0x95, 0x1e, 0xec, 0x1f, 0xb0, 0x99, + 0x56, 0x24, 0xb1, 0x1d, 0x70, 0x25, 0x0c, 0x9a, 0x00, 0x9b, 0x56, 0x1f, 0x0a, 0x26, 0x24, 0x1e, + 0xef, 0x98, 0x8e, 0x1f, 0x36, 0x24, 0x98, 0x15, 0x7a, 0x94, 0x4b, 0x1f, 0x92, 0x22, 0x8e, 0x23, + 0x74, 0x22, 0xaa, 0x21, 0x3c, 0x24, 0x74, 0x1c, 0xba, 0x23, 0x73, 0x24, 0x66, 0x20, 0x5e, 0x15, + 0x64, 0x22, 0x9b, 0x19, 0x4d, 0x23, 0x84, 0x24, 0x2e, 0xa0, 0x67, 0x22, 0x4f, 0x19, 0xbe, 0x24, + 0xf2, 0x23, 0x2e, 0x21, 0x88, 0x95, 0xe4, 0x22, 0x72, 0x21, 0x0f, 0x25, 0x6a, 0x18, 0xea, 0x23, + 0xeb, 0x39, 0xba, 0xb1, 0xb0, 0x32, 0x17, 0x39, 0xcc, 0x38, 0x00, 0x26, 0xf9, 0x38, 0xb0, 0x37, + 0x32, 0x3c, 0xca, 0x35, 0xc8, 0x31, 0xf3, 0xb7, 0xc6, 0x37, 0xff, 0x34, 0x75, 0xb4, 0x46, 0x38, + 0x42, 0x36, 0xc4, 0x32, 0xd7, 0x34, 0xcc, 0xb4, 0x0a, 0x3a, 0xc2, 0x38, 0x43, 0x38, 0x18, 0xb1, + 0x24, 0x3e, 0x54, 0x2e, 0x86, 0x35, 0xd6, 0x38, 0x7a, 0x34, 0x19, 0x38, 0xa3, 0x34, 0xb2, 0x30, + 0xb5, 0x36, 0x62, 0x32, 0xe5, 0x35, 0xb8, 0x2e, 0x90, 0x39, 0x18, 0x25, 0x5a, 0x38, 0x92, 0x3c, + 0x67, 0x35, 0x42, 0x38, 0x62, 0x39, 0xdc, 0x3b, 0x55, 0x39, 0x48, 0x2b, 0x53, 0x39, 0x80, 0xae, + 0xc4, 0x3c, 0xc8, 0x36, 0x7c, 0x31, 0x65, 0xb2, 0x24, 0x2e, 0x8c, 0x38, 0x66, 0x39, 0xf6, 0x36, + 0xf8, 0x38, 0x78, 0x39, 0x6b, 0x38, 0x13, 0x3a, 0x1e, 0x3c, 0xcd, 0x3c, 0xc0, 0x3a, 0x9a, 0x39, + 0x38, 0xb0, 0x0e, 0xb1, 0x87, 0xaf, 0x8c, 0xb2, 0x42, 0xb1, 0xff, 0xb2, 0xe3, 0xb0, 0x5f, 0xb1, + 0x22, 0xaf, 0x85, 0xaf, 0x03, 0xb2, 0xba, 0xb0, 0x74, 0xb1, 0x1e, 0xb1, 0xdb, 0xb0, 0x8c, 0xb1, + 0x1e, 0xb3, 0x69, 0xb1, 0x06, 0xb2, 0x98, 0xb0, 0x50, 0xb2, 0x1b, 0xb0, 0x52, 0xb1, 0x74, 0xae, + 0xc6, 0xb2, 0xa9, 0xb0, 0xfe, 0xb1, 0x60, 0xae, 0x82, 0xad, 0x21, 0xb1, 0xbb, 0xb1, 0x51, 0xb0, + 0xe9, 0xae, 0x19, 0xb0, 0xe3, 0xaf, 0x10, 0xb1, 0xbc, 0xaf, 0x18, 0xb2, 0x17, 0xb3, 0xd6, 0xb0, + 0x2e, 0xb2, 0x1d, 0xb3, 0x2e, 0xb0, 0x3c, 0xb1, 0x7a, 0xb0, 0xb6, 0xae, 0x6e, 0xb2, 0x66, 0xaf, + 0xd4, 0xb0, 0xbc, 0xb0, 0xb2, 0xb0, 0x7a, 0xb2, 0x64, 0xb0, 0xb7, 0xb4, 0xb6, 0xad, 0xd5, 0xb0, + 0xb1, 0xb1, 0x51, 0xb1, 0x18, 0xb2, 0x2c, 0xb2, 0xe2, 0xb0, 0x1e, 0xac, 0x0a, 0xae, 0x7d, 0xb1, + 0x91, 0x2e, 0x44, 0x2e, 0xf8, 0x2d, 0xcc, 0x2f, 0xe0, 0x2d, 0xff, 0x2e, 0x94, 0x2e, 0x34, 0x2f, + 0x78, 0x2f, 0x00, 0x2f, 0x76, 0x2f, 0x72, 0x2f, 0x61, 0x2e, 0x0a, 0x2f, 0x92, 0x2e, 0xa2, 0x2f, + 0x0f, 0x2e, 0x03, 0x2f, 0x82, 0x2d, 0x58, 0x2d, 0x47, 0x2f, 0x01, 0x30, 0xa7, 0x2f, 0x62, 0x2e, + 0x30, 0x30, 0xd6, 0x2e, 0x0a, 0x2e, 0x85, 0x2d, 0xad, 0x2e, 0x5b, 0x2f, 0xc2, 0x2d, 0xf0, 0x2e, + 0x8b, 0x2e, 0xc8, 0x2d, 0x64, 0x2e, 0x85, 0x2a, 0x03, 0x2e, 0x63, 0x2e, 0x88, 0x2f, 0x59, 0x2d, + 0x0c, 0x31, 0xb5, 0x2f, 0x12, 0x2e, 0xa2, 0x2f, 0x1c, 0x30, 0x6c, 0x2c, 0x52, 0x2f, 0x98, 0x2d, + 0x4c, 0x2f, 0xfe, 0x2e, 0xe2, 0x2e, 0xac, 0x2e, 0x62, 0x2d, 0x9e, 0x2e, 0x74, 0x2d, 0x5e, 0x30, + 0xc0, 0x2f, 0x7f, 0x2f, 0xb4, 0x2f, 0x7f, 0x2f, 0x5c, 0x30, 0x75, 0x2b, 0xea, 0x2c, 0xea, 0x2d, + 0x22, 0x27, 0x14, 0x2a, 0x8e, 0x29, 0x94, 0x25, 0x48, 0x2b, 0x96, 0x29, 0xa8, 0x24, 0x12, 0x24, + 0x8c, 0x1e, 0x4b, 0x25, 0x71, 0x2a, 0x40, 0x20, 0x67, 0x26, 0xed, 0x28, 0xcc, 0x27, 0x1d, 0x28, + 0x10, 0x2b, 0x00, 0x28, 0xc8, 0x28, 0x76, 0x29, 0x56, 0x2a, 0xb0, 0x27, 0xc4, 0x24, 0x40, 0x9c, + 0x3e, 0x2c, 0x86, 0x27, 0x3f, 0x2b, 0x08, 0x28, 0xc0, 0x18, 0xa4, 0x28, 0xb6, 0x25, 0x2c, 0x28, + 0xa0, 0x97, 0xe5, 0x29, 0x72, 0x25, 0x1a, 0x24, 0x1d, 0x25, 0xd9, 0x29, 0xea, 0x24, 0x8a, 0x22, + 0x5c, 0x27, 0x0d, 0x2c, 0x47, 0x28, 0xe4, 0x29, 0x31, 0x26, 0x40, 0x26, 0x93, 0x28, 0x6f, 0x27, + 0xe1, 0x29, 0x23, 0x2b, 0x76, 0x28, 0xfa, 0x28, 0xf0, 0x1e, 0x24, 0x2e, 0xcd, 0x2c, 0x66, 0x29, + 0xee, 0x29, 0x50, 0x27, 0x0e, 0x2c, 0xd7, 0x2c, 0x6e, 0x29, 0x96, 0x26, 0x1d, 0x26, 0x3d, 0x2a, + 0x58, 0xa2, 0x0a, 0xa2, 0x44, 0xa2, 0x6c, 0xa2, 0xb3, 0xa1, 0xc7, 0xa1, 0xc1, 0xa1, 0x30, 0xa2, + 0x39, 0xa3, 0xec, 0xa2, 0x09, 0xa3, 0xa0, 0xa2, 0x64, 0xa1, 0xb4, 0xa2, 0x14, 0xa2, 0x07, 0xa3, + 0xe2, 0xa0, 0x54, 0xa2, 0x5c, 0xa0, 0x0e, 0xa1, 0xa2, 0xa2, 0x18, 0xa4, 0xd2, 0xa2, 0xd4, 0xa1, + 0x12, 0xa4, 0x79, 0xa2, 0x80, 0xa1, 0xbc, 0xa1, 0x9a, 0xa2, 0x05, 0xa3, 0x6d, 0xa0, 0xdc, 0xa2, + 0xf6, 0xa1, 0xee, 0xa1, 0x1c, 0xa2, 0x7e, 0x9a, 0xaa, 0xa1, 0x96, 0xa1, 0xbf, 0xa1, 0x2b, 0xa0, + 0xc6, 0xa4, 0x14, 0xa3, 0xe8, 0xa1, 0x8c, 0xa3, 0x0c, 0xa4, 0x23, 0xa0, 0x46, 0xa2, 0x7c, 0xa1, + 0x5d, 0xa3, 0x4c, 0xa3, 0xa6, 0xa2, 0x8e, 0xa1, 0x4b, 0xa0, 0x5e, 0xa1, 0xf1, 0xa2, 0x7f, 0xa4, + 0x74, 0xa3, 0xe5, 0xa2, 0x9e, 0xa3, 0xa4, 0xa3, 0x78, 0xa4, 0x09, 0xa0, 0xe6, 0xa0, 0x6a, 0xa1, + 0x08, 0x9d, 0xeb, 0x9e, 0x6c, 0x9f, 0x36, 0x99, 0xd3, 0x9f, 0xd6, 0x9c, 0xb0, 0x99, 0xf6, 0x98, + 0x50, 0x9a, 0xc4, 0x9c, 0x48, 0x9f, 0x62, 0x98, 0x42, 0x9a, 0x16, 0x9e, 0xc2, 0x9c, 0x22, 0x9d, + 0xbc, 0x9d, 0xa3, 0x9c, 0x74, 0x9b, 0xee, 0x9d, 0xb2, 0x9e, 0xa2, 0x9e, 0x9e, 0x9a, 0x14, 0x94, + 0xd5, 0xa0, 0x04, 0x9d, 0x23, 0x9f, 0xdb, 0x9d, 0x84, 0x99, 0xfc, 0x9d, 0xb0, 0x97, 0xf3, 0x9d, + 0x0c, 0x95, 0x55, 0x9f, 0x2d, 0x9c, 0x47, 0x0d, 0x81, 0x9b, 0xa2, 0x9d, 0xa6, 0x95, 0x56, 0x94, + 0xe6, 0x9d, 0x26, 0xa0, 0x87, 0x9d, 0x98, 0x9f, 0x85, 0x9d, 0x68, 0x9b, 0x7d, 0x9c, 0xfe, 0x9c, + 0xc0, 0x9f, 0x8f, 0xa0, 0xdc, 0x9d, 0x69, 0x9c, 0x4c, 0x91, 0xfc, 0xa0, 0x6b, 0xa2, 0x28, 0xa0, + 0x3f, 0x9f, 0xb8, 0x9c, 0xb4, 0xa0, 0x84, 0xa1, 0x23, 0xa0, 0xbe, 0x9c, 0x61, 0x9c, 0x5c, 0x9e, + 0xd6, 0x9d, 0xd8, 0x9d, 0x08, 0x9d, 0xe4, 0x9f, 0x9a, 0x9d, 0x5e, 0x9f, 0x52, 0x9e, 0xfc, 0x9e, + 0x4d, 0x9e, 0xf1, 0x9d, 0x09, 0x9f, 0xe1, 0x9e, 0x6a, 0x9e, 0x76, 0x9e, 0x24, 0x9e, 0x20, 0x9f, + 0xb9, 0x9e, 0xb1, 0x9e, 0x0b, 0x9e, 0x1c, 0x9d, 0x1a, 0x9f, 0x9b, 0x9e, 0x31, 0x9f, 0x9a, 0x9d, + 0xca, 0x9f, 0x36, 0x9e, 0x18, 0x9e, 0x9c, 0x9c, 0x72, 0x9d, 0xaf, 0x9e, 0x39, 0x9e, 0x0c, 0x9e, + 0xcb, 0x9d, 0x10, 0x9d, 0xaa, 0x9d, 0x73, 0x9c, 0x6a, 0x9d, 0x80, 0x9e, 0x06, 0xa0, 0xa0, 0x9d, + 0x7e, 0xa0, 0x9c, 0x9f, 0x71, 0x9d, 0xc8, 0x9e, 0x02, 0x9f, 0x20, 0x9c, 0x5d, 0x9f, 0xf5, 0x9c, + 0x5e, 0x9e, 0x02, 0x9e, 0x30, 0x9e, 0xf7, 0x9e, 0x7c, 0x9d, 0xc5, 0x9f, 0xaa, 0x9b, 0x48, 0x9f, + 0x18, 0x9f, 0xf8, 0x9e, 0x0f, 0x9f, 0xd2, 0x9e, 0x4b, 0x9f, 0x03, 0x9a, 0x40, 0x9c, 0xda, 0x9d, + 0x03, 0x98, 0x88, 0x9a, 0xf5, 0x98, 0x8b, 0x99, 0xa0, 0x9b, 0x02, 0x9c, 0xf4, 0x97, 0x11, 0x98, + 0x3c, 0x91, 0x9a, 0x95, 0x4a, 0x9b, 0x66, 0x95, 0x2f, 0x99, 0x99, 0x99, 0xe3, 0x98, 0x55, 0x99, + 0xb3, 0x9c, 0x5c, 0x99, 0x16, 0x9b, 0x09, 0x9a, 0x9f, 0x9b, 0xf5, 0x96, 0x0c, 0x98, 0x8a, 0x8c, + 0x7c, 0x9c, 0x7e, 0x98, 0x2a, 0x9c, 0x38, 0x97, 0x18, 0x8b, 0x50, 0x99, 0x85, 0x99, 0x54, 0x98, + 0x6a, 0x8f, 0x99, 0x99, 0x90, 0x96, 0x53, 0x99, 0x82, 0x96, 0x73, 0x9b, 0x06, 0x9a, 0xff, 0x97, + 0xc4, 0x98, 0xae, 0x9c, 0x98, 0x98, 0x10, 0x9a, 0xb0, 0x96, 0x5e, 0x97, 0xac, 0x9a, 0xe1, 0x97, + 0xc2, 0x99, 0x88, 0x9a, 0xef, 0x98, 0x39, 0x9b, 0x18, 0x96, 0x63, 0x9f, 0xa0, 0x9a, 0xe6, 0x98, + 0x86, 0x9a, 0xdd, 0x98, 0x25, 0x9c, 0xb2, 0x9c, 0xf9, 0x98, 0x5d, 0x95, 0x2d, 0x96, 0x3b, 0x9b, + 0x2e, 0xb1, 0x6b, 0xb1, 0x14, 0xb2, 0x96, 0xaf, 0x72, 0xb1, 0x04, 0xb0, 0xa6, 0xaf, 0xc3, 0xaf, + 0x4c, 0xb1, 0x9b, 0xb1, 0x02, 0xb2, 0x40, 0xb0, 0x0e, 0xaf, 0x93, 0xb1, 0xbc, 0xb0, 0x5c, 0xb1, + 0x59, 0xaf, 0xbb, 0xb0, 0xbe, 0xad, 0x7e, 0xb0, 0x71, 0xb1, 0x10, 0xb3, 0xa2, 0xb0, 0x4e, 0xaf, + 0x6e, 0xb3, 0x27, 0xb1, 0xe1, 0xb0, 0x4b, 0xb1, 0xec, 0xb0, 0xc2, 0xb1, 0xb6, 0xac, 0xd6, 0xb1, + 0x8b, 0xaf, 0xb9, 0xb1, 0xc8, 0xb0, 0xb0, 0x1f, 0x56, 0xb0, 0x63, 0xb0, 0x88, 0xad, 0x35, 0xac, + 0x3e, 0xb3, 0x1e, 0xb2, 0x0e, 0xb1, 0xab, 0xb2, 0x84, 0xb2, 0xc0, 0xae, 0x62, 0xb0, 0xb0, 0xb0, + 0xb2, 0xb2, 0x27, 0xb3, 0x90, 0xb1, 0xad, 0xaf, 0x4f, 0xac, 0xec, 0xb0, 0x57, 0xb4, 0x05, 0xb4, + 0x5e, 0xb2, 0x2e, 0xb1, 0x24, 0xb3, 0xb6, 0xb3, 0xfb, 0xb3, 0xfc, 0xaf, 0x3b, 0xb0, 0xac, 0xb0, + 0x30, 0xa9, 0x79, 0xac, 0x86, 0xab, 0x2d, 0xaa, 0x66, 0xa5, 0x94, 0xad, 0x74, 0xae, 0xbb, 0xb0, + 0xdc, 0xac, 0x50, 0xa6, 0x3c, 0xb0, 0x30, 0xae, 0x50, 0xae, 0x80, 0xae, 0x0a, 0xac, 0x46, 0xac, + 0xa7, 0xaf, 0x0a, 0xae, 0x49, 0xae, 0x07, 0xad, 0x1a, 0xa9, 0xb5, 0xa9, 0xc2, 0xae, 0xb0, 0xaa, + 0x93, 0xae, 0x28, 0xaa, 0x24, 0xb0, 0x8a, 0xa9, 0x20, 0x1a, 0xc6, 0xa4, 0x26, 0xb1, 0xb9, 0xab, + 0xe0, 0x93, 0x09, 0xac, 0x7e, 0xad, 0x3c, 0xac, 0xf0, 0x1a, 0x96, 0xac, 0x9c, 0xaf, 0x09, 0xad, + 0xcb, 0xae, 0xce, 0xb0, 0x4d, 0xae, 0x2a, 0xa4, 0x43, 0xae, 0xa2, 0xae, 0xec, 0xae, 0x98, 0x9f, + 0x24, 0xad, 0xf8, 0xa5, 0xee, 0xae, 0xf6, 0xae, 0x4e, 0x21, 0x44, 0xb1, 0x5f, 0x25, 0xdf, 0xae, + 0x83, 0xb1, 0x88, 0xad, 0x27, 0xab, 0x56, 0xb0, 0xc8, 0xac, 0x10, 0xa9, 0x35, 0xa4, 0xab, 0xaf, + 0x7c, 0x2a, 0x08, 0x28, 0xda, 0x2a, 0x2a, 0x2c, 0x13, 0x25, 0xe8, 0x28, 0x1c, 0x2c, 0x8c, 0x2b, + 0xc5, 0x2d, 0x2e, 0x29, 0xfb, 0x2a, 0x45, 0x29, 0xb0, 0x2b, 0x31, 0x2c, 0xa4, 0x27, 0xd0, 0x2a, + 0xf8, 0x28, 0x62, 0x2b, 0x31, 0x29, 0xe2, 0x1e, 0x64, 0x27, 0x8f, 0x2a, 0xac, 0x2b, 0x22, 0x29, + 0xa4, 0x2d, 0xb8, 0x26, 0x11, 0x28, 0xea, 0x29, 0x8c, 0x27, 0xc6, 0x28, 0x5e, 0x29, 0x0b, 0x28, + 0x46, 0x28, 0xec, 0x28, 0x8a, 0x29, 0x1f, 0x24, 0x72, 0x29, 0xd7, 0x26, 0x6c, 0x2b, 0x3c, 0x29, + 0x1f, 0x2c, 0x44, 0x2c, 0x2d, 0x2c, 0x82, 0x2a, 0xbe, 0x2b, 0x8e, 0x29, 0x88, 0x2a, 0x27, 0x18, + 0x94, 0x2b, 0x9c, 0x28, 0xdf, 0x2a, 0x32, 0x25, 0x90, 0x23, 0x6c, 0x2b, 0xf0, 0x15, 0x3e, 0x2c, + 0xda, 0x2d, 0x8c, 0x2a, 0x12, 0x2a, 0x66, 0x2d, 0x7f, 0x2d, 0x24, 0x26, 0x08, 0x28, 0xde, 0x2b, + 0x0e, 0x23, 0x40, 0x20, 0x7c, 0x1d, 0x72, 0xa0, 0xa1, 0x27, 0x94, 0x20, 0xaa, 0x24, 0x16, 0x26, + 0x80, 0x19, 0xf8, 0x1e, 0x20, 0x22, 0xc9, 0x9f, 0x70, 0x24, 0x3a, 0x24, 0x50, 0x18, 0xc2, 0x20, + 0x99, 0x29, 0x1e, 0x21, 0xfc, 0x27, 0x28, 0x27, 0x90, 0x28, 0x00, 0x24, 0x68, 0x1d, 0xb8, 0xa1, + 0xe7, 0x28, 0x82, 0x1e, 0x89, 0x2a, 0xfd, 0x23, 0x17, 0xa1, 0x22, 0x1e, 0xd2, 0x28, 0xf6, 0x15, + 0xc4, 0x9f, 0xe4, 0x26, 0xa0, 0x28, 0x54, 0x99, 0x50, 0x98, 0x64, 0x1d, 0x4a, 0x25, 0x8e, 0x28, + 0xbf, 0x25, 0xdc, 0x27, 0xf9, 0x26, 0x87, 0x24, 0x3a, 0x26, 0xa2, 0x22, 0xbd, 0x26, 0xfe, 0x1e, + 0x0f, 0x29, 0x84, 0x23, 0x47, 0x27, 0x0a, 0x26, 0x1e, 0xa7, 0x84, 0x2a, 0xcc, 0x27, 0x1c, 0x28, + 0x54, 0x28, 0x7c, 0x24, 0x53, 0x23, 0x4b, 0x29, 0x94, 0x24, 0xaa, 0x2a, 0x00, 0x85, 0x66, 0x28, + 0x1c, 0x9f, 0xc0, 0x99, 0x7c, 0x9e, 0xe6, 0x9f, 0xb3, 0x9b, 0xbd, 0x9a, 0x5a, 0x9f, 0x3b, 0x9d, + 0xb2, 0xa1, 0xb9, 0x9d, 0x78, 0x9c, 0x44, 0x99, 0xbe, 0x9e, 0x72, 0x9f, 0x14, 0x99, 0x60, 0x9e, + 0x1e, 0x9c, 0x27, 0x9e, 0x7a, 0x9c, 0xfd, 0x0a, 0xe6, 0x9c, 0x26, 0x9f, 0xdf, 0x9d, 0x9c, 0x9b, + 0xff, 0xa1, 0x6e, 0x99, 0x29, 0x9a, 0x6b, 0x9e, 0x20, 0x9c, 0x67, 0x9d, 0xc2, 0x99, 0xbd, 0x99, + 0xb4, 0x9c, 0x22, 0x9d, 0x92, 0x9d, 0x60, 0x85, 0x8a, 0x9e, 0xb8, 0x97, 0xe8, 0x9d, 0x6b, 0x9d, + 0x6a, 0x9f, 0xaf, 0x9e, 0x08, 0xa0, 0x14, 0xa0, 0x32, 0x9f, 0x94, 0x9b, 0x7e, 0x9d, 0x87, 0x8e, + 0x30, 0xa0, 0x7c, 0x9d, 0x02, 0x9e, 0xe6, 0x90, 0xce, 0x94, 0xe6, 0x9d, 0x00, 0x99, 0x16, 0xa0, + 0xf3, 0xa0, 0xce, 0x9d, 0x1c, 0x9e, 0x39, 0xa1, 0xc8, 0xa1, 0x00, 0x9d, 0x46, 0x9c, 0x1e, 0x9f, + 0xa9, 0x9a, 0x05, 0x90, 0x36, 0x96, 0xa0, 0x8d, 0xdf, 0x9c, 0xc6, 0x8d, 0x49, 0x99, 0x84, 0x96, + 0xde, 0x98, 0x8d, 0x98, 0x30, 0x87, 0xb8, 0x18, 0xbe, 0x98, 0xda, 0x98, 0xda, 0x0c, 0x80, 0x97, + 0xa0, 0x9c, 0x11, 0x95, 0x08, 0x9b, 0xd2, 0x97, 0x96, 0x9d, 0x00, 0x9b, 0x6c, 0x8c, 0xc8, 0x15, + 0xe5, 0x9e, 0x6c, 0x91, 0x0e, 0x9d, 0x92, 0x9a, 0x80, 0x85, 0x64, 0x98, 0xe4, 0x98, 0xf8, 0x0a, + 0x14, 0x90, 0xe7, 0x9b, 0xd6, 0x9c, 0x54, 0x17, 0xe0, 0x97, 0x4c, 0x0c, 0x2c, 0x98, 0xe0, 0x9c, + 0x3f, 0x9a, 0x01, 0x9a, 0x2b, 0x9c, 0xc4, 0x9c, 0xfc, 0x9a, 0x66, 0x91, 0xec, 0x99, 0x32, 0x93, + 0x5c, 0x9e, 0x99, 0x9a, 0xd8, 0x9a, 0x96, 0x93, 0x51, 0x1a, 0x2b, 0x9d, 0x78, 0x9d, 0xa9, 0x9c, + 0xfe, 0x9b, 0xb9, 0x98, 0x70, 0x99, 0x0a, 0x9e, 0x98, 0x9c, 0xe0, 0x9f, 0x28, 0x94, 0x56, 0x9c, + 0xec, 0x98, 0x97, 0x98, 0xfa, 0x99, 0xdd, 0x9a, 0xea, 0x91, 0xb0, 0x99, 0xe6, 0x9b, 0x7c, 0x9c, + 0xb8, 0x9c, 0x8d, 0x97, 0x33, 0x9c, 0xa7, 0x9a, 0x88, 0x9b, 0x0a, 0x9c, 0x65, 0x98, 0x21, 0x9a, + 0xf1, 0x99, 0x65, 0x9b, 0xab, 0x99, 0xf7, 0x94, 0x54, 0x95, 0x0e, 0x99, 0x09, 0x9c, 0x26, 0x99, + 0xa6, 0x9c, 0xfd, 0x96, 0x8c, 0x99, 0xa0, 0x98, 0xd6, 0x94, 0xb3, 0x96, 0xbf, 0x9b, 0x73, 0x98, + 0x94, 0x95, 0x6a, 0x98, 0x54, 0x99, 0x20, 0x97, 0x68, 0x96, 0x5d, 0x98, 0xfb, 0x9b, 0xea, 0x98, + 0xfe, 0x9b, 0xba, 0x9c, 0xac, 0x9b, 0x02, 0x98, 0x5d, 0x9b, 0x89, 0x9a, 0xf0, 0x9a, 0x7d, 0x87, + 0x47, 0x9a, 0x5e, 0x96, 0x17, 0x9b, 0xab, 0x98, 0xf8, 0x91, 0x62, 0x9c, 0x89, 0x11, 0xf0, 0x9b, + 0x05, 0x9e, 0x6d, 0x9a, 0x2f, 0x99, 0x06, 0x9d, 0x58, 0x9c, 0x6f, 0x93, 0xf6, 0x95, 0xef, 0x9b, + 0x94, 0x90, 0x6f, 0x94, 0x2e, 0x90, 0x79, 0x0d, 0x00, 0x95, 0x5c, 0x95, 0x64, 0x96, 0x5d, 0x99, + 0xd8, 0x8b, 0x40, 0x89, 0x09, 0x98, 0xa5, 0x92, 0x53, 0x96, 0x1d, 0x96, 0x33, 0x92, 0x86, 0x92, + 0x73, 0x9a, 0x04, 0x95, 0xb5, 0x98, 0xa7, 0x98, 0xb7, 0x96, 0xa4, 0x91, 0xf4, 0x94, 0x40, 0x03, + 0x32, 0x98, 0xd3, 0x91, 0x86, 0x9b, 0xf8, 0x91, 0x7c, 0x12, 0x88, 0x84, 0x4f, 0x9b, 0x74, 0x91, + 0xa0, 0x11, 0x78, 0x96, 0x98, 0x98, 0x94, 0x92, 0x77, 0x11, 0x54, 0x94, 0x29, 0x98, 0x5f, 0x98, + 0x65, 0x97, 0xc7, 0x99, 0x91, 0x97, 0xe6, 0x8c, 0x52, 0x97, 0xd7, 0x96, 0x68, 0x98, 0xce, 0x8d, + 0x42, 0x98, 0x00, 0x90, 0x81, 0x98, 0x01, 0x99, 0xbb, 0x15, 0x05, 0x9c, 0x58, 0x92, 0x6e, 0x98, + 0x21, 0x9a, 0xf9, 0x95, 0x2c, 0x93, 0xa3, 0x99, 0xe4, 0x92, 0xbc, 0x98, 0x06, 0x08, 0x27, 0x99, + 0xb1, 0xae, 0x08, 0xa5, 0xb7, 0xac, 0x08, 0xad, 0xcc, 0xad, 0xea, 0xa4, 0x6a, 0xad, 0xeb, 0xa8, + 0x41, 0xb0, 0x3b, 0xad, 0x6e, 0xa4, 0x6c, 0x25, 0xd5, 0xac, 0x51, 0xad, 0x80, 0xa0, 0xbc, 0xac, + 0x3c, 0xac, 0x6b, 0xab, 0x0d, 0xac, 0x80, 0x10, 0xbd, 0xae, 0xc7, 0xae, 0x44, 0xa9, 0x98, 0xa2, + 0xd1, 0xb1, 0x78, 0xa6, 0x4d, 0xab, 0x22, 0xae, 0x04, 0xaa, 0x0b, 0xad, 0xae, 0xa4, 0x30, 0xa3, + 0x4e, 0xab, 0x61, 0xad, 0xf2, 0xad, 0x94, 0x28, 0x0a, 0xae, 0x80, 0x8d, 0x6f, 0xab, 0xfa, 0xad, + 0xb7, 0xad, 0x69, 0xac, 0x05, 0xaf, 0x6a, 0xb0, 0xf4, 0xad, 0xee, 0xa5, 0x37, 0xac, 0x68, 0xa2, + 0x8a, 0xb0, 0xce, 0xad, 0xe6, 0xac, 0x24, 0x21, 0xd9, 0x24, 0x26, 0xad, 0xf0, 0xad, 0x4e, 0xaf, + 0x04, 0xaf, 0x5c, 0xac, 0x51, 0xad, 0xb7, 0xb0, 0x15, 0xb1, 0x36, 0xb0, 0xf2, 0xaa, 0x23, 0xae, + 0x34, 0xac, 0xc2, 0xac, 0x7d, 0xab, 0x81, 0xb0, 0xd3, 0xac, 0x44, 0xad, 0xcd, 0xa8, 0x9d, 0xa7, + 0xdb, 0xa9, 0xb0, 0xab, 0xb9, 0xac, 0xba, 0xae, 0xa2, 0xae, 0x33, 0xac, 0x4e, 0xac, 0xad, 0xa8, + 0x6a, 0xae, 0xd2, 0xa8, 0x1c, 0xae, 0x56, 0xae, 0xde, 0xae, 0xd0, 0xad, 0x4c, 0xaa, 0x78, 0xa7, + 0x2d, 0xaf, 0x3a, 0xac, 0xd2, 0xac, 0xa7, 0xa7, 0x3d, 0xac, 0xbe, 0xae, 0x36, 0xab, 0x48, 0xad, + 0x55, 0xad, 0xa2, 0xac, 0xfc, 0xa8, 0xad, 0xab, 0x21, 0xad, 0x09, 0xae, 0xa6, 0xad, 0x82, 0xac, + 0xa2, 0xad, 0x9a, 0xac, 0x6e, 0xab, 0x1f, 0xaf, 0x06, 0xac, 0xd0, 0xac, 0x76, 0xac, 0x09, 0xac, + 0xdc, 0xa9, 0xb1, 0xad, 0x28, 0xab, 0x04, 0xae, 0xf8, 0xae, 0x8d, 0xad, 0x5e, 0xab, 0xa8, 0xad, + 0x9e, 0xa9, 0x14, 0xab, 0x6f, 0xad, 0x96, 0xaa, 0xff, 0xad, 0x0e, 0xab, 0xf6, 0xab, 0x58, 0xad, + 0xaa, 0x29, 0x31, 0x29, 0xbc, 0x29, 0x46, 0x2c, 0x79, 0x2a, 0xa0, 0x2a, 0x19, 0x29, 0x16, 0x2a, + 0x9f, 0x2a, 0xe6, 0x29, 0x3a, 0x2b, 0x13, 0x2a, 0x59, 0x2a, 0xa8, 0x28, 0xaa, 0x29, 0x44, 0x2a, + 0x26, 0x2a, 0x46, 0x28, 0x29, 0x2a, 0x7f, 0x2a, 0xd6, 0x2a, 0x76, 0x2c, 0xc5, 0x2a, 0x5d, 0x28, + 0x58, 0x2c, 0x02, 0x2a, 0xe1, 0x2a, 0xce, 0x29, 0x73, 0x2b, 0x7e, 0x2a, 0xdc, 0x29, 0x4f, 0x2c, + 0xd3, 0x2b, 0xca, 0x29, 0xe4, 0x26, 0xe9, 0x28, 0x84, 0x29, 0xbc, 0x2a, 0xfc, 0x2a, 0xac, 0x2b, + 0x42, 0x2c, 0x03, 0x2c, 0xda, 0x27, 0x33, 0x2c, 0x95, 0x2b, 0xc9, 0x28, 0x1c, 0x2c, 0xac, 0x29, + 0x88, 0x2a, 0xe2, 0x2a, 0x8e, 0x28, 0x60, 0x2a, 0x5b, 0x2c, 0x6e, 0x2a, 0x82, 0x2b, 0x8c, 0x2b, + 0xa8, 0x29, 0x90, 0x2a, 0x51, 0x2c, 0x15, 0x28, 0xa4, 0x2b, 0x5a, 0x2a, 0x43, 0x2b, 0xce, 0x2a, + 0xca, 0x24, 0x0e, 0x25, 0x79, 0x20, 0x55, 0x26, 0xaa, 0x24, 0xda, 0x21, 0xb0, 0x18, 0x00, 0x1f, + 0x88, 0x1e, 0x5c, 0x20, 0xa0, 0x27, 0xe6, 0x24, 0x2c, 0x24, 0x5a, 0x24, 0x08, 0x1f, 0xac, 0x1e, + 0x5a, 0x24, 0x56, 0x1f, 0x7b, 0x1c, 0x0f, 0x24, 0xe1, 0x27, 0x26, 0x22, 0xa6, 0x24, 0xca, 0x9c, + 0x38, 0x29, 0x20, 0x25, 0x8a, 0x22, 0x20, 0x90, 0x14, 0x1e, 0x80, 0x28, 0xc4, 0x1e, 0x65, 0x25, + 0xe0, 0x1a, 0xce, 0x1d, 0x15, 0x1e, 0x14, 0x21, 0x86, 0x25, 0x42, 0x27, 0x04, 0x21, 0x8c, 0x1e, + 0xe2, 0x20, 0x72, 0x24, 0x84, 0x25, 0x02, 0x27, 0x0a, 0x24, 0x52, 0x24, 0x86, 0x20, 0x68, 0x22, + 0xd2, 0x25, 0xae, 0x26, 0x7e, 0x20, 0xea, 0x24, 0x10, 0x23, 0xba, 0x25, 0x44, 0x28, 0x66, 0x24, + 0x3e, 0x21, 0xac, 0x25, 0x7c, 0x25, 0xc6, 0x26, 0x68, 0x27, 0x4d, 0x21, 0xfc, 0x26, 0xec, 0x23, + 0x84, 0x9d, 0xb4, 0x9c, 0x55, 0x9d, 0xb6, 0x9e, 0x20, 0x9e, 0xba, 0x9d, 0xf4, 0x9c, 0xa3, 0x9e, + 0xb6, 0x9e, 0x76, 0x9d, 0xae, 0x9f, 0xae, 0x9c, 0xeb, 0x9c, 0x37, 0x9c, 0xda, 0x9c, 0x9a, 0x9e, + 0xd5, 0x9c, 0x37, 0x9c, 0x5b, 0x9c, 0x3c, 0x9d, 0x1e, 0x9e, 0x1d, 0xa0, 0x60, 0x9f, 0x00, 0x9c, + 0x58, 0xa0, 0xfa, 0x9d, 0x56, 0x9e, 0xec, 0x9d, 0xfd, 0x9e, 0xfe, 0x9d, 0x73, 0x9d, 0x4e, 0xa0, + 0xb8, 0x9e, 0xc4, 0x9c, 0x38, 0x9a, 0x5b, 0x9c, 0xfd, 0x9c, 0x4f, 0x9e, 0xe0, 0x9d, 0x20, 0x9f, + 0xb4, 0x9f, 0x09, 0xa0, 0xaa, 0x9b, 0xaf, 0x9f, 0xc2, 0x9f, 0x0a, 0x9c, 0xf0, 0x9f, 0x4b, 0x9d, + 0x71, 0x9f, 0x8e, 0x9e, 0x02, 0x9c, 0x6c, 0x9d, 0x72, 0x9f, 0xe7, 0x9d, 0x60, 0xa0, 0xec, 0x9e, + 0xd0, 0x9d, 0x20, 0x9f, 0x47, 0xa0, 0x80, 0x9c, 0x74, 0x9f, 0x46, 0x9e, 0xfe, 0x9f, 0x1b, 0x9e, + 0xcd, 0x99, 0x26, 0x99, 0x8e, 0x96, 0xd4, 0x98, 0xa1, 0x99, 0x62, 0x96, 0x71, 0x94, 0xf0, 0x98, + 0x22, 0x98, 0x78, 0x96, 0xf3, 0x9c, 0x20, 0x97, 0x06, 0x96, 0x78, 0x98, 0x1a, 0x94, 0x85, 0x98, + 0x9b, 0x96, 0x1b, 0x96, 0xd0, 0x07, 0xa1, 0x96, 0x9a, 0x9b, 0xa2, 0x98, 0xa4, 0x9b, 0xf0, 0x81, + 0xeb, 0x9d, 0x72, 0x9a, 0x47, 0x98, 0xe8, 0x94, 0x88, 0x96, 0x66, 0x9c, 0xc0, 0x95, 0xbe, 0x9b, + 0x6c, 0x92, 0xba, 0x91, 0xb1, 0x93, 0xc4, 0x95, 0x91, 0x99, 0xa8, 0x9b, 0x26, 0x95, 0x96, 0x96, + 0x79, 0x97, 0xda, 0x9a, 0xcc, 0x99, 0x8a, 0x9b, 0x9d, 0x9a, 0xc3, 0x97, 0x88, 0x98, 0x14, 0x98, + 0x8b, 0x9c, 0x71, 0x9b, 0x0d, 0x95, 0x94, 0x98, 0x8c, 0x97, 0x18, 0x9a, 0x28, 0x9e, 0x4a, 0x99, + 0xac, 0x98, 0x24, 0x9c, 0xb0, 0x9b, 0xe2, 0x9b, 0x4e, 0x9c, 0x81, 0x98, 0xfa, 0x9c, 0x7e, 0x98, + 0x2a, 0x99, 0x26, 0x99, 0x36, 0x99, 0xa6, 0x9c, 0xfd, 0x99, 0x7d, 0x9a, 0x48, 0x98, 0x7e, 0x98, + 0x60, 0x99, 0x5f, 0x99, 0x18, 0x9a, 0xbd, 0x9a, 0xee, 0x9a, 0x99, 0x98, 0x84, 0x99, 0xd6, 0x98, + 0xac, 0x9a, 0x56, 0x97, 0xd7, 0x9a, 0xde, 0x9a, 0xfa, 0x9a, 0x13, 0x9c, 0x46, 0x99, 0x77, 0x97, + 0xff, 0x9b, 0x58, 0x99, 0x5e, 0x9a, 0x84, 0x98, 0x95, 0x9a, 0x97, 0x9a, 0x45, 0x99, 0x73, 0x9b, + 0x74, 0x9b, 0xcd, 0x99, 0x7d, 0x96, 0xbe, 0x98, 0x7e, 0x99, 0x88, 0x9a, 0xf4, 0x9a, 0xd8, 0x9a, + 0xde, 0x9b, 0xd4, 0x9a, 0x91, 0x97, 0x11, 0x9c, 0x4a, 0x9a, 0x02, 0x99, 0x16, 0x9b, 0x3c, 0x99, + 0xe6, 0x98, 0x7e, 0x9a, 0x6a, 0x98, 0x8a, 0x9a, 0x4c, 0x9c, 0x3f, 0x9a, 0x9e, 0x99, 0x1b, 0x9b, + 0xa1, 0x98, 0x42, 0x99, 0x87, 0x9b, 0x0f, 0x97, 0x0a, 0x9b, 0x6d, 0x99, 0xc8, 0x99, 0x88, 0x9a, + 0x18, 0x95, 0x08, 0x96, 0x15, 0x92, 0xfe, 0x98, 0x5a, 0x95, 0xb9, 0x94, 0xf8, 0x8a, 0x2c, 0x89, + 0xd1, 0x8d, 0x18, 0x92, 0xb9, 0x96, 0xae, 0x97, 0x08, 0x97, 0x3b, 0x95, 0xc4, 0x92, 0xe8, 0x8b, + 0xfd, 0x96, 0xd8, 0x8f, 0xba, 0x94, 0x98, 0x96, 0xa8, 0x98, 0x7a, 0x94, 0xe2, 0x92, 0x92, 0x07, + 0x39, 0x99, 0x2d, 0x95, 0x5a, 0x94, 0xb8, 0x05, 0xe6, 0x90, 0xff, 0x98, 0x08, 0x91, 0x6c, 0x95, + 0x26, 0x92, 0xfd, 0x92, 0x62, 0x90, 0x4e, 0x93, 0x93, 0x96, 0x0c, 0x98, 0xbc, 0x94, 0x72, 0x91, + 0x11, 0x94, 0x5d, 0x94, 0xb3, 0x95, 0x38, 0x98, 0x5d, 0x93, 0xcf, 0x95, 0xa7, 0x91, 0xeb, 0x93, + 0x00, 0x94, 0x53, 0x97, 0xae, 0x92, 0xd1, 0x96, 0x10, 0x96, 0xc8, 0x96, 0x0b, 0x96, 0xa6, 0x95, + 0x9c, 0x90, 0x8d, 0x94, 0xa2, 0x95, 0xf1, 0x95, 0xc8, 0x97, 0xa0, 0x91, 0x88, 0x95, 0x5a, 0x95, + 0xde, 0xac, 0xdc, 0xab, 0xef, 0xab, 0x2b, 0xac, 0x15, 0xad, 0xaa, 0xab, 0x55, 0xab, 0xee, 0xad, + 0x71, 0xad, 0x05, 0xac, 0xb4, 0xaf, 0xba, 0xa9, 0xb8, 0xa9, 0xf9, 0xaa, 0x1d, 0xaa, 0xa6, 0xad, + 0xe9, 0xa9, 0xde, 0xaa, 0xcc, 0xa5, 0x86, 0xaa, 0x43, 0xad, 0xf6, 0xad, 0x16, 0xaf, 0xbc, 0xa8, + 0x3d, 0xb0, 0x6a, 0xad, 0xc1, 0xac, 0x8d, 0xac, 0x01, 0xad, 0x9f, 0xad, 0xe3, 0xab, 0x75, 0xaf, + 0xea, 0xab, 0x65, 0xa9, 0x7b, 0xa8, 0x3c, 0xaa, 0x2f, 0xac, 0x9f, 0xad, 0x3f, 0xab, 0x0b, 0xad, + 0x52, 0xad, 0xef, 0xae, 0x9c, 0xab, 0x46, 0xae, 0xc7, 0xae, 0x02, 0xaa, 0x10, 0xae, 0x26, 0xac, + 0xb2, 0xaf, 0xce, 0xad, 0xa0, 0xa9, 0xd7, 0xab, 0xd2, 0xac, 0xe8, 0xac, 0xc8, 0xb0, 0x51, 0xad, + 0x08, 0xad, 0x0a, 0xaf, 0x5d, 0xaf, 0x1d, 0xad, 0xc5, 0xae, 0x1d, 0xad, 0x10, 0xb0, 0x8a, 0xac, + 0x1e, 0xa5, 0xc1, 0xa7, 0x72, 0xa7, 0x00, 0xac, 0xac, 0x9b, 0x3e, 0xa4, 0xf0, 0xa1, 0x0b, 0xa5, + 0x2c, 0xa7, 0xfc, 0xa2, 0xe4, 0xa9, 0x18, 0xad, 0x56, 0xac, 0xaa, 0xa8, 0xe0, 0xa5, 0x46, 0x24, + 0x3e, 0xaa, 0x00, 0x8d, 0x74, 0xaa, 0x42, 0xac, 0x50, 0xa7, 0x43, 0xaa, 0xd5, 0xa4, 0xfe, 0x9c, + 0x60, 0xab, 0x6c, 0xa4, 0xef, 0xa9, 0x14, 0x97, 0x8e, 0xa5, 0x6c, 0xa8, 0x0c, 0xaa, 0xca, 0xa9, + 0xa4, 0xa7, 0x1c, 0xa9, 0x2d, 0xa5, 0x27, 0xa3, 0x9e, 0xa4, 0x77, 0xa8, 0xbb, 0xa8, 0x61, 0xa8, + 0xb2, 0xa9, 0x96, 0xa8, 0x60, 0xa9, 0xcf, 0xa8, 0x5a, 0xa9, 0xc6, 0xac, 0xf8, 0xa5, 0x9a, 0xa0, + 0x62, 0xa1, 0xcd, 0xa6, 0xb2, 0xa8, 0x0a, 0xaa, 0x7f, 0xa8, 0x8b, 0xa5, 0x01, 0x99, 0x42, 0xac, + 0x42, 0xa9, 0xe4, 0xa3, 0x82, 0xa4, 0xd4, 0xa5, 0x02, 0xab, 0x78, 0xa9, 0xf3, 0xa5, 0x61, 0xab, + 0xac, 0x24, 0x7f, 0x1f, 0x60, 0x26, 0xe9, 0x28, 0x78, 0x23, 0x2b, 0x24, 0x42, 0x25, 0x50, 0x25, + 0xed, 0x28, 0xf4, 0x21, 0x83, 0x26, 0x0c, 0x21, 0xa0, 0x27, 0x3b, 0x23, 0x00, 0x20, 0x13, 0x24, + 0x26, 0x25, 0xaa, 0x1f, 0x7a, 0x26, 0x55, 0x22, 0xa1, 0x21, 0x34, 0x28, 0xe8, 0x25, 0x66, 0x1c, + 0xf2, 0x29, 0xc6, 0x1e, 0xbc, 0x25, 0x7c, 0x26, 0x55, 0x25, 0x17, 0x22, 0x94, 0x25, 0x69, 0x27, + 0xd7, 0x26, 0xf2, 0x24, 0xae, 0x95, 0x5d, 0x25, 0x74, 0x24, 0x1a, 0x24, 0x56, 0x26, 0xf0, 0x28, + 0x16, 0x25, 0x96, 0x28, 0x12, 0x24, 0x05, 0x28, 0x76, 0x26, 0x49, 0x26, 0x2a, 0x28, 0xa7, 0x16, + 0x0b, 0x26, 0x5f, 0x24, 0x71, 0x20, 0xf9, 0x1f, 0x47, 0x28, 0x0b, 0x27, 0x4d, 0x24, 0x1d, 0x26, + 0x84, 0x27, 0xae, 0x24, 0xee, 0x27, 0xee, 0x23, 0x6c, 0x28, 0x28, 0x28, 0x5e, 0x28, 0xd4, 0x28, + 0xf7, 0x21, 0x50, 0x09, 0x42, 0xa1, 0xda, 0x20, 0xc9, 0x1c, 0x13, 0x9e, 0xc4, 0x18, 0x84, 0x21, + 0x9e, 0x1c, 0x78, 0x16, 0x72, 0x21, 0xb8, 0x21, 0x60, 0x22, 0x2a, 0x1e, 0x7c, 0x9f, 0x04, 0x9d, + 0xc5, 0x21, 0x21, 0x9b, 0x44, 0x15, 0x5b, 0x20, 0x1a, 0x26, 0xf2, 0x1c, 0xe6, 0x21, 0x32, 0xa0, + 0xdc, 0x26, 0x57, 0x20, 0x1c, 0x21, 0x50, 0xa0, 0x80, 0x89, 0x20, 0x25, 0x52, 0x23, 0xa6, 0x1d, + 0x38, 0x10, 0x00, 0x9c, 0xd6, 0x22, 0x00, 0x11, 0x66, 0x21, 0x28, 0x20, 0x63, 0x21, 0x8c, 0x24, + 0x96, 0x1e, 0x80, 0x14, 0xba, 0x24, 0xbf, 0x22, 0x0d, 0x24, 0xb8, 0x21, 0x36, 0x1c, 0xc3, 0x15, + 0xff, 0x24, 0xa5, 0x1d, 0xad, 0x1d, 0xe6, 0x21, 0x1a, 0x9b, 0x80, 0x88, 0xa8, 0x21, 0x34, 0x22, + 0x18, 0x1c, 0x41, 0x24, 0x70, 0x9a, 0xc3, 0x20, 0x46, 0x24, 0x06, 0x26, 0xd6, 0x23, 0x3b, 0x20, + 0x71, 0x99, 0x58, 0x86, 0x9e, 0x98, 0x3e, 0x9c, 0xb4, 0x98, 0x85, 0x96, 0xc0, 0x99, 0x22, 0x9a, + 0x2f, 0x9d, 0x9c, 0x95, 0xc3, 0x99, 0x0d, 0x11, 0xce, 0x99, 0xef, 0x94, 0xb8, 0x82, 0x86, 0x99, + 0x02, 0x98, 0xb0, 0x93, 0x77, 0x98, 0x40, 0x84, 0xbd, 0x97, 0x3c, 0x9b, 0xfa, 0x9a, 0xb8, 0x84, + 0x8a, 0x9e, 0xe3, 0x92, 0xbe, 0x98, 0xcd, 0x9a, 0xe6, 0x98, 0xb1, 0x96, 0xef, 0x98, 0x5f, 0x9a, + 0x3a, 0x9a, 0x9c, 0x95, 0x02, 0x0b, 0x8c, 0x99, 0x32, 0x99, 0xc0, 0x96, 0x2a, 0x9a, 0xaa, 0x9d, + 0x71, 0x97, 0x56, 0x9c, 0xd0, 0x97, 0x37, 0x9c, 0x85, 0x9a, 0x31, 0x97, 0x6a, 0x9c, 0xf2, 0x80, + 0x44, 0x9c, 0xfe, 0x97, 0xd3, 0x8d, 0xa1, 0x89, 0x82, 0x9b, 0xf6, 0x9a, 0x20, 0x9a, 0x0a, 0x98, + 0xa0, 0x9a, 0x43, 0x9a, 0xfe, 0x9b, 0x28, 0x98, 0x4a, 0x9c, 0xbc, 0x9c, 0x3b, 0x9d, 0x40, 0x9c, + 0x16, 0x98, 0x12, 0x11, 0x2f, 0x15, 0x26, 0x95, 0x5a, 0x95, 0x26, 0x11, 0x0d, 0x94, 0x17, 0x98, + 0xd4, 0x96, 0xf6, 0x8d, 0x76, 0x95, 0xee, 0x0f, 0x82, 0x94, 0x77, 0x8e, 0x06, 0x16, 0x0c, 0x91, + 0x64, 0x94, 0x32, 0x09, 0x46, 0x0b, 0x48, 0x0d, 0x80, 0x9a, 0x02, 0x92, 0x9e, 0x98, 0xd2, 0x14, + 0x87, 0x9c, 0x35, 0x94, 0x66, 0x94, 0xa0, 0x03, 0xcc, 0x8c, 0x02, 0x99, 0xcd, 0x96, 0x4c, 0x92, + 0xdc, 0x8e, 0xc7, 0x13, 0x2a, 0x95, 0x86, 0x91, 0x95, 0x97, 0x05, 0x93, 0x7e, 0x96, 0x12, 0x9b, + 0xc6, 0x8f, 0x3e, 0x92, 0x70, 0x98, 0xb3, 0x98, 0x98, 0x98, 0xd9, 0x8e, 0x04, 0x96, 0x7a, 0x00, + 0xb2, 0x9b, 0x83, 0x92, 0xd0, 0x82, 0x76, 0x90, 0x50, 0x01, 0x32, 0x91, 0xf2, 0x98, 0x83, 0x92, + 0xbb, 0x91, 0xec, 0x99, 0x04, 0x90, 0xc7, 0x95, 0xed, 0x98, 0xa4, 0x9b, 0x9c, 0x9a, 0xee, 0x94, + 0x26, 0x93, 0x5d, 0x92, 0x56, 0x96, 0xd6, 0x98, 0x84, 0x90, 0xde, 0x93, 0xa6, 0x93, 0xf6, 0x93, + 0x96, 0x97, 0x45, 0x91, 0x81, 0x96, 0x5f, 0x96, 0x48, 0x98, 0x59, 0x94, 0x48, 0x92, 0x38, 0x8e, + 0xd7, 0x95, 0x64, 0x8d, 0x43, 0x97, 0x01, 0x96, 0xd6, 0x90, 0x11, 0x98, 0x3e, 0x94, 0xa6, 0x8d, + 0xd9, 0x98, 0x0b, 0x8f, 0x19, 0x96, 0x70, 0x94, 0xb0, 0x94, 0x52, 0x92, 0xd7, 0x95, 0x40, 0x97, + 0x1d, 0x96, 0xe3, 0x95, 0xba, 0x87, 0x24, 0x94, 0xba, 0x92, 0x6d, 0x94, 0xc9, 0x95, 0x59, 0x97, + 0xc2, 0x95, 0xe3, 0x97, 0x5c, 0x94, 0xb6, 0x96, 0xeb, 0x95, 0x25, 0x98, 0x61, 0x96, 0x53, 0x8a, + 0xb0, 0x92, 0x27, 0x94, 0x1a, 0x93, 0xa6, 0x93, 0xb4, 0x97, 0xb7, 0x95, 0x5d, 0x90, 0x98, 0x97, + 0x1c, 0x97, 0x04, 0x92, 0x1f, 0x96, 0xea, 0x92, 0x18, 0x98, 0xb3, 0x96, 0x0a, 0x96, 0xa7, 0x98, + 0x70, 0x90, 0xdf, 0x8e, 0xf8, 0x0a, 0x84, 0x93, 0xa4, 0x83, 0xe8, 0x08, 0x94, 0x81, 0xc0, 0x8f, + 0x16, 0x89, 0xa0, 0x88, 0xf2, 0x92, 0xa5, 0x96, 0x03, 0x95, 0x60, 0x91, 0xb3, 0x80, 0x6a, 0x11, + 0x07, 0x94, 0x2f, 0x0b, 0xaf, 0x90, 0x37, 0x95, 0x0c, 0x95, 0xce, 0x90, 0x83, 0x8f, 0x96, 0x0c, + 0xab, 0x95, 0x4a, 0x90, 0x19, 0x93, 0xf3, 0x10, 0x36, 0x86, 0xd3, 0x94, 0x4f, 0x94, 0xe9, 0x90, + 0xfd, 0x89, 0x8e, 0x8c, 0x01, 0x93, 0x64, 0x00, 0xd8, 0x8f, 0x9d, 0x91, 0xbe, 0x91, 0x2d, 0x92, + 0x05, 0x92, 0x3e, 0x8a, 0xe4, 0x94, 0xf6, 0x91, 0xfc, 0x93, 0x8d, 0x95, 0x76, 0x88, 0xf9, 0x89, + 0x04, 0x91, 0x25, 0x8f, 0xd2, 0x91, 0x97, 0x94, 0x84, 0x82, 0xc6, 0x80, 0x04, 0x8c, 0x2e, 0x95, + 0xaa, 0x8f, 0x56, 0x91, 0xf0, 0x0a, 0x62, 0x90, 0x88, 0x94, 0xde, 0x94, 0x62, 0x90, 0x8c, 0x92, + 0x0f, 0xaa, 0xe2, 0x22, 0xec, 0x9b, 0xc8, 0xa9, 0x12, 0xa9, 0xdc, 0x9e, 0x19, 0xa9, 0x8f, 0xaa, + 0x66, 0xac, 0x1e, 0xa4, 0x80, 0xa8, 0x22, 0x28, 0xfe, 0xa6, 0x8d, 0xa0, 0x96, 0x25, 0x4f, 0xa9, + 0x99, 0xa5, 0x74, 0xa0, 0xd4, 0xa0, 0x00, 0x25, 0x61, 0xaa, 0x94, 0xa8, 0x8c, 0xab, 0xfe, 0x22, + 0xdd, 0xae, 0x46, 0xa4, 0xcc, 0xa6, 0xa6, 0xa8, 0x59, 0xa6, 0xc5, 0xa8, 0x59, 0xa8, 0x24, 0xa8, + 0x06, 0xa8, 0x60, 0x1d, 0xe2, 0x9e, 0x6c, 0xa8, 0xbe, 0xa9, 0xfc, 0xa4, 0x6f, 0xa9, 0xed, 0xad, + 0x82, 0xa3, 0x0d, 0xaa, 0x86, 0xa8, 0x04, 0xac, 0x7b, 0xaa, 0xf3, 0x9d, 0x93, 0xab, 0x70, 0x18, + 0x8a, 0xad, 0x36, 0xa6, 0xcc, 0x1c, 0xa4, 0x1b, 0x20, 0xa8, 0x1e, 0xa9, 0xb6, 0xab, 0x6e, 0xa3, + 0x5a, 0xa8, 0x0a, 0xac, 0xc3, 0xa9, 0x3a, 0xa8, 0xbf, 0xab, 0x55, 0xad, 0xa2, 0xad, 0xdd, 0xa9, + 0x38, 0xbe, 0x37, 0xc0, 0x5e, 0xbe, 0x42, 0xbf, 0x01, 0xbf, 0x28, 0xc2, 0x7f, 0xc1, 0xfc, 0xc2, + 0x39, 0xbf, 0x0e, 0xbd, 0x3f, 0xc2, 0x4d, 0xbf, 0x67, 0xc0, 0x0d, 0xc1, 0x13, 0xc0, 0x86, 0xc1, + 0x6d, 0xc2, 0xde, 0xc1, 0x20, 0xc1, 0xb0, 0xbe, 0x10, 0xc0, 0xf2, 0xbc, 0xb6, 0xc1, 0x88, 0xbe, + 0x83, 0xc1, 0x4e, 0xbf, 0x24, 0xc2, 0x19, 0xbe, 0x83, 0xb7, 0x3b, 0xbd, 0xeb, 0xc2, 0x4f, 0xbe, + 0x06, 0xb9, 0x7a, 0xbe, 0x4f, 0xc0, 0x7f, 0xc0, 0x91, 0xba, 0xbe, 0xc0, 0xac, 0xc2, 0x43, 0xc0, + 0x98, 0xc1, 0xa6, 0xc3, 0x4c, 0xc0, 0x10, 0xbd, 0x73, 0xc0, 0x6b, 0xbe, 0x4e, 0xc2, 0x2d, 0xbc, + 0xe0, 0xc0, 0x74, 0xbd, 0x02, 0xc1, 0xc6, 0xc1, 0x44, 0xb9, 0xdc, 0xc4, 0x58, 0xb7, 0x58, 0xc0, + 0x63, 0xc3, 0x37, 0xc1, 0xaf, 0xc0, 0xff, 0xc2, 0x58, 0xbf, 0x47, 0xb9, 0x88, 0xba, 0x4d, 0xc1, + 0xdc, 0x3d, 0x11, 0x3d, 0x60, 0x3d, 0x72, 0x3e, 0x96, 0x3b, 0x74, 0x3d, 0x90, 0x3e, 0x99, 0x3e, + 0xc3, 0x3f, 0xce, 0x3d, 0x40, 0x3e, 0x30, 0x3e, 0xc3, 0x3d, 0x37, 0x3f, 0x1d, 0x3d, 0xba, 0x3e, + 0xc6, 0x3c, 0x0d, 0x3f, 0x52, 0x3c, 0xa2, 0x39, 0x4c, 0x3d, 0x00, 0x3e, 0xcc, 0x3e, 0xcb, 0x3d, + 0xc6, 0x3f, 0x1b, 0x3d, 0x3a, 0x3c, 0xaa, 0x3c, 0x86, 0x3c, 0xce, 0x3d, 0xb6, 0x3c, 0x69, 0x3c, + 0x66, 0x3c, 0xa2, 0x3c, 0x42, 0x3e, 0x2c, 0x37, 0x1a, 0x3d, 0x6e, 0x3c, 0x87, 0x3e, 0x39, 0x3b, + 0x4b, 0x40, 0x9e, 0x3e, 0x90, 0x3e, 0xd6, 0x3d, 0x0a, 0x3f, 0xd2, 0x3b, 0x9e, 0x3d, 0x62, 0x3a, + 0x87, 0x3e, 0x46, 0x3d, 0xad, 0x3e, 0x99, 0x3c, 0xa1, 0x38, 0xe2, 0x3d, 0xb2, 0x38, 0xc0, 0x3f, + 0x38, 0x40, 0x6a, 0x3e, 0xb2, 0x3d, 0x45, 0x40, 0x34, 0x40, 0xea, 0x37, 0x08, 0x3a, 0x35, 0x3d, + 0x15, 0x35, 0x68, 0x38, 0xc0, 0x38, 0x08, 0xaa, 0x39, 0x3b, 0xca, 0x38, 0x88, 0x36, 0xef, 0x35, + 0x80, 0x29, 0x4e, 0x34, 0x09, 0x38, 0x88, 0xb1, 0x60, 0x35, 0x2c, 0x38, 0x4b, 0x36, 0xcc, 0x37, + 0x13, 0x3c, 0x91, 0x37, 0x50, 0x3a, 0x9c, 0x39, 0x8d, 0x39, 0x25, 0x37, 0xf8, 0x2e, 0xd8, 0xae, + 0xce, 0x3a, 0x23, 0x34, 0x9f, 0x3c, 0xf4, 0x38, 0x0c, 0xb0, 0xa4, 0x31, 0xc2, 0x38, 0xfa, 0x33, + 0x5e, 0xb0, 0xec, 0x3a, 0x95, 0x38, 0x3c, 0x2e, 0x80, 0x25, 0x4a, 0x36, 0xb5, 0x35, 0xf8, 0x36, + 0x25, 0x38, 0x12, 0x3c, 0xcd, 0x37, 0x2c, 0x38, 0x46, 0x36, 0x7c, 0x34, 0x56, 0x39, 0x8e, 0x35, + 0x2e, 0x3a, 0x40, 0x39, 0x64, 0x39, 0x86, 0x38, 0x09, 0xb5, 0x84, 0x3e, 0xfe, 0x3b, 0xb9, 0x39, + 0x00, 0x3b, 0x20, 0x35, 0x96, 0x3a, 0xc2, 0x3c, 0x31, 0x37, 0xd8, 0x39, 0xb0, 0x27, 0xd8, 0x3a, + 0xe2, 0xb1, 0xb6, 0xb0, 0xb4, 0xb1, 0xb9, 0xb1, 0x22, 0xb0, 0x38, 0xb0, 0x9a, 0xb1, 0xc8, 0xb0, + 0x76, 0xb3, 0x0e, 0xb2, 0xfa, 0xb0, 0x34, 0xb1, 0x1b, 0xb1, 0xcc, 0xb2, 0x98, 0xb0, 0xe8, 0xb1, + 0xdc, 0xaf, 0x16, 0xb2, 0x6e, 0xaf, 0x70, 0xad, 0x4e, 0xb1, 0x99, 0xb2, 0x48, 0xb1, 0x09, 0xb1, + 0xbf, 0xb3, 0x98, 0xb0, 0x4c, 0xaf, 0xf6, 0xb0, 0xba, 0xb0, 0xdb, 0xb1, 0xd4, 0xad, 0x02, 0xb0, + 0x64, 0xb0, 0x37, 0xb1, 0x22, 0xb2, 0xa0, 0x19, 0x48, 0xb1, 0xd4, 0xae, 0xd9, 0xb0, 0x84, 0xad, + 0x13, 0xb4, 0x7d, 0xb1, 0x5e, 0xb2, 0x7d, 0xb2, 0xb4, 0xb2, 0xdd, 0xae, 0x73, 0xb0, 0x8b, 0xae, + 0x78, 0xb2, 0xee, 0xb1, 0x6c, 0xb2, 0xb1, 0xae, 0x49, 0xaa, 0x7a, 0xb0, 0xda, 0xaf, 0x10, 0xb4, + 0x98, 0xb3, 0x6c, 0xb1, 0xa7, 0xb1, 0x36, 0xb4, 0x6c, 0xb4, 0xae, 0xad, 0x98, 0xad, 0xca, 0xb0, + 0x6c, 0xac, 0xd7, 0xac, 0x9b, 0xae, 0xa0, 0xa2, 0xf8, 0xaf, 0x17, 0xab, 0xa2, 0xaa, 0x8e, 0xa5, + 0x90, 0xa9, 0x89, 0xac, 0x5c, 0xaa, 0xd8, 0x20, 0x47, 0xaa, 0x64, 0xad, 0x0c, 0xab, 0x2c, 0xac, + 0xbd, 0xae, 0xd8, 0xab, 0x91, 0xad, 0x6b, 0xad, 0x92, 0xae, 0x74, 0xae, 0x48, 0x9e, 0x00, 0x98, + 0x40, 0xb0, 0x56, 0xa9, 0xee, 0xaf, 0x6c, 0xae, 0x9e, 0xa5, 0x18, 0xab, 0x32, 0xa8, 0x1c, 0xa9, + 0xf4, 0xa2, 0x48, 0xb0, 0xea, 0xad, 0xd8, 0x28, 0x13, 0xa9, 0x84, 0xa8, 0x1e, 0xa6, 0x0d, 0xa9, + 0xe1, 0xad, 0xe6, 0xae, 0x5a, 0xad, 0xfa, 0xae, 0xa7, 0xac, 0xcc, 0xa8, 0x2e, 0xac, 0x52, 0xab, + 0x70, 0xaf, 0x9d, 0xaf, 0x78, 0xae, 0xe8, 0xa9, 0x25, 0x29, 0xfc, 0xb0, 0x3f, 0xb1, 0x38, 0xb0, + 0x48, 0xaf, 0x4b, 0xa9, 0x77, 0xaf, 0x65, 0xb1, 0x1d, 0xaf, 0x30, 0xaf, 0xac, 0xa3, 0xb9, 0xae, + 0xec, 0xac, 0xd8, 0xac, 0x72, 0xac, 0xd7, 0xad, 0xd8, 0xaa, 0x13, 0xae, 0x8c, 0xae, 0x62, 0xaf, + 0x8c, 0xae, 0x9e, 0xac, 0xad, 0xae, 0xcc, 0xad, 0x8a, 0xad, 0x9a, 0xae, 0xed, 0xac, 0x97, 0xae, + 0x7a, 0xad, 0xfb, 0xae, 0xb2, 0xac, 0xd2, 0xa9, 0xcb, 0xac, 0x90, 0xac, 0x08, 0xaf, 0x4d, 0xad, + 0xeb, 0xae, 0xce, 0xac, 0xe4, 0xac, 0xdc, 0xab, 0xa2, 0xaa, 0xbd, 0xac, 0x0a, 0xae, 0x21, 0xac, + 0xd6, 0xaa, 0xa1, 0xab, 0x98, 0xad, 0xca, 0xaa, 0xd6, 0xab, 0xdf, 0xac, 0x30, 0xaf, 0x17, 0xac, + 0xb8, 0xaf, 0x3c, 0xaf, 0xd6, 0xad, 0x70, 0xac, 0x48, 0xae, 0xab, 0xab, 0x32, 0xae, 0x80, 0xa9, + 0xe6, 0xad, 0x18, 0xac, 0x23, 0xae, 0x63, 0xad, 0xaf, 0xa8, 0x68, 0xaf, 0x65, 0xa4, 0x65, 0xae, + 0x27, 0xb0, 0x5f, 0xae, 0x41, 0xad, 0xe6, 0xaf, 0xa4, 0xae, 0x3a, 0xa5, 0x51, 0xa9, 0x44, 0xad, + 0xfe, 0xa4, 0x1c, 0xa9, 0x19, 0xa8, 0x41, 0xa0, 0x8e, 0xaa, 0x42, 0xab, 0x33, 0xa9, 0x97, 0xaa, + 0xaa, 0x9f, 0xa8, 0xa2, 0x96, 0xaa, 0xb4, 0x9c, 0x00, 0xa8, 0x01, 0xa9, 0x30, 0xa8, 0x8b, 0xa9, + 0xea, 0xac, 0xb6, 0xa9, 0x86, 0xab, 0xce, 0xa9, 0x8b, 0xa9, 0xc3, 0xa4, 0xca, 0xa7, 0x40, 0x9d, + 0xda, 0xaa, 0x2e, 0xa6, 0x34, 0xad, 0x5c, 0xa8, 0xa8, 0x21, 0x3f, 0xa1, 0x2c, 0xac, 0x97, 0xa5, + 0x4f, 0x20, 0xc1, 0xa9, 0xd7, 0xa8, 0x8c, 0xa8, 0xf8, 0x15, 0x2c, 0xa9, 0x36, 0xaa, 0x30, 0xa9, + 0x04, 0xa9, 0x32, 0xad, 0x4c, 0xa8, 0xbd, 0xa5, 0xa0, 0xa7, 0x68, 0xa6, 0xad, 0xab, 0x3c, 0xa5, + 0x4b, 0xaa, 0xe5, 0xa7, 0xe8, 0xa9, 0x09, 0xab, 0x20, 0x20, 0xea, 0xaf, 0xd6, 0xa8, 0xf8, 0xa8, + 0x5a, 0xac, 0x89, 0xa8, 0xac, 0xaa, 0xe0, 0xac, 0x6c, 0xa5, 0x1f, 0xa8, 0x6d, 0x9d, 0xad, 0xab, + 0xea, 0xc0, 0xed, 0xbf, 0x89, 0xc1, 0xd2, 0xbe, 0xd5, 0xc0, 0x5a, 0xbd, 0x77, 0xbf, 0x37, 0xbc, + 0x52, 0xc1, 0x38, 0xc1, 0x1c, 0xbe, 0x11, 0xbd, 0x30, 0xbf, 0x6a, 0xc1, 0xe6, 0xbe, 0x3d, 0xc0, + 0x0e, 0xbf, 0x31, 0xc0, 0x94, 0xbe, 0xf6, 0xbd, 0x06, 0xc1, 0x4a, 0xc2, 0xda, 0xbc, 0x8a, 0xbd, + 0x10, 0xc3, 0x83, 0xbe, 0xa3, 0xbf, 0xfc, 0xc0, 0x2a, 0xbf, 0xb6, 0xc0, 0x3c, 0xb8, 0xdc, 0xbd, + 0x29, 0xbe, 0xde, 0xc1, 0x51, 0xc1, 0x14, 0x3c, 0x32, 0xc0, 0x05, 0xbc, 0x8f, 0xbc, 0x0c, 0xbb, + 0x66, 0xc2, 0x60, 0xc0, 0x49, 0xc1, 0x62, 0xc2, 0x3d, 0xc1, 0xe9, 0xbc, 0x0b, 0xbe, 0x22, 0xbe, + 0xf2, 0xc1, 0x2a, 0xc2, 0x8a, 0xc1, 0xcb, 0xbb, 0x25, 0x2c, 0x3a, 0xc0, 0x38, 0xc2, 0x99, 0xc3, + 0x0e, 0xc2, 0xe2, 0xbe, 0x6e, 0xc1, 0xf7, 0xc3, 0xce, 0xc3, 0x32, 0xc0, 0x96, 0xbb, 0x68, 0xc0, + 0x91, 0xc2, 0x21, 0xbc, 0x27, 0xc0, 0x74, 0xbe, 0x65, 0xbd, 0x70, 0xc1, 0x2d, 0xbf, 0x62, 0xc1, + 0xe4, 0xbe, 0x13, 0xc1, 0x5f, 0xc1, 0x40, 0xbc, 0x0e, 0xc2, 0x26, 0xbc, 0x0d, 0x34, 0x9f, 0xc0, + 0x96, 0xbb, 0x25, 0xc1, 0xa3, 0xbf, 0xb9, 0xbe, 0x0d, 0xc2, 0x7b, 0xbd, 0xe8, 0xbb, 0x3d, 0xbf, + 0x1c, 0xb9, 0xd4, 0xbf, 0xd6, 0xbe, 0xd4, 0xc0, 0xba, 0xbb, 0x17, 0xb8, 0x98, 0xc0, 0xf1, 0xc2, + 0xd3, 0xc0, 0x57, 0xbc, 0x03, 0xbe, 0x24, 0xc0, 0xf2, 0xc1, 0xc0, 0xc2, 0x1a, 0xc1, 0x1f, 0xc1, + 0x21, 0xc0, 0x76, 0xc0, 0x02, 0xc0, 0xdd, 0xbb, 0x59, 0xc0, 0xa6, 0xb8, 0xf8, 0xbe, 0x01, 0xc0, + 0x91, 0xb5, 0x46, 0xc0, 0xd4, 0xbc, 0x45, 0xb9, 0xb9, 0xbe, 0xea, 0xc3, 0xd7, 0xbf, 0x78, 0xbd, + 0x21, 0xc0, 0x75, 0xc2, 0xeb, 0xbf, 0x59, 0xc1, 0xc5, 0xc3, 0x10, 0xc2, 0xdd, 0x27, 0xab, 0xbc, + 0x7a, 0x3e, 0x84, 0x39, 0x00, 0x3e, 0x8c, 0x3c, 0x72, 0x39, 0x2f, 0x3b, 0x36, 0x3e, 0xa6, 0x3c, + 0x53, 0x3d, 0xf5, 0x3c, 0x66, 0x3c, 0x45, 0x3e, 0xb4, 0x3c, 0xc4, 0x3d, 0x6b, 0x3c, 0xbb, 0x3f, + 0x36, 0x3d, 0xbb, 0x3f, 0xa6, 0x3e, 0xb6, 0x3c, 0x1c, 0x3e, 0xb6, 0x3c, 0x3f, 0x3c, 0xfd, 0x3d, + 0x21, 0x36, 0xea, 0x3e, 0x2c, 0x3d, 0x32, 0x3d, 0xde, 0x39, 0xcc, 0x38, 0x5a, 0x3d, 0x00, 0x3f, + 0x40, 0x3d, 0x6a, 0x3c, 0x62, 0x38, 0xda, 0x3c, 0x50, 0x3e, 0x62, 0x3d, 0xe2, 0x3c, 0x3b, 0x3c, + 0xcb, 0x3c, 0xea, 0x3d, 0x06, 0x3d, 0xde, 0x3a, 0x9e, 0x3f, 0x84, 0x3c, 0xdc, 0x3c, 0xfc, 0x3d, + 0xc2, 0x3a, 0xc1, 0x3b, 0x93, 0x3c, 0xea, 0x3c, 0xc7, 0x3e, 0x17, 0x40, 0x05, 0x3e, 0x9b, 0x3c, + 0x60, 0x3d, 0x38, 0x3d, 0x11, 0x40, 0x9e, 0x3f, 0x16, 0x40, 0x26, 0x3d, 0xc7, 0x37, 0x4e, 0x3d, + 0xca, 0x36, 0xb5, 0x32, 0xe8, 0x2e, 0x30, 0xa8, 0xe8, 0x31, 0x02, 0x3c, 0x08, 0x3b, 0x28, 0x3c, + 0x9c, 0x36, 0x6c, 0x38, 0x50, 0x3a, 0x89, 0x35, 0x82, 0x3a, 0xe8, 0x2f, 0xe9, 0xb5, 0x00, 0x36, + 0x41, 0x35, 0xc0, 0x26, 0x6b, 0x36, 0x00, 0x35, 0x92, 0x3c, 0x28, 0x39, 0x20, 0xaf, 0xd6, 0x30, + 0x08, 0x34, 0xa3, 0x35, 0xe0, 0x3b, 0x6c, 0x34, 0x94, 0x38, 0xcd, 0xb2, 0x3f, 0x39, 0xa2, 0x3b, + 0x79, 0x35, 0x52, 0x36, 0x44, 0xb6, 0xff, 0x38, 0x3a, 0xae, 0x56, 0x3c, 0x5a, 0x33, 0x22, 0x3a, + 0x52, 0x31, 0x38, 0x32, 0xc0, 0x24, 0x74, 0xa9, 0x6f, 0x3a, 0x08, 0x2a, 0xd2, 0x31, 0xe7, 0x3b, + 0xb4, 0x37, 0x6a, 0x38, 0x50, 0x33, 0x8c, 0xaf, 0x28, 0x38, 0x33, 0x35, 0xbd, 0x35, 0xfc, 0x35, + 0xc6, 0x31, 0xe5, 0x36, 0x62, 0x38, 0x82, 0x37, 0xd5, 0x38, 0x0c, 0x39, 0xb8, 0x39, 0xc4, 0x30, + 0xf3, 0xb0, 0x16, 0xad, 0x28, 0xb1, 0x5f, 0xaf, 0x40, 0xac, 0x08, 0xae, 0xf2, 0xb2, 0x5f, 0xb0, + 0x36, 0xb1, 0x21, 0xb0, 0x5c, 0xaf, 0xf2, 0xb2, 0x70, 0xaf, 0x02, 0xb2, 0xfa, 0xb0, 0x69, 0xb3, + 0xc4, 0xb1, 0x94, 0xb2, 0x97, 0xb2, 0x5d, 0xb0, 0xef, 0xb1, 0x3a, 0xb1, 0xb9, 0xaf, 0x80, 0xb1, + 0x3e, 0xaa, 0xc7, 0xb2, 0xf5, 0xb1, 0x1a, 0xb0, 0xd0, 0xae, 0xf8, 0xab, 0x06, 0xb1, 0x38, 0xb2, + 0x46, 0xb0, 0xc6, 0xb0, 0x58, 0xa2, 0x9e, 0xb0, 0x40, 0xb0, 0x98, 0xb0, 0x04, 0xaf, 0x26, 0xaf, + 0xa2, 0xaf, 0x08, 0xb1, 0xd8, 0xaf, 0x02, 0xae, 0x0e, 0xb4, 0xe0, 0xb0, 0x43, 0xb0, 0xa2, 0xb2, + 0x74, 0xb0, 0x84, 0xae, 0x92, 0xb0, 0x02, 0xb1, 0x2b, 0xb3, 0x1d, 0xb2, 0xb6, 0xb1, 0xab, 0xb0, + 0x89, 0xb0, 0x06, 0xaf, 0x43, 0xb4, 0x0a, 0xb3, 0xa2, 0xb2, 0xe8, 0xaf, 0x06, 0xaf, 0x5c, 0xb1, + 0x82, 0xa8, 0x08, 0xa8, 0xa4, 0xa5, 0x80, 0x17, 0x4e, 0xa4, 0xb4, 0xae, 0xca, 0xb0, 0xf5, 0xaf, + 0x8c, 0xac, 0xba, 0xab, 0x5e, 0xad, 0x12, 0xae, 0x1e, 0xad, 0x2a, 0xab, 0x38, 0xa0, 0xda, 0xac, + 0x4c, 0xad, 0xa8, 0xa3, 0x1a, 0xad, 0x60, 0xaa, 0xb4, 0xb0, 0x0a, 0xaf, 0x10, 0x9b, 0xc2, 0xa8, + 0x91, 0xa8, 0xbe, 0xac, 0xf8, 0xb0, 0xa7, 0xa6, 0xc7, 0xad, 0xd8, 0x24, 0xae, 0xad, 0x5a, 0xaf, + 0xab, 0xa8, 0x02, 0xad, 0x36, 0x2d, 0x76, 0xad, 0x74, 0x28, 0x82, 0xaf, 0x5e, 0xa1, 0x48, 0xad, + 0x1d, 0xa4, 0x5c, 0xa7, 0x00, 0x15, 0xb0, 0x9b, 0x97, 0xb0, 0xa6, 0xa9, 0xac, 0xa7, 0x01, 0xb1, + 0x96, 0xae, 0xaf, 0xab, 0x91, 0xaa, 0x94, 0xa5, 0xd2, 0xae, 0xe6, 0xa4, 0x15, 0xac, 0x5c, 0xac, + 0x84, 0xa6, 0xb5, 0xa5, 0x89, 0xaf, 0xe5, 0xac, 0x0d, 0xac, 0x6e, 0xab, 0x3e, 0xb0, 0x60, 0xaa, + 0x0c, 0xaf, 0x2c, 0xa9, 0xba, 0xad, 0x79, 0xac, 0xd9, 0xa9, 0x40, 0xac, 0xf0, 0xac, 0xd4, 0xac, + 0xb3, 0xac, 0x3d, 0xad, 0xe0, 0xac, 0x9e, 0xac, 0x66, 0xad, 0x77, 0xac, 0x5a, 0xa9, 0xcd, 0xae, + 0xc0, 0xab, 0x5c, 0xaf, 0xb5, 0xad, 0x5f, 0xac, 0xfa, 0xad, 0x8b, 0xab, 0x61, 0xab, 0x67, 0xad, + 0x99, 0xa5, 0xf4, 0xad, 0x23, 0xac, 0x78, 0xad, 0xaa, 0xa8, 0x4e, 0xa8, 0x23, 0xad, 0x23, 0xaf, + 0x73, 0xad, 0xfb, 0xaa, 0x75, 0xaa, 0x9e, 0xac, 0x09, 0xaf, 0xf6, 0xad, 0x77, 0xad, 0xa8, 0xac, + 0xf2, 0xac, 0xc6, 0xad, 0x23, 0xad, 0x49, 0xaa, 0x3f, 0xae, 0x96, 0xaa, 0xa0, 0xac, 0xf0, 0xac, + 0x07, 0xa8, 0x2a, 0xac, 0xb3, 0xab, 0x7d, 0xab, 0x71, 0xad, 0x6f, 0xb0, 0x6a, 0xad, 0xd8, 0xab, + 0x48, 0xad, 0x3b, 0xae, 0x8d, 0xae, 0x0c, 0xaf, 0x48, 0xb0, 0xd1, 0xad, 0x80, 0xa0, 0x4f, 0xac, + 0x73, 0xaa, 0x36, 0xa4, 0x18, 0xa5, 0xd9, 0xa1, 0x5a, 0xa5, 0x96, 0xac, 0x90, 0xa9, 0x6f, 0xac, + 0x0e, 0xa7, 0x14, 0xaa, 0xc1, 0xab, 0xac, 0xa1, 0x3e, 0xac, 0xfc, 0x9b, 0x14, 0x28, 0x45, 0xa7, + 0x0e, 0xa2, 0x0d, 0xa5, 0xac, 0xa6, 0x68, 0xa6, 0xbf, 0xac, 0x22, 0xa8, 0xf0, 0x13, 0x60, 0xa4, + 0xd6, 0xa3, 0x33, 0xa6, 0x4d, 0xaa, 0x66, 0xa8, 0x43, 0xa7, 0x1a, 0x1f, 0xe8, 0xa9, 0x97, 0xac, + 0xaf, 0xa8, 0xe2, 0xa4, 0x34, 0x9b, 0x79, 0xa9, 0xb1, 0xa6, 0x1e, 0xad, 0x97, 0xa8, 0x72, 0xab, + 0x92, 0xa6, 0xa6, 0xa6, 0xb4, 0xa4, 0x25, 0x9c, 0x74, 0xa9, 0x78, 0x19, 0x4b, 0xa5, 0x9e, 0xaa, + 0xcf, 0xa2, 0xa8, 0xa9, 0x5d, 0xa3, 0x82, 0x20, 0xbc, 0xa6, 0xba, 0xaa, 0xfe, 0xa6, 0xbe, 0xa5, + 0x3e, 0xa6, 0xdd, 0xaa, 0x31, 0xa7, 0xe9, 0xa8, 0xea, 0xab, 0x6f, 0xab, 0x2e, 0xa5, 0x54, 0xa0, + 0x44, 0xbd, 0xec, 0xbb, 0x2b, 0xbe, 0xfb, 0xba, 0xcb, 0xb8, 0x3c, 0xbe, 0x5c, 0xc3, 0x87, 0xc0, + 0x67, 0xc0, 0xfd, 0xbd, 0x43, 0xbe, 0x7d, 0xc2, 0xcd, 0xbd, 0xf7, 0xc0, 0xa0, 0xbf, 0xc4, 0xc1, + 0x72, 0xc1, 0x5a, 0xbf, 0x79, 0xc1, 0xb3, 0xbe, 0xf8, 0xc1, 0x77, 0xc1, 0x99, 0xbc, 0x99, 0xbf, + 0x53, 0xba, 0x75, 0xc1, 0xd5, 0xc2, 0x87, 0xbc, 0xe4, 0xbf, 0xc2, 0xb5, 0x62, 0xc0, 0x3a, 0xc1, + 0x3d, 0xbd, 0x93, 0xc0, 0x9c, 0x3c, 0x22, 0xc0, 0xc2, 0xb5, 0x2f, 0xc0, 0xb5, 0xb9, 0x2e, 0xbe, + 0x07, 0xbc, 0x23, 0xbe, 0xe9, 0xba, 0xd2, 0xba, 0xd8, 0xc3, 0x22, 0xc0, 0x9a, 0xbd, 0x2e, 0xc3, + 0x46, 0xc1, 0x25, 0xbd, 0x88, 0xbf, 0x5d, 0xbf, 0x9a, 0xc2, 0x82, 0xbd, 0x78, 0xc0, 0x22, 0xc0, + 0x76, 0xbd, 0x6d, 0xb9, 0xa8, 0xc3, 0x55, 0xc1, 0x25, 0xc0, 0x25, 0xbd, 0x78, 0xc1, 0x4f, 0xc0, + 0x72, 0x3d, 0x2a, 0x3d, 0x19, 0x3d, 0x0e, 0x3a, 0x3c, 0x3c, 0x41, 0x3c, 0x62, 0x3d, 0xd8, 0x3d, + 0x72, 0x3c, 0xa5, 0x3d, 0x3c, 0x3e, 0x8a, 0x3b, 0xf8, 0x3f, 0x7e, 0x3c, 0xdb, 0x39, 0x3a, 0x3e, + 0x69, 0x3d, 0xfa, 0x3d, 0xd9, 0x3c, 0x1a, 0x3d, 0x34, 0x3f, 0x86, 0x3e, 0x55, 0x3d, 0x4d, 0x3b, + 0x9c, 0x3b, 0x20, 0x3e, 0xc4, 0x3b, 0x26, 0x3f, 0xa1, 0x3e, 0x64, 0x39, 0xf5, 0x3b, 0xb6, 0x3e, + 0x08, 0x3d, 0x08, 0x3f, 0xa6, 0x3c, 0xfa, 0x3c, 0x5a, 0x3b, 0x6d, 0x3d, 0xaa, 0x3e, 0x68, 0x3e, + 0x1e, 0x3a, 0x3c, 0x3e, 0x79, 0x3e, 0xd2, 0x3c, 0x39, 0x3a, 0x46, 0x3c, 0x04, 0x40, 0x90, 0x3d, + 0x4c, 0x3c, 0x93, 0x40, 0x46, 0x3b, 0x8b, 0x3a, 0x4d, 0x3e, 0xda, 0x3f, 0xb1, 0x3e, 0xe8, 0x3c, + 0xfa, 0x3c, 0x49, 0x3e, 0x20, 0x3d, 0x05, 0x3c, 0x2a, 0x3f, 0x36, 0x3e, 0x7a, 0x34, 0x64, 0x3b, + 0x4e, 0xba, 0x84, 0xba, 0xd2, 0xbb, 0x2c, 0xbb, 0xd0, 0xb9, 0x7b, 0xba, 0x29, 0xbb, 0x8f, 0xba, + 0x23, 0xba, 0xb1, 0xb9, 0x66, 0xba, 0xb8, 0xba, 0x4b, 0xbc, 0xba, 0xbb, 0x2f, 0xbb, 0x95, 0xbc, + 0x6f, 0xbc, 0x00, 0xbc, 0xb6, 0xbb, 0x1e, 0xba, 0xf6, 0xbb, 0x3d, 0xbb, 0x1c, 0xbc, 0xd8, 0xba, + 0xb6, 0xb9, 0xdc, 0xbb, 0x1c, 0xbb, 0xff, 0xbb, 0x8e, 0xbb, 0x5d, 0xb9, 0x62, 0xba, 0x26, 0xbb, + 0x6c, 0xbb, 0x98, 0xbb, 0xc2, 0xb8, 0xcc, 0xb9, 0x8b, 0xba, 0x1e, 0xbc, 0x77, 0xba, 0xb2, 0xbb, + 0x9f, 0xba, 0x30, 0xbc, 0x06, 0xbb, 0xf8, 0xba, 0x1c, 0xba, 0x10, 0xbb, 0x16, 0xbc, 0x74, 0xbc, + 0x0a, 0xba, 0x75, 0xbb, 0x0c, 0xbb, 0x72, 0xba, 0x40, 0xbc, 0x4b, 0xbc, 0x7a, 0xbb, 0xfd, 0xb9, + 0x0c, 0xbb, 0xc4, 0xbb, 0x69, 0xbb, 0xd4, 0xb9, 0x55, 0xbc, 0x77, 0xba, 0x2a, 0xb8, 0x60, 0xbb, + 0x2f, 0xb4, 0x56, 0xb5, 0xfa, 0xb1, 0x95, 0xb1, 0x38, 0xaa, 0x92, 0xb5, 0x18, 0xb9, 0x22, 0xb8, + 0x7a, 0xb5, 0xb6, 0xb4, 0x1a, 0xb8, 0x76, 0xaf, 0x69, 0xb7, 0x7f, 0xb4, 0x18, 0xac, 0x7e, 0xb7, + 0x72, 0xb6, 0x62, 0xb3, 0xbb, 0xb5, 0xa2, 0xb1, 0xb7, 0xb5, 0x22, 0xb6, 0x7c, 0xb2, 0x99, 0xb1, + 0x78, 0xac, 0x3e, 0xb8, 0xc5, 0xb3, 0xca, 0xb6, 0x7c, 0xb5, 0xd4, 0x2a, 0x9c, 0xb7, 0x69, 0xb7, + 0xc2, 0xb2, 0xec, 0xb8, 0x64, 0xb2, 0x18, 0xb5, 0x4e, 0xb0, 0xb9, 0xb4, 0x5e, 0xb6, 0xb9, 0xb5, + 0x60, 0xa6, 0xed, 0xb7, 0x88, 0xb5, 0x62, 0xb7, 0x29, 0xb4, 0xd0, 0xb1, 0xdb, 0xb6, 0x11, 0xb6, + 0x23, 0xb4, 0x61, 0xb8, 0xf3, 0xb0, 0x2c, 0x2d, 0x56, 0xb8, 0xf3, 0xb3, 0x1e, 0xb4, 0x13, 0xb3, + 0x0e, 0xb2, 0x8f, 0xb6, 0xbe, 0xac, 0x39, 0xb3, 0x92, 0xb5, 0x62, 0xb4, 0x2f, 0xb0, 0x5e, 0xb4, + 0x80, 0x2d, 0x22, 0x2e, 0x42, 0x2f, 0x8d, 0x2f, 0xe0, 0x2c, 0x9f, 0x2e, 0xc2, 0x2f, 0x68, 0x2e, + 0x16, 0x2e, 0xc4, 0x2c, 0xff, 0x2d, 0x6c, 0x2e, 0x88, 0x2f, 0xc8, 0x2f, 0x43, 0x2f, 0x96, 0x30, + 0x86, 0x30, 0x26, 0x2f, 0xd0, 0x2f, 0x2e, 0x2d, 0xdc, 0x2e, 0x70, 0x2e, 0xab, 0x2f, 0xd2, 0x2e, + 0x0c, 0x2d, 0xdd, 0x2f, 0x3c, 0x2f, 0x24, 0x2f, 0xa2, 0x2e, 0xd4, 0x2c, 0x0f, 0x2f, 0x7b, 0x2e, + 0xe6, 0x2e, 0x56, 0x2f, 0xbc, 0x2b, 0x52, 0x2d, 0x52, 0x2e, 0xf0, 0x2f, 0x7b, 0x2d, 0xf6, 0x2e, + 0x64, 0x2e, 0x28, 0x30, 0x14, 0x2e, 0x44, 0x2f, 0x80, 0x2e, 0xc7, 0x2e, 0xe4, 0x2e, 0x78, 0x30, + 0xc8, 0x2d, 0xd1, 0x2d, 0x02, 0x2f, 0xba, 0x2d, 0x4a, 0x30, 0xea, 0x2e, 0x3a, 0x2e, 0x48, 0x2d, + 0x68, 0x2e, 0x47, 0x2f, 0x65, 0x2e, 0x92, 0x2d, 0xb1, 0x2f, 0x52, 0x2d, 0xd1, 0x2c, 0xc1, 0x2f, + 0x42, 0x28, 0x1c, 0x2a, 0x67, 0x28, 0xdc, 0x29, 0x44, 0x20, 0x54, 0x2b, 0x1b, 0x2e, 0x78, 0x2c, + 0xbc, 0x2a, 0x30, 0x28, 0x29, 0x2c, 0x32, 0x27, 0x42, 0x2b, 0xca, 0x2a, 0xb4, 0x27, 0xd6, 0x2c, + 0x88, 0x2c, 0x74, 0x28, 0xdf, 0x2b, 0xa8, 0x25, 0x8c, 0x29, 0x32, 0x2a, 0xc8, 0x28, 0xf0, 0x28, + 0x68, 0x23, 0xf8, 0x2c, 0x33, 0x2a, 0xec, 0x2a, 0x8f, 0x29, 0x40, 0x1a, 0xff, 0x2c, 0x74, 0x2b, + 0xa1, 0x28, 0x32, 0x2d, 0x5b, 0x25, 0x72, 0x29, 0xcd, 0x27, 0x79, 0x2a, 0xb5, 0x29, 0x32, 0x2a, + 0x04, 0x25, 0xc8, 0x2c, 0x5a, 0x29, 0xa5, 0x2c, 0x8f, 0x2a, 0x94, 0x28, 0x33, 0x2a, 0x3c, 0x2c, + 0x36, 0x29, 0xb6, 0x2a, 0xba, 0x28, 0x00, 0x13, 0x3f, 0x2d, 0x89, 0x26, 0x92, 0x27, 0x9e, 0x27, + 0xee, 0x27, 0x62, 0x2b, 0x8a, 0x23, 0xb4, 0x28, 0xfa, 0x29, 0x9c, 0x27, 0xa6, 0x28, 0x32, 0x2b, + 0x44, 0x2a, 0x21, 0x2a, 0x1d, 0x2b, 0xa3, 0x29, 0xab, 0x29, 0x99, 0x29, 0x26, 0x2a, 0x3a, 0x2a, + 0x81, 0x29, 0xf4, 0x29, 0x58, 0x2a, 0xda, 0x29, 0x54, 0x2c, 0x96, 0x2a, 0xc5, 0x29, 0x09, 0x2c, + 0x8f, 0x2b, 0x9e, 0x2b, 0xa3, 0x2a, 0x1b, 0x2a, 0x06, 0x2c, 0x35, 0x2b, 0x73, 0x2b, 0xc6, 0x29, + 0x58, 0x29, 0x23, 0x2b, 0xf2, 0x29, 0xee, 0x2b, 0x85, 0x2b, 0xbe, 0x28, 0x30, 0x29, 0x20, 0x2b, + 0xcc, 0x2a, 0x53, 0x2b, 0x04, 0x29, 0x9f, 0x29, 0xab, 0x29, 0x5d, 0x2b, 0xcc, 0x2a, 0x76, 0x2b, + 0x90, 0x29, 0x8e, 0x2b, 0x1c, 0x2b, 0x03, 0x2a, 0xe8, 0x28, 0x3a, 0x2a, 0x40, 0x2c, 0xb4, 0x2b, + 0x81, 0x29, 0x3d, 0x2c, 0xea, 0x29, 0xbe, 0x29, 0x99, 0x2b, 0x78, 0x2c, 0xa5, 0x2b, 0xdb, 0x29, + 0x94, 0x2a, 0x5a, 0x2b, 0x10, 0x2b, 0x47, 0x29, 0x3b, 0x2c, 0xc0, 0x2a, 0xc6, 0x25, 0xf5, 0x29, + 0xcb, 0x25, 0x1a, 0x26, 0x1a, 0x24, 0x12, 0x20, 0x50, 0x21, 0x49, 0x25, 0x7e, 0x28, 0x4a, 0x28, + 0x92, 0x25, 0x90, 0x26, 0x82, 0x28, 0xe9, 0x20, 0xc0, 0x28, 0x65, 0x24, 0xec, 0x1a, 0x77, 0x27, + 0x17, 0x26, 0x64, 0x25, 0x86, 0x25, 0xaf, 0x24, 0xec, 0x27, 0xbe, 0x27, 0x58, 0x24, 0xad, 0x21, + 0xe4, 0x20, 0x3e, 0x28, 0x40, 0x23, 0x40, 0x28, 0x56, 0x27, 0xe8, 0x12, 0x41, 0x26, 0x5f, 0x28, + 0x6c, 0x24, 0x30, 0x29, 0xf8, 0x24, 0x0c, 0x26, 0x32, 0x21, 0x54, 0x25, 0x2c, 0x28, 0x30, 0x27, + 0xce, 0x1a, 0x08, 0x28, 0x69, 0x27, 0xc8, 0x26, 0xbf, 0x22, 0xf4, 0x22, 0xb5, 0x28, 0x02, 0x26, + 0xa6, 0x24, 0x04, 0x2a, 0x18, 0x21, 0x50, 0x10, 0x43, 0x28, 0x52, 0x27, 0x9e, 0x26, 0xf6, 0x24, + 0x4e, 0x24, 0x89, 0x27, 0x6e, 0x22, 0x1f, 0x24, 0x84, 0x27, 0xaa, 0x26, 0x42, 0x1a, 0x37, 0x23, + 0x01, 0x3c, 0x2b, 0x3d, 0x61, 0x3d, 0x9e, 0x3e, 0x7d, 0x39, 0x2c, 0x3e, 0x34, 0x40, 0x30, 0x3e, + 0x8a, 0x3d, 0xd3, 0x3a, 0x95, 0x3d, 0xd7, 0x3c, 0xdc, 0x3d, 0xbc, 0x3e, 0xbb, 0x3d, 0x26, 0x40, + 0x19, 0x40, 0x19, 0x3d, 0x0d, 0x3f, 0xba, 0x3a, 0xf9, 0x3c, 0x13, 0x3d, 0xbe, 0x3d, 0x9f, 0x3d, + 0x80, 0x3a, 0x7e, 0x3f, 0x4c, 0x3e, 0xac, 0x3d, 0xf6, 0x3c, 0xb8, 0x39, 0x7c, 0x3f, 0x80, 0x3d, + 0x39, 0x3d, 0x0e, 0x3f, 0x00, 0x39, 0x6a, 0x3c, 0xe6, 0x3c, 0x80, 0x3e, 0x32, 0x3c, 0x79, 0x3d, + 0x9d, 0x3c, 0xa6, 0x3f, 0x8a, 0x3c, 0x2c, 0x3f, 0x1a, 0x3e, 0x4f, 0x3d, 0x05, 0x3d, 0xe0, 0x3f, + 0xd4, 0x3c, 0x2a, 0x3c, 0xaf, 0x3d, 0x7f, 0x3a, 0x13, 0x40, 0x0d, 0x3c, 0x0a, 0x3c, 0xa7, 0x3b, + 0xa8, 0x3c, 0x24, 0x3e, 0xc2, 0x3b, 0x91, 0x3c, 0xb8, 0x3d, 0x0e, 0x3b, 0xbe, 0x3c, 0x10, 0x3f, + 0x3c, 0x3c, 0x52, 0x36, 0x50, 0xb3, 0xbf, 0x38, 0x04, 0x2f, 0x22, 0x3a, 0x3e, 0x34, 0x1b, 0x35, + 0xc1, 0x3a, 0xb9, 0x34, 0xa4, 0x37, 0xa2, 0x34, 0x3b, 0x3c, 0xd4, 0x30, 0xd2, 0xb4, 0x9b, 0x38, + 0xfc, 0xaf, 0x4e, 0x39, 0xe4, 0x36, 0xc6, 0x3b, 0x64, 0x39, 0x26, 0x30, 0x10, 0x31, 0x8a, 0x38, + 0x84, 0x30, 0x30, 0x3a, 0xce, 0x37, 0xc8, 0x38, 0xae, 0x3a, 0xb8, 0x2c, 0x3e, 0x38, 0xe4, 0x39, + 0xf8, 0x37, 0x1e, 0x35, 0xc7, 0x36, 0x84, 0x3a, 0xb3, 0x34, 0xf7, 0x37, 0x70, 0x2e, 0x64, 0x32, + 0x01, 0x37, 0x1c, 0x35, 0xe4, 0x38, 0x38, 0xac, 0x78, 0x2e, 0xd6, 0x34, 0xb8, 0xae, 0x38, 0x2f, + 0xe4, 0xb1, 0x94, 0x30, 0xf6, 0x35, 0x32, 0x37, 0x80, 0x2d, 0x16, 0x3c, 0xb4, 0x3a, 0x3c, 0x2e, + 0xfc, 0x31, 0x4c, 0x3a, 0x51, 0x38, 0xf8, 0x34, 0x84, 0x2f, 0x48, 0x35, 0x0f, 0x32, 0xc2, 0x38, + 0x65, 0xb3, 0x84, 0xb2, 0x64, 0x2d, 0x57, 0xb6, 0xd0, 0xaa, 0xb7, 0xb4, 0x88, 0xb5, 0x9c, 0xb5, + 0x63, 0xb8, 0xce, 0xb2, 0x03, 0xb1, 0xb8, 0xb5, 0x76, 0xb4, 0x6e, 0xb6, 0xf1, 0xb1, 0x01, 0xb8, + 0xfc, 0xb1, 0x80, 0xb6, 0xb0, 0xb7, 0xd5, 0xb4, 0xab, 0xb8, 0x9c, 0xb4, 0x11, 0xb2, 0xc0, 0xb4, + 0x7c, 0x2e, 0x0e, 0xb9, 0x27, 0xb4, 0xa6, 0xb5, 0xb6, 0xb2, 0x7e, 0xb1, 0x26, 0xb6, 0x49, 0xb5, + 0x4b, 0xb2, 0xc4, 0xb1, 0x81, 0xb1, 0x7a, 0xb6, 0x38, 0xb1, 0x78, 0xb1, 0x1f, 0xb4, 0xea, 0xac, + 0x28, 0xb7, 0x01, 0xaf, 0x85, 0xb6, 0x28, 0xb2, 0x94, 0xb3, 0xea, 0xb3, 0x0a, 0xaf, 0x3c, 0xb5, + 0xea, 0xb2, 0x44, 0xb0, 0x37, 0xb5, 0x02, 0xb4, 0x07, 0xb7, 0x7e, 0xb5, 0x62, 0xb7, 0xfe, 0xb0, + 0x33, 0xb1, 0x27, 0xb5, 0xd4, 0xb7, 0x52, 0xb8, 0x8c, 0xb4, 0xdf, 0xb5, 0xbe, 0x25, 0xc9, 0xb3, + 0x70, 0xa9, 0xec, 0xad, 0x30, 0x32, 0xd3, 0x30, 0x9c, 0x1c, 0xda, 0xb5, 0xa4, 0xb1, 0xa6, 0xb1, + 0x5a, 0xb4, 0x56, 0xb4, 0xa2, 0xaf, 0xc0, 0xac, 0x4b, 0xb5, 0xe1, 0xad, 0x82, 0x30, 0x3c, 0x30, + 0xf2, 0x28, 0x7a, 0xa8, 0x40, 0xb0, 0x6a, 0xb2, 0x1a, 0xae, 0x34, 0xb3, 0xb6, 0x27, 0x0a, 0xaa, + 0x87, 0x2c, 0xf6, 0xb2, 0x98, 0xb4, 0xee, 0xaf, 0x29, 0xb4, 0x00, 0x14, 0x35, 0xb4, 0x28, 0xb3, + 0x0a, 0xb2, 0x1c, 0xb2, 0x59, 0xa5, 0x7f, 0xb0, 0x17, 0x31, 0xa0, 0xb3, 0x62, 0x30, 0xe8, 0xb1, + 0xe0, 0x23, 0x92, 0x2e, 0x50, 0xac, 0xfc, 0x2e, 0x37, 0xad, 0xdc, 0xa4, 0xc2, 0x2c, 0xb7, 0xb3, + 0x90, 0x28, 0x7e, 0xb4, 0x50, 0x22, 0x8e, 0x28, 0x12, 0x33, 0x35, 0xb0, 0x56, 0xb6, 0xdb, 0xa9, + 0x70, 0x99, 0x56, 0xb6, 0x88, 0xaf, 0xb4, 0x2b, 0x36, 0x29, 0x8b, 0xb1, 0xf5, 0xb1, 0xb3, 0xac, + 0x10, 0x16, 0xa2, 0x25, 0x86, 0xa3, 0x1c, 0x28, 0x34, 0x1c, 0xbb, 0x28, 0xa2, 0x2a, 0x80, 0x2a, + 0x56, 0x2c, 0x8f, 0x28, 0xf0, 0x22, 0x1a, 0x2a, 0xea, 0x25, 0xa9, 0x2b, 0xe6, 0x27, 0x4e, 0x2a, + 0xc0, 0x27, 0x32, 0x29, 0x1e, 0x2c, 0xe0, 0x25, 0x78, 0x2c, 0x6c, 0x2a, 0x8c, 0x25, 0x26, 0x27, + 0x00, 0xa6, 0x1c, 0x2d, 0xbd, 0x28, 0x06, 0x29, 0x42, 0x24, 0xf4, 0x25, 0xcc, 0x2a, 0xa2, 0x28, + 0xb8, 0x25, 0xa4, 0x26, 0x82, 0x22, 0x32, 0x29, 0xde, 0x1e, 0x5d, 0x25, 0x48, 0x27, 0x37, 0x23, + 0xa0, 0x2a, 0x30, 0x11, 0xa6, 0x29, 0x8e, 0x26, 0xa3, 0x28, 0x28, 0x27, 0x2b, 0x24, 0x58, 0x2b, + 0xb4, 0x28, 0x7e, 0x27, 0x96, 0x28, 0x54, 0x25, 0xa7, 0x2a, 0x26, 0x26, 0xd3, 0x2b, 0xc0, 0x25, + 0xad, 0x24, 0x5b, 0x29, 0xd8, 0x2b, 0x56, 0x2c, 0xb2, 0x28, 0xc0, 0x2a, 0xe0, 0x8d, 0x36, 0x25, + 0x72, 0xa4, 0x93, 0x21, 0x8e, 0xa6, 0xc7, 0xa5, 0xd5, 0x96, 0xa3, 0x29, 0x77, 0x28, 0x46, 0x28, + 0x22, 0x29, 0x4e, 0x29, 0x06, 0x21, 0xf5, 0x24, 0x3e, 0x27, 0x62, 0x27, 0x40, 0x97, 0x2d, 0xa3, + 0xe2, 0x1f, 0x14, 0x1b, 0x92, 0x27, 0x8e, 0x22, 0x62, 0x25, 0x9d, 0x29, 0x20, 0x8d, 0x20, 0x1a, + 0x37, 0xa5, 0xec, 0x28, 0x18, 0x29, 0x2d, 0x24, 0x8c, 0x25, 0x23, 0x1e, 0x40, 0x29, 0x89, 0x26, + 0x84, 0x25, 0xea, 0x26, 0xe5, 0x98, 0x40, 0x23, 0x86, 0xa6, 0x16, 0x27, 0xa4, 0xa1, 0x5e, 0x26, + 0x9c, 0x1d, 0x44, 0xa5, 0x2c, 0x21, 0xa8, 0x9c, 0x4c, 0x25, 0x42, 0x1d, 0x68, 0x98, 0x3e, 0x2a, + 0xef, 0x21, 0xa3, 0x29, 0x24, 0x18, 0xfc, 0x9f, 0x2c, 0xa3, 0xd4, 0x19, 0xfb, 0x2a, 0xd9, 0x21, + 0xb0, 0x18, 0x4e, 0x2a, 0x2d, 0x26, 0xd8, 0x1f, 0x46, 0x1c, 0x45, 0x28, 0x40, 0x25, 0xe4, 0x1a, + 0x99, 0x26, 0xb0, 0x22, 0xa9, 0x9c, 0xd1, 0x26, 0xf2, 0x1b, 0xee, 0x24, 0xc5, 0x23, 0x29, 0x24, + 0xff, 0x27, 0xde, 0x20, 0x52, 0x22, 0x6e, 0x24, 0x21, 0x26, 0x35, 0x24, 0x92, 0x1b, 0xaf, 0x27, + 0x8c, 0x1d, 0x9e, 0x26, 0x06, 0x26, 0x67, 0x26, 0x16, 0x28, 0x32, 0x21, 0x2f, 0x21, 0x1e, 0x25, + 0x9c, 0x90, 0x55, 0x28, 0x4e, 0x23, 0x8b, 0x25, 0xbf, 0x24, 0xdc, 0x1f, 0x1c, 0x25, 0x99, 0x25, + 0xd1, 0x22, 0xf8, 0x20, 0xc7, 0x22, 0xe9, 0x26, 0xa7, 0x22, 0x02, 0x22, 0xae, 0x22, 0xfd, 0x1b, + 0x36, 0x26, 0x7c, 0x21, 0x4e, 0x26, 0xa9, 0x1f, 0xe3, 0x20, 0x34, 0x23, 0xc4, 0x1a, 0xb3, 0x21, + 0x9c, 0x1d, 0xee, 0x1b, 0xc1, 0x24, 0x70, 0x24, 0x4a, 0x25, 0x3b, 0x27, 0xbe, 0x26, 0x12, 0x1f, + 0xb2, 0x20, 0x2e, 0x25, 0x9f, 0x26, 0xb4, 0x26, 0xb8, 0x22, 0x60, 0x24, 0xc0, 0x80, 0xa3, 0x24, + 0x2f, 0x24, 0xaf, 0x1f, 0x20, 0xa1, 0xde, 0x91, 0x11, 0x14, 0xd6, 0x25, 0xde, 0x1e, 0xbd, 0x1f, + 0x75, 0x24, 0x5c, 0x22, 0x5e, 0x21, 0x6e, 0x1a, 0xcb, 0x26, 0xa0, 0x13, 0x75, 0xa2, 0xaf, 0x96, + 0x02, 0x9e, 0x74, 0x1f, 0x4c, 0x1e, 0x1b, 0x25, 0xb0, 0x1f, 0x2b, 0x1f, 0x12, 0x93, 0xb1, 0x1f, + 0xec, 0x13, 0xbc, 0x22, 0x01, 0x24, 0x37, 0x21, 0x63, 0x25, 0xca, 0x93, 0x1e, 0x23, 0x3b, 0x24, + 0x9d, 0x22, 0x3e, 0x21, 0xd3, 0x1d, 0x0e, 0x23, 0x5c, 0x9a, 0xba, 0x23, 0xd7, 0x9e, 0xc1, 0x20, + 0xa0, 0x14, 0xc0, 0x06, 0xcc, 0x1f, 0xc7, 0x9f, 0x4a, 0x17, 0x70, 0x19, 0x0e, 0x9e, 0x00, 0x1f, + 0x45, 0x9f, 0xc2, 0x21, 0xdb, 0x16, 0x35, 0x1a, 0x50, 0xa2, 0x85, 0x24, 0xe6, 0x25, 0xb0, 0x15, + 0xdb, 0x14, 0x20, 0x26, 0x75, 0x1f, 0xdc, 0x9b, 0x67, 0x9a, 0x96, 0x1f, 0x56, 0x21, 0xee, 0x20, + 0x18, 0xb7, 0x57, 0x34, 0x57, 0xb6, 0x40, 0xa9, 0x00, 0x00, 0xc2, 0x39, 0x46, 0x3b, 0xf0, 0x3a, + 0x02, 0x3c, 0x59, 0x3a, 0x98, 0x30, 0x75, 0x39, 0x06, 0x35, 0xaf, 0x3b, 0x71, 0x36, 0x0c, 0x34, + 0x56, 0x37, 0x47, 0x35, 0x98, 0x3b, 0x1a, 0x31, 0xd6, 0x3a, 0x1a, 0x3c, 0x6c, 0x32, 0x9a, 0x32, + 0x3a, 0xb8, 0x8d, 0x3c, 0xf0, 0x39, 0x9c, 0x37, 0x1c, 0x33, 0xf8, 0x34, 0x69, 0x3b, 0x16, 0x38, + 0xe8, 0x35, 0x20, 0x38, 0x40, 0x21, 0x9a, 0x36, 0x8f, 0xb4, 0xc4, 0x36, 0x78, 0x31, 0x37, 0x36, + 0x46, 0x38, 0xde, 0xb4, 0x51, 0x37, 0x0e, 0x34, 0xec, 0x38, 0xc0, 0x34, 0xee, 0x31, 0xaa, 0x3c, + 0xb8, 0x38, 0x50, 0x3a, 0x54, 0x35, 0x5a, 0x28, 0x7e, 0x36, 0x00, 0x2a, 0x5e, 0x3c, 0xb6, 0x35, + 0xe8, 0x31, 0xa1, 0x3a, 0x9c, 0x3a, 0x2a, 0x3a, 0xe3, 0x36, 0x16, 0x3b, 0xbd, 0x31, 0x90, 0x2f, + 0xba, 0xb0, 0xbb, 0xb1, 0x13, 0xaf, 0xba, 0xad, 0xa2, 0xaf, 0xdd, 0xae, 0xf8, 0xb0, 0xa6, 0xb0, + 0xd8, 0xb0, 0x8b, 0xb0, 0x62, 0xb1, 0xb5, 0xaf, 0x04, 0xb4, 0x86, 0xb0, 0x9a, 0xae, 0xee, 0xb1, + 0x48, 0xb1, 0x7e, 0xb1, 0x79, 0xb0, 0xe1, 0xb1, 0x7e, 0xb2, 0x92, 0xb2, 0x98, 0xb1, 0xb7, 0xae, + 0x1d, 0xb0, 0x80, 0xb2, 0x3e, 0xaf, 0xf6, 0xb2, 0x21, 0xb4, 0x9e, 0xad, 0x5a, 0xae, 0x88, 0xb1, + 0x43, 0xb0, 0xca, 0xb3, 0xa8, 0xb0, 0x10, 0xb1, 0xd6, 0xaa, 0x69, 0xaf, 0x71, 0xb1, 0x48, 0xb1, + 0x56, 0xac, 0x9c, 0xb1, 0x9c, 0xb2, 0xa1, 0xb0, 0x50, 0xaa, 0xfe, 0xb0, 0x9d, 0xb3, 0xb9, 0xb0, + 0x7e, 0xb0, 0x77, 0xb4, 0x58, 0xaf, 0xe2, 0xaf, 0xe8, 0xb1, 0xd8, 0xb2, 0x48, 0xb3, 0x96, 0xb0, + 0x11, 0xb0, 0x5c, 0xb1, 0xe6, 0xb0, 0xf1, 0xac, 0xa4, 0xb0, 0xc0, 0xb0, 0xb8, 0xaa, 0x24, 0xb0, + 0xda, 0x2c, 0x00, 0x2f, 0xea, 0x2d, 0x74, 0x2f, 0xb2, 0x2d, 0xce, 0x2e, 0x63, 0x2e, 0x82, 0x2e, + 0x48, 0x2e, 0xde, 0x2c, 0xd6, 0x2d, 0xdc, 0x2d, 0x59, 0x30, 0x7c, 0x2f, 0xdc, 0x2e, 0x3c, 0x30, + 0x3b, 0x30, 0xc6, 0x2e, 0x3e, 0x2f, 0xc9, 0x2d, 0x15, 0x30, 0x18, 0x2f, 0x1b, 0x30, 0xf7, 0x2d, + 0xb6, 0x2d, 0xc4, 0x2f, 0x9a, 0x2e, 0xf6, 0x2f, 0x16, 0x30, 0xb5, 0x2d, 0x08, 0x2e, 0xe7, 0x2d, + 0xbd, 0x2e, 0x56, 0x2f, 0x18, 0x2d, 0xa9, 0x2d, 0x01, 0x2d, 0x8e, 0x2f, 0xfc, 0x2d, 0x39, 0x2f, + 0xd2, 0x2e, 0x3e, 0x2f, 0x0d, 0x2f, 0x25, 0x2f, 0x08, 0x2c, 0xdc, 0x2e, 0x91, 0x2f, 0x46, 0x30, + 0x22, 0x2e, 0x4e, 0x2f, 0x0e, 0x2f, 0xe9, 0x2d, 0xff, 0x2f, 0x0c, 0x2f, 0x3d, 0x2f, 0x3c, 0x2d, + 0x1b, 0x2e, 0x90, 0x2f, 0x1f, 0x2e, 0xa9, 0x2c, 0xf2, 0x2e, 0x30, 0x2e, 0xc6, 0x2b, 0xcd, 0x2e, + 0xf5, 0x26, 0x1a, 0x2a, 0x8a, 0x23, 0x0f, 0x25, 0x40, 0x13, 0xc0, 0x28, 0xcd, 0x2c, 0x9d, 0x2a, + 0x8d, 0x2a, 0x08, 0x29, 0x3f, 0x2b, 0xf4, 0x20, 0x8a, 0x2b, 0x74, 0x29, 0x9c, 0x23, 0x8b, 0x2a, + 0x14, 0x2a, 0x8a, 0x28, 0x06, 0x2a, 0xa5, 0x26, 0x2c, 0x25, 0x0b, 0x2a, 0x2e, 0x28, 0x08, 0x26, + 0xe0, 0x95, 0x0a, 0x2d, 0x9c, 0x24, 0xc4, 0x2b, 0xce, 0x29, 0xe0, 0x93, 0x02, 0x2c, 0x42, 0x2a, + 0x99, 0x27, 0xae, 0x2d, 0xa3, 0x29, 0x58, 0x28, 0xf0, 0x23, 0x26, 0x25, 0xca, 0x29, 0xc8, 0x28, + 0x60, 0x99, 0x04, 0x2c, 0xfe, 0x2a, 0x2b, 0x2c, 0x2c, 0x24, 0xd2, 0x26, 0xfa, 0x2a, 0x9c, 0x28, + 0x60, 0x25, 0xfa, 0x2c, 0xaa, 0x23, 0x23, 0xa1, 0xc6, 0x2a, 0x3b, 0x28, 0xf1, 0x29, 0xe2, 0x25, + 0xc8, 0x25, 0x48, 0x2c, 0x90, 0x95, 0xce, 0x23, 0xb4, 0x27, 0x58, 0x27, 0x34, 0x1d, 0x14, 0x29, + 0x00, 0xa0, 0x90, 0xa2, 0x97, 0xa1, 0xec, 0xa3, 0xd2, 0xa0, 0x48, 0xa3, 0xf0, 0xa2, 0xa8, 0xa2, + 0x43, 0xa2, 0x6d, 0xa0, 0x94, 0xa1, 0x3a, 0xa1, 0xa8, 0xa3, 0xa9, 0xa3, 0xda, 0xa2, 0x29, 0xa4, + 0x46, 0xa4, 0x1c, 0xa2, 0x82, 0xa3, 0x7a, 0xa0, 0xda, 0xa2, 0x38, 0xa2, 0xb4, 0xa3, 0xf8, 0xa1, + 0xa0, 0xa0, 0xde, 0xa3, 0x74, 0xa2, 0x63, 0xa3, 0x90, 0xa2, 0x54, 0xa1, 0x27, 0xa3, 0x63, 0xa1, + 0x94, 0xa2, 0xec, 0xa2, 0xc2, 0xa0, 0xf6, 0xa0, 0x7e, 0xa1, 0x9c, 0xa3, 0x70, 0xa1, 0xcf, 0xa2, + 0xf5, 0xa2, 0x4f, 0xa3, 0x56, 0xa2, 0xc6, 0xa3, 0x69, 0xa0, 0x3f, 0xa2, 0x6a, 0xa2, 0x55, 0xa4, + 0x82, 0xa1, 0xff, 0xa1, 0xe4, 0xa2, 0xbd, 0xa0, 0xcd, 0xa3, 0xa8, 0xa1, 0xff, 0xa1, 0x6d, 0xa0, + 0xbc, 0xa1, 0xee, 0xa3, 0xb3, 0xa0, 0xaa, 0xa0, 0xa1, 0xa2, 0xa0, 0xa1, 0xcc, 0x9f, 0xf8, 0xa2, + 0x08, 0x9a, 0xce, 0x9e, 0x70, 0x9a, 0xf2, 0x9d, 0xf0, 0x90, 0x7c, 0x9f, 0xab, 0xa1, 0x18, 0xa0, + 0xb7, 0x9f, 0x16, 0x9d, 0x9f, 0x9f, 0x22, 0x98, 0x71, 0x9f, 0xb5, 0x9f, 0x41, 0x9c, 0x28, 0xa0, + 0x37, 0xa0, 0x02, 0x9d, 0x20, 0xa0, 0xb4, 0x98, 0x6c, 0x99, 0xec, 0x9d, 0x9e, 0x9d, 0xce, 0x9c, + 0x80, 0x02, 0xaf, 0xa1, 0x2e, 0x9c, 0x20, 0xa0, 0xb2, 0x9c, 0xae, 0x95, 0x88, 0xa1, 0x55, 0x9e, + 0x69, 0x9d, 0x98, 0xa1, 0xe0, 0x9d, 0x57, 0x9c, 0x90, 0x9c, 0x2f, 0x9d, 0xef, 0x9d, 0xe3, 0x9d, + 0x7c, 0x99, 0xba, 0xa0, 0xfc, 0x9e, 0x64, 0xa1, 0x0b, 0x9c, 0x40, 0x9c, 0x4e, 0x9e, 0x58, 0x9f, + 0xb0, 0x9a, 0x22, 0xa0, 0x07, 0x9c, 0x0a, 0x14, 0x1d, 0xa0, 0xd8, 0x9a, 0x29, 0x9d, 0xaa, 0x99, + 0x03, 0x9c, 0x4e, 0xa1, 0x40, 0x10, 0xdf, 0x9a, 0x3a, 0x9d, 0x4a, 0x9c, 0x33, 0x98, 0x1e, 0x9f, + 0x15, 0x9d, 0xa6, 0x9e, 0x40, 0x9d, 0xbe, 0x9d, 0x71, 0x9d, 0x70, 0x9d, 0x86, 0x9d, 0xb9, 0x9d, + 0xb2, 0x9d, 0xd7, 0x9c, 0xa5, 0x9d, 0x72, 0x9d, 0x5f, 0xa0, 0x5c, 0x9e, 0xb0, 0x9d, 0x98, 0x9f, + 0x48, 0x9f, 0x8a, 0x9e, 0x20, 0x9e, 0x46, 0x9e, 0x0d, 0xa0, 0x28, 0x9f, 0x7d, 0x9f, 0x12, 0x9d, + 0xa4, 0x9d, 0x1e, 0x9f, 0xa9, 0x9d, 0xb5, 0x9f, 0x5d, 0xa0, 0xef, 0x9c, 0x83, 0x9c, 0xde, 0x9d, + 0xf0, 0x9d, 0x6c, 0x9f, 0xf9, 0x9c, 0xaa, 0x9d, 0x6e, 0x9b, 0x40, 0x9e, 0xea, 0x9d, 0xac, 0x9e, + 0x48, 0x9d, 0x89, 0x9e, 0x0e, 0x9f, 0xec, 0x9d, 0x1a, 0x9a, 0x6f, 0x9e, 0xe8, 0x9f, 0x2f, 0x9f, + 0xcf, 0x9d, 0x13, 0xa0, 0xff, 0x9d, 0xc7, 0x9d, 0x45, 0x9f, 0x75, 0x9f, 0xa2, 0x9f, 0x4b, 0x9d, + 0x8c, 0x9d, 0x8a, 0x9e, 0x4d, 0x9e, 0xd6, 0x9b, 0x41, 0x9e, 0xe1, 0x9d, 0x54, 0x9a, 0xc8, 0x9d, + 0x2f, 0x99, 0xf8, 0x9a, 0x12, 0x95, 0x5c, 0x92, 0x6e, 0x93, 0x94, 0x97, 0x41, 0x9c, 0x52, 0x9a, + 0x96, 0x9a, 0xeb, 0x99, 0xc0, 0x9b, 0x04, 0x95, 0xcf, 0x9c, 0x1a, 0x99, 0xab, 0x93, 0xdc, 0x9a, + 0xe5, 0x99, 0xde, 0x99, 0x7c, 0x99, 0xf4, 0x99, 0x36, 0x99, 0xcc, 0x9b, 0x30, 0x99, 0x01, 0x96, + 0xf5, 0x93, 0xef, 0x9c, 0x34, 0x95, 0x73, 0x9c, 0x83, 0x9c, 0x18, 0x8d, 0xc8, 0x99, 0x4a, 0x9b, + 0x21, 0x98, 0x18, 0x9e, 0x50, 0x9a, 0xbd, 0x99, 0x0a, 0x90, 0xf3, 0x94, 0xe2, 0x9a, 0x9c, 0x99, + 0xea, 0x0c, 0xe8, 0x9b, 0x36, 0x9c, 0x15, 0x9b, 0x4b, 0x91, 0xa0, 0x98, 0xa4, 0x9c, 0x63, 0x98, + 0xe4, 0x97, 0x48, 0x9e, 0x7e, 0x94, 0x48, 0x90, 0x2a, 0x9b, 0x06, 0x9b, 0x38, 0x9c, 0x90, 0x98, + 0x40, 0x97, 0xd4, 0x9b, 0x58, 0x95, 0xc5, 0x93, 0x7b, 0x98, 0xcb, 0x98, 0x04, 0x8d, 0xbe, 0x98, + 0x62, 0xad, 0x94, 0xb1, 0x2e, 0xb0, 0xf2, 0xb2, 0x4f, 0xad, 0xe5, 0xb2, 0x9f, 0xb3, 0x65, 0xb2, + 0xe8, 0xb1, 0x89, 0xaf, 0x42, 0xb1, 0xe1, 0xae, 0xfe, 0xb1, 0xf8, 0xb2, 0x7e, 0xb1, 0x4a, 0xb3, + 0xa9, 0xb3, 0xaf, 0xb0, 0x10, 0xb3, 0x02, 0xad, 0x26, 0xb0, 0xd0, 0xb0, 0xfe, 0xb1, 0x0d, 0xb1, + 0x6c, 0xac, 0xe8, 0xb3, 0x1a, 0xb1, 0x5c, 0xb2, 0x34, 0xb0, 0x17, 0xaf, 0x10, 0xb4, 0xa2, 0xb0, + 0x76, 0xb1, 0xda, 0xb2, 0x3f, 0xb0, 0x80, 0xaf, 0x0a, 0xb1, 0x3f, 0xb2, 0x8d, 0xb0, 0x8a, 0xb1, + 0x51, 0xb1, 0x10, 0xb3, 0x42, 0xb1, 0x0d, 0xb4, 0x21, 0xb0, 0xa4, 0xb0, 0xd4, 0xb0, 0x7e, 0xb3, + 0xda, 0xaf, 0xe4, 0xb0, 0x58, 0xb1, 0xf6, 0xab, 0xe7, 0xb2, 0xf0, 0xae, 0x36, 0xb0, 0xec, 0xad, + 0x6e, 0xb0, 0xf8, 0xb3, 0x92, 0xab, 0xbe, 0xaf, 0x57, 0xb1, 0x47, 0xb0, 0x1f, 0xae, 0x5e, 0xb2, + 0x84, 0xaf, 0xb8, 0xad, 0x16, 0x29, 0xae, 0xac, 0x42, 0xa6, 0xf6, 0xac, 0x0d, 0xaa, 0xf9, 0xa7, + 0xfc, 0xaf, 0xc5, 0xa7, 0xb0, 0xab, 0x10, 0xab, 0x0a, 0xb1, 0xaa, 0xa9, 0x00, 0x0a, 0x94, 0xad, + 0x74, 0xa4, 0xaa, 0xad, 0xe3, 0xab, 0xe9, 0xb0, 0xc6, 0xad, 0x17, 0xab, 0x59, 0xab, 0x82, 0xac, + 0x14, 0xaa, 0x14, 0xb0, 0x26, 0xac, 0x4f, 0xae, 0xa0, 0xb1, 0x1a, 0xa7, 0xc2, 0xaa, 0x08, 0xad, + 0x50, 0xab, 0x62, 0xae, 0x94, 0xac, 0x61, 0xaf, 0xa9, 0x20, 0xac, 0xa7, 0xe0, 0xa2, 0x54, 0xa6, + 0x7d, 0xa8, 0x96, 0xaa, 0xc6, 0xae, 0x80, 0xa4, 0xb4, 0x24, 0x15, 0xad, 0xd8, 0xa6, 0x0e, 0xa5, + 0x74, 0xa3, 0x25, 0xac, 0xd2, 0xab, 0x87, 0xad, 0x92, 0xa8, 0xa8, 0xaf, 0x9b, 0xb0, 0xc8, 0xa7, + 0x7f, 0xa5, 0xb2, 0xad, 0x0e, 0xad, 0xf0, 0x9f, 0xb6, 0x25, 0xb0, 0xa6, 0x90, 0xa9, 0x2c, 0xae, + 0x24, 0x25, 0x2e, 0x2a, 0x06, 0xa3, 0x40, 0x2c, 0x8a, 0x25, 0x02, 0x2b, 0xbe, 0x29, 0x04, 0x2b, + 0xe2, 0x2c, 0xb6, 0x26, 0xde, 0x26, 0x8d, 0x29, 0x36, 0x2b, 0xc1, 0x2b, 0x9a, 0x28, 0x59, 0x2c, + 0x20, 0x29, 0xf4, 0x29, 0x26, 0x2c, 0xaa, 0x29, 0x88, 0x2d, 0x4e, 0x2a, 0xb0, 0x29, 0xc9, 0x28, + 0xee, 0x1f, 0x86, 0x2d, 0x28, 0x29, 0xa2, 0x2b, 0xf0, 0x2a, 0x08, 0x29, 0xc6, 0x2a, 0x99, 0x28, + 0x1e, 0x28, 0xd4, 0x28, 0xc5, 0x28, 0x3b, 0x2b, 0x83, 0x22, 0x21, 0x28, 0xeb, 0x28, 0x3d, 0x26, + 0x62, 0x2c, 0xfc, 0x24, 0x0b, 0x2c, 0x9a, 0x29, 0x42, 0x21, 0x95, 0x29, 0xf2, 0x26, 0x08, 0x2b, + 0x60, 0x29, 0x64, 0x28, 0x02, 0x2b, 0xba, 0x28, 0x02, 0x2c, 0xbe, 0x28, 0x36, 0x2c, 0xc2, 0x25, + 0x0c, 0x26, 0xd4, 0x2a, 0xa6, 0x2a, 0xea, 0x2a, 0x56, 0x27, 0xba, 0x2a, 0x9b, 0x1e, 0xea, 0x28, + 0x6e, 0x1d, 0x06, 0x26, 0xf4, 0xa5, 0x8b, 0xa2, 0x80, 0x9c, 0xfa, 0x28, 0x6a, 0x27, 0x86, 0x24, + 0xa7, 0x29, 0xbb, 0x28, 0x69, 0x24, 0x96, 0x1d, 0xd8, 0x29, 0x12, 0x26, 0xb8, 0x9d, 0x93, 0xa0, + 0xf8, 0x1a, 0x13, 0x24, 0x46, 0x26, 0x48, 0x27, 0x1c, 0x9e, 0x07, 0x28, 0xca, 0x20, 0x84, 0x21, + 0xef, 0xa3, 0x12, 0x2a, 0xbc, 0x25, 0xd4, 0x27, 0xbe, 0x28, 0x60, 0x1c, 0x4c, 0x29, 0x60, 0x26, + 0x08, 0x27, 0xec, 0x29, 0xb2, 0x26, 0x24, 0x24, 0x1d, 0xa3, 0x5e, 0x23, 0x3d, 0xa0, 0x21, 0x25, + 0xc0, 0x9e, 0xf0, 0x19, 0xd9, 0x26, 0x26, 0x20, 0x70, 0x96, 0x4d, 0x21, 0x59, 0x1c, 0xb0, 0x25, + 0x54, 0xa0, 0x8c, 0x2a, 0x00, 0x97, 0xa2, 0x9d, 0xfa, 0xa5, 0x77, 0x25, 0xb8, 0x2b, 0x99, 0x1d, + 0xf4, 0x1b, 0x31, 0x2c, 0x4c, 0x1a, 0xd8, 0xa2, 0x84, 0xa0, 0xc4, 0x24, 0x74, 0x21, 0x0a, 0x25, + 0x38, 0x11, 0x92, 0x9d, 0xc4, 0x17, 0x0b, 0x9f, 0x87, 0x98, 0xc1, 0x9f, 0xc3, 0x9e, 0x21, 0xa0, + 0xdc, 0xa0, 0xe1, 0x9c, 0xe8, 0x99, 0x0e, 0x9d, 0x9d, 0x9d, 0x82, 0xa0, 0x3e, 0x9d, 0x00, 0x9f, + 0xbc, 0x9d, 0xf0, 0x9c, 0x8e, 0xa0, 0x07, 0x9a, 0xf2, 0xa0, 0x48, 0x9f, 0x58, 0x9d, 0xc7, 0x9b, + 0xe6, 0x14, 0xaa, 0xa1, 0x1e, 0x9d, 0x5d, 0x9f, 0x3a, 0x9c, 0x4f, 0x9d, 0x39, 0xa0, 0x1c, 0x9c, + 0x65, 0x9c, 0xc2, 0x9c, 0x9f, 0x9c, 0x92, 0x9d, 0x38, 0x96, 0xab, 0x9c, 0x04, 0x9d, 0xda, 0x9b, + 0x7e, 0xa0, 0x89, 0x95, 0x76, 0x9f, 0x8a, 0x9e, 0x3f, 0x98, 0x85, 0x9c, 0xf2, 0x9a, 0x73, 0xa0, + 0x85, 0x9d, 0xae, 0x9d, 0x3d, 0x9e, 0xe3, 0x98, 0x33, 0x9f, 0x66, 0x99, 0x24, 0xa0, 0x66, 0x99, + 0x31, 0x9a, 0x41, 0xa0, 0x70, 0x9d, 0x5a, 0x9f, 0xb6, 0x9c, 0x11, 0xa0, 0x7a, 0x83, 0x3b, 0x9b, + 0x56, 0x18, 0x14, 0x9a, 0xd6, 0x19, 0x3c, 0x14, 0x6e, 0x0e, 0x20, 0x9e, 0x22, 0x9d, 0x75, 0x9c, + 0x6d, 0x9e, 0xfb, 0x9d, 0xe0, 0x97, 0xb2, 0x94, 0x86, 0x9c, 0x3d, 0x9d, 0xf0, 0x93, 0xfc, 0x12, + 0xee, 0x97, 0x37, 0x97, 0xfb, 0x9c, 0x97, 0x95, 0xe0, 0x8e, 0x76, 0x9d, 0x02, 0x97, 0xa8, 0x94, + 0x36, 0x1b, 0x5a, 0x9f, 0x93, 0x9a, 0x63, 0x9c, 0xf0, 0x98, 0xe2, 0x96, 0x43, 0x9f, 0xf6, 0x99, + 0xd3, 0x9b, 0xa1, 0x9d, 0xf7, 0x9a, 0xc3, 0x95, 0xb0, 0x14, 0xad, 0x99, 0x80, 0x8b, 0xf6, 0x9a, + 0x30, 0x95, 0x03, 0x0d, 0x76, 0x9b, 0xc6, 0x99, 0x49, 0x93, 0x75, 0x94, 0x84, 0x94, 0x8d, 0x9d, + 0xb0, 0x8d, 0x67, 0x9f, 0x50, 0x8f, 0x9a, 0x17, 0x64, 0x16, 0x5d, 0x94, 0xb7, 0x9f, 0xa2, 0x93, + 0x4c, 0x94, 0xe0, 0xa0, 0xf8, 0x8e, 0x38, 0x8f, 0x00, 0x93, 0xa8, 0x9c, 0xc0, 0x8f, 0xcc, 0x96, + 0x6a, 0x99, 0x28, 0x9a, 0x40, 0x13, 0xea, 0x9b, 0x38, 0x95, 0xdb, 0x99, 0x51, 0x98, 0xd2, 0x98, + 0x7e, 0x9c, 0x34, 0x94, 0x56, 0x97, 0x13, 0x99, 0x44, 0x9c, 0x8e, 0x99, 0xcb, 0x95, 0x35, 0x9c, + 0x2a, 0x97, 0x2f, 0x9a, 0x74, 0x9a, 0xe7, 0x9b, 0xef, 0x9c, 0xde, 0x98, 0x21, 0x99, 0xff, 0x98, + 0x3a, 0x95, 0xe1, 0x9c, 0xbb, 0x98, 0x29, 0x9b, 0x9b, 0x9c, 0xb6, 0x97, 0xcf, 0x98, 0xce, 0x98, + 0x6e, 0x97, 0x1e, 0x99, 0xa2, 0x98, 0xdc, 0x9b, 0x17, 0x90, 0x43, 0x96, 0x3e, 0x97, 0x68, 0x94, + 0xa0, 0x9a, 0x5c, 0x96, 0xc7, 0x9b, 0x82, 0x97, 0x72, 0x86, 0xcb, 0x99, 0xc7, 0x95, 0x53, 0x98, + 0xe0, 0x97, 0x13, 0x97, 0x56, 0x9a, 0xed, 0x99, 0x9f, 0x9a, 0x61, 0x9a, 0x36, 0x9c, 0x40, 0x95, + 0xe6, 0x94, 0x75, 0x99, 0x9a, 0x9a, 0xc2, 0x98, 0xaa, 0x92, 0x73, 0x98, 0xd3, 0x92, 0xbc, 0x99, + 0xcc, 0x97, 0x3f, 0x97, 0x95, 0x15, 0xe8, 0x05, 0xd7, 0x01, 0x3b, 0x98, 0x61, 0x95, 0x10, 0x90, + 0xb2, 0x99, 0x68, 0x96, 0x5e, 0x95, 0xc3, 0x90, 0x70, 0x9b, 0x21, 0x93, 0x6a, 0x11, 0x61, 0x8c, + 0x68, 0x05, 0x1a, 0x96, 0xa6, 0x94, 0x49, 0x9a, 0x88, 0x8a, 0x16, 0x96, 0x28, 0x92, 0x8c, 0x94, + 0x64, 0x87, 0xc8, 0x99, 0xce, 0x95, 0x2f, 0x98, 0x75, 0x9b, 0x48, 0x88, 0x40, 0x97, 0x59, 0x97, + 0x78, 0x96, 0xfe, 0x99, 0xfe, 0x96, 0x76, 0x97, 0xd1, 0x12, 0x34, 0x91, 0xa5, 0x10, 0x08, 0x93, + 0x40, 0x10, 0xec, 0x91, 0x0c, 0x98, 0x40, 0x83, 0x0a, 0x10, 0xc7, 0x94, 0x5a, 0x8c, 0xca, 0x8f, + 0xe8, 0x10, 0x42, 0x99, 0x6a, 0x8c, 0x52, 0x92, 0x5a, 0x14, 0xc5, 0x98, 0xdb, 0x9b, 0x0c, 0x8f, + 0xd3, 0x8a, 0xb4, 0x9a, 0x68, 0x92, 0x49, 0x14, 0x58, 0x14, 0xce, 0x8f, 0x46, 0x94, 0xa4, 0x97, + 0xb9, 0x2b, 0xa0, 0xac, 0x91, 0x29, 0x14, 0xaa, 0x58, 0xa3, 0x43, 0xb0, 0x9c, 0xaf, 0x39, 0xb0, + 0xae, 0xb0, 0x28, 0xaf, 0x00, 0xa9, 0x00, 0xab, 0x9c, 0xac, 0xa8, 0xb0, 0x63, 0xac, 0xd5, 0xa9, + 0x26, 0xad, 0xa6, 0xaa, 0x76, 0xb0, 0x2e, 0xa1, 0xd0, 0xad, 0x0a, 0xb0, 0x13, 0xac, 0xda, 0xa8, + 0x28, 0x2c, 0x8d, 0xb1, 0xe0, 0xac, 0xb0, 0xae, 0x94, 0xa7, 0x80, 0xac, 0x1a, 0xb1, 0x75, 0xab, + 0xee, 0xac, 0xb6, 0xad, 0x8e, 0xac, 0x2b, 0xaa, 0xa0, 0x9e, 0xcd, 0xac, 0xf5, 0xaa, 0xd0, 0xac, + 0xac, 0xae, 0x80, 0x14, 0x2d, 0xae, 0x59, 0xae, 0xdb, 0xa8, 0x62, 0xa9, 0x9e, 0xa9, 0xec, 0xb0, + 0xcc, 0xab, 0x15, 0xb0, 0x76, 0xab, 0xff, 0x23, 0xbd, 0xaa, 0x04, 0xa2, 0x79, 0xb0, 0x2a, 0xa8, + 0x2e, 0xa9, 0xb6, 0xb1, 0xbc, 0xa9, 0x5d, 0xad, 0x49, 0xac, 0x4a, 0xb0, 0x10, 0x1d, 0x64, 0xa8, + 0xec, 0xad, 0xc5, 0xab, 0x74, 0xad, 0x20, 0xaa, 0xc8, 0xab, 0xd6, 0xac, 0xef, 0xac, 0x1e, 0xae, + 0xa7, 0xab, 0xdc, 0xad, 0x39, 0xae, 0x53, 0xaa, 0x2a, 0xaf, 0x5c, 0xab, 0xe9, 0xa6, 0xbd, 0xad, + 0x5b, 0xac, 0xcd, 0xad, 0x9a, 0xac, 0x13, 0xac, 0x02, 0xaf, 0x45, 0xad, 0x20, 0xac, 0x55, 0xab, + 0xb4, 0xa9, 0x25, 0xad, 0x87, 0xab, 0x63, 0xae, 0x56, 0xac, 0x35, 0xa8, 0x69, 0xac, 0x24, 0xaf, + 0x32, 0xad, 0x1e, 0xad, 0x0e, 0xac, 0x8a, 0xac, 0x24, 0xad, 0x74, 0xae, 0xa4, 0xae, 0x6a, 0xae, + 0x60, 0xab, 0xde, 0xad, 0x88, 0xad, 0xfc, 0xab, 0x1b, 0xac, 0xe3, 0xa9, 0xc2, 0xae, 0x62, 0xad, + 0x32, 0xaa, 0xa8, 0xaf, 0x57, 0xaa, 0x9c, 0xa8, 0x81, 0xad, 0x0f, 0xb0, 0x70, 0xad, 0x53, 0xac, + 0x09, 0xad, 0x95, 0xae, 0xc1, 0xac, 0x04, 0xad, 0x2d, 0xb0, 0xb2, 0xae, 0xce, 0x9f, 0xfa, 0xa9, + 0xde, 0x2a, 0x0e, 0x29, 0x03, 0x2c, 0x18, 0x2a, 0xdb, 0x28, 0x54, 0x29, 0x0e, 0x2b, 0xda, 0x29, + 0xa4, 0x29, 0xb8, 0x29, 0xf0, 0x29, 0xce, 0x2a, 0x36, 0x2b, 0x12, 0x2b, 0x5e, 0x2a, 0x68, 0x2c, + 0xce, 0x2b, 0x23, 0x2c, 0x68, 0x2b, 0xb2, 0x29, 0x1a, 0x2b, 0x66, 0x2a, 0xe4, 0x2a, 0xd4, 0x2a, + 0x68, 0x28, 0x64, 0x2b, 0x9b, 0x2a, 0x01, 0x2b, 0xc2, 0x29, 0x38, 0x28, 0x12, 0x2a, 0x7e, 0x2b, + 0xf0, 0x2a, 0x9f, 0x2a, 0x7e, 0x27, 0x6b, 0x29, 0x08, 0x2b, 0x89, 0x2b, 0x14, 0x2a, 0xc0, 0x2a, + 0xd0, 0x29, 0x00, 0x2c, 0x3d, 0x2a, 0xb4, 0x29, 0x58, 0x2b, 0x40, 0x2a, 0x51, 0x2b, 0x02, 0x2c, + 0x14, 0x29, 0x53, 0x2a, 0x26, 0x2a, 0x16, 0x2a, 0x02, 0x2c, 0x7c, 0x2c, 0xf6, 0x2a, 0xcd, 0x29, + 0xd4, 0x2a, 0xec, 0x2a, 0x00, 0x2c, 0xb1, 0x2a, 0x8d, 0x2c, 0x09, 0x2a, 0x25, 0x27, 0xde, 0x2a, + 0x2e, 0x24, 0x9a, 0x23, 0xbc, 0x21, 0x09, 0x20, 0x10, 0x1d, 0x73, 0x26, 0xa6, 0x28, 0x6a, 0x28, + 0x2e, 0x24, 0x52, 0x24, 0xe9, 0x27, 0xb5, 0x20, 0xdc, 0x26, 0xdc, 0x21, 0x20, 0x98, 0x83, 0x26, + 0x63, 0x25, 0x70, 0x20, 0xba, 0x24, 0xc2, 0x20, 0xfe, 0x27, 0xb6, 0x25, 0x4a, 0x1e, 0x58, 0x20, + 0xe2, 0x1f, 0xe5, 0x25, 0x9c, 0x25, 0xdc, 0x24, 0xe4, 0x24, 0xaa, 0x9d, 0x86, 0x26, 0xcb, 0x27, + 0xc5, 0x21, 0xfa, 0x26, 0xcc, 0x11, 0x44, 0x25, 0x88, 0x1d, 0xb1, 0x26, 0x3e, 0x25, 0x1c, 0x26, + 0xf4, 0x1b, 0xec, 0x25, 0x5a, 0x22, 0x97, 0x24, 0xac, 0x25, 0x8a, 0x1f, 0x20, 0x25, 0x17, 0x27, + 0xa8, 0x24, 0xb6, 0x26, 0xfe, 0x20, 0xa8, 0x9c, 0x0f, 0x28, 0xb7, 0x22, 0x78, 0x21, 0x28, 0x23, + 0x28, 0x21, 0x80, 0x24, 0x56, 0x21, 0x5c, 0x24, 0x0e, 0x26, 0xc4, 0x24, 0x6e, 0x23, 0x0e, 0x22, + 0xec, 0x9d, 0xc0, 0x9c, 0x4a, 0x9f, 0x16, 0x9e, 0x01, 0x9c, 0x1b, 0x9d, 0xb3, 0x9f, 0x86, 0x9d, + 0x90, 0x9d, 0x9b, 0x9c, 0x63, 0x9d, 0xf3, 0x9e, 0x36, 0x9e, 0x1b, 0x9f, 0x9e, 0x9e, 0x6a, 0xa0, + 0x0e, 0xa0, 0x46, 0x9f, 0x61, 0x9f, 0x24, 0x9d, 0x62, 0x9e, 0x01, 0x9e, 0x63, 0x9e, 0xaa, 0x9e, + 0x29, 0x9c, 0x4f, 0x9f, 0x0c, 0x9f, 0xf9, 0x9d, 0x94, 0x9d, 0x52, 0x9b, 0x3c, 0x9e, 0xbe, 0x9e, + 0x22, 0x9e, 0x9c, 0x9e, 0xbe, 0x98, 0x22, 0x9d, 0xf9, 0x9d, 0xf0, 0x9e, 0xcc, 0x9c, 0xe7, 0x9d, + 0x36, 0x9d, 0xa8, 0x9f, 0x2d, 0x9d, 0xa2, 0x9d, 0xc4, 0x9f, 0x4f, 0x9e, 0x34, 0x9e, 0x15, 0xa0, + 0x60, 0x9d, 0xda, 0x9c, 0x30, 0x9e, 0xd8, 0x9d, 0x26, 0xa0, 0x1b, 0x9f, 0x00, 0x9e, 0x5f, 0x9d, + 0x07, 0x9e, 0xb3, 0x9d, 0x99, 0x9f, 0x3a, 0x9e, 0xae, 0x9f, 0xa1, 0x9c, 0xc5, 0x9c, 0x40, 0x9f, + 0x33, 0x98, 0x76, 0x98, 0x0b, 0x98, 0x09, 0x98, 0x06, 0x91, 0xd8, 0x9a, 0xc6, 0x9d, 0x5f, 0x9c, + 0x72, 0x99, 0x07, 0x97, 0x9b, 0x9b, 0xe6, 0x98, 0x5d, 0x9a, 0x40, 0x99, 0x46, 0x95, 0x60, 0x9c, + 0x07, 0x9c, 0x2e, 0x96, 0xb0, 0x9a, 0x22, 0x96, 0xcd, 0x9b, 0x58, 0x9a, 0x01, 0x96, 0x28, 0x98, + 0xb2, 0x95, 0x82, 0x9b, 0x01, 0x9c, 0xa0, 0x98, 0xdb, 0x99, 0x84, 0x0c, 0xfa, 0x9b, 0xbc, 0x9b, + 0xf2, 0x96, 0x1b, 0x9c, 0x6f, 0x10, 0xd0, 0x99, 0x38, 0x93, 0x2d, 0x9b, 0x12, 0x98, 0xe5, 0x99, + 0xa6, 0x93, 0x3f, 0x9b, 0xd2, 0x95, 0xd1, 0x99, 0x28, 0x9c, 0x20, 0x98, 0xcc, 0x98, 0x97, 0x9c, + 0x59, 0x9a, 0xf3, 0x98, 0x88, 0x98, 0x0c, 0x90, 0x24, 0x9d, 0x17, 0x95, 0x1c, 0x96, 0x52, 0x98, + 0x60, 0x96, 0xd8, 0x97, 0x88, 0x98, 0x3e, 0x99, 0xa4, 0x99, 0xd2, 0x96, 0xcb, 0x9a, 0xf0, 0x99, + 0xdc, 0x9a, 0xb9, 0x98, 0x6f, 0x9b, 0x16, 0x99, 0xdc, 0x98, 0x22, 0x99, 0xef, 0x99, 0xe3, 0x99, + 0x02, 0x99, 0x20, 0x9a, 0x17, 0x9a, 0x89, 0x99, 0x77, 0x9b, 0xe8, 0x99, 0xcb, 0x98, 0xb3, 0x9b, + 0x77, 0x9a, 0xd6, 0x9b, 0x6d, 0x9a, 0x5a, 0x99, 0x32, 0x9b, 0x14, 0x9a, 0x2e, 0x9a, 0xd4, 0x99, + 0xee, 0x97, 0x98, 0x9a, 0x69, 0x99, 0x1e, 0x9b, 0x47, 0x99, 0x97, 0x97, 0x57, 0x99, 0x86, 0x9b, + 0xa5, 0x9a, 0x01, 0x9a, 0x64, 0x98, 0x25, 0x99, 0xd2, 0x9a, 0x4a, 0x9b, 0xa9, 0x9a, 0xdb, 0x9a, + 0x5b, 0x99, 0x56, 0x9b, 0x60, 0x9a, 0x17, 0x99, 0x08, 0x9a, 0x21, 0x99, 0x7b, 0x9b, 0xf8, 0x9a, + 0x3c, 0x98, 0x34, 0x9b, 0x20, 0x99, 0xff, 0x98, 0xee, 0x9a, 0xb0, 0x9c, 0xcd, 0x9a, 0x6e, 0x99, + 0x87, 0x9a, 0x36, 0x9b, 0x12, 0x9b, 0x41, 0x9a, 0xa8, 0x9c, 0xb7, 0x9a, 0x10, 0x94, 0x69, 0x99, + 0x14, 0x96, 0x67, 0x94, 0x54, 0x94, 0x14, 0x90, 0xef, 0x91, 0xe5, 0x96, 0x02, 0x98, 0xbc, 0x98, + 0x55, 0x94, 0x8c, 0x96, 0x81, 0x98, 0x21, 0x90, 0x70, 0x98, 0xb6, 0x91, 0x0a, 0x0c, 0x80, 0x96, + 0xc1, 0x94, 0x36, 0x94, 0xc8, 0x94, 0xfc, 0x92, 0xb6, 0x98, 0x7c, 0x96, 0x2c, 0x91, 0xf6, 0x90, + 0xec, 0x90, 0x12, 0x96, 0xbd, 0x94, 0xec, 0x96, 0x41, 0x95, 0xba, 0x04, 0x30, 0x96, 0xa6, 0x98, + 0x7f, 0x94, 0x00, 0x97, 0xdf, 0x91, 0xd5, 0x95, 0x00, 0x93, 0xe1, 0x97, 0xcd, 0x97, 0xce, 0x97, + 0x68, 0x90, 0x9c, 0x96, 0x4e, 0x95, 0xbb, 0x94, 0xd8, 0x94, 0x13, 0x8f, 0x5b, 0x97, 0xca, 0x96, + 0x11, 0x94, 0xd4, 0x98, 0xd8, 0x90, 0x6a, 0x09, 0x89, 0x97, 0x14, 0x97, 0xa8, 0x94, 0x70, 0x94, + 0x2f, 0x94, 0xf4, 0x96, 0xf0, 0x92, 0x4a, 0x95, 0x73, 0x98, 0x97, 0x97, 0x46, 0x8d, 0x84, 0x90, + 0x29, 0xac, 0xe1, 0xab, 0x30, 0xad, 0xef, 0xac, 0x92, 0xa8, 0xca, 0xac, 0x1f, 0xb0, 0x6e, 0xad, + 0xdf, 0xac, 0x05, 0xaa, 0xe8, 0xac, 0xd1, 0xad, 0xd0, 0xac, 0xec, 0xad, 0x26, 0xad, 0xda, 0xaf, + 0x74, 0xaf, 0xc1, 0xac, 0x64, 0xae, 0x5a, 0xab, 0x7b, 0xad, 0x23, 0xad, 0x84, 0xac, 0x3c, 0xad, + 0x6e, 0xaa, 0x7e, 0xae, 0xd4, 0xae, 0x3b, 0xac, 0xe6, 0xac, 0x67, 0xa7, 0x16, 0xae, 0xad, 0xad, + 0x3c, 0xac, 0x42, 0xae, 0x36, 0x9c, 0x85, 0xac, 0x1a, 0xab, 0xcb, 0xad, 0x49, 0xaa, 0x9f, 0xac, + 0xd4, 0xaa, 0x78, 0xae, 0x8f, 0xaa, 0x02, 0xad, 0x5b, 0xaf, 0x27, 0xad, 0x5c, 0xac, 0xa0, 0xaf, + 0x3d, 0xad, 0x7a, 0xaa, 0x27, 0xad, 0xc6, 0xab, 0x0a, 0xb0, 0xb9, 0xab, 0xde, 0xab, 0x37, 0xac, + 0x1a, 0xac, 0x7c, 0xab, 0xbf, 0xad, 0xf8, 0xac, 0x4c, 0xad, 0x8f, 0xa9, 0x9f, 0xad, 0x70, 0xae, + 0xf4, 0xac, 0x10, 0xa4, 0x2a, 0xa1, 0x14, 0xa9, 0xd8, 0xa2, 0xc7, 0xab, 0xe4, 0xa5, 0xee, 0xa8, + 0xf3, 0xa9, 0x92, 0xa8, 0x86, 0xa9, 0x4c, 0xa4, 0x2c, 0xac, 0x90, 0xa0, 0x51, 0x26, 0x33, 0xa9, + 0xe8, 0x1d, 0x48, 0xaa, 0x34, 0xa8, 0x36, 0xaa, 0xcd, 0xaa, 0x82, 0x9f, 0xde, 0x9d, 0x1f, 0xa9, + 0x24, 0x9c, 0x70, 0xa9, 0x6f, 0xa8, 0x28, 0xa9, 0xd4, 0xa6, 0xe0, 0x99, 0xdb, 0xa9, 0x20, 0xac, + 0x85, 0xa9, 0x2a, 0xa0, 0xc1, 0xa6, 0x56, 0xaa, 0x47, 0xaa, 0x6a, 0xab, 0x84, 0xa6, 0xb6, 0xa7, + 0x48, 0xa9, 0x98, 0xa7, 0x9f, 0xa8, 0xc0, 0x14, 0xc1, 0xa7, 0xd6, 0x9f, 0x80, 0x81, 0x2b, 0xa5, + 0x72, 0x23, 0x5c, 0xa2, 0xd0, 0xa5, 0x82, 0xa4, 0xeb, 0xa1, 0x1a, 0xad, 0x86, 0xa9, 0xb0, 0xa1, + 0xa0, 0xa6, 0x13, 0xac, 0xc3, 0xa8, 0x96, 0xa9, 0xfc, 0xa9, 0x90, 0xa9, 0x55, 0x9a, 0xba, 0xa7, + 0x9b, 0x26, 0x94, 0x20, 0xed, 0x20, 0xc8, 0x25, 0x5c, 0x1c, 0x12, 0x24, 0x13, 0x27, 0xb3, 0x25, + 0x3e, 0x28, 0x0e, 0x25, 0x46, 0x23, 0x6f, 0x27, 0x42, 0x24, 0xe9, 0x26, 0x3d, 0x23, 0xb1, 0x28, + 0x9c, 0x23, 0x79, 0x28, 0x50, 0x28, 0x7a, 0x25, 0x77, 0x28, 0xe6, 0x24, 0x1e, 0x22, 0x80, 0x26, + 0x7c, 0x9e, 0x26, 0x29, 0x2a, 0x25, 0xba, 0x25, 0x3e, 0x20, 0x1e, 0x20, 0xd8, 0x26, 0xc6, 0x27, + 0x84, 0x24, 0xcf, 0x22, 0x2a, 0x20, 0xbe, 0x26, 0xae, 0x25, 0x16, 0x24, 0x18, 0x25, 0x58, 0x20, + 0xc1, 0x26, 0x02, 0x24, 0x77, 0x26, 0x6f, 0x21, 0xf4, 0x27, 0x5e, 0x24, 0xf1, 0x21, 0xf7, 0x25, + 0x85, 0x22, 0xde, 0x20, 0x30, 0x25, 0x12, 0x25, 0x06, 0x28, 0x4b, 0x28, 0xdc, 0x27, 0x03, 0x24, + 0x79, 0x24, 0x8e, 0x25, 0x33, 0x29, 0x7d, 0x29, 0xfa, 0x27, 0x6e, 0x26, 0x30, 0x12, 0x0c, 0x25, + 0x87, 0x1d, 0xb7, 0x19, 0xf6, 0x9f, 0x4b, 0xa0, 0xba, 0x18, 0xac, 0x26, 0xce, 0x22, 0x79, 0x24, + 0x8e, 0x22, 0x04, 0x24, 0xbc, 0x21, 0x44, 0x1f, 0x2a, 0x25, 0x10, 0x18, 0x42, 0xa2, 0xb0, 0x9c, + 0x20, 0x94, 0x84, 0x94, 0x5c, 0x1f, 0x66, 0x21, 0xa4, 0x24, 0x60, 0x23, 0xa8, 0x9d, 0x46, 0x18, + 0x84, 0x11, 0x96, 0x1f, 0x2e, 0x26, 0x0b, 0x1c, 0xad, 0x23, 0xf2, 0x99, 0x9a, 0x23, 0xbc, 0x24, + 0x30, 0x21, 0x32, 0x1e, 0x38, 0xa0, 0xfc, 0x21, 0xc0, 0xa0, 0xf7, 0x25, 0x70, 0x9e, 0xca, 0x23, + 0xc4, 0x17, 0xfa, 0x9e, 0x1a, 0x96, 0x69, 0xa1, 0xc5, 0x22, 0x68, 0x92, 0x3a, 0x9d, 0x51, 0x25, + 0x10, 0x1b, 0x9a, 0x22, 0xd4, 0x15, 0xb6, 0x98, 0x92, 0x9e, 0x6d, 0x1f, 0x82, 0x24, 0x51, 0x1d, + 0x10, 0x12, 0x54, 0x24, 0x71, 0x22, 0xa4, 0x19, 0x49, 0x1c, 0xf6, 0x22, 0x6c, 0x24, 0x7c, 0x17, + 0x63, 0x96, 0xcb, 0x93, 0xe2, 0x91, 0xc0, 0x96, 0x0f, 0x8d, 0xd0, 0x96, 0x24, 0x9c, 0x00, 0x9a, + 0x2a, 0x9c, 0x3c, 0x99, 0x21, 0x95, 0x40, 0x9c, 0x73, 0x95, 0xe4, 0x9b, 0xc6, 0x98, 0x02, 0x9c, + 0xdc, 0x98, 0x46, 0x9b, 0x6c, 0x9c, 0x58, 0x98, 0x7a, 0x9c, 0xda, 0x9a, 0x6d, 0x95, 0x48, 0x99, + 0x4e, 0x14, 0x1b, 0x9d, 0x4e, 0x9a, 0x87, 0x98, 0x86, 0x94, 0xea, 0x93, 0x9d, 0x9a, 0xd3, 0x9a, + 0x06, 0x97, 0x26, 0x98, 0xc0, 0x04, 0xe9, 0x99, 0x08, 0x95, 0xac, 0x96, 0x78, 0x97, 0x38, 0x94, + 0x7c, 0x99, 0x2e, 0x94, 0x24, 0x99, 0x83, 0x94, 0x6b, 0x9c, 0xa2, 0x98, 0x14, 0x96, 0xe8, 0x9b, + 0x42, 0x99, 0xc9, 0x96, 0xd2, 0x98, 0xa6, 0x98, 0x1a, 0x9c, 0x9a, 0x99, 0x25, 0x9c, 0x97, 0x98, + 0x5c, 0x97, 0x0c, 0x98, 0x9e, 0x9d, 0x3d, 0x9d, 0xd0, 0x9a, 0x24, 0x9a, 0x28, 0x93, 0x2e, 0x98, + 0xdb, 0x0f, 0x84, 0x8d, 0x30, 0x14, 0xf0, 0x15, 0x75, 0x87, 0x7e, 0x99, 0x58, 0x99, 0x05, 0x99, + 0x29, 0x98, 0x68, 0x98, 0xcd, 0x93, 0x02, 0x98, 0xe6, 0x96, 0x89, 0x95, 0x04, 0x0d, 0xd8, 0x0a, + 0x6e, 0x92, 0x90, 0x03, 0xd6, 0x96, 0x3c, 0x94, 0x86, 0x99, 0xe2, 0x99, 0xe9, 0x0e, 0x16, 0x8c, + 0xbe, 0x0d, 0xfd, 0x96, 0x1e, 0x9b, 0x0b, 0x8e, 0x46, 0x97, 0x38, 0x08, 0x2e, 0x98, 0x64, 0x98, + 0xe4, 0x93, 0x54, 0x95, 0x00, 0x17, 0xd2, 0x95, 0xda, 0x17, 0xd6, 0x98, 0x84, 0x12, 0xfa, 0x96, + 0x84, 0x8b, 0x06, 0x15, 0x64, 0x09, 0x29, 0x14, 0x2d, 0x99, 0xdc, 0x8f, 0x1c, 0x0a, 0x63, 0x9b, + 0xb6, 0x96, 0x16, 0x98, 0xb6, 0x8f, 0xd0, 0x83, 0x58, 0x8b, 0xf8, 0x86, 0x9b, 0x99, 0xe3, 0x94, + 0x08, 0x88, 0x24, 0x96, 0x7e, 0x99, 0x84, 0x94, 0x27, 0x91, 0x90, 0x97, 0x2c, 0x99, 0x26, 0x8c, + 0x7c, 0x98, 0xa1, 0x90, 0x21, 0x91, 0x9b, 0x96, 0xcb, 0x8d, 0x40, 0x95, 0x22, 0x95, 0x27, 0x95, + 0x9c, 0x97, 0xaa, 0x94, 0x8e, 0x94, 0x3e, 0x95, 0xf9, 0x95, 0xa7, 0x94, 0x72, 0x8c, 0x52, 0x98, + 0xc3, 0x8f, 0x63, 0x98, 0x0c, 0x97, 0x0f, 0x96, 0x07, 0x98, 0x67, 0x91, 0x04, 0x91, 0x85, 0x96, + 0xda, 0x09, 0x59, 0x98, 0x3a, 0x94, 0xfc, 0x95, 0xa7, 0x90, 0x52, 0x8e, 0x79, 0x96, 0x10, 0x98, + 0x22, 0x95, 0x93, 0x90, 0xf2, 0x92, 0xd3, 0x96, 0x3a, 0x97, 0x36, 0x95, 0x1e, 0x95, 0x28, 0x91, + 0xc6, 0x96, 0xc7, 0x94, 0x6b, 0x96, 0x2f, 0x90, 0x2a, 0x96, 0x5d, 0x92, 0x17, 0x90, 0xca, 0x93, + 0x4c, 0x8a, 0x97, 0x8e, 0x97, 0x94, 0x5d, 0x94, 0xf2, 0x95, 0x1c, 0x99, 0xc2, 0x96, 0xcd, 0x91, + 0x6a, 0x94, 0xcf, 0x96, 0x12, 0x98, 0xb6, 0x98, 0xc3, 0x97, 0x20, 0x96, 0x76, 0x08, 0xfe, 0x94, + 0xca, 0x94, 0x38, 0x8c, 0xfd, 0x0b, 0x33, 0x84, 0x33, 0x8c, 0x56, 0x97, 0x65, 0x90, 0x2c, 0x94, + 0xe4, 0x92, 0xad, 0x93, 0xe1, 0x93, 0xfe, 0x89, 0xb3, 0x96, 0x80, 0x07, 0x49, 0x14, 0x14, 0x83, + 0x3d, 0x0d, 0x8e, 0x8d, 0x7c, 0x8e, 0xaa, 0x93, 0xa1, 0x94, 0xee, 0x8e, 0x46, 0x0c, 0x02, 0x8f, + 0xa0, 0x89, 0xe9, 0x8f, 0x0a, 0x95, 0x6c, 0x90, 0x67, 0x93, 0x4c, 0x09, 0x09, 0x94, 0xaf, 0x95, + 0x30, 0x93, 0x6d, 0x8a, 0xe6, 0x81, 0xa6, 0x93, 0x48, 0x8a, 0xb6, 0x96, 0xf5, 0x03, 0xea, 0x93, + 0x06, 0x8f, 0xd4, 0x82, 0x89, 0x8b, 0x7d, 0x10, 0x03, 0x91, 0x8a, 0x07, 0x82, 0x0d, 0xc0, 0x92, + 0x75, 0x0c, 0xae, 0x90, 0xd9, 0x89, 0xa6, 0x02, 0x3a, 0x0f, 0xd6, 0x94, 0x14, 0x94, 0x07, 0x8a, + 0xe3, 0x8b, 0xd5, 0x95, 0xc9, 0x90, 0xd2, 0x8c, 0x7a, 0x90, 0x8a, 0x93, 0xa6, 0x91, 0x53, 0x8d, + 0x5e, 0x1f, 0x64, 0xa1, 0x17, 0x1e, 0xe0, 0x1f, 0xc8, 0x95, 0x50, 0xa8, 0x6c, 0xac, 0x84, 0xaa, + 0x4a, 0xab, 0xa3, 0xa9, 0x2d, 0xa3, 0x21, 0xac, 0x83, 0xa4, 0x36, 0xab, 0x6a, 0xa7, 0x0a, 0xa8, + 0xbb, 0xa8, 0xee, 0xa6, 0x92, 0xab, 0xfc, 0xa5, 0x3d, 0xac, 0x5c, 0xac, 0x78, 0xa0, 0x96, 0xa5, + 0x4c, 0x24, 0x24, 0xac, 0x2a, 0xac, 0xdb, 0xa4, 0x96, 0xa6, 0x75, 0xa0, 0x20, 0xaa, 0xd3, 0xa9, + 0xf4, 0xa4, 0x88, 0xa8, 0xd2, 0x26, 0x9f, 0xa8, 0x1f, 0x25, 0xca, 0xa7, 0x50, 0x9d, 0xf2, 0xa5, + 0xbc, 0xa5, 0xfd, 0x20, 0x72, 0xa4, 0x30, 0x95, 0x71, 0xac, 0x6a, 0xa7, 0x7a, 0xa3, 0x0e, 0xad, + 0x86, 0xaa, 0xdd, 0xa8, 0xb0, 0xa6, 0x94, 0xa5, 0xba, 0xa9, 0x2a, 0xa2, 0x32, 0xac, 0x97, 0xa8, + 0xbf, 0xa3, 0xf0, 0xa5, 0x49, 0xad, 0x8f, 0xab, 0x33, 0xa8, 0x96, 0xa9, 0xcc, 0xa8, 0xae, 0xa4, + 0x54, 0xc0, 0x86, 0xc1, 0x72, 0xb4, 0xb4, 0xbd, 0x1f, 0xbd, 0x99, 0xbd, 0x53, 0xbf, 0x41, 0xbd, + 0x5b, 0xc1, 0x38, 0xbd, 0x96, 0xbf, 0xd7, 0xbe, 0xad, 0xc3, 0x52, 0xbf, 0xe3, 0xbc, 0xfa, 0xc0, + 0x44, 0xbf, 0x91, 0xc0, 0x27, 0xbf, 0xc0, 0xc2, 0x16, 0xc1, 0x2f, 0xc1, 0xbc, 0xc0, 0x15, 0xbe, + 0x49, 0xbf, 0x76, 0xc2, 0x4d, 0xbe, 0xe8, 0xc1, 0xa4, 0xc4, 0xb0, 0xbc, 0x8c, 0xbc, 0xd9, 0xbf, + 0x28, 0xbe, 0x2e, 0xc3, 0x22, 0xc0, 0x1f, 0xc1, 0xd4, 0x33, 0x60, 0xba, 0xb5, 0xbd, 0xfa, 0xbd, + 0x20, 0xb9, 0xfb, 0xbf, 0x07, 0xc2, 0x52, 0xbe, 0x41, 0x32, 0x02, 0xc1, 0x16, 0xc1, 0x88, 0xbd, + 0xa1, 0xbe, 0xab, 0xc2, 0xb6, 0xbe, 0x63, 0xc0, 0x1e, 0xc0, 0x87, 0xc1, 0x61, 0xc3, 0x73, 0xbe, + 0x9f, 0xbc, 0x28, 0xc0, 0x3c, 0xc0, 0x99, 0xb4, 0x8f, 0xb6, 0xd3, 0xbc, 0x59, 0xbc, 0x80, 0xc0, + 0x65, 0x39, 0x68, 0x3e, 0x16, 0x38, 0x16, 0x3f, 0x4c, 0x3c, 0x5a, 0x3e, 0xf5, 0x3c, 0xe2, 0x3d, + 0x84, 0x3e, 0xba, 0x3a, 0x36, 0x3c, 0x82, 0x3c, 0x78, 0x3f, 0x8d, 0x3e, 0x56, 0x3d, 0x16, 0x3f, + 0x74, 0x3e, 0xec, 0x3c, 0x58, 0x3e, 0xe3, 0x3c, 0x0d, 0x40, 0x07, 0x3e, 0xb6, 0x3e, 0x61, 0x3c, + 0xf7, 0x3b, 0x9e, 0x3f, 0x2a, 0x3d, 0xfc, 0x3e, 0x75, 0x3f, 0x2f, 0x3d, 0x4a, 0x3d, 0x05, 0x3c, + 0xe0, 0x3c, 0xc0, 0x3d, 0xb6, 0x3c, 0x50, 0x3d, 0x08, 0x39, 0x6c, 0x3d, 0xb8, 0x3c, 0x1f, 0x3d, + 0xae, 0x3e, 0x7e, 0x3c, 0x8c, 0x3e, 0x24, 0x3e, 0x90, 0x35, 0xa8, 0x3d, 0x54, 0x3d, 0xfc, 0x3e, + 0x5d, 0x3d, 0xb6, 0x3d, 0x49, 0x3e, 0x97, 0x3c, 0xc0, 0x3e, 0x94, 0x3c, 0x8c, 0x3e, 0x15, 0x3b, + 0x04, 0x3c, 0x6a, 0x3e, 0x9c, 0x3c, 0x9d, 0x3b, 0x22, 0x3c, 0x6e, 0x3d, 0x13, 0x39, 0x30, 0x3d, + 0x28, 0x34, 0xfe, 0x39, 0xe0, 0xaf, 0x88, 0x2d, 0x7c, 0xad, 0xd6, 0x38, 0x9a, 0x3b, 0x2a, 0x38, + 0xc0, 0x3b, 0xc2, 0x39, 0x12, 0x39, 0x5a, 0x2d, 0xc8, 0x3b, 0xb9, 0x39, 0x4c, 0x32, 0x18, 0x36, + 0xdb, 0x37, 0x92, 0x38, 0xa2, 0x39, 0x46, 0x38, 0xfc, 0xaf, 0xcc, 0x39, 0x00, 0x38, 0x94, 0x35, + 0x0f, 0xb4, 0x53, 0x3d, 0xa6, 0x32, 0xa9, 0x3b, 0x46, 0x3a, 0xc4, 0x2d, 0x07, 0x3c, 0xbb, 0x38, + 0x76, 0x38, 0xae, 0x3d, 0x1a, 0x3b, 0x86, 0x36, 0x68, 0x29, 0x7c, 0x2f, 0xbb, 0x35, 0x45, 0x37, + 0xa8, 0xb0, 0x86, 0x39, 0x49, 0x3b, 0xae, 0x3a, 0x8c, 0xa8, 0x78, 0x36, 0xed, 0x38, 0x8a, 0x36, + 0x40, 0x25, 0x40, 0x3d, 0xe8, 0x2d, 0x9f, 0xb0, 0x82, 0x32, 0x3f, 0x38, 0x72, 0x3c, 0x3f, 0x33, + 0x09, 0x34, 0x87, 0x3d, 0x7d, 0xaf, 0x28, 0xae, 0xa0, 0x2d, 0x00, 0x36, 0x80, 0x97, 0x0c, 0x39, + 0x61, 0xa9, 0xf0, 0xb1, 0xd5, 0xab, 0x20, 0xb3, 0x50, 0xaf, 0x18, 0xb3, 0x85, 0xb1, 0x7e, 0xb2, + 0x85, 0xb2, 0x99, 0xaf, 0x0b, 0xb0, 0x70, 0xaf, 0x6e, 0xb2, 0x0e, 0xb3, 0x6b, 0xb1, 0x76, 0xb2, + 0x97, 0xb2, 0x63, 0xb0, 0xd6, 0xb2, 0x3b, 0xae, 0xd0, 0xb2, 0xa1, 0xb1, 0x54, 0xb2, 0x31, 0xb0, + 0xdc, 0xac, 0xd2, 0xb3, 0xda, 0xb0, 0xc2, 0xb2, 0x40, 0xb1, 0x1e, 0xb1, 0xbf, 0xb2, 0x5a, 0xaf, + 0x1a, 0xb1, 0x58, 0xb1, 0xdf, 0xb0, 0x4c, 0xb0, 0xb2, 0xae, 0xd8, 0xb1, 0xb6, 0xb0, 0x47, 0xb1, + 0x1a, 0xb3, 0x5e, 0xb0, 0x19, 0xb2, 0x04, 0xb3, 0x48, 0xab, 0xc4, 0xb0, 0xab, 0xb0, 0x90, 0xb3, + 0xb3, 0xb0, 0x66, 0xb1, 0xe6, 0xb1, 0xe1, 0xad, 0x3a, 0xb2, 0xcc, 0xae, 0xb4, 0xb1, 0xca, 0xad, + 0xfb, 0xaf, 0xb1, 0xb3, 0x2c, 0xae, 0x1b, 0xb0, 0xa9, 0xb0, 0xd4, 0xb1, 0x88, 0xab, 0xd5, 0xb0, + 0x6c, 0x90, 0x6c, 0xae, 0x90, 0x18, 0xec, 0xaa, 0x60, 0x17, 0xe5, 0xaf, 0x98, 0xb0, 0xe6, 0xae, + 0x68, 0xb0, 0xd2, 0xae, 0x67, 0xad, 0x18, 0xa2, 0x22, 0xaf, 0x1b, 0xb0, 0x1d, 0xab, 0x05, 0xac, + 0x00, 0xae, 0x8d, 0xac, 0x04, 0xb0, 0x5a, 0xa7, 0x80, 0x18, 0x3f, 0xae, 0x20, 0xad, 0x36, 0xab, + 0x43, 0x2a, 0xfd, 0xb1, 0x8a, 0xa9, 0x3d, 0xb0, 0x2e, 0xab, 0x06, 0xa9, 0xbc, 0xb1, 0xb6, 0xac, + 0xf8, 0xad, 0x49, 0xb1, 0xe2, 0xaf, 0x67, 0xa9, 0xe2, 0xa9, 0x87, 0xab, 0x2b, 0xac, 0x44, 0xad, + 0x2c, 0xa9, 0xf8, 0xad, 0x8d, 0xaf, 0xc7, 0xb0, 0xc0, 0xa4, 0x24, 0xaa, 0xc0, 0xac, 0x90, 0xae, + 0xa4, 0xa3, 0x07, 0xb1, 0xaa, 0xa8, 0xcb, 0x28, 0xec, 0xa9, 0xbc, 0xa9, 0x05, 0xb0, 0x48, 0xa6, + 0x58, 0xaa, 0xfb, 0xb2, 0xd0, 0x27, 0x0a, 0xa6, 0x8d, 0xaa, 0x5e, 0xad, 0xdc, 0x1c, 0x67, 0xad, + 0x70, 0xab, 0x31, 0xae, 0x7c, 0xa6, 0xb5, 0xad, 0x03, 0xac, 0xd2, 0xac, 0x2b, 0xac, 0x83, 0xac, + 0xfe, 0xad, 0xab, 0xa9, 0x0b, 0xac, 0x76, 0xac, 0xcc, 0xaf, 0x50, 0xad, 0x5a, 0xac, 0x99, 0xae, + 0x6a, 0xad, 0xee, 0xac, 0x26, 0xad, 0x19, 0xae, 0xac, 0xaf, 0xcd, 0xad, 0x27, 0xae, 0xf0, 0xab, + 0x88, 0xac, 0xf4, 0xae, 0x9d, 0xac, 0x93, 0xae, 0x66, 0xb0, 0x4a, 0xac, 0x29, 0xab, 0x07, 0xac, + 0x1f, 0xac, 0x26, 0xae, 0x52, 0xac, 0x97, 0xad, 0xdc, 0xa4, 0x02, 0xac, 0x11, 0xac, 0x4d, 0xac, + 0xcf, 0xac, 0x45, 0xac, 0x69, 0xae, 0xb3, 0xac, 0x3c, 0xa1, 0xbe, 0xad, 0x6a, 0xad, 0x56, 0xad, + 0xf8, 0xac, 0xf0, 0xad, 0x7c, 0xad, 0x36, 0xad, 0x15, 0xae, 0x45, 0xad, 0xf9, 0xae, 0x54, 0xab, + 0xe0, 0xaa, 0xf6, 0xac, 0x1d, 0xad, 0x55, 0xa9, 0xb0, 0xa9, 0x3e, 0xac, 0x5d, 0xa9, 0x02, 0xad, + 0xc8, 0xa8, 0xec, 0xaa, 0x89, 0x1e, 0xf8, 0x9c, 0xcc, 0x9c, 0xb1, 0xa6, 0x7c, 0xaa, 0xe4, 0xa5, + 0xc2, 0xab, 0xe5, 0xa8, 0x67, 0xa9, 0x7e, 0xa4, 0xee, 0xac, 0xb8, 0xa8, 0xaa, 0xa1, 0x4d, 0xa8, + 0x4c, 0xa7, 0xa4, 0xa9, 0xa8, 0xa8, 0xcf, 0xab, 0x53, 0xa2, 0x98, 0xaa, 0xc4, 0xa8, 0x54, 0xa6, + 0x4e, 0xa0, 0x1e, 0xad, 0x96, 0xa4, 0x0d, 0xac, 0x57, 0xad, 0xdc, 0x9e, 0x24, 0xa9, 0x6b, 0xa9, + 0x02, 0xa8, 0x16, 0xae, 0xc8, 0xaa, 0x4b, 0xa9, 0x4c, 0x21, 0xf0, 0x91, 0xc1, 0xa5, 0xa4, 0xa6, + 0x01, 0x24, 0xc6, 0xa9, 0x17, 0xac, 0xec, 0xa8, 0x94, 0x20, 0x05, 0xa9, 0x3f, 0xaa, 0x3b, 0xa4, + 0xf4, 0xa1, 0x90, 0xad, 0x07, 0xa2, 0x4b, 0xa4, 0x57, 0xa5, 0x9c, 0xaa, 0x4b, 0xad, 0x38, 0xa6, + 0x24, 0xa4, 0x40, 0xac, 0x38, 0xa4, 0x7c, 0x21, 0xb0, 0x19, 0x66, 0xa4, 0x2f, 0xa1, 0xc0, 0xa9, + 0xf0, 0x2b, 0x0e, 0xc1, 0xc1, 0xb8, 0x84, 0xc1, 0x90, 0xbb, 0x11, 0xc3, 0x32, 0xc2, 0x56, 0xc2, + 0x58, 0xc2, 0x6b, 0xc0, 0x97, 0xbf, 0x0e, 0xbc, 0x1c, 0xc1, 0xef, 0xc2, 0x73, 0xc0, 0xb6, 0xc0, + 0xd6, 0xc1, 0x17, 0xbf, 0xbc, 0xc2, 0x9b, 0xb8, 0xf1, 0xbe, 0xdc, 0xc0, 0x12, 0xc1, 0xea, 0xbe, + 0xd8, 0x30, 0x0d, 0xc4, 0x31, 0xbf, 0x42, 0xc2, 0x45, 0xbd, 0xcb, 0xbf, 0x1b, 0xc4, 0x85, 0xbe, + 0xeb, 0xc0, 0xaf, 0xc1, 0x2a, 0xc1, 0x61, 0xbd, 0x05, 0xbf, 0x09, 0xc1, 0x11, 0xc0, 0xd1, 0xc0, + 0x95, 0xc1, 0x2c, 0xc0, 0x70, 0xc1, 0x82, 0xc3, 0x2f, 0xbb, 0x5b, 0xbe, 0x5d, 0xbf, 0xef, 0xc2, + 0xa0, 0xbd, 0xaa, 0xc1, 0x21, 0xc0, 0x1a, 0xac, 0x64, 0xc0, 0x08, 0xbc, 0xfe, 0xc0, 0x20, 0xbb, + 0xb0, 0xbe, 0xa4, 0xc4, 0xd0, 0xb2, 0x8d, 0xbe, 0x3a, 0xc0, 0x62, 0xc1, 0x64, 0xb5, 0x2c, 0xc0}; +unsigned char conv2d_winograd_fp16_bias[] = { + 0xf6, 0x3e, 0x80, 0x3f, 0x7f, 0x44, 0xde, 0x3e, 0x90, 0x47, 0x25, 0x4b, 0xa4, 0xc4, 0x00, 0x42, + 0x50, 0x44, 0x61, 0xc4, 0x0c, 0x42, 0x9c, 0x4d, 0x77, 0x49, 0xbf, 0x47, 0xf8, 0xc6, 0xfe, 0x46}; +unsigned char conv2d_winograd_fp16_out[] = { + 0xec, 0x52, 0x13, 0x55, 0x6e, 0x55, 0xef, 0x55, 0x80, 0x56, 0x2b, 0x56, 0x2a, 0x57, 0x19, 0x56, + 0x72, 0x56, 0xcf, 0x54, 0x64, 0x56, 0xaf, 0x55, 0xc1, 0x56, 0x02, 0x52, 0x53, 0x55, 0xb2, 0x57, + 0xb1, 0x55, 0x7a, 0x56, 0xca, 0x58, 0x86, 0x57, 0xf0, 0x59, 0xb3, 0x58, 0xf8, 0x59, 0x7e, 0x57, + 0x3a, 0x59, 0x7f, 0x58, 0x09, 0x59, 0xa5, 0x55, 0xda, 0x55, 0x0f, 0x58, 0x5a, 0x57, 0x60, 0x58, + 0x5f, 0x59, 0xfc, 0x58, 0xf2, 0x59, 0x09, 0x5a, 0x0e, 0x5a, 0xec, 0x58, 0x93, 0x59, 0x34, 0x59, + 0x07, 0x59, 0x2c, 0x56, 0x6d, 0x56, 0x92, 0x57, 0x1c, 0x57, 0x14, 0x58, 0x04, 0x58, 0xc4, 0x58, + 0xdc, 0x59, 0x09, 0x5a, 0x0e, 0x5a, 0xd1, 0x58, 0x67, 0x59, 0xe0, 0x58, 0xd8, 0x58, 0x68, 0x55, + 0x86, 0x55, 0xc2, 0x57, 0x84, 0x57, 0xa1, 0x58, 0xc5, 0x58, 0x06, 0x59, 0xdd, 0x59, 0x0e, 0x5a, + 0xba, 0x59, 0xfe, 0x58, 0x38, 0x59, 0x98, 0x58, 0x6a, 0x57, 0x4a, 0x55, 0xa4, 0x56, 0x14, 0x59, + 0x58, 0x58, 0x99, 0x58, 0xe9, 0x58, 0x2f, 0x59, 0x2c, 0x5a, 0x86, 0x5a, 0xde, 0x59, 0xd6, 0x58, + 0xf8, 0x58, 0x78, 0x58, 0x6d, 0x58, 0xab, 0x54, 0x1c, 0x57, 0xe4, 0x58, 0x1f, 0x58, 0x92, 0x58, + 0xa2, 0x58, 0x76, 0x59, 0x0e, 0x5a, 0x9d, 0x59, 0xe8, 0x58, 0x25, 0x58, 0x20, 0x58, 0x72, 0x58, + 0x32, 0x59, 0x93, 0x55, 0x67, 0x56, 0xb4, 0x58, 0x10, 0x58, 0x4a, 0x58, 0xc3, 0x58, 0x1c, 0x59, + 0x4b, 0x59, 0x1b, 0x59, 0x14, 0x58, 0xfe, 0x56, 0x56, 0x57, 0x62, 0x58, 0xcc, 0x58, 0x60, 0x56, + 0x13, 0x56, 0xd9, 0x58, 0x90, 0x58, 0x19, 0x59, 0xbb, 0x58, 0x45, 0x59, 0x4f, 0x59, 0x36, 0x59, + 0x52, 0x58, 0x0e, 0x58, 0x68, 0x58, 0x0c, 0x58, 0x0e, 0x59, 0x88, 0x55, 0xd7, 0x55, 0x2d, 0x59, + 0xe2, 0x58, 0x74, 0x59, 0xa5, 0x58, 0x63, 0x59, 0x44, 0x59, 0xa9, 0x59, 0x1c, 0x59, 0x9b, 0x58, + 0xa7, 0x58, 0xc6, 0x58, 0xf2, 0x59, 0x54, 0x57, 0x66, 0x55, 0x31, 0x59, 0x4d, 0x59, 0xe0, 0x58, + 0xf0, 0x57, 0x31, 0x59, 0xb2, 0x58, 0x80, 0x59, 0x6a, 0x59, 0xe3, 0x58, 0x57, 0x59, 0xa0, 0x59, + 0x2e, 0x5a, 0x5f, 0x57, 0xc4, 0x55, 0x63, 0x58, 0x52, 0x58, 0x51, 0x58, 0x1a, 0x58, 0x8f, 0x58, + 0x9e, 0x58, 0x4d, 0x59, 0xe8, 0x58, 0x02, 0x5a, 0xc0, 0x59, 0xe4, 0x59, 0xee, 0x58, 0x9c, 0x55, + 0x3a, 0x55, 0x20, 0x57, 0xae, 0x57, 0x7f, 0x57, 0x24, 0x58, 0x08, 0x58, 0x0b, 0x58, 0x82, 0x59, + 0x6a, 0x59, 0x47, 0x5a, 0x2f, 0x59, 0x3b, 0x59, 0x48, 0x58, 0x38, 0x55, 0x80, 0x52, 0xda, 0x54, + 0xe6, 0x54, 0xa3, 0x55, 0x6e, 0x55, 0x2c, 0x55, 0x08, 0x56, 0x89, 0x57, 0x54, 0x57, 0x78, 0x58, + 0xca, 0x55, 0xbb, 0x55, 0x0c, 0x54, 0x69, 0x53, 0x0c, 0x52, 0xb2, 0x54, 0xc5, 0x53, 0x0f, 0x55, + 0x3c, 0x56, 0xb2, 0x55, 0x28, 0x56, 0x6c, 0x56, 0xcb, 0x55, 0xda, 0x54, 0xc2, 0x55, 0x32, 0x55, + 0xa5, 0x55, 0x10, 0x52, 0xbc, 0x55, 0xa4, 0x56, 0x51, 0x56, 0xf5, 0x56, 0xe3, 0x57, 0xa8, 0x57, + 0xfd, 0x58, 0x2c, 0x59, 0xdf, 0x58, 0x77, 0x57, 0x3a, 0x58, 0xab, 0x58, 0x62, 0x58, 0xd8, 0x54, + 0x2c, 0x56, 0x4e, 0x57, 0x68, 0x56, 0x6d, 0x57, 0x3c, 0x58, 0xc7, 0x58, 0x1f, 0x59, 0x31, 0x59, + 0x07, 0x59, 0x85, 0x58, 0xd2, 0x58, 0xd0, 0x58, 0xe2, 0x58, 0xee, 0x55, 0xc4, 0x56, 0xa9, 0x57, + 0x0b, 0x56, 0x0a, 0x58, 0x80, 0x57, 0x3c, 0x58, 0x3d, 0x59, 0x8a, 0x59, 0x6f, 0x59, 0x14, 0x59, + 0xa3, 0x58, 0xbb, 0x58, 0x38, 0x58, 0xf9, 0x54, 0xe1, 0x55, 0x0c, 0x57, 0x4d, 0x57, 0x8a, 0x58, + 0x55, 0x58, 0xdd, 0x58, 0x65, 0x59, 0xb9, 0x59, 0x37, 0x59, 0xa9, 0x58, 0x39, 0x58, 0x58, 0x58, + 0xda, 0x57, 0xc1, 0x54, 0xab, 0x56, 0x2b, 0x58, 0x12, 0x58, 0xf4, 0x57, 0x99, 0x57, 0xfd, 0x58, + 0x55, 0x5a, 0xf4, 0x59, 0xc3, 0x59, 0x32, 0x58, 0xeb, 0x57, 0x96, 0x57, 0x53, 0x58, 0x04, 0x55, + 0xd1, 0x56, 0x88, 0x58, 0xf7, 0x57, 0xc4, 0x57, 0x80, 0x58, 0xd7, 0x58, 0x8c, 0x59, 0x75, 0x58, + 0x89, 0x58, 0x75, 0x57, 0x0f, 0x58, 0xe2, 0x57, 0x69, 0x58, 0x2a, 0x56, 0x9e, 0x56, 0x25, 0x58, + 0xff, 0x56, 0x5d, 0x58, 0x57, 0x58, 0x9c, 0x58, 0x7b, 0x59, 0xc8, 0x58, 0x2d, 0x58, 0x26, 0x57, + 0xe0, 0x56, 0xdc, 0x57, 0x6b, 0x58, 0x20, 0x56, 0x82, 0x56, 0x7b, 0x58, 0x19, 0x58, 0x3c, 0x58, + 0xc7, 0x58, 0xab, 0x58, 0xd9, 0x58, 0x71, 0x58, 0x19, 0x58, 0x2b, 0x57, 0x83, 0x57, 0x2c, 0x58, + 0x7f, 0x58, 0xe7, 0x55, 0xa2, 0x56, 0xa5, 0x58, 0xe5, 0x58, 0xce, 0x58, 0x93, 0x58, 0xbd, 0x58, + 0x5c, 0x58, 0x22, 0x59, 0xc5, 0x58, 0xd8, 0x57, 0x37, 0x58, 0xd3, 0x58, 0x0b, 0x59, 0x8b, 0x56, + 0x92, 0x56, 0x6c, 0x58, 0xe4, 0x58, 0x8d, 0x58, 0x49, 0x58, 0x61, 0x58, 0x7a, 0x58, 0xd6, 0x58, + 0x8a, 0x58, 0x84, 0x58, 0xff, 0x58, 0xdb, 0x58, 0x47, 0x59, 0xc3, 0x56, 0xfd, 0x55, 0xc5, 0x57, + 0x8c, 0x57, 0x30, 0x58, 0xd8, 0x57, 0xc6, 0x57, 0x65, 0x58, 0xfa, 0x58, 0xd3, 0x58, 0x2b, 0x59, + 0x5d, 0x59, 0x57, 0x59, 0x85, 0x58, 0x6a, 0x55, 0x88, 0x55, 0xf4, 0x56, 0xc6, 0x56, 0x72, 0x57, + 0x30, 0x57, 0x13, 0x57, 0xc4, 0x57, 0x87, 0x58, 0xe3, 0x58, 0xa7, 0x59, 0xe3, 0x58, 0x5f, 0x58, + 0x26, 0x58, 0x1b, 0x55, 0x06, 0x53, 0x1d, 0x54, 0x14, 0x54, 0x94, 0x54, 0x03, 0x55, 0x55, 0x55, + 0x74, 0x55, 0x18, 0x56, 0xbb, 0x56, 0xa6, 0x57, 0x6b, 0x56, 0xa6, 0x55, 0x08, 0x53, 0x9b, 0x51, + 0x64, 0x52, 0xcd, 0x53, 0xa4, 0x53, 0x9f, 0x54, 0x8c, 0x55, 0x0e, 0x55, 0x6a, 0x56, 0xe4, 0x54, + 0x04, 0x56, 0x63, 0x54, 0xa7, 0x55, 0xb4, 0x55, 0xf1, 0x54, 0x12, 0x4f, 0x1e, 0x55, 0x2a, 0x56, + 0x4d, 0x56, 0xe4, 0x56, 0xaa, 0x57, 0xce, 0x57, 0xac, 0x58, 0x8c, 0x58, 0xa6, 0x58, 0x0a, 0x58, + 0xb8, 0x58, 0x4a, 0x58, 0x6a, 0x58, 0xf0, 0x54, 0x80, 0x55, 0x3e, 0x56, 0x96, 0x56, 0x5c, 0x58, + 0xc9, 0x57, 0x0f, 0x58, 0x3e, 0x59, 0x10, 0x59, 0x7a, 0x58, 0x5c, 0x57, 0x45, 0x58, 0x8a, 0x58, + 0x12, 0x58, 0x0f, 0x55, 0x41, 0x55, 0x26, 0x57, 0x45, 0x57, 0x43, 0x57, 0xf0, 0x57, 0x50, 0x58, + 0x5c, 0x59, 0x0c, 0x59, 0x40, 0x59, 0x0d, 0x58, 0x96, 0x58, 0x52, 0x58, 0x0e, 0x58, 0x10, 0x55, + 0x38, 0x55, 0xfd, 0x57, 0xfb, 0x57, 0xc3, 0x57, 0x4d, 0x58, 0xa8, 0x58, 0xaa, 0x59, 0x28, 0x59, + 0x1b, 0x59, 0x9f, 0x58, 0xf4, 0x57, 0x1c, 0x58, 0x06, 0x57, 0x60, 0x54, 0x2a, 0x56, 0x0e, 0x58, + 0x49, 0x57, 0xc9, 0x58, 0x6d, 0x57, 0x49, 0x58, 0xd8, 0x59, 0x32, 0x5a, 0x7b, 0x59, 0x4b, 0x58, + 0xb8, 0x57, 0xfc, 0x56, 0x3c, 0x57, 0x61, 0x54, 0x9e, 0x56, 0xd6, 0x57, 0x5c, 0x57, 0x74, 0x57, + 0x4a, 0x58, 0x6b, 0x58, 0x2a, 0x59, 0x42, 0x59, 0xd5, 0x58, 0x72, 0x56, 0xe7, 0x56, 0x98, 0x57, + 0x2e, 0x58, 0x42, 0x55, 0x29, 0x56, 0x9c, 0x58, 0x30, 0x57, 0x3c, 0x57, 0x87, 0x58, 0xae, 0x58, + 0xa5, 0x58, 0x38, 0x58, 0x17, 0x58, 0x08, 0x56, 0xac, 0x56, 0x3f, 0x58, 0x7e, 0x58, 0x26, 0x56, + 0x7d, 0x55, 0xe8, 0x58, 0x19, 0x58, 0x26, 0x58, 0x7b, 0x58, 0x52, 0x58, 0xcb, 0x58, 0x2c, 0x58, + 0xfa, 0x56, 0xb3, 0x57, 0xd9, 0x57, 0x36, 0x57, 0x36, 0x58, 0xc9, 0x55, 0x84, 0x56, 0x71, 0x58, + 0x4a, 0x58, 0xb4, 0x58, 0xcb, 0x58, 0x80, 0x58, 0x40, 0x58, 0x6f, 0x58, 0xda, 0x57, 0x86, 0x58, + 0x89, 0x57, 0x52, 0x58, 0x62, 0x59, 0x62, 0x56, 0x52, 0x56, 0x08, 0x59, 0x76, 0x58, 0x37, 0x58, + 0x70, 0x57, 0x61, 0x58, 0xcd, 0x58, 0xb0, 0x58, 0x63, 0x58, 0x82, 0x57, 0xe8, 0x58, 0x04, 0x59, + 0xae, 0x59, 0x38, 0x55, 0xa9, 0x55, 0xff, 0x57, 0x6e, 0x56, 0x40, 0x58, 0x17, 0x57, 0xaa, 0x57, + 0x52, 0x57, 0xcd, 0x58, 0x44, 0x58, 0x1f, 0x59, 0x0d, 0x59, 0xe7, 0x58, 0xd2, 0x57, 0x9e, 0x55, + 0x0f, 0x55, 0x8e, 0x56, 0x37, 0x56, 0x19, 0x57, 0xa0, 0x56, 0xd2, 0x57, 0x80, 0x57, 0x4e, 0x58, + 0x29, 0x59, 0x2a, 0x59, 0xca, 0x58, 0x55, 0x58, 0x3d, 0x58, 0x3c, 0x54, 0x0a, 0x52, 0x6e, 0x54, + 0x6e, 0x54, 0xcb, 0x54, 0xdd, 0x54, 0x86, 0x53, 0x5e, 0x55, 0xd8, 0x56, 0x26, 0x56, 0x1a, 0x57, + 0xa0, 0x55, 0xb9, 0x54, 0x2d, 0x54, 0x8f, 0x52, 0x33, 0x51, 0x20, 0x54, 0xf3, 0x53, 0x16, 0x54, + 0xd9, 0x54, 0x9f, 0x54, 0x9c, 0x55, 0x39, 0x55, 0x83, 0x56, 0xc9, 0x53, 0x8e, 0x54, 0xdc, 0x55, + 0x79, 0x54, 0x64, 0x51, 0x50, 0x56, 0xf3, 0x56, 0x04, 0x55, 0x5b, 0x56, 0x4f, 0x58, 0x32, 0x58, + 0x3c, 0x58, 0xfb, 0x57, 0x8a, 0x58, 0x73, 0x58, 0x02, 0x58, 0x1b, 0x58, 0x7c, 0x58, 0x8f, 0x56, + 0xfb, 0x54, 0xf9, 0x57, 0x8b, 0x56, 0xd1, 0x57, 0xdc, 0x57, 0x4e, 0x58, 0xf8, 0x58, 0x02, 0x59, + 0xdc, 0x58, 0x1e, 0x58, 0x73, 0x58, 0xb1, 0x58, 0xab, 0x58, 0x97, 0x55, 0xef, 0x55, 0xd8, 0x57, + 0xef, 0x56, 0xff, 0x57, 0x32, 0x58, 0x93, 0x57, 0x6a, 0x59, 0x72, 0x59, 0xc7, 0x59, 0x34, 0x57, + 0xf2, 0x58, 0xac, 0x58, 0xb6, 0x58, 0x39, 0x55, 0x95, 0x55, 0x38, 0x57, 0xc5, 0x56, 0xbe, 0x57, + 0x28, 0x58, 0x3f, 0x58, 0xe8, 0x59, 0x9e, 0x59, 0x32, 0x59, 0xb1, 0x57, 0x9c, 0x58, 0xd1, 0x58, + 0x15, 0x58, 0xa3, 0x54, 0x09, 0x56, 0x16, 0x58, 0x5a, 0x56, 0xd0, 0x57, 0xd1, 0x57, 0x49, 0x58, + 0x77, 0x59, 0x65, 0x59, 0x5a, 0x59, 0x22, 0x58, 0x71, 0x58, 0x38, 0x58, 0xd7, 0x57, 0x7b, 0x55, + 0x73, 0x56, 0x5a, 0x58, 0x92, 0x57, 0xb9, 0x57, 0xba, 0x57, 0x11, 0x59, 0xb6, 0x59, 0x68, 0x59, + 0xbf, 0x58, 0x1f, 0x57, 0xd8, 0x57, 0x43, 0x57, 0x3b, 0x58, 0x9d, 0x55, 0x28, 0x56, 0x20, 0x58, + 0x8e, 0x56, 0xef, 0x56, 0xcf, 0x58, 0x68, 0x58, 0xe9, 0x58, 0xfc, 0x58, 0xed, 0x57, 0x4b, 0x56, + 0xf4, 0x57, 0x16, 0x58, 0x24, 0x58, 0xc7, 0x55, 0xb9, 0x56, 0x44, 0x58, 0x66, 0x58, 0x12, 0x58, + 0x31, 0x58, 0x92, 0x57, 0x67, 0x58, 0x99, 0x58, 0x61, 0x57, 0x4c, 0x56, 0xb3, 0x56, 0x3e, 0x58, + 0x44, 0x58, 0x9e, 0x55, 0x84, 0x56, 0x58, 0x58, 0xe6, 0x58, 0x5d, 0x59, 0x06, 0x58, 0x8e, 0x58, + 0x21, 0x58, 0x91, 0x58, 0x7e, 0x58, 0xa0, 0x58, 0x6b, 0x57, 0xa7, 0x57, 0x12, 0x59, 0xea, 0x56, + 0x05, 0x55, 0xb7, 0x58, 0xce, 0x58, 0x2b, 0x59, 0xfa, 0x57, 0xba, 0x58, 0xd6, 0x58, 0xbc, 0x58, + 0xa4, 0x58, 0x32, 0x58, 0x30, 0x59, 0x26, 0x59, 0x60, 0x59, 0x8c, 0x56, 0x8e, 0x55, 0x56, 0x58, + 0x87, 0x56, 0x0c, 0x58, 0x5a, 0x57, 0x31, 0x58, 0xde, 0x57, 0xb5, 0x58, 0x36, 0x58, 0xe9, 0x58, + 0x02, 0x59, 0x54, 0x59, 0xc4, 0x58, 0x3c, 0x57, 0x09, 0x55, 0x38, 0x57, 0xa7, 0x56, 0xce, 0x56, + 0x70, 0x56, 0x68, 0x57, 0x78, 0x57, 0x8f, 0x58, 0xb9, 0x58, 0x5e, 0x59, 0xf1, 0x58, 0xfb, 0x58, + 0xb5, 0x57, 0xe8, 0x55, 0x7f, 0x52, 0x74, 0x55, 0x05, 0x55, 0x65, 0x55, 0x23, 0x55, 0x6f, 0x54, + 0xd8, 0x55, 0x06, 0x57, 0x4f, 0x57, 0x06, 0x58, 0x61, 0x56, 0x2f, 0x56, 0x3f, 0x54, 0x26, 0x51, + 0xb9, 0x52, 0x1a, 0x55, 0xa7, 0x54, 0x41, 0x55, 0xe6, 0x55, 0x40, 0x56, 0xfe, 0x56, 0x94, 0x56, + 0x8d, 0x56, 0xa2, 0x55, 0xb0, 0x56, 0x2e, 0x56, 0x26, 0x56, 0xfc, 0x52, 0xf3, 0x54, 0x26, 0x58, + 0x5b, 0x57, 0x10, 0x58, 0x74, 0x58, 0x8a, 0x58, 0x26, 0x59, 0xcc, 0x58, 0xb2, 0x59, 0xcd, 0x57, + 0xf8, 0x58, 0xc8, 0x58, 0x3c, 0x59, 0x31, 0x56, 0xaa, 0x56, 0x26, 0x58, 0xf5, 0x55, 0x0c, 0x58, + 0xca, 0x58, 0xda, 0x58, 0xbe, 0x59, 0x82, 0x59, 0x90, 0x59, 0xe8, 0x58, 0x90, 0x59, 0xd4, 0x58, + 0x3c, 0x59, 0x91, 0x56, 0x5a, 0x56, 0x44, 0x58, 0x51, 0x57, 0xcc, 0x58, 0x98, 0x58, 0x88, 0x58, + 0x8a, 0x59, 0xf6, 0x59, 0x8e, 0x59, 0x2e, 0x59, 0x74, 0x59, 0xfc, 0x58, 0x0c, 0x59, 0xd9, 0x56, + 0xcb, 0x56, 0x52, 0x58, 0xdb, 0x57, 0x84, 0x58, 0x64, 0x58, 0xda, 0x58, 0xe0, 0x59, 0x6c, 0x5a, + 0xfe, 0x59, 0x1e, 0x59, 0x3a, 0x59, 0x3e, 0x58, 0x86, 0x58, 0x30, 0x56, 0x9d, 0x56, 0xa6, 0x58, + 0x04, 0x58, 0xe0, 0x58, 0x4a, 0x58, 0xac, 0x58, 0x44, 0x5a, 0x72, 0x5a, 0x3a, 0x5a, 0x1c, 0x59, + 0x94, 0x58, 0x74, 0x58, 0x88, 0x58, 0xed, 0x56, 0xd4, 0x57, 0xf2, 0x58, 0x60, 0x58, 0xbe, 0x58, + 0xc4, 0x58, 0xee, 0x58, 0x20, 0x5a, 0x0c, 0x5a, 0x02, 0x59, 0x68, 0x58, 0x26, 0x58, 0x26, 0x58, + 0xd0, 0x58, 0x53, 0x56, 0xec, 0x56, 0x96, 0x58, 0x2f, 0x58, 0xb6, 0x58, 0xf8, 0x58, 0x86, 0x58, + 0x4c, 0x59, 0x82, 0x59, 0x66, 0x58, 0x3e, 0x58, 0xae, 0x57, 0x88, 0x58, 0x84, 0x59, 0x74, 0x57, + 0x74, 0x56, 0x98, 0x58, 0xca, 0x58, 0x7e, 0x58, 0x8e, 0x58, 0x34, 0x59, 0x3e, 0x59, 0xe6, 0x58, + 0x6c, 0x58, 0x44, 0x58, 0x28, 0x58, 0x8e, 0x58, 0x18, 0x59, 0xa2, 0x56, 0x69, 0x56, 0xe6, 0x59, + 0x5a, 0x59, 0x92, 0x59, 0xa2, 0x58, 0x4a, 0x59, 0x0a, 0x59, 0x48, 0x59, 0x0c, 0x59, 0xc6, 0x58, + 0x6a, 0x58, 0x84, 0x58, 0x72, 0x59, 0xa5, 0x57, 0x84, 0x56, 0x38, 0x59, 0x44, 0x59, 0x2e, 0x59, + 0x88, 0x58, 0x5a, 0x59, 0xc6, 0x58, 0x58, 0x59, 0x0e, 0x59, 0x0e, 0x59, 0x3c, 0x59, 0x8a, 0x59, + 0x00, 0x5a, 0xa6, 0x57, 0xbf, 0x55, 0x5a, 0x58, 0x68, 0x58, 0x39, 0x58, 0x86, 0x58, 0xdc, 0x58, + 0x9c, 0x58, 0x18, 0x59, 0x1a, 0x59, 0x60, 0x59, 0x04, 0x5a, 0x06, 0x5a, 0x50, 0x59, 0x0d, 0x57, + 0xb5, 0x55, 0x91, 0x57, 0x36, 0x58, 0xe2, 0x56, 0xd0, 0x57, 0x04, 0x58, 0x5e, 0x58, 0x64, 0x59, + 0x56, 0x59, 0xf2, 0x59, 0x4a, 0x59, 0x9a, 0x59, 0xc3, 0x57, 0xed, 0x55, 0x3e, 0x54, 0xfe, 0x54, + 0x0e, 0x55, 0xb7, 0x55, 0xd6, 0x55, 0x0a, 0x56, 0x18, 0x56, 0x73, 0x57, 0x58, 0x57, 0x5e, 0x58, + 0xb7, 0x57, 0xb3, 0x56, 0x73, 0x55, 0xae, 0x52, 0x2f, 0x52, 0xfe, 0x53, 0x0b, 0x54, 0x61, 0x55, + 0xba, 0x55, 0x4a, 0x56, 0x5b, 0x56, 0x8d, 0x56, 0xbf, 0x56, 0xb2, 0x54, 0xd7, 0x56, 0x15, 0x55, + 0x03, 0x55, 0x97, 0x53, 0xe1, 0x55, 0xae, 0x56, 0xe3, 0x55, 0xce, 0x57, 0x66, 0x58, 0x8c, 0x57, + 0x7e, 0x58, 0x01, 0x59, 0xe6, 0x58, 0x8c, 0x57, 0x9d, 0x58, 0xec, 0x58, 0xa5, 0x58, 0x54, 0x55, + 0x8a, 0x56, 0xe7, 0x56, 0x04, 0x57, 0xe0, 0x57, 0xb0, 0x58, 0x1c, 0x59, 0x3a, 0x59, 0x51, 0x58, + 0xad, 0x58, 0xcf, 0x57, 0xc7, 0x58, 0x9f, 0x58, 0xd7, 0x58, 0x0d, 0x57, 0x4b, 0x56, 0x36, 0x58, + 0x22, 0x58, 0xe7, 0x57, 0x48, 0x58, 0x32, 0x58, 0xd4, 0x59, 0xd8, 0x59, 0x39, 0x59, 0xb8, 0x58, + 0xd0, 0x58, 0xee, 0x58, 0x68, 0x58, 0x67, 0x55, 0x31, 0x56, 0x36, 0x58, 0x2f, 0x57, 0x72, 0x58, + 0x51, 0x58, 0x11, 0x59, 0xb2, 0x58, 0xd6, 0x59, 0x69, 0x59, 0x0b, 0x59, 0xbe, 0x58, 0xb6, 0x58, + 0xd1, 0x57, 0x07, 0x56, 0xb3, 0x56, 0xc4, 0x58, 0xc6, 0x57, 0xd9, 0x57, 0xcf, 0x57, 0xf0, 0x58, + 0xdb, 0x59, 0x40, 0x5a, 0x51, 0x5a, 0xd7, 0x58, 0x73, 0x58, 0x86, 0x58, 0x77, 0x58, 0xd9, 0x55, + 0x53, 0x56, 0x73, 0x58, 0x59, 0x58, 0xad, 0x57, 0x7c, 0x58, 0xc6, 0x58, 0xca, 0x59, 0xda, 0x58, + 0x78, 0x58, 0xf5, 0x57, 0x67, 0x58, 0x1b, 0x58, 0xb6, 0x58, 0xb7, 0x56, 0xf4, 0x55, 0xec, 0x58, + 0x5a, 0x58, 0x38, 0x58, 0x70, 0x58, 0x5b, 0x59, 0x7f, 0x59, 0x06, 0x59, 0xe0, 0x58, 0x51, 0x57, + 0x9d, 0x57, 0x12, 0x58, 0x7e, 0x58, 0xdb, 0x56, 0x07, 0x56, 0x2a, 0x59, 0xb9, 0x58, 0x45, 0x58, + 0xec, 0x58, 0xe3, 0x58, 0xfc, 0x58, 0x50, 0x58, 0xe1, 0x57, 0x92, 0x58, 0xb7, 0x58, 0x34, 0x58, + 0x86, 0x58, 0xc1, 0x56, 0xa0, 0x56, 0x3e, 0x59, 0x75, 0x58, 0x04, 0x59, 0x83, 0x58, 0x46, 0x59, + 0x0d, 0x59, 0x82, 0x59, 0x14, 0x59, 0x81, 0x58, 0x49, 0x58, 0xa5, 0x58, 0x6c, 0x59, 0x8b, 0x57, + 0xa5, 0x55, 0xd6, 0x58, 0x74, 0x59, 0x81, 0x59, 0x4a, 0x58, 0xc4, 0x58, 0xe8, 0x58, 0xd6, 0x58, + 0x06, 0x59, 0x38, 0x58, 0x14, 0x59, 0x76, 0x59, 0x44, 0x59, 0x75, 0x56, 0x27, 0x56, 0xbf, 0x57, + 0x58, 0x57, 0xc7, 0x58, 0x0d, 0x58, 0xcb, 0x58, 0xc0, 0x58, 0x26, 0x59, 0xda, 0x58, 0x38, 0x59, + 0xa9, 0x59, 0x33, 0x5a, 0xb2, 0x58, 0x11, 0x56, 0x2c, 0x56, 0xeb, 0x56, 0x99, 0x57, 0x7e, 0x57, + 0xfb, 0x57, 0xf5, 0x56, 0x05, 0x58, 0x55, 0x59, 0x03, 0x59, 0x77, 0x59, 0x08, 0x59, 0xb8, 0x58, + 0xfe, 0x58, 0x33, 0x55, 0xe9, 0x53, 0xd7, 0x54, 0x2d, 0x55, 0xe2, 0x55, 0x41, 0x56, 0x55, 0x55, + 0x4f, 0x56, 0x71, 0x57, 0x09, 0x58, 0x68, 0x58, 0xeb, 0x56, 0x4c, 0x55, 0x5b, 0x54, 0x05, 0x54, + 0xb4, 0x51, 0x35, 0x54, 0x1e, 0x53, 0xcc, 0x54, 0xce, 0x55, 0x86, 0x55, 0xc4, 0x55, 0x5d, 0x55, + 0xba, 0x55, 0x37, 0x54, 0xa7, 0x54, 0xbe, 0x55, 0x00, 0x55, 0x6c, 0x52, 0x9f, 0x54, 0x2d, 0x56, + 0x37, 0x56, 0xc6, 0x56, 0xe6, 0x57, 0x3d, 0x57, 0x66, 0x58, 0x9b, 0x58, 0xaa, 0x58, 0x50, 0x57, + 0x6e, 0x58, 0x79, 0x58, 0x82, 0x58, 0xde, 0x55, 0x4c, 0x55, 0x04, 0x57, 0xda, 0x54, 0x18, 0x57, + 0xba, 0x57, 0x48, 0x58, 0xfb, 0x58, 0xef, 0x58, 0x7b, 0x58, 0x43, 0x58, 0x0f, 0x58, 0xad, 0x58, + 0xb0, 0x58, 0x86, 0x55, 0x9c, 0x55, 0x4d, 0x57, 0xfe, 0x55, 0xd4, 0x57, 0x3b, 0x57, 0x15, 0x58, + 0x4f, 0x59, 0x79, 0x59, 0xad, 0x59, 0x61, 0x58, 0x29, 0x58, 0xbf, 0x58, 0x23, 0x58, 0x0f, 0x55, + 0x53, 0x54, 0xea, 0x56, 0xee, 0x56, 0x9d, 0x57, 0x5d, 0x58, 0x23, 0x58, 0x05, 0x59, 0x6e, 0x59, + 0x4a, 0x59, 0x84, 0x58, 0x86, 0x58, 0x2f, 0x58, 0x5c, 0x57, 0xd8, 0x54, 0x88, 0x55, 0x55, 0x58, + 0x1f, 0x57, 0xdc, 0x57, 0x77, 0x57, 0xbe, 0x58, 0x78, 0x59, 0x49, 0x59, 0xe3, 0x59, 0x58, 0x58, + 0x21, 0x58, 0x14, 0x56, 0x09, 0x58, 0x76, 0x55, 0xef, 0x55, 0x9a, 0x58, 0xfe, 0x57, 0xde, 0x56, + 0x1c, 0x58, 0x27, 0x58, 0x29, 0x59, 0x8b, 0x58, 0xcf, 0x58, 0x20, 0x57, 0x23, 0x57, 0x13, 0x58, + 0x40, 0x58, 0xb3, 0x55, 0xd6, 0x55, 0x6d, 0x58, 0x52, 0x57, 0x9c, 0x57, 0x30, 0x58, 0x7d, 0x58, + 0x2d, 0x59, 0xa8, 0x57, 0xce, 0x57, 0xdc, 0x56, 0x9b, 0x55, 0x1e, 0x57, 0xa2, 0x57, 0x0e, 0x56, + 0x09, 0x55, 0x44, 0x58, 0x1b, 0x58, 0xe3, 0x58, 0x17, 0x59, 0xa5, 0x58, 0x56, 0x58, 0xea, 0x57, + 0x13, 0x57, 0x17, 0x57, 0x6e, 0x57, 0x56, 0x57, 0x6b, 0x58, 0x30, 0x56, 0x5c, 0x55, 0x55, 0x58, + 0x66, 0x58, 0x85, 0x58, 0x4a, 0x58, 0xb3, 0x58, 0xef, 0x58, 0xdc, 0x58, 0xb7, 0x58, 0xc6, 0x57, + 0x20, 0x58, 0xbf, 0x58, 0xbb, 0x58, 0x6f, 0x56, 0xf2, 0x55, 0xaf, 0x58, 0x5c, 0x58, 0xa0, 0x58, + 0x34, 0x58, 0x19, 0x58, 0x76, 0x58, 0xd5, 0x58, 0x3d, 0x58, 0x10, 0x58, 0xdc, 0x58, 0x8e, 0x58, + 0x47, 0x59, 0x48, 0x57, 0x9a, 0x54, 0x1f, 0x58, 0x94, 0x56, 0x30, 0x58, 0xa6, 0x57, 0x99, 0x57, + 0x59, 0x57, 0xa8, 0x58, 0x89, 0x58, 0x9d, 0x58, 0xed, 0x58, 0x9e, 0x59, 0x4b, 0x58, 0xa2, 0x55, + 0x5f, 0x54, 0xa8, 0x57, 0x3e, 0x56, 0x0d, 0x56, 0x46, 0x56, 0x52, 0x57, 0x74, 0x57, 0x9d, 0x58, + 0x6d, 0x58, 0xf9, 0x58, 0x7f, 0x59, 0x9d, 0x58, 0x8e, 0x57, 0x39, 0x55, 0xcc, 0x51, 0x12, 0x54, + 0x48, 0x54, 0x4e, 0x54, 0xe1, 0x54, 0x24, 0x55, 0x2f, 0x55, 0x2b, 0x56, 0x70, 0x56, 0x86, 0x57, + 0x7f, 0x55, 0x12, 0x55, 0x20, 0x51, 0x68, 0x51, 0x1f, 0x53, 0xac, 0x54, 0x74, 0x54, 0x45, 0x55, + 0xa1, 0x56, 0xaf, 0x55, 0xbe, 0x56, 0x00, 0x56, 0x64, 0x57, 0x6f, 0x54, 0x5b, 0x55, 0x78, 0x56, + 0xbe, 0x55, 0x45, 0x52, 0xe2, 0x55, 0x13, 0x58, 0x63, 0x55, 0xde, 0x56, 0x0c, 0x58, 0xec, 0x56, + 0xdd, 0x58, 0x04, 0x59, 0x84, 0x59, 0xd8, 0x57, 0xe4, 0x58, 0x7a, 0x58, 0xa1, 0x58, 0x10, 0x56, + 0x24, 0x56, 0xfb, 0x56, 0xef, 0x55, 0x44, 0x58, 0xab, 0x58, 0x8c, 0x58, 0x1c, 0x59, 0x3b, 0x59, + 0x4c, 0x59, 0x3d, 0x58, 0xd3, 0x58, 0xe2, 0x58, 0xd4, 0x58, 0xfc, 0x55, 0x83, 0x56, 0x0e, 0x58, + 0x1c, 0x57, 0xf4, 0x56, 0x19, 0x58, 0x4e, 0x58, 0x3d, 0x5a, 0xb7, 0x59, 0x98, 0x59, 0xb2, 0x58, + 0xfb, 0x58, 0xfd, 0x58, 0x7f, 0x58, 0xa2, 0x55, 0x86, 0x55, 0x84, 0x57, 0x23, 0x57, 0x32, 0x58, + 0x1a, 0x58, 0xae, 0x58, 0x20, 0x5a, 0xcd, 0x59, 0xdd, 0x58, 0x0c, 0x59, 0x14, 0x59, 0x90, 0x58, + 0x28, 0x58, 0x07, 0x55, 0x7c, 0x56, 0x50, 0x58, 0xf0, 0x57, 0x83, 0x58, 0x2c, 0x58, 0x1f, 0x59, + 0xe2, 0x59, 0x6a, 0x5a, 0xd4, 0x59, 0x85, 0x58, 0x86, 0x58, 0x1f, 0x58, 0x40, 0x57, 0x53, 0x55, + 0xf4, 0x56, 0x86, 0x58, 0xe3, 0x57, 0x0c, 0x58, 0xab, 0x58, 0xfb, 0x58, 0x82, 0x59, 0x03, 0x5a, + 0xca, 0x58, 0x88, 0x57, 0x69, 0x57, 0x5c, 0x58, 0x72, 0x58, 0x76, 0x56, 0x9e, 0x56, 0x6e, 0x58, + 0xb3, 0x57, 0xf6, 0x57, 0x22, 0x58, 0x70, 0x59, 0x3b, 0x59, 0xab, 0x58, 0xec, 0x57, 0x09, 0x57, + 0xe1, 0x56, 0xe9, 0x57, 0x82, 0x58, 0x78, 0x56, 0xe6, 0x55, 0x3e, 0x58, 0xde, 0x58, 0xc5, 0x58, + 0x78, 0x58, 0xa5, 0x58, 0xab, 0x58, 0xa0, 0x58, 0x54, 0x58, 0x6d, 0x58, 0x03, 0x58, 0x06, 0x58, + 0x6e, 0x58, 0x06, 0x56, 0x97, 0x56, 0xa2, 0x58, 0xc1, 0x58, 0x15, 0x59, 0xa8, 0x58, 0x12, 0x59, + 0xdc, 0x58, 0x5f, 0x59, 0x12, 0x59, 0x94, 0x58, 0x6a, 0x58, 0xd9, 0x58, 0x18, 0x59, 0x13, 0x57, + 0x36, 0x56, 0xf9, 0x58, 0x9c, 0x58, 0xfc, 0x58, 0xdb, 0x58, 0xf2, 0x58, 0xa8, 0x58, 0xfa, 0x58, + 0xbf, 0x58, 0xbe, 0x58, 0xff, 0x58, 0xde, 0x58, 0x0e, 0x5a, 0xbf, 0x57, 0x41, 0x55, 0x3c, 0x58, + 0xf6, 0x56, 0x6d, 0x58, 0xa6, 0x57, 0x4e, 0x57, 0x38, 0x58, 0xdc, 0x58, 0xfe, 0x58, 0x59, 0x59, + 0x3a, 0x59, 0xed, 0x59, 0xc6, 0x58, 0x0f, 0x56, 0x10, 0x56, 0x34, 0x57, 0x36, 0x55, 0xa4, 0x57, + 0x15, 0x58, 0x28, 0x58, 0xed, 0x57, 0xb2, 0x58, 0x25, 0x59, 0xcc, 0x59, 0xff, 0x59, 0x0d, 0x59, + 0x3a, 0x57, 0x2a, 0x55, 0x88, 0x53, 0x5b, 0x54, 0xc7, 0x54, 0xd6, 0x54, 0xae, 0x55, 0xcc, 0x55, + 0xa4, 0x55, 0x96, 0x56, 0x1a, 0x57, 0x30, 0x58, 0x6c, 0x56, 0x0a, 0x55, 0xae, 0x53, 0x67, 0x53, + 0xc9, 0x51, 0x59, 0x54, 0x02, 0x54, 0xfc, 0x54, 0x48, 0x56, 0x17, 0x56, 0x0e, 0x56, 0xe7, 0x55, + 0x1d, 0x56, 0x41, 0x54, 0xb5, 0x56, 0x47, 0x55, 0x62, 0x55, 0x9e, 0x51, 0xfb, 0x54, 0x67, 0x56, + 0xa8, 0x56, 0xdf, 0x57, 0x58, 0x58, 0xd8, 0x57, 0xbe, 0x58, 0x8e, 0x58, 0xcc, 0x58, 0xbd, 0x56, + 0xc6, 0x58, 0x2c, 0x58, 0xc8, 0x58, 0x55, 0x55, 0xd5, 0x55, 0xb8, 0x55, 0xee, 0x55, 0x05, 0x58, + 0x4e, 0x58, 0x34, 0x58, 0x04, 0x59, 0x18, 0x59, 0x98, 0x58, 0x0e, 0x58, 0x6a, 0x58, 0x3e, 0x58, + 0x7a, 0x58, 0x26, 0x56, 0x05, 0x55, 0xae, 0x57, 0xcb, 0x56, 0x36, 0x58, 0xd0, 0x57, 0xa8, 0x58, + 0xd8, 0x58, 0x3e, 0x59, 0xfe, 0x58, 0xec, 0x58, 0x16, 0x58, 0xb6, 0x58, 0xd5, 0x57, 0xe9, 0x55, + 0x6b, 0x56, 0x4d, 0x57, 0x92, 0x56, 0x0a, 0x58, 0xd4, 0x56, 0xba, 0x58, 0x44, 0x59, 0x1a, 0x59, + 0x46, 0x59, 0xca, 0x58, 0x4e, 0x58, 0xc3, 0x57, 0x97, 0x57, 0xfd, 0x54, 0xd5, 0x56, 0x26, 0x58, + 0x59, 0x57, 0x5a, 0x58, 0xac, 0x57, 0x3c, 0x58, 0xb2, 0x59, 0xa8, 0x59, 0x9c, 0x59, 0x64, 0x58, + 0xb6, 0x58, 0xf7, 0x57, 0x4e, 0x58, 0x4a, 0x55, 0xb1, 0x56, 0x62, 0x58, 0x39, 0x57, 0xdd, 0x57, + 0x7e, 0x58, 0x94, 0x58, 0xa0, 0x59, 0xf8, 0x58, 0x1c, 0x58, 0x78, 0x56, 0x5f, 0x57, 0x10, 0x58, + 0x66, 0x58, 0x17, 0x55, 0x67, 0x56, 0x94, 0x58, 0xe1, 0x56, 0xad, 0x57, 0xdd, 0x57, 0x08, 0x59, + 0xee, 0x58, 0x98, 0x58, 0x05, 0x58, 0xcc, 0x56, 0x49, 0x56, 0x0a, 0x58, 0x70, 0x58, 0xb8, 0x55, + 0x4f, 0x55, 0xd2, 0x58, 0x06, 0x58, 0x0f, 0x58, 0x09, 0x58, 0x02, 0x59, 0xe0, 0x58, 0x2c, 0x58, + 0x30, 0x58, 0x2c, 0x58, 0x7e, 0x57, 0xcf, 0x57, 0xae, 0x58, 0x4c, 0x56, 0x2e, 0x56, 0xcc, 0x58, + 0x08, 0x59, 0xb6, 0x58, 0x58, 0x58, 0x0a, 0x59, 0x60, 0x58, 0x02, 0x59, 0x7a, 0x58, 0x0d, 0x58, + 0x12, 0x58, 0x8c, 0x58, 0xfa, 0x58, 0xef, 0x56, 0xc1, 0x56, 0x04, 0x58, 0x10, 0x59, 0x4e, 0x58, + 0x36, 0x58, 0x66, 0x58, 0x3e, 0x58, 0x30, 0x59, 0x58, 0x58, 0x21, 0x58, 0x8c, 0x58, 0xbe, 0x58, + 0x60, 0x59, 0xfb, 0x55, 0x70, 0x55, 0x2d, 0x57, 0x56, 0x58, 0xe1, 0x57, 0x16, 0x57, 0xb0, 0x57, + 0x4e, 0x58, 0x88, 0x58, 0x6e, 0x58, 0xc8, 0x58, 0x54, 0x59, 0x5c, 0x59, 0x9e, 0x58, 0x2d, 0x55, + 0x53, 0x55, 0x0d, 0x56, 0x13, 0x57, 0x18, 0x58, 0x47, 0x56, 0x73, 0x56, 0x52, 0x58, 0xee, 0x58, + 0xaa, 0x58, 0x76, 0x59, 0x9c, 0x58, 0x40, 0x58, 0xdb, 0x57, 0xb9, 0x54, 0x08, 0x53, 0x3b, 0x54, + 0x5b, 0x54, 0x97, 0x55, 0x8a, 0x55, 0x61, 0x54, 0x08, 0x55, 0x99, 0x56, 0x6d, 0x57, 0xe9, 0x57, + 0x4f, 0x56, 0x55, 0x54, 0xbf, 0x54, 0x66, 0x52, 0xe2, 0x4d, 0x1a, 0x53, 0x36, 0x54, 0x9b, 0x54, + 0x58, 0x55, 0xef, 0x54, 0xed, 0x55, 0x32, 0x55, 0x04, 0x56, 0x5d, 0x54, 0x36, 0x55, 0x78, 0x54, + 0xc6, 0x55, 0xb2, 0x51, 0x8e, 0x55, 0x80, 0x56, 0x34, 0x55, 0x7e, 0x56, 0x77, 0x58, 0x84, 0x57, + 0xde, 0x58, 0x83, 0x58, 0xc7, 0x58, 0x6c, 0x57, 0x6c, 0x58, 0xd6, 0x58, 0x53, 0x58, 0xbc, 0x54, + 0xc2, 0x55, 0x8e, 0x56, 0x08, 0x57, 0xac, 0x57, 0xd0, 0x58, 0xca, 0x58, 0xf1, 0x58, 0x1b, 0x59, + 0xa1, 0x58, 0x57, 0x58, 0xc3, 0x58, 0x79, 0x58, 0x91, 0x58, 0xe6, 0x55, 0xdb, 0x56, 0x44, 0x57, + 0x24, 0x56, 0x24, 0x57, 0x0b, 0x58, 0x0a, 0x58, 0xf8, 0x59, 0x97, 0x59, 0x9f, 0x59, 0x8a, 0x58, + 0xef, 0x58, 0x00, 0x59, 0x37, 0x58, 0x53, 0x55, 0x64, 0x55, 0x1f, 0x57, 0x9e, 0x57, 0xb6, 0x57, + 0x31, 0x58, 0xdd, 0x58, 0xeb, 0x58, 0x01, 0x5a, 0x4b, 0x59, 0x80, 0x58, 0xa0, 0x58, 0xc7, 0x58, + 0xe8, 0x57, 0xb3, 0x54, 0x8c, 0x56, 0x99, 0x58, 0x42, 0x57, 0xea, 0x57, 0x45, 0x58, 0x57, 0x58, + 0xb4, 0x59, 0x5b, 0x5a, 0xe1, 0x59, 0x96, 0x58, 0x6d, 0x58, 0x29, 0x58, 0x07, 0x58, 0x24, 0x55, + 0x66, 0x56, 0x7e, 0x58, 0xe6, 0x57, 0xda, 0x57, 0x7d, 0x58, 0xb4, 0x58, 0x4d, 0x59, 0x5b, 0x59, + 0xc5, 0x58, 0x8d, 0x57, 0xfa, 0x56, 0x78, 0x57, 0xbc, 0x58, 0x86, 0x55, 0x3b, 0x56, 0x2a, 0x58, + 0xa6, 0x57, 0x5c, 0x58, 0xc6, 0x57, 0xe2, 0x58, 0x67, 0x59, 0x4a, 0x59, 0x09, 0x59, 0x58, 0x56, + 0x5c, 0x56, 0x7a, 0x58, 0xe3, 0x58, 0xb8, 0x55, 0x43, 0x56, 0x1e, 0x59, 0x73, 0x58, 0x83, 0x57, + 0x1a, 0x58, 0x97, 0x58, 0x8d, 0x58, 0x3e, 0x58, 0xb7, 0x58, 0x7d, 0x57, 0x2a, 0x58, 0x3d, 0x57, + 0xaf, 0x58, 0xba, 0x56, 0xba, 0x55, 0x05, 0x59, 0x97, 0x58, 0x1f, 0x59, 0xf1, 0x58, 0x51, 0x58, + 0xbf, 0x58, 0x00, 0x59, 0xbc, 0x58, 0xb0, 0x58, 0xd4, 0x57, 0x3f, 0x58, 0x33, 0x59, 0x2f, 0x57, + 0x34, 0x55, 0xe4, 0x58, 0xc4, 0x58, 0xd8, 0x58, 0xd9, 0x58, 0xc9, 0x58, 0xcb, 0x58, 0x86, 0x58, + 0x75, 0x58, 0x50, 0x58, 0x2e, 0x59, 0x07, 0x59, 0xfb, 0x58, 0xbe, 0x56, 0xce, 0x54, 0x6f, 0x58, + 0xce, 0x57, 0x2f, 0x58, 0x6c, 0x57, 0x31, 0x58, 0xb7, 0x58, 0x6b, 0x59, 0x60, 0x58, 0x33, 0x59, + 0xf2, 0x58, 0xd1, 0x59, 0xe5, 0x58, 0x22, 0x56, 0x3f, 0x55, 0xe4, 0x56, 0x29, 0x57, 0xb1, 0x57, + 0x52, 0x56, 0xdd, 0x56, 0xd0, 0x56, 0x41, 0x59, 0x19, 0x59, 0x61, 0x59, 0xdd, 0x58, 0xd9, 0x58, + 0x21, 0x58, 0x5e, 0x55, 0x5c, 0x51, 0xe0, 0x54, 0x29, 0x54, 0xc6, 0x54, 0x8c, 0x55, 0x61, 0x54, + 0x31, 0x55, 0x70, 0x56, 0x6d, 0x57, 0x2b, 0x58, 0x98, 0x56, 0x3b, 0x55, 0x2a, 0x52, 0x94, 0x52, + 0x4a, 0x52, 0xd4, 0x54, 0x03, 0x53, 0x2b, 0x55, 0xed, 0x55, 0x0e, 0x56, 0x3f, 0x56, 0x8e, 0x55, + 0x9d, 0x56, 0x74, 0x55, 0xab, 0x55, 0x49, 0x55, 0x11, 0x55, 0x13, 0x51, 0x83, 0x55, 0x62, 0x56, + 0x90, 0x56, 0xee, 0x57, 0x7a, 0x57, 0x9a, 0x57, 0x65, 0x58, 0x75, 0x58, 0xc4, 0x58, 0xdf, 0x56, + 0x42, 0x58, 0xc3, 0x58, 0xaa, 0x58, 0x90, 0x55, 0xcb, 0x54, 0x01, 0x57, 0x04, 0x57, 0xdf, 0x56, + 0x46, 0x58, 0x42, 0x58, 0xeb, 0x58, 0xea, 0x58, 0xd4, 0x58, 0x8d, 0x57, 0xad, 0x58, 0x8c, 0x58, + 0xdc, 0x58, 0xec, 0x55, 0xde, 0x55, 0x23, 0x58, 0xa4, 0x57, 0x0e, 0x58, 0x07, 0x58, 0x6e, 0x58, + 0xda, 0x58, 0xd4, 0x59, 0x89, 0x59, 0x24, 0x59, 0xb8, 0x58, 0xb9, 0x58, 0x5c, 0x58, 0xf7, 0x54, + 0xa3, 0x55, 0x2e, 0x57, 0xc6, 0x56, 0x4a, 0x57, 0x0a, 0x58, 0xa0, 0x58, 0x05, 0x59, 0xe5, 0x59, + 0x70, 0x59, 0xe9, 0x58, 0x5e, 0x58, 0x6d, 0x58, 0xd4, 0x57, 0x90, 0x55, 0xe3, 0x55, 0xc2, 0x57, + 0xe4, 0x57, 0x5b, 0x58, 0x58, 0x57, 0xa3, 0x58, 0xeb, 0x59, 0xff, 0x59, 0x11, 0x5a, 0xb5, 0x58, + 0x0f, 0x58, 0xba, 0x57, 0x1b, 0x58, 0x40, 0x55, 0xa0, 0x56, 0x4c, 0x58, 0xe1, 0x57, 0xf4, 0x57, + 0x2c, 0x58, 0xd4, 0x58, 0x4e, 0x59, 0x92, 0x58, 0xc5, 0x58, 0x5a, 0x57, 0x27, 0x58, 0xdc, 0x57, + 0x32, 0x58, 0x36, 0x56, 0x51, 0x56, 0x83, 0x58, 0xa6, 0x57, 0x01, 0x58, 0x2d, 0x58, 0x46, 0x58, + 0x64, 0x59, 0x7f, 0x58, 0x50, 0x58, 0x4e, 0x57, 0xbd, 0x57, 0x1e, 0x58, 0x22, 0x58, 0x70, 0x56, + 0x19, 0x56, 0xa6, 0x58, 0x10, 0x58, 0x96, 0x57, 0x92, 0x58, 0x51, 0x58, 0xf8, 0x58, 0x36, 0x58, + 0x74, 0x57, 0xf9, 0x57, 0xb9, 0x57, 0xe9, 0x57, 0xd2, 0x58, 0x2c, 0x56, 0x87, 0x55, 0x01, 0x59, + 0xe4, 0x58, 0xf6, 0x58, 0xe1, 0x58, 0x0c, 0x59, 0xf4, 0x58, 0xee, 0x58, 0x9a, 0x58, 0xd9, 0x57, + 0xea, 0x57, 0xd0, 0x58, 0x44, 0x59, 0x34, 0x56, 0xa4, 0x55, 0xcd, 0x58, 0xbf, 0x58, 0x67, 0x58, + 0xea, 0x57, 0x6e, 0x58, 0x94, 0x58, 0xd4, 0x58, 0x9a, 0x58, 0x36, 0x58, 0xe6, 0x58, 0xcf, 0x58, + 0x72, 0x59, 0xeb, 0x56, 0xf3, 0x54, 0xda, 0x57, 0x41, 0x57, 0x26, 0x58, 0xbf, 0x57, 0x22, 0x58, + 0x1c, 0x58, 0xb6, 0x58, 0x7f, 0x58, 0xe5, 0x58, 0xcb, 0x58, 0x92, 0x59, 0xcc, 0x58, 0xf6, 0x55, + 0xc8, 0x54, 0x5c, 0x57, 0x64, 0x57, 0xfe, 0x56, 0xcc, 0x57, 0xe9, 0x56, 0xc8, 0x57, 0xb8, 0x58, + 0xe6, 0x58, 0x8d, 0x59, 0xe8, 0x58, 0x7c, 0x58, 0x2e, 0x58, 0x14, 0x55, 0x67, 0x53, 0xa2, 0x54, + 0xb2, 0x54, 0x06, 0x55, 0x05, 0x55, 0xb8, 0x54, 0x09, 0x55, 0x7e, 0x56, 0xd8, 0x56, 0xda, 0x57, + 0x8e, 0x56, 0x96, 0x55, 0x22, 0x54, 0x4e, 0x53, 0xef, 0x53, 0x02, 0x56, 0xec, 0x54, 0xac, 0x55, + 0x09, 0x57, 0xf4, 0x55, 0x16, 0x58, 0x89, 0x56, 0x63, 0x57, 0xfc, 0x55, 0xf7, 0x56, 0x47, 0x56, + 0xa4, 0x57, 0x3a, 0x53, 0x51, 0x57, 0xc7, 0x57, 0xe7, 0x55, 0x18, 0x58, 0x52, 0x58, 0x3a, 0x58, + 0x5e, 0x59, 0x02, 0x59, 0x3c, 0x59, 0x54, 0x58, 0xfa, 0x58, 0xbe, 0x58, 0x96, 0x58, 0xb1, 0x55, + 0xf9, 0x55, 0xf9, 0x57, 0xab, 0x57, 0xaa, 0x58, 0xc6, 0x58, 0xa6, 0x58, 0xd8, 0x59, 0x42, 0x59, + 0x9a, 0x59, 0x52, 0x59, 0xf4, 0x58, 0x12, 0x59, 0x12, 0x59, 0x27, 0x57, 0xfb, 0x56, 0xdb, 0x57, + 0xe4, 0x56, 0x28, 0x58, 0x0a, 0x58, 0x96, 0x58, 0x8c, 0x59, 0x36, 0x59, 0x18, 0x5a, 0x1a, 0x59, + 0xd2, 0x58, 0xea, 0x58, 0x7c, 0x58, 0xc9, 0x56, 0xdb, 0x55, 0x2d, 0x58, 0x98, 0x58, 0x33, 0x58, + 0xba, 0x58, 0x3c, 0x59, 0xfe, 0x59, 0xdc, 0x59, 0x30, 0x5a, 0x70, 0x59, 0x28, 0x59, 0x7f, 0x58, + 0xae, 0x58, 0x5e, 0x55, 0x74, 0x56, 0xa0, 0x58, 0x3c, 0x58, 0xc8, 0x58, 0xa8, 0x58, 0x30, 0x59, + 0x52, 0x5a, 0x7c, 0x5a, 0xe0, 0x59, 0x4e, 0x59, 0x61, 0x58, 0x98, 0x57, 0x64, 0x58, 0x79, 0x56, + 0x7a, 0x56, 0xe2, 0x58, 0x96, 0x58, 0x6b, 0x58, 0x1a, 0x58, 0x68, 0x59, 0x0a, 0x5a, 0x80, 0x59, + 0x42, 0x59, 0x1a, 0x58, 0xdb, 0x57, 0x60, 0x58, 0x9e, 0x58, 0x5d, 0x56, 0x2f, 0x56, 0x1e, 0x59, + 0x8a, 0x58, 0x53, 0x58, 0xd6, 0x58, 0x06, 0x59, 0x64, 0x59, 0xa8, 0x58, 0x94, 0x58, 0x00, 0x58, + 0xe4, 0x57, 0x06, 0x59, 0xba, 0x58, 0x11, 0x57, 0xd6, 0x56, 0x2a, 0x59, 0xc6, 0x58, 0x6c, 0x58, + 0xa6, 0x58, 0x4a, 0x59, 0x0a, 0x59, 0xfa, 0x58, 0xb8, 0x58, 0x76, 0x58, 0x2a, 0x58, 0x28, 0x58, + 0xb2, 0x58, 0xd9, 0x56, 0x05, 0x57, 0xda, 0x58, 0x24, 0x59, 0x5c, 0x59, 0x5a, 0x59, 0xa6, 0x59, + 0x28, 0x59, 0xd6, 0x59, 0xd2, 0x58, 0x38, 0x58, 0x68, 0x58, 0xee, 0x58, 0xd0, 0x59, 0x22, 0x57, + 0x42, 0x56, 0x38, 0x59, 0xc0, 0x58, 0x4c, 0x59, 0x5c, 0x58, 0x9a, 0x58, 0x1a, 0x59, 0x38, 0x59, + 0xe2, 0x58, 0x1a, 0x59, 0x8a, 0x59, 0xc4, 0x59, 0xf8, 0x59, 0x33, 0x57, 0x07, 0x56, 0x8a, 0x58, + 0xf8, 0x57, 0xd8, 0x58, 0x5e, 0x58, 0x5f, 0x58, 0xc0, 0x58, 0x42, 0x59, 0x74, 0x59, 0x54, 0x59, + 0x6e, 0x59, 0x18, 0x5a, 0xe0, 0x58, 0xe3, 0x56, 0x53, 0x55, 0xd9, 0x57, 0xab, 0x57, 0x36, 0x58, + 0xf2, 0x57, 0x4c, 0x58, 0x4a, 0x58, 0x30, 0x59, 0x9a, 0x59, 0x5a, 0x5a, 0x2e, 0x59, 0xce, 0x58, + 0x1c, 0x58, 0xf1, 0x55, 0x2d, 0x54, 0x9f, 0x55, 0x87, 0x55, 0xd9, 0x55, 0x60, 0x56, 0x95, 0x56, + 0x13, 0x56, 0x8c, 0x57, 0x4f, 0x57, 0x5f, 0x58, 0xaf, 0x56, 0xa3, 0x56, 0x43, 0x55, 0x26, 0x54, + 0x65, 0x52, 0xc7, 0x54, 0x9c, 0x54, 0x92, 0x56, 0xfa, 0x56, 0xa5, 0x56, 0xc8, 0x56, 0x20, 0x57, + 0x4d, 0x56, 0x37, 0x55, 0xfa, 0x55, 0x8c, 0x56, 0x53, 0x55, 0x6a, 0x53, 0x69, 0x56, 0x69, 0x56, + 0x4e, 0x56, 0x02, 0x58, 0x90, 0x58, 0x9a, 0x58, 0x26, 0x59, 0xe9, 0x59, 0x1f, 0x5a, 0x3c, 0x58, + 0x03, 0x59, 0x28, 0x59, 0x1b, 0x59, 0x0d, 0x56, 0x49, 0x57, 0x4d, 0x57, 0xfc, 0x55, 0xa8, 0x57, + 0x2b, 0x59, 0x75, 0x58, 0xdb, 0x59, 0xc8, 0x59, 0xc7, 0x59, 0x92, 0x58, 0x11, 0x59, 0x1d, 0x59, + 0x45, 0x59, 0x01, 0x57, 0x5a, 0x56, 0x9a, 0x58, 0x21, 0x58, 0xe2, 0x57, 0x75, 0x58, 0xeb, 0x58, + 0x38, 0x59, 0x31, 0x5a, 0xce, 0x59, 0x40, 0x59, 0x1b, 0x59, 0x7a, 0x59, 0x2f, 0x59, 0xcb, 0x56, + 0x1b, 0x57, 0xeb, 0x57, 0x54, 0x58, 0x3f, 0x58, 0xf4, 0x58, 0x9f, 0x59, 0x7e, 0x59, 0x38, 0x5a, + 0xc1, 0x59, 0x03, 0x59, 0xab, 0x58, 0x15, 0x59, 0x43, 0x58, 0x19, 0x56, 0x98, 0x57, 0x71, 0x59, + 0x81, 0x58, 0xcc, 0x58, 0xb4, 0x58, 0x37, 0x5a, 0xa5, 0x5a, 0x7c, 0x5a, 0xbf, 0x5a, 0xef, 0x58, + 0x8f, 0x58, 0x5f, 0x58, 0x91, 0x58, 0x97, 0x56, 0x33, 0x56, 0x25, 0x59, 0xbe, 0x58, 0x33, 0x58, + 0xbd, 0x58, 0xce, 0x59, 0x08, 0x5a, 0xaf, 0x59, 0x7c, 0x59, 0xad, 0x57, 0x2b, 0x58, 0x52, 0x58, + 0xf9, 0x58, 0x78, 0x57, 0x73, 0x57, 0xde, 0x58, 0x14, 0x58, 0x45, 0x58, 0xf8, 0x58, 0x5a, 0x59, + 0x18, 0x5a, 0x15, 0x59, 0xd9, 0x58, 0x5b, 0x58, 0x46, 0x57, 0x36, 0x58, 0xdb, 0x58, 0xe5, 0x57, + 0xf1, 0x56, 0x15, 0x59, 0x07, 0x59, 0xce, 0x58, 0x40, 0x59, 0x7d, 0x59, 0x17, 0x59, 0x47, 0x59, + 0xad, 0x58, 0x22, 0x58, 0x8a, 0x58, 0x90, 0x58, 0x5b, 0x59, 0xa4, 0x57, 0x7d, 0x56, 0xd7, 0x59, + 0xe8, 0x58, 0xd8, 0x58, 0x15, 0x59, 0xad, 0x59, 0x57, 0x59, 0x9f, 0x59, 0x68, 0x59, 0xc4, 0x58, + 0xe8, 0x58, 0xb6, 0x58, 0x5b, 0x59, 0x67, 0x58, 0x16, 0x57, 0x05, 0x59, 0x50, 0x59, 0x08, 0x5a, + 0xe2, 0x58, 0x9f, 0x58, 0x01, 0x59, 0x73, 0x59, 0x43, 0x59, 0xb7, 0x58, 0x66, 0x59, 0xe0, 0x59, + 0x61, 0x59, 0xf4, 0x57, 0x50, 0x55, 0x6c, 0x58, 0x7c, 0x58, 0x07, 0x59, 0x79, 0x58, 0x75, 0x58, + 0xd3, 0x58, 0xf2, 0x58, 0x6c, 0x59, 0x63, 0x59, 0xb5, 0x59, 0x13, 0x5a, 0x5e, 0x59, 0x89, 0x55, + 0x47, 0x55, 0x65, 0x57, 0x01, 0x58, 0x6b, 0x58, 0x93, 0x57, 0xd7, 0x57, 0xe1, 0x58, 0x8c, 0x59, + 0x3b, 0x59, 0x06, 0x5a, 0xb7, 0x59, 0x80, 0x59, 0x7e, 0x58, 0xc5, 0x56, 0xa9, 0x54, 0x4a, 0x54, + 0x6d, 0x55, 0x4b, 0x56, 0xe8, 0x56, 0xc9, 0x55, 0xaa, 0x54, 0x84, 0x57, 0x5b, 0x58, 0x62, 0x58, + 0x54, 0x57, 0xf1, 0x55, 0x45, 0x55, 0x86, 0x52, 0x6e, 0x53, 0xc5, 0x54, 0x76, 0x54, 0x9b, 0x54, + 0xe6, 0x55, 0xe2, 0x55, 0xa2, 0x56, 0x64, 0x56, 0x75, 0x55, 0x90, 0x54, 0x36, 0x56, 0x5d, 0x55, + 0x2d, 0x56, 0xda, 0x52, 0x70, 0x55, 0x72, 0x57, 0x96, 0x56, 0x3a, 0x58, 0xaf, 0x58, 0xa4, 0x58, + 0x80, 0x59, 0x13, 0x59, 0x35, 0x59, 0xa0, 0x57, 0xa5, 0x58, 0xbb, 0x58, 0xfb, 0x58, 0xde, 0x54, + 0xc6, 0x56, 0xb9, 0x56, 0xba, 0x56, 0x34, 0x58, 0x3d, 0x58, 0x8a, 0x58, 0xaa, 0x59, 0x81, 0x59, + 0x4f, 0x59, 0x7a, 0x58, 0xb9, 0x58, 0x26, 0x59, 0xec, 0x58, 0x8a, 0x56, 0x4e, 0x56, 0xc9, 0x57, + 0x32, 0x56, 0x64, 0x58, 0xb7, 0x58, 0x5c, 0x58, 0xe0, 0x59, 0x3a, 0x59, 0x86, 0x59, 0x10, 0x59, + 0xd0, 0x58, 0x1d, 0x59, 0x98, 0x58, 0xe6, 0x56, 0xab, 0x55, 0xb4, 0x57, 0xb4, 0x57, 0x45, 0x58, + 0x7b, 0x58, 0x10, 0x59, 0xf8, 0x58, 0xfa, 0x59, 0x66, 0x59, 0xe2, 0x58, 0xe3, 0x58, 0x27, 0x58, + 0x3c, 0x58, 0xd6, 0x54, 0x8d, 0x56, 0x73, 0x58, 0x27, 0x58, 0x56, 0x58, 0x2a, 0x58, 0xe0, 0x58, + 0xfe, 0x59, 0x16, 0x5a, 0x4c, 0x5a, 0xe1, 0x58, 0x13, 0x58, 0xe7, 0x57, 0x9b, 0x58, 0x38, 0x56, + 0xff, 0x56, 0xfd, 0x58, 0x3c, 0x58, 0x88, 0x58, 0xdc, 0x58, 0x29, 0x59, 0x74, 0x5a, 0xa0, 0x59, + 0x94, 0x58, 0x12, 0x58, 0x4e, 0x57, 0x16, 0x58, 0xbb, 0x58, 0xc8, 0x55, 0xb5, 0x56, 0xb1, 0x58, + 0x4e, 0x58, 0x96, 0x58, 0xd2, 0x58, 0x28, 0x59, 0xf8, 0x58, 0xfe, 0x58, 0x5d, 0x58, 0x3b, 0x57, + 0x40, 0x57, 0x41, 0x58, 0xc0, 0x58, 0xe7, 0x56, 0xdf, 0x56, 0xea, 0x58, 0x8c, 0x58, 0x13, 0x58, + 0xa9, 0x58, 0xee, 0x58, 0x1e, 0x59, 0x00, 0x59, 0x3a, 0x58, 0xf4, 0x57, 0x1c, 0x58, 0x54, 0x58, + 0xfe, 0x58, 0x6e, 0x56, 0x87, 0x56, 0x74, 0x59, 0x2e, 0x59, 0x09, 0x59, 0x46, 0x59, 0x80, 0x59, + 0x02, 0x59, 0x2f, 0x59, 0xc7, 0x58, 0x5b, 0x58, 0x64, 0x58, 0x94, 0x58, 0x66, 0x59, 0xc7, 0x57, + 0xaa, 0x56, 0x3f, 0x59, 0x40, 0x59, 0x44, 0x59, 0x3c, 0x58, 0xfb, 0x58, 0x90, 0x58, 0x7f, 0x59, + 0x4b, 0x58, 0x9d, 0x58, 0x2d, 0x59, 0x9e, 0x59, 0x9e, 0x59, 0xbb, 0x57, 0x89, 0x55, 0x76, 0x58, + 0x65, 0x58, 0x7d, 0x58, 0x83, 0x57, 0x30, 0x58, 0xad, 0x58, 0xf0, 0x58, 0x30, 0x59, 0x18, 0x59, + 0x83, 0x59, 0x0a, 0x5a, 0x10, 0x59, 0x70, 0x56, 0x4c, 0x55, 0x06, 0x57, 0x28, 0x58, 0xb4, 0x57, + 0x7a, 0x57, 0x92, 0x57, 0x50, 0x58, 0xee, 0x58, 0x80, 0x59, 0xa5, 0x59, 0x5d, 0x59, 0xd3, 0x58, + 0x6e, 0x57, 0x39, 0x55, 0x12, 0x54, 0xce, 0x54, 0x53, 0x55, 0xbc, 0x55, 0x8c, 0x55, 0xc7, 0x55, + 0x1e, 0x56, 0xfe, 0x56, 0xb6, 0x57, 0x45, 0x58, 0x84, 0x56, 0x30, 0x56, 0x08, 0x55, 0xea, 0x52, + 0xb5, 0x50, 0x39, 0x53, 0x10, 0x53, 0x00, 0x54, 0x60, 0x54, 0xb8, 0x54, 0x26, 0x55, 0xec, 0x55, + 0xf6, 0x54, 0xd4, 0x54, 0x7a, 0x54, 0xd2, 0x54, 0xa0, 0x55, 0xd9, 0x50, 0xe2, 0x54, 0x44, 0x56, + 0x9e, 0x55, 0xc4, 0x55, 0x46, 0x57, 0x8a, 0x56, 0x8a, 0x58, 0x76, 0x57, 0x80, 0x58, 0x0e, 0x56, + 0xe2, 0x56, 0x71, 0x58, 0xdc, 0x57, 0xb4, 0x54, 0x06, 0x55, 0x64, 0x57, 0xfa, 0x56, 0xf2, 0x56, + 0x76, 0x58, 0x64, 0x58, 0x8a, 0x58, 0x5f, 0x58, 0xc2, 0x58, 0xf2, 0x57, 0x9c, 0x58, 0x86, 0x58, + 0x80, 0x58, 0xf6, 0x56, 0x1c, 0x56, 0x6c, 0x56, 0xe4, 0x54, 0x18, 0x57, 0x44, 0x58, 0x42, 0x57, + 0xda, 0x58, 0x27, 0x59, 0x76, 0x59, 0x4d, 0x58, 0x48, 0x58, 0x3e, 0x58, 0x13, 0x58, 0x5c, 0x54, + 0x1c, 0x54, 0x1c, 0x56, 0x6c, 0x56, 0x02, 0x57, 0xc6, 0x57, 0x3d, 0x58, 0xca, 0x58, 0xa6, 0x59, + 0x13, 0x59, 0x2f, 0x58, 0x22, 0x58, 0x07, 0x58, 0xba, 0x56, 0xa4, 0x54, 0x46, 0x55, 0xe6, 0x56, + 0x84, 0x56, 0x32, 0x56, 0x3a, 0x57, 0xac, 0x57, 0x8a, 0x59, 0xe4, 0x59, 0xe6, 0x58, 0x7a, 0x57, + 0x28, 0x58, 0x3a, 0x57, 0x4e, 0x57, 0x68, 0x54, 0xc4, 0x55, 0x3e, 0x58, 0x36, 0x56, 0x78, 0x57, + 0x19, 0x58, 0x55, 0x58, 0x0d, 0x59, 0x37, 0x58, 0xb0, 0x57, 0x06, 0x57, 0xd4, 0x57, 0x32, 0x58, + 0xdc, 0x57, 0x2e, 0x55, 0x9c, 0x56, 0x09, 0x58, 0x60, 0x56, 0x46, 0x57, 0x3a, 0x58, 0xb2, 0x58, + 0x98, 0x58, 0xa9, 0x58, 0x90, 0x57, 0xfa, 0x55, 0xfc, 0x55, 0xd2, 0x57, 0x44, 0x58, 0xc4, 0x55, + 0x08, 0x56, 0x5f, 0x58, 0x46, 0x57, 0x5a, 0x57, 0x64, 0x57, 0x75, 0x58, 0x6d, 0x58, 0x95, 0x58, + 0x1e, 0x57, 0x44, 0x56, 0x26, 0x57, 0x7a, 0x56, 0x98, 0x57, 0xec, 0x54, 0x86, 0x55, 0x2d, 0x58, + 0x7a, 0x58, 0x4a, 0x59, 0x6f, 0x58, 0x44, 0x58, 0x49, 0x58, 0xb0, 0x58, 0x57, 0x58, 0xdc, 0x56, + 0x90, 0x57, 0x84, 0x58, 0xf4, 0x58, 0xea, 0x55, 0x34, 0x54, 0x85, 0x58, 0x68, 0x58, 0x8f, 0x58, + 0x14, 0x56, 0x35, 0x58, 0x64, 0x58, 0x3d, 0x58, 0x03, 0x58, 0x27, 0x58, 0xd5, 0x58, 0xf8, 0x58, + 0xb6, 0x58, 0x16, 0x57, 0x12, 0x55, 0x3e, 0x57, 0x20, 0x57, 0x58, 0x57, 0xbe, 0x56, 0xa6, 0x57, + 0xac, 0x57, 0xcc, 0x58, 0xb7, 0x58, 0xf2, 0x58, 0xe2, 0x58, 0x0a, 0x59, 0xac, 0x58, 0xbc, 0x55, + 0xe0, 0x54, 0xe4, 0x56, 0xe6, 0x56, 0x36, 0x56, 0x40, 0x56, 0x4e, 0x56, 0x92, 0x56, 0x1d, 0x58, + 0x70, 0x58, 0x06, 0x59, 0x59, 0x58, 0x05, 0x58, 0x7e, 0x57, 0x36, 0x54, 0xa0, 0x52, 0x77, 0x53, + 0x19, 0x53, 0xd9, 0x53, 0x22, 0x54, 0xb2, 0x54, 0x5e, 0x55, 0x02, 0x56, 0xf2, 0x55, 0x7a, 0x56, + 0x92, 0x55, 0x7e, 0x55, 0x57, 0x52, 0x32, 0x52, 0xb3, 0x53, 0x2e, 0x54, 0x09, 0x55, 0x6e, 0x55, + 0x19, 0x56, 0x2e, 0x56, 0xc9, 0x56, 0xca, 0x56, 0x9f, 0x56, 0x4e, 0x55, 0x69, 0x56, 0xb5, 0x56, + 0xf7, 0x55, 0x57, 0x53, 0x39, 0x56, 0xc3, 0x57, 0x1e, 0x57, 0x96, 0x57, 0xa5, 0x58, 0x64, 0x58, + 0x51, 0x59, 0xe5, 0x58, 0x4e, 0x59, 0x57, 0x58, 0x36, 0x59, 0x92, 0x58, 0xb4, 0x59, 0x47, 0x56, + 0x43, 0x56, 0x1b, 0x58, 0xcb, 0x56, 0x68, 0x58, 0xf6, 0x58, 0xfb, 0x58, 0x38, 0x5a, 0xb8, 0x59, + 0x58, 0x59, 0x5a, 0x58, 0x14, 0x59, 0x24, 0x59, 0x02, 0x59, 0xe8, 0x56, 0x8a, 0x56, 0x1e, 0x58, + 0xd4, 0x56, 0x35, 0x58, 0xc7, 0x58, 0x8c, 0x58, 0x79, 0x59, 0x86, 0x5a, 0x71, 0x5a, 0xe7, 0x58, + 0xfa, 0x58, 0xe6, 0x58, 0x7c, 0x58, 0xc7, 0x55, 0x52, 0x56, 0x3f, 0x58, 0x14, 0x58, 0x97, 0x58, + 0xec, 0x58, 0xcf, 0x58, 0x0d, 0x5a, 0xfc, 0x59, 0x27, 0x5a, 0xf6, 0x58, 0xde, 0x58, 0xa0, 0x58, + 0xbc, 0x57, 0x4c, 0x56, 0x7a, 0x56, 0xf7, 0x58, 0x58, 0x58, 0xb6, 0x58, 0x8b, 0x58, 0x20, 0x59, + 0x48, 0x5a, 0xed, 0x5a, 0x0e, 0x5a, 0xd4, 0x58, 0xe7, 0x58, 0x4e, 0x58, 0xae, 0x58, 0x9a, 0x55, + 0x36, 0x57, 0xe4, 0x58, 0x08, 0x58, 0x82, 0x58, 0x33, 0x59, 0x22, 0x59, 0xcc, 0x59, 0x86, 0x59, + 0x82, 0x58, 0x5c, 0x57, 0xfc, 0x57, 0x2c, 0x58, 0x0e, 0x59, 0xa7, 0x56, 0xf6, 0x56, 0x11, 0x59, + 0xeb, 0x57, 0x8a, 0x58, 0x17, 0x59, 0x4e, 0x59, 0x62, 0x59, 0x50, 0x59, 0xdd, 0x58, 0xf4, 0x57, + 0x10, 0x57, 0x47, 0x58, 0xe2, 0x58, 0x73, 0x57, 0x26, 0x56, 0x12, 0x59, 0xa4, 0x58, 0x17, 0x59, + 0xc8, 0x58, 0x52, 0x59, 0x44, 0x59, 0x33, 0x59, 0x37, 0x58, 0x29, 0x58, 0x24, 0x58, 0x52, 0x58, + 0xd4, 0x58, 0xb9, 0x56, 0x75, 0x56, 0x18, 0x59, 0x1a, 0x59, 0xba, 0x59, 0x58, 0x59, 0x62, 0x59, + 0xf7, 0x58, 0xfb, 0x58, 0xcc, 0x58, 0x73, 0x58, 0x9c, 0x58, 0xba, 0x58, 0xe4, 0x59, 0x55, 0x58, + 0x2b, 0x56, 0x1c, 0x59, 0x57, 0x59, 0xc0, 0x58, 0x43, 0x58, 0xde, 0x58, 0x04, 0x59, 0xa0, 0x59, + 0xa0, 0x58, 0xa0, 0x58, 0x85, 0x59, 0x9a, 0x59, 0xdc, 0x59, 0x39, 0x57, 0x33, 0x56, 0xa2, 0x58, + 0x27, 0x58, 0x72, 0x58, 0xaf, 0x57, 0x80, 0x58, 0xab, 0x58, 0x9c, 0x59, 0x50, 0x59, 0xbe, 0x59, + 0x91, 0x59, 0x0a, 0x5a, 0x4b, 0x59, 0x2a, 0x57, 0xc8, 0x55, 0x20, 0x58, 0x8a, 0x57, 0x08, 0x58, + 0xf6, 0x57, 0x09, 0x58, 0x3e, 0x58, 0x44, 0x59, 0x99, 0x59, 0xd1, 0x59, 0x45, 0x59, 0xe3, 0x58, + 0x93, 0x58, 0xf0, 0x55, 0x1c, 0x53, 0xec, 0x54, 0xc4, 0x54, 0xfe, 0x54, 0x79, 0x55, 0x0e, 0x55, + 0x3c, 0x56, 0x20, 0x57, 0x96, 0x57, 0x3c, 0x58, 0x42, 0x56, 0x10, 0x56, 0xc2, 0x54, 0x2b, 0x54}; \ No newline at end of file diff --git a/tests/unit_test/valid_data/dwconv2d.dat b/tests/unit_test/valid_data/dwconv2d.dat new file mode 100644 index 00000000..b48355a0 --- /dev/null +++ b/tests/unit_test/valid_data/dwconv2d.dat @@ -0,0 +1,258 @@ +/************************************************************************** + * NCHW layout + **************************************************************************/ +// [c h w]: [2 4 10] +// [c h w]: [2 4 10] +// pad_left = pad_right = pad_top = pad_down = 1 +unsigned char dwconv3x3s1_fp32_in[] = { 0x3f, 0xcd, 0x18, 0x3e, 0xbc, 0xd2, 0xad, 0xbf, 0x61, 0xf0, 0x4c, 0x3f, + 0x26, 0x33, 0x68, 0x40, 0x0f, 0x11, 0x77, 0xc0, 0x68, 0x12, 0x07, 0x40, + 0xe2, 0xe2, 0x34, 0x40, 0xa6, 0x3d, 0x3f, 0x40, 0xb7, 0x6b, 0xce, 0x40, + 0x17, 0xf9, 0x0d, 0xbf, 0x04, 0xb4, 0xe4, 0x3f, 0x99, 0xef, 0x7e, 0xc0, + 0xff, 0x65, 0xfd, 0x3f, 0x01, 0x38, 0xd2, 0xbf, 0x13, 0xa5, 0x21, 0x3f, + 0x8a, 0xb1, 0x98, 0x40, 0x25, 0xe9, 0x2b, 0xbf, 0x19, 0xe2, 0x8c, 0xbd, + 0xe9, 0xcf, 0x09, 0xc0, 0xa6, 0x8c, 0x80, 0xc0, 0xc2, 0xa0, 0x96, 0xbe, + 0x7e, 0x73, 0x72, 0x3f, 0xf6, 0xcc, 0x31, 0xbf, 0xd7, 0x65, 0x4b, 0xbe, + 0xf1, 0x28, 0x22, 0xc0, 0x24, 0x35, 0x18, 0xc0, 0x1d, 0xf3, 0x54, 0x40, + 0xbb, 0x19, 0xc8, 0xbe, 0xba, 0xe4, 0x41, 0x40, 0x03, 0x63, 0x85, 0x3f, + 0x52, 0x7a, 0xe9, 0x3f, 0x5b, 0x68, 0xcf, 0x3f, 0xe8, 0x11, 0x31, 0x3f, + 0x28, 0xd5, 0x19, 0x40, 0xb8, 0x59, 0xa2, 0x3f, 0xde, 0x21, 0xf9, 0x3f, + 0x6c, 0x30, 0xa1, 0xbf, 0xeb, 0xbf, 0xb2, 0xbd, 0xfa, 0xc8, 0x35, 0x40, + 0x60, 0x2a, 0xad, 0x3f, 0xcc, 0x01, 0x3c, 0x40, 0x57, 0x87, 0x0d, 0x3f, + 0x05, 0x54, 0xa4, 0xbf, 0xdb, 0x0f, 0x10, 0xc0, 0x0a, 0x5f, 0x13, 0xbf, + 0x2e, 0x1c, 0xa7, 0x3f, 0x58, 0x76, 0x77, 0xc0, 0x62, 0x7a, 0xa2, 0x3f, + 0xe1, 0x47, 0x50, 0x40, 0x7f, 0xb1, 0x56, 0x3f, 0xba, 0x03, 0x0f, 0xbf, + 0x92, 0x68, 0x09, 0x40, 0x5a, 0xc6, 0x01, 0x40, 0x2a, 0x8d, 0x70, 0xbf, + 0xab, 0xda, 0x1f, 0x3f, 0x0a, 0x9f, 0x87, 0x3f, 0x88, 0xcf, 0x76, 0x3f, + 0x3b, 0x23, 0x81, 0x3f, 0x39, 0x6b, 0x02, 0xbf, 0x7f, 0xad, 0x8e, 0x3f, + 0xc3, 0x17, 0xc1, 0xbf, 0x39, 0xac, 0x9c, 0x3f, 0x45, 0x7b, 0x8d, 0xbe, + 0x9a, 0x7e, 0x40, 0x3e, 0xe3, 0xa8, 0x4b, 0x3f, 0xbf, 0x03, 0xfc, 0xbd, + 0xc2, 0xd4, 0x33, 0xbf, 0x5c, 0x9a, 0x2a, 0xbf, 0xb8, 0x7d, 0xea, 0xbd, + 0x3f, 0x15, 0x21, 0xc0, 0x94, 0xc5, 0x3a, 0x3e, 0x3d, 0x02, 0x4e, 0x3e, + 0xb4, 0x1d, 0xa5, 0xbf, 0x3a, 0xbf, 0x1b, 0xc0, 0x45, 0xf9, 0xde, 0xbf, + 0xd2, 0x85, 0x61, 0xbf, 0x8b, 0x39, 0x43, 0xc0, 0xd2, 0xd6, 0x10, 0xbf, + 0x94, 0x30, 0x6c, 0xbf, 0x25, 0xe9, 0x9f, 0x3f}; +unsigned char dwconv3x3s1_fp32_ker[] = { 0x35, 0x33, 0x6f, 0xc0, 0xb2, 0x8a, 0x67, 0xc0, 0x9c, 0x46, 0x06, 0x3e, + 0xac, 0xab, 0x18, 0xbe, 0x49, 0x5e, 0x5d, 0x3f, 0x55, 0x5e, 0xe7, 0xbf, + 0x30, 0x14, 0xbb, 0x3f, 0xfa, 0x1c, 0xf5, 0x3f, 0x1d, 0x09, 0xa6, 0x3f, + 0xed, 0x11, 0xb1, 0x3f, 0xbb, 0x18, 0xe1, 0xbe, 0xb2, 0x16, 0x92, 0x3e, + 0xc1, 0xc6, 0x92, 0xbe, 0xa5, 0x4c, 0x16, 0x3e, 0x36, 0xca, 0x96, 0x40, + 0x01, 0x56, 0x05, 0xc0, 0x24, 0xbb, 0x1e, 0xbf, 0x99, 0xe1, 0xa7, 0x40}; +unsigned char dwconv3x3s1_fp32_bias[] = { 0x1d, 0x98, 0x59, 0xc0, 0xcb, 0x24, 0xe5, 0x40}; +unsigned char dwconv3x3s1_fp32_out[] = { 0x87, 0xf3, 0x23, 0xc0, 0x7b, 0xdf, 0x07, 0xc1, 0xd5, 0x97, 0x53, 0xc1, + 0x35, 0x3b, 0xe5, 0x40, 0x5c, 0x0e, 0xc3, 0xc0, 0x02, 0x31, 0x45, 0x40, + 0x1c, 0x56, 0x89, 0xbf, 0x15, 0x6e, 0x86, 0xc1, 0x65, 0x65, 0xd6, 0xc0, + 0x9f, 0xe5, 0x7a, 0xc1, 0x7e, 0x5c, 0xa9, 0x40, 0x1f, 0xe1, 0xb7, 0xc0, + 0xee, 0x6e, 0x8a, 0x40, 0xd1, 0x80, 0xdc, 0xc1, 0x29, 0x3a, 0x96, 0xc1, + 0x92, 0x2d, 0xa2, 0x40, 0xd6, 0x33, 0x9f, 0xc1, 0x1f, 0x36, 0x3f, 0xc1, + 0xa4, 0xe4, 0xcf, 0xc1, 0x47, 0xdb, 0xb1, 0xc1, 0x81, 0x40, 0xd8, 0xc0, + 0x38, 0x09, 0x56, 0x41, 0x32, 0x9d, 0x28, 0x41, 0x8d, 0xe1, 0xdf, 0x40, + 0x60, 0xf0, 0x3a, 0x41, 0xa7, 0xda, 0xd6, 0xc1, 0xb9, 0xe2, 0x68, 0xc1, + 0x07, 0xd3, 0xb1, 0xc0, 0xd5, 0xb4, 0x3f, 0x41, 0xfa, 0xfb, 0xd2, 0x41, + 0x5f, 0x08, 0x64, 0xc0, 0xeb, 0x0a, 0xbe, 0xc0, 0xea, 0x0e, 0x07, 0xc1, + 0xa6, 0x2a, 0x3c, 0xbf, 0xa8, 0xb9, 0x5a, 0x40, 0x70, 0x0b, 0x97, 0x41, + 0x97, 0x4a, 0xfa, 0xc0, 0x5c, 0x5f, 0x98, 0xc1, 0x5a, 0xd4, 0x4b, 0xc1, + 0x48, 0xf9, 0x8d, 0xc1, 0x39, 0x75, 0xae, 0x41, 0x60, 0x22, 0x2d, 0x41, + 0x1a, 0x40, 0x67, 0xc1, 0x2e, 0xd5, 0x83, 0x40, 0x6f, 0x05, 0xa8, 0x41, + 0xd8, 0x48, 0xf3, 0xc0, 0x87, 0x02, 0x6b, 0x41, 0x0e, 0xdf, 0x93, 0x41, + 0xa4, 0x9c, 0x74, 0x41, 0x6b, 0x13, 0xd7, 0x40, 0xf9, 0x59, 0xbb, 0x41, + 0x2b, 0xa3, 0xac, 0x41, 0x37, 0xd0, 0xda, 0x3f, 0xf6, 0x0e, 0x51, 0x41, + 0x84, 0xee, 0x07, 0x41, 0xfe, 0x2c, 0x7c, 0x40, 0x11, 0x10, 0x4d, 0x41, + 0xd8, 0xf0, 0x6d, 0x3f, 0x68, 0x61, 0x5e, 0x3f, 0xf2, 0x71, 0x56, 0x41, + 0x42, 0x1f, 0x68, 0x41, 0x8a, 0x48, 0xf8, 0xbf, 0x28, 0xee, 0x3a, 0xc0, + 0x9c, 0x85, 0x17, 0x41, 0x4f, 0xcd, 0xdc, 0x40, 0x7e, 0x9b, 0xf1, 0xc0, + 0x94, 0x4e, 0xc1, 0x40, 0xcd, 0x5c, 0x15, 0x41, 0x57, 0xf5, 0xb6, 0x40, + 0x17, 0xdc, 0xd8, 0x40, 0x19, 0x5e, 0x12, 0x41, 0xfa, 0x7b, 0xd2, 0xbf, + 0xb6, 0xda, 0x2b, 0xc0, 0x21, 0xf5, 0xa2, 0xbf, 0x5c, 0xe8, 0x54, 0x40, + 0x80, 0x72, 0xbc, 0xc0, 0x57, 0xef, 0x87, 0x40, 0xd0, 0x29, 0x39, 0x40, + 0x89, 0xc1, 0x37, 0x41, 0x74, 0xe8, 0x08, 0x41}; +unsigned char dwconv3x3s1_fp16_in[] = { 0xc6, 0x30, 0x6e, 0xbd, 0x67, 0x3a, 0x41, 0x43, 0xb8, 0xc3, 0x38, 0x40, + 0xa7, 0x41, 0xf9, 0x41, 0x73, 0x46, 0x6f, 0xb8, 0x25, 0x3f, 0xf7, 0xc3, + 0xeb, 0x3f, 0x91, 0xbe, 0x0d, 0x39, 0xc5, 0x44, 0x5f, 0xb9, 0x67, 0xac, + 0x4e, 0xc0, 0x04, 0xc4, 0xb5, 0xb4, 0x93, 0x3b, 0x8e, 0xb9, 0x5b, 0xb2, + 0x11, 0xc1, 0xc1, 0xc0, 0xa7, 0x42, 0x40, 0xb6, 0x0f, 0x42, 0x2b, 0x3c, + 0x4b, 0x3f, 0x7b, 0x3e, 0x88, 0x39, 0xce, 0x40, 0x12, 0x3d, 0xc9, 0x3f, + 0x09, 0xbd, 0x95, 0xad, 0xae, 0x41, 0x69, 0x3d, 0xe0, 0x41, 0x6c, 0x38, + 0x22, 0xbd, 0x80, 0xc0, 0x9a, 0xb8, 0x38, 0x3d, 0xbb, 0xc3, 0x13, 0x3d, + 0x82, 0x42, 0xb5, 0x3a, 0x78, 0xb8, 0x4b, 0x40, 0x0e, 0x40, 0x84, 0xbb, + 0xfe, 0x38, 0x3c, 0x3c, 0xb6, 0x3b, 0x09, 0x3c, 0x13, 0xb8, 0x75, 0x3c, + 0x08, 0xbe, 0xe5, 0x3c, 0x6b, 0xb4, 0x03, 0x32, 0x5d, 0x3a, 0xe0, 0xaf, + 0x9e, 0xb9, 0x54, 0xb9, 0x53, 0xaf, 0x08, 0xc1, 0xd6, 0x31, 0x70, 0x32, + 0x28, 0xbd, 0xdd, 0xc0, 0xf7, 0xbe, 0x0c, 0xbb, 0x19, 0xc2, 0x86, 0xb8, + 0x61, 0xbb, 0xff, 0x3c}; +unsigned char dwconv3x3s1_fp16_ker[] = { 0x79, 0xc3, 0x3c, 0xc3, 0x32, 0x30, 0xc5, 0xb0, 0xea, 0x3a, 0x3a, 0xbf, + 0xd8, 0x3d, 0xa8, 0x3f, 0x30, 0x3d, 0x88, 0x3d, 0x08, 0xb7, 0x90, 0x34, + 0x96, 0xb4, 0xb2, 0x30, 0xb6, 0x44, 0x2a, 0xc0, 0xf5, 0xb8, 0x3f, 0x45}; +unsigned char dwconv3x3s1_fp16_bias[] = { 0xcc, 0xc2, 0x29, 0x47}; +unsigned char dwconv3x3s1_fp16_out[] = { 0x20, 0xc1, 0x3e, 0xc8, 0x9c, 0xca, 0x2a, 0x47, 0x17, 0xc6, 0x26, 0x42, + 0x48, 0xbc, 0x33, 0xcc, 0xb2, 0xc6, 0xd6, 0xcb, 0x49, 0x45, 0xbf, 0xc5, + 0x52, 0x44, 0xe2, 0xce, 0xb1, 0xcc, 0x13, 0x45, 0xf8, 0xcc, 0xfb, 0xc9, + 0x7e, 0xce, 0x8e, 0xcd, 0xc2, 0xc6, 0xaf, 0x4a, 0x44, 0x49, 0xfb, 0x46, + 0xd6, 0x49, 0xb6, 0xce, 0x46, 0xcb, 0x8d, 0xc5, 0xfd, 0x49, 0x96, 0x4e, + 0x1f, 0xc3, 0xf0, 0xc5, 0x38, 0xc8, 0xe0, 0xb9, 0xd4, 0x42, 0xb8, 0x4c, + 0xd2, 0xc7, 0xc2, 0xcc, 0x5f, 0xca, 0x70, 0xcc, 0x73, 0x4d, 0x69, 0x49, + 0x39, 0xcb, 0x1f, 0x44, 0x3f, 0x4d, 0x98, 0xc7, 0x58, 0x4b, 0x9f, 0x4c, + 0xa4, 0x4b, 0xb9, 0x46, 0xdb, 0x4d, 0x63, 0x4d, 0xdf, 0x3e, 0x88, 0x4a, + 0x3e, 0x48, 0xe2, 0x43, 0x67, 0x4a, 0x74, 0x3b, 0xf8, 0x3a, 0xb3, 0x4a, + 0x40, 0x4b, 0xb9, 0xbf, 0xd7, 0xc1, 0xbb, 0x48, 0xe5, 0x46, 0x8d, 0xc7, + 0x0d, 0x46, 0xaa, 0x48, 0xb9, 0x45, 0xc6, 0x46, 0x92, 0x48, 0x8a, 0xbe, + 0x55, 0xc1, 0x10, 0xbd, 0xa9, 0x42, 0xe3, 0xc5, 0x3f, 0x44, 0xcb, 0x41, + 0xbe, 0x49, 0x47, 0x48}; +unsigned char dwconv3x3s1_int8_in[] = {}; +unsigned char dwconv3x3s1_int8_ker[] = {}; +unsigned char dwconv3x3s1_int8_bias[] = {}; +unsigned char dwconv3x3s1_int8_out[] = {}; + + +// [c h w]: [2 6 18] +// [c h w]: [2 3 9] +// pad_left = pad_right = pad_top = pad_down = 1 +unsigned char dwconv3x3s2_fp32_in[] = { 0x85, 0xce, 0x23, 0x3e, 0xeb, 0x3c, 0x3f, 0x3f, 0x2e, 0xff, 0x8b, 0xbe, + 0x52, 0x48, 0x7a, 0xbe, 0x3b, 0xb0, 0x35, 0xc0, 0xcd, 0x17, 0x04, 0xc0, + 0xee, 0x37, 0x81, 0xbf, 0xcc, 0x2a, 0x89, 0xbf, 0x59, 0x6d, 0x4f, 0x3f, + 0xd4, 0xb5, 0x09, 0xc0, 0xf5, 0x8f, 0xf8, 0x3d, 0x01, 0xd3, 0xb1, 0xbe, + 0x6e, 0xba, 0xc4, 0xbf, 0xaf, 0xa3, 0xde, 0xbf, 0x05, 0x1e, 0xb9, 0x3e, + 0xb2, 0xb3, 0x84, 0xbf, 0x64, 0x2f, 0xbf, 0x3e, 0xe4, 0xd4, 0x82, 0x3f, + 0x5b, 0x90, 0x9a, 0xbf, 0xa8, 0x65, 0xc0, 0xbf, 0x73, 0x67, 0xd0, 0xbf, + 0xca, 0x59, 0xbc, 0xbe, 0x19, 0x77, 0xf5, 0xbf, 0xcf, 0xe3, 0x09, 0xbf, + 0x91, 0x8f, 0xe5, 0x3e, 0xac, 0x46, 0x42, 0xc0, 0x78, 0x5c, 0xea, 0xbf, + 0x2e, 0x4a, 0xe9, 0x3c, 0x0f, 0x5c, 0x6b, 0x3f, 0xf9, 0x18, 0x3e, 0xbe, + 0x6a, 0x5a, 0x66, 0xbf, 0x1b, 0xd6, 0x02, 0x3e, 0x0e, 0xd8, 0x80, 0xbf, + 0xe9, 0xd3, 0x98, 0xbf, 0x8d, 0xbc, 0x2e, 0xbe, 0xbd, 0x27, 0x20, 0xbf, + 0xf0, 0xbe, 0x57, 0xbf, 0x0a, 0xee, 0x12, 0xbf, 0x56, 0xea, 0x26, 0xc0, + 0x06, 0x4a, 0xfb, 0xbf, 0x8c, 0xaa, 0x18, 0xbe, 0xad, 0x40, 0x19, 0xc0, + 0xaa, 0x64, 0x9c, 0xbf, 0x12, 0xb3, 0xb3, 0xbf, 0xe6, 0x8b, 0x25, 0x3e, + 0x09, 0xe9, 0x71, 0xbf, 0xb5, 0xe4, 0x3c, 0xbf, 0x65, 0xab, 0x82, 0x3d, + 0x04, 0x7a, 0x46, 0x3e, 0x91, 0x6e, 0xfe, 0xbf, 0x60, 0x5f, 0x21, 0xbf, + 0xb2, 0xc4, 0x37, 0x3e, 0x67, 0xcf, 0xb9, 0xbf, 0xac, 0x6b, 0x3c, 0xbe, + 0x51, 0x0f, 0x29, 0xbf, 0xfd, 0x03, 0x15, 0x3f, 0x9f, 0xcb, 0x42, 0xbf, + 0x4a, 0x85, 0xc3, 0xbf, 0x8c, 0x97, 0xc3, 0xbf, 0xb5, 0x1f, 0x24, 0xbf, + 0xe3, 0x52, 0x51, 0xbf, 0xc6, 0x87, 0x12, 0x3e, 0x9d, 0x5a, 0xe1, 0xbe, + 0x3b, 0x29, 0xe9, 0xbf, 0x72, 0xd8, 0xbd, 0xbf, 0x8e, 0xd6, 0xa8, 0xbf, + 0x1b, 0xc7, 0xbe, 0x3d, 0x4a, 0x6f, 0xea, 0xbf, 0x6e, 0xbb, 0x12, 0xbf, + 0xa4, 0xfc, 0x60, 0xbe, 0x01, 0x47, 0x87, 0xbf, 0x53, 0x43, 0xcb, 0xbf, + 0x57, 0x53, 0x8d, 0xbf, 0xfc, 0x99, 0x48, 0xbf, 0x2d, 0xcd, 0x6b, 0xbf, + 0x66, 0xa3, 0x86, 0xbe, 0xbd, 0x6c, 0xb3, 0xbf, 0xdf, 0xa1, 0x2f, 0xbf, + 0xa2, 0x4b, 0x34, 0x3c, 0xa5, 0x0e, 0xef, 0xbf, 0xd3, 0xe2, 0x85, 0xc0, + 0x35, 0x3c, 0x3b, 0xbf, 0x29, 0x05, 0xc1, 0xbf, 0x91, 0x2e, 0xc1, 0xbf, + 0x7f, 0x5b, 0xa7, 0xbf, 0x59, 0x18, 0xb5, 0xbf, 0x79, 0x23, 0xc7, 0xbf, + 0x5f, 0x9c, 0xf7, 0xbe, 0x0e, 0x4d, 0x09, 0xc0, 0xd0, 0xd2, 0xb0, 0xbe, + 0x0a, 0x29, 0x2b, 0xbf, 0x03, 0x56, 0x8f, 0xbf, 0x99, 0x3b, 0x08, 0xc0, + 0xfd, 0x29, 0x2e, 0xbf, 0x01, 0xee, 0x17, 0xbf, 0x50, 0x36, 0x5e, 0xbf, + 0x91, 0xa7, 0xab, 0xbe, 0x53, 0x02, 0x78, 0xbe, 0x76, 0x0c, 0xa2, 0xbf, + 0x60, 0xd1, 0xb2, 0xbe, 0xf0, 0x23, 0xd5, 0xbf, 0x2d, 0x33, 0xc0, 0xbf, + 0x07, 0x54, 0xb3, 0xbe, 0x3b, 0xb3, 0x20, 0xc0, 0x46, 0xd5, 0x72, 0xbe, + 0x99, 0xff, 0x4e, 0xbe, 0xd0, 0x23, 0x86, 0xbf, 0x50, 0xab, 0x54, 0xbf, + 0x8c, 0x66, 0x1c, 0xc0, 0x65, 0xfd, 0x81, 0x3f, 0x53, 0x53, 0xde, 0x3d, + 0x75, 0x8f, 0xe7, 0xbf, 0xab, 0xa0, 0xd4, 0xbf, 0xd5, 0x21, 0x30, 0x3f, + 0xe8, 0x6a, 0x87, 0xbf, 0x48, 0x35, 0x20, 0xbf, 0x54, 0x9f, 0x91, 0xbe, + 0x5c, 0xbd, 0xcc, 0xbf, 0xa2, 0xd8, 0x1f, 0xc0, 0x66, 0x0d, 0x63, 0xbf, + 0x2d, 0xc5, 0x26, 0x3e, 0x09, 0xd6, 0x1e, 0xbf, 0xcd, 0x90, 0x03, 0xc0, + 0xc6, 0xc7, 0xa1, 0xbf, 0xdc, 0x83, 0x84, 0xbf, 0xc2, 0x71, 0x06, 0xc0, + 0x76, 0x91, 0x80, 0xbe, 0x79, 0x3e, 0x53, 0xbf, 0xf2, 0x2c, 0x02, 0xc0, + 0xa0, 0x1b, 0x19, 0xc0, 0x33, 0xf2, 0x76, 0xbf, 0xe1, 0xb2, 0x91, 0xbe, + 0xc1, 0xb7, 0xb5, 0xbf, 0xff, 0x6d, 0xb2, 0xbf, 0xfb, 0x20, 0xdc, 0xbe, + 0xb9, 0x1b, 0x03, 0xc0, 0x96, 0xf5, 0x5c, 0xc0, 0x4a, 0xf6, 0xeb, 0xbf, + 0x26, 0x5c, 0xd4, 0xbf, 0x90, 0x3d, 0xcd, 0xbf, 0x3a, 0xf0, 0xe4, 0xbe, + 0xb0, 0x30, 0x95, 0xbf, 0x45, 0xdc, 0xfd, 0xbf, 0x1d, 0xbc, 0xd1, 0xbf, + 0x2c, 0xa0, 0xcc, 0x3e, 0x35, 0x7b, 0xb1, 0xbf, 0x11, 0x34, 0xe0, 0xbe, + 0x24, 0x21, 0xcc, 0xbf, 0x2a, 0x72, 0xbc, 0xbe, 0xb4, 0x7d, 0xb6, 0x3e, + 0xe9, 0xe3, 0x2a, 0xc0, 0x8c, 0x62, 0xfe, 0xbf, 0x72, 0x79, 0x24, 0xbe, + 0x34, 0x8b, 0xbb, 0x3e, 0x42, 0xf7, 0xde, 0x3e, 0x5b, 0x96, 0x41, 0xbe, + 0x30, 0x4f, 0x27, 0xbf, 0xd6, 0x4f, 0x00, 0xbf, 0x9c, 0xc6, 0xf1, 0x3d, + 0xe2, 0x7b, 0xab, 0xbf, 0x51, 0x5e, 0xaf, 0xbf, 0x3a, 0x4b, 0x56, 0xc0, + 0xf2, 0x74, 0xb0, 0xbf, 0x1a, 0x85, 0x2f, 0xbf, 0x38, 0x65, 0xb7, 0xbf, + 0x42, 0x18, 0xad, 0xbf, 0x24, 0x16, 0x9d, 0x3e, 0xf5, 0x22, 0x6f, 0xbf, + 0xe8, 0x0d, 0xd8, 0xbf, 0x8b, 0x9a, 0x5a, 0xbf, 0xf5, 0x99, 0xb1, 0xbf, + 0xe9, 0xf8, 0x5e, 0xbf, 0xcc, 0x48, 0xda, 0xbf, 0x52, 0x20, 0x74, 0xbf, + 0xc2, 0x0c, 0xd6, 0xbf, 0xf0, 0x2a, 0xbf, 0xbf, 0x34, 0x56, 0xc3, 0xbf, + 0x25, 0xe2, 0xd4, 0x3d, 0xa6, 0x1f, 0xbe, 0xbf, 0x2f, 0x1e, 0xb9, 0xbf, + 0x1f, 0xbd, 0x83, 0xbf, 0x01, 0xa4, 0x99, 0xbf, 0x36, 0xeb, 0x35, 0x3f, + 0xd0, 0xdd, 0x82, 0xbf, 0x15, 0x0a, 0xec, 0xbd, 0xa7, 0x35, 0xb0, 0xbf, + 0x74, 0x4b, 0x12, 0xc0, 0xc5, 0x19, 0x8c, 0x3e, 0x56, 0x70, 0x49, 0xbf, + 0x5a, 0xaa, 0xcf, 0xbf, 0x12, 0x64, 0xa4, 0xbf, 0x1f, 0x1d, 0x5a, 0xbf, + 0x35, 0x2b, 0x9d, 0xbf, 0x60, 0x82, 0x26, 0xbf, 0x2d, 0xf2, 0x19, 0xbf, + 0x1f, 0xd9, 0xd9, 0xbf, 0x30, 0xf6, 0xa2, 0xbf, 0x52, 0xdd, 0x11, 0x3e, + 0xe9, 0x2c, 0x0f, 0xc0, 0xdb, 0x5c, 0x1c, 0xc0, 0x6c, 0xa8, 0xc1, 0xbf, + 0xb0, 0xfd, 0x1e, 0x3f, 0x54, 0xa7, 0xa1, 0xbe, 0x1c, 0xf8, 0xb1, 0xbe, + 0x4e, 0x70, 0x0a, 0xc0, 0xb2, 0x0c, 0x78, 0xbf, 0xf3, 0xa7, 0x1c, 0xc0, + 0x8f, 0xbd, 0x25, 0xbf, 0x05, 0x4d, 0x25, 0xbf, 0xa8, 0xda, 0xcf, 0x3f, + 0xf1, 0x28, 0x16, 0xc0, 0x1c, 0xd9, 0x20, 0xbf, 0x0f, 0x21, 0x5d, 0xbf, + 0xe4, 0xd4, 0x1d, 0xc0, 0x3a, 0x07, 0x7f, 0x3e, 0xac, 0xe6, 0xdb, 0xbf}; +unsigned char dwconv3x3s2_fp32_ker[] = { 0xee, 0x84, 0x37, 0x3f, 0xa4, 0x1b, 0x43, 0xbf, 0x84, 0xcb, 0xbd, 0xbe, + 0x78, 0x56, 0x43, 0x3e, 0x0d, 0x20, 0x0d, 0x40, 0xdf, 0xcd, 0xa9, 0x3f, + 0x2b, 0x29, 0x79, 0x3f, 0x9b, 0xe7, 0x06, 0xc0, 0x5c, 0x90, 0x8d, 0xbf, + 0x8b, 0x6b, 0x04, 0x40, 0xde, 0x28, 0x26, 0x40, 0x02, 0x16, 0x8c, 0x40, + 0x7c, 0x88, 0xce, 0x3f, 0x8a, 0xc7, 0xd6, 0x40, 0x4d, 0x6f, 0x8a, 0x3f, + 0xf5, 0xae, 0x9b, 0x3f, 0xa4, 0x77, 0x29, 0xc0, 0x63, 0x8d, 0x9c, 0x3f}; +unsigned char dwconv3x3s2_fp32_bias[] = {0x8d, 0x7d, 0x7f, 0x41, 0x7c, 0xff, 0xfe, 0x40}; +unsigned char dwconv3x3s2_fp32_out[] = { 0x29, 0x28, 0xac, 0x41, 0x8d, 0x79, 0x8c, 0x41, 0x7a, 0x41, 0x33, 0x41, + 0xd5, 0x08, 0x5d, 0x41, 0x5d, 0x1c, 0x79, 0x41, 0x02, 0x8e, 0x5a, 0x41, + 0x4d, 0x7b, 0x3c, 0x41, 0xd5, 0xff, 0x94, 0x41, 0xc7, 0xb6, 0x8e, 0x41, + 0xc3, 0x2f, 0x79, 0x41, 0x38, 0x9b, 0x3a, 0x41, 0xa1, 0xdd, 0x7e, 0x41, + 0x11, 0xb2, 0x44, 0x41, 0xd5, 0xbf, 0x88, 0x41, 0xff, 0x91, 0x83, 0x41, + 0x20, 0x25, 0x6d, 0x41, 0x05, 0x83, 0x76, 0x41, 0xf9, 0x55, 0x7d, 0x41, + 0x7f, 0xde, 0x76, 0x41, 0x87, 0x36, 0x99, 0x41, 0xe7, 0x5d, 0x5c, 0x41, + 0xd0, 0xfc, 0x59, 0x41, 0xde, 0x91, 0x15, 0x41, 0x16, 0x54, 0x7a, 0x41, + 0x0d, 0x2f, 0x4a, 0x41, 0xa7, 0xc6, 0x11, 0x41, 0x22, 0xad, 0x6d, 0x41, + 0x22, 0xbe, 0xf5, 0xc0, 0xe4, 0x5e, 0x1d, 0x41, 0x3d, 0xe4, 0xc1, 0xc0, + 0xd7, 0x98, 0x40, 0x40, 0xbd, 0xd1, 0x83, 0x3e, 0xd0, 0x0e, 0xfe, 0xc0, + 0x93, 0xc8, 0xe4, 0x40, 0x84, 0x38, 0x26, 0xc1, 0xe6, 0x0c, 0xbb, 0xbf, + 0x83, 0x5e, 0xf6, 0x40, 0xa6, 0x65, 0x71, 0xc1, 0x56, 0xd3, 0x0f, 0xc1, + 0x5c, 0xec, 0x9c, 0xc1, 0xee, 0x2e, 0xea, 0xc0, 0x0c, 0x88, 0xf6, 0xc0, + 0x0f, 0x6e, 0x2f, 0xc1, 0x04, 0xdb, 0x39, 0xbf, 0xea, 0x9f, 0x9b, 0xc1, + 0xbe, 0x78, 0x78, 0xc0, 0xc4, 0xf8, 0xdc, 0x3e, 0xf6, 0x2e, 0x5e, 0xbf, + 0x89, 0xb7, 0x73, 0xc1, 0x63, 0xfd, 0x52, 0xc0, 0xc3, 0xfb, 0x3a, 0xc1, + 0x68, 0x19, 0xf9, 0xc0, 0x31, 0x89, 0xe1, 0xc0, 0x60, 0x35, 0x97, 0xc1}; +unsigned char dwconv3x3s2_fp16_in[] = { 0x1e, 0x31, 0xf9, 0x39, 0x5f, 0xb4, 0xd2, 0xb3, 0xad, 0xc1, 0x20, 0xc0, + 0x09, 0xbc, 0x49, 0xbc, 0x7b, 0x3a, 0x4d, 0xc0, 0xc4, 0x2f, 0x8e, 0xb5, + 0x25, 0xbe, 0xf5, 0xbe, 0xc8, 0x35, 0x25, 0xbc, 0xf9, 0x35, 0x16, 0x3c, + 0xd4, 0xbc, 0x03, 0xbe, 0x83, 0xbe, 0xe2, 0xb5, 0xab, 0xbf, 0x4f, 0xb8, + 0x2c, 0x37, 0x12, 0xc2, 0x52, 0xbf, 0x4a, 0x27, 0x5a, 0x3b, 0xf0, 0xb1, + 0x32, 0xbb, 0x16, 0x30, 0x06, 0xbc, 0xc6, 0xbc, 0x75, 0xb1, 0x01, 0xb9, + 0xbd, 0xba, 0x97, 0xb8, 0x37, 0xc1, 0xda, 0xbf, 0xc5, 0xb0, 0xca, 0xc0, + 0xe3, 0xbc, 0x9d, 0xbd, 0x2c, 0x31, 0x8f, 0xbb, 0xe7, 0xb9, 0x15, 0x2c, + 0x33, 0x32, 0xf3, 0xbf, 0x0a, 0xb9, 0xbe, 0x31, 0xce, 0xbd, 0xe3, 0xb1, + 0x48, 0xb9, 0xa8, 0x38, 0x16, 0xba, 0x1c, 0xbe, 0x1c, 0xbe, 0x20, 0xb9, + 0x8a, 0xba, 0x94, 0x30, 0x0a, 0xb7, 0x49, 0xbf, 0xee, 0xbd, 0x46, 0xbd, + 0xf6, 0x2d, 0x53, 0xbf, 0x95, 0xb8, 0x07, 0xb3, 0x3a, 0xbc, 0x5a, 0xbe, + 0x6a, 0xbc, 0x44, 0xba, 0x5e, 0xbb, 0x35, 0xb4, 0x9b, 0xbd, 0x7d, 0xb9, + 0xa2, 0x21, 0x78, 0xbf, 0x2f, 0xc4, 0xd9, 0xb9, 0x08, 0xbe, 0x09, 0xbe, + 0x3a, 0xbd, 0xa8, 0xbd, 0x39, 0xbe, 0xbc, 0xb7, 0x4a, 0xc0, 0x86, 0xb5, + 0x59, 0xb9, 0x7a, 0xbc, 0x41, 0xc0, 0x71, 0xb9, 0xbf, 0xb8, 0xf1, 0xba, + 0x5d, 0xb5, 0xc0, 0xb3, 0x10, 0xbd, 0x96, 0xb5, 0xa9, 0xbe, 0x01, 0xbe, + 0x9a, 0xb5, 0x05, 0xc1, 0x96, 0xb3, 0x77, 0xb2, 0x31, 0xbc, 0xa5, 0xba, + 0xe3, 0xc0, 0x0f, 0x3c, 0xf2, 0x2e, 0x3c, 0xbf, 0xa5, 0xbe, 0x81, 0x39, + 0x3b, 0xbc, 0x01, 0xb9, 0x8c, 0xb4, 0x65, 0xbe, 0xfe, 0xc0, 0x18, 0xbb, + 0x36, 0x31, 0xf6, 0xb8, 0x1c, 0xc0, 0x0e, 0xbd, 0x24, 0xbc, 0x33, 0xc0, + 0x04, 0xb4, 0x99, 0xba, 0x11, 0xc0, 0xc8, 0xc0, 0xb7, 0xbb, 0x8d, 0xb4, + 0xad, 0xbd, 0x93, 0xbd, 0xe1, 0xb6, 0x18, 0xc0, 0xe7, 0xc2, 0x5f, 0xbf, + 0xa2, 0xbe, 0x69, 0xbe, 0x27, 0xb7, 0xa9, 0xbc, 0xee, 0xbf, 0x8d, 0xbe, + 0x65, 0x36, 0x8b, 0xbd, 0x01, 0xb7, 0x61, 0xbe, 0xe3, 0xb5, 0xb3, 0x35, + 0x57, 0xc1, 0xf3, 0xbf, 0x23, 0xb1, 0xdc, 0x35, 0xf7, 0x36, 0x0c, 0xb2, + 0x3a, 0xb9, 0x02, 0xb8, 0x8e, 0x2f, 0x5b, 0xbd, 0x7a, 0xbd, 0xb2, 0xc2, + 0x83, 0xbd, 0x7c, 0xb9, 0xbb, 0xbd, 0x68, 0xbd, 0xe8, 0x34, 0x79, 0xbb, + 0xc0, 0xbe, 0xd4, 0xba, 0x8c, 0xbd, 0xf7, 0xba, 0xd2, 0xbe, 0xa1, 0xbb, + 0xb0, 0xbe, 0xf9, 0xbd, 0x1a, 0xbe, 0xa7, 0x2e, 0xf0, 0xbd, 0xc8, 0xbd, + 0x1d, 0xbc, 0xcd, 0xbc, 0xaf, 0x39, 0x16, 0xbc, 0x60, 0xaf, 0x81, 0xbd, + 0x92, 0xc0, 0x60, 0x34, 0x4b, 0xba, 0x7d, 0xbe, 0x23, 0xbd, 0xd0, 0xba, + 0xe9, 0xbc, 0x34, 0xb9, 0xcf, 0xb8, 0xce, 0xbe, 0x17, 0xbd, 0x8e, 0x30, + 0x79, 0xc0, 0xe2, 0xc0, 0x0d, 0xbe, 0xf7, 0x38, 0x0d, 0xb5, 0x8f, 0xb5, + 0x53, 0xc0, 0xc0, 0xbb, 0xe5, 0xc0, 0x2d, 0xb9, 0x2a, 0xb9, 0x7e, 0x3e, + 0xb1, 0xc0, 0x06, 0xb9, 0xe9, 0xba, 0xee, 0xc0, 0xf8, 0x33, 0xdf, 0xbe}; +unsigned char dwconv3x3s2_fp16_ker[] = { 0xbc, 0x39, 0x18, 0xba, 0xee, 0xb5, 0x1a, 0x32, 0x69, 0x40, 0x4e, 0x3d, + 0xc9, 0x3b, 0x37, 0xc0, 0x6c, 0xbc, 0x23, 0x40, 0x31, 0x41, 0x60, 0x44, + 0x74, 0x3e, 0xb6, 0x46, 0x53, 0x3c, 0xdd, 0x3c, 0x4b, 0xc1, 0xe4, 0x3c}; +unsigned char dwconv3x3s2_fp16_bias[] = {0xfb, 0x4b, 0xf7, 0x47}; +unsigned char dwconv3x3s2_fp16_out[] = { 0x60, 0x4d, 0x64, 0x4c, 0x99, 0x49, 0xe7, 0x4a, 0xc9, 0x4b, 0xd4, 0x4a, + 0xe4, 0x49, 0xa7, 0x4c, 0x75, 0x4c, 0xca, 0x4b, 0xd4, 0x49, 0xf7, 0x4b, + 0x25, 0x4a, 0x46, 0x4c, 0x1c, 0x4c, 0x69, 0x4b, 0xb2, 0x4b, 0xe9, 0x4b, + 0xb6, 0x4b, 0xc8, 0x4c, 0xe2, 0x4a, 0xcf, 0x4a, 0xa9, 0x48, 0xd2, 0x4b, + 0x50, 0x4a, 0x8d, 0x48, 0x6c, 0x4b, 0xad, 0xc7, 0xea, 0x48, 0x10, 0xc6, + 0x06, 0x42, 0x35, 0x34, 0xf1, 0xc7, 0x24, 0x47, 0x30, 0xc9, 0xdc, 0xbd, + 0xb1, 0x47, 0x89, 0xcb, 0x7e, 0xc8, 0xe7, 0xcc, 0x4e, 0xc7, 0xaf, 0xc7, + 0x7a, 0xc9, 0xc8, 0xb9, 0xdc, 0xcc, 0xc0, 0xc3, 0xf2, 0x36, 0xed, 0xba, + 0x9e, 0xcb, 0x95, 0xc2, 0xd8, 0xc9, 0xc8, 0xc7, 0x0a, 0xc7, 0xb8, 0xcc}; +unsigned char dwconv3x3s2_int8_in[] = {}; +unsigned char dwconv3x3s2_int8_ker[] = {}; +unsigned char dwconv3x3s2_int8_bias[] = {}; +unsigned char dwconv3x3s2_int8_out[] = {}; \ No newline at end of file diff --git a/tests/unit_test/valid_data/fullyconnected.dat b/tests/unit_test/valid_data/fullyconnected.dat new file mode 100644 index 00000000..28e69609 --- /dev/null +++ b/tests/unit_test/valid_data/fullyconnected.dat @@ -0,0 +1,449 @@ +/************************************************************************** + * NCHW layout + **************************************************************************/ +// in_node = 17 out_node = 31 +unsigned char fc_fp32_in[] = { + 0x67, 0x2d, 0x31, 0x3f, 0x50, 0xd4, 0x05, 0xbf, 0x5f, 0x27, 0xfa, 0xbf, 0x06, 0xbe, + 0x14, 0x40, 0xb4, 0x22, 0xd8, 0xbf, 0x0c, 0xb6, 0xad, 0xbf, 0x2d, 0x07, 0x90, 0x3f, + 0x91, 0xd2, 0x4b, 0xbf, 0x05, 0x41, 0xd1, 0xbf, 0x7d, 0x41, 0x1e, 0x40, 0x18, 0xa0, + 0xb6, 0x3f, 0xa6, 0xde, 0x96, 0xbf, 0xf7, 0xfc, 0xfd, 0xbf, 0x01, 0xf0, 0xd9, 0xbf, + 0x1a, 0xa4, 0xa4, 0xbf, 0xa3, 0xf6, 0x33, 0x40, 0x55, 0xb3, 0x10, 0x40}; +unsigned char fc_fp32_weight[] = { + 0xe2, 0x3f, 0x13, 0x3f, 0x30, 0xbe, 0x43, 0xbf, 0x50, 0x9f, 0x0e, 0xbe, 0xba, 0xf7, 0xaf, 0x3d, + 0xfe, 0x04, 0x7c, 0xbf, 0x30, 0xa9, 0x46, 0x3f, 0xde, 0xcd, 0x5b, 0xbf, 0x66, 0x66, 0xe8, 0xbe, + 0xee, 0x6e, 0xf1, 0x3d, 0x5b, 0xf6, 0x99, 0x3e, 0x97, 0x2f, 0x3c, 0x3f, 0x17, 0x00, 0x39, 0x3e, + 0x4d, 0x16, 0x01, 0x3f, 0xdc, 0x2a, 0x20, 0xbe, 0x43, 0x52, 0x78, 0xbd, 0xe6, 0x9d, 0xa3, 0x3e, + 0x76, 0x38, 0xef, 0x3e, 0x47, 0xac, 0xec, 0x3e, 0x33, 0xa9, 0x20, 0xbf, 0xde, 0x3c, 0x06, 0x3f, + 0x6f, 0x6a, 0x34, 0x3e, 0xc0, 0x68, 0x74, 0x3f, 0x29, 0x78, 0x9d, 0xbe, 0x82, 0x8b, 0x06, 0xbf, + 0xab, 0x0d, 0xcc, 0x3e, 0xee, 0x13, 0x1f, 0xbe, 0x7f, 0x8b, 0x24, 0x3f, 0xf1, 0x32, 0x78, 0xbc, + 0xb7, 0x0f, 0x34, 0x3f, 0x16, 0x74, 0x85, 0x3e, 0x34, 0xf7, 0xbe, 0x3e, 0x5b, 0xed, 0x95, 0xbe, + 0x2c, 0x29, 0xa5, 0x3e, 0xf9, 0x8f, 0xed, 0xbe, 0x93, 0x62, 0xaa, 0xbd, 0xf7, 0x53, 0x58, 0xbf, + 0x9b, 0x1a, 0xd3, 0xbd, 0xdd, 0x55, 0x73, 0x3f, 0x6f, 0xa1, 0x55, 0x3e, 0x8f, 0xcc, 0x79, 0x3e, + 0xda, 0xfb, 0x27, 0x3f, 0xba, 0x81, 0x44, 0x3f, 0x62, 0x1c, 0x6b, 0xbe, 0x97, 0x1d, 0x0c, 0xbf, + 0xaa, 0x51, 0x9a, 0xbd, 0xbe, 0xe7, 0xcf, 0x3b, 0xd9, 0x05, 0xd8, 0x3e, 0xf3, 0x4e, 0xfe, 0xbc, + 0xb0, 0x63, 0x6e, 0xbf, 0xbb, 0x9f, 0x83, 0xbe, 0xfc, 0x11, 0xa8, 0x3c, 0x4a, 0x2e, 0x10, 0xbf, + 0x5c, 0x54, 0xa1, 0x3e, 0xc8, 0xec, 0x7e, 0x3f, 0x95, 0x7a, 0x04, 0xbf, 0xb4, 0x0f, 0x4c, 0x3f, + 0xb9, 0x8b, 0x36, 0x3f, 0xf7, 0x04, 0x37, 0x3f, 0xa5, 0x17, 0x98, 0xbe, 0x89, 0x92, 0x44, 0xbf, + 0x7a, 0x96, 0x21, 0xbd, 0x88, 0x0a, 0x5a, 0xbf, 0xa9, 0xc1, 0x14, 0x3f, 0x46, 0x39, 0x1f, 0x3f, + 0xad, 0x0c, 0x79, 0x3e, 0x29, 0x28, 0x77, 0x3f, 0xc8, 0x87, 0x41, 0x3e, 0xa5, 0x27, 0xc1, 0x3d, + 0x75, 0xe8, 0xbe, 0x3d, 0x4a, 0xa8, 0x20, 0xbf, 0x3c, 0x66, 0x90, 0xbe, 0x18, 0x9f, 0x5f, 0x3f, + 0x90, 0x97, 0x12, 0x3f, 0x75, 0x4f, 0xc6, 0xbe, 0x35, 0xf6, 0x9c, 0xbe, 0x5e, 0xe3, 0x24, 0x3f, + 0xd3, 0x47, 0x3e, 0xbf, 0xa6, 0x57, 0xe9, 0xbe, 0x40, 0xea, 0x4b, 0xbf, 0x69, 0xfa, 0x56, 0xbf, + 0xe9, 0xb1, 0xbd, 0x3e, 0xe8, 0x2f, 0xe7, 0xbe, 0x7a, 0xb4, 0xaf, 0x3e, 0x8e, 0x3b, 0x78, 0xbf, + 0xca, 0xa5, 0x72, 0xbf, 0xa6, 0x1a, 0x04, 0x3f, 0x60, 0x61, 0xa1, 0xbe, 0xd2, 0x2a, 0x20, 0xbd, + 0x1b, 0x5e, 0x9b, 0xbc, 0xad, 0x33, 0x60, 0xbf, 0x40, 0x59, 0x00, 0x3f, 0x8c, 0x10, 0x4d, 0xbf, + 0x64, 0x48, 0xc6, 0x3e, 0x77, 0xc8, 0x6e, 0xbf, 0x1f, 0x57, 0x19, 0xbf, 0x45, 0xb6, 0x07, 0xbf, + 0x74, 0x1f, 0x26, 0x3f, 0x68, 0x87, 0xeb, 0x3e, 0xa7, 0x74, 0x08, 0xbe, 0x54, 0xc1, 0x86, 0xbe, + 0x82, 0x10, 0x64, 0x3f, 0x8c, 0x19, 0x6b, 0x3f, 0x91, 0x14, 0x05, 0xbf, 0x8a, 0x65, 0x0e, 0x3e, + 0x3b, 0xe8, 0x68, 0x3f, 0x1e, 0x35, 0x83, 0xbe, 0x6f, 0x1e, 0x66, 0x3e, 0x7b, 0xb0, 0xdb, 0x3e, + 0x00, 0x07, 0x48, 0x3e, 0xcc, 0xd0, 0x3f, 0xbf, 0x16, 0x45, 0x65, 0x3f, 0xc5, 0xb3, 0x45, 0x3f, + 0x84, 0x09, 0x9a, 0x3e, 0x74, 0xc7, 0x29, 0x3f, 0x17, 0x99, 0x52, 0xbf, 0x7c, 0x5a, 0x08, 0x3f, + 0x2a, 0xaf, 0x85, 0xbe, 0x70, 0x6a, 0x5a, 0xbf, 0x6f, 0xba, 0x02, 0xbe, 0x5d, 0xb7, 0x5c, 0x3f, + 0x41, 0x89, 0x30, 0xbf, 0xa2, 0xd4, 0xc6, 0x3e, 0x49, 0x78, 0x17, 0x3e, 0x47, 0xb2, 0xd1, 0xbe, + 0x04, 0xac, 0x4f, 0xbf, 0x2f, 0xd6, 0x08, 0xbf, 0x83, 0x04, 0x62, 0xbf, 0x5f, 0x3b, 0x3c, 0xbf, + 0xae, 0x5a, 0x94, 0xbe, 0xaf, 0xbc, 0xd1, 0xbe, 0xad, 0x75, 0x04, 0x3e, 0xe0, 0xfb, 0x38, 0x3f, + 0xa7, 0x46, 0x19, 0x3e, 0xc2, 0xc9, 0x42, 0xbf, 0xc8, 0xf0, 0x7a, 0xbd, 0x13, 0xe3, 0xec, 0x3e, + 0xb0, 0xd8, 0x41, 0xbf, 0x7b, 0x97, 0x10, 0xbf, 0x3c, 0x8a, 0x4b, 0x3e, 0x1b, 0xe1, 0xbe, 0x3e, + 0x89, 0x51, 0xc5, 0xbe, 0xd1, 0x2a, 0xdf, 0xbe, 0xb8, 0xa1, 0xb6, 0x3e, 0x7d, 0xed, 0x21, 0x3e, + 0xe9, 0x0b, 0x50, 0xbf, 0x03, 0x67, 0x7f, 0xbf, 0x69, 0x9a, 0x41, 0xbf, 0xa2, 0xc2, 0x1d, 0x3f, + 0x10, 0x9c, 0x76, 0xbf, 0xae, 0x98, 0x64, 0xbd, 0xa0, 0xe9, 0x95, 0x3c, 0xd9, 0x46, 0x54, 0xbf, + 0x59, 0xfa, 0x79, 0xbf, 0xb5, 0x6a, 0x91, 0x3e, 0x49, 0xa8, 0x3f, 0x3e, 0xe9, 0x8b, 0x78, 0x3f, + 0x9e, 0x59, 0xcc, 0x3d, 0x89, 0x35, 0x48, 0x3f, 0x3f, 0x3f, 0x76, 0xbd, 0xcf, 0xe4, 0x56, 0xbd, + 0x4d, 0x35, 0x4f, 0xbf, 0xa7, 0x48, 0x55, 0x3f, 0x55, 0xc9, 0x6c, 0xbf, 0x0c, 0x2b, 0x0d, 0xbf, + 0xa3, 0x9f, 0x1d, 0x3f, 0xa8, 0xfa, 0xea, 0xbe, 0xd0, 0x4a, 0x93, 0xbe, 0x15, 0x83, 0xc8, 0x3e, + 0x80, 0x13, 0x82, 0x3e, 0x27, 0x0b, 0xa2, 0x3c, 0x8c, 0xc1, 0x74, 0xbf, 0xbc, 0x1b, 0x56, 0x3f, + 0x3e, 0x91, 0x68, 0xbd, 0xd0, 0xc5, 0x3b, 0xbf, 0x46, 0x76, 0xad, 0xbe, 0x28, 0xa8, 0x2f, 0xbf, + 0x65, 0x10, 0xd2, 0xbe, 0x3d, 0x0a, 0x1f, 0x3f, 0xf2, 0x72, 0x72, 0xbe, 0xae, 0x3f, 0x3f, 0xbe, + 0x60, 0x08, 0x1d, 0x3f, 0x1e, 0x74, 0x0c, 0x3f, 0x91, 0x46, 0x56, 0xbf, 0x9a, 0xbc, 0x36, 0xbf, + 0xee, 0x49, 0x2a, 0xbf, 0x03, 0xf7, 0x24, 0xbf, 0x13, 0x31, 0x16, 0xbf, 0x2d, 0x25, 0x42, 0xbe, + 0x69, 0x5a, 0x26, 0x3e, 0x17, 0x77, 0x73, 0x3e, 0x52, 0x0d, 0x20, 0x3f, 0xed, 0x8a, 0x56, 0x3f, + 0x44, 0xd3, 0x3c, 0x3d, 0x5d, 0x65, 0xec, 0xbe, 0xe7, 0xc3, 0x35, 0xbf, 0xe1, 0x63, 0x54, 0x3f, + 0x86, 0xee, 0x71, 0xbf, 0xf9, 0x38, 0x51, 0x3f, 0x49, 0x8b, 0x43, 0xbf, 0xa7, 0xc0, 0x35, 0xbf, + 0x56, 0x6e, 0x30, 0xbf, 0xae, 0xb0, 0x6c, 0xbf, 0xed, 0xd4, 0x77, 0x3f, 0x7d, 0x41, 0x18, 0x3f, + 0x91, 0x8c, 0x31, 0xbf, 0x8b, 0xe3, 0x08, 0x3f, 0xbb, 0xfe, 0xa3, 0xbe, 0x82, 0xf1, 0x0b, 0xbf, + 0x84, 0xfd, 0x7d, 0xbf, 0x8f, 0xfe, 0x88, 0xbe, 0x93, 0x97, 0x2e, 0x3f, 0xf6, 0xc1, 0x14, 0x3f, + 0x7a, 0x40, 0x6d, 0xbe, 0xaa, 0xfa, 0x18, 0x3f, 0x02, 0xd5, 0xc1, 0x3e, 0x53, 0x88, 0x34, 0xbf, + 0xa4, 0x07, 0x70, 0x3f, 0x68, 0x04, 0xf1, 0x3d, 0x82, 0x20, 0x65, 0x3f, 0x4a, 0x4a, 0x93, 0xbe, + 0x4a, 0xaf, 0xaa, 0x3e, 0xca, 0xea, 0xd9, 0x3e, 0x9f, 0x9c, 0x11, 0x3f, 0xea, 0x64, 0x18, 0xbf, + 0x02, 0xbb, 0x67, 0xbd, 0x64, 0x1f, 0x4a, 0x3f, 0xdc, 0x71, 0xb0, 0x3e, 0x1f, 0x8f, 0x4a, 0x3f, + 0x7e, 0xca, 0xd8, 0xbe, 0xd4, 0xdb, 0xa0, 0xbe, 0x35, 0x5c, 0x03, 0x3f, 0x18, 0xd6, 0x46, 0x3f, + 0x54, 0x5b, 0x77, 0xbf, 0xe3, 0x73, 0x42, 0x3f, 0x79, 0x4e, 0x2b, 0xbf, 0xec, 0xe9, 0x43, 0xbe, + 0x04, 0x7f, 0x60, 0x3e, 0x4f, 0xce, 0xa9, 0xbe, 0x56, 0x61, 0x46, 0x3e, 0x7e, 0xa3, 0x5e, 0x3e, + 0x19, 0xbc, 0x74, 0x3f, 0x9b, 0x06, 0x63, 0xbf, 0xac, 0xb7, 0xf3, 0x3c, 0xf3, 0x7c, 0x7d, 0xbf, + 0xa2, 0xce, 0xe0, 0xbb, 0x6e, 0x97, 0x90, 0xbe, 0x81, 0x41, 0x56, 0x3f, 0x28, 0xf0, 0xad, 0x3e, + 0x36, 0x79, 0x07, 0xbe, 0x07, 0x69, 0x8c, 0xbd, 0x80, 0xd0, 0x70, 0x3e, 0x4b, 0x2f, 0xba, 0x3e, + 0x31, 0x65, 0x53, 0xbf, 0x94, 0xbc, 0x25, 0xbe, 0xf4, 0xee, 0x8f, 0xbe, 0x50, 0xbd, 0x67, 0xbe, + 0x17, 0x93, 0x66, 0x3f, 0xf2, 0xc1, 0xf4, 0x3e, 0x9a, 0xfd, 0x55, 0x3f, 0x40, 0x4c, 0x0f, 0x3f, + 0xfc, 0xe3, 0xca, 0x3e, 0x76, 0xa6, 0x65, 0x3f, 0x6c, 0x24, 0x32, 0xbf, 0xf4, 0xb9, 0xb7, 0x3e, + 0xc1, 0x27, 0x4b, 0xbd, 0x87, 0x8a, 0x4c, 0x3f, 0xab, 0xdb, 0xd2, 0xbd, 0xef, 0xd8, 0x36, 0xbe, + 0x52, 0x29, 0x33, 0x3e, 0xae, 0x98, 0x12, 0xbf, 0xa7, 0xe7, 0x30, 0xbf, 0x49, 0x30, 0x40, 0xbf, + 0xe1, 0x73, 0x36, 0x3e, 0xa9, 0xa3, 0x16, 0x3f, 0xd4, 0xaf, 0xab, 0xbe, 0x2c, 0xb5, 0xa6, 0x3d, + 0x43, 0xe4, 0x62, 0xbf, 0x78, 0x36, 0x29, 0x3f, 0x82, 0x3f, 0x65, 0xbf, 0xac, 0xa9, 0x44, 0xbf, + 0x8f, 0xde, 0x72, 0x3e, 0x2a, 0x0e, 0x50, 0x3d, 0xcf, 0xc2, 0x54, 0xbf, 0x26, 0xb7, 0x1d, 0xbe, + 0xcd, 0x8d, 0x4b, 0xbf, 0x29, 0x5c, 0x3b, 0xbf, 0x1e, 0x31, 0x64, 0xbf, 0x37, 0x37, 0x0f, 0x3f, + 0x7d, 0xe6, 0x56, 0x3f, 0x2e, 0x18, 0x5b, 0xbd, 0x75, 0xe5, 0xdc, 0x3e, 0xc3, 0xec, 0xc9, 0x3e, + 0xc5, 0xb7, 0x75, 0xbd, 0x69, 0x9a, 0x0d, 0xbf, 0x62, 0xa7, 0xfd, 0xbe, 0x09, 0x3d, 0x4d, 0x3f, + 0xf3, 0x0b, 0x09, 0xbf, 0x71, 0xac, 0xdb, 0x3e, 0xb9, 0x3f, 0x41, 0xbf, 0xbe, 0xe7, 0xb0, 0xbc, + 0x14, 0x8e, 0x93, 0x3e, 0x53, 0xda, 0x13, 0x3f, 0xea, 0x80, 0x58, 0xbd, 0xff, 0x83, 0x11, 0x3f, + 0x91, 0x85, 0x6c, 0x3e, 0x5a, 0x47, 0xfd, 0x3c, 0xd2, 0x33, 0x2b, 0xbf, 0x43, 0x84, 0x6d, 0x3f, + 0xc7, 0xff, 0xbc, 0x3e, 0xed, 0x17, 0xd5, 0xbe, 0xfa, 0x52, 0x21, 0x3c, 0x78, 0xe7, 0xfa, 0xbe, + 0xe1, 0x7a, 0xd9, 0xbd, 0xba, 0xb3, 0x6c, 0xbf, 0x05, 0xcd, 0x76, 0x3f, 0x08, 0x8a, 0x77, 0xbd, + 0xc6, 0x81, 0x0d, 0x3f, 0x8f, 0xfa, 0x56, 0x3e, 0x85, 0x59, 0x32, 0xbf, 0x28, 0xa6, 0x7e, 0xbf, + 0x30, 0x85, 0x57, 0xbf, 0x92, 0xe9, 0x6d, 0xbf, 0x4d, 0x5e, 0x78, 0x3e, 0xe5, 0xfd, 0x70, 0x3e, + 0x4d, 0x4c, 0x8d, 0xbe, 0xbb, 0xa5, 0x73, 0xbe, 0xb4, 0x9f, 0x65, 0xbe, 0xba, 0x11, 0xfc, 0xbe, + 0x36, 0xe6, 0x16, 0xbf, 0xe8, 0x79, 0xd3, 0x3e, 0x54, 0xdf, 0x0b, 0xbf, 0x7d, 0x02, 0xdd, 0x3e, + 0x05, 0x5c, 0xce, 0x3e, 0x0e, 0x7c, 0xad, 0xbe, 0x70, 0x46, 0x06, 0x3e, 0x97, 0x4d, 0x3a, 0x3e, + 0xc4, 0x94, 0x77, 0x3e, 0xe4, 0xb3, 0x86, 0x3d, 0x75, 0x57, 0x66, 0x3f, 0x14, 0x11, 0x72, 0x3e, + 0xe0, 0xc5, 0xc7, 0xbe, 0x44, 0x34, 0x3e, 0xbd, 0xa2, 0xdb, 0xde, 0xbe, 0xe4, 0xe7, 0x82, 0xbe, + 0x67, 0x6d, 0x72, 0x3f, 0x3b, 0xb4, 0xff, 0xbc, 0x7b, 0x75, 0x0c, 0x3f, 0x73, 0xca, 0x7b, 0x3e, + 0x91, 0x60, 0x85, 0xbe, 0x8e, 0xbb, 0xa3, 0xbe, 0x17, 0x32, 0x09, 0xbe, 0xaa, 0x9e, 0xf1, 0xbd, + 0x43, 0xa8, 0x64, 0x3f, 0x04, 0x4e, 0x54, 0xbf, 0x73, 0xde, 0x01, 0x3f, 0x58, 0xd5, 0x75, 0xbf, + 0x78, 0xd7, 0x6a, 0xbf, 0x1d, 0xd5, 0x3d, 0x3e, 0xdd, 0xa4, 0x72, 0x3f, 0x2d, 0x90, 0xe5, 0x3e, + 0x81, 0xbb, 0x91, 0xbd, 0x85, 0xd9, 0x73, 0xbf, 0xcb, 0xb4, 0x7d, 0x3f, 0x35, 0x8e, 0xa7, 0xbe, + 0x57, 0x00, 0x4b, 0x3f, 0x76, 0x3b, 0x7f, 0x3f, 0xd6, 0x20, 0x0c, 0xbe, 0x67, 0x3d, 0xc8, 0x3e, + 0x03, 0x56, 0x7b, 0x3f, 0x27, 0x34, 0xf1, 0xbe, 0xbe, 0x2e, 0xc5, 0x3d, 0xaa, 0xf2, 0x3d, 0x3f, + 0x04, 0xb7, 0x11, 0xbf, 0x45, 0x4c, 0x62, 0xbf, 0xe0, 0x54, 0xa5, 0xbe, 0x73, 0xc2, 0x80, 0x3e, + 0x9e, 0x59, 0x9f, 0x3e, 0xfc, 0x27, 0x76, 0xbf, 0xe5, 0x33, 0x21, 0x3f, 0xdf, 0x2d, 0x24, 0xbf, + 0x71, 0xfa, 0x6c, 0xbf, 0xdb, 0x02, 0x73, 0x3f, 0x41, 0x03, 0xbb, 0x3e, 0x5b, 0x25, 0x47, 0xbf, + 0xa8, 0xeb, 0x3d, 0xbd, 0xc2, 0x50, 0x6c, 0x3f, 0x24, 0x1d, 0xa9, 0x3e, 0xd2, 0x7a, 0x57, 0xbf, + 0x52, 0x6e, 0xab, 0xbd, 0x73, 0xd1, 0xf8, 0xbc, 0x70, 0xad, 0xf3, 0x3e, 0x5b, 0x51, 0xf3, 0x3e, + 0x5b, 0x8b, 0x71, 0xbe, 0x46, 0x8c, 0x0e, 0xbf, 0xfe, 0x14, 0x7f, 0xbf, 0x16, 0x9c, 0x89, 0xbe, + 0x74, 0x79, 0xb5, 0xbd, 0xd4, 0xd7, 0x20, 0x3f, 0x8f, 0x91, 0x37, 0xbf, 0x0a, 0xc2, 0xee, 0xbe, + 0xf9, 0x75, 0xcd, 0x3b, 0x31, 0x67, 0x44, 0xbf, 0x16, 0x98, 0x33, 0x3f, 0x7f, 0xa2, 0x39, 0xbf, + 0xb1, 0x44, 0x03, 0x3f, 0x7c, 0x45, 0x62, 0xbc, 0xae, 0x0b, 0x20, 0x3d, 0xbd, 0x77, 0x38, 0xbf, + 0x85, 0x92, 0x28, 0xbe, 0x3f, 0x8a, 0xf1, 0xbe, 0x71, 0xaa, 0xcc, 0xbe, 0x1e, 0xcc, 0xe5, 0x3e, + 0xe2, 0x97, 0x69, 0xbf, 0x78, 0xfd, 0xd4, 0x3e, 0x88, 0x7d, 0x34, 0xbe, 0xd8, 0x90, 0x10, 0x3d, + 0x64, 0xb8, 0x2b, 0xbf, 0x0d, 0x08, 0x80, 0x3e, 0x2d, 0xd1, 0xef, 0x3e, 0xfa, 0x76, 0x2b, 0xbf, + 0xcb, 0xf8, 0x1b, 0xbf, 0xf2, 0x61, 0x2d, 0x3f, 0x91, 0x51, 0x76, 0xbf, 0xed, 0xe3, 0x56, 0xbf, + 0x78, 0x49, 0x74, 0x3f, 0xcf, 0x8d, 0x1c, 0xbf, 0x24, 0xf2, 0x56, 0x3e, 0xc0, 0xc6, 0x17, 0x3f, + 0x11, 0xf0, 0xbc, 0x3e, 0xbb, 0x5c, 0x06, 0x3f, 0xac, 0x1f, 0x8f, 0xbe, 0x53, 0x62, 0x34, 0xbf, + 0x46, 0x0c, 0x55, 0xbf, 0x70, 0xbd, 0xaa, 0xbd, 0x14, 0xcb, 0xd3, 0xbe, 0x05, 0x1a, 0x30, 0x3f, + 0x5a, 0x8b, 0x09, 0x3f, 0xcf, 0x0b, 0x73, 0xbf, 0xfd, 0xf7, 0x03, 0xbf, 0x46, 0x9e, 0x13, 0x3e, + 0xc4, 0x5c, 0xd1, 0x3e, 0x11, 0x16, 0x7f, 0xbf, 0x91, 0xb7, 0x07, 0x3f, 0x60, 0x2e, 0x17, 0xbf, + 0x5e, 0xb3, 0x51, 0x3f, 0xed, 0x59, 0x99, 0x3e, 0xa0, 0xcf, 0x84, 0xbe, 0xf3, 0x67, 0xbf, 0xbe, + 0xa3, 0xc9, 0x29, 0x3f, 0x2a, 0x2b, 0x15, 0x3e, 0x19, 0xad, 0xf4, 0x3c, 0x85, 0x5d, 0xed, 0xbe, + 0x73, 0xcc, 0x7a, 0xbf, 0x91, 0xcd, 0x15, 0x3f, 0x3e, 0xe7, 0x5c, 0x3d, 0xeb, 0x01, 0x3d, 0xbf, + 0x9f, 0x56, 0x1e, 0x3f, 0xff, 0x0e, 0x62, 0x3f, 0xe6, 0x8b, 0x5c, 0xbf, 0x15, 0x5a, 0xe4, 0x3e, + 0x9d, 0xa8, 0x69, 0x3f, 0xfe, 0xa7, 0x1a, 0xbf, 0x64, 0xc2, 0x3d, 0x3e, 0xc4, 0x6e, 0x78, 0x3e, + 0xe9, 0x96, 0x4a, 0xbe, 0xe9, 0x60, 0x54, 0x3e, 0xfc, 0x5b, 0xb1, 0xbe, 0x23, 0x31, 0xda, 0x3e, + 0x84, 0x40, 0x5f, 0x3f, 0x21, 0x0e, 0x1d, 0x3f, 0xf7, 0x47, 0x0f, 0xbf, 0x64, 0x5b, 0xcb, 0xbb, + 0x29, 0x76, 0x44, 0xbf, 0xbc, 0x63, 0x15, 0x3e, 0xcf, 0x03, 0xfd, 0x3d, 0xd4, 0xdb, 0x3a, 0x3f, + 0x28, 0x29, 0x14, 0x3f, 0x34, 0x6f, 0x25, 0xbf, 0xc5, 0xab, 0x6c, 0x3f, 0x13, 0xd3, 0xf6, 0x3e, + 0x7d, 0x2c, 0x21, 0x3f, 0xca, 0x50, 0x30, 0xbd, 0x81, 0x4b, 0x3d, 0xbd, 0xdf, 0xa0, 0x34, 0x3f, + 0xf5, 0xfb, 0xa8, 0x3e, 0xc8, 0xec, 0x98, 0x3e, 0x29, 0x1b, 0x3f, 0xbe, 0x0d, 0x96, 0x1c, 0x3f, + 0x35, 0xf6, 0x3c, 0x3f, 0x02, 0x45, 0xef, 0xbe, 0x93, 0x3d, 0x47, 0xbf, 0x63, 0x99, 0x65, 0xbf, + 0xcd, 0xbc, 0x1f, 0xbf, 0xb0, 0x59, 0x18, 0xbf, 0x1a, 0x16, 0x55, 0xbf, 0xf5, 0x8e, 0x54, 0x3f, + 0xf1, 0x41, 0x0c, 0xbf, 0xff, 0xd4, 0x0c, 0x3f, 0x72, 0xed, 0x15, 0x3e, 0x8b, 0x2e, 0x6f, 0xbe, + 0xce, 0x46, 0x5d, 0xbf, 0xcd, 0xa9, 0x7d, 0xbe, 0x76, 0x06, 0x5b, 0xbf, 0xad, 0xce, 0x74, 0x3d, + 0x4c, 0x40, 0x4d, 0xbf, 0xd3, 0xdc, 0xc2, 0x3d, 0x41, 0x80, 0x56, 0x3f, 0x18, 0x2f, 0x46, 0x3f, + 0xdd, 0x44, 0xc1, 0xbe, 0xa7, 0xa5, 0x88, 0xbe, 0x6d, 0x52, 0x46, 0xbe, 0xc5, 0x68, 0x22, 0xbf, + 0x72, 0x67, 0x80, 0x3d, 0xa3, 0xab, 0x85, 0x3d, 0xcf, 0x99, 0x33, 0x3f, 0x19, 0x7a, 0x08, 0x3f, + 0x3a, 0xed, 0x9d, 0x3d, 0x43, 0x56, 0xca, 0x3d, 0x5d, 0x59, 0x66, 0xbf, 0x2f, 0xfc, 0x52, 0xbf, + 0x8d, 0xc9, 0x12, 0xbf, 0x61, 0x31, 0xbb, 0xbe, 0x12, 0x67, 0x75, 0x3f, 0x5a, 0xe5, 0xae, 0xbe, + 0xcd, 0xe4, 0x4b, 0xbe, 0x3c, 0x5b, 0x43, 0x3f, 0xa8, 0x1e, 0xda, 0xbe}; +unsigned char fc_fp32_weight_ref[] = { + 0xe2, 0x3f, 0x13, 0x3f, 0x47, 0xac, 0xec, 0x3e, 0x93, 0x62, 0xaa, 0xbd, 0x4a, 0x2e, 0x10, 0xbf, + 0x30, 0xbe, 0x43, 0xbf, 0x33, 0xa9, 0x20, 0xbf, 0xf7, 0x53, 0x58, 0xbf, 0x5c, 0x54, 0xa1, 0x3e, + 0x50, 0x9f, 0x0e, 0xbe, 0xde, 0x3c, 0x06, 0x3f, 0x9b, 0x1a, 0xd3, 0xbd, 0xc8, 0xec, 0x7e, 0x3f, + 0xba, 0xf7, 0xaf, 0x3d, 0x6f, 0x6a, 0x34, 0x3e, 0xdd, 0x55, 0x73, 0x3f, 0x95, 0x7a, 0x04, 0xbf, + 0xfe, 0x04, 0x7c, 0xbf, 0xc0, 0x68, 0x74, 0x3f, 0x6f, 0xa1, 0x55, 0x3e, 0xb4, 0x0f, 0x4c, 0x3f, + 0x30, 0xa9, 0x46, 0x3f, 0x29, 0x78, 0x9d, 0xbe, 0x8f, 0xcc, 0x79, 0x3e, 0xb9, 0x8b, 0x36, 0x3f, + 0xde, 0xcd, 0x5b, 0xbf, 0x82, 0x8b, 0x06, 0xbf, 0xda, 0xfb, 0x27, 0x3f, 0xf7, 0x04, 0x37, 0x3f, + 0x66, 0x66, 0xe8, 0xbe, 0xab, 0x0d, 0xcc, 0x3e, 0xba, 0x81, 0x44, 0x3f, 0xa5, 0x17, 0x98, 0xbe, + 0xee, 0x6e, 0xf1, 0x3d, 0xee, 0x13, 0x1f, 0xbe, 0x62, 0x1c, 0x6b, 0xbe, 0x89, 0x92, 0x44, 0xbf, + 0x5b, 0xf6, 0x99, 0x3e, 0x7f, 0x8b, 0x24, 0x3f, 0x97, 0x1d, 0x0c, 0xbf, 0x7a, 0x96, 0x21, 0xbd, + 0x97, 0x2f, 0x3c, 0x3f, 0xf1, 0x32, 0x78, 0xbc, 0xaa, 0x51, 0x9a, 0xbd, 0x88, 0x0a, 0x5a, 0xbf, + 0x17, 0x00, 0x39, 0x3e, 0xb7, 0x0f, 0x34, 0x3f, 0xbe, 0xe7, 0xcf, 0x3b, 0xa9, 0xc1, 0x14, 0x3f, + 0x4d, 0x16, 0x01, 0x3f, 0x16, 0x74, 0x85, 0x3e, 0xd9, 0x05, 0xd8, 0x3e, 0x46, 0x39, 0x1f, 0x3f, + 0xdc, 0x2a, 0x20, 0xbe, 0x34, 0xf7, 0xbe, 0x3e, 0xf3, 0x4e, 0xfe, 0xbc, 0xad, 0x0c, 0x79, 0x3e, + 0x43, 0x52, 0x78, 0xbd, 0x5b, 0xed, 0x95, 0xbe, 0xb0, 0x63, 0x6e, 0xbf, 0x29, 0x28, 0x77, 0x3f, + 0xe6, 0x9d, 0xa3, 0x3e, 0x2c, 0x29, 0xa5, 0x3e, 0xbb, 0x9f, 0x83, 0xbe, 0xc8, 0x87, 0x41, 0x3e, + 0x76, 0x38, 0xef, 0x3e, 0xf9, 0x8f, 0xed, 0xbe, 0xfc, 0x11, 0xa8, 0x3c, 0xa5, 0x27, 0xc1, 0x3d, + 0x75, 0xe8, 0xbe, 0x3d, 0xa6, 0x1a, 0x04, 0x3f, 0x91, 0x14, 0x05, 0xbf, 0x5d, 0xb7, 0x5c, 0x3f, + 0x4a, 0xa8, 0x20, 0xbf, 0x60, 0x61, 0xa1, 0xbe, 0x8a, 0x65, 0x0e, 0x3e, 0x41, 0x89, 0x30, 0xbf, + 0x3c, 0x66, 0x90, 0xbe, 0xd2, 0x2a, 0x20, 0xbd, 0x3b, 0xe8, 0x68, 0x3f, 0xa2, 0xd4, 0xc6, 0x3e, + 0x18, 0x9f, 0x5f, 0x3f, 0x1b, 0x5e, 0x9b, 0xbc, 0x1e, 0x35, 0x83, 0xbe, 0x49, 0x78, 0x17, 0x3e, + 0x90, 0x97, 0x12, 0x3f, 0xad, 0x33, 0x60, 0xbf, 0x6f, 0x1e, 0x66, 0x3e, 0x47, 0xb2, 0xd1, 0xbe, + 0x75, 0x4f, 0xc6, 0xbe, 0x40, 0x59, 0x00, 0x3f, 0x7b, 0xb0, 0xdb, 0x3e, 0x04, 0xac, 0x4f, 0xbf, + 0x35, 0xf6, 0x9c, 0xbe, 0x8c, 0x10, 0x4d, 0xbf, 0x00, 0x07, 0x48, 0x3e, 0x2f, 0xd6, 0x08, 0xbf, + 0x5e, 0xe3, 0x24, 0x3f, 0x64, 0x48, 0xc6, 0x3e, 0xcc, 0xd0, 0x3f, 0xbf, 0x83, 0x04, 0x62, 0xbf, + 0xd3, 0x47, 0x3e, 0xbf, 0x77, 0xc8, 0x6e, 0xbf, 0x16, 0x45, 0x65, 0x3f, 0x5f, 0x3b, 0x3c, 0xbf, + 0xa6, 0x57, 0xe9, 0xbe, 0x1f, 0x57, 0x19, 0xbf, 0xc5, 0xb3, 0x45, 0x3f, 0xae, 0x5a, 0x94, 0xbe, + 0x40, 0xea, 0x4b, 0xbf, 0x45, 0xb6, 0x07, 0xbf, 0x84, 0x09, 0x9a, 0x3e, 0xaf, 0xbc, 0xd1, 0xbe, + 0x69, 0xfa, 0x56, 0xbf, 0x74, 0x1f, 0x26, 0x3f, 0x74, 0xc7, 0x29, 0x3f, 0xad, 0x75, 0x04, 0x3e, + 0xe9, 0xb1, 0xbd, 0x3e, 0x68, 0x87, 0xeb, 0x3e, 0x17, 0x99, 0x52, 0xbf, 0xe0, 0xfb, 0x38, 0x3f, + 0xe8, 0x2f, 0xe7, 0xbe, 0xa7, 0x74, 0x08, 0xbe, 0x7c, 0x5a, 0x08, 0x3f, 0xa7, 0x46, 0x19, 0x3e, + 0x7a, 0xb4, 0xaf, 0x3e, 0x54, 0xc1, 0x86, 0xbe, 0x2a, 0xaf, 0x85, 0xbe, 0xc2, 0xc9, 0x42, 0xbf, + 0x8e, 0x3b, 0x78, 0xbf, 0x82, 0x10, 0x64, 0x3f, 0x70, 0x6a, 0x5a, 0xbf, 0xc8, 0xf0, 0x7a, 0xbd, + 0xca, 0xa5, 0x72, 0xbf, 0x8c, 0x19, 0x6b, 0x3f, 0x6f, 0xba, 0x02, 0xbe, 0x13, 0xe3, 0xec, 0x3e, + 0xb0, 0xd8, 0x41, 0xbf, 0xb5, 0x6a, 0x91, 0x3e, 0x8c, 0xc1, 0x74, 0xbf, 0x2d, 0x25, 0x42, 0xbe, + 0x7b, 0x97, 0x10, 0xbf, 0x49, 0xa8, 0x3f, 0x3e, 0xbc, 0x1b, 0x56, 0x3f, 0x69, 0x5a, 0x26, 0x3e, + 0x3c, 0x8a, 0x4b, 0x3e, 0xe9, 0x8b, 0x78, 0x3f, 0x3e, 0x91, 0x68, 0xbd, 0x17, 0x77, 0x73, 0x3e, + 0x1b, 0xe1, 0xbe, 0x3e, 0x9e, 0x59, 0xcc, 0x3d, 0xd0, 0xc5, 0x3b, 0xbf, 0x52, 0x0d, 0x20, 0x3f, + 0x89, 0x51, 0xc5, 0xbe, 0x89, 0x35, 0x48, 0x3f, 0x46, 0x76, 0xad, 0xbe, 0xed, 0x8a, 0x56, 0x3f, + 0xd1, 0x2a, 0xdf, 0xbe, 0x3f, 0x3f, 0x76, 0xbd, 0x28, 0xa8, 0x2f, 0xbf, 0x44, 0xd3, 0x3c, 0x3d, + 0xb8, 0xa1, 0xb6, 0x3e, 0xcf, 0xe4, 0x56, 0xbd, 0x65, 0x10, 0xd2, 0xbe, 0x5d, 0x65, 0xec, 0xbe, + 0x7d, 0xed, 0x21, 0x3e, 0x4d, 0x35, 0x4f, 0xbf, 0x3d, 0x0a, 0x1f, 0x3f, 0xe7, 0xc3, 0x35, 0xbf, + 0xe9, 0x0b, 0x50, 0xbf, 0xa7, 0x48, 0x55, 0x3f, 0xf2, 0x72, 0x72, 0xbe, 0xe1, 0x63, 0x54, 0x3f, + 0x03, 0x67, 0x7f, 0xbf, 0x55, 0xc9, 0x6c, 0xbf, 0xae, 0x3f, 0x3f, 0xbe, 0x86, 0xee, 0x71, 0xbf, + 0x69, 0x9a, 0x41, 0xbf, 0x0c, 0x2b, 0x0d, 0xbf, 0x60, 0x08, 0x1d, 0x3f, 0xf9, 0x38, 0x51, 0x3f, + 0xa2, 0xc2, 0x1d, 0x3f, 0xa3, 0x9f, 0x1d, 0x3f, 0x1e, 0x74, 0x0c, 0x3f, 0x49, 0x8b, 0x43, 0xbf, + 0x10, 0x9c, 0x76, 0xbf, 0xa8, 0xfa, 0xea, 0xbe, 0x91, 0x46, 0x56, 0xbf, 0xa7, 0xc0, 0x35, 0xbf, + 0xae, 0x98, 0x64, 0xbd, 0xd0, 0x4a, 0x93, 0xbe, 0x9a, 0xbc, 0x36, 0xbf, 0x56, 0x6e, 0x30, 0xbf, + 0xa0, 0xe9, 0x95, 0x3c, 0x15, 0x83, 0xc8, 0x3e, 0xee, 0x49, 0x2a, 0xbf, 0xae, 0xb0, 0x6c, 0xbf, + 0xd9, 0x46, 0x54, 0xbf, 0x80, 0x13, 0x82, 0x3e, 0x03, 0xf7, 0x24, 0xbf, 0xed, 0xd4, 0x77, 0x3f, + 0x59, 0xfa, 0x79, 0xbf, 0x27, 0x0b, 0xa2, 0x3c, 0x13, 0x31, 0x16, 0xbf, 0x7d, 0x41, 0x18, 0x3f, + 0x91, 0x8c, 0x31, 0xbf, 0xca, 0xea, 0xd9, 0x3e, 0x56, 0x61, 0x46, 0x3e, 0x50, 0xbd, 0x67, 0xbe, + 0x8b, 0xe3, 0x08, 0x3f, 0x9f, 0x9c, 0x11, 0x3f, 0x7e, 0xa3, 0x5e, 0x3e, 0x17, 0x93, 0x66, 0x3f, + 0xbb, 0xfe, 0xa3, 0xbe, 0xea, 0x64, 0x18, 0xbf, 0x19, 0xbc, 0x74, 0x3f, 0xf2, 0xc1, 0xf4, 0x3e, + 0x82, 0xf1, 0x0b, 0xbf, 0x02, 0xbb, 0x67, 0xbd, 0x9b, 0x06, 0x63, 0xbf, 0x9a, 0xfd, 0x55, 0x3f, + 0x84, 0xfd, 0x7d, 0xbf, 0x64, 0x1f, 0x4a, 0x3f, 0xac, 0xb7, 0xf3, 0x3c, 0x40, 0x4c, 0x0f, 0x3f, + 0x8f, 0xfe, 0x88, 0xbe, 0xdc, 0x71, 0xb0, 0x3e, 0xf3, 0x7c, 0x7d, 0xbf, 0xfc, 0xe3, 0xca, 0x3e, + 0x93, 0x97, 0x2e, 0x3f, 0x1f, 0x8f, 0x4a, 0x3f, 0xa2, 0xce, 0xe0, 0xbb, 0x76, 0xa6, 0x65, 0x3f, + 0xf6, 0xc1, 0x14, 0x3f, 0x7e, 0xca, 0xd8, 0xbe, 0x6e, 0x97, 0x90, 0xbe, 0x6c, 0x24, 0x32, 0xbf, + 0x7a, 0x40, 0x6d, 0xbe, 0xd4, 0xdb, 0xa0, 0xbe, 0x81, 0x41, 0x56, 0x3f, 0xf4, 0xb9, 0xb7, 0x3e, + 0xaa, 0xfa, 0x18, 0x3f, 0x35, 0x5c, 0x03, 0x3f, 0x28, 0xf0, 0xad, 0x3e, 0xc1, 0x27, 0x4b, 0xbd, + 0x02, 0xd5, 0xc1, 0x3e, 0x18, 0xd6, 0x46, 0x3f, 0x36, 0x79, 0x07, 0xbe, 0x87, 0x8a, 0x4c, 0x3f, + 0x53, 0x88, 0x34, 0xbf, 0x54, 0x5b, 0x77, 0xbf, 0x07, 0x69, 0x8c, 0xbd, 0xab, 0xdb, 0xd2, 0xbd, + 0xa4, 0x07, 0x70, 0x3f, 0xe3, 0x73, 0x42, 0x3f, 0x80, 0xd0, 0x70, 0x3e, 0xef, 0xd8, 0x36, 0xbe, + 0x68, 0x04, 0xf1, 0x3d, 0x79, 0x4e, 0x2b, 0xbf, 0x4b, 0x2f, 0xba, 0x3e, 0x52, 0x29, 0x33, 0x3e, + 0x82, 0x20, 0x65, 0x3f, 0xec, 0xe9, 0x43, 0xbe, 0x31, 0x65, 0x53, 0xbf, 0xae, 0x98, 0x12, 0xbf, + 0x4a, 0x4a, 0x93, 0xbe, 0x04, 0x7f, 0x60, 0x3e, 0x94, 0xbc, 0x25, 0xbe, 0xa7, 0xe7, 0x30, 0xbf, + 0x4a, 0xaf, 0xaa, 0x3e, 0x4f, 0xce, 0xa9, 0xbe, 0xf4, 0xee, 0x8f, 0xbe, 0x49, 0x30, 0x40, 0xbf, + 0xe1, 0x73, 0x36, 0x3e, 0x2e, 0x18, 0x5b, 0xbd, 0xd2, 0x33, 0x2b, 0xbf, 0xe5, 0xfd, 0x70, 0x3e, + 0xa9, 0xa3, 0x16, 0x3f, 0x75, 0xe5, 0xdc, 0x3e, 0x43, 0x84, 0x6d, 0x3f, 0x4d, 0x4c, 0x8d, 0xbe, + 0xd4, 0xaf, 0xab, 0xbe, 0xc3, 0xec, 0xc9, 0x3e, 0xc7, 0xff, 0xbc, 0x3e, 0xbb, 0xa5, 0x73, 0xbe, + 0x2c, 0xb5, 0xa6, 0x3d, 0xc5, 0xb7, 0x75, 0xbd, 0xed, 0x17, 0xd5, 0xbe, 0xb4, 0x9f, 0x65, 0xbe, + 0x43, 0xe4, 0x62, 0xbf, 0x69, 0x9a, 0x0d, 0xbf, 0xfa, 0x52, 0x21, 0x3c, 0xba, 0x11, 0xfc, 0xbe, + 0x78, 0x36, 0x29, 0x3f, 0x62, 0xa7, 0xfd, 0xbe, 0x78, 0xe7, 0xfa, 0xbe, 0x36, 0xe6, 0x16, 0xbf, + 0x82, 0x3f, 0x65, 0xbf, 0x09, 0x3d, 0x4d, 0x3f, 0xe1, 0x7a, 0xd9, 0xbd, 0xe8, 0x79, 0xd3, 0x3e, + 0xac, 0xa9, 0x44, 0xbf, 0xf3, 0x0b, 0x09, 0xbf, 0xba, 0xb3, 0x6c, 0xbf, 0x54, 0xdf, 0x0b, 0xbf, + 0x8f, 0xde, 0x72, 0x3e, 0x71, 0xac, 0xdb, 0x3e, 0x05, 0xcd, 0x76, 0x3f, 0x7d, 0x02, 0xdd, 0x3e, + 0x2a, 0x0e, 0x50, 0x3d, 0xb9, 0x3f, 0x41, 0xbf, 0x08, 0x8a, 0x77, 0xbd, 0x05, 0x5c, 0xce, 0x3e, + 0xcf, 0xc2, 0x54, 0xbf, 0xbe, 0xe7, 0xb0, 0xbc, 0xc6, 0x81, 0x0d, 0x3f, 0x0e, 0x7c, 0xad, 0xbe, + 0x26, 0xb7, 0x1d, 0xbe, 0x14, 0x8e, 0x93, 0x3e, 0x8f, 0xfa, 0x56, 0x3e, 0x70, 0x46, 0x06, 0x3e, + 0xcd, 0x8d, 0x4b, 0xbf, 0x53, 0xda, 0x13, 0x3f, 0x85, 0x59, 0x32, 0xbf, 0x97, 0x4d, 0x3a, 0x3e, + 0x29, 0x5c, 0x3b, 0xbf, 0xea, 0x80, 0x58, 0xbd, 0x28, 0xa6, 0x7e, 0xbf, 0xc4, 0x94, 0x77, 0x3e, + 0x1e, 0x31, 0x64, 0xbf, 0xff, 0x83, 0x11, 0x3f, 0x30, 0x85, 0x57, 0xbf, 0xe4, 0xb3, 0x86, 0x3d, + 0x37, 0x37, 0x0f, 0x3f, 0x91, 0x85, 0x6c, 0x3e, 0x92, 0xe9, 0x6d, 0xbf, 0x75, 0x57, 0x66, 0x3f, + 0x7d, 0xe6, 0x56, 0x3f, 0x5a, 0x47, 0xfd, 0x3c, 0x4d, 0x5e, 0x78, 0x3e, 0x14, 0x11, 0x72, 0x3e, + 0xe0, 0xc5, 0xc7, 0xbe, 0x1d, 0xd5, 0x3d, 0x3e, 0xe0, 0x54, 0xa5, 0xbe, 0x5b, 0x51, 0xf3, 0x3e, + 0x44, 0x34, 0x3e, 0xbd, 0xdd, 0xa4, 0x72, 0x3f, 0x73, 0xc2, 0x80, 0x3e, 0x5b, 0x8b, 0x71, 0xbe, + 0xa2, 0xdb, 0xde, 0xbe, 0x2d, 0x90, 0xe5, 0x3e, 0x9e, 0x59, 0x9f, 0x3e, 0x46, 0x8c, 0x0e, 0xbf, + 0xe4, 0xe7, 0x82, 0xbe, 0x81, 0xbb, 0x91, 0xbd, 0xfc, 0x27, 0x76, 0xbf, 0xfe, 0x14, 0x7f, 0xbf, + 0x67, 0x6d, 0x72, 0x3f, 0x85, 0xd9, 0x73, 0xbf, 0xe5, 0x33, 0x21, 0x3f, 0x16, 0x9c, 0x89, 0xbe, + 0x3b, 0xb4, 0xff, 0xbc, 0xcb, 0xb4, 0x7d, 0x3f, 0xdf, 0x2d, 0x24, 0xbf, 0x74, 0x79, 0xb5, 0xbd, + 0x7b, 0x75, 0x0c, 0x3f, 0x35, 0x8e, 0xa7, 0xbe, 0x71, 0xfa, 0x6c, 0xbf, 0xd4, 0xd7, 0x20, 0x3f, + 0x73, 0xca, 0x7b, 0x3e, 0x57, 0x00, 0x4b, 0x3f, 0xdb, 0x02, 0x73, 0x3f, 0x8f, 0x91, 0x37, 0xbf, + 0x91, 0x60, 0x85, 0xbe, 0x76, 0x3b, 0x7f, 0x3f, 0x41, 0x03, 0xbb, 0x3e, 0x0a, 0xc2, 0xee, 0xbe, + 0x8e, 0xbb, 0xa3, 0xbe, 0xd6, 0x20, 0x0c, 0xbe, 0x5b, 0x25, 0x47, 0xbf, 0xf9, 0x75, 0xcd, 0x3b, + 0x17, 0x32, 0x09, 0xbe, 0x67, 0x3d, 0xc8, 0x3e, 0xa8, 0xeb, 0x3d, 0xbd, 0x31, 0x67, 0x44, 0xbf, + 0xaa, 0x9e, 0xf1, 0xbd, 0x03, 0x56, 0x7b, 0x3f, 0xc2, 0x50, 0x6c, 0x3f, 0x16, 0x98, 0x33, 0x3f, + 0x43, 0xa8, 0x64, 0x3f, 0x27, 0x34, 0xf1, 0xbe, 0x24, 0x1d, 0xa9, 0x3e, 0x7f, 0xa2, 0x39, 0xbf, + 0x04, 0x4e, 0x54, 0xbf, 0xbe, 0x2e, 0xc5, 0x3d, 0xd2, 0x7a, 0x57, 0xbf, 0xb1, 0x44, 0x03, 0x3f, + 0x73, 0xde, 0x01, 0x3f, 0xaa, 0xf2, 0x3d, 0x3f, 0x52, 0x6e, 0xab, 0xbd, 0x7c, 0x45, 0x62, 0xbc, + 0x58, 0xd5, 0x75, 0xbf, 0x04, 0xb7, 0x11, 0xbf, 0x73, 0xd1, 0xf8, 0xbc, 0xae, 0x0b, 0x20, 0x3d, + 0x78, 0xd7, 0x6a, 0xbf, 0x45, 0x4c, 0x62, 0xbf, 0x70, 0xad, 0xf3, 0x3e, 0xbd, 0x77, 0x38, 0xbf, + 0x85, 0x92, 0x28, 0xbe, 0xcf, 0x8d, 0x1c, 0xbf, 0x91, 0xb7, 0x07, 0x3f, 0x15, 0x5a, 0xe4, 0x3e, + 0x3f, 0x8a, 0xf1, 0xbe, 0x24, 0xf2, 0x56, 0x3e, 0x60, 0x2e, 0x17, 0xbf, 0x9d, 0xa8, 0x69, 0x3f, + 0x71, 0xaa, 0xcc, 0xbe, 0xc0, 0xc6, 0x17, 0x3f, 0x5e, 0xb3, 0x51, 0x3f, 0xfe, 0xa7, 0x1a, 0xbf, + 0x1e, 0xcc, 0xe5, 0x3e, 0x11, 0xf0, 0xbc, 0x3e, 0xed, 0x59, 0x99, 0x3e, 0x64, 0xc2, 0x3d, 0x3e, + 0xe2, 0x97, 0x69, 0xbf, 0xbb, 0x5c, 0x06, 0x3f, 0xa0, 0xcf, 0x84, 0xbe, 0xc4, 0x6e, 0x78, 0x3e, + 0x78, 0xfd, 0xd4, 0x3e, 0xac, 0x1f, 0x8f, 0xbe, 0xf3, 0x67, 0xbf, 0xbe, 0xe9, 0x96, 0x4a, 0xbe, + 0x88, 0x7d, 0x34, 0xbe, 0x53, 0x62, 0x34, 0xbf, 0xa3, 0xc9, 0x29, 0x3f, 0xe9, 0x60, 0x54, 0x3e, + 0xd8, 0x90, 0x10, 0x3d, 0x46, 0x0c, 0x55, 0xbf, 0x2a, 0x2b, 0x15, 0x3e, 0xfc, 0x5b, 0xb1, 0xbe, + 0x64, 0xb8, 0x2b, 0xbf, 0x70, 0xbd, 0xaa, 0xbd, 0x19, 0xad, 0xf4, 0x3c, 0x23, 0x31, 0xda, 0x3e, + 0x0d, 0x08, 0x80, 0x3e, 0x14, 0xcb, 0xd3, 0xbe, 0x85, 0x5d, 0xed, 0xbe, 0x84, 0x40, 0x5f, 0x3f, + 0x2d, 0xd1, 0xef, 0x3e, 0x05, 0x1a, 0x30, 0x3f, 0x73, 0xcc, 0x7a, 0xbf, 0x21, 0x0e, 0x1d, 0x3f, + 0xfa, 0x76, 0x2b, 0xbf, 0x5a, 0x8b, 0x09, 0x3f, 0x91, 0xcd, 0x15, 0x3f, 0xf7, 0x47, 0x0f, 0xbf, + 0xcb, 0xf8, 0x1b, 0xbf, 0xcf, 0x0b, 0x73, 0xbf, 0x3e, 0xe7, 0x5c, 0x3d, 0x64, 0x5b, 0xcb, 0xbb, + 0xf2, 0x61, 0x2d, 0x3f, 0xfd, 0xf7, 0x03, 0xbf, 0xeb, 0x01, 0x3d, 0xbf, 0x29, 0x76, 0x44, 0xbf, + 0x91, 0x51, 0x76, 0xbf, 0x46, 0x9e, 0x13, 0x3e, 0x9f, 0x56, 0x1e, 0x3f, 0xbc, 0x63, 0x15, 0x3e, + 0xed, 0xe3, 0x56, 0xbf, 0xc4, 0x5c, 0xd1, 0x3e, 0xff, 0x0e, 0x62, 0x3f, 0xcf, 0x03, 0xfd, 0x3d, + 0x78, 0x49, 0x74, 0x3f, 0x11, 0x16, 0x7f, 0xbf, 0xe6, 0x8b, 0x5c, 0xbf, 0xd4, 0xdb, 0x3a, 0x3f, + 0x28, 0x29, 0x14, 0x3f, 0x34, 0x6f, 0x25, 0xbf, 0xc5, 0xab, 0x6c, 0x3f, 0x13, 0xd3, 0xf6, 0x3e, + 0x7d, 0x2c, 0x21, 0x3f, 0xca, 0x50, 0x30, 0xbd, 0x81, 0x4b, 0x3d, 0xbd, 0xdf, 0xa0, 0x34, 0x3f, + 0xf5, 0xfb, 0xa8, 0x3e, 0xc8, 0xec, 0x98, 0x3e, 0x29, 0x1b, 0x3f, 0xbe, 0x0d, 0x96, 0x1c, 0x3f, + 0x35, 0xf6, 0x3c, 0x3f, 0x02, 0x45, 0xef, 0xbe, 0x93, 0x3d, 0x47, 0xbf, 0x63, 0x99, 0x65, 0xbf, + 0xcd, 0xbc, 0x1f, 0xbf, 0xb0, 0x59, 0x18, 0xbf, 0x1a, 0x16, 0x55, 0xbf, 0xf5, 0x8e, 0x54, 0x3f, + 0xf1, 0x41, 0x0c, 0xbf, 0xff, 0xd4, 0x0c, 0x3f, 0x72, 0xed, 0x15, 0x3e, 0x8b, 0x2e, 0x6f, 0xbe, + 0xce, 0x46, 0x5d, 0xbf, 0xcd, 0xa9, 0x7d, 0xbe, 0x76, 0x06, 0x5b, 0xbf, 0xad, 0xce, 0x74, 0x3d, + 0x4c, 0x40, 0x4d, 0xbf, 0xd3, 0xdc, 0xc2, 0x3d, 0x41, 0x80, 0x56, 0x3f, 0x18, 0x2f, 0x46, 0x3f, + 0xdd, 0x44, 0xc1, 0xbe, 0xa7, 0xa5, 0x88, 0xbe, 0x6d, 0x52, 0x46, 0xbe, 0xc5, 0x68, 0x22, 0xbf, + 0x72, 0x67, 0x80, 0x3d, 0xa3, 0xab, 0x85, 0x3d, 0xcf, 0x99, 0x33, 0x3f, 0x19, 0x7a, 0x08, 0x3f, + 0x3a, 0xed, 0x9d, 0x3d, 0x43, 0x56, 0xca, 0x3d, 0x5d, 0x59, 0x66, 0xbf, 0x2f, 0xfc, 0x52, 0xbf, + 0x8d, 0xc9, 0x12, 0xbf, 0x61, 0x31, 0xbb, 0xbe, 0x12, 0x67, 0x75, 0x3f, 0x5a, 0xe5, 0xae, 0xbe, + 0xcd, 0xe4, 0x4b, 0xbe, 0x3c, 0x5b, 0x43, 0x3f, 0xa8, 0x1e, 0xda, 0xbe}; +unsigned char fc_fp32_bias[] = { + 0x87, 0x61, 0xbb, 0x3f, 0xde, 0x60, 0xaa, 0x40, 0xe2, 0x91, 0xe9, 0x3e, 0xec, 0x1f, 0xed, 0x3d, + 0x98, 0x43, 0x7d, 0x40, 0x2a, 0x12, 0x40, 0x40, 0x55, 0x39, 0xa7, 0x40, 0x20, 0x4e, 0x24, 0xc0, + 0x15, 0x39, 0x5a, 0x3d, 0xf1, 0xee, 0x43, 0x40, 0x20, 0xb1, 0x5b, 0xbf, 0x18, 0xd2, 0x5a, 0x40, + 0x51, 0x69, 0xa3, 0x3f, 0x14, 0x60, 0x9d, 0x40, 0x83, 0x78, 0x42, 0x3f, 0xeb, 0x69, 0x74, 0x40, + 0xc8, 0xd0, 0x78, 0xc0, 0x51, 0x85, 0xbe, 0x3f, 0x12, 0x11, 0x83, 0x3f, 0x02, 0x11, 0x9a, 0x3f, + 0x5c, 0x14, 0xaf, 0x40, 0xf0, 0x07, 0x72, 0xc0, 0xb3, 0x58, 0x3c, 0x3f, 0x06, 0x92, 0xd8, 0xbf, + 0x53, 0xe9, 0x50, 0xbf, 0xf2, 0xc7, 0x23, 0xc0, 0xb5, 0x0a, 0xbe, 0x40, 0xfd, 0xb3, 0x8e, 0x40, + 0x45, 0x3d, 0x72, 0x40, 0x1b, 0xd7, 0x6b, 0xbf, 0x52, 0x3d, 0xae, 0x40}; +unsigned char fc_fp32_out[] = { + 0xef, 0x8c, 0xad, 0x40, 0xb1, 0x09, 0x55, 0x40, 0x55, 0x5a, 0xab, 0x3f, 0x41, 0xf0, 0xf7, 0xc0, + 0x8f, 0xa9, 0x97, 0x3e, 0x33, 0xd2, 0xbc, 0x40, 0x10, 0xad, 0x3d, 0x3f, 0x97, 0x11, 0x60, 0xbe, + 0x16, 0x85, 0x6b, 0xc0, 0xe6, 0x8a, 0x2a, 0xc0, 0xb3, 0x6d, 0x10, 0xc0, 0x1a, 0x89, 0x0f, 0x41, + 0x94, 0x47, 0x0b, 0x40, 0x3a, 0x5e, 0x12, 0x41, 0x19, 0xe7, 0x58, 0xc0, 0x61, 0x12, 0xfe, 0x3f, + 0xbc, 0xab, 0x49, 0x40, 0xb1, 0x83, 0x41, 0xbf, 0x54, 0xf0, 0x0f, 0x3f, 0xe9, 0x11, 0xbb, 0x40, + 0x2c, 0xbd, 0xed, 0xbf, 0xf8, 0x6d, 0x44, 0xc1, 0x57, 0x0b, 0xc8, 0xc0, 0xb3, 0x75, 0x2c, 0xc0, + 0x0d, 0xed, 0xc4, 0x40, 0xde, 0x1c, 0x46, 0xc0, 0x1c, 0x8e, 0x9c, 0x40, 0x49, 0x4e, 0x46, 0x41, + 0xd4, 0xe5, 0x0a, 0xc0, 0x25, 0x3e, 0x17, 0xc1, 0x9c, 0xe4, 0x3b, 0x40}; + +unsigned char fc_fp16_in[] = {0x89, 0x39, 0x2e, 0xb8, 0xd1, 0xbf, 0xa5, 0x40, 0xc1, + 0xbe, 0x6d, 0xbd, 0x80, 0x3c, 0x5e, 0xba, 0x8a, 0xbe, + 0xf2, 0x40, 0xb5, 0x3d, 0xb6, 0xbc, 0xef, 0xbf, 0xcf, + 0xbe, 0x25, 0xbd, 0x9f, 0x41, 0x85, 0x40}; +unsigned char fc_fp16_weight[] = { + 0x99, 0x38, 0x1d, 0xba, 0x74, 0xb0, 0x7f, 0x2d, 0xe0, 0xbb, 0x35, 0x3a, 0xde, 0xba, 0x43, 0xb7, + 0x8b, 0x2f, 0xcf, 0x34, 0xe1, 0x39, 0xc8, 0x31, 0x08, 0x38, 0x01, 0xb1, 0xc2, 0xab, 0x1c, 0x35, + 0x79, 0x37, 0x65, 0x37, 0x05, 0xb9, 0x31, 0x38, 0xa3, 0x31, 0xa3, 0x3b, 0xeb, 0xb4, 0x34, 0xb8, + 0x60, 0x36, 0xf8, 0xb0, 0x24, 0x39, 0xc1, 0xa3, 0xa0, 0x39, 0x2b, 0x34, 0xf7, 0x35, 0xaf, 0xb4, + 0x29, 0x35, 0x6c, 0xb7, 0x53, 0xad, 0xc2, 0xba, 0x98, 0xae, 0x9a, 0x3b, 0xad, 0x32, 0xce, 0x33, + 0x3f, 0x39, 0x24, 0x3a, 0x58, 0xb3, 0x60, 0xb8, 0xd2, 0xac, 0x7f, 0x1e, 0xc0, 0x36, 0xf2, 0xa7, + 0x73, 0xbb, 0x1c, 0xb4, 0x40, 0x25, 0x81, 0xb8, 0x0a, 0x35, 0xf7, 0x3b, 0x23, 0xb8, 0x60, 0x3a, + 0xb4, 0x39, 0xb8, 0x39, 0xc0, 0xb4, 0x24, 0xba, 0x0c, 0xa9, 0xd0, 0xba, 0xa6, 0x38, 0xf9, 0x38, + 0xc8, 0x33, 0xb9, 0x3b, 0x0c, 0x32, 0x09, 0x2e, 0xf7, 0x2d, 0x05, 0xb9, 0x83, 0xb4, 0xfc, 0x3a, + 0x94, 0x38, 0x32, 0xb6, 0xe7, 0xb4, 0x27, 0x39, 0xf2, 0xb9, 0x4a, 0xb7, 0x5f, 0xba, 0xb7, 0xba, + 0xed, 0x35, 0x39, 0xb7, 0x7d, 0x35, 0xc1, 0xbb, 0x95, 0xbb, 0x20, 0x38, 0x0b, 0xb5, 0x01, 0xa9, + 0xda, 0xa4, 0x01, 0xbb, 0x02, 0x38, 0x68, 0xba, 0x32, 0x36, 0x76, 0xbb, 0xca, 0xb8, 0x3d, 0xb8, + 0x30, 0x39, 0x5c, 0x37, 0x43, 0xb0, 0x36, 0xb4, 0x20, 0x3b, 0x58, 0x3b, 0x28, 0xb8, 0x73, 0x30, + 0x47, 0x3b, 0x19, 0xb4, 0x30, 0x33, 0xdd, 0x36, 0x40, 0x32, 0xfe, 0xb9, 0x2a, 0x3b, 0x2d, 0x3a, + 0xd0, 0x34, 0x4e, 0x39, 0x94, 0xba, 0x42, 0x38, 0x2d, 0xb4, 0xd3, 0xba, 0x15, 0xb0, 0xe5, 0x3a, + 0x84, 0xb9, 0x36, 0x36, 0xbb, 0x30, 0x8d, 0xb6, 0x7d, 0xba, 0x46, 0xb8, 0x10, 0xbb, 0xe1, 0xb9, + 0xa2, 0xb4, 0x8d, 0xb6, 0x23, 0x30, 0xc7, 0x39, 0xca, 0x30, 0x16, 0xba, 0xd7, 0xab, 0x67, 0x37, + 0x0e, 0xba, 0x84, 0xb8, 0x5c, 0x32, 0xf7, 0x35, 0x2a, 0xb6, 0xf9, 0xb6, 0xb5, 0x35, 0x0f, 0x31, + 0x80, 0xba, 0xfb, 0xbb, 0x0c, 0xba, 0xee, 0x38, 0xb4, 0xbb, 0x24, 0xab, 0xaf, 0x24, 0xa2, 0xba, + 0xcf, 0xbb, 0x8b, 0x34, 0xfd, 0x31, 0xc4, 0x3b, 0x62, 0x2e, 0x41, 0x3a, 0xb1, 0xab, 0xb7, 0xaa, + 0x79, 0xba, 0xaa, 0x3a, 0x66, 0xbb, 0x69, 0xb8, 0xec, 0x38, 0x57, 0xb7, 0x9a, 0xb4, 0x44, 0x36, + 0x10, 0x34, 0x10, 0x25, 0xa6, 0xbb, 0xb0, 0x3a, 0x44, 0xab, 0xde, 0xb9, 0x6b, 0xb5, 0x7d, 0xb9, + 0x90, 0xb6, 0xf8, 0x38, 0x93, 0xb3, 0xf9, 0xb1, 0xe8, 0x38, 0x63, 0x38, 0xb2, 0xba, 0xb5, 0xb9, + 0x52, 0xb9, 0x27, 0xb9, 0xb1, 0xb8, 0x11, 0xb2, 0x32, 0x31, 0x9b, 0x33, 0x00, 0x39, 0xb4, 0x3a, + 0xe6, 0x29, 0x63, 0xb7, 0xae, 0xb9, 0xa3, 0x3a, 0x8f, 0xbb, 0x89, 0x3a, 0x1c, 0xba, 0xae, 0xb9, + 0x83, 0xb9, 0x65, 0xbb, 0xbe, 0x3b, 0xc2, 0x38, 0x8c, 0xb9, 0x47, 0x38, 0x1f, 0xb5, 0x5f, 0xb8, + 0xef, 0xbb, 0x47, 0xb4, 0x74, 0x39, 0xa6, 0x38, 0x6a, 0xb3, 0xc7, 0x38, 0x0e, 0x36, 0xa4, 0xb9, + 0x80, 0x3b, 0x88, 0x2f, 0x29, 0x3b, 0x9a, 0xb4, 0x55, 0x35, 0xcf, 0x36, 0x8c, 0x38, 0xc3, 0xb8, + 0x3d, 0xab, 0x50, 0x3a, 0x83, 0x35, 0x54, 0x3a, 0xc6, 0xb6, 0x06, 0xb5, 0x1a, 0x38, 0x36, 0x3a, + 0xba, 0xbb, 0x13, 0x3a, 0x5a, 0xb9, 0x1f, 0xb2, 0x03, 0x33, 0x4e, 0xb5, 0x33, 0x32, 0xf5, 0x32, + 0xa5, 0x3b, 0x18, 0xbb, 0x9d, 0x27, 0xeb, 0xbb, 0x06, 0x9f, 0x84, 0xb4, 0xb2, 0x3a, 0x6f, 0x35, + 0x3b, 0xb0, 0x63, 0xac, 0x86, 0x33, 0xd1, 0x35, 0x9b, 0xba, 0x2d, 0xb1, 0x7f, 0xb4, 0x3d, 0xb3, + 0x34, 0x3b, 0xa6, 0x37, 0xaf, 0x3a, 0x7a, 0x38, 0x57, 0x36, 0x2d, 0x3b, 0x91, 0xb9, 0xbd, 0x35, + 0x59, 0xaa, 0x64, 0x3a, 0x96, 0xae, 0xb6, 0xb1, 0x99, 0x31, 0x94, 0xb8, 0x87, 0xb9, 0x01, 0xba, + 0xb3, 0x31, 0xb5, 0x38, 0x5d, 0xb5, 0x35, 0x2d, 0x17, 0xbb, 0x49, 0x39, 0x29, 0xbb, 0x25, 0xba, + 0x96, 0x33, 0x80, 0x2a, 0xa6, 0xba, 0xed, 0xb0, 0x5c, 0xba, 0xda, 0xb9, 0x21, 0xbb, 0x79, 0x38, + 0xb7, 0x3a, 0xd8, 0xaa, 0xe7, 0x36, 0x4f, 0x36, 0xad, 0xab, 0x6c, 0xb8, 0xed, 0xb7, 0x69, 0x3a, + 0x48, 0xb8, 0xdd, 0x36, 0x09, 0xba, 0x87, 0xa5, 0x9c, 0x34, 0x9e, 0x38, 0xc4, 0xaa, 0x8c, 0x38, + 0x64, 0x33, 0xea, 0x27, 0x59, 0xb9, 0x6c, 0x3b, 0xe7, 0x35, 0xa8, 0xb6, 0x0a, 0x21, 0xd7, 0xb7, + 0xcb, 0xae, 0x65, 0xbb, 0xb6, 0x3b, 0xbc, 0xab, 0x6c, 0x38, 0xb7, 0x32, 0x92, 0xb9, 0xf5, 0xbb, + 0xbc, 0xba, 0x6f, 0xbb, 0xc2, 0x33, 0x87, 0x33, 0x6a, 0xb4, 0x9d, 0xb3, 0x2c, 0xb3, 0xe0, 0xb7, + 0xb7, 0xb8, 0x9b, 0x36, 0x5e, 0xb8, 0xe8, 0x36, 0x72, 0x36, 0x6b, 0xb5, 0x32, 0x30, 0xd2, 0x31, + 0xbc, 0x33, 0x35, 0x2c, 0x32, 0x3b, 0x90, 0x33, 0x3e, 0xb6, 0xf1, 0xa9, 0xf6, 0xb6, 0x17, 0xb4, + 0x93, 0x3b, 0xfd, 0xa7, 0x63, 0x38, 0xde, 0x33, 0x2b, 0xb4, 0x1d, 0xb5, 0x49, 0xb0, 0x8c, 0xaf, + 0x25, 0x3b, 0xa2, 0xba, 0x0e, 0x38, 0xae, 0xbb, 0x56, 0xbb, 0xee, 0x31, 0x95, 0x3b, 0x2c, 0x37, + 0x8d, 0xac, 0x9e, 0xbb, 0xed, 0x3b, 0x3c, 0xb5, 0x58, 0x3a, 0xf9, 0x3b, 0x61, 0xb0, 0x41, 0x36, + 0xda, 0x3b, 0x89, 0xb7, 0x29, 0x2e, 0xef, 0x39, 0x8d, 0xb8, 0x12, 0xbb, 0x2a, 0xb5, 0x06, 0x34, + 0xfa, 0x34, 0xb1, 0xbb, 0x09, 0x39, 0x21, 0xb9, 0x67, 0xbb, 0x98, 0x3b, 0xd8, 0x35, 0x39, 0xba, + 0xef, 0xa9, 0x62, 0x3b, 0x48, 0x35, 0xbb, 0xba, 0x5b, 0xad, 0xc6, 0xa7, 0x9d, 0x37, 0x9a, 0x37, + 0x8c, 0xb3, 0x74, 0xb8, 0xf8, 0xbb, 0x4c, 0xb4, 0xab, 0xad, 0x06, 0x39, 0xbc, 0xb9, 0x76, 0xb7, + 0x6b, 0x1e, 0x23, 0xba, 0x9c, 0x39, 0xcd, 0xb9, 0x1a, 0x38, 0x12, 0xa3, 0x00, 0x29, 0xc3, 0xb9, + 0x44, 0xb1, 0x8c, 0xb7, 0x65, 0xb6, 0x2e, 0x37, 0x4c, 0xbb, 0xa7, 0x36, 0xa3, 0xb1, 0x84, 0x28, + 0x5d, 0xb9, 0x00, 0x34, 0x7e, 0x37, 0x5b, 0xb9, 0xdf, 0xb8, 0x6b, 0x39, 0xb2, 0xbb, 0xb7, 0xba, + 0xa2, 0x3b, 0xe4, 0xb8, 0xb7, 0x32, 0xbe, 0x38, 0xe7, 0x35, 0x32, 0x38, 0x78, 0xb4, 0xa3, 0xb9, + 0xa8, 0xba, 0x55, 0xad, 0x9e, 0xb6, 0x80, 0x39, 0x4c, 0x38, 0x98, 0xbb, 0x1f, 0xb8, 0x9c, 0x30, + 0x8a, 0x36, 0xf8, 0xbb, 0x3d, 0x38, 0xb9, 0xb8, 0x8d, 0x3a, 0xca, 0x34, 0x26, 0xb4, 0xfb, 0xb5, + 0x4e, 0x39, 0xa9, 0x30, 0xa5, 0x27, 0x6a, 0xb7, 0xd6, 0xbb, 0xae, 0x38, 0xe7, 0x2a, 0xe8, 0xb9, + 0xf2, 0x38, 0x10, 0x3b, 0xe4, 0xba, 0x22, 0x37, 0x4d, 0x3b, 0xd5, 0xb8, 0xee, 0x31, 0xc3, 0x33, + 0x54, 0xb2, 0xa3, 0x32, 0x8a, 0xb5, 0xd1, 0x36, 0xfa, 0x3a, 0xe8, 0x38, 0x7a, 0xb8, 0x5a, 0x9e, + 0x23, 0xba, 0xab, 0x30, 0xe8, 0x2f, 0xd6, 0x39, 0xa1, 0x38, 0x2b, 0xb9, 0x65, 0x3b, 0xb6, 0x37, + 0x09, 0x39, 0x82, 0xa9, 0xea, 0xa9, 0xa5, 0x39, 0x47, 0x35, 0xc7, 0x34, 0xf8, 0xb1, 0xe4, 0x38, + 0xe7, 0x39, 0x7a, 0xb7, 0x39, 0xba, 0x2c, 0xbb, 0xfd, 0xb8, 0xc2, 0xb8, 0xa8, 0xba, 0xa4, 0x3a, + 0x62, 0xb8, 0x66, 0x38, 0xaf, 0x30, 0x79, 0xb3, 0xea, 0xba, 0xed, 0xb3, 0xd8, 0xba, 0xa6, 0x2b, + 0x6a, 0xba, 0x16, 0x2e, 0xb4, 0x3a, 0x31, 0x3a, 0x0a, 0xb6, 0x45, 0xb4, 0x32, 0xb2, 0x13, 0xb9, + 0x03, 0x2c, 0x2d, 0x2c, 0x9c, 0x39, 0x43, 0x38, 0xef, 0x2c, 0x52, 0x2e, 0x32, 0xbb, 0x97, 0xba, + 0x96, 0xb8, 0xd9, 0xb5, 0xab, 0x3b, 0x77, 0xb5, 0x5f, 0xb2, 0x1a, 0x3a, 0xd0, 0xb6}; +unsigned char fc_fp16_weight_ref[] = { + 0x99, 0x38, 0x65, 0x37, 0x53, 0xad, 0x81, 0xb8, 0xf7, 0x2d, 0x20, 0x38, 0x28, 0xb8, 0xe5, 0x3a, + 0x1d, 0xba, 0x05, 0xb9, 0xc2, 0xba, 0x0a, 0x35, 0x05, 0xb9, 0x0b, 0xb5, 0x73, 0x30, 0x84, 0xb9, + 0x74, 0xb0, 0x31, 0x38, 0x98, 0xae, 0xf7, 0x3b, 0x83, 0xb4, 0x01, 0xa9, 0x47, 0x3b, 0x36, 0x36, + 0x7f, 0x2d, 0xa3, 0x31, 0x9a, 0x3b, 0x23, 0xb8, 0xfc, 0x3a, 0xda, 0xa4, 0x19, 0xb4, 0xbb, 0x30, + 0xe0, 0xbb, 0xa3, 0x3b, 0xad, 0x32, 0x60, 0x3a, 0x94, 0x38, 0x01, 0xbb, 0x30, 0x33, 0x8d, 0xb6, + 0x35, 0x3a, 0xeb, 0xb4, 0xce, 0x33, 0xb4, 0x39, 0x32, 0xb6, 0x02, 0x38, 0xdd, 0x36, 0x7d, 0xba, + 0xde, 0xba, 0x34, 0xb8, 0x3f, 0x39, 0xb8, 0x39, 0xe7, 0xb4, 0x68, 0xba, 0x40, 0x32, 0x46, 0xb8, + 0x43, 0xb7, 0x60, 0x36, 0x24, 0x3a, 0xc0, 0xb4, 0x27, 0x39, 0x32, 0x36, 0xfe, 0xb9, 0x10, 0xbb, + 0x8b, 0x2f, 0xf8, 0xb0, 0x58, 0xb3, 0x24, 0xba, 0xf2, 0xb9, 0x76, 0xbb, 0x2a, 0x3b, 0xe1, 0xb9, + 0xcf, 0x34, 0x24, 0x39, 0x60, 0xb8, 0x0c, 0xa9, 0x4a, 0xb7, 0xca, 0xb8, 0x2d, 0x3a, 0xa2, 0xb4, + 0xe1, 0x39, 0xc1, 0xa3, 0xd2, 0xac, 0xd0, 0xba, 0x5f, 0xba, 0x3d, 0xb8, 0xd0, 0x34, 0x8d, 0xb6, + 0xc8, 0x31, 0xa0, 0x39, 0x7f, 0x1e, 0xa6, 0x38, 0xb7, 0xba, 0x30, 0x39, 0x4e, 0x39, 0x23, 0x30, + 0x08, 0x38, 0x2b, 0x34, 0xc0, 0x36, 0xf9, 0x38, 0xed, 0x35, 0x5c, 0x37, 0x94, 0xba, 0xc7, 0x39, + 0x01, 0xb1, 0xf7, 0x35, 0xf2, 0xa7, 0xc8, 0x33, 0x39, 0xb7, 0x43, 0xb0, 0x42, 0x38, 0xca, 0x30, + 0xc2, 0xab, 0xaf, 0xb4, 0x73, 0xbb, 0xb9, 0x3b, 0x7d, 0x35, 0x36, 0xb4, 0x2d, 0xb4, 0x16, 0xba, + 0x1c, 0x35, 0x29, 0x35, 0x1c, 0xb4, 0x0c, 0x32, 0xc1, 0xbb, 0x20, 0x3b, 0xd3, 0xba, 0xd7, 0xab, + 0x79, 0x37, 0x6c, 0xb7, 0x40, 0x25, 0x09, 0x2e, 0x95, 0xbb, 0x58, 0x3b, 0x15, 0xb0, 0x67, 0x37, + 0x0e, 0xba, 0x8b, 0x34, 0xa6, 0xbb, 0x11, 0xb2, 0x8c, 0xb9, 0xcf, 0x36, 0x33, 0x32, 0x3d, 0xb3, + 0x84, 0xb8, 0xfd, 0x31, 0xb0, 0x3a, 0x32, 0x31, 0x47, 0x38, 0x8c, 0x38, 0xf5, 0x32, 0x34, 0x3b, + 0x5c, 0x32, 0xc4, 0x3b, 0x44, 0xab, 0x9b, 0x33, 0x1f, 0xb5, 0xc3, 0xb8, 0xa5, 0x3b, 0xa6, 0x37, + 0xf7, 0x35, 0x62, 0x2e, 0xde, 0xb9, 0x00, 0x39, 0x5f, 0xb8, 0x3d, 0xab, 0x18, 0xbb, 0xaf, 0x3a, + 0x2a, 0xb6, 0x41, 0x3a, 0x6b, 0xb5, 0xb4, 0x3a, 0xef, 0xbb, 0x50, 0x3a, 0x9d, 0x27, 0x7a, 0x38, + 0xf9, 0xb6, 0xb1, 0xab, 0x7d, 0xb9, 0xe6, 0x29, 0x47, 0xb4, 0x83, 0x35, 0xeb, 0xbb, 0x57, 0x36, + 0xb5, 0x35, 0xb7, 0xaa, 0x90, 0xb6, 0x63, 0xb7, 0x74, 0x39, 0x54, 0x3a, 0x06, 0x9f, 0x2d, 0x3b, + 0x0f, 0x31, 0x79, 0xba, 0xf8, 0x38, 0xae, 0xb9, 0xa6, 0x38, 0xc6, 0xb6, 0x84, 0xb4, 0x91, 0xb9, + 0x80, 0xba, 0xaa, 0x3a, 0x93, 0xb3, 0xa3, 0x3a, 0x6a, 0xb3, 0x06, 0xb5, 0xb2, 0x3a, 0xbd, 0x35, + 0xfb, 0xbb, 0x66, 0xbb, 0xf9, 0xb1, 0x8f, 0xbb, 0xc7, 0x38, 0x1a, 0x38, 0x6f, 0x35, 0x59, 0xaa, + 0x0c, 0xba, 0x69, 0xb8, 0xe8, 0x38, 0x89, 0x3a, 0x0e, 0x36, 0x36, 0x3a, 0x3b, 0xb0, 0x64, 0x3a, + 0xee, 0x38, 0xec, 0x38, 0x63, 0x38, 0x1c, 0xba, 0xa4, 0xb9, 0xba, 0xbb, 0x63, 0xac, 0x96, 0xae, + 0xb4, 0xbb, 0x57, 0xb7, 0xb2, 0xba, 0xae, 0xb9, 0x80, 0x3b, 0x13, 0x3a, 0x86, 0x33, 0xb6, 0xb1, + 0x24, 0xab, 0x9a, 0xb4, 0xb5, 0xb9, 0x83, 0xb9, 0x88, 0x2f, 0x5a, 0xb9, 0xd1, 0x35, 0x99, 0x31, + 0xaf, 0x24, 0x44, 0x36, 0x52, 0xb9, 0x65, 0xbb, 0x29, 0x3b, 0x1f, 0xb2, 0x9b, 0xba, 0x94, 0xb8, + 0xa2, 0xba, 0x10, 0x34, 0x27, 0xb9, 0xbe, 0x3b, 0x9a, 0xb4, 0x03, 0x33, 0x2d, 0xb1, 0x87, 0xb9, + 0xcf, 0xbb, 0x10, 0x25, 0xb1, 0xb8, 0xc2, 0x38, 0x55, 0x35, 0x4e, 0xb5, 0x7f, 0xb4, 0x01, 0xba, + 0xb3, 0x31, 0xd8, 0xaa, 0x59, 0xb9, 0x87, 0x33, 0x3e, 0xb6, 0xee, 0x31, 0x2a, 0xb5, 0x9a, 0x37, + 0xb5, 0x38, 0xe7, 0x36, 0x6c, 0x3b, 0x6a, 0xb4, 0xf1, 0xa9, 0x95, 0x3b, 0x06, 0x34, 0x8c, 0xb3, + 0x5d, 0xb5, 0x4f, 0x36, 0xe7, 0x35, 0x9d, 0xb3, 0xf6, 0xb6, 0x2c, 0x37, 0xfa, 0x34, 0x74, 0xb8, + 0x35, 0x2d, 0xad, 0xab, 0xa8, 0xb6, 0x2c, 0xb3, 0x17, 0xb4, 0x8d, 0xac, 0xb1, 0xbb, 0xf8, 0xbb, + 0x17, 0xbb, 0x6c, 0xb8, 0x0a, 0x21, 0xe0, 0xb7, 0x93, 0x3b, 0x9e, 0xbb, 0x09, 0x39, 0x4c, 0xb4, + 0x49, 0x39, 0xed, 0xb7, 0xd7, 0xb7, 0xb7, 0xb8, 0xfd, 0xa7, 0xed, 0x3b, 0x21, 0xb9, 0xab, 0xad, + 0x29, 0xbb, 0x69, 0x3a, 0xcb, 0xae, 0x9b, 0x36, 0x63, 0x38, 0x3c, 0xb5, 0x67, 0xbb, 0x06, 0x39, + 0x25, 0xba, 0x48, 0xb8, 0x65, 0xbb, 0x5e, 0xb8, 0xde, 0x33, 0x58, 0x3a, 0x98, 0x3b, 0xbc, 0xb9, + 0x96, 0x33, 0xdd, 0x36, 0xb6, 0x3b, 0xe8, 0x36, 0x2b, 0xb4, 0xf9, 0x3b, 0xd8, 0x35, 0x76, 0xb7, + 0x80, 0x2a, 0x09, 0xba, 0xbc, 0xab, 0x72, 0x36, 0x1d, 0xb5, 0x61, 0xb0, 0x39, 0xba, 0x6b, 0x1e, + 0xa6, 0xba, 0x87, 0xa5, 0x6c, 0x38, 0x6b, 0xb5, 0x49, 0xb0, 0x41, 0x36, 0xef, 0xa9, 0x23, 0xba, + 0xed, 0xb0, 0x9c, 0x34, 0xb7, 0x32, 0x32, 0x30, 0x8c, 0xaf, 0xda, 0x3b, 0x62, 0x3b, 0x9c, 0x39, + 0x5c, 0xba, 0x9e, 0x38, 0x92, 0xb9, 0xd2, 0x31, 0x25, 0x3b, 0x89, 0xb7, 0x48, 0x35, 0xcd, 0xb9, + 0xda, 0xb9, 0xc4, 0xaa, 0xf5, 0xbb, 0xbc, 0x33, 0xa2, 0xba, 0x29, 0x2e, 0xbb, 0xba, 0x1a, 0x38, + 0x21, 0xbb, 0x8c, 0x38, 0xbc, 0xba, 0x35, 0x2c, 0x0e, 0x38, 0xef, 0x39, 0x5b, 0xad, 0x12, 0xa3, + 0x79, 0x38, 0x64, 0x33, 0x6f, 0xbb, 0x32, 0x3b, 0xae, 0xbb, 0x8d, 0xb8, 0xc6, 0xa7, 0x00, 0x29, + 0xb7, 0x3a, 0xea, 0x27, 0xc2, 0x33, 0x90, 0x33, 0x56, 0xbb, 0x12, 0xbb, 0x9d, 0x37, 0xc3, 0xb9, + 0x44, 0xb1, 0x8c, 0xb7, 0x65, 0xb6, 0x2e, 0x37, 0x4c, 0xbb, 0xa7, 0x36, 0xa3, 0xb1, 0x84, 0x28, + 0x5d, 0xb9, 0x00, 0x34, 0x7e, 0x37, 0x5b, 0xb9, 0xdf, 0xb8, 0x6b, 0x39, 0xb2, 0xbb, 0xb7, 0xba, + 0xa2, 0x3b, 0xe4, 0xb8, 0xb7, 0x32, 0xbe, 0x38, 0xe7, 0x35, 0x32, 0x38, 0x78, 0xb4, 0xa3, 0xb9, + 0xa8, 0xba, 0x55, 0xad, 0x9e, 0xb6, 0x80, 0x39, 0x4c, 0x38, 0x98, 0xbb, 0x1f, 0xb8, 0x9c, 0x30, + 0x8a, 0x36, 0xf8, 0xbb, 0x3d, 0x38, 0xb9, 0xb8, 0x8d, 0x3a, 0xca, 0x34, 0x26, 0xb4, 0xfb, 0xb5, + 0x4e, 0x39, 0xa9, 0x30, 0xa5, 0x27, 0x6a, 0xb7, 0xd6, 0xbb, 0xae, 0x38, 0xe7, 0x2a, 0xe8, 0xb9, + 0xf2, 0x38, 0x10, 0x3b, 0xe4, 0xba, 0x22, 0x37, 0x4d, 0x3b, 0xd5, 0xb8, 0xee, 0x31, 0xc3, 0x33, + 0x54, 0xb2, 0xa3, 0x32, 0x8a, 0xb5, 0xd1, 0x36, 0xfa, 0x3a, 0xe8, 0x38, 0x7a, 0xb8, 0x5a, 0x9e, + 0x23, 0xba, 0xab, 0x30, 0xe8, 0x2f, 0xd6, 0x39, 0xa1, 0x38, 0x2b, 0xb9, 0x65, 0x3b, 0xb6, 0x37, + 0x09, 0x39, 0x82, 0xa9, 0xea, 0xa9, 0xa5, 0x39, 0x47, 0x35, 0xc7, 0x34, 0xf8, 0xb1, 0xe4, 0x38, + 0xe7, 0x39, 0x7a, 0xb7, 0x39, 0xba, 0x2c, 0xbb, 0xfd, 0xb8, 0xc2, 0xb8, 0xa8, 0xba, 0xa4, 0x3a, + 0x62, 0xb8, 0x66, 0x38, 0xaf, 0x30, 0x79, 0xb3, 0xea, 0xba, 0xed, 0xb3, 0xd8, 0xba, 0xa6, 0x2b, + 0x6a, 0xba, 0x16, 0x2e, 0xb4, 0x3a, 0x31, 0x3a, 0x0a, 0xb6, 0x45, 0xb4, 0x32, 0xb2, 0x13, 0xb9, + 0x03, 0x2c, 0x2d, 0x2c, 0x9c, 0x39, 0x43, 0x38, 0xef, 0x2c, 0x52, 0x2e, 0x32, 0xbb, 0x97, 0xba, + 0x96, 0xb8, 0xd9, 0xb5, 0xab, 0x3b, 0x77, 0xb5, 0x5f, 0xb2, 0x1a, 0x3a, 0xd0, 0xb6}; +unsigned char fc_fp16_bias[] = { + 0xdb, 0x3d, 0x53, 0x45, 0x4c, 0x37, 0x68, 0x2f, 0xea, 0x43, 0x00, 0x42, 0x39, 0x45, 0x22, 0xc1, + 0xd1, 0x2a, 0x1f, 0x42, 0xdd, 0xba, 0xd6, 0x42, 0x1b, 0x3d, 0xeb, 0x44, 0x13, 0x3a, 0xa3, 0x43, + 0xc6, 0xc3, 0xf4, 0x3d, 0x18, 0x3c, 0xd0, 0x3c, 0x78, 0x45, 0x90, 0xc3, 0xe2, 0x39, 0xc4, 0xbe, + 0x87, 0xba, 0x1e, 0xc1, 0xf0, 0x45, 0x75, 0x44, 0x91, 0x43, 0x5e, 0xbb, 0x71, 0x45}; +unsigned char fc_fp16_out[] = { + 0x6b, 0x45, 0xa9, 0x42, 0x5a, 0x3d, 0xbf, 0xc7, 0xd4, 0x34, 0xe6, 0x45, 0xec, 0x39, 0xeb, 0xb2, + 0x5c, 0xc3, 0x53, 0xc1, 0x81, 0xc0, 0x7b, 0x48, 0x5a, 0x40, 0x92, 0x48, 0xc7, 0xc2, 0xf3, 0x3f, + 0x49, 0x42, 0x0c, 0xba, 0x86, 0x38, 0xd8, 0x45, 0x69, 0xbf, 0x24, 0xca, 0x40, 0xc6, 0x63, 0xc1, + 0x26, 0x46, 0x32, 0xc2, 0xe5, 0x44, 0x32, 0x4a, 0x56, 0xc0, 0xb9, 0xc8, 0xde, 0x41}; + +unsigned char fc_int8_in[] = {}; +unsigned char fc_int8_weight[] = {}; +unsigned char fc_int8_weight_ref[] = {}; +unsigned char fc_int8_bias[] = {}; +unsigned char fc_int8_out[] = {}; diff --git a/tests/unit_test/valid_data/gemm.dat b/tests/unit_test/valid_data/gemm.dat new file mode 100644 index 00000000..21a0c298 --- /dev/null +++ b/tests/unit_test/valid_data/gemm.dat @@ -0,0 +1,881 @@ +/************************************************************************** + * NCHW layout + **************************************************************************/ +// A:[31, 16] B:[16, 20] C:[31, 20] +unsigned char gemm_fp32_a[] = { + 0x95, 0x0b, 0x32, 0xc0, 0x63, 0x48, 0x80, 0xc0, 0x60, 0xbf, 0x2b, 0xc0, 0x71, 0xdb, 0x44, 0xc0, + 0x7a, 0xd4, 0x97, 0xc0, 0xe9, 0x05, 0x83, 0xc0, 0xd7, 0x01, 0x66, 0xc0, 0xc2, 0x0e, 0x83, 0xc0, + 0xdd, 0x34, 0x4d, 0xc0, 0x60, 0xe0, 0x57, 0xc0, 0xc7, 0xdf, 0x45, 0xc0, 0xaa, 0xd1, 0x5e, 0xc0, + 0xe2, 0xee, 0x4b, 0xc0, 0x41, 0x00, 0xcd, 0xbf, 0xa3, 0xa2, 0x2e, 0xc0, 0x57, 0x8f, 0x53, 0xc0, + 0xf0, 0x4d, 0x43, 0xc0, 0x48, 0x36, 0x75, 0xc0, 0xd4, 0xfe, 0x0b, 0xc0, 0x5b, 0x65, 0xa0, 0xc0, + 0x3d, 0xec, 0x9c, 0xc0, 0x43, 0xcd, 0x37, 0xc0, 0x8e, 0xe8, 0x4b, 0xc0, 0x08, 0xbc, 0xff, 0xbf, + 0x66, 0x74, 0xa8, 0xbf, 0x72, 0x97, 0x31, 0xc0, 0x90, 0xe2, 0xa0, 0xc0, 0x72, 0x2f, 0xce, 0xbf, + 0x58, 0x60, 0x22, 0xc0, 0x00, 0x18, 0x5c, 0xc0, 0x01, 0x79, 0x6e, 0xc0, 0xad, 0x2f, 0xa7, 0xbf, + 0xa6, 0xa5, 0x86, 0xc0, 0xb6, 0xaf, 0x65, 0xc0, 0x5e, 0xdc, 0x90, 0xc0, 0x94, 0x37, 0x74, 0xc0, + 0x1f, 0xc4, 0x37, 0xc0, 0xf4, 0xf4, 0xd8, 0xbf, 0x00, 0xcc, 0x6e, 0xc0, 0x6d, 0x8a, 0x23, 0xc0, + 0x38, 0xf7, 0x0d, 0xc0, 0xe7, 0xfb, 0x5b, 0xc0, 0x5a, 0x73, 0x77, 0xc0, 0x75, 0x3e, 0x9b, 0xc0, + 0x79, 0x5c, 0x56, 0xc0, 0x58, 0x87, 0x57, 0xc0, 0x7f, 0xf4, 0x2d, 0xc0, 0xe7, 0xd0, 0xcd, 0xbe, + 0x87, 0xd8, 0x57, 0xc0, 0x02, 0x35, 0x46, 0xc0, 0x97, 0x28, 0x2a, 0xc0, 0x94, 0x0e, 0xeb, 0xbf, + 0x0c, 0xdd, 0x6a, 0xc0, 0x1d, 0x67, 0x63, 0xc0, 0x7a, 0x58, 0x2e, 0xc0, 0x2e, 0x81, 0x88, 0xc0, + 0xff, 0x8e, 0x9e, 0xbf, 0x10, 0xdb, 0x96, 0xc0, 0xad, 0xad, 0x2c, 0xc0, 0xb5, 0x07, 0x5d, 0xc0, + 0x5a, 0x14, 0x2b, 0xc0, 0xd3, 0xc8, 0x50, 0xc0, 0x63, 0xb1, 0x80, 0xc0, 0x54, 0x94, 0x5d, 0xc0, + 0x92, 0x6e, 0x94, 0xc0, 0xef, 0x1c, 0xd3, 0xbf, 0xa3, 0x31, 0x5a, 0xc0, 0x8f, 0xb7, 0x48, 0xc0, + 0xd0, 0x43, 0x88, 0xc0, 0x7c, 0x2a, 0x0f, 0xc0, 0x32, 0x3e, 0x42, 0xc0, 0x63, 0x5f, 0x07, 0xc0, + 0xf2, 0xdc, 0x23, 0xc0, 0xdd, 0xb9, 0x74, 0xc0, 0xf8, 0xe5, 0x61, 0xc0, 0xad, 0x7e, 0x84, 0xc0, + 0x9e, 0x5b, 0x45, 0xc0, 0x07, 0x9c, 0x7f, 0xc0, 0xce, 0x22, 0x4d, 0xc0, 0x6b, 0x64, 0x08, 0xc0, + 0x60, 0x1d, 0x47, 0xc0, 0xd9, 0xaa, 0x51, 0xc0, 0xf7, 0x5b, 0x21, 0xc0, 0xe1, 0x9c, 0x46, 0xc0, + 0x86, 0xc6, 0x15, 0xc0, 0x8e, 0x15, 0x48, 0xc0, 0x75, 0xe1, 0x14, 0xc0, 0x99, 0x5a, 0x81, 0xc0, + 0xf8, 0x95, 0xb9, 0xbf, 0xa7, 0xe4, 0x75, 0xbf, 0xc2, 0x70, 0x1a, 0xc0, 0x6f, 0xf5, 0x56, 0xc0, + 0x2b, 0x76, 0x34, 0xc0, 0x0a, 0x11, 0x43, 0xc0, 0x04, 0xac, 0xd7, 0xbf, 0x4f, 0xd4, 0x52, 0xc0, + 0xff, 0x0e, 0xbd, 0xbf, 0x29, 0x73, 0x58, 0xc0, 0xfe, 0xf8, 0x82, 0xc0, 0xfd, 0x8e, 0x29, 0xc0, + 0x0b, 0x0c, 0x48, 0xc0, 0xd6, 0x01, 0x84, 0xc0, 0xb2, 0x85, 0x47, 0xc0, 0x74, 0x93, 0xae, 0xbf, + 0x18, 0xdc, 0x36, 0xc0, 0x4e, 0x37, 0x84, 0xc0, 0xd5, 0xf9, 0x74, 0xc0, 0x8f, 0x49, 0x03, 0xc0, + 0x78, 0x10, 0x0f, 0xc0, 0xac, 0x86, 0x22, 0xc0, 0x5e, 0x42, 0x4c, 0xc0, 0x53, 0x7c, 0x38, 0xc0, + 0x4f, 0x37, 0x7f, 0xc0, 0x42, 0xed, 0x67, 0xc0, 0xc3, 0x08, 0x5d, 0xc0, 0xcb, 0x54, 0xc9, 0xbf, + 0x34, 0x2b, 0x35, 0xc0, 0xcf, 0xe9, 0xfa, 0xbf, 0x06, 0x39, 0x2e, 0xc0, 0x02, 0x7a, 0x0d, 0xc0, + 0x87, 0x28, 0x0a, 0xc0, 0x25, 0x6f, 0x85, 0xc0, 0xb1, 0x34, 0x27, 0xbf, 0xf2, 0x83, 0x60, 0xc0, + 0xf6, 0xc7, 0x56, 0xc0, 0xe1, 0xa7, 0x42, 0xc0, 0xec, 0x0f, 0x52, 0xc0, 0x36, 0x49, 0x7e, 0xbf, + 0x54, 0x70, 0x51, 0xc0, 0x2b, 0xba, 0x1d, 0xc0, 0x79, 0xa3, 0x8f, 0xc0, 0x59, 0x80, 0x45, 0xc0, + 0x3c, 0x35, 0x61, 0xc0, 0xf6, 0x16, 0xbe, 0xbf, 0x96, 0xdd, 0x27, 0xc0, 0x53, 0xfc, 0x33, 0xc0, + 0xd3, 0x0b, 0x86, 0xc0, 0x47, 0x2c, 0x3b, 0xc0, 0x43, 0x1a, 0x70, 0xc0, 0x59, 0xba, 0x37, 0xc0, + 0x4c, 0x3a, 0xfa, 0xbf, 0x9e, 0x13, 0x7b, 0xc0, 0x3d, 0xe9, 0x3f, 0xc0, 0xb0, 0xe7, 0x79, 0xc0, + 0x25, 0xcd, 0x8a, 0xc0, 0xfa, 0xe7, 0x48, 0xc0, 0x99, 0x37, 0x2a, 0xc0, 0xf2, 0xbb, 0x35, 0xc0, + 0x84, 0x2f, 0x77, 0xc0, 0x45, 0x83, 0xcb, 0xbf, 0x92, 0xb4, 0x61, 0xc0, 0xa1, 0xad, 0x3b, 0xc0, + 0x33, 0xec, 0xeb, 0xbf, 0x07, 0x3b, 0xb5, 0xbe, 0x4a, 0x0b, 0x61, 0xc0, 0xc4, 0xb6, 0x5a, 0xc0, + 0x41, 0x5b, 0x39, 0xc0, 0x0d, 0x4f, 0xe4, 0xbf, 0x78, 0xb1, 0x64, 0xc0, 0xc1, 0x55, 0x20, 0xc0, + 0x14, 0x0a, 0x43, 0xc0, 0xe9, 0xe3, 0x82, 0xc0, 0x86, 0xcc, 0x14, 0xc0, 0xe7, 0xa6, 0x0c, 0xc0, + 0x62, 0x11, 0x9d, 0xc0, 0xcd, 0xa6, 0x40, 0xc0, 0x17, 0x05, 0x4f, 0xc0, 0xe4, 0xa1, 0x02, 0xc0, + 0xfb, 0xa0, 0x4b, 0xc0, 0xed, 0x11, 0x08, 0xc0, 0x53, 0xfe, 0x5c, 0xc0, 0x64, 0xea, 0x03, 0xc0, + 0x47, 0xea, 0x8f, 0xc0, 0xea, 0x9e, 0x07, 0xc0, 0xd9, 0x10, 0xa9, 0xc0, 0x50, 0xd5, 0x0c, 0xc0, + 0x2b, 0x0c, 0x0f, 0xc0, 0xf3, 0x95, 0x11, 0xc0, 0xe9, 0xea, 0x38, 0xc0, 0x5b, 0x21, 0x10, 0xc0, + 0xc4, 0xa1, 0x70, 0xc0, 0xea, 0xa2, 0x04, 0xc0, 0x8a, 0xb5, 0x48, 0xc0, 0x6e, 0xc7, 0x6f, 0xc0, + 0x49, 0x10, 0x4b, 0xc0, 0xf6, 0xb2, 0x47, 0xc0, 0xf2, 0x26, 0x66, 0xc0, 0xf8, 0xdd, 0x00, 0xc0, + 0x4e, 0xae, 0x1d, 0xc0, 0x73, 0xeb, 0x1d, 0xc0, 0xf9, 0xbb, 0x27, 0xc0, 0x88, 0x9d, 0x24, 0xc0, + 0x4c, 0xc9, 0x9f, 0xbf, 0x8d, 0xab, 0x02, 0xc0, 0xdf, 0x6b, 0x12, 0xc0, 0x98, 0x80, 0x5e, 0xc0, + 0xe7, 0x03, 0x95, 0xbf, 0x87, 0x58, 0xc5, 0xbf, 0x0e, 0x2d, 0x29, 0xc0, 0xdc, 0x0a, 0xfe, 0xbf, + 0x4d, 0xe4, 0xdd, 0xbf, 0xd8, 0x63, 0x1c, 0xc0, 0x59, 0x24, 0x22, 0xc0, 0x90, 0x28, 0x45, 0xc0, + 0x00, 0xf5, 0x4c, 0xc0, 0xec, 0x10, 0x33, 0xc0, 0x18, 0x83, 0x1c, 0xc0, 0x58, 0x74, 0x64, 0xc0, + 0x52, 0x1c, 0x10, 0xc0, 0xa1, 0x61, 0x17, 0xc0, 0xdc, 0xc7, 0x76, 0xc0, 0x0c, 0x57, 0x36, 0xc0, + 0xdf, 0x47, 0xe4, 0xbf, 0x41, 0x1a, 0x06, 0xc0, 0x0b, 0xdf, 0x82, 0xc0, 0xd5, 0xa6, 0x3c, 0xc0, + 0x34, 0xcb, 0x67, 0xc0, 0x49, 0x12, 0x6e, 0xc0, 0xf3, 0x12, 0x9a, 0xc0, 0x21, 0xef, 0x39, 0xc0, + 0xa4, 0x00, 0xd9, 0xbf, 0x96, 0x49, 0x2c, 0xc0, 0xbe, 0xb0, 0x6e, 0xc0, 0x74, 0x06, 0x2b, 0xc0, + 0xdd, 0x37, 0x4a, 0xc0, 0xd8, 0x62, 0x80, 0xc0, 0x36, 0x63, 0x0b, 0xc0, 0x4d, 0xb1, 0xd7, 0xbf, + 0x8c, 0x06, 0x45, 0xc0, 0xdb, 0xbc, 0x62, 0xc0, 0x07, 0x9e, 0x2f, 0xc0, 0xb1, 0x4e, 0xf9, 0xbf, + 0xef, 0xe8, 0x10, 0xc0, 0x7b, 0x20, 0x6c, 0xc0, 0xc2, 0xce, 0x7c, 0xc0, 0x77, 0x03, 0x23, 0xc0, + 0x4f, 0xdb, 0x2b, 0xc0, 0xc6, 0x6f, 0x5e, 0xc0, 0x85, 0x0f, 0x8b, 0xbf, 0x1a, 0x3a, 0x8d, 0xc0, + 0x32, 0x29, 0x80, 0xc0, 0xfb, 0xba, 0x13, 0xc0, 0xbf, 0xfb, 0x0e, 0xc0, 0xdd, 0x46, 0x1b, 0xc0, + 0x9e, 0x57, 0x15, 0xc0, 0x0d, 0x7b, 0x28, 0xc0, 0x26, 0x91, 0x53, 0xc0, 0x90, 0x83, 0xc7, 0xbf, + 0xc1, 0x07, 0x2d, 0xc0, 0x72, 0x83, 0x61, 0xc0, 0xef, 0xdc, 0x19, 0xc0, 0xd0, 0x9a, 0x12, 0xc0, + 0xb2, 0xd6, 0x6c, 0xc0, 0xe7, 0x1d, 0x36, 0xc0, 0x86, 0x93, 0xcd, 0xbf, 0xdc, 0x23, 0x4e, 0xc0, + 0xb5, 0x2f, 0x88, 0xc0, 0xb5, 0x09, 0xcb, 0xbf, 0x08, 0x4e, 0x6d, 0xc0, 0x25, 0x9c, 0x38, 0xc0, + 0x55, 0x87, 0x30, 0xc0, 0xfe, 0x8c, 0x5a, 0xc0, 0x72, 0x30, 0xda, 0xbf, 0x14, 0x8e, 0x1e, 0xc0, + 0x7a, 0x12, 0x1e, 0xc0, 0x62, 0x4e, 0xa8, 0xbf, 0xbe, 0xc0, 0x8f, 0xbf, 0x78, 0xff, 0x16, 0xc0, + 0x6f, 0x5a, 0x7b, 0xc0, 0x53, 0x21, 0x77, 0xc0, 0xdb, 0x62, 0x39, 0xc0, 0xc0, 0x90, 0x0c, 0xc0, + 0xdb, 0x95, 0x4d, 0xc0, 0x04, 0x10, 0x3f, 0xc0, 0xfb, 0xcb, 0xae, 0xc0, 0xd7, 0x03, 0x3d, 0xc0, + 0xe0, 0xdf, 0x2d, 0xc0, 0xa3, 0xe5, 0x5b, 0xc0, 0x60, 0xad, 0x81, 0xc0, 0xb8, 0x05, 0xf2, 0xbf, + 0x3e, 0x11, 0xc1, 0xbf, 0xbf, 0x57, 0xb4, 0xc0, 0x56, 0x68, 0x6f, 0xc0, 0xff, 0xf6, 0x84, 0xc0, + 0xa0, 0xd9, 0x32, 0xc0, 0x72, 0x23, 0x74, 0xc0, 0xe5, 0xb3, 0x24, 0xc0, 0xec, 0x1f, 0x35, 0xc0, + 0x5a, 0xe8, 0x6f, 0xc0, 0x84, 0xd6, 0xc9, 0xbf, 0x6c, 0xd3, 0x3c, 0xc0, 0x0e, 0x8e, 0x7d, 0xc0, + 0xbd, 0x56, 0x99, 0xc0, 0x4d, 0x2d, 0x63, 0xc0, 0x2e, 0x09, 0x8a, 0xc0, 0x1e, 0x51, 0x1c, 0xc0, + 0x93, 0x21, 0x74, 0xbf, 0x43, 0x8e, 0x59, 0xc0, 0x61, 0xf4, 0x3b, 0xc0, 0x56, 0x63, 0xbc, 0xc0, + 0xe5, 0x07, 0x70, 0xc0, 0xc7, 0x35, 0x0b, 0xc0, 0xf7, 0x58, 0x46, 0xc0, 0x9b, 0xc7, 0x37, 0xc0, + 0xf0, 0xaf, 0xab, 0xbf, 0x02, 0x09, 0x43, 0xc0, 0x00, 0xf9, 0x42, 0xc0, 0x1c, 0xb7, 0x6f, 0xc0, + 0x12, 0xe1, 0x56, 0xc0, 0x1d, 0x88, 0x47, 0xc0, 0xef, 0xcd, 0x31, 0xc0, 0x61, 0x76, 0xe1, 0xbf, + 0x3b, 0xd0, 0x5b, 0xc0, 0x4b, 0x75, 0x0a, 0xc0, 0xdd, 0xfc, 0x13, 0xc0, 0x91, 0xd4, 0x17, 0xc0, + 0x6a, 0xf7, 0xf2, 0xbf, 0x6f, 0x9d, 0x43, 0xc0, 0x84, 0xee, 0x30, 0xc0, 0x0d, 0x1d, 0x58, 0xc0, + 0x22, 0xdd, 0xa8, 0xbf, 0xe8, 0x7c, 0x9b, 0xc0, 0x75, 0xf8, 0x8b, 0xc0, 0x68, 0xfb, 0x48, 0xc0, + 0xad, 0x86, 0x54, 0xc0, 0xfc, 0xa0, 0xdb, 0xbf, 0x33, 0x61, 0x06, 0xc0, 0x7e, 0xfb, 0x29, 0xc0, + 0x1f, 0x4f, 0x2f, 0xc0, 0x95, 0x22, 0x83, 0xc0, 0x15, 0x8f, 0x98, 0xc0, 0xbf, 0xcf, 0x72, 0xc0, + 0xb6, 0x69, 0x40, 0xc0, 0xda, 0xb3, 0x85, 0xc0, 0xb4, 0xb2, 0x72, 0xc0, 0x24, 0xb7, 0x77, 0xc0, + 0x8b, 0xf3, 0x2f, 0xc0, 0xe7, 0xae, 0x87, 0xc0, 0x6c, 0x00, 0x29, 0xc0, 0x45, 0xf0, 0x7f, 0xc0, + 0x5b, 0x6f, 0x44, 0xc0, 0x4a, 0xf8, 0x49, 0xc0, 0x2e, 0x07, 0x8d, 0xc0, 0xc2, 0xcd, 0x25, 0xc0, + 0x7f, 0x0c, 0x36, 0xc0, 0x07, 0xcd, 0x5f, 0xc0, 0xc9, 0x33, 0x10, 0xc0, 0xf6, 0x86, 0x69, 0xc0, + 0x28, 0xed, 0x14, 0xc0, 0x93, 0xf6, 0x2d, 0xc0, 0xd4, 0x7b, 0x22, 0xc0, 0x9e, 0x53, 0x63, 0xc0, + 0x79, 0xaf, 0x1c, 0xc0, 0xc2, 0x68, 0x7d, 0xc0, 0x35, 0x39, 0x89, 0xbf, 0x68, 0xfc, 0x1d, 0xc0, + 0xf6, 0x1e, 0x3c, 0xc0, 0xe5, 0x0b, 0x46, 0xc0, 0x62, 0xbc, 0x52, 0xc0, 0xa9, 0xf1, 0x9c, 0xc0, + 0x8c, 0xd3, 0x5f, 0xc0, 0x67, 0xdf, 0x09, 0xc0, 0xe2, 0xcd, 0x1b, 0xc0, 0x70, 0xd0, 0x99, 0xbf, + 0x1b, 0xa9, 0x33, 0xc0, 0x9b, 0xf0, 0x6c, 0xc0, 0x31, 0x6f, 0x98, 0xc0, 0xfb, 0x53, 0x90, 0xc0, + 0x00, 0x12, 0x85, 0xc0, 0xf8, 0x8b, 0x43, 0xc0, 0x41, 0x26, 0xe4, 0xbf, 0xdf, 0x00, 0x82, 0xc0, + 0x70, 0x76, 0x38, 0xc0, 0x92, 0xd8, 0x44, 0xc0, 0x55, 0x97, 0x45, 0xc0, 0xdf, 0xd1, 0x35, 0xc0, + 0xc4, 0x52, 0xd6, 0xbf, 0xc8, 0x79, 0x99, 0xc0, 0x16, 0xac, 0x1e, 0xc0, 0x8d, 0xdb, 0xbc, 0xbf, + 0xfa, 0xd2, 0x47, 0xc0, 0x14, 0x11, 0xe0, 0xbf, 0x35, 0x2a, 0x8b, 0xc0, 0xa9, 0x51, 0x6a, 0xc0, + 0x23, 0x1c, 0x03, 0xc0, 0x83, 0xc7, 0x8d, 0xc0, 0x96, 0x86, 0x09, 0xc0, 0x1e, 0xf1, 0x55, 0xc0, + 0x99, 0x45, 0x88, 0xc0, 0xff, 0x94, 0x18, 0xc0, 0x29, 0x4f, 0x49, 0xc0, 0xc3, 0xdb, 0x80, 0xc0, + 0x43, 0x3f, 0x3e, 0xc0, 0xc7, 0x2b, 0x65, 0xc0, 0xd6, 0xb0, 0x48, 0xc0, 0xb5, 0xbc, 0xa8, 0xc0, + 0x0f, 0x5c, 0x44, 0xc0, 0x68, 0x11, 0x21, 0xc0, 0x5d, 0x95, 0x8b, 0xc0, 0xce, 0x0f, 0x39, 0xc0, + 0xa2, 0xc7, 0x36, 0xc0, 0x75, 0x81, 0x7b, 0xc0, 0x8e, 0x97, 0x5d, 0xc0, 0x6a, 0x87, 0x4f, 0xc0, + 0x96, 0xde, 0x7c, 0xc0, 0xb0, 0xb6, 0x4f, 0xc0, 0xd0, 0x57, 0x10, 0xc0, 0xfa, 0x48, 0x94, 0xc0, + 0xdd, 0xc8, 0x61, 0xc0, 0x28, 0x44, 0x35, 0xc0, 0x77, 0xc9, 0x17, 0xc0, 0xc7, 0x30, 0x3f, 0xbf, + 0x8d, 0xa5, 0x4f, 0xc0, 0x3b, 0xf5, 0xcf, 0xbf, 0xbe, 0xdb, 0xba, 0xbf, 0x50, 0x6d, 0xea, 0xbf, + 0x12, 0xbd, 0x6b, 0xc0, 0x9a, 0x2d, 0x48, 0xc0, 0xbd, 0xa0, 0x91, 0xc0, 0x12, 0xad, 0x41, 0xc0, + 0x28, 0x75, 0xa2, 0xc0, 0xca, 0xd3, 0x8f, 0xbf, 0xe8, 0xb8, 0xde, 0xbf, 0xb7, 0xdd, 0x09, 0xc0, + 0x56, 0x7a, 0x48, 0xc0, 0x9c, 0xa4, 0x5f, 0xc0, 0x6f, 0x1e, 0x6e, 0xc0, 0x75, 0xb8, 0x7e, 0xc0, + 0x8f, 0xb5, 0x2d, 0xc0, 0x5f, 0x4a, 0x77, 0xc0, 0x65, 0xcb, 0x8b, 0xc0, 0x41, 0xd4, 0x03, 0xc0, + 0x3a, 0xbe, 0x97, 0xc0, 0xda, 0x9a, 0x2e, 0xc0, 0x49, 0x67, 0x13, 0xc0, 0x28, 0x6b, 0xe3, 0xbf, + 0x20, 0x1a, 0x44, 0xc0, 0xd0, 0x4e, 0x04, 0xc0, 0x81, 0x36, 0x2b, 0xc0, 0x7f, 0xc2, 0x7e, 0xc0, + 0xbe, 0x66, 0x6f, 0xc0, 0xe6, 0xe8, 0x14, 0xc0, 0x53, 0x43, 0x59, 0xc0, 0x28, 0x2b, 0x98, 0xc0, + 0x9d, 0xfb, 0x4a, 0xc0, 0x91, 0xb5, 0x5c, 0xc0, 0xf7, 0x3d, 0x71, 0xc0, 0xf5, 0x6f, 0x26, 0xc0, + 0xe6, 0xfb, 0x2f, 0xc0, 0x8c, 0x84, 0x30, 0xc0, 0xa5, 0x9c, 0x12, 0xc0, 0x0e, 0x32, 0x0f, 0xc0, + 0xbb, 0xb0, 0x06, 0xc0, 0x5a, 0x92, 0x67, 0xc0, 0xfa, 0x3b, 0x48, 0xc0, 0xb1, 0x1e, 0x5a, 0xc0, + 0x3b, 0xde, 0x14, 0xc0, 0xb6, 0xe5, 0x8a, 0xc0, 0x9d, 0x0e, 0xc7, 0xbf, 0xd2, 0x3d, 0x42, 0xc0, + 0x78, 0xce, 0xe0, 0xbf, 0xcf, 0x03, 0x8c, 0xc0, 0x37, 0x38, 0x39, 0xc0, 0x5b, 0x96, 0x6a, 0xc0, + 0x10, 0x9e, 0xa1, 0xc0, 0x06, 0x58, 0x63, 0xc0, 0x81, 0x5d, 0x60, 0xc0, 0xc8, 0x39, 0x4c, 0xc0, + 0x79, 0x60, 0x11, 0xc0, 0xa9, 0xd4, 0x2e, 0xc0, 0x3a, 0x51, 0x2b, 0xc0, 0xfd, 0x7e, 0x2f, 0xc0, + 0xe6, 0xc0, 0x1f, 0xc0, 0xfa, 0x48, 0x30, 0xc0, 0xbd, 0x36, 0x13, 0xc0, 0x50, 0x50, 0x20, 0xc0, + 0x1a, 0x19, 0x08, 0xc0, 0xbd, 0xfb, 0x32, 0xc0, 0x3f, 0x99, 0x27, 0xc0, 0x0c, 0xb1, 0x44, 0xc0, + 0x7d, 0x1c, 0xd6, 0xbf, 0x6a, 0x0c, 0x86, 0xc0, 0x45, 0xab, 0x26, 0xc0, 0x9d, 0xf1, 0x25, 0xc0, + 0xcb, 0x5c, 0x5f, 0xc0, 0xb0, 0xa6, 0xf0, 0xbf, 0x34, 0x59, 0x73, 0xc0, 0xa6, 0xe6, 0xbe, 0xbf, + 0xe7, 0x36, 0x5f, 0xc0, 0x14, 0x10, 0xd0, 0xbf, 0x00, 0x16, 0xc4, 0xbf, 0x61, 0xf3, 0x39, 0xc0, + 0x18, 0x64, 0x8e, 0xc0, 0xb6, 0x4e, 0x57, 0xc0, 0xca, 0x73, 0x81, 0xc0, 0xc8, 0xf1, 0x05, 0xc0}; +unsigned char gemm_fp32_a1[] = { + 0x95, 0x0b, 0x32, 0xc0, 0xf0, 0x4d, 0x43, 0xc0, 0xa6, 0xa5, 0x86, 0xc0, 0x87, 0xd8, 0x57, 0xc0, + 0x92, 0x6e, 0x94, 0xc0, 0x60, 0x1d, 0x47, 0xc0, 0xff, 0x0e, 0xbd, 0xbf, 0x4f, 0x37, 0x7f, 0xc0, + 0x63, 0x48, 0x80, 0xc0, 0x48, 0x36, 0x75, 0xc0, 0xb6, 0xaf, 0x65, 0xc0, 0x02, 0x35, 0x46, 0xc0, + 0xef, 0x1c, 0xd3, 0xbf, 0xd9, 0xaa, 0x51, 0xc0, 0x29, 0x73, 0x58, 0xc0, 0x42, 0xed, 0x67, 0xc0, + 0x60, 0xbf, 0x2b, 0xc0, 0xd4, 0xfe, 0x0b, 0xc0, 0x5e, 0xdc, 0x90, 0xc0, 0x97, 0x28, 0x2a, 0xc0, + 0xa3, 0x31, 0x5a, 0xc0, 0xf7, 0x5b, 0x21, 0xc0, 0xfe, 0xf8, 0x82, 0xc0, 0xc3, 0x08, 0x5d, 0xc0, + 0x71, 0xdb, 0x44, 0xc0, 0x5b, 0x65, 0xa0, 0xc0, 0x94, 0x37, 0x74, 0xc0, 0x94, 0x0e, 0xeb, 0xbf, + 0x8f, 0xb7, 0x48, 0xc0, 0xe1, 0x9c, 0x46, 0xc0, 0xfd, 0x8e, 0x29, 0xc0, 0xcb, 0x54, 0xc9, 0xbf, + 0x7a, 0xd4, 0x97, 0xc0, 0x3d, 0xec, 0x9c, 0xc0, 0x1f, 0xc4, 0x37, 0xc0, 0x0c, 0xdd, 0x6a, 0xc0, + 0xd0, 0x43, 0x88, 0xc0, 0x86, 0xc6, 0x15, 0xc0, 0x0b, 0x0c, 0x48, 0xc0, 0x34, 0x2b, 0x35, 0xc0, + 0xe9, 0x05, 0x83, 0xc0, 0x43, 0xcd, 0x37, 0xc0, 0xf4, 0xf4, 0xd8, 0xbf, 0x1d, 0x67, 0x63, 0xc0, + 0x7c, 0x2a, 0x0f, 0xc0, 0x8e, 0x15, 0x48, 0xc0, 0xd6, 0x01, 0x84, 0xc0, 0xcf, 0xe9, 0xfa, 0xbf, + 0xd7, 0x01, 0x66, 0xc0, 0x8e, 0xe8, 0x4b, 0xc0, 0x00, 0xcc, 0x6e, 0xc0, 0x7a, 0x58, 0x2e, 0xc0, + 0x32, 0x3e, 0x42, 0xc0, 0x75, 0xe1, 0x14, 0xc0, 0xb2, 0x85, 0x47, 0xc0, 0x06, 0x39, 0x2e, 0xc0, + 0xc2, 0x0e, 0x83, 0xc0, 0x08, 0xbc, 0xff, 0xbf, 0x6d, 0x8a, 0x23, 0xc0, 0x2e, 0x81, 0x88, 0xc0, + 0x63, 0x5f, 0x07, 0xc0, 0x99, 0x5a, 0x81, 0xc0, 0x74, 0x93, 0xae, 0xbf, 0x02, 0x7a, 0x0d, 0xc0, + 0xdd, 0x34, 0x4d, 0xc0, 0x66, 0x74, 0xa8, 0xbf, 0x38, 0xf7, 0x0d, 0xc0, 0xff, 0x8e, 0x9e, 0xbf, + 0xf2, 0xdc, 0x23, 0xc0, 0xf8, 0x95, 0xb9, 0xbf, 0x18, 0xdc, 0x36, 0xc0, 0x87, 0x28, 0x0a, 0xc0, + 0x60, 0xe0, 0x57, 0xc0, 0x72, 0x97, 0x31, 0xc0, 0xe7, 0xfb, 0x5b, 0xc0, 0x10, 0xdb, 0x96, 0xc0, + 0xdd, 0xb9, 0x74, 0xc0, 0xa7, 0xe4, 0x75, 0xbf, 0x4e, 0x37, 0x84, 0xc0, 0x25, 0x6f, 0x85, 0xc0, + 0xc7, 0xdf, 0x45, 0xc0, 0x90, 0xe2, 0xa0, 0xc0, 0x5a, 0x73, 0x77, 0xc0, 0xad, 0xad, 0x2c, 0xc0, + 0xf8, 0xe5, 0x61, 0xc0, 0xc2, 0x70, 0x1a, 0xc0, 0xd5, 0xf9, 0x74, 0xc0, 0xb1, 0x34, 0x27, 0xbf, + 0xaa, 0xd1, 0x5e, 0xc0, 0x72, 0x2f, 0xce, 0xbf, 0x75, 0x3e, 0x9b, 0xc0, 0xb5, 0x07, 0x5d, 0xc0, + 0xad, 0x7e, 0x84, 0xc0, 0x6f, 0xf5, 0x56, 0xc0, 0x8f, 0x49, 0x03, 0xc0, 0xf2, 0x83, 0x60, 0xc0, + 0xe2, 0xee, 0x4b, 0xc0, 0x58, 0x60, 0x22, 0xc0, 0x79, 0x5c, 0x56, 0xc0, 0x5a, 0x14, 0x2b, 0xc0, + 0x9e, 0x5b, 0x45, 0xc0, 0x2b, 0x76, 0x34, 0xc0, 0x78, 0x10, 0x0f, 0xc0, 0xf6, 0xc7, 0x56, 0xc0, + 0x41, 0x00, 0xcd, 0xbf, 0x00, 0x18, 0x5c, 0xc0, 0x58, 0x87, 0x57, 0xc0, 0xd3, 0xc8, 0x50, 0xc0, + 0x07, 0x9c, 0x7f, 0xc0, 0x0a, 0x11, 0x43, 0xc0, 0xac, 0x86, 0x22, 0xc0, 0xe1, 0xa7, 0x42, 0xc0, + 0xa3, 0xa2, 0x2e, 0xc0, 0x01, 0x79, 0x6e, 0xc0, 0x7f, 0xf4, 0x2d, 0xc0, 0x63, 0xb1, 0x80, 0xc0, + 0xce, 0x22, 0x4d, 0xc0, 0x04, 0xac, 0xd7, 0xbf, 0x5e, 0x42, 0x4c, 0xc0, 0xec, 0x0f, 0x52, 0xc0, + 0x57, 0x8f, 0x53, 0xc0, 0xad, 0x2f, 0xa7, 0xbf, 0xe7, 0xd0, 0xcd, 0xbe, 0x54, 0x94, 0x5d, 0xc0, + 0x6b, 0x64, 0x08, 0xc0, 0x4f, 0xd4, 0x52, 0xc0, 0x53, 0x7c, 0x38, 0xc0, 0x36, 0x49, 0x7e, 0xbf, + 0x54, 0x70, 0x51, 0xc0, 0x25, 0xcd, 0x8a, 0xc0, 0x14, 0x0a, 0x43, 0xc0, 0x2b, 0x0c, 0x0f, 0xc0, + 0x4c, 0xc9, 0x9f, 0xbf, 0x52, 0x1c, 0x10, 0xc0, 0xdd, 0x37, 0x4a, 0xc0, 0x32, 0x29, 0x80, 0xc0, + 0x2b, 0xba, 0x1d, 0xc0, 0xfa, 0xe7, 0x48, 0xc0, 0xe9, 0xe3, 0x82, 0xc0, 0xf3, 0x95, 0x11, 0xc0, + 0x8d, 0xab, 0x02, 0xc0, 0xa1, 0x61, 0x17, 0xc0, 0xd8, 0x62, 0x80, 0xc0, 0xfb, 0xba, 0x13, 0xc0, + 0x79, 0xa3, 0x8f, 0xc0, 0x99, 0x37, 0x2a, 0xc0, 0x86, 0xcc, 0x14, 0xc0, 0xe9, 0xea, 0x38, 0xc0, + 0xdf, 0x6b, 0x12, 0xc0, 0xdc, 0xc7, 0x76, 0xc0, 0x36, 0x63, 0x0b, 0xc0, 0xbf, 0xfb, 0x0e, 0xc0, + 0x59, 0x80, 0x45, 0xc0, 0xf2, 0xbb, 0x35, 0xc0, 0xe7, 0xa6, 0x0c, 0xc0, 0x5b, 0x21, 0x10, 0xc0, + 0x98, 0x80, 0x5e, 0xc0, 0x0c, 0x57, 0x36, 0xc0, 0x4d, 0xb1, 0xd7, 0xbf, 0xdd, 0x46, 0x1b, 0xc0, + 0x3c, 0x35, 0x61, 0xc0, 0x84, 0x2f, 0x77, 0xc0, 0x62, 0x11, 0x9d, 0xc0, 0xc4, 0xa1, 0x70, 0xc0, + 0xe7, 0x03, 0x95, 0xbf, 0xdf, 0x47, 0xe4, 0xbf, 0x8c, 0x06, 0x45, 0xc0, 0x9e, 0x57, 0x15, 0xc0, + 0xf6, 0x16, 0xbe, 0xbf, 0x45, 0x83, 0xcb, 0xbf, 0xcd, 0xa6, 0x40, 0xc0, 0xea, 0xa2, 0x04, 0xc0, + 0x87, 0x58, 0xc5, 0xbf, 0x41, 0x1a, 0x06, 0xc0, 0xdb, 0xbc, 0x62, 0xc0, 0x0d, 0x7b, 0x28, 0xc0, + 0x96, 0xdd, 0x27, 0xc0, 0x92, 0xb4, 0x61, 0xc0, 0x17, 0x05, 0x4f, 0xc0, 0x8a, 0xb5, 0x48, 0xc0, + 0x0e, 0x2d, 0x29, 0xc0, 0x0b, 0xdf, 0x82, 0xc0, 0x07, 0x9e, 0x2f, 0xc0, 0x26, 0x91, 0x53, 0xc0, + 0x53, 0xfc, 0x33, 0xc0, 0xa1, 0xad, 0x3b, 0xc0, 0xe4, 0xa1, 0x02, 0xc0, 0x6e, 0xc7, 0x6f, 0xc0, + 0xdc, 0x0a, 0xfe, 0xbf, 0xd5, 0xa6, 0x3c, 0xc0, 0xb1, 0x4e, 0xf9, 0xbf, 0x90, 0x83, 0xc7, 0xbf, + 0xd3, 0x0b, 0x86, 0xc0, 0x33, 0xec, 0xeb, 0xbf, 0xfb, 0xa0, 0x4b, 0xc0, 0x49, 0x10, 0x4b, 0xc0, + 0x4d, 0xe4, 0xdd, 0xbf, 0x34, 0xcb, 0x67, 0xc0, 0xef, 0xe8, 0x10, 0xc0, 0xc1, 0x07, 0x2d, 0xc0, + 0x47, 0x2c, 0x3b, 0xc0, 0x07, 0x3b, 0xb5, 0xbe, 0xed, 0x11, 0x08, 0xc0, 0xf6, 0xb2, 0x47, 0xc0, + 0xd8, 0x63, 0x1c, 0xc0, 0x49, 0x12, 0x6e, 0xc0, 0x7b, 0x20, 0x6c, 0xc0, 0x72, 0x83, 0x61, 0xc0, + 0x43, 0x1a, 0x70, 0xc0, 0x4a, 0x0b, 0x61, 0xc0, 0x53, 0xfe, 0x5c, 0xc0, 0xf2, 0x26, 0x66, 0xc0, + 0x59, 0x24, 0x22, 0xc0, 0xf3, 0x12, 0x9a, 0xc0, 0xc2, 0xce, 0x7c, 0xc0, 0xef, 0xdc, 0x19, 0xc0, + 0x59, 0xba, 0x37, 0xc0, 0xc4, 0xb6, 0x5a, 0xc0, 0x64, 0xea, 0x03, 0xc0, 0xf8, 0xdd, 0x00, 0xc0, + 0x90, 0x28, 0x45, 0xc0, 0x21, 0xef, 0x39, 0xc0, 0x77, 0x03, 0x23, 0xc0, 0xd0, 0x9a, 0x12, 0xc0, + 0x4c, 0x3a, 0xfa, 0xbf, 0x41, 0x5b, 0x39, 0xc0, 0x47, 0xea, 0x8f, 0xc0, 0x4e, 0xae, 0x1d, 0xc0, + 0x00, 0xf5, 0x4c, 0xc0, 0xa4, 0x00, 0xd9, 0xbf, 0x4f, 0xdb, 0x2b, 0xc0, 0xb2, 0xd6, 0x6c, 0xc0, + 0x9e, 0x13, 0x7b, 0xc0, 0x0d, 0x4f, 0xe4, 0xbf, 0xea, 0x9e, 0x07, 0xc0, 0x73, 0xeb, 0x1d, 0xc0, + 0xec, 0x10, 0x33, 0xc0, 0x96, 0x49, 0x2c, 0xc0, 0xc6, 0x6f, 0x5e, 0xc0, 0xe7, 0x1d, 0x36, 0xc0, + 0x3d, 0xe9, 0x3f, 0xc0, 0x78, 0xb1, 0x64, 0xc0, 0xd9, 0x10, 0xa9, 0xc0, 0xf9, 0xbb, 0x27, 0xc0, + 0x18, 0x83, 0x1c, 0xc0, 0xbe, 0xb0, 0x6e, 0xc0, 0x85, 0x0f, 0x8b, 0xbf, 0x86, 0x93, 0xcd, 0xbf, + 0xb0, 0xe7, 0x79, 0xc0, 0xc1, 0x55, 0x20, 0xc0, 0x50, 0xd5, 0x0c, 0xc0, 0x88, 0x9d, 0x24, 0xc0, + 0x58, 0x74, 0x64, 0xc0, 0x74, 0x06, 0x2b, 0xc0, 0x1a, 0x3a, 0x8d, 0xc0, 0xdc, 0x23, 0x4e, 0xc0, + 0xb5, 0x2f, 0x88, 0xc0, 0xdb, 0x95, 0x4d, 0xc0, 0x5a, 0xe8, 0x6f, 0xc0, 0xf0, 0xaf, 0xab, 0xbf, + 0x22, 0xdd, 0xa8, 0xbf, 0x8b, 0xf3, 0x2f, 0xc0, 0x79, 0xaf, 0x1c, 0xc0, 0x00, 0x12, 0x85, 0xc0, + 0xb5, 0x09, 0xcb, 0xbf, 0x04, 0x10, 0x3f, 0xc0, 0x84, 0xd6, 0xc9, 0xbf, 0x02, 0x09, 0x43, 0xc0, + 0xe8, 0x7c, 0x9b, 0xc0, 0xe7, 0xae, 0x87, 0xc0, 0xc2, 0x68, 0x7d, 0xc0, 0xf8, 0x8b, 0x43, 0xc0, + 0x08, 0x4e, 0x6d, 0xc0, 0xfb, 0xcb, 0xae, 0xc0, 0x6c, 0xd3, 0x3c, 0xc0, 0x00, 0xf9, 0x42, 0xc0, + 0x75, 0xf8, 0x8b, 0xc0, 0x6c, 0x00, 0x29, 0xc0, 0x35, 0x39, 0x89, 0xbf, 0x41, 0x26, 0xe4, 0xbf, + 0x25, 0x9c, 0x38, 0xc0, 0xd7, 0x03, 0x3d, 0xc0, 0x0e, 0x8e, 0x7d, 0xc0, 0x1c, 0xb7, 0x6f, 0xc0, + 0x68, 0xfb, 0x48, 0xc0, 0x45, 0xf0, 0x7f, 0xc0, 0x68, 0xfc, 0x1d, 0xc0, 0xdf, 0x00, 0x82, 0xc0, + 0x55, 0x87, 0x30, 0xc0, 0xe0, 0xdf, 0x2d, 0xc0, 0xbd, 0x56, 0x99, 0xc0, 0x12, 0xe1, 0x56, 0xc0, + 0xad, 0x86, 0x54, 0xc0, 0x5b, 0x6f, 0x44, 0xc0, 0xf6, 0x1e, 0x3c, 0xc0, 0x70, 0x76, 0x38, 0xc0, + 0xfe, 0x8c, 0x5a, 0xc0, 0xa3, 0xe5, 0x5b, 0xc0, 0x4d, 0x2d, 0x63, 0xc0, 0x1d, 0x88, 0x47, 0xc0, + 0xfc, 0xa0, 0xdb, 0xbf, 0x4a, 0xf8, 0x49, 0xc0, 0xe5, 0x0b, 0x46, 0xc0, 0x92, 0xd8, 0x44, 0xc0, + 0x72, 0x30, 0xda, 0xbf, 0x60, 0xad, 0x81, 0xc0, 0x2e, 0x09, 0x8a, 0xc0, 0xef, 0xcd, 0x31, 0xc0, + 0x33, 0x61, 0x06, 0xc0, 0x2e, 0x07, 0x8d, 0xc0, 0x62, 0xbc, 0x52, 0xc0, 0x55, 0x97, 0x45, 0xc0, + 0x14, 0x8e, 0x1e, 0xc0, 0xb8, 0x05, 0xf2, 0xbf, 0x1e, 0x51, 0x1c, 0xc0, 0x61, 0x76, 0xe1, 0xbf, + 0x7e, 0xfb, 0x29, 0xc0, 0xc2, 0xcd, 0x25, 0xc0, 0xa9, 0xf1, 0x9c, 0xc0, 0xdf, 0xd1, 0x35, 0xc0, + 0x7a, 0x12, 0x1e, 0xc0, 0x3e, 0x11, 0xc1, 0xbf, 0x93, 0x21, 0x74, 0xbf, 0x3b, 0xd0, 0x5b, 0xc0, + 0x1f, 0x4f, 0x2f, 0xc0, 0x7f, 0x0c, 0x36, 0xc0, 0x8c, 0xd3, 0x5f, 0xc0, 0xc4, 0x52, 0xd6, 0xbf, + 0x62, 0x4e, 0xa8, 0xbf, 0xbf, 0x57, 0xb4, 0xc0, 0x43, 0x8e, 0x59, 0xc0, 0x4b, 0x75, 0x0a, 0xc0, + 0x95, 0x22, 0x83, 0xc0, 0x07, 0xcd, 0x5f, 0xc0, 0x67, 0xdf, 0x09, 0xc0, 0xc8, 0x79, 0x99, 0xc0, + 0xbe, 0xc0, 0x8f, 0xbf, 0x56, 0x68, 0x6f, 0xc0, 0x61, 0xf4, 0x3b, 0xc0, 0xdd, 0xfc, 0x13, 0xc0, + 0x15, 0x8f, 0x98, 0xc0, 0xc9, 0x33, 0x10, 0xc0, 0xe2, 0xcd, 0x1b, 0xc0, 0x16, 0xac, 0x1e, 0xc0, + 0x78, 0xff, 0x16, 0xc0, 0xff, 0xf6, 0x84, 0xc0, 0x56, 0x63, 0xbc, 0xc0, 0x91, 0xd4, 0x17, 0xc0, + 0xbf, 0xcf, 0x72, 0xc0, 0xf6, 0x86, 0x69, 0xc0, 0x70, 0xd0, 0x99, 0xbf, 0x8d, 0xdb, 0xbc, 0xbf, + 0x6f, 0x5a, 0x7b, 0xc0, 0xa0, 0xd9, 0x32, 0xc0, 0xe5, 0x07, 0x70, 0xc0, 0x6a, 0xf7, 0xf2, 0xbf, + 0xb6, 0x69, 0x40, 0xc0, 0x28, 0xed, 0x14, 0xc0, 0x1b, 0xa9, 0x33, 0xc0, 0xfa, 0xd2, 0x47, 0xc0, + 0x53, 0x21, 0x77, 0xc0, 0x72, 0x23, 0x74, 0xc0, 0xc7, 0x35, 0x0b, 0xc0, 0x6f, 0x9d, 0x43, 0xc0, + 0xda, 0xb3, 0x85, 0xc0, 0x93, 0xf6, 0x2d, 0xc0, 0x9b, 0xf0, 0x6c, 0xc0, 0x14, 0x11, 0xe0, 0xbf, + 0xdb, 0x62, 0x39, 0xc0, 0xe5, 0xb3, 0x24, 0xc0, 0xf7, 0x58, 0x46, 0xc0, 0x84, 0xee, 0x30, 0xc0, + 0xb4, 0xb2, 0x72, 0xc0, 0xd4, 0x7b, 0x22, 0xc0, 0x31, 0x6f, 0x98, 0xc0, 0x35, 0x2a, 0x8b, 0xc0, + 0xc0, 0x90, 0x0c, 0xc0, 0xec, 0x1f, 0x35, 0xc0, 0x9b, 0xc7, 0x37, 0xc0, 0x0d, 0x1d, 0x58, 0xc0, + 0x24, 0xb7, 0x77, 0xc0, 0x9e, 0x53, 0x63, 0xc0, 0xfb, 0x53, 0x90, 0xc0, 0xa9, 0x51, 0x6a, 0xc0, + 0x23, 0x1c, 0x03, 0xc0, 0xa2, 0xc7, 0x36, 0xc0, 0x12, 0xbd, 0x6b, 0xc0, 0x3a, 0xbe, 0x97, 0xc0, + 0x83, 0xc7, 0x8d, 0xc0, 0x75, 0x81, 0x7b, 0xc0, 0x9a, 0x2d, 0x48, 0xc0, 0xda, 0x9a, 0x2e, 0xc0, + 0x96, 0x86, 0x09, 0xc0, 0x8e, 0x97, 0x5d, 0xc0, 0xbd, 0xa0, 0x91, 0xc0, 0x49, 0x67, 0x13, 0xc0, + 0x1e, 0xf1, 0x55, 0xc0, 0x6a, 0x87, 0x4f, 0xc0, 0x12, 0xad, 0x41, 0xc0, 0x28, 0x6b, 0xe3, 0xbf, + 0x99, 0x45, 0x88, 0xc0, 0x96, 0xde, 0x7c, 0xc0, 0x28, 0x75, 0xa2, 0xc0, 0x20, 0x1a, 0x44, 0xc0, + 0xff, 0x94, 0x18, 0xc0, 0xb0, 0xb6, 0x4f, 0xc0, 0xca, 0xd3, 0x8f, 0xbf, 0xd0, 0x4e, 0x04, 0xc0, + 0x29, 0x4f, 0x49, 0xc0, 0xd0, 0x57, 0x10, 0xc0, 0xe8, 0xb8, 0xde, 0xbf, 0x81, 0x36, 0x2b, 0xc0, + 0xc3, 0xdb, 0x80, 0xc0, 0xfa, 0x48, 0x94, 0xc0, 0xb7, 0xdd, 0x09, 0xc0, 0x7f, 0xc2, 0x7e, 0xc0, + 0x43, 0x3f, 0x3e, 0xc0, 0xdd, 0xc8, 0x61, 0xc0, 0x56, 0x7a, 0x48, 0xc0, 0xbe, 0x66, 0x6f, 0xc0, + 0xc7, 0x2b, 0x65, 0xc0, 0x28, 0x44, 0x35, 0xc0, 0x9c, 0xa4, 0x5f, 0xc0, 0xe6, 0xe8, 0x14, 0xc0, + 0xd6, 0xb0, 0x48, 0xc0, 0x77, 0xc9, 0x17, 0xc0, 0x6f, 0x1e, 0x6e, 0xc0, 0x53, 0x43, 0x59, 0xc0, + 0xb5, 0xbc, 0xa8, 0xc0, 0xc7, 0x30, 0x3f, 0xbf, 0x75, 0xb8, 0x7e, 0xc0, 0x28, 0x2b, 0x98, 0xc0, + 0x0f, 0x5c, 0x44, 0xc0, 0x8d, 0xa5, 0x4f, 0xc0, 0x8f, 0xb5, 0x2d, 0xc0, 0x9d, 0xfb, 0x4a, 0xc0, + 0x68, 0x11, 0x21, 0xc0, 0x3b, 0xf5, 0xcf, 0xbf, 0x5f, 0x4a, 0x77, 0xc0, 0x91, 0xb5, 0x5c, 0xc0, + 0x5d, 0x95, 0x8b, 0xc0, 0xbe, 0xdb, 0xba, 0xbf, 0x65, 0xcb, 0x8b, 0xc0, 0xf7, 0x3d, 0x71, 0xc0, + 0xce, 0x0f, 0x39, 0xc0, 0x50, 0x6d, 0xea, 0xbf, 0x41, 0xd4, 0x03, 0xc0, 0xf5, 0x6f, 0x26, 0xc0, + 0xe6, 0xfb, 0x2f, 0xc0, 0x10, 0x9e, 0xa1, 0xc0, 0x8c, 0x84, 0x30, 0xc0, 0x06, 0x58, 0x63, 0xc0, + 0xa5, 0x9c, 0x12, 0xc0, 0x81, 0x5d, 0x60, 0xc0, 0x0e, 0x32, 0x0f, 0xc0, 0xc8, 0x39, 0x4c, 0xc0, + 0xbb, 0xb0, 0x06, 0xc0, 0x79, 0x60, 0x11, 0xc0, 0x5a, 0x92, 0x67, 0xc0, 0xa9, 0xd4, 0x2e, 0xc0, + 0xfa, 0x3b, 0x48, 0xc0, 0x3a, 0x51, 0x2b, 0xc0, 0xb1, 0x1e, 0x5a, 0xc0, 0xfd, 0x7e, 0x2f, 0xc0, + 0x3b, 0xde, 0x14, 0xc0, 0xe6, 0xc0, 0x1f, 0xc0, 0xb6, 0xe5, 0x8a, 0xc0, 0xfa, 0x48, 0x30, 0xc0, + 0x9d, 0x0e, 0xc7, 0xbf, 0xbd, 0x36, 0x13, 0xc0, 0xd2, 0x3d, 0x42, 0xc0, 0x50, 0x50, 0x20, 0xc0, + 0x78, 0xce, 0xe0, 0xbf, 0x1a, 0x19, 0x08, 0xc0, 0xcf, 0x03, 0x8c, 0xc0, 0xbd, 0xfb, 0x32, 0xc0, + 0x37, 0x38, 0x39, 0xc0, 0x3f, 0x99, 0x27, 0xc0, 0x5b, 0x96, 0x6a, 0xc0, 0x0c, 0xb1, 0x44, 0xc0, + 0x7d, 0x1c, 0xd6, 0xbf, 0x6a, 0x0c, 0x86, 0xc0, 0x45, 0xab, 0x26, 0xc0, 0x9d, 0xf1, 0x25, 0xc0, + 0xcb, 0x5c, 0x5f, 0xc0, 0xb0, 0xa6, 0xf0, 0xbf, 0x34, 0x59, 0x73, 0xc0, 0xa6, 0xe6, 0xbe, 0xbf, + 0xe7, 0x36, 0x5f, 0xc0, 0x14, 0x10, 0xd0, 0xbf, 0x00, 0x16, 0xc4, 0xbf, 0x61, 0xf3, 0x39, 0xc0, + 0x18, 0x64, 0x8e, 0xc0, 0xb6, 0x4e, 0x57, 0xc0, 0xca, 0x73, 0x81, 0xc0, 0xc8, 0xf1, 0x05, 0xc0}; +unsigned char gemm_fp32_b[] = { + 0x64, 0x31, 0xba, 0x3f, 0x67, 0x91, 0xb4, 0x3f, 0x9a, 0x56, 0x15, 0x40, 0xe1, 0x9b, 0xc4, 0x3f, + 0x4e, 0x18, 0x4f, 0x40, 0x91, 0x4e, 0x3f, 0x40, 0x67, 0x8f, 0x0d, 0x40, 0x90, 0x1b, 0xd1, 0x3f, + 0xec, 0xb8, 0x00, 0x40, 0x8f, 0xcf, 0x62, 0x3f, 0x31, 0x0e, 0x83, 0x40, 0x86, 0x70, 0x7c, 0x3f, + 0x79, 0xfc, 0x0f, 0x40, 0x5e, 0xc1, 0x3b, 0x40, 0x6c, 0xe7, 0x00, 0x40, 0x12, 0x40, 0xbe, 0x3f, + 0xd3, 0x3b, 0x72, 0x40, 0xf1, 0x1b, 0x3c, 0x40, 0x1f, 0x8c, 0xf4, 0x3f, 0x5b, 0xc3, 0x48, 0x3f, + 0x3b, 0x0d, 0x04, 0x40, 0xba, 0x74, 0x21, 0x40, 0x91, 0xf6, 0x6e, 0x3f, 0x64, 0x50, 0xda, 0x3f, + 0x51, 0x62, 0xb4, 0x3f, 0xbf, 0xef, 0xe2, 0x3f, 0x7d, 0x81, 0xca, 0x3f, 0x85, 0x73, 0x4f, 0x40, + 0x26, 0xf3, 0x16, 0x40, 0x1d, 0xdc, 0x14, 0x40, 0x4b, 0x29, 0x37, 0x40, 0x75, 0xfa, 0x6d, 0x40, + 0xb0, 0x84, 0xd7, 0x3f, 0xa5, 0x5b, 0xa0, 0x3e, 0x48, 0xde, 0x02, 0x40, 0x2e, 0xe1, 0x1f, 0x40, + 0x14, 0x04, 0x07, 0x40, 0xc3, 0xf0, 0xe9, 0x3f, 0x23, 0xc0, 0x06, 0x40, 0xb8, 0x9b, 0x21, 0x40, + 0x43, 0xcd, 0x52, 0x40, 0x4b, 0xbe, 0xe3, 0x3f, 0xc3, 0xac, 0xb5, 0x3f, 0x75, 0x0d, 0x04, 0x40, + 0x21, 0xbe, 0x17, 0x40, 0xe3, 0x55, 0xc9, 0x3f, 0x70, 0x6e, 0xac, 0x3f, 0x51, 0xb2, 0x08, 0x40, + 0x98, 0xa5, 0xfd, 0x3f, 0x1d, 0x38, 0xf6, 0x3e, 0x64, 0x6d, 0xd4, 0x3f, 0x53, 0xed, 0x20, 0x40, + 0xfe, 0x22, 0xee, 0x3f, 0x59, 0x07, 0x2b, 0x40, 0x82, 0x7f, 0x1c, 0x40, 0x83, 0x21, 0x2e, 0x40, + 0x70, 0x30, 0xc8, 0x3f, 0x49, 0xfc, 0x95, 0x3e, 0xb4, 0x4d, 0xd8, 0x3f, 0x1a, 0x05, 0xd9, 0x3f, + 0x01, 0x66, 0x01, 0x40, 0x56, 0xaf, 0xe3, 0x3f, 0x71, 0x6d, 0x73, 0x3f, 0x62, 0xc9, 0xda, 0x3f, + 0xd4, 0xf4, 0xc8, 0x3f, 0xeb, 0x23, 0x4a, 0x3f, 0xca, 0xe9, 0x0a, 0x40, 0xeb, 0x1c, 0xe1, 0x3f, + 0xdf, 0xa8, 0x6c, 0x3f, 0x17, 0x8d, 0x93, 0x40, 0xfe, 0xd5, 0x11, 0x40, 0x00, 0x99, 0x98, 0x3f, + 0x83, 0xa3, 0x0b, 0x40, 0xeb, 0x45, 0xac, 0x3f, 0xba, 0xee, 0x67, 0x3e, 0xb9, 0x04, 0x29, 0x40, + 0x6d, 0x52, 0xe2, 0x3f, 0x1c, 0x2a, 0xee, 0x3f, 0xd6, 0x87, 0xcb, 0x3e, 0xf5, 0xbc, 0x1b, 0x40, + 0x11, 0x4c, 0x01, 0x40, 0xab, 0xaa, 0xbf, 0x3f, 0xc1, 0x7d, 0x28, 0x40, 0x30, 0x1e, 0x67, 0x40, + 0x1c, 0xe8, 0x1a, 0x40, 0x12, 0x35, 0xf0, 0x3f, 0x9a, 0xbe, 0x86, 0xbf, 0x9e, 0x1f, 0xcc, 0x3f, + 0x1f, 0x6a, 0x06, 0x40, 0x54, 0x0b, 0x26, 0x40, 0x99, 0xed, 0x5d, 0x40, 0xd6, 0xcb, 0x2f, 0xbd, + 0x39, 0x44, 0x3d, 0x40, 0xec, 0xab, 0x3c, 0x40, 0xe6, 0x91, 0xd7, 0x3f, 0x45, 0xde, 0x12, 0x40, + 0x1a, 0x49, 0x4a, 0x40, 0xda, 0xff, 0x61, 0x3f, 0xef, 0x02, 0xfb, 0x3f, 0xa8, 0xaf, 0xc9, 0x3f, + 0xe1, 0xff, 0x70, 0x3f, 0xd0, 0xa6, 0x65, 0x40, 0x38, 0x8f, 0xbb, 0x3f, 0x56, 0xa3, 0x4e, 0x40, + 0x63, 0xa7, 0xbc, 0x3f, 0xdf, 0x5d, 0xe2, 0x3f, 0xdd, 0x1f, 0x48, 0x40, 0xc8, 0x56, 0x01, 0x40, + 0x4d, 0x89, 0xb0, 0x3f, 0xd7, 0x21, 0x42, 0x40, 0x63, 0xb9, 0x37, 0x40, 0xae, 0x05, 0x88, 0x3f, + 0x71, 0x69, 0xe2, 0x3f, 0xb7, 0xe2, 0x8b, 0x3f, 0x05, 0x0a, 0x0f, 0x40, 0x82, 0xc3, 0x6e, 0x40, + 0x2e, 0x6b, 0x82, 0x3f, 0xff, 0x80, 0x99, 0x3f, 0x67, 0xdc, 0xb9, 0x3f, 0xcf, 0x62, 0x2e, 0x40, + 0xbf, 0x16, 0xdf, 0x3f, 0x15, 0xbd, 0xc4, 0x3f, 0x75, 0x54, 0xc9, 0x3f, 0x42, 0xa9, 0xcd, 0x3f, + 0x17, 0x39, 0xeb, 0x3f, 0xd4, 0xc0, 0xb9, 0x3f, 0x34, 0xf6, 0x27, 0x40, 0xd2, 0x91, 0xe4, 0x3f, + 0x81, 0xb1, 0xf0, 0x3f, 0x55, 0x07, 0x28, 0x40, 0xed, 0xcf, 0x72, 0x40, 0x45, 0xef, 0x05, 0x40, + 0x1b, 0x41, 0xee, 0x3f, 0xd4, 0xe0, 0xc8, 0x3e, 0xc5, 0xf8, 0x43, 0x40, 0xcb, 0x1a, 0xa8, 0x3f, + 0xb4, 0xab, 0x12, 0x40, 0x5e, 0x6d, 0x4e, 0x40, 0x67, 0x40, 0x73, 0x40, 0xcb, 0x0e, 0x61, 0x40, + 0xa1, 0xdc, 0x5b, 0x40, 0xc3, 0x30, 0x26, 0x40, 0xdc, 0x6a, 0x4b, 0x40, 0x58, 0x93, 0x14, 0x40, + 0x9f, 0x54, 0x28, 0x40, 0xa1, 0x45, 0x05, 0x40, 0xef, 0x8b, 0x72, 0x3e, 0x5a, 0xa6, 0x5c, 0x3f, + 0x0d, 0x74, 0x7d, 0x3f, 0xf7, 0x67, 0x0f, 0x40, 0xd0, 0x7f, 0x1a, 0x40, 0x4f, 0xeb, 0x1d, 0x40, + 0x88, 0xd0, 0x0d, 0x40, 0x1d, 0xec, 0x77, 0x40, 0x8a, 0xb5, 0x55, 0x40, 0x7d, 0x25, 0x35, 0x40, + 0xbd, 0xba, 0x4a, 0x40, 0xbe, 0x04, 0x39, 0x40, 0x3d, 0xad, 0xfe, 0x3f, 0xb4, 0x66, 0xe9, 0x3f, + 0x53, 0xc9, 0x01, 0x40, 0x8b, 0x53, 0x60, 0x3f, 0xd4, 0xc4, 0x2a, 0x40, 0xf2, 0x64, 0x49, 0x40, + 0x68, 0x4e, 0xbe, 0x3f, 0x86, 0x94, 0x32, 0xbf, 0x8b, 0x97, 0x9b, 0x3f, 0x62, 0x01, 0x32, 0x40, + 0x6e, 0xd7, 0xb8, 0x3f, 0xfc, 0x35, 0x2d, 0x40, 0x9e, 0x29, 0x05, 0x40, 0xfe, 0x53, 0xef, 0x3f, + 0x5a, 0xf5, 0x3e, 0x40, 0x3e, 0x17, 0x18, 0x3f, 0x71, 0xac, 0x68, 0x3f, 0x43, 0x0b, 0xd5, 0x3f, + 0x7e, 0x02, 0xbe, 0x3f, 0x12, 0x80, 0x32, 0x40, 0x8f, 0x7d, 0x1b, 0x40, 0x49, 0x88, 0x47, 0xbe, + 0x65, 0xbb, 0x45, 0x40, 0xc1, 0xa8, 0xec, 0x3f, 0x73, 0xd0, 0xa5, 0x3f, 0x5f, 0x26, 0xba, 0x3f, + 0xc9, 0xfd, 0x09, 0x40, 0xa8, 0xfa, 0x27, 0x40, 0x9d, 0x40, 0x8e, 0x40, 0xa9, 0x03, 0x5f, 0x40, + 0x17, 0x78, 0x77, 0x40, 0x01, 0x75, 0x70, 0x3f, 0xc1, 0xc7, 0x1e, 0x3f, 0x63, 0x1f, 0xff, 0x3f, + 0x9e, 0x48, 0xeb, 0x3f, 0x31, 0xfc, 0xc5, 0x3f, 0x8d, 0x92, 0x9b, 0x3f, 0xf0, 0x70, 0x28, 0x40, + 0xc0, 0x2c, 0x13, 0x40, 0xc5, 0x90, 0x3c, 0x3f, 0xc2, 0xd4, 0xcf, 0x3f, 0xc9, 0x4d, 0xed, 0x3f, + 0x31, 0xfa, 0x21, 0x40, 0x9c, 0xca, 0x82, 0x3f, 0x65, 0x1e, 0xdf, 0x3f, 0x34, 0x5f, 0x6a, 0x3f, + 0xd1, 0xbb, 0x4f, 0x40, 0xf0, 0xbb, 0x88, 0x40, 0x0f, 0x9a, 0x4e, 0x40, 0x41, 0x64, 0x68, 0x40, + 0x61, 0xab, 0x1b, 0x40, 0xa8, 0xdd, 0xb1, 0x3f, 0x4a, 0x9e, 0xf6, 0x3f, 0x6c, 0xd7, 0x47, 0x40, + 0x9f, 0xc4, 0xc7, 0x3f, 0xee, 0xcc, 0x19, 0x40, 0xa8, 0x05, 0xcb, 0x3f, 0x48, 0xeb, 0x6f, 0x3f, + 0x02, 0x96, 0x04, 0x40, 0x2f, 0xdb, 0x90, 0x3f, 0xa9, 0xa1, 0xe9, 0x3f, 0x0a, 0xa0, 0x18, 0x40, + 0x34, 0x83, 0xdc, 0x3f, 0xc4, 0x9d, 0x5b, 0x3f, 0xfe, 0xe3, 0xc3, 0x3f, 0xc8, 0xa7, 0x98, 0x3f, + 0xad, 0x7c, 0x0f, 0x40, 0xd2, 0xcf, 0xc0, 0x3f, 0xf3, 0x8c, 0x1f, 0x40, 0x23, 0x30, 0xe5, 0x3f, + 0x9f, 0x37, 0x50, 0x3f, 0x44, 0x12, 0x05, 0x3e, 0x18, 0x85, 0xeb, 0x3f, 0xb2, 0x30, 0xe7, 0x3f, + 0x15, 0x02, 0x5c, 0x3f, 0x0d, 0xec, 0xe7, 0x3d, 0x6c, 0x03, 0xe8, 0x3f, 0xf1, 0x20, 0x74, 0x40, + 0x05, 0xc6, 0x16, 0x3f, 0xc0, 0x5e, 0x5b, 0x40, 0x20, 0x43, 0x76, 0x40, 0x61, 0xbf, 0x13, 0x40, + 0xea, 0x8b, 0xfa, 0x3f, 0x42, 0x33, 0xc6, 0x3f, 0x6e, 0x85, 0x2d, 0x40, 0x7b, 0x7a, 0x50, 0x40, + 0xd1, 0x84, 0xec, 0x3f, 0xd0, 0x62, 0x52, 0x40, 0x21, 0x9a, 0x09, 0x40, 0x8d, 0x80, 0x6c, 0x40, + 0x00, 0xa5, 0x3a, 0x40, 0x44, 0xc2, 0xf5, 0x3c, 0x18, 0xb2, 0xeb, 0x3f, 0x8d, 0x2a, 0x9b, 0x3f, + 0x4b, 0xd2, 0x14, 0x40, 0xa6, 0x5c, 0x21, 0x40, 0x19, 0x84, 0x60, 0x40, 0xff, 0x6a, 0x27, 0x40, + 0xf4, 0x55, 0x29, 0x40, 0x22, 0x46, 0xb6, 0x3f, 0x7b, 0xc6, 0x3d, 0x40, 0x1d, 0x3f, 0xb9, 0x3f, + 0xaf, 0xab, 0x47, 0x40, 0xda, 0xcd, 0xf6, 0x3f, 0x75, 0x72, 0x92, 0x3f, 0xfb, 0x69, 0xea, 0x3f, + 0xb4, 0x13, 0xd9, 0x3f, 0x80, 0x9c, 0xa5, 0x3e, 0x7b, 0x1e, 0xce, 0x3f, 0x91, 0x49, 0xa6, 0x3d, + 0xd3, 0xf2, 0x88, 0x40, 0x52, 0x7e, 0xe8, 0x3f, 0x9e, 0x00, 0x14, 0x40, 0x65, 0x5b, 0x82, 0x3f, + 0x39, 0xf0, 0x45, 0x40, 0x10, 0x24, 0x90, 0x3f, 0xbe, 0x46, 0x3c, 0x40, 0x70, 0xfb, 0x35, 0x40, + 0xe5, 0xa0, 0x07, 0x40, 0x9d, 0x51, 0x03, 0x40, 0x61, 0x7a, 0x70, 0x40, 0x80, 0xc5, 0xd1, 0x3f, + 0xb7, 0xe8, 0xf0, 0x3f, 0x5e, 0xae, 0x3f, 0x40, 0x72, 0x01, 0x58, 0x40, 0x1e, 0x55, 0x0e, 0x40, + 0x47, 0x09, 0x18, 0x40, 0x95, 0x05, 0xa8, 0x3f, 0x9d, 0x5e, 0x1a, 0x40, 0xa0, 0xe7, 0x1c, 0x40, + 0x34, 0xe3, 0x31, 0x40, 0x29, 0x8a, 0xe5, 0x3f, 0xfc, 0xc2, 0xa7, 0x40, 0x0f, 0x1d, 0x30, 0x40, + 0x18, 0xa3, 0x84, 0x3f, 0x6e, 0x09, 0xec, 0x3f, 0x9e, 0xfe, 0x07, 0x40, 0x94, 0x34, 0x32, 0x3f, + 0x11, 0x6f, 0x89, 0x3f, 0xce, 0xaa, 0xa0, 0x3f, 0x2d, 0x32, 0x2e, 0x40, 0x3b, 0xe4, 0xc1, 0x3f, + 0x82, 0x03, 0x20, 0x40, 0x0f, 0x09, 0x3a, 0x40, 0x11, 0xbb, 0x0d, 0x40, 0xc6, 0x29, 0x63, 0x3e, + 0xcb, 0xca, 0xeb, 0xbd, 0x59, 0xdd, 0xf0, 0x3f, 0x27, 0x81, 0x23, 0x40, 0x1a, 0x7c, 0x00, 0x40, + 0x5f, 0xcd, 0x65, 0x3f, 0xeb, 0xaf, 0x0b, 0x3f, 0x78, 0x4a, 0x27, 0x40, 0x71, 0x76, 0x17, 0x40, + 0xc2, 0x7e, 0xee, 0x3f, 0xb4, 0x83, 0x36, 0x40, 0x66, 0x4b, 0xf1, 0x3f, 0x96, 0xaa, 0xdc, 0x3f, + 0x71, 0x3b, 0x23, 0x40, 0x76, 0x3e, 0x82, 0x3f, 0xa2, 0xfc, 0x25, 0x40, 0x69, 0x74, 0xa9, 0x40}; +unsigned char gemm_fp32_b1[] = { + 0x64, 0x31, 0xba, 0x3f, 0x67, 0x91, 0xb4, 0x3f, 0x9a, 0x56, 0x15, 0x40, 0xe1, 0x9b, 0xc4, 0x3f, + 0x4e, 0x18, 0x4f, 0x40, 0x91, 0x4e, 0x3f, 0x40, 0x67, 0x8f, 0x0d, 0x40, 0x90, 0x1b, 0xd1, 0x3f, + 0x3b, 0x0d, 0x04, 0x40, 0xba, 0x74, 0x21, 0x40, 0x91, 0xf6, 0x6e, 0x3f, 0x64, 0x50, 0xda, 0x3f, + 0x51, 0x62, 0xb4, 0x3f, 0xbf, 0xef, 0xe2, 0x3f, 0x7d, 0x81, 0xca, 0x3f, 0x85, 0x73, 0x4f, 0x40, + 0x43, 0xcd, 0x52, 0x40, 0x4b, 0xbe, 0xe3, 0x3f, 0xc3, 0xac, 0xb5, 0x3f, 0x75, 0x0d, 0x04, 0x40, + 0x21, 0xbe, 0x17, 0x40, 0xe3, 0x55, 0xc9, 0x3f, 0x70, 0x6e, 0xac, 0x3f, 0x51, 0xb2, 0x08, 0x40, + 0x01, 0x66, 0x01, 0x40, 0x56, 0xaf, 0xe3, 0x3f, 0x71, 0x6d, 0x73, 0x3f, 0x62, 0xc9, 0xda, 0x3f, + 0xd4, 0xf4, 0xc8, 0x3f, 0xeb, 0x23, 0x4a, 0x3f, 0xca, 0xe9, 0x0a, 0x40, 0xeb, 0x1c, 0xe1, 0x3f, + 0x11, 0x4c, 0x01, 0x40, 0xab, 0xaa, 0xbf, 0x3f, 0xc1, 0x7d, 0x28, 0x40, 0x30, 0x1e, 0x67, 0x40, + 0x1c, 0xe8, 0x1a, 0x40, 0x12, 0x35, 0xf0, 0x3f, 0x9a, 0xbe, 0x86, 0xbf, 0x9e, 0x1f, 0xcc, 0x3f, + 0xe1, 0xff, 0x70, 0x3f, 0xd0, 0xa6, 0x65, 0x40, 0x38, 0x8f, 0xbb, 0x3f, 0x56, 0xa3, 0x4e, 0x40, + 0x63, 0xa7, 0xbc, 0x3f, 0xdf, 0x5d, 0xe2, 0x3f, 0xdd, 0x1f, 0x48, 0x40, 0xc8, 0x56, 0x01, 0x40, + 0xbf, 0x16, 0xdf, 0x3f, 0x15, 0xbd, 0xc4, 0x3f, 0x75, 0x54, 0xc9, 0x3f, 0x42, 0xa9, 0xcd, 0x3f, + 0x17, 0x39, 0xeb, 0x3f, 0xd4, 0xc0, 0xb9, 0x3f, 0x34, 0xf6, 0x27, 0x40, 0xd2, 0x91, 0xe4, 0x3f, + 0xa1, 0xdc, 0x5b, 0x40, 0xc3, 0x30, 0x26, 0x40, 0xdc, 0x6a, 0x4b, 0x40, 0x58, 0x93, 0x14, 0x40, + 0x9f, 0x54, 0x28, 0x40, 0xa1, 0x45, 0x05, 0x40, 0xef, 0x8b, 0x72, 0x3e, 0x5a, 0xa6, 0x5c, 0x3f, + 0x53, 0xc9, 0x01, 0x40, 0x8b, 0x53, 0x60, 0x3f, 0xd4, 0xc4, 0x2a, 0x40, 0xf2, 0x64, 0x49, 0x40, + 0x68, 0x4e, 0xbe, 0x3f, 0x86, 0x94, 0x32, 0xbf, 0x8b, 0x97, 0x9b, 0x3f, 0x62, 0x01, 0x32, 0x40, + 0x65, 0xbb, 0x45, 0x40, 0xc1, 0xa8, 0xec, 0x3f, 0x73, 0xd0, 0xa5, 0x3f, 0x5f, 0x26, 0xba, 0x3f, + 0xc9, 0xfd, 0x09, 0x40, 0xa8, 0xfa, 0x27, 0x40, 0x9d, 0x40, 0x8e, 0x40, 0xa9, 0x03, 0x5f, 0x40, + 0x31, 0xfa, 0x21, 0x40, 0x9c, 0xca, 0x82, 0x3f, 0x65, 0x1e, 0xdf, 0x3f, 0x34, 0x5f, 0x6a, 0x3f, + 0xd1, 0xbb, 0x4f, 0x40, 0xf0, 0xbb, 0x88, 0x40, 0x0f, 0x9a, 0x4e, 0x40, 0x41, 0x64, 0x68, 0x40, + 0x34, 0x83, 0xdc, 0x3f, 0xc4, 0x9d, 0x5b, 0x3f, 0xfe, 0xe3, 0xc3, 0x3f, 0xc8, 0xa7, 0x98, 0x3f, + 0xad, 0x7c, 0x0f, 0x40, 0xd2, 0xcf, 0xc0, 0x3f, 0xf3, 0x8c, 0x1f, 0x40, 0x23, 0x30, 0xe5, 0x3f, + 0xea, 0x8b, 0xfa, 0x3f, 0x42, 0x33, 0xc6, 0x3f, 0x6e, 0x85, 0x2d, 0x40, 0x7b, 0x7a, 0x50, 0x40, + 0xd1, 0x84, 0xec, 0x3f, 0xd0, 0x62, 0x52, 0x40, 0x21, 0x9a, 0x09, 0x40, 0x8d, 0x80, 0x6c, 0x40, + 0xaf, 0xab, 0x47, 0x40, 0xda, 0xcd, 0xf6, 0x3f, 0x75, 0x72, 0x92, 0x3f, 0xfb, 0x69, 0xea, 0x3f, + 0xb4, 0x13, 0xd9, 0x3f, 0x80, 0x9c, 0xa5, 0x3e, 0x7b, 0x1e, 0xce, 0x3f, 0x91, 0x49, 0xa6, 0x3d, + 0xb7, 0xe8, 0xf0, 0x3f, 0x5e, 0xae, 0x3f, 0x40, 0x72, 0x01, 0x58, 0x40, 0x1e, 0x55, 0x0e, 0x40, + 0x47, 0x09, 0x18, 0x40, 0x95, 0x05, 0xa8, 0x3f, 0x9d, 0x5e, 0x1a, 0x40, 0xa0, 0xe7, 0x1c, 0x40, + 0x82, 0x03, 0x20, 0x40, 0x0f, 0x09, 0x3a, 0x40, 0x11, 0xbb, 0x0d, 0x40, 0xc6, 0x29, 0x63, 0x3e, + 0xcb, 0xca, 0xeb, 0xbd, 0x59, 0xdd, 0xf0, 0x3f, 0x27, 0x81, 0x23, 0x40, 0x1a, 0x7c, 0x00, 0x40, + 0xec, 0xb8, 0x00, 0x40, 0x8f, 0xcf, 0x62, 0x3f, 0x31, 0x0e, 0x83, 0x40, 0x86, 0x70, 0x7c, 0x3f, + 0x79, 0xfc, 0x0f, 0x40, 0x5e, 0xc1, 0x3b, 0x40, 0x6c, 0xe7, 0x00, 0x40, 0x12, 0x40, 0xbe, 0x3f, + 0x26, 0xf3, 0x16, 0x40, 0x1d, 0xdc, 0x14, 0x40, 0x4b, 0x29, 0x37, 0x40, 0x75, 0xfa, 0x6d, 0x40, + 0xb0, 0x84, 0xd7, 0x3f, 0xa5, 0x5b, 0xa0, 0x3e, 0x48, 0xde, 0x02, 0x40, 0x2e, 0xe1, 0x1f, 0x40, + 0x98, 0xa5, 0xfd, 0x3f, 0x1d, 0x38, 0xf6, 0x3e, 0x64, 0x6d, 0xd4, 0x3f, 0x53, 0xed, 0x20, 0x40, + 0xfe, 0x22, 0xee, 0x3f, 0x59, 0x07, 0x2b, 0x40, 0x82, 0x7f, 0x1c, 0x40, 0x83, 0x21, 0x2e, 0x40, + 0xdf, 0xa8, 0x6c, 0x3f, 0x17, 0x8d, 0x93, 0x40, 0xfe, 0xd5, 0x11, 0x40, 0x00, 0x99, 0x98, 0x3f, + 0x83, 0xa3, 0x0b, 0x40, 0xeb, 0x45, 0xac, 0x3f, 0xba, 0xee, 0x67, 0x3e, 0xb9, 0x04, 0x29, 0x40, + 0x1f, 0x6a, 0x06, 0x40, 0x54, 0x0b, 0x26, 0x40, 0x99, 0xed, 0x5d, 0x40, 0xd6, 0xcb, 0x2f, 0xbd, + 0x39, 0x44, 0x3d, 0x40, 0xec, 0xab, 0x3c, 0x40, 0xe6, 0x91, 0xd7, 0x3f, 0x45, 0xde, 0x12, 0x40, + 0x4d, 0x89, 0xb0, 0x3f, 0xd7, 0x21, 0x42, 0x40, 0x63, 0xb9, 0x37, 0x40, 0xae, 0x05, 0x88, 0x3f, + 0x71, 0x69, 0xe2, 0x3f, 0xb7, 0xe2, 0x8b, 0x3f, 0x05, 0x0a, 0x0f, 0x40, 0x82, 0xc3, 0x6e, 0x40, + 0x81, 0xb1, 0xf0, 0x3f, 0x55, 0x07, 0x28, 0x40, 0xed, 0xcf, 0x72, 0x40, 0x45, 0xef, 0x05, 0x40, + 0x1b, 0x41, 0xee, 0x3f, 0xd4, 0xe0, 0xc8, 0x3e, 0xc5, 0xf8, 0x43, 0x40, 0xcb, 0x1a, 0xa8, 0x3f, + 0x0d, 0x74, 0x7d, 0x3f, 0xf7, 0x67, 0x0f, 0x40, 0xd0, 0x7f, 0x1a, 0x40, 0x4f, 0xeb, 0x1d, 0x40, + 0x88, 0xd0, 0x0d, 0x40, 0x1d, 0xec, 0x77, 0x40, 0x8a, 0xb5, 0x55, 0x40, 0x7d, 0x25, 0x35, 0x40, + 0x6e, 0xd7, 0xb8, 0x3f, 0xfc, 0x35, 0x2d, 0x40, 0x9e, 0x29, 0x05, 0x40, 0xfe, 0x53, 0xef, 0x3f, + 0x5a, 0xf5, 0x3e, 0x40, 0x3e, 0x17, 0x18, 0x3f, 0x71, 0xac, 0x68, 0x3f, 0x43, 0x0b, 0xd5, 0x3f, + 0x17, 0x78, 0x77, 0x40, 0x01, 0x75, 0x70, 0x3f, 0xc1, 0xc7, 0x1e, 0x3f, 0x63, 0x1f, 0xff, 0x3f, + 0x9e, 0x48, 0xeb, 0x3f, 0x31, 0xfc, 0xc5, 0x3f, 0x8d, 0x92, 0x9b, 0x3f, 0xf0, 0x70, 0x28, 0x40, + 0x61, 0xab, 0x1b, 0x40, 0xa8, 0xdd, 0xb1, 0x3f, 0x4a, 0x9e, 0xf6, 0x3f, 0x6c, 0xd7, 0x47, 0x40, + 0x9f, 0xc4, 0xc7, 0x3f, 0xee, 0xcc, 0x19, 0x40, 0xa8, 0x05, 0xcb, 0x3f, 0x48, 0xeb, 0x6f, 0x3f, + 0x9f, 0x37, 0x50, 0x3f, 0x44, 0x12, 0x05, 0x3e, 0x18, 0x85, 0xeb, 0x3f, 0xb2, 0x30, 0xe7, 0x3f, + 0x15, 0x02, 0x5c, 0x3f, 0x0d, 0xec, 0xe7, 0x3d, 0x6c, 0x03, 0xe8, 0x3f, 0xf1, 0x20, 0x74, 0x40, + 0x00, 0xa5, 0x3a, 0x40, 0x44, 0xc2, 0xf5, 0x3c, 0x18, 0xb2, 0xeb, 0x3f, 0x8d, 0x2a, 0x9b, 0x3f, + 0x4b, 0xd2, 0x14, 0x40, 0xa6, 0x5c, 0x21, 0x40, 0x19, 0x84, 0x60, 0x40, 0xff, 0x6a, 0x27, 0x40, + 0xd3, 0xf2, 0x88, 0x40, 0x52, 0x7e, 0xe8, 0x3f, 0x9e, 0x00, 0x14, 0x40, 0x65, 0x5b, 0x82, 0x3f, + 0x39, 0xf0, 0x45, 0x40, 0x10, 0x24, 0x90, 0x3f, 0xbe, 0x46, 0x3c, 0x40, 0x70, 0xfb, 0x35, 0x40, + 0x34, 0xe3, 0x31, 0x40, 0x29, 0x8a, 0xe5, 0x3f, 0xfc, 0xc2, 0xa7, 0x40, 0x0f, 0x1d, 0x30, 0x40, + 0x18, 0xa3, 0x84, 0x3f, 0x6e, 0x09, 0xec, 0x3f, 0x9e, 0xfe, 0x07, 0x40, 0x94, 0x34, 0x32, 0x3f, + 0x5f, 0xcd, 0x65, 0x3f, 0xeb, 0xaf, 0x0b, 0x3f, 0x78, 0x4a, 0x27, 0x40, 0x71, 0x76, 0x17, 0x40, + 0xc2, 0x7e, 0xee, 0x3f, 0xb4, 0x83, 0x36, 0x40, 0x66, 0x4b, 0xf1, 0x3f, 0x96, 0xaa, 0xdc, 0x3f, + 0xd3, 0x3b, 0x72, 0x40, 0x14, 0x04, 0x07, 0x40, 0x70, 0x30, 0xc8, 0x3f, 0x6d, 0x52, 0xe2, 0x3f, + 0x1a, 0x49, 0x4a, 0x40, 0x2e, 0x6b, 0x82, 0x3f, 0xb4, 0xab, 0x12, 0x40, 0xbd, 0xba, 0x4a, 0x40, + 0x7e, 0x02, 0xbe, 0x3f, 0xc0, 0x2c, 0x13, 0x40, 0x02, 0x96, 0x04, 0x40, 0x05, 0xc6, 0x16, 0x3f, + 0xf4, 0x55, 0x29, 0x40, 0xe5, 0xa0, 0x07, 0x40, 0x11, 0x6f, 0x89, 0x3f, 0x71, 0x3b, 0x23, 0x40, + 0xf1, 0x1b, 0x3c, 0x40, 0xc3, 0xf0, 0xe9, 0x3f, 0x49, 0xfc, 0x95, 0x3e, 0x1c, 0x2a, 0xee, 0x3f, + 0xda, 0xff, 0x61, 0x3f, 0xff, 0x80, 0x99, 0x3f, 0x5e, 0x6d, 0x4e, 0x40, 0xbe, 0x04, 0x39, 0x40, + 0x12, 0x80, 0x32, 0x40, 0xc5, 0x90, 0x3c, 0x3f, 0x2f, 0xdb, 0x90, 0x3f, 0xc0, 0x5e, 0x5b, 0x40, + 0x22, 0x46, 0xb6, 0x3f, 0x9d, 0x51, 0x03, 0x40, 0xce, 0xaa, 0xa0, 0x3f, 0x76, 0x3e, 0x82, 0x3f, + 0x1f, 0x8c, 0xf4, 0x3f, 0x23, 0xc0, 0x06, 0x40, 0xb4, 0x4d, 0xd8, 0x3f, 0xd6, 0x87, 0xcb, 0x3e, + 0xef, 0x02, 0xfb, 0x3f, 0x67, 0xdc, 0xb9, 0x3f, 0x67, 0x40, 0x73, 0x40, 0x3d, 0xad, 0xfe, 0x3f, + 0x8f, 0x7d, 0x1b, 0x40, 0xc2, 0xd4, 0xcf, 0x3f, 0xa9, 0xa1, 0xe9, 0x3f, 0x20, 0x43, 0x76, 0x40, + 0x7b, 0xc6, 0x3d, 0x40, 0x61, 0x7a, 0x70, 0x40, 0x2d, 0x32, 0x2e, 0x40, 0xa2, 0xfc, 0x25, 0x40, + 0x5b, 0xc3, 0x48, 0x3f, 0xb8, 0x9b, 0x21, 0x40, 0x1a, 0x05, 0xd9, 0x3f, 0xf5, 0xbc, 0x1b, 0x40, + 0xa8, 0xaf, 0xc9, 0x3f, 0xcf, 0x62, 0x2e, 0x40, 0xcb, 0x0e, 0x61, 0x40, 0xb4, 0x66, 0xe9, 0x3f, + 0x49, 0x88, 0x47, 0xbe, 0xc9, 0x4d, 0xed, 0x3f, 0x0a, 0xa0, 0x18, 0x40, 0x61, 0xbf, 0x13, 0x40, + 0x1d, 0x3f, 0xb9, 0x3f, 0x80, 0xc5, 0xd1, 0x3f, 0x3b, 0xe4, 0xc1, 0x3f, 0x69, 0x74, 0xa9, 0x40}; +unsigned char gemm_fp32_c[] = { + 0x4b, 0x22, 0xe3, 0xc2, 0x25, 0x8b, 0xc8, 0xc2, 0x36, 0x9e, 0xcb, 0xc2, 0x48, 0x57, 0xd5, 0xc2, + 0x21, 0xb9, 0xcc, 0xc2, 0x35, 0xbb, 0xbd, 0xc2, 0x10, 0x16, 0xc8, 0xc2, 0xfa, 0xbc, 0xe5, 0xc2, + 0x22, 0xcb, 0xca, 0xc2, 0x17, 0xe0, 0xbb, 0xc2, 0xd9, 0xbc, 0x08, 0xc3, 0x24, 0xb9, 0xc0, 0xc2, + 0x89, 0x71, 0xcf, 0xc2, 0x55, 0x7d, 0xb9, 0xc2, 0xf2, 0x21, 0xd4, 0xc2, 0x9f, 0xee, 0xef, 0xc2, + 0xa7, 0x63, 0xdc, 0xc2, 0xd3, 0x1a, 0xba, 0xc2, 0x29, 0xc3, 0xea, 0xc2, 0x7c, 0x0e, 0xdc, 0xc2, + 0x27, 0x98, 0xe1, 0xc2, 0x5c, 0x67, 0xc4, 0xc2, 0x8f, 0x12, 0xc2, 0xc2, 0x60, 0x81, 0xd0, 0xc2, + 0x3c, 0x02, 0xd8, 0xc2, 0xea, 0xa6, 0xc5, 0xc2, 0xac, 0x51, 0xcc, 0xc2, 0x74, 0x5c, 0xe2, 0xc2, + 0xcc, 0x1d, 0xdf, 0xc2, 0x99, 0x03, 0xcd, 0xc2, 0xa5, 0xba, 0x0a, 0xc3, 0xd5, 0x20, 0xbe, 0xc2, + 0x9b, 0x71, 0xd1, 0xc2, 0xa9, 0xce, 0xb9, 0xc2, 0x2d, 0x9a, 0xcb, 0xc2, 0x68, 0x2b, 0xe0, 0xc2, + 0x5b, 0xc0, 0xdc, 0xc2, 0x1a, 0x79, 0xb1, 0xc2, 0x0f, 0xc3, 0xdf, 0xc2, 0x20, 0x6b, 0xd4, 0xc2, + 0x1e, 0xb8, 0xed, 0xc2, 0xb7, 0x23, 0xbb, 0xc2, 0xf8, 0xbf, 0xc4, 0xc2, 0xe3, 0xa2, 0xd0, 0xc2, + 0xfe, 0xe8, 0xe4, 0xc2, 0x75, 0x04, 0xc7, 0xc2, 0x58, 0xc5, 0xd8, 0xc2, 0x53, 0xac, 0xe8, 0xc2, + 0x65, 0xbb, 0xe1, 0xc2, 0x06, 0x2f, 0xb4, 0xc2, 0xc1, 0xd2, 0x06, 0xc3, 0xaa, 0x1d, 0xc9, 0xc2, + 0xcd, 0xb2, 0xd1, 0xc2, 0xe7, 0xac, 0xb5, 0xc2, 0x45, 0x73, 0xda, 0xc2, 0xfa, 0x11, 0xf4, 0xc2, + 0x74, 0xba, 0xdc, 0xc2, 0x72, 0x4d, 0xc8, 0xc2, 0x4d, 0xda, 0xf6, 0xc2, 0xef, 0x9b, 0xcd, 0xc2, + 0x35, 0x4c, 0xf2, 0xc2, 0xee, 0xbb, 0xd7, 0xc2, 0xc8, 0xaf, 0xd4, 0xc2, 0x61, 0x1e, 0xd3, 0xc2, + 0x20, 0x59, 0xd9, 0xc2, 0x1b, 0xa3, 0xcd, 0xc2, 0x29, 0xc9, 0xdc, 0xc2, 0x02, 0x23, 0xe5, 0xc2, + 0x61, 0x57, 0xe4, 0xc2, 0xb1, 0x86, 0xb3, 0xc2, 0x57, 0x37, 0x0c, 0xc3, 0xf3, 0xfa, 0xca, 0xc2, + 0xae, 0x38, 0xd3, 0xc2, 0xde, 0x24, 0xca, 0xc2, 0x97, 0xd8, 0xe3, 0xc2, 0xc4, 0x33, 0xf6, 0xc2, + 0xae, 0xb1, 0xe5, 0xc2, 0x6c, 0x04, 0xbe, 0xc2, 0x78, 0x34, 0xf8, 0xc2, 0xe7, 0x14, 0xe5, 0xc2, + 0xc4, 0xb7, 0xe7, 0xc2, 0x03, 0x31, 0xbb, 0xc2, 0xd1, 0xb5, 0xc9, 0xc2, 0x98, 0xd2, 0xce, 0xc2, + 0x0b, 0x52, 0xdb, 0xc2, 0x99, 0x16, 0xc1, 0xc2, 0xb7, 0x82, 0xd2, 0xc2, 0xb5, 0x80, 0xdd, 0xc2, + 0xb0, 0xfb, 0xdf, 0xc2, 0x52, 0x81, 0xad, 0xc2, 0x30, 0xae, 0x07, 0xc3, 0x1d, 0x9c, 0xb6, 0xc2, + 0x74, 0x8e, 0xd3, 0xc2, 0x01, 0xdc, 0xbc, 0xc2, 0xed, 0x69, 0xd3, 0xc2, 0x8f, 0xb9, 0xea, 0xc2, + 0xe2, 0x30, 0xde, 0xc2, 0x3a, 0x63, 0xbd, 0xc2, 0x66, 0xab, 0xf3, 0xc2, 0x4c, 0x6a, 0xcc, 0xc2, + 0x89, 0xfb, 0xc5, 0xc2, 0x4a, 0xfb, 0xaf, 0xc2, 0x3f, 0xe5, 0xab, 0xc2, 0x5b, 0x79, 0xae, 0xc2, + 0x84, 0x2a, 0xaf, 0xc2, 0xcc, 0x1f, 0xa4, 0xc2, 0xad, 0xfd, 0xaa, 0xc2, 0xf3, 0x27, 0xb6, 0xc2, + 0x24, 0x18, 0xac, 0xc2, 0x31, 0xe7, 0x9d, 0xc2, 0x2d, 0xe7, 0xe8, 0xc2, 0x3a, 0xa0, 0xa7, 0xc2, + 0x76, 0x35, 0xb3, 0xc2, 0xc2, 0x9a, 0xa5, 0xc2, 0x95, 0x6b, 0xbf, 0xc2, 0x85, 0x29, 0xd3, 0xc2, + 0xff, 0x29, 0xbf, 0xc2, 0xd5, 0x50, 0xa9, 0xc2, 0x3b, 0xf6, 0xcd, 0xc2, 0x66, 0x4e, 0xc3, 0xc2, + 0x98, 0x2b, 0xd0, 0xc2, 0xfb, 0x17, 0xb6, 0xc2, 0x37, 0xba, 0xab, 0xc2, 0xaf, 0xd8, 0xb9, 0xc2, + 0x20, 0x7f, 0xb5, 0xc2, 0x75, 0x1d, 0xa8, 0xc2, 0x80, 0x8c, 0xc7, 0xc2, 0xff, 0xb0, 0xd6, 0xc2, + 0x79, 0xd2, 0xc6, 0xc2, 0x11, 0xd1, 0xa5, 0xc2, 0x49, 0x90, 0xed, 0xc2, 0xa2, 0x88, 0xb4, 0xc2, + 0x2c, 0xc0, 0xb7, 0xc2, 0x23, 0x83, 0x9d, 0xc2, 0x5a, 0x0f, 0xb9, 0xc2, 0x64, 0x36, 0xd2, 0xc2, + 0x12, 0xa6, 0xb9, 0xc2, 0x35, 0x26, 0x95, 0xc2, 0xee, 0xef, 0xcf, 0xc2, 0xe7, 0x58, 0xc7, 0xc2, + 0x1d, 0x17, 0xc4, 0xc2, 0x8c, 0x31, 0xa5, 0xc2, 0x63, 0xc0, 0xa9, 0xc2, 0x41, 0xf1, 0xb4, 0xc2, + 0x02, 0x2c, 0xb6, 0xc2, 0x26, 0x62, 0x9d, 0xc2, 0xfe, 0x4d, 0xb1, 0xc2, 0x7b, 0x4f, 0xc0, 0xc2, + 0x28, 0xe9, 0xc3, 0xc2, 0x01, 0x26, 0x8c, 0xc2, 0x38, 0x22, 0xe7, 0xc2, 0x9d, 0xf7, 0xa1, 0xc2, + 0x56, 0xf0, 0xaf, 0xc2, 0xc5, 0xcd, 0x96, 0xc2, 0xb3, 0x77, 0xbb, 0xc2, 0x6f, 0x94, 0xcd, 0xc2, + 0xfe, 0xaa, 0xba, 0xc2, 0x31, 0xe2, 0xa1, 0xc2, 0x22, 0x29, 0xd2, 0xc2, 0x9a, 0x13, 0xa6, 0xc2, + 0xf2, 0x7b, 0xe2, 0xc2, 0xc9, 0x74, 0xb1, 0xc2, 0x2e, 0xc8, 0xbc, 0xc2, 0x0a, 0x2a, 0xbb, 0xc2, + 0x41, 0x7f, 0xc1, 0xc2, 0x23, 0xa3, 0xa5, 0xc2, 0xef, 0x4c, 0xbb, 0xc2, 0xad, 0x31, 0xcf, 0xc2, + 0x24, 0x45, 0xc9, 0xc2, 0x99, 0xc9, 0xa4, 0xc2, 0x39, 0xf1, 0xfb, 0xc2, 0x06, 0xa4, 0xb7, 0xc2, + 0xdc, 0x61, 0xc8, 0xc2, 0x7e, 0xfd, 0xb1, 0xc2, 0xf2, 0x9e, 0xc1, 0xc2, 0x27, 0xa2, 0xd5, 0xc2, + 0x38, 0xc1, 0xcd, 0xc2, 0x3a, 0xae, 0xaa, 0xc2, 0xbd, 0x6b, 0xe1, 0xc2, 0x3d, 0x66, 0xc5, 0xc2, + 0x34, 0xc8, 0xbe, 0xc2, 0x4d, 0x94, 0xa3, 0xc2, 0x02, 0xbe, 0xb3, 0xc2, 0x9a, 0xef, 0xae, 0xc2, + 0x3c, 0x03, 0xba, 0xc2, 0x4c, 0x9a, 0xa8, 0xc2, 0x41, 0x6a, 0xa3, 0xc2, 0xe5, 0x84, 0xbd, 0xc2, + 0xbf, 0x74, 0xac, 0xc2, 0x73, 0x5e, 0x99, 0xc2, 0xb3, 0x4b, 0xfd, 0xc2, 0x82, 0x98, 0xa6, 0xc2, + 0x79, 0xd8, 0xae, 0xc2, 0x64, 0x8a, 0xa5, 0xc2, 0x05, 0x96, 0xba, 0xc2, 0x2f, 0x8e, 0xbe, 0xc2, + 0xb1, 0x5a, 0xc1, 0xc2, 0xc0, 0xbe, 0xa8, 0xc2, 0x9b, 0x08, 0xd1, 0xc2, 0x3d, 0x0b, 0xb8, 0xc2, + 0x26, 0xba, 0xd4, 0xc2, 0x1c, 0x62, 0xc0, 0xc2, 0x94, 0x92, 0xcd, 0xc2, 0x03, 0xce, 0xd5, 0xc2, + 0xb9, 0xc3, 0xca, 0xc2, 0xba, 0xa2, 0xb9, 0xc2, 0xc7, 0x67, 0xbe, 0xc2, 0xcb, 0xf6, 0xe5, 0xc2, + 0x88, 0x11, 0xd5, 0xc2, 0x25, 0x19, 0xb1, 0xc2, 0xf1, 0xbf, 0x0c, 0xc3, 0xe6, 0xae, 0xba, 0xc2, + 0x1a, 0x6e, 0xc8, 0xc2, 0x94, 0x33, 0xb3, 0xc2, 0x2f, 0x96, 0xd0, 0xc2, 0x2b, 0xef, 0xd4, 0xc2, + 0x0b, 0x3d, 0xd3, 0xc2, 0xa5, 0xab, 0xac, 0xc2, 0x23, 0x6b, 0xe8, 0xc2, 0xe9, 0x9f, 0xc5, 0xc2, + 0xae, 0x58, 0xcc, 0xc2, 0xe0, 0x7f, 0xa6, 0xc2, 0xdf, 0x02, 0xb2, 0xc2, 0xb3, 0x11, 0xb4, 0xc2, + 0x31, 0x48, 0xb5, 0xc2, 0xba, 0xd1, 0xa2, 0xc2, 0xee, 0x74, 0xaa, 0xc2, 0x7a, 0x57, 0xc2, 0xc2, + 0x81, 0x4a, 0xb8, 0xc2, 0xbb, 0xca, 0x9d, 0xc2, 0x17, 0x49, 0xe6, 0xc2, 0xb5, 0x0b, 0xa8, 0xc2, + 0x8c, 0x76, 0xb6, 0xc2, 0x2d, 0x33, 0xa7, 0xc2, 0x67, 0xcd, 0xb7, 0xc2, 0xce, 0xcf, 0xc3, 0xc2, + 0xac, 0x65, 0xc0, 0xc2, 0x05, 0xb7, 0x9d, 0xc2, 0x91, 0x9d, 0xcb, 0xc2, 0x64, 0x49, 0xb5, 0xc2, + 0x75, 0x20, 0xb3, 0xc2, 0xc3, 0xd3, 0x96, 0xc2, 0xe9, 0x1d, 0x95, 0xc2, 0x06, 0xa4, 0x92, 0xc2, + 0x73, 0xdf, 0x94, 0xc2, 0x31, 0x13, 0x8e, 0xc2, 0x57, 0x95, 0xaa, 0xc2, 0x82, 0x89, 0xad, 0xc2, + 0x6a, 0xe5, 0xa1, 0xc2, 0xa4, 0xd7, 0x85, 0xc2, 0x27, 0x29, 0xc5, 0xc2, 0xd2, 0x54, 0x99, 0xc2, + 0x48, 0x1e, 0x9b, 0xc2, 0x81, 0x0f, 0x89, 0xc2, 0x19, 0x57, 0xa2, 0xc2, 0x45, 0xe0, 0xb3, 0xc2, + 0x92, 0x58, 0xa0, 0xc2, 0x62, 0x5a, 0x8e, 0xc2, 0xa2, 0xec, 0xba, 0xc2, 0xb7, 0x0e, 0xb2, 0xc2, + 0x8d, 0x8f, 0xde, 0xc2, 0xf9, 0x9c, 0xb3, 0xc2, 0x81, 0x34, 0xba, 0xc2, 0xfc, 0xb0, 0xb6, 0xc2, + 0xab, 0x2f, 0xc6, 0xc2, 0x57, 0x9d, 0xae, 0xc2, 0x64, 0x3e, 0xd0, 0xc2, 0xa7, 0x3a, 0xd9, 0xc2, + 0x74, 0xdd, 0xc9, 0xc2, 0xb4, 0x4d, 0xa9, 0xc2, 0x3e, 0xe7, 0xf8, 0xc2, 0xb6, 0xa8, 0xc4, 0xc2, + 0x93, 0x6f, 0xbd, 0xc2, 0x3e, 0x1b, 0xa8, 0xc2, 0xe7, 0xdc, 0xc2, 0xc2, 0xf5, 0xf9, 0xcf, 0xc2, + 0x6c, 0x27, 0xc4, 0xc2, 0xee, 0x4a, 0xae, 0xc2, 0x70, 0x98, 0xe0, 0xc2, 0x46, 0xd6, 0xc9, 0xc2, + 0x64, 0x11, 0xd2, 0xc2, 0x51, 0x07, 0xb6, 0xc2, 0x20, 0xd3, 0xad, 0xc2, 0x10, 0x69, 0xb2, 0xc2, + 0xf2, 0x9b, 0xb4, 0xc2, 0x64, 0xc9, 0xb5, 0xc2, 0xbd, 0xe3, 0xc7, 0xc2, 0x10, 0x6d, 0xd2, 0xc2, + 0x71, 0x0f, 0xc8, 0xc2, 0x0e, 0xd2, 0x9c, 0xc2, 0xea, 0xae, 0xec, 0xc2, 0x77, 0xe5, 0xb2, 0xc2, + 0xd5, 0x3b, 0xbf, 0xc2, 0x66, 0x92, 0xa6, 0xc2, 0x9a, 0xf4, 0xc1, 0xc2, 0xf8, 0x50, 0xd7, 0xc2, + 0x01, 0x0d, 0xcc, 0xc2, 0xc6, 0xc2, 0xa3, 0xc2, 0x02, 0x7d, 0xda, 0xc2, 0x65, 0xcd, 0xd2, 0xc2, + 0xa5, 0x10, 0xaf, 0xc2, 0xa6, 0x9a, 0x92, 0xc2, 0x9e, 0x72, 0x96, 0xc2, 0x95, 0x2f, 0x9b, 0xc2, + 0x52, 0xeb, 0x9a, 0xc2, 0x38, 0x5f, 0x93, 0xc2, 0x0b, 0xe3, 0xaa, 0xc2, 0x1c, 0x21, 0xb2, 0xc2, + 0x90, 0x99, 0xa9, 0xc2, 0x3c, 0xb1, 0x80, 0xc2, 0x30, 0xba, 0xcf, 0xc2, 0x77, 0xda, 0x8c, 0xc2, + 0x27, 0x3e, 0xa3, 0xc2, 0x40, 0xb2, 0x8a, 0xc2, 0x83, 0x83, 0xa4, 0xc2, 0x2c, 0x3b, 0xb4, 0xc2, + 0x81, 0xba, 0xae, 0xc2, 0x98, 0xdb, 0x8e, 0xc2, 0xd9, 0xf1, 0xbb, 0xc2, 0x48, 0x93, 0xa6, 0xc2, + 0x78, 0x8e, 0xb3, 0xc2, 0xfe, 0xbc, 0x9d, 0xc2, 0x6d, 0x4d, 0xa2, 0xc2, 0x66, 0xee, 0xad, 0xc2, + 0x9c, 0xcf, 0xa3, 0xc2, 0x61, 0x06, 0x8c, 0xc2, 0x66, 0xf2, 0x99, 0xc2, 0x2f, 0x01, 0xa6, 0xc2, + 0xc5, 0x3f, 0xaa, 0xc2, 0x66, 0x7b, 0x89, 0xc2, 0xf1, 0xce, 0xde, 0xc2, 0x4c, 0x86, 0x88, 0xc2, + 0x8e, 0xc1, 0xaa, 0xc2, 0xb8, 0x23, 0x99, 0xc2, 0xa3, 0xf7, 0xaf, 0xc2, 0xf7, 0xad, 0xc0, 0xc2, + 0x89, 0xaf, 0xae, 0xc2, 0x1d, 0xff, 0x91, 0xc2, 0x89, 0x01, 0xbe, 0xc2, 0x6b, 0x89, 0x9c, 0xc2, + 0x89, 0x67, 0xf3, 0xc2, 0xa1, 0x1b, 0xc7, 0xc2, 0xbd, 0x80, 0xbd, 0xc2, 0x21, 0x4d, 0xc8, 0xc2, + 0xf3, 0x84, 0xd7, 0xc2, 0x31, 0x50, 0xc8, 0xc2, 0x3e, 0x6c, 0xeb, 0xc2, 0xaa, 0x4c, 0xeb, 0xc2, + 0x51, 0xb2, 0xe7, 0xc2, 0x59, 0xcf, 0xa9, 0xc2, 0x99, 0x4c, 0x03, 0xc3, 0x98, 0x77, 0xc9, 0xc2, + 0x0a, 0xe5, 0xce, 0xc2, 0x46, 0x28, 0xb6, 0xc2, 0xfa, 0xa8, 0xda, 0xc2, 0x04, 0xc5, 0xf8, 0xc2, + 0x5c, 0x5b, 0xd9, 0xc2, 0x21, 0x13, 0xb4, 0xc2, 0xfc, 0x48, 0xf6, 0xc2, 0xba, 0xdd, 0xe3, 0xc2, + 0x13, 0xc5, 0xfa, 0xc2, 0xb4, 0x3c, 0xdc, 0xc2, 0x7c, 0xeb, 0xe3, 0xc2, 0xd6, 0xd9, 0xe9, 0xc2, + 0xca, 0x7d, 0xf1, 0xc2, 0x2e, 0xdd, 0xe2, 0xc2, 0xa9, 0xf9, 0xf1, 0xc2, 0xda, 0x31, 0xfa, 0xc2, + 0x55, 0x08, 0xea, 0xc2, 0x9b, 0xb2, 0xca, 0xc2, 0x9f, 0xff, 0x17, 0xc3, 0xa6, 0x46, 0xcf, 0xc2, + 0xa8, 0x74, 0xe5, 0xc2, 0xe3, 0x2f, 0xd4, 0xc2, 0x5b, 0x02, 0xf2, 0xc2, 0x74, 0x88, 0x08, 0xc3, + 0xfb, 0xb5, 0xf4, 0xc2, 0xae, 0x7e, 0xdb, 0xc2, 0x4b, 0xbe, 0x08, 0xc3, 0x7e, 0xc3, 0xfd, 0xc2, + 0x93, 0x1c, 0xbe, 0xc2, 0x4b, 0x57, 0xa6, 0xc2, 0xc1, 0xdb, 0xa0, 0xc2, 0x2c, 0xc3, 0xac, 0xc2, + 0x56, 0x8d, 0x9f, 0xc2, 0x3e, 0x79, 0x89, 0xc2, 0x99, 0x0d, 0xa7, 0xc2, 0x42, 0xee, 0xb7, 0xc2, + 0x54, 0x0e, 0xac, 0xc2, 0x49, 0x95, 0xa3, 0xc2, 0x96, 0xfd, 0xe0, 0xc2, 0xa3, 0xf2, 0x9e, 0xc2, + 0xe9, 0x07, 0xaf, 0xc2, 0x4d, 0x24, 0x8f, 0xc2, 0xd2, 0xed, 0xa7, 0xc2, 0x72, 0xcc, 0xc3, 0xc2, + 0x3f, 0x2a, 0xac, 0xc2, 0xcb, 0x16, 0x94, 0xc2, 0x0a, 0x03, 0xc3, 0xc2, 0x17, 0x21, 0xb8, 0xc2, + 0xaf, 0xfa, 0xfe, 0xc2, 0xde, 0x95, 0xce, 0xc2, 0x7e, 0xf2, 0xcb, 0xc2, 0x89, 0x39, 0xce, 0xc2, + 0x31, 0x19, 0xd6, 0xc2, 0x60, 0xa2, 0xc6, 0xc2, 0xba, 0x25, 0xde, 0xc2, 0xbb, 0x70, 0xf6, 0xc2, + 0xb6, 0xc1, 0xeb, 0xc2, 0x7d, 0xe0, 0xb4, 0xc2, 0x31, 0x46, 0x07, 0xc3, 0x50, 0xc8, 0xdc, 0xc2, + 0x3d, 0xc1, 0xd6, 0xc2, 0x51, 0xc3, 0xbe, 0xc2, 0x53, 0xe9, 0xdb, 0xc2, 0x24, 0x1f, 0xf6, 0xc2, + 0x4e, 0x2d, 0xdd, 0xc2, 0x68, 0x1f, 0xb6, 0xc2, 0x01, 0xbe, 0xfc, 0xc2, 0xa4, 0x82, 0xe8, 0xc2, + 0x00, 0x8d, 0xd5, 0xc2, 0x8b, 0x6a, 0xba, 0xc2, 0x6f, 0x08, 0xb3, 0xc2, 0xf1, 0x92, 0xbb, 0xc2, + 0xd5, 0xde, 0xb8, 0xc2, 0x46, 0x8e, 0xa6, 0xc2, 0xd2, 0x0f, 0xc8, 0xc2, 0x72, 0x03, 0xd3, 0xc2, + 0x81, 0x2d, 0xc2, 0xc2, 0x9c, 0x7b, 0xb2, 0xc2, 0x41, 0x1c, 0x00, 0xc3, 0x95, 0xb5, 0xb7, 0xc2, + 0x4c, 0xdf, 0xc0, 0xc2, 0x4d, 0xfe, 0x9d, 0xc2, 0xa9, 0xe5, 0xc2, 0xc2, 0x0c, 0x8c, 0xe0, 0xc2, + 0xb7, 0x96, 0xc9, 0xc2, 0xca, 0x79, 0xb3, 0xc2, 0xc3, 0xe7, 0xe1, 0xc2, 0xf3, 0x55, 0xd6, 0xc2, + 0xe8, 0x93, 0xdd, 0xc2, 0x81, 0x9a, 0xcc, 0xc2, 0xf0, 0x87, 0xcc, 0xc2, 0xc8, 0xc0, 0xc6, 0xc2, + 0x43, 0x28, 0xba, 0xc2, 0xcb, 0xc0, 0xa8, 0xc2, 0x10, 0xc7, 0xbc, 0xc2, 0xdc, 0xf3, 0xcf, 0xc2, + 0x89, 0x2c, 0xcb, 0xc2, 0xea, 0x2f, 0xb9, 0xc2, 0x2d, 0x02, 0x0a, 0xc3, 0x89, 0xa4, 0xc0, 0xc2, + 0x86, 0x2f, 0xca, 0xc2, 0x71, 0x22, 0xb6, 0xc2, 0xf1, 0x20, 0xd3, 0xc2, 0x80, 0x38, 0xd5, 0xc2, + 0xa4, 0x9b, 0xd5, 0xc2, 0xe3, 0x4d, 0xb5, 0xc2, 0x84, 0x41, 0xe8, 0xc2, 0x6d, 0x8f, 0xd4, 0xc2, + 0x46, 0xc7, 0xc0, 0xc2, 0xb2, 0x37, 0xaf, 0xc2, 0xf4, 0x2f, 0xab, 0xc2, 0x7f, 0xcc, 0xa8, 0xc2, + 0x89, 0x02, 0xac, 0xc2, 0x36, 0xd6, 0xa5, 0xc2, 0x88, 0x58, 0xbe, 0xc2, 0x1e, 0x8f, 0xc6, 0xc2, + 0x82, 0x2c, 0xb8, 0xc2, 0x38, 0x1b, 0x9b, 0xc2, 0x95, 0x4e, 0xf3, 0xc2, 0xd9, 0xb2, 0xa1, 0xc2, + 0x97, 0x93, 0xab, 0xc2, 0xc9, 0xe8, 0xa0, 0xc2, 0x4b, 0xd9, 0xad, 0xc2, 0x2f, 0x25, 0xbf, 0xc2, + 0xd1, 0x3e, 0xc0, 0xc2, 0xfb, 0x75, 0x93, 0xc2, 0xf9, 0xc5, 0xbf, 0xc2, 0x76, 0x7b, 0xbc, 0xc2, + 0xac, 0x17, 0xf4, 0xc2, 0x24, 0x34, 0xd1, 0xc2, 0x52, 0x15, 0xd9, 0xc2, 0x48, 0x06, 0xdb, 0xc2, + 0x69, 0x10, 0xdc, 0xc2, 0x09, 0xd0, 0xc4, 0xc2, 0xe9, 0x7d, 0xd7, 0xc2, 0x61, 0x02, 0xf2, 0xc2, + 0x53, 0x1f, 0xde, 0xc2, 0xbc, 0x51, 0xc2, 0xc2, 0xc4, 0x14, 0x10, 0xc3, 0x3b, 0xd1, 0xd4, 0xc2, + 0xf9, 0x55, 0xd6, 0xc2, 0xe2, 0xe3, 0xbc, 0xc2, 0x5a, 0x87, 0xdf, 0xc2, 0x60, 0x41, 0xfc, 0xc2, + 0x14, 0x00, 0xe1, 0xc2, 0x76, 0xc8, 0xcc, 0xc2, 0x82, 0x79, 0x01, 0xc3, 0x81, 0xc9, 0xe5, 0xc2, + 0xc2, 0x37, 0xbe, 0xc2, 0xb9, 0x78, 0xa0, 0xc2, 0xa5, 0xa3, 0xa3, 0xc2, 0x69, 0xe2, 0xb4, 0xc2, + 0x23, 0x19, 0xa7, 0xc2, 0xe7, 0xdc, 0x94, 0xc2, 0xd4, 0xfa, 0x92, 0xc2, 0xfe, 0xe5, 0xb8, 0xc2, + 0xe7, 0x24, 0xa6, 0xc2, 0x6e, 0x06, 0x9e, 0xc2, 0x69, 0x38, 0xd8, 0xc2, 0xd7, 0xda, 0x98, 0xc2, + 0x2b, 0x28, 0xb0, 0xc2, 0x20, 0x00, 0x9f, 0xc2, 0x36, 0xa1, 0xab, 0xc2, 0x18, 0x12, 0xc2, 0xc2, + 0x02, 0xa3, 0xba, 0xc2, 0x1e, 0x1b, 0x8f, 0xc2, 0x74, 0x01, 0xb0, 0xc2, 0x63, 0x49, 0xa1, 0xc2, + 0x46, 0x79, 0xf3, 0xc2, 0x43, 0x0c, 0xc1, 0xc2, 0x2e, 0xce, 0xd1, 0xc2, 0xaa, 0xce, 0xd6, 0xc2, + 0xd8, 0xf7, 0xe0, 0xc2, 0x55, 0x66, 0xc0, 0xc2, 0x77, 0x29, 0xca, 0xc2, 0xce, 0x4d, 0xe8, 0xc2, + 0xc3, 0xc8, 0xe7, 0xc2, 0x77, 0x23, 0xb2, 0xc2, 0x1b, 0x6c, 0x0b, 0xc3, 0x18, 0xcf, 0xc5, 0xc2, + 0xe9, 0x55, 0xd8, 0xc2, 0x7b, 0xdd, 0xc3, 0xc2, 0x55, 0xc4, 0xd4, 0xc2, 0x9e, 0x1b, 0xed, 0xc2, + 0x00, 0xf1, 0xdf, 0xc2, 0xaa, 0x3d, 0xb9, 0xc2, 0x9f, 0x33, 0xf6, 0xc2, 0xef, 0x31, 0xcb, 0xc2, + 0x8f, 0x6b, 0xea, 0xc2, 0x7e, 0x76, 0xc3, 0xc2, 0x42, 0x4d, 0xd9, 0xc2, 0x0a, 0xe9, 0xd2, 0xc2, + 0xbf, 0xa3, 0xdd, 0xc2, 0x68, 0x25, 0xc2, 0xc2, 0xed, 0x5f, 0xcf, 0xc2, 0xdd, 0x42, 0xe1, 0xc2, + 0xb1, 0x60, 0xd9, 0xc2, 0x02, 0x34, 0xae, 0xc2, 0xc9, 0x37, 0x0d, 0xc3, 0xcd, 0xa8, 0xc7, 0xc2, + 0x80, 0xee, 0xd4, 0xc2, 0xf5, 0x7e, 0xc1, 0xc2, 0x54, 0x81, 0xdf, 0xc2, 0x5e, 0x45, 0xec, 0xc2, + 0x04, 0x02, 0xe2, 0xc2, 0xc0, 0xc8, 0xd0, 0xc2, 0xf4, 0x22, 0x00, 0xc3, 0xb8, 0x81, 0xce, 0xc2, + 0x09, 0xfc, 0xe5, 0xc2, 0x9c, 0xaf, 0xcd, 0xc2, 0x46, 0x73, 0xc2, 0xc2, 0xbd, 0x8f, 0xc5, 0xc2, + 0x3a, 0x28, 0xc4, 0xc2, 0x5e, 0x84, 0xb1, 0xc2, 0xfd, 0x9a, 0xd7, 0xc2, 0x03, 0xb3, 0xd1, 0xc2, + 0xf5, 0x9d, 0xd8, 0xc2, 0xe0, 0x30, 0xb4, 0xc2, 0x36, 0x7a, 0x01, 0xc3, 0x09, 0x2e, 0xc0, 0xc2, + 0xa0, 0x96, 0xcd, 0xc2, 0x7e, 0xca, 0xb2, 0xc2, 0x02, 0x80, 0xd5, 0xc2, 0x9a, 0xa5, 0xec, 0xc2, + 0xbe, 0x9e, 0xd4, 0xc2, 0xd2, 0x6a, 0xbc, 0xc2, 0x8e, 0x16, 0xef, 0xc2, 0x24, 0x28, 0xdc, 0xc2, + 0x9c, 0xa4, 0xd2, 0xc2, 0xcd, 0xbd, 0xb7, 0xc2, 0xe5, 0xc8, 0xb5, 0xc2, 0x62, 0xc9, 0xb7, 0xc2, + 0x00, 0x94, 0xbf, 0xc2, 0x82, 0x07, 0xad, 0xc2, 0x4b, 0x4c, 0xc1, 0xc2, 0xff, 0xca, 0xca, 0xc2, + 0x53, 0x03, 0xc2, 0xc2, 0x21, 0x1f, 0xa6, 0xc2, 0x49, 0x6d, 0xfd, 0xc2, 0x8b, 0xe2, 0xb3, 0xc2, + 0xb7, 0xd4, 0xbf, 0xc2, 0x2c, 0x98, 0xad, 0xc2, 0x4a, 0x1d, 0xc2, 0xc2, 0x33, 0xf0, 0xd5, 0xc2, + 0xdb, 0x62, 0xce, 0xc2, 0x2d, 0x64, 0xaf, 0xc2, 0xf8, 0xb2, 0xd5, 0xc2, 0x48, 0x1d, 0xc4, 0xc2, + 0xdd, 0x02, 0xc5, 0xc2, 0x2b, 0x22, 0xab, 0xc2, 0x81, 0xc1, 0xb2, 0xc2, 0x52, 0x31, 0xc0, 0xc2, + 0xc7, 0x57, 0xae, 0xc2, 0x0f, 0x98, 0x95, 0xc2, 0xc9, 0x80, 0xab, 0xc2, 0xfd, 0xb1, 0xc9, 0xc2, + 0x3d, 0x89, 0xc2, 0xc2, 0x10, 0x46, 0xa1, 0xc2, 0xce, 0xa2, 0xf7, 0xc2, 0xf9, 0x1c, 0xaa, 0xc2, + 0x78, 0xf6, 0xb8, 0xc2, 0xa1, 0xa4, 0x92, 0xc2, 0x81, 0xfb, 0xc0, 0xc2, 0x6b, 0xb9, 0xca, 0xc2, + 0xec, 0x34, 0xb9, 0xc2, 0xa1, 0x49, 0xa6, 0xc2, 0x97, 0x9f, 0xdf, 0xc2, 0x33, 0x63, 0xb6, 0xc2}; +unsigned char gemm_fp32_bias[] = { + 0x3e, 0xa5, 0x3d, 0x40, 0xb0, 0xdf, 0xab, 0xc0, 0x58, 0x7d, 0x65, 0xc0, 0xe6, 0x63, 0xb7, 0xc0, + 0x84, 0xc7, 0xf8, 0xbf, 0xd3, 0xc8, 0x51, 0xc0, 0x95, 0xad, 0xbe, 0x3f, 0xbb, 0xf0, 0x21, 0xbf, + 0x71, 0xdc, 0x73, 0x40, 0x3e, 0x0a, 0xa1, 0x3f, 0xac, 0x38, 0x39, 0x3e, 0xcb, 0x47, 0x82, 0x3e, + 0x05, 0xab, 0x1b, 0xc0, 0x16, 0x93, 0xc6, 0xbe, 0xd6, 0x1e, 0x7f, 0xbf, 0x64, 0x3b, 0xe7, 0x40, + 0x16, 0xf5, 0x94, 0x40, 0xbb, 0x85, 0xe1, 0x3f, 0xd3, 0xe4, 0x4d, 0xc1, 0xd5, 0x83, 0xf1, 0x3f, + 0xd7, 0x6b, 0x90, 0xbf, 0x78, 0xd8, 0x70, 0x40, 0xa7, 0xe4, 0x2d, 0x3e, 0xfe, 0x97, 0x1d, 0x41, + 0x36, 0x0c, 0x21, 0xc0, 0x69, 0x21, 0xfe, 0x40, 0x7e, 0x26, 0x3a, 0xc0, 0x66, 0x18, 0x9d, 0xc0, + 0x44, 0xbf, 0x16, 0xc1, 0xe8, 0x3d, 0x0c, 0xc0, 0x75, 0x89, 0x08, 0xbc}; + +unsigned char gemm_fp16_a[] = { + 0x90, 0xc1, 0x02, 0xc4, 0x5d, 0xc1, 0x26, 0xc2, 0xbe, 0xc4, 0x18, 0xc4, 0x30, 0xc3, 0x18, 0xc4, + 0x69, 0xc2, 0xbf, 0xc2, 0x2e, 0xc2, 0xf6, 0xc2, 0x5f, 0xc2, 0x68, 0xbe, 0x75, 0xc1, 0x9c, 0xc2, + 0x1a, 0xc2, 0xa9, 0xc3, 0x5f, 0xc0, 0x03, 0xc5, 0xe7, 0xc4, 0xbe, 0xc1, 0x5f, 0xc2, 0xfd, 0xbf, + 0x43, 0xbd, 0x8c, 0xc1, 0x07, 0xc5, 0x71, 0xbe, 0x13, 0xc1, 0xe0, 0xc2, 0x73, 0xc3, 0x39, 0xbd, + 0x35, 0xc4, 0x2d, 0xc3, 0x86, 0xc4, 0xa1, 0xc3, 0xbe, 0xc1, 0xc7, 0xbe, 0x76, 0xc3, 0x1c, 0xc1, + 0x6f, 0xc0, 0xdf, 0xc2, 0xbb, 0xc3, 0xd9, 0xc4, 0xb2, 0xc2, 0xbc, 0xc2, 0x6f, 0xc1, 0x6e, 0xb6, + 0xbe, 0xc2, 0x31, 0xc2, 0x51, 0xc1, 0x58, 0xbf, 0x56, 0xc3, 0x1b, 0xc3, 0x72, 0xc1, 0x44, 0xc4, + 0xf4, 0xbc, 0xb6, 0xc4, 0x65, 0xc1, 0xe8, 0xc2, 0x58, 0xc1, 0x86, 0xc2, 0x05, 0xc4, 0xec, 0xc2, + 0xa3, 0xc4, 0x98, 0xbe, 0xd1, 0xc2, 0x45, 0xc2, 0x42, 0xc4, 0x79, 0xc0, 0x11, 0xc2, 0x3a, 0xc0, + 0x1e, 0xc1, 0xa5, 0xc3, 0x0f, 0xc3, 0x23, 0xc4, 0x2a, 0xc2, 0xfc, 0xc3, 0x69, 0xc2, 0x43, 0xc0, + 0x38, 0xc2, 0x8d, 0xc2, 0x0a, 0xc1, 0x34, 0xc2, 0xae, 0xc0, 0x40, 0xc2, 0xa7, 0xc0, 0x0a, 0xc4, + 0xcc, 0xbd, 0xaf, 0xbb, 0xd3, 0xc0, 0xb7, 0xc2, 0xa3, 0xc1, 0x18, 0xc2, 0xbd, 0xbe, 0x96, 0xc2, + 0xe8, 0xbd, 0xc3, 0xc2, 0x17, 0xc4, 0x4c, 0xc1, 0x40, 0xc2, 0x20, 0xc4, 0x3c, 0xc2, 0x74, 0xbd, + 0xb6, 0xc1, 0x21, 0xc4, 0xa7, 0xc3, 0x1a, 0xc0, 0x78, 0xc0, 0x14, 0xc1, 0x62, 0xc2, 0xc3, 0xc1, + 0xf9, 0xc3, 0x3f, 0xc3, 0xe8, 0xc2, 0x4a, 0xbe, 0xa9, 0xc1, 0xd7, 0xbf, 0x71, 0xc1, 0x6b, 0xc0, + 0x51, 0xc0, 0x2b, 0xc4, 0x39, 0xb9, 0x04, 0xc3, 0xb6, 0xc2, 0x15, 0xc2, 0x90, 0xc2, 0xf2, 0xbb, + 0x8b, 0xc2, 0xed, 0xc0, 0x7d, 0xc4, 0x2c, 0xc2, 0x09, 0xc3, 0xf0, 0xbd, 0x3e, 0xc1, 0x9f, 0xc1, + 0x30, 0xc4, 0xd9, 0xc1, 0x80, 0xc3, 0xbd, 0xc1, 0xd1, 0xbf, 0xd8, 0xc3, 0xff, 0xc1, 0xcf, 0xc3, + 0x56, 0xc4, 0x47, 0xc2, 0x51, 0xc1, 0xad, 0xc1, 0xb9, 0xc3, 0x5c, 0xbe, 0x0d, 0xc3, 0xdd, 0xc1, + 0x5f, 0xbf, 0xa9, 0xb5, 0x08, 0xc3, 0xd5, 0xc2, 0xca, 0xc1, 0x22, 0xbf, 0x25, 0xc3, 0x02, 0xc1, + 0x18, 0xc2, 0x17, 0xc4, 0xa6, 0xc0, 0x65, 0xc0, 0xe8, 0xc4, 0x05, 0xc2, 0x78, 0xc2, 0x15, 0xc0, + 0x5d, 0xc2, 0x40, 0xc0, 0xe7, 0xc2, 0x1f, 0xc0, 0x7f, 0xc4, 0x3c, 0xc0, 0x48, 0xc5, 0x66, 0xc0, + 0x78, 0xc0, 0x8c, 0xc0, 0xc7, 0xc1, 0x81, 0xc0, 0x85, 0xc3, 0x25, 0xc0, 0x45, 0xc2, 0x7e, 0xc3, + 0x58, 0xc2, 0x3d, 0xc2, 0x31, 0xc3, 0x06, 0xc0, 0xed, 0xc0, 0xef, 0xc0, 0x3d, 0xc1, 0x24, 0xc1, + 0xfe, 0xbc, 0x15, 0xc0, 0x93, 0xc0, 0xf4, 0xc2, 0xa8, 0xbc, 0x2a, 0xbe, 0x49, 0xc1, 0xf0, 0xbf, + 0xef, 0xbe, 0xe3, 0xc0, 0x11, 0xc1, 0x29, 0xc2, 0x67, 0xc2, 0x98, 0xc1, 0xe4, 0xc0, 0x23, 0xc3, + 0x80, 0xc0, 0xbb, 0xc0, 0xb6, 0xc3, 0xb2, 0xc1, 0x22, 0xbf, 0x30, 0xc0, 0x16, 0xc4, 0xe5, 0xc1, + 0x3e, 0xc3, 0x70, 0xc3, 0xd0, 0xc4, 0xcf, 0xc1, 0xc8, 0xbe, 0x62, 0xc1, 0x75, 0xc3, 0x58, 0xc1, + 0x51, 0xc2, 0x03, 0xc4, 0x5b, 0xc0, 0xbd, 0xbe, 0x28, 0xc2, 0x15, 0xc3, 0x7c, 0xc1, 0xca, 0xbf, + 0x87, 0xc0, 0x61, 0xc3, 0xe6, 0xc3, 0x18, 0xc1, 0x5e, 0xc1, 0xf3, 0xc2, 0x58, 0xbc, 0x69, 0xc4, + 0x01, 0xc4, 0x9d, 0xc0, 0x77, 0xc0, 0xda, 0xc0, 0xaa, 0xc0, 0x43, 0xc1, 0x9c, 0xc2, 0x3c, 0xbe, + 0x68, 0xc1, 0x0c, 0xc3, 0xce, 0xc0, 0x94, 0xc0, 0x66, 0xc3, 0xb0, 0xc1, 0x6c, 0xbe, 0x71, 0xc2, + 0x41, 0xc4, 0x58, 0xbe, 0x6a, 0xc3, 0xc4, 0xc1, 0x84, 0xc1, 0xd4, 0xc2, 0xd1, 0xbe, 0xf4, 0xc0, + 0xf0, 0xc0, 0x42, 0xbd, 0x7e, 0xbc, 0xb7, 0xc0, 0xda, 0xc3, 0xb9, 0xc3, 0xcb, 0xc1, 0x64, 0xc0, + 0x6c, 0xc2, 0xf8, 0xc1, 0x76, 0xc5, 0xe8, 0xc1, 0x6e, 0xc1, 0xdf, 0xc2, 0x0d, 0xc4, 0x90, 0xbf, + 0x08, 0xbe, 0xa2, 0xc5, 0x7b, 0xc3, 0x27, 0xc4, 0x96, 0xc1, 0xa1, 0xc3, 0x25, 0xc1, 0xa8, 0xc1, + 0x7f, 0xc3, 0x4e, 0xbe, 0xe6, 0xc1, 0xec, 0xc3, 0xca, 0xc4, 0x19, 0xc3, 0x50, 0xc4, 0xe2, 0xc0, + 0xa1, 0xbb, 0xcc, 0xc2, 0xdf, 0xc1, 0xe3, 0xc5, 0x80, 0xc3, 0x59, 0xc0, 0x32, 0xc2, 0xbe, 0xc1, + 0x5d, 0xbd, 0x18, 0xc2, 0x17, 0xc2, 0x7d, 0xc3, 0xb7, 0xc2, 0x3c, 0xc2, 0x8e, 0xc1, 0x0b, 0xbf, + 0xde, 0xc2, 0x53, 0xc0, 0x9f, 0xc0, 0xbe, 0xc0, 0x97, 0xbf, 0x1c, 0xc2, 0x87, 0xc1, 0xc0, 0xc2, + 0x46, 0xbd, 0xdb, 0xc4, 0x5f, 0xc4, 0x47, 0xc2, 0xa4, 0xc2, 0xdd, 0xbe, 0x33, 0xc0, 0x4f, 0xc1, + 0x7a, 0xc1, 0x19, 0xc4, 0xc4, 0xc4, 0x96, 0xc3, 0x03, 0xc2, 0x2d, 0xc4, 0x95, 0xc3, 0xbd, 0xc3, + 0x7f, 0xc1, 0x3d, 0xc4, 0x48, 0xc1, 0xff, 0xc3, 0x23, 0xc2, 0x4f, 0xc2, 0x68, 0xc4, 0x2e, 0xc1, + 0xb0, 0xc1, 0xfe, 0xc2, 0x81, 0xc0, 0x4c, 0xc3, 0xa7, 0xc0, 0x6f, 0xc1, 0x13, 0xc1, 0x1a, 0xc3, + 0xe5, 0xc0, 0xeb, 0xc3, 0x49, 0xbc, 0xef, 0xc0, 0xe0, 0xc1, 0x30, 0xc2, 0x95, 0xc2, 0xe7, 0xc4, + 0xfe, 0xc2, 0x4e, 0xc0, 0xde, 0xc0, 0xce, 0xbc, 0x9d, 0xc1, 0x67, 0xc3, 0xc3, 0xc4, 0x82, 0xc4, + 0x28, 0xc4, 0x1c, 0xc2, 0x21, 0xbf, 0x10, 0xc4, 0xc3, 0xc1, 0x26, 0xc2, 0x2c, 0xc2, 0xae, 0xc1, + 0xb2, 0xbe, 0xcb, 0xc4, 0xf5, 0xc0, 0xe6, 0xbd, 0x3e, 0xc2, 0x00, 0xbf, 0x59, 0xc4, 0x52, 0xc3, + 0x18, 0xc0, 0x6e, 0xc4, 0x4c, 0xc0, 0xaf, 0xc2, 0x42, 0xc4, 0xc4, 0xc0, 0x4a, 0xc2, 0x06, 0xc4, + 0xf1, 0xc1, 0x29, 0xc3, 0x45, 0xc2, 0x45, 0xc5, 0x22, 0xc2, 0x08, 0xc1, 0x5c, 0xc4, 0xc8, 0xc1, + 0xb6, 0xc1, 0xdc, 0xc3, 0xec, 0xc2, 0x7c, 0xc2, 0xe6, 0xc3, 0x7d, 0xc2, 0x82, 0xc0, 0xa2, 0xc4, + 0x0e, 0xc3, 0xaa, 0xc1, 0xbe, 0xc0, 0xf9, 0xb9, 0x7d, 0xc2, 0x7f, 0xbe, 0xd6, 0xbd, 0x53, 0xbf, + 0x5d, 0xc3, 0x41, 0xc2, 0x8d, 0xc4, 0x0d, 0xc2, 0x13, 0xc5, 0x7e, 0xbc, 0xf5, 0xbe, 0x4e, 0xc0, + 0x43, 0xc2, 0xfd, 0xc2, 0x70, 0xc3, 0xf5, 0xc3, 0x6d, 0xc1, 0xba, 0xc3, 0x5e, 0xc4, 0x1e, 0xc0, + 0xbd, 0xc4, 0x74, 0xc1, 0x9b, 0xc0, 0x1b, 0xbf, 0x20, 0xc2, 0x22, 0xc0, 0x59, 0xc1, 0xf6, 0xc3, + 0x7b, 0xc3, 0xa7, 0xc0, 0xca, 0xc2, 0xc1, 0xc4, 0x57, 0xc2, 0xe5, 0xc2, 0x89, 0xc3, 0x33, 0xc1, + 0x7f, 0xc1, 0x84, 0xc1, 0x94, 0xc0, 0x79, 0xc0, 0x35, 0xc0, 0x3c, 0xc3, 0x41, 0xc2, 0xd0, 0xc2, + 0xa6, 0xc0, 0x57, 0xc4, 0x38, 0xbe, 0x11, 0xc2, 0x06, 0xbf, 0x60, 0xc4, 0xc9, 0xc1, 0x54, 0xc3, + 0x0c, 0xc5, 0x1a, 0xc3, 0x02, 0xc3, 0x61, 0xc2, 0x8b, 0xc0, 0x76, 0xc1, 0x5a, 0xc1, 0x7b, 0xc1, + 0xfe, 0xc0, 0x82, 0xc1, 0x99, 0xc0, 0x02, 0xc1, 0x40, 0xc0, 0x97, 0xc1, 0x3c, 0xc1, 0x25, 0xc2, + 0xb0, 0xbe, 0x30, 0xc4, 0x35, 0xc1, 0x2f, 0xc1, 0xfa, 0xc2, 0x85, 0xbf, 0x9a, 0xc3, 0xf7, 0xbd, + 0xf9, 0xc2, 0x80, 0xbe, 0x20, 0xbe, 0xcf, 0xc1, 0x73, 0xc4, 0xba, 0xc2, 0x0b, 0xc4, 0x2f, 0xc0}; +unsigned char gemm_fp16_a1[] = { + 0x90, 0xc1, 0x1a, 0xc2, 0x35, 0xc4, 0xbe, 0xc2, 0xa3, 0xc4, 0x38, 0xc2, 0xe8, 0xbd, 0xf9, 0xc3, + 0x02, 0xc4, 0xa9, 0xc3, 0x2d, 0xc3, 0x31, 0xc2, 0x98, 0xbe, 0x8d, 0xc2, 0xc3, 0xc2, 0x3f, 0xc3, + 0x5d, 0xc1, 0x5f, 0xc0, 0x86, 0xc4, 0x51, 0xc1, 0xd1, 0xc2, 0x0a, 0xc1, 0x17, 0xc4, 0xe8, 0xc2, + 0x26, 0xc2, 0x03, 0xc5, 0xa1, 0xc3, 0x58, 0xbf, 0x45, 0xc2, 0x34, 0xc2, 0x4c, 0xc1, 0x4a, 0xbe, + 0xbe, 0xc4, 0xe7, 0xc4, 0xbe, 0xc1, 0x56, 0xc3, 0x42, 0xc4, 0xae, 0xc0, 0x40, 0xc2, 0xa9, 0xc1, + 0x18, 0xc4, 0xbe, 0xc1, 0xc7, 0xbe, 0x1b, 0xc3, 0x79, 0xc0, 0x40, 0xc2, 0x20, 0xc4, 0xd7, 0xbf, + 0x30, 0xc3, 0x5f, 0xc2, 0x76, 0xc3, 0x72, 0xc1, 0x11, 0xc2, 0xa7, 0xc0, 0x3c, 0xc2, 0x71, 0xc1, + 0x18, 0xc4, 0xfd, 0xbf, 0x1c, 0xc1, 0x44, 0xc4, 0x3a, 0xc0, 0x0a, 0xc4, 0x74, 0xbd, 0x6b, 0xc0, + 0x69, 0xc2, 0x43, 0xbd, 0x6f, 0xc0, 0xf4, 0xbc, 0x1e, 0xc1, 0xcc, 0xbd, 0xb6, 0xc1, 0x51, 0xc0, + 0xbf, 0xc2, 0x8c, 0xc1, 0xdf, 0xc2, 0xb6, 0xc4, 0xa5, 0xc3, 0xaf, 0xbb, 0x21, 0xc4, 0x2b, 0xc4, + 0x2e, 0xc2, 0x07, 0xc5, 0xbb, 0xc3, 0x65, 0xc1, 0x0f, 0xc3, 0xd3, 0xc0, 0xa7, 0xc3, 0x39, 0xb9, + 0xf6, 0xc2, 0x71, 0xbe, 0xd9, 0xc4, 0xe8, 0xc2, 0x23, 0xc4, 0xb7, 0xc2, 0x1a, 0xc0, 0x04, 0xc3, + 0x5f, 0xc2, 0x13, 0xc1, 0xb2, 0xc2, 0x58, 0xc1, 0x2a, 0xc2, 0xa3, 0xc1, 0x78, 0xc0, 0xb6, 0xc2, + 0x68, 0xbe, 0xe0, 0xc2, 0xbc, 0xc2, 0x86, 0xc2, 0xfc, 0xc3, 0x18, 0xc2, 0x14, 0xc1, 0x15, 0xc2, + 0x75, 0xc1, 0x73, 0xc3, 0x6f, 0xc1, 0x05, 0xc4, 0x69, 0xc2, 0xbd, 0xbe, 0x62, 0xc2, 0x90, 0xc2, + 0x9c, 0xc2, 0x39, 0xbd, 0x6e, 0xb6, 0xec, 0xc2, 0x43, 0xc0, 0x96, 0xc2, 0xc3, 0xc1, 0xf2, 0xbb, + 0x8b, 0xc2, 0x56, 0xc4, 0x18, 0xc2, 0x78, 0xc0, 0xfe, 0xbc, 0x80, 0xc0, 0x51, 0xc2, 0x01, 0xc4, + 0xed, 0xc0, 0x47, 0xc2, 0x17, 0xc4, 0x8c, 0xc0, 0x15, 0xc0, 0xbb, 0xc0, 0x03, 0xc4, 0x9d, 0xc0, + 0x7d, 0xc4, 0x51, 0xc1, 0xa6, 0xc0, 0xc7, 0xc1, 0x93, 0xc0, 0xb6, 0xc3, 0x5b, 0xc0, 0x77, 0xc0, + 0x2c, 0xc2, 0xad, 0xc1, 0x65, 0xc0, 0x81, 0xc0, 0xf4, 0xc2, 0xb2, 0xc1, 0xbd, 0xbe, 0xda, 0xc0, + 0x09, 0xc3, 0xb9, 0xc3, 0xe8, 0xc4, 0x85, 0xc3, 0xa8, 0xbc, 0x22, 0xbf, 0x28, 0xc2, 0xaa, 0xc0, + 0xf0, 0xbd, 0x5c, 0xbe, 0x05, 0xc2, 0x25, 0xc0, 0x2a, 0xbe, 0x30, 0xc0, 0x15, 0xc3, 0x43, 0xc1, + 0x3e, 0xc1, 0x0d, 0xc3, 0x78, 0xc2, 0x45, 0xc2, 0x49, 0xc1, 0x16, 0xc4, 0x7c, 0xc1, 0x9c, 0xc2, + 0x9f, 0xc1, 0xdd, 0xc1, 0x15, 0xc0, 0x7e, 0xc3, 0xf0, 0xbf, 0xe5, 0xc1, 0xca, 0xbf, 0x3c, 0xbe, + 0x30, 0xc4, 0x5f, 0xbf, 0x5d, 0xc2, 0x58, 0xc2, 0xef, 0xbe, 0x3e, 0xc3, 0x87, 0xc0, 0x68, 0xc1, + 0xd9, 0xc1, 0xa9, 0xb5, 0x40, 0xc0, 0x3d, 0xc2, 0xe3, 0xc0, 0x70, 0xc3, 0x61, 0xc3, 0x0c, 0xc3, + 0x80, 0xc3, 0x08, 0xc3, 0xe7, 0xc2, 0x31, 0xc3, 0x11, 0xc1, 0xd0, 0xc4, 0xe6, 0xc3, 0xce, 0xc0, + 0xbd, 0xc1, 0xd5, 0xc2, 0x1f, 0xc0, 0x06, 0xc0, 0x29, 0xc2, 0xcf, 0xc1, 0x18, 0xc1, 0x94, 0xc0, + 0xd1, 0xbf, 0xca, 0xc1, 0x7f, 0xc4, 0xed, 0xc0, 0x67, 0xc2, 0xc8, 0xbe, 0x5e, 0xc1, 0x66, 0xc3, + 0xd8, 0xc3, 0x22, 0xbf, 0x3c, 0xc0, 0xef, 0xc0, 0x98, 0xc1, 0x62, 0xc1, 0xf3, 0xc2, 0xb0, 0xc1, + 0xff, 0xc1, 0x25, 0xc3, 0x48, 0xc5, 0x3d, 0xc1, 0xe4, 0xc0, 0x75, 0xc3, 0x58, 0xbc, 0x6c, 0xbe, + 0xcf, 0xc3, 0x02, 0xc1, 0x66, 0xc0, 0x24, 0xc1, 0x23, 0xc3, 0x58, 0xc1, 0x69, 0xc4, 0x71, 0xc2, + 0x41, 0xc4, 0x6c, 0xc2, 0x7f, 0xc3, 0x5d, 0xbd, 0x46, 0xbd, 0x7f, 0xc1, 0xe5, 0xc0, 0x28, 0xc4, + 0x58, 0xbe, 0xf8, 0xc1, 0x4e, 0xbe, 0x18, 0xc2, 0xdb, 0xc4, 0x3d, 0xc4, 0xeb, 0xc3, 0x1c, 0xc2, + 0x6a, 0xc3, 0x76, 0xc5, 0xe6, 0xc1, 0x17, 0xc2, 0x5f, 0xc4, 0x48, 0xc1, 0x49, 0xbc, 0x21, 0xbf, + 0xc4, 0xc1, 0xe8, 0xc1, 0xec, 0xc3, 0x7d, 0xc3, 0x47, 0xc2, 0xff, 0xc3, 0xef, 0xc0, 0x10, 0xc4, + 0x84, 0xc1, 0x6e, 0xc1, 0xca, 0xc4, 0xb7, 0xc2, 0xa4, 0xc2, 0x23, 0xc2, 0xe0, 0xc1, 0xc3, 0xc1, + 0xd4, 0xc2, 0xdf, 0xc2, 0x19, 0xc3, 0x3c, 0xc2, 0xdd, 0xbe, 0x4f, 0xc2, 0x30, 0xc2, 0x26, 0xc2, + 0xd1, 0xbe, 0x0d, 0xc4, 0x50, 0xc4, 0x8e, 0xc1, 0x33, 0xc0, 0x68, 0xc4, 0x95, 0xc2, 0x2c, 0xc2, + 0xf4, 0xc0, 0x90, 0xbf, 0xe2, 0xc0, 0x0b, 0xbf, 0x4f, 0xc1, 0x2e, 0xc1, 0xe7, 0xc4, 0xae, 0xc1, + 0xf0, 0xc0, 0x08, 0xbe, 0xa1, 0xbb, 0xde, 0xc2, 0x7a, 0xc1, 0xb0, 0xc1, 0xfe, 0xc2, 0xb2, 0xbe, + 0x42, 0xbd, 0xa2, 0xc5, 0xcc, 0xc2, 0x53, 0xc0, 0x19, 0xc4, 0xfe, 0xc2, 0x4e, 0xc0, 0xcb, 0xc4, + 0x7e, 0xbc, 0x7b, 0xc3, 0xdf, 0xc1, 0x9f, 0xc0, 0xc4, 0xc4, 0x81, 0xc0, 0xde, 0xc0, 0xf5, 0xc0, + 0xb7, 0xc0, 0x27, 0xc4, 0xe3, 0xc5, 0xbe, 0xc0, 0x96, 0xc3, 0x4c, 0xc3, 0xce, 0xbc, 0xe6, 0xbd, + 0xda, 0xc3, 0x96, 0xc1, 0x80, 0xc3, 0x97, 0xbf, 0x03, 0xc2, 0xa7, 0xc0, 0x9d, 0xc1, 0x3e, 0xc2, + 0xb9, 0xc3, 0xa1, 0xc3, 0x59, 0xc0, 0x1c, 0xc2, 0x2d, 0xc4, 0x6f, 0xc1, 0x67, 0xc3, 0x00, 0xbf, + 0xcb, 0xc1, 0x25, 0xc1, 0x32, 0xc2, 0x87, 0xc1, 0x95, 0xc3, 0x13, 0xc1, 0xc3, 0xc4, 0x59, 0xc4, + 0x64, 0xc0, 0xa8, 0xc1, 0xbe, 0xc1, 0xc0, 0xc2, 0xbd, 0xc3, 0x1a, 0xc3, 0x82, 0xc4, 0x52, 0xc3, + 0x18, 0xc0, 0xb6, 0xc1, 0x5d, 0xc3, 0xbd, 0xc4, 0x6e, 0xc4, 0xdc, 0xc3, 0x41, 0xc2, 0x74, 0xc1, + 0x4c, 0xc0, 0xec, 0xc2, 0x8d, 0xc4, 0x9b, 0xc0, 0xaf, 0xc2, 0x7c, 0xc2, 0x0d, 0xc2, 0x1b, 0xbf, + 0x42, 0xc4, 0xe6, 0xc3, 0x13, 0xc5, 0x20, 0xc2, 0xc4, 0xc0, 0x7d, 0xc2, 0x7e, 0xbc, 0x22, 0xc0, + 0x4a, 0xc2, 0x82, 0xc0, 0xf5, 0xbe, 0x59, 0xc1, 0x06, 0xc4, 0xa2, 0xc4, 0x4e, 0xc0, 0xf6, 0xc3, + 0xf1, 0xc1, 0x0e, 0xc3, 0x43, 0xc2, 0x7b, 0xc3, 0x29, 0xc3, 0xaa, 0xc1, 0xfd, 0xc2, 0xa7, 0xc0, + 0x45, 0xc2, 0xbe, 0xc0, 0x70, 0xc3, 0xca, 0xc2, 0x45, 0xc5, 0xf9, 0xb9, 0xf5, 0xc3, 0xc1, 0xc4, + 0x22, 0xc2, 0x7d, 0xc2, 0x6d, 0xc1, 0x57, 0xc2, 0x08, 0xc1, 0x7f, 0xbe, 0xba, 0xc3, 0xe5, 0xc2, + 0x5c, 0xc4, 0xd6, 0xbd, 0x5e, 0xc4, 0x89, 0xc3, 0xc8, 0xc1, 0x53, 0xbf, 0x1e, 0xc0, 0x33, 0xc1, + 0x7f, 0xc1, 0x0c, 0xc5, 0x84, 0xc1, 0x1a, 0xc3, 0x94, 0xc0, 0x02, 0xc3, 0x79, 0xc0, 0x61, 0xc2, + 0x35, 0xc0, 0x8b, 0xc0, 0x3c, 0xc3, 0x76, 0xc1, 0x41, 0xc2, 0x5a, 0xc1, 0xd0, 0xc2, 0x7b, 0xc1, + 0xa6, 0xc0, 0xfe, 0xc0, 0x57, 0xc4, 0x82, 0xc1, 0x38, 0xbe, 0x99, 0xc0, 0x11, 0xc2, 0x02, 0xc1, + 0x06, 0xbf, 0x40, 0xc0, 0x60, 0xc4, 0x97, 0xc1, 0xc9, 0xc1, 0x3c, 0xc1, 0x54, 0xc3, 0x25, 0xc2, + 0xb0, 0xbe, 0x30, 0xc4, 0x35, 0xc1, 0x2f, 0xc1, 0xfa, 0xc2, 0x85, 0xbf, 0x9a, 0xc3, 0xf7, 0xbd, + 0xf9, 0xc2, 0x80, 0xbe, 0x20, 0xbe, 0xcf, 0xc1, 0x73, 0xc4, 0xba, 0xc2, 0x0b, 0xc4, 0x2f, 0xc0}; +unsigned char gemm_fp16_b[] = { + 0xd1, 0x3d, 0xa4, 0x3d, 0xaa, 0x40, 0x24, 0x3e, 0x78, 0x42, 0xfa, 0x41, 0x6c, 0x40, 0x88, 0x3e, + 0x05, 0x40, 0x16, 0x3b, 0x18, 0x44, 0xe3, 0x3b, 0x7f, 0x40, 0xde, 0x41, 0x07, 0x40, 0xf2, 0x3d, + 0x91, 0x43, 0xe0, 0x41, 0xa4, 0x3f, 0x46, 0x3a, 0x20, 0x40, 0x0b, 0x41, 0x77, 0x3b, 0xd2, 0x3e, + 0xa3, 0x3d, 0x17, 0x3f, 0x54, 0x3e, 0x7b, 0x42, 0xb7, 0x40, 0xa6, 0x40, 0xb9, 0x41, 0x6f, 0x43, + 0xbc, 0x3e, 0x02, 0x35, 0x16, 0x40, 0xff, 0x40, 0x38, 0x40, 0x4f, 0x3f, 0x36, 0x40, 0x0c, 0x41, + 0x96, 0x42, 0x1d, 0x3f, 0xad, 0x3d, 0x20, 0x40, 0xbd, 0x40, 0x4a, 0x3e, 0x63, 0x3d, 0x45, 0x40, + 0xed, 0x3f, 0xb1, 0x37, 0xa3, 0x3e, 0x07, 0x41, 0x71, 0x3f, 0x58, 0x41, 0xe3, 0x40, 0x71, 0x41, + 0x41, 0x3e, 0xaf, 0x34, 0xc2, 0x3e, 0xc8, 0x3e, 0x0b, 0x40, 0x1d, 0x3f, 0x9b, 0x3b, 0xd6, 0x3e, + 0x47, 0x3e, 0x51, 0x3a, 0x57, 0x40, 0x08, 0x3f, 0x65, 0x3b, 0x9c, 0x44, 0x8e, 0x40, 0xc4, 0x3c, + 0x5d, 0x40, 0x62, 0x3d, 0x3f, 0x33, 0x48, 0x41, 0x12, 0x3f, 0x71, 0x3f, 0x5c, 0x36, 0xdd, 0x40, + 0x0a, 0x40, 0xfd, 0x3d, 0x43, 0x41, 0x38, 0x43, 0xd7, 0x40, 0x81, 0x3f, 0x35, 0xbc, 0x60, 0x3e, + 0x33, 0x40, 0x30, 0x41, 0xef, 0x42, 0x7e, 0xa9, 0xea, 0x41, 0xe5, 0x41, 0xbc, 0x3e, 0x96, 0x40, + 0x52, 0x42, 0x0f, 0x3b, 0xd8, 0x3f, 0x4d, 0x3e, 0x87, 0x3b, 0x2d, 0x43, 0xdc, 0x3d, 0x75, 0x42, + 0xe5, 0x3d, 0x12, 0x3f, 0x40, 0x42, 0x0a, 0x40, 0x84, 0x3d, 0x11, 0x42, 0xbd, 0x41, 0x40, 0x3c, + 0x13, 0x3f, 0x5f, 0x3c, 0x78, 0x40, 0x76, 0x43, 0x13, 0x3c, 0xcc, 0x3c, 0xce, 0x3d, 0x73, 0x41, + 0xf8, 0x3e, 0x25, 0x3e, 0x4a, 0x3e, 0x6d, 0x3e, 0x59, 0x3f, 0xce, 0x3d, 0x3f, 0x41, 0x24, 0x3f, + 0x85, 0x3f, 0x40, 0x41, 0x96, 0x43, 0x2f, 0x40, 0x72, 0x3f, 0x47, 0x36, 0x1f, 0x42, 0x40, 0x3d, + 0x95, 0x40, 0x73, 0x42, 0x9a, 0x43, 0x08, 0x43, 0xde, 0x42, 0x31, 0x41, 0x5b, 0x42, 0xa4, 0x40, + 0x42, 0x41, 0x2a, 0x40, 0x94, 0x33, 0xe5, 0x3a, 0xeb, 0x3b, 0x7b, 0x40, 0xd3, 0x40, 0xef, 0x40, + 0x6e, 0x40, 0xbf, 0x43, 0xad, 0x42, 0xa9, 0x41, 0x55, 0x42, 0xc8, 0x41, 0xf5, 0x3f, 0x4b, 0x3f, + 0x0e, 0x40, 0x02, 0x3b, 0x56, 0x41, 0x4b, 0x42, 0xf2, 0x3d, 0x94, 0xb9, 0xdc, 0x3c, 0x90, 0x41, + 0xc6, 0x3d, 0x69, 0x41, 0x29, 0x40, 0x7a, 0x3f, 0xf7, 0x41, 0xc0, 0x38, 0x45, 0x3b, 0xa8, 0x3e, + 0xf0, 0x3d, 0x94, 0x41, 0xdb, 0x40, 0x3c, 0xb2, 0x2d, 0x42, 0x65, 0x3f, 0x2e, 0x3d, 0xd1, 0x3d, + 0x4f, 0x40, 0x3f, 0x41, 0x72, 0x44, 0xf8, 0x42, 0xbb, 0x43, 0x83, 0x3b, 0xf6, 0x38, 0xf8, 0x3f, + 0x5a, 0x3f, 0x2f, 0x3e, 0xdc, 0x3c, 0x43, 0x41, 0x99, 0x40, 0xe4, 0x39, 0x7e, 0x3e, 0x6a, 0x3f, + 0x0f, 0x41, 0x16, 0x3c, 0xf8, 0x3e, 0x52, 0x3b, 0x7d, 0x42, 0x45, 0x44, 0x74, 0x42, 0x43, 0x43, + 0xdd, 0x40, 0x8e, 0x3d, 0xb4, 0x3f, 0x3e, 0x42, 0x3e, 0x3e, 0xce, 0x40, 0x58, 0x3e, 0x7f, 0x3b, + 0x24, 0x40, 0x86, 0x3c, 0x4d, 0x3f, 0xc5, 0x40, 0xe4, 0x3e, 0xdc, 0x3a, 0x1f, 0x3e, 0xc5, 0x3c, + 0x7b, 0x40, 0x06, 0x3e, 0xfc, 0x40, 0x29, 0x3f, 0x81, 0x3a, 0x28, 0x30, 0x5c, 0x3f, 0x39, 0x3f, + 0xe0, 0x3a, 0x3f, 0x2f, 0x40, 0x3f, 0xa1, 0x43, 0xb6, 0x38, 0xda, 0x42, 0xb2, 0x43, 0x9d, 0x40, + 0xd4, 0x3f, 0x31, 0x3e, 0x6c, 0x41, 0x83, 0x42, 0x64, 0x3f, 0x93, 0x42, 0x4c, 0x40, 0x64, 0x43, + 0xd5, 0x41, 0xae, 0x27, 0x5d, 0x3f, 0xd9, 0x3c, 0xa6, 0x40, 0x0a, 0x41, 0x04, 0x43, 0x3b, 0x41, + 0x4a, 0x41, 0xb2, 0x3d, 0xee, 0x41, 0xc9, 0x3d, 0x3d, 0x42, 0xb6, 0x3f, 0x93, 0x3c, 0x53, 0x3f, + 0xc8, 0x3e, 0x2c, 0x35, 0x70, 0x3e, 0x32, 0x2d, 0x47, 0x44, 0x43, 0x3f, 0xa0, 0x40, 0x12, 0x3c, + 0x2f, 0x42, 0x81, 0x3c, 0xe2, 0x41, 0xaf, 0x41, 0x3d, 0x40, 0x1a, 0x40, 0x83, 0x43, 0x8e, 0x3e, + 0x87, 0x3f, 0xfd, 0x41, 0xc0, 0x42, 0x72, 0x40, 0xc0, 0x40, 0x40, 0x3d, 0xd2, 0x40, 0xe7, 0x40, + 0x8f, 0x41, 0x2c, 0x3f, 0x3e, 0x45, 0x80, 0x41, 0x25, 0x3c, 0x60, 0x3f, 0x3f, 0x40, 0x91, 0x39, + 0x4b, 0x3c, 0x05, 0x3d, 0x71, 0x41, 0x0f, 0x3e, 0x00, 0x41, 0xd0, 0x41, 0x6d, 0x40, 0x19, 0x33, + 0x5e, 0xaf, 0x86, 0x3f, 0x1c, 0x41, 0x03, 0x40, 0x2e, 0x3b, 0x5d, 0x38, 0x3a, 0x41, 0xbb, 0x40, + 0x73, 0x3f, 0xb4, 0x41, 0x8a, 0x3f, 0xe5, 0x3e, 0x19, 0x41, 0x11, 0x3c, 0x2f, 0x41, 0x4b, 0x45}; +unsigned char gemm_fp16_b1[] = { + 0xd1, 0x3d, 0xa4, 0x3d, 0xaa, 0x40, 0x24, 0x3e, 0x78, 0x42, 0xfa, 0x41, 0x6c, 0x40, 0x88, 0x3e, + 0x05, 0x40, 0x16, 0x3b, 0x18, 0x44, 0xe3, 0x3b, 0x7f, 0x40, 0xde, 0x41, 0x07, 0x40, 0xf2, 0x3d, + 0x20, 0x40, 0x0b, 0x41, 0x77, 0x3b, 0xd2, 0x3e, 0xa3, 0x3d, 0x17, 0x3f, 0x54, 0x3e, 0x7b, 0x42, + 0xb7, 0x40, 0xa6, 0x40, 0xb9, 0x41, 0x6f, 0x43, 0xbc, 0x3e, 0x02, 0x35, 0x16, 0x40, 0xff, 0x40, + 0x96, 0x42, 0x1d, 0x3f, 0xad, 0x3d, 0x20, 0x40, 0xbd, 0x40, 0x4a, 0x3e, 0x63, 0x3d, 0x45, 0x40, + 0xed, 0x3f, 0xb1, 0x37, 0xa3, 0x3e, 0x07, 0x41, 0x71, 0x3f, 0x58, 0x41, 0xe3, 0x40, 0x71, 0x41, + 0x0b, 0x40, 0x1d, 0x3f, 0x9b, 0x3b, 0xd6, 0x3e, 0x47, 0x3e, 0x51, 0x3a, 0x57, 0x40, 0x08, 0x3f, + 0x65, 0x3b, 0x9c, 0x44, 0x8e, 0x40, 0xc4, 0x3c, 0x5d, 0x40, 0x62, 0x3d, 0x3f, 0x33, 0x48, 0x41, + 0x0a, 0x40, 0xfd, 0x3d, 0x43, 0x41, 0x38, 0x43, 0xd7, 0x40, 0x81, 0x3f, 0x35, 0xbc, 0x60, 0x3e, + 0x33, 0x40, 0x30, 0x41, 0xef, 0x42, 0x7e, 0xa9, 0xea, 0x41, 0xe5, 0x41, 0xbc, 0x3e, 0x96, 0x40, + 0x87, 0x3b, 0x2d, 0x43, 0xdc, 0x3d, 0x75, 0x42, 0xe5, 0x3d, 0x12, 0x3f, 0x40, 0x42, 0x0a, 0x40, + 0x84, 0x3d, 0x11, 0x42, 0xbd, 0x41, 0x40, 0x3c, 0x13, 0x3f, 0x5f, 0x3c, 0x78, 0x40, 0x76, 0x43, + 0xf8, 0x3e, 0x25, 0x3e, 0x4a, 0x3e, 0x6d, 0x3e, 0x59, 0x3f, 0xce, 0x3d, 0x3f, 0x41, 0x24, 0x3f, + 0x85, 0x3f, 0x40, 0x41, 0x96, 0x43, 0x2f, 0x40, 0x72, 0x3f, 0x47, 0x36, 0x1f, 0x42, 0x40, 0x3d, + 0xde, 0x42, 0x31, 0x41, 0x5b, 0x42, 0xa4, 0x40, 0x42, 0x41, 0x2a, 0x40, 0x94, 0x33, 0xe5, 0x3a, + 0xeb, 0x3b, 0x7b, 0x40, 0xd3, 0x40, 0xef, 0x40, 0x6e, 0x40, 0xbf, 0x43, 0xad, 0x42, 0xa9, 0x41, + 0x0e, 0x40, 0x02, 0x3b, 0x56, 0x41, 0x4b, 0x42, 0xf2, 0x3d, 0x94, 0xb9, 0xdc, 0x3c, 0x90, 0x41, + 0xc6, 0x3d, 0x69, 0x41, 0x29, 0x40, 0x7a, 0x3f, 0xf7, 0x41, 0xc0, 0x38, 0x45, 0x3b, 0xa8, 0x3e, + 0x2d, 0x42, 0x65, 0x3f, 0x2e, 0x3d, 0xd1, 0x3d, 0x4f, 0x40, 0x3f, 0x41, 0x72, 0x44, 0xf8, 0x42, + 0xbb, 0x43, 0x83, 0x3b, 0xf6, 0x38, 0xf8, 0x3f, 0x5a, 0x3f, 0x2f, 0x3e, 0xdc, 0x3c, 0x43, 0x41, + 0x0f, 0x41, 0x16, 0x3c, 0xf8, 0x3e, 0x52, 0x3b, 0x7d, 0x42, 0x45, 0x44, 0x74, 0x42, 0x43, 0x43, + 0xdd, 0x40, 0x8e, 0x3d, 0xb4, 0x3f, 0x3e, 0x42, 0x3e, 0x3e, 0xce, 0x40, 0x58, 0x3e, 0x7f, 0x3b, + 0xe4, 0x3e, 0xdc, 0x3a, 0x1f, 0x3e, 0xc5, 0x3c, 0x7b, 0x40, 0x06, 0x3e, 0xfc, 0x40, 0x29, 0x3f, + 0x81, 0x3a, 0x28, 0x30, 0x5c, 0x3f, 0x39, 0x3f, 0xe0, 0x3a, 0x3f, 0x2f, 0x40, 0x3f, 0xa1, 0x43, + 0xd4, 0x3f, 0x31, 0x3e, 0x6c, 0x41, 0x83, 0x42, 0x64, 0x3f, 0x93, 0x42, 0x4c, 0x40, 0x64, 0x43, + 0xd5, 0x41, 0xae, 0x27, 0x5d, 0x3f, 0xd9, 0x3c, 0xa6, 0x40, 0x0a, 0x41, 0x04, 0x43, 0x3b, 0x41, + 0x3d, 0x42, 0xb6, 0x3f, 0x93, 0x3c, 0x53, 0x3f, 0xc8, 0x3e, 0x2c, 0x35, 0x70, 0x3e, 0x32, 0x2d, + 0x47, 0x44, 0x43, 0x3f, 0xa0, 0x40, 0x12, 0x3c, 0x2f, 0x42, 0x81, 0x3c, 0xe2, 0x41, 0xaf, 0x41, + 0x87, 0x3f, 0xfd, 0x41, 0xc0, 0x42, 0x72, 0x40, 0xc0, 0x40, 0x40, 0x3d, 0xd2, 0x40, 0xe7, 0x40, + 0x8f, 0x41, 0x2c, 0x3f, 0x3e, 0x45, 0x80, 0x41, 0x25, 0x3c, 0x60, 0x3f, 0x3f, 0x40, 0x91, 0x39, + 0x00, 0x41, 0xd0, 0x41, 0x6d, 0x40, 0x19, 0x33, 0x5e, 0xaf, 0x86, 0x3f, 0x1c, 0x41, 0x03, 0x40, + 0x2e, 0x3b, 0x5d, 0x38, 0x3a, 0x41, 0xbb, 0x40, 0x73, 0x3f, 0xb4, 0x41, 0x8a, 0x3f, 0xe5, 0x3e, + 0x91, 0x43, 0x38, 0x40, 0x41, 0x3e, 0x12, 0x3f, 0x52, 0x42, 0x13, 0x3c, 0x95, 0x40, 0x55, 0x42, + 0xf0, 0x3d, 0x99, 0x40, 0x24, 0x40, 0xb6, 0x38, 0x4a, 0x41, 0x3d, 0x40, 0x4b, 0x3c, 0x19, 0x41, + 0xe0, 0x41, 0x4f, 0x3f, 0xaf, 0x34, 0x71, 0x3f, 0x0f, 0x3b, 0xcc, 0x3c, 0x73, 0x42, 0xc8, 0x41, + 0x94, 0x41, 0xe4, 0x39, 0x86, 0x3c, 0xda, 0x42, 0xb2, 0x3d, 0x1a, 0x40, 0x05, 0x3d, 0x11, 0x3c, + 0xa4, 0x3f, 0x36, 0x40, 0xc2, 0x3e, 0x5c, 0x36, 0xd8, 0x3f, 0xce, 0x3d, 0x9a, 0x43, 0xf5, 0x3f, + 0xdb, 0x40, 0x7e, 0x3e, 0x4d, 0x3f, 0xb2, 0x43, 0xee, 0x41, 0x83, 0x43, 0x71, 0x41, 0x2f, 0x41, + 0x46, 0x3a, 0x0c, 0x41, 0xc8, 0x3e, 0xdd, 0x40, 0x4d, 0x3e, 0x73, 0x41, 0x08, 0x43, 0x4b, 0x3f, + 0x3c, 0xb2, 0x6a, 0x3f, 0xc5, 0x40, 0x9d, 0x40, 0xc9, 0x3d, 0x8e, 0x3e, 0x0f, 0x3e, 0x4b, 0x45}; +unsigned char gemm_fp16_c[] = { + 0x18, 0xd7, 0x44, 0xd6, 0x5b, 0xd6, 0xa8, 0xd6, 0x64, 0xd6, 0xec, 0xd5, 0x40, 0xd6, 0x2d, 0xd7, + 0x55, 0xd6, 0xdf, 0xd5, 0x45, 0xd8, 0x05, 0xd6, 0x79, 0xd6, 0xca, 0xd5, 0xa0, 0xd6, 0x7d, 0xd7, + 0xe1, 0xd6, 0xd1, 0xd5, 0x55, 0xd7, 0xdf, 0xd6, 0x0a, 0xd7, 0x23, 0xd6, 0x0f, 0xd6, 0x84, 0xd6, + 0xc0, 0xd6, 0x2b, 0xd6, 0x62, 0xd6, 0x12, 0xd7, 0xf9, 0xd6, 0x68, 0xd6, 0x55, 0xd8, 0xf1, 0xd5, + 0x8b, 0xd6, 0xce, 0xd5, 0x5c, 0xd6, 0xff, 0xd6, 0xe5, 0xd6, 0x8b, 0xd5, 0xfc, 0xd6, 0xa2, 0xd6, + 0x6d, 0xd7, 0xd9, 0xd5, 0x25, 0xd6, 0x84, 0xd6, 0x25, 0xd7, 0x36, 0xd6, 0xc5, 0xd6, 0x44, 0xd7, + 0x0c, 0xd7, 0xa1, 0xd5, 0x36, 0xd8, 0x49, 0xd6, 0x8d, 0xd6, 0xad, 0xd5, 0xd1, 0xd6, 0xa0, 0xd7, + 0xe4, 0xd6, 0x42, 0xd6, 0xb6, 0xd7, 0x6b, 0xd6, 0x91, 0xd7, 0xbd, 0xd6, 0xa6, 0xd6, 0x98, 0xd6, + 0xca, 0xd6, 0x6b, 0xd6, 0xe5, 0xd6, 0x29, 0xd7, 0x22, 0xd7, 0x9b, 0xd5, 0x62, 0xd8, 0x57, 0xd6, + 0x97, 0xd6, 0x52, 0xd6, 0x1e, 0xd7, 0xb0, 0xd7, 0x2c, 0xd7, 0xf0, 0xd5, 0xc2, 0xd7, 0x27, 0xd7, + 0x3d, 0xd7, 0xd9, 0xd5, 0x4c, 0xd6, 0x77, 0xd6, 0xd8, 0xd6, 0x08, 0xd6, 0x93, 0xd6, 0xea, 0xd6, + 0x00, 0xd7, 0x6a, 0xd5, 0x3e, 0xd8, 0xb4, 0xd5, 0x9b, 0xd6, 0xe6, 0xd5, 0x9a, 0xd6, 0x54, 0xd7, + 0xf0, 0xd6, 0xe9, 0xd5, 0x9d, 0xd7, 0x62, 0xd6, 0x30, 0xd6, 0x7f, 0xd5, 0x5f, 0xd5, 0x74, 0xd5, + 0x78, 0xd5, 0x21, 0xd5, 0x57, 0xd5, 0xb1, 0xd5, 0x60, 0xd5, 0xee, 0xd4, 0x46, 0xd7, 0x3d, 0xd5, + 0x99, 0xd5, 0x2c, 0xd5, 0xf8, 0xd5, 0x99, 0xd6, 0xf8, 0xd5, 0x4a, 0xd5, 0x6f, 0xd6, 0x1a, 0xd6, + 0x80, 0xd6, 0xaf, 0xd5, 0x5c, 0xd5, 0xcc, 0xd5, 0xac, 0xd5, 0x41, 0xd5, 0x3b, 0xd6, 0xb4, 0xd6, + 0x36, 0xd6, 0x2e, 0xd5, 0x6b, 0xd7, 0xa2, 0xd5, 0xbe, 0xd5, 0xea, 0xd4, 0xc6, 0xd5, 0x8f, 0xd6, + 0xcc, 0xd5, 0xa8, 0xd4, 0x7e, 0xd6, 0x3b, 0xd6, 0x20, 0xd6, 0x28, 0xd5, 0x4e, 0xd5, 0xa7, 0xd5, + 0xb0, 0xd5, 0xea, 0xd4, 0x8a, 0xd5, 0x01, 0xd6, 0x1f, 0xd6, 0x61, 0xd4, 0x38, 0xd7, 0x10, 0xd5, + 0x7f, 0xd5, 0xb6, 0xd4, 0xda, 0xd5, 0x6b, 0xd6, 0xd5, 0xd5, 0x0e, 0xd5, 0x90, 0xd6, 0x31, 0xd5, + 0x13, 0xd7, 0x8b, 0xd5, 0xe6, 0xd5, 0xda, 0xd5, 0x0b, 0xd6, 0x2b, 0xd5, 0xda, 0xd5, 0x79, 0xd6, + 0x48, 0xd6, 0x25, 0xd5, 0xdd, 0xd7, 0xbc, 0xd5, 0x42, 0xd6, 0x8f, 0xd5, 0x0d, 0xd6, 0xac, 0xd6, + 0x6c, 0xd6, 0x56, 0xd5, 0x0b, 0xd7, 0x2a, 0xd6, 0xf6, 0xd5, 0x1c, 0xd5, 0x9e, 0xd5, 0x76, 0xd5, + 0xce, 0xd5, 0x43, 0xd5, 0x1a, 0xd5, 0xea, 0xd5, 0x64, 0xd5, 0xca, 0xd4, 0xe9, 0xd7, 0x33, 0xd5, + 0x76, 0xd5, 0x2b, 0xd5, 0xd3, 0xd5, 0xf3, 0xd5, 0x07, 0xd6, 0x46, 0xd5, 0x87, 0xd6, 0xc0, 0xd5, + 0xa5, 0xd6, 0x02, 0xd6, 0x6c, 0xd6, 0xad, 0xd6, 0x55, 0xd6, 0xcc, 0xd5, 0xf2, 0xd5, 0x2f, 0xd7, + 0xa9, 0xd6, 0x88, 0xd5, 0x65, 0xd8, 0xd3, 0xd5, 0x43, 0xd6, 0x98, 0xd5, 0x83, 0xd6, 0xa7, 0xd6, + 0x99, 0xd6, 0x64, 0xd5, 0x41, 0xd7, 0x2c, 0xd6, 0x62, 0xd6, 0x34, 0xd5, 0x8e, 0xd5, 0x9f, 0xd5, + 0xa9, 0xd5, 0x16, 0xd5, 0x53, 0xd5, 0x12, 0xd6, 0xc2, 0xd5, 0xed, 0xd4, 0x31, 0xd7, 0x3f, 0xd5, + 0xb4, 0xd5, 0x37, 0xd5, 0xbd, 0xd5, 0x1e, 0xd6, 0x03, 0xd6, 0xed, 0xd4, 0x5c, 0xd6, 0xa9, 0xd5, + 0x99, 0xd5, 0xb8, 0xd4, 0xa8, 0xd4, 0x95, 0xd4, 0xa6, 0xd4, 0x6f, 0xd4, 0x55, 0xd5, 0x6c, 0xd5, + 0x0e, 0xd5, 0x2e, 0xd4, 0x27, 0xd6, 0xca, 0xd4, 0xd9, 0xd4, 0x48, 0xd4, 0x12, 0xd5, 0x9d, 0xd5, + 0x02, 0xd5, 0x73, 0xd4, 0xd6, 0xd5, 0x8f, 0xd5, 0xf3, 0xd6, 0x9c, 0xd5, 0xd1, 0xd5, 0xb4, 0xd5, + 0x30, 0xd6, 0x73, 0xd5, 0x80, 0xd6, 0xc8, 0xd6, 0x4d, 0xd6, 0x49, 0xd5, 0xc7, 0xd7, 0x25, 0xd6, + 0xec, 0xd5, 0x40, 0xd5, 0x17, 0xd6, 0x7f, 0xd6, 0x20, 0xd6, 0x72, 0xd5, 0x03, 0xd7, 0x4d, 0xd6, + 0x8f, 0xd6, 0xae, 0xd5, 0x6e, 0xd5, 0x94, 0xd5, 0xa2, 0xd5, 0xad, 0xd5, 0x3e, 0xd6, 0x94, 0xd6, + 0x3f, 0xd6, 0xe5, 0xd4, 0x64, 0xd7, 0x97, 0xd5, 0xf9, 0xd5, 0x34, 0xd5, 0x0f, 0xd6, 0xb9, 0xd6, + 0x61, 0xd6, 0x1e, 0xd5, 0xd3, 0xd6, 0x94, 0xd6, 0x77, 0xd5, 0x94, 0xd4, 0xb3, 0xd4, 0xd8, 0xd4, + 0xd6, 0xd4, 0x9b, 0xd4, 0x57, 0xd5, 0x91, 0xd5, 0x4c, 0xd5, 0x06, 0xd4, 0x7d, 0xd6, 0x65, 0xd4, + 0x1a, 0xd5, 0x54, 0xd4, 0x24, 0xd5, 0xa1, 0xd5, 0x74, 0xd5, 0x75, 0xd4, 0xdf, 0xd5, 0x33, 0xd5, + 0x9c, 0xd5, 0xed, 0xd4, 0x11, 0xd5, 0x6f, 0xd5, 0x1e, 0xd5, 0x5f, 0xd4, 0xcf, 0xd4, 0x2f, 0xd5, + 0x52, 0xd5, 0x4b, 0xd4, 0xf6, 0xd6, 0x43, 0xd4, 0x55, 0xd5, 0xc8, 0xd4, 0x7f, 0xd5, 0x05, 0xd6, + 0x75, 0xd5, 0x8f, 0xd4, 0xef, 0xd5, 0xe4, 0xd4, 0x98, 0xd7, 0x38, 0xd6, 0xeb, 0xd5, 0x41, 0xd6, + 0xbb, 0xd6, 0x42, 0xd6, 0x5a, 0xd7, 0x59, 0xd7, 0x3d, 0xd7, 0x4f, 0xd5, 0x19, 0xd8, 0x4a, 0xd6, + 0x76, 0xd6, 0xb2, 0xd5, 0xd4, 0xd6, 0xc4, 0xd7, 0xc9, 0xd6, 0xa0, 0xd5, 0xb0, 0xd7, 0x1f, 0xd7, + 0xd4, 0xd7, 0xe1, 0xd6, 0x1e, 0xd7, 0x4d, 0xd7, 0x8b, 0xd7, 0x16, 0xd7, 0x8f, 0xd7, 0xd0, 0xd7, + 0x50, 0xd7, 0x54, 0xd6, 0xbf, 0xd8, 0x7a, 0xd6, 0x2c, 0xd7, 0xa1, 0xd6, 0x8e, 0xd7, 0x44, 0xd8, + 0xa5, 0xd7, 0xda, 0xd6, 0x46, 0xd8, 0xec, 0xd7, 0xef, 0xd5, 0x32, 0xd5, 0x06, 0xd5, 0x65, 0xd5, + 0xfb, 0xd4, 0x4b, 0xd4, 0x38, 0xd5, 0xbe, 0xd5, 0x60, 0xd5, 0x1b, 0xd5, 0x06, 0xd7, 0xf7, 0xd4, + 0x78, 0xd5, 0x78, 0xd4, 0x3f, 0xd5, 0x1e, 0xd6, 0x61, 0xd5, 0x9f, 0xd4, 0x17, 0xd6, 0xc0, 0xd5, + 0xf7, 0xd7, 0x73, 0xd6, 0x5e, 0xd6, 0x71, 0xd6, 0xaf, 0xd6, 0x35, 0xd6, 0xf0, 0xd6, 0xb3, 0xd7, + 0x5d, 0xd7, 0xa6, 0xd5, 0x3a, 0xd8, 0xe4, 0xd6, 0xb5, 0xd6, 0xf5, 0xd5, 0xe0, 0xd6, 0xaf, 0xd7, + 0xe9, 0xd6, 0xaf, 0xd5, 0xe5, 0xd7, 0x43, 0xd7, 0xac, 0xd6, 0xd4, 0xd5, 0x98, 0xd5, 0xdc, 0xd5, + 0xc5, 0xd5, 0x33, 0xd5, 0x3f, 0xd6, 0x98, 0xd6, 0x10, 0xd6, 0x94, 0xd5, 0x01, 0xd8, 0xbb, 0xd5, + 0x06, 0xd6, 0xef, 0xd4, 0x17, 0xd6, 0x04, 0xd7, 0x4c, 0xd6, 0x9b, 0xd5, 0x0e, 0xd7, 0xb1, 0xd6, + 0xeb, 0xd6, 0x64, 0xd6, 0x64, 0xd6, 0x35, 0xd6, 0xcf, 0xd5, 0x46, 0xd5, 0xe5, 0xd5, 0x7f, 0xd6, + 0x57, 0xd6, 0xc7, 0xd5, 0x4f, 0xd8, 0x04, 0xd6, 0x50, 0xd6, 0xb0, 0xd5, 0x99, 0xd6, 0xa7, 0xd6, + 0xac, 0xd6, 0xa9, 0xd5, 0x40, 0xd7, 0xa3, 0xd6, 0x05, 0xd6, 0x79, 0xd5, 0x59, 0xd5, 0x46, 0xd5, + 0x5d, 0xd5, 0x2e, 0xd5, 0xf2, 0xd5, 0x33, 0xd6, 0xc1, 0xd5, 0xd8, 0xd4, 0x9a, 0xd7, 0x0d, 0xd5, + 0x5c, 0xd5, 0x07, 0xd5, 0x6e, 0xd5, 0xf9, 0xd5, 0x01, 0xd6, 0x9b, 0xd4, 0xfe, 0xd5, 0xe3, 0xd5, + 0xa0, 0xd7, 0x88, 0xd6, 0xc6, 0xd6, 0xd8, 0xd6, 0xe0, 0xd6, 0x26, 0xd6, 0xb9, 0xd6, 0x8f, 0xd7, + 0xf0, 0xd6, 0x11, 0xd6, 0x7f, 0xd8, 0xa5, 0xd6, 0xb0, 0xd6, 0xe6, 0xd5, 0xfb, 0xd6, 0xe1, 0xd7, + 0x08, 0xd7, 0x65, 0xd6, 0x0b, 0xd8, 0x2e, 0xd7, 0xf1, 0xd5, 0x03, 0xd5, 0x1d, 0xd5, 0xa7, 0xd5, + 0x38, 0xd5, 0xa6, 0xd4, 0x97, 0xd4, 0xc7, 0xd5, 0x30, 0xd5, 0xf1, 0xd4, 0xc1, 0xd6, 0xc5, 0xd4, + 0x80, 0xd5, 0xf7, 0xd4, 0x5c, 0xd5, 0x10, 0xd6, 0xd4, 0xd5, 0x78, 0xd4, 0x80, 0xd5, 0x0a, 0xd5, + 0x9a, 0xd7, 0x08, 0xd6, 0x8e, 0xd6, 0xb4, 0xd6, 0x06, 0xd7, 0x03, 0xd6, 0x4f, 0xd6, 0x41, 0xd7, + 0x3e, 0xd7, 0x8f, 0xd5, 0x5b, 0xd8, 0x2d, 0xd6, 0xc2, 0xd6, 0x1e, 0xd6, 0xa4, 0xd6, 0x69, 0xd7, + 0xfd, 0xd6, 0xc9, 0xd5, 0xb1, 0xd7, 0x58, 0xd6, 0x51, 0xd7, 0x1a, 0xd6, 0xc8, 0xd6, 0x96, 0xd6, + 0xeb, 0xd6, 0x10, 0xd6, 0x79, 0xd6, 0x08, 0xd7, 0xcb, 0xd6, 0x71, 0xd5, 0x69, 0xd8, 0x3b, 0xd6, + 0xa5, 0xd6, 0x0c, 0xd6, 0xfa, 0xd6, 0x62, 0xd7, 0x0f, 0xd7, 0x85, 0xd6, 0x00, 0xd8, 0x72, 0xd6, + 0x2e, 0xd7, 0x6c, 0xd6, 0x11, 0xd6, 0x2c, 0xd6, 0x22, 0xd6, 0x8b, 0xd5, 0xbc, 0xd6, 0x8d, 0xd6, + 0xc3, 0xd6, 0xa1, 0xd5, 0x0b, 0xd8, 0x00, 0xd6, 0x6b, 0xd6, 0x95, 0xd5, 0xaa, 0xd6, 0x63, 0xd7, + 0xa4, 0xd6, 0xe2, 0xd5, 0x78, 0xd7, 0xe0, 0xd6, 0x94, 0xd6, 0xbd, 0xd5, 0xad, 0xd5, 0xbe, 0xd5, + 0xfb, 0xd5, 0x66, 0xd5, 0x0a, 0xd6, 0x58, 0xd6, 0x0e, 0xd6, 0x2f, 0xd5, 0xea, 0xd7, 0x9d, 0xd5, + 0xfc, 0xd5, 0x6c, 0xd5, 0x11, 0xd6, 0xae, 0xd6, 0x71, 0xd6, 0x7b, 0xd5, 0xac, 0xd6, 0x1e, 0xd6, + 0x28, 0xd6, 0x58, 0xd5, 0x94, 0xd5, 0x01, 0xd6, 0x71, 0xd5, 0xab, 0xd4, 0x5c, 0xd5, 0x4c, 0xd6, + 0x14, 0xd6, 0x09, 0xd5, 0xba, 0xd7, 0x51, 0xd5, 0xc6, 0xd5, 0x94, 0xd4, 0x06, 0xd6, 0x55, 0xd6, + 0xc8, 0xd5, 0x30, 0xd5, 0xfc, 0xd6, 0xb1, 0xd5}; +unsigned char gemm_fp16_bias[] = { + 0xed, 0x41, 0x5e, 0xc5, 0x2b, 0xc3, 0xbb, 0xc5, 0xc6, 0xbf, 0x8e, 0xc2, 0xf5, 0x3d, 0x0f, 0xb9, + 0x9e, 0x43, 0x08, 0x3d, 0xc9, 0x31, 0x12, 0x34, 0xdd, 0xc0, 0x34, 0xb6, 0xf8, 0xbb, 0x39, 0x47, + 0xa7, 0x44, 0x0c, 0x3f, 0x6f, 0xca, 0x8c, 0x3f, 0x83, 0xbc, 0x86, 0x43, 0x6f, 0x31, 0xec, 0x48, + 0x08, 0xc1, 0xf1, 0x47, 0xd1, 0xc1, 0xe8, 0xc4, 0xb5, 0xc8, 0x61, 0xc0, 0x44, 0xa0}; + +unsigned char gemm_int8_a[] = {}; +unsigned char gemm_int8_a1[] = {}; +unsigned char gemm_int8_b[] = {}; +unsigned char gemm_int8_b1[] = {}; +unsigned char gemm_int8_c[] = {}; +unsigned char gemm_int32_bias[] = {}; diff --git a/tests/unit_test/valid_data/maxpool.dat b/tests/unit_test/valid_data/maxpool.dat new file mode 100644 index 00000000..65ba1d27 --- /dev/null +++ b/tests/unit_test/valid_data/maxpool.dat @@ -0,0 +1,701 @@ +/************************************************************************** + * NCHW layout + **************************************************************************/ +// [c h w]: [2 6 18] +unsigned char maxpool2x2s2_fp32_in[] = { + 0xfe, 0xbe, 0xd7, 0xbf, 0xb0, 0x08, 0x93, 0x3e, 0xc8, 0x24, 0x19, 0xc0, 0xf0, 0x37, 0x7a, 0xbf, + 0x83, 0x4a, 0xe0, 0xbf, 0xc2, 0x2f, 0xc3, 0xbf, 0x86, 0xda, 0x35, 0xc0, 0xdc, 0x31, 0x35, 0xc0, + 0xe9, 0xba, 0xd0, 0xbf, 0xb1, 0x63, 0x22, 0xc0, 0x3c, 0xd1, 0x59, 0xc0, 0x11, 0x80, 0x22, 0xc0, + 0xa1, 0x82, 0xd8, 0xbf, 0xef, 0x20, 0x0b, 0xbf, 0x2f, 0x20, 0xdb, 0xbf, 0x9e, 0x08, 0x4f, 0xc0, + 0xe7, 0x61, 0x6c, 0xbf, 0x72, 0x44, 0x03, 0xc0, 0x70, 0x12, 0xd0, 0xbf, 0xf5, 0x4b, 0xba, 0xbf, + 0x05, 0x45, 0x40, 0xc0, 0xf8, 0xb7, 0x17, 0xc0, 0xe7, 0xd4, 0xa3, 0xbf, 0x8c, 0x00, 0x74, 0xbe, + 0xdb, 0x62, 0xb6, 0xbf, 0xd9, 0x40, 0x11, 0xc0, 0xe4, 0x86, 0xd5, 0xbf, 0xe8, 0xdb, 0x4b, 0xc0, + 0xa3, 0xac, 0x6f, 0xbf, 0x2d, 0x82, 0x83, 0xbf, 0xcf, 0x12, 0x6f, 0xc0, 0xba, 0x8a, 0x2f, 0xc0, + 0x8f, 0x4f, 0x12, 0xc0, 0x33, 0x5f, 0x45, 0xc0, 0x54, 0x84, 0x01, 0xc0, 0x55, 0xc9, 0xef, 0xbf, + 0xad, 0xf1, 0xae, 0xbf, 0x30, 0xb9, 0x6a, 0xc0, 0x8f, 0xd9, 0x0d, 0xc0, 0xfd, 0x40, 0xfa, 0xbf, + 0x05, 0xd9, 0x96, 0xbf, 0xab, 0xb4, 0x2b, 0xc0, 0xd6, 0x12, 0xcf, 0xbf, 0x5e, 0x78, 0x2e, 0xc0, + 0x1e, 0x06, 0x99, 0xbf, 0x71, 0xbf, 0xcb, 0xbf, 0x0f, 0x53, 0x4a, 0xc0, 0xe2, 0xb5, 0x21, 0xc0, + 0x9d, 0x3b, 0xe0, 0xbf, 0x0f, 0xf3, 0x63, 0xbf, 0x5c, 0x17, 0x4c, 0xbf, 0xa6, 0x54, 0x6e, 0xc0, + 0xc0, 0xf1, 0x0c, 0xc0, 0x87, 0xe5, 0xab, 0xbf, 0x23, 0xaf, 0xbb, 0xbf, 0x34, 0x57, 0x51, 0xbf, + 0xd5, 0x36, 0x2d, 0xc0, 0xb3, 0x70, 0x31, 0xc0, 0xd8, 0xb7, 0x1c, 0xc0, 0x0a, 0x7e, 0x6c, 0xbf, + 0x3c, 0x83, 0x04, 0xc0, 0x22, 0xec, 0xc7, 0xbf, 0x3d, 0x9f, 0x6a, 0xc0, 0xd7, 0x3d, 0x52, 0xc0, + 0xb1, 0x3b, 0x8c, 0xbf, 0x64, 0xce, 0xa2, 0xbf, 0x16, 0x2a, 0x51, 0xc0, 0x3f, 0x27, 0xf0, 0xbf, + 0x9f, 0x2a, 0x68, 0xbf, 0x1f, 0x1a, 0x14, 0xc0, 0x10, 0xed, 0x50, 0x3e, 0x00, 0xab, 0x1f, 0xc0, + 0x40, 0x64, 0xc7, 0xbf, 0xc5, 0xa7, 0xb8, 0xbf, 0x60, 0xdb, 0x6e, 0xbd, 0x9c, 0xa6, 0x46, 0xc0, + 0x17, 0x46, 0x0b, 0xc0, 0x22, 0x8d, 0xc0, 0xbe, 0x62, 0x8c, 0x40, 0xc0, 0xfa, 0x45, 0x0f, 0xc0, + 0x75, 0x97, 0xbe, 0xbf, 0x4e, 0xce, 0x3f, 0xc0, 0x2d, 0x1e, 0x2a, 0xc0, 0x0d, 0x4b, 0x12, 0xc0, + 0x54, 0xb8, 0x2a, 0xc0, 0x17, 0x6d, 0x06, 0xc0, 0xaf, 0x43, 0xda, 0xbf, 0x98, 0x3b, 0xe3, 0xbf, + 0x2b, 0xa4, 0x40, 0xc0, 0x02, 0xe7, 0x7f, 0xbf, 0x46, 0xc2, 0x98, 0xbf, 0x93, 0xd9, 0x30, 0xc0, + 0x01, 0x5d, 0x00, 0xc0, 0xcc, 0xf3, 0x3c, 0xbf, 0x0e, 0x41, 0xd8, 0xbf, 0x00, 0x37, 0xa9, 0xbd, + 0xa0, 0x14, 0xb5, 0xbf, 0xcc, 0x0a, 0xa2, 0xbf, 0xba, 0x2d, 0x31, 0xc0, 0xd5, 0x05, 0xef, 0xbf, + 0xf1, 0x84, 0x09, 0xc0, 0xc9, 0xf7, 0x2e, 0xc0, 0x71, 0xc7, 0x02, 0xc0, 0x47, 0x48, 0x50, 0xc0, + 0xae, 0x60, 0x13, 0xc0, 0x51, 0x0f, 0xf1, 0xbf, 0x29, 0xe4, 0x0f, 0xc0, 0x4a, 0x21, 0xfb, 0xbf, + 0xc3, 0x1c, 0x2a, 0xc0, 0xf1, 0x4e, 0x0a, 0x3f, 0x1a, 0x60, 0x11, 0xc0, 0x37, 0x9b, 0x13, 0xc0, + 0x58, 0x46, 0x20, 0xc0, 0xb3, 0xe6, 0x31, 0xbf, 0x40, 0x84, 0xc4, 0xbf, 0xf8, 0x15, 0x40, 0xc0, + 0xcc, 0x3f, 0x16, 0xc0, 0x35, 0x08, 0x30, 0xc0, 0xaf, 0x10, 0x05, 0xc0, 0xe3, 0x39, 0x3e, 0xc0, + 0x83, 0xcf, 0x91, 0xbf, 0xa9, 0x0c, 0x25, 0xc0, 0xdc, 0xc9, 0x21, 0xc0, 0x4b, 0xaa, 0xef, 0xbf, + 0xc6, 0x35, 0xe2, 0xbf, 0x3b, 0x49, 0xf9, 0xbf, 0x32, 0x36, 0x9a, 0xbf, 0x4d, 0x79, 0x26, 0xbf, + 0x1a, 0xda, 0x46, 0xc0, 0x8f, 0xec, 0x2d, 0xc0, 0xd7, 0xb1, 0x58, 0xc0, 0x0b, 0xe5, 0x04, 0xc0, + 0x89, 0x77, 0x62, 0xc0, 0x02, 0xa9, 0x4b, 0xbf, 0xa6, 0x46, 0x22, 0xc0, 0xcf, 0x0b, 0xf7, 0x3e, + 0xd6, 0xa6, 0x52, 0xc0, 0x35, 0x57, 0x99, 0xbf, 0xac, 0xd7, 0xee, 0xbf, 0x62, 0xcc, 0x6f, 0xc0, + 0x4f, 0x04, 0x42, 0xc0, 0xae, 0xf4, 0xca, 0xbf, 0x37, 0x5c, 0x34, 0xc0, 0x5a, 0x8d, 0x6e, 0xc0, + 0xaf, 0x6f, 0xbb, 0xbf, 0x3b, 0x0d, 0x3f, 0xc0, 0x5c, 0x42, 0xa4, 0xbf, 0x71, 0xcf, 0xff, 0xbf, + 0x30, 0x10, 0x19, 0xc0, 0x83, 0x70, 0xb1, 0xbf, 0xe0, 0x2d, 0xd1, 0xbf, 0xd3, 0x93, 0x43, 0xc0, + 0x95, 0x49, 0xbd, 0xbf, 0x20, 0x9a, 0x14, 0xc0, 0x5f, 0x64, 0x35, 0xc0, 0xde, 0x1f, 0xeb, 0xbf, + 0xf6, 0xab, 0x2a, 0xc0, 0xed, 0xc0, 0xaf, 0xbf, 0x1e, 0xf6, 0x4a, 0xc0, 0x16, 0xbe, 0x83, 0xbf, + 0x0c, 0x20, 0x5e, 0xc0, 0xb1, 0x7a, 0xef, 0xbf, 0x1b, 0xe2, 0x1f, 0xc0, 0xbc, 0x8f, 0x1e, 0xc0, + 0x42, 0x4c, 0xb7, 0xbf, 0xd5, 0x1a, 0x06, 0xbf, 0x34, 0xfb, 0xdd, 0xbf, 0x08, 0xa8, 0xc3, 0xbf, + 0x15, 0xbb, 0x20, 0xc0, 0xa5, 0x6d, 0x3d, 0xc0, 0xc5, 0x75, 0x21, 0xc0, 0x1a, 0xbe, 0x5b, 0xbd, + 0xb9, 0x61, 0x38, 0xc0, 0xd4, 0x01, 0x27, 0xc0, 0x2a, 0x9e, 0xe0, 0xbf, 0x2b, 0xef, 0x13, 0xc0, + 0xbf, 0xe1, 0x12, 0xc0, 0x76, 0xcc, 0x83, 0xc0, 0xd5, 0x53, 0xd3, 0xbe, 0x2e, 0x99, 0x37, 0xc0, + 0x6a, 0xca, 0xb4, 0xbf, 0x61, 0x79, 0xd3, 0xbf, 0x63, 0x3f, 0x50, 0xbf, 0xa4, 0xc9, 0xa5, 0xbf, + 0x54, 0xa3, 0x0e, 0xc0, 0x0b, 0xa9, 0xf2, 0xbf, 0x95, 0x91, 0x01, 0xbf, 0x9e, 0x79, 0x3c, 0xc0, + 0xc7, 0x0f, 0xd9, 0xbf, 0x4f, 0x1a, 0x97, 0xbf, 0xae, 0x2a, 0xdf, 0xbf, 0x92, 0x97, 0xd6, 0xbf, + 0xb6, 0xce, 0x29, 0xc0, 0x14, 0x23, 0xcf, 0xbf, 0x05, 0xca, 0x0b, 0xc0, 0x73, 0xe1, 0x0a, 0xc0, + 0xd7, 0x10, 0x8b, 0xbf, 0x29, 0x1a, 0xac, 0xbe, 0x92, 0x31, 0xbf, 0xbf, 0xfc, 0x35, 0x52, 0xc0, + 0x50, 0x89, 0x1e, 0xc0, 0xc5, 0x61, 0x26, 0xbf, 0x78, 0xca, 0x2c, 0xc0, 0x47, 0xcc, 0xa0, 0xbf, + 0x63, 0x5f, 0x04, 0xc0, 0x38, 0x87, 0x67, 0xc0, 0xc3, 0xc7, 0x0f, 0xbf, 0x64, 0x54, 0xc4, 0xbf, + 0x81, 0x47, 0x0b, 0xc0, 0x0c, 0xb9, 0x85, 0xbf, 0x4d, 0xb9, 0x25, 0xc0, 0x3c, 0xb6, 0xd7, 0xbf, + 0xba, 0xc0, 0xd9, 0xbf, 0xb0, 0x9b, 0xb8, 0xbe, 0x8f, 0x43, 0xbf, 0xbf, 0x83, 0x3e, 0x2c, 0xc0}; +// [c h w]: [2 3 9] +unsigned char maxpool2x2s2_fp32_out[] = { + 0xb0, 0x08, 0x93, 0x3e, 0xf0, 0x37, 0x7a, 0xbf, 0x8c, 0x00, 0x74, 0xbe, 0xdb, 0x62, 0xb6, 0xbf, + 0xe9, 0xba, 0xd0, 0xbf, 0xa3, 0xac, 0x6f, 0xbf, 0xef, 0x20, 0x0b, 0xbf, 0x2f, 0x20, 0xdb, 0xbf, + 0xe7, 0x61, 0x6c, 0xbf, 0x34, 0x57, 0x51, 0xbf, 0xfd, 0x40, 0xfa, 0xbf, 0x0a, 0x7e, 0x6c, 0xbf, + 0x22, 0xec, 0xc7, 0xbf, 0x1e, 0x06, 0x99, 0xbf, 0xb1, 0x3b, 0x8c, 0xbf, 0x0f, 0xf3, 0x63, 0xbf, + 0x5c, 0x17, 0x4c, 0xbf, 0x10, 0xed, 0x50, 0x3e, 0x46, 0xc2, 0x98, 0xbf, 0x60, 0xdb, 0x6e, 0xbd, + 0x00, 0x37, 0xa9, 0xbd, 0xcc, 0x0a, 0xa2, 0xbf, 0x75, 0x97, 0xbe, 0xbf, 0xf1, 0x84, 0x09, 0xc0, + 0x71, 0xc7, 0x02, 0xc0, 0xaf, 0x43, 0xda, 0xbf, 0x02, 0xe7, 0x7f, 0xbf, 0xf1, 0x4e, 0x0a, 0x3f, + 0x1a, 0x60, 0x11, 0xc0, 0xb3, 0xe6, 0x31, 0xbf, 0x02, 0xa9, 0x4b, 0xbf, 0xcf, 0x0b, 0xf7, 0x3e, + 0x35, 0x57, 0x99, 0xbf, 0x83, 0xcf, 0x91, 0xbf, 0xae, 0xf4, 0xca, 0xbf, 0xc6, 0x35, 0xe2, 0xbf, + 0xaf, 0x6f, 0xbb, 0xbf, 0xd5, 0x1a, 0x06, 0xbf, 0x83, 0x70, 0xb1, 0xbf, 0xe0, 0x2d, 0xd1, 0xbf, + 0x1a, 0xbe, 0x5b, 0xbd, 0xde, 0x1f, 0xeb, 0xbf, 0xed, 0xc0, 0xaf, 0xbf, 0x16, 0xbe, 0x83, 0xbf, + 0xd5, 0x53, 0xd3, 0xbe, 0x6a, 0xca, 0xb4, 0xbf, 0xc5, 0x61, 0x26, 0xbf, 0x47, 0xcc, 0xa0, 0xbf, + 0x95, 0x91, 0x01, 0xbf, 0xc3, 0xc7, 0x0f, 0xbf, 0x0c, 0xb9, 0x85, 0xbf, 0x14, 0x23, 0xcf, 0xbf, + 0xb0, 0x9b, 0xb8, 0xbe, 0x29, 0x1a, 0xac, 0xbe}; +unsigned char maxpool2x2s2_fp16_in[] = { + 0xbd, 0xbe, 0x98, 0x34, 0xc9, 0xc0, 0xd1, 0xbb, 0x02, 0xbf, 0x19, 0xbe, 0xae, 0xc1, 0xa9, 0xc1, + 0x85, 0xbe, 0x13, 0xc1, 0xce, 0xc2, 0x14, 0xc1, 0xc4, 0xbe, 0x59, 0xb8, 0xd9, 0xbe, 0x78, 0xc2, + 0x63, 0xbb, 0x1a, 0xc0, 0x80, 0xbe, 0xd2, 0xbd, 0x02, 0xc2, 0xbd, 0xc0, 0x1e, 0xbd, 0xa0, 0xb3, + 0xb3, 0xbd, 0x8a, 0xc0, 0xac, 0xbe, 0x5e, 0xc2, 0x7d, 0xbb, 0x1c, 0xbc, 0x78, 0xc3, 0x7c, 0xc1, + 0x92, 0xc0, 0x2a, 0xc2, 0x0c, 0xc0, 0x7e, 0xbf, 0x77, 0xbd, 0x55, 0xc3, 0x6e, 0xc0, 0xd2, 0xbf, + 0xb6, 0xbc, 0x5d, 0xc1, 0x78, 0xbe, 0x73, 0xc1, 0xc8, 0xbc, 0x5d, 0xbe, 0x52, 0xc2, 0x0d, 0xc1, + 0x01, 0xbf, 0x1f, 0xbb, 0x60, 0xba, 0x72, 0xc3, 0x67, 0xc0, 0x5f, 0xbd, 0xdd, 0xbd, 0x8a, 0xba, + 0x69, 0xc1, 0x8b, 0xc1, 0xe5, 0xc0, 0x63, 0xbb, 0x24, 0xc0, 0x3f, 0xbe, 0x54, 0xc3, 0x91, 0xc2, + 0x61, 0xbc, 0x16, 0xbd, 0x89, 0xc2, 0x81, 0xbf, 0x41, 0xbb, 0xa0, 0xc0, 0x87, 0x32, 0xfd, 0xc0, + 0x3b, 0xbe, 0xc5, 0xbd, 0x76, 0xab, 0x35, 0xc2, 0x5a, 0xc0, 0x04, 0xb6, 0x04, 0xc2, 0x7a, 0xc0, + 0xf4, 0xbd, 0xfe, 0xc1, 0x50, 0xc1, 0x92, 0xc0, 0x55, 0xc1, 0x33, 0xc0, 0xd2, 0xbe, 0x19, 0xbf, + 0x05, 0xc2, 0xff, 0xbb, 0xc6, 0xbc, 0x86, 0xc1, 0x02, 0xc0, 0xe7, 0xb9, 0xc2, 0xbe, 0x49, 0xad, + 0xa8, 0xbd, 0x10, 0xbd, 0x89, 0xc1, 0x78, 0xbf, 0x4c, 0xc0, 0x77, 0xc1, 0x16, 0xc0, 0x82, 0xc2, + 0x9b, 0xc0, 0x88, 0xbf, 0x7f, 0xc0, 0xd9, 0xbf, 0x50, 0xc1, 0x52, 0x38, 0x8b, 0xc0, 0x9c, 0xc0, + 0x02, 0xc1, 0x8f, 0xb9, 0x24, 0xbe, 0x00, 0xc2, 0xb1, 0xc0, 0x80, 0xc1, 0x28, 0xc0, 0xf1, 0xc1, + 0x8e, 0xbc, 0x28, 0xc1, 0x0e, 0xc1, 0x7d, 0xbf, 0x11, 0xbf, 0xca, 0xbf, 0xd1, 0xbc, 0x33, 0xb9, + 0x36, 0xc2, 0x6f, 0xc1, 0xc5, 0xc2, 0x27, 0xc0, 0x13, 0xc3, 0x5d, 0xba, 0x12, 0xc1, 0xb8, 0x37, + 0x95, 0xc2, 0xca, 0xbc, 0x76, 0xbf, 0x7e, 0xc3, 0x10, 0xc2, 0x57, 0xbe, 0xa2, 0xc1, 0x74, 0xc3, + 0xdb, 0xbd, 0xf8, 0xc1, 0x22, 0xbd, 0xfe, 0xbf, 0xc8, 0xc0, 0x8b, 0xbd, 0x89, 0xbe, 0x1c, 0xc2, + 0xea, 0xbd, 0xa4, 0xc0, 0xab, 0xc1, 0x58, 0xbf, 0x55, 0xc1, 0x7e, 0xbd, 0x57, 0xc2, 0x1d, 0xbc, + 0xf1, 0xc2, 0x7b, 0xbf, 0xff, 0xc0, 0xf4, 0xc0, 0xba, 0xbd, 0x30, 0xb8, 0xef, 0xbe, 0x1d, 0xbe, + 0x05, 0xc1, 0xeb, 0xc1, 0x0b, 0xc1, 0xdd, 0xaa, 0xc3, 0xc1, 0x38, 0xc1, 0x04, 0xbf, 0x9f, 0xc0, + 0x97, 0xc0, 0x1e, 0xc4, 0x9a, 0xb6, 0xbc, 0xc1, 0xa6, 0xbd, 0x9b, 0xbe, 0x81, 0xba, 0x2e, 0xbd, + 0x75, 0xc0, 0x95, 0xbf, 0x0c, 0xb8, 0xe3, 0xc1, 0xc8, 0xbe, 0xb8, 0xbc, 0xf9, 0xbe, 0xb4, 0xbe, + 0x4e, 0xc1, 0x79, 0xbe, 0x5e, 0xc0, 0x57, 0xc0, 0x58, 0xbc, 0x60, 0xb5, 0xf9, 0xbd, 0x91, 0xc2, + 0xf4, 0xc0, 0x33, 0xb9, 0x66, 0xc1, 0x06, 0xbd, 0x22, 0xc0, 0x3c, 0xc3, 0x7e, 0xb8, 0x22, 0xbe, + 0x5a, 0xc0, 0x2d, 0xbc, 0x2d, 0xc1, 0xbd, 0xbe, 0xce, 0xbe, 0xc4, 0xb5, 0xfa, 0xbd, 0x61, 0xc1}; +unsigned char maxpool2x2s2_fp16_out[] = { + 0x98, 0x34, 0xd1, 0xbb, 0xa0, 0xb3, 0xb3, 0xbd, 0x85, 0xbe, 0x7d, 0xbb, 0x59, 0xb8, 0xd9, 0xbe, + 0x63, 0xbb, 0x8a, 0xba, 0xd2, 0xbf, 0x63, 0xbb, 0x3f, 0xbe, 0xc8, 0xbc, 0x61, 0xbc, 0x1f, 0xbb, + 0x60, 0xba, 0x87, 0x32, 0xc6, 0xbc, 0x76, 0xab, 0x49, 0xad, 0x10, 0xbd, 0xf4, 0xbd, 0x4c, 0xc0, + 0x16, 0xc0, 0xd2, 0xbe, 0xff, 0xbb, 0x52, 0x38, 0x8b, 0xc0, 0x8f, 0xb9, 0x5d, 0xba, 0xb8, 0x37, + 0xca, 0xbc, 0x8e, 0xbc, 0x57, 0xbe, 0x11, 0xbf, 0xdb, 0xbd, 0x30, 0xb8, 0x8b, 0xbd, 0x89, 0xbe, + 0xdd, 0xaa, 0x58, 0xbf, 0x7e, 0xbd, 0x1d, 0xbc, 0x9a, 0xb6, 0xa6, 0xbd, 0x33, 0xb9, 0x06, 0xbd, + 0x0c, 0xb8, 0x7e, 0xb8, 0x2d, 0xbc, 0x79, 0xbe, 0xc4, 0xb5, 0x60, 0xb5}; +unsigned char maxpool2x2s2_int8_in[] = { + 0xcc, 0x09, 0xb6, 0xe2, 0xca, 0xd1, 0xa8, 0xa8, 0xce, 0xb1, 0x97, 0xb1, 0xcc, 0xef, 0xcb, 0x9c, + 0xe3, 0xc1, 0xce, 0xd3, 0xa3, 0xb7, 0xd8, 0xf9, 0xd4, 0xba, 0xcc, 0x9d, 0xe3, 0xe0, 0x8c, 0xab, + 0xb9, 0xa1, 0xc1, 0xc6, 0xd6, 0x8e, 0xbb, 0xc3, 0xdc, 0xad, 0xce, 0xac, 0xdb, 0xcf, 0x9e, 0xb2, + 0xca, 0xe4, 0xe7, 0x8d, 0xbc, 0xd6, 0xd3, 0xe7, 0xac, 0xaa, 0xb4, 0xe3, 0xc0, 0xd0, 0x8f, 0x9a, + 0xde, 0xd9, 0x9b, 0xc6, 0xe4, 0xb8, 0x06, 0xb3, 0xd0, 0xd3, 0xfe, 0xa0, 0xbd, 0xf4, 0xa3, 0xbb, + 0xd2, 0xa3, 0xae, 0xb9, 0xad, 0xbf, 0xcb, 0xc9, 0xa3, 0xe1, 0xdb, 0xaa, 0xc2, 0xe9, 0xcc, 0xfd, + 0xd4, 0xd9, 0xaa, 0xc6, 0xbd, 0xab, 0xc1, 0x9b, 0xb9, 0xc6, 0xba, 0xc3, 0xae, 0x11, 0xba, 0xb9, + 0xb2, 0xea, 0xd0, 0xa3, 0xb7, 0xab, 0xc0, 0xa4, 0xdd, 0xb0, 0xb2, 0xc6, 0xc9, 0xc4, 0xdb, 0xec, + 0xa0, 0xac, 0x97, 0xc0, 0x92, 0xe7, 0xb2, 0x0f, 0x9a, 0xdb, 0xc6, 0x8c, 0xa2, 0xcf, 0xa9, 0x8d, + 0xd3, 0xa4, 0xd8, 0xc2, 0xb6, 0xd5, 0xcd, 0xa1, 0xd2, 0xb8, 0xa8, 0xc7, 0xad, 0xd5, 0x9e, 0xe0, + 0x95, 0xc6, 0xb3, 0xb3, 0xd4, 0xf0, 0xca, 0xd1, 0xb2, 0xa4, 0xb2, 0xfe, 0xa7, 0xaf, 0xca, 0xb8, + 0xb9, 0x80, 0xf3, 0xa7, 0xd4, 0xcd, 0xe7, 0xd8, 0xbb, 0xc5, 0xf0, 0xa5, 0xcc, 0xdb, 0xca, 0xcc, + 0xae, 0xce, 0xbc, 0xbd, 0xde, 0xf6, 0xd2, 0x9a, 0xb3, 0xec, 0xac, 0xd9, 0xc0, 0x90, 0xef, 0xd1, + 0xbd, 0xe0, 0xb0, 0xcc, 0xcb, 0xf5, 0xd2, 0xad}; +unsigned char maxpool2x2s2_int8_out[] = { + 0x09, 0xe2, 0xf9, 0xd4, 0xce, 0xe3, 0xef, 0xcb, 0xe3, 0xe7, 0xc3, 0xe3, 0xd0, 0xdb, + 0xde, 0xe4, 0xe7, 0x06, 0xdb, 0xfe, 0xfd, 0xd9, 0xd2, 0xbd, 0xc1, 0xcb, 0xe1, 0x11, + 0xba, 0xea, 0xe7, 0x0f, 0xdb, 0xdd, 0xcf, 0xc9, 0xd3, 0xf0, 0xd5, 0xcd, 0xfe, 0xc7, + 0xd5, 0xe0, 0xf3, 0xd4, 0xec, 0xd9, 0xf0, 0xef, 0xe0, 0xce, 0xf5, 0xf6}; + +// [c h w]: [2 7 19] +unsigned char maxpool2x2s2_p1_fp32_in[] = { + 0x54, 0x95, 0x88, 0xc0, 0x5d, 0xed, 0xd0, 0xc0, 0x13, 0xf5, 0x05, 0xc1, 0xc9, 0xee, 0xcd, 0xc0, + 0x90, 0x32, 0xcb, 0xc0, 0x4a, 0x1b, 0xba, 0xc0, 0x8d, 0x94, 0xbc, 0xc0, 0x66, 0x21, 0x09, 0xc1, + 0x2a, 0x84, 0x0a, 0xc1, 0xb2, 0x5f, 0xcb, 0xc0, 0x6a, 0x2a, 0xea, 0xc0, 0x2a, 0x16, 0x87, 0xc0, + 0xca, 0x6d, 0x15, 0xc1, 0x1b, 0x67, 0xdd, 0xc0, 0x4b, 0x2b, 0x18, 0xc1, 0x48, 0xe3, 0x93, 0xc0, + 0x93, 0x92, 0x3d, 0xc1, 0x4a, 0xf9, 0xa2, 0xc0, 0x74, 0x68, 0x20, 0xc1, 0x18, 0xcc, 0x17, 0xc1, + 0x36, 0x9e, 0xa3, 0xc0, 0x72, 0xe7, 0x03, 0xc1, 0x00, 0x1e, 0xca, 0xc0, 0x19, 0xf6, 0xeb, 0xc0, + 0x56, 0x61, 0xf0, 0xc0, 0xbb, 0x51, 0x26, 0xc1, 0x78, 0xaf, 0x30, 0xc1, 0x8b, 0x03, 0xd0, 0xc0, + 0x8f, 0xbc, 0xcd, 0xc0, 0x34, 0xc7, 0x35, 0xc1, 0xaa, 0x35, 0x07, 0xc1, 0xa3, 0x1c, 0xe5, 0xc0, + 0x9c, 0xfe, 0xf6, 0xc0, 0x41, 0xa3, 0xf4, 0xc0, 0x17, 0x41, 0x39, 0xc1, 0x49, 0xc8, 0x08, 0xc1, + 0xb5, 0xca, 0xab, 0xc0, 0xc5, 0x5e, 0x0a, 0xc1, 0x9b, 0x30, 0xd2, 0xc0, 0x83, 0x47, 0x94, 0xc0, + 0xf9, 0x13, 0xea, 0xc0, 0xc5, 0xa8, 0x0b, 0xc1, 0x67, 0x90, 0xa1, 0xc0, 0x56, 0xa7, 0x1e, 0xc1, + 0x81, 0xc9, 0x04, 0xc1, 0x5a, 0x1a, 0xaf, 0xc0, 0x05, 0x1a, 0x07, 0xc1, 0xf9, 0x25, 0x0d, 0xc1, + 0x78, 0x54, 0xce, 0xc0, 0x1f, 0x1d, 0x13, 0xc1, 0x23, 0x58, 0xec, 0xc0, 0x52, 0xd3, 0x17, 0xc1, + 0x7f, 0xd7, 0x11, 0xc1, 0x0d, 0xbe, 0x0c, 0xc1, 0x15, 0xa0, 0xef, 0xc0, 0x55, 0x19, 0x72, 0xc0, + 0xa7, 0xb8, 0xa9, 0xc0, 0x37, 0x7e, 0xde, 0xc0, 0xe3, 0x9f, 0xd1, 0xc0, 0x16, 0x76, 0x35, 0xc1, + 0x13, 0x9f, 0xcc, 0xc0, 0xef, 0x3b, 0xec, 0xc0, 0xfd, 0xb9, 0xe5, 0xc0, 0x70, 0x45, 0xe1, 0xc0, + 0xb7, 0x68, 0x0f, 0xc1, 0x77, 0xbf, 0xf9, 0xc0, 0xda, 0xe1, 0x0d, 0xc1, 0xb2, 0x97, 0x28, 0xc1, + 0xbb, 0xb9, 0x1b, 0xc1, 0x62, 0x57, 0x37, 0xc1, 0xde, 0x19, 0x2b, 0xc1, 0xe2, 0xc9, 0xf2, 0xc0, + 0x69, 0x78, 0xbe, 0xc0, 0xd7, 0x00, 0x41, 0xc0, 0x20, 0x1d, 0x0f, 0xc1, 0x87, 0x16, 0x02, 0xc1, + 0xef, 0x4c, 0xd9, 0xc0, 0xc8, 0xb9, 0xfb, 0xc0, 0x29, 0xc7, 0x1d, 0xc1, 0x29, 0x82, 0x0b, 0xc1, + 0x1e, 0xa9, 0x15, 0xc1, 0x5c, 0xa1, 0x3d, 0xc1, 0x16, 0xdc, 0x04, 0xc1, 0x25, 0x0d, 0xcd, 0xc0, + 0xa5, 0x9e, 0xda, 0xc0, 0x5f, 0x3d, 0xc2, 0xc0, 0x3a, 0x55, 0xa5, 0xc0, 0x22, 0x6d, 0xc7, 0xc0, + 0x3c, 0xd5, 0x03, 0xc1, 0x8a, 0x90, 0x06, 0xc1, 0x08, 0x62, 0x7c, 0xc0, 0xbf, 0x43, 0x29, 0xc1, + 0x11, 0x06, 0x70, 0xc0, 0x74, 0x11, 0x1e, 0xc1, 0x37, 0xe0, 0x13, 0xc1, 0x9e, 0xe8, 0xdd, 0xc0, + 0x4c, 0xe1, 0xd6, 0xc0, 0x52, 0xf0, 0x02, 0xc1, 0xdd, 0xbb, 0xd5, 0xc0, 0xd0, 0xbc, 0x0c, 0xc1, + 0x9c, 0xde, 0xcb, 0xc0, 0x94, 0xd5, 0xf8, 0xc0, 0x39, 0x7d, 0xf8, 0xc0, 0x33, 0xc9, 0xdd, 0xc0, + 0x47, 0x82, 0x8c, 0xc0, 0x87, 0x4d, 0x28, 0xc1, 0xc3, 0xed, 0xd6, 0xc0, 0xd2, 0xad, 0xb4, 0xc0, + 0x00, 0x36, 0xd6, 0xc0, 0xf5, 0xaf, 0x32, 0xc1, 0xd2, 0xfd, 0xd6, 0xc0, 0x44, 0xc2, 0x10, 0xc1, + 0xff, 0xd5, 0x4e, 0xc0, 0x2c, 0x1a, 0xf6, 0xc0, 0xcb, 0x03, 0xe1, 0xc0, 0x7e, 0xf3, 0x1c, 0xc1, + 0xc0, 0x7a, 0x38, 0xc1, 0x3e, 0x21, 0x15, 0xc1, 0x1d, 0xe4, 0xf2, 0xc0, 0x65, 0x0e, 0x21, 0xc1, + 0xac, 0x03, 0xf3, 0xc0, 0x38, 0xfb, 0x25, 0xc1, 0xa8, 0x8a, 0x26, 0xc1, 0x9e, 0x6a, 0x0e, 0xc1, + 0x39, 0x4a, 0x02, 0xc1, 0xb3, 0x48, 0x07, 0xc1, 0x91, 0xe5, 0x26, 0xc1, 0x3e, 0x9d, 0x1f, 0xc1, + 0xb4, 0xac, 0xe0, 0xc0, 0xa7, 0x03, 0xfc, 0xc0, 0x43, 0x56, 0x81, 0xc0, 0x60, 0xaa, 0x22, 0xc1, + 0xa8, 0x83, 0x04, 0xc1, 0x04, 0x07, 0x07, 0xc1, 0x07, 0xdd, 0xc7, 0xc0, 0x56, 0x01, 0xf7, 0xc0, + 0x91, 0x8e, 0x31, 0xc1, 0x04, 0xf6, 0xd9, 0xc0, 0x33, 0x63, 0xbd, 0xc0, 0x14, 0xe2, 0x17, 0xc1, + 0x16, 0xb2, 0xf9, 0xc0, 0xe7, 0x04, 0x02, 0xc1, 0x76, 0xc3, 0x10, 0xc1, 0x17, 0xb9, 0xa1, 0xc0, + 0x6f, 0x69, 0x2f, 0xc1, 0x21, 0x92, 0x3b, 0xc1, 0x87, 0x77, 0xa1, 0xc0, 0x46, 0xfd, 0x0f, 0xc1, + 0x9a, 0x01, 0xe7, 0xc0, 0x70, 0xfd, 0x1f, 0xc1, 0xcb, 0xd5, 0xdb, 0xc0, 0xe6, 0xe6, 0x16, 0xc1, + 0x2d, 0x64, 0x11, 0xc1, 0x83, 0xe2, 0xd4, 0xc0, 0xa6, 0x90, 0xc5, 0xc0, 0xf9, 0x90, 0xdb, 0xc0, + 0x09, 0x6c, 0x12, 0xc1, 0x59, 0xa1, 0xfd, 0xc0, 0x76, 0x40, 0xd9, 0xc0, 0x9a, 0xfc, 0xd6, 0xc0, + 0xae, 0xd6, 0x0c, 0xc1, 0x67, 0x57, 0xc6, 0xc0, 0x73, 0xc4, 0xd2, 0xc0, 0x04, 0x70, 0xad, 0xc0, + 0x2e, 0x3d, 0x31, 0xc1, 0xbb, 0x4f, 0xdb, 0xc0, 0xf6, 0x7e, 0x84, 0xc0, 0x51, 0x0d, 0x1d, 0xc1, + 0xf9, 0xb2, 0xf3, 0xc0, 0xd4, 0xdb, 0x0b, 0xc1, 0xc9, 0x27, 0xbc, 0xc0, 0xc8, 0x59, 0x3a, 0xc1, + 0xa2, 0xc1, 0x40, 0xc1, 0xbe, 0xae, 0x08, 0xc1, 0xe3, 0xe0, 0x0b, 0xc1, 0xd6, 0x4d, 0xe5, 0xc0, + 0x61, 0xe7, 0xcd, 0xc0, 0xd3, 0x57, 0x19, 0xc1, 0x35, 0xcd, 0xc4, 0xc0, 0x58, 0x10, 0x18, 0xc1, + 0xb5, 0xb1, 0xf1, 0xc0, 0x9b, 0x94, 0x1e, 0xc1, 0x58, 0x9f, 0xe6, 0xc0, 0xb5, 0xc9, 0xec, 0xc0, + 0xb7, 0x51, 0x0f, 0xc1, 0xc4, 0x76, 0xcd, 0xc0, 0xa9, 0xad, 0xbd, 0xc0, 0xae, 0xad, 0x30, 0xc1, + 0xe5, 0xfc, 0x09, 0xc1, 0x5f, 0xf1, 0xfe, 0xc0, 0xd3, 0xd7, 0xcf, 0xc0, 0x8d, 0xd1, 0x0e, 0xc1, + 0xdd, 0x52, 0xc9, 0xc0, 0xf7, 0x97, 0xee, 0xc0, 0x55, 0xe1, 0x41, 0xc1, 0xb6, 0x16, 0xe0, 0xc0, + 0xc6, 0xb3, 0x1b, 0xc1, 0xb7, 0x49, 0xd1, 0xc0, 0x89, 0xb7, 0xea, 0xc0, 0xa1, 0x44, 0x30, 0xc1, + 0xeb, 0x73, 0x02, 0xc1, 0xd6, 0xe8, 0xf6, 0xc0, 0xaf, 0xd0, 0x18, 0xc0, 0x8e, 0xd6, 0xee, 0xc0, + 0x78, 0x6c, 0x0c, 0xc1, 0xb6, 0x1e, 0xbe, 0xc0, 0xcd, 0x25, 0xa7, 0xc0, 0xc7, 0xdb, 0xcf, 0xc0, + 0xa0, 0xcf, 0xa2, 0xc0, 0x5f, 0xfe, 0x20, 0xc1, 0x94, 0xb9, 0x09, 0xc1, 0xd9, 0x3f, 0x2d, 0xc1, + 0xc6, 0x65, 0x0b, 0xc1, 0x7e, 0x71, 0x22, 0xc1, 0x11, 0x42, 0x29, 0xc1, 0x6b, 0x62, 0xdb, 0xc0, + 0x44, 0x3f, 0x30, 0xc1, 0xd7, 0x54, 0x06, 0xc1, 0x91, 0x7c, 0x00, 0xc1, 0xf6, 0x97, 0xfd, 0xc0, + 0xdd, 0xc8, 0xdd, 0xc0, 0xc6, 0xa6, 0xeb, 0xc0, 0xae, 0x47, 0xdc, 0xc0, 0xc9, 0x00, 0x02, 0xc1, + 0xe9, 0x47, 0x32, 0xc1, 0xd8, 0xba, 0xf7, 0xc0, 0x73, 0x7f, 0x9b, 0xc0, 0x39, 0xab, 0x36, 0xc1, + 0x8d, 0x8f, 0x04, 0xc1, 0x82, 0x7c, 0x54, 0xc1, 0x3a, 0xae, 0xf2, 0xc0, 0x20, 0xb3, 0xe6, 0xc0, + 0xb7, 0x38, 0xed, 0xc0, 0x3a, 0xc8, 0xc2, 0xc0, 0x56, 0xe8, 0xf5, 0xc0, 0xe1, 0xd4, 0xf9, 0xc0, + 0x26, 0x9b, 0xe9, 0xc0, 0xca, 0x02, 0xc9, 0xc0, 0xce, 0xa0, 0x30, 0xc1, 0x39, 0x4e, 0x0c, 0xc1, + 0x41, 0x7b, 0x10, 0xc1, 0x40, 0x3f, 0x14, 0xc1, 0x98, 0x42, 0xe8, 0xc0, 0xb7, 0xa3, 0x10, 0xc1, + 0x88, 0xb3, 0x1b, 0xc1, 0xc2, 0x60, 0x0a, 0xc1, 0xa0, 0x76, 0x16, 0xc1, 0x9a, 0x0b, 0xa6, 0xc0, + 0x9d, 0x2a, 0x03, 0xc1, 0x7d, 0x45, 0xcd, 0xc0, 0xd6, 0x7f, 0x02, 0xc1, 0x14, 0x18, 0xf7, 0xc0, + 0xdd, 0x0d, 0x02, 0xc1, 0x36, 0x31, 0x24, 0xc1, 0xd7, 0xa7, 0xd8, 0xc0, 0xae, 0xba, 0xfa, 0xc0, + 0xaa, 0x5e, 0xf6, 0xc0, 0xa9, 0xef, 0x34, 0xc1, 0xc8, 0xa6, 0xa6, 0xc0, 0x3d, 0xeb, 0xa5, 0xc0, + 0x93, 0xec, 0xbe, 0xc0, 0x6d, 0xcc, 0xa2, 0xc0, 0x17, 0xf8, 0xfc, 0xc0, 0xe2, 0xd7, 0x01, 0xc1, + 0x62, 0xfd, 0x10, 0xc1, 0x10, 0xe5, 0x0e, 0xc1}; +// [c h w]: [2 4 10] +unsigned char maxpool2x2s2_p1_fp32_out[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x83, 0x47, 0x94, 0xc0, + 0x67, 0x90, 0xa1, 0xc0, 0x56, 0x61, 0xf0, 0xc0, 0x5a, 0x1a, 0xaf, 0xc0, 0x8f, 0xbc, 0xcd, 0xc0, + 0xa3, 0x1c, 0xe5, 0xc0, 0x41, 0xa3, 0xf4, 0xc0, 0x15, 0xa0, 0xef, 0xc0, 0x55, 0x19, 0x72, 0xc0, + 0x00, 0x00, 0x00, 0x00, 0xe3, 0x9f, 0xd1, 0xc0, 0x13, 0x9f, 0xcc, 0xc0, 0x70, 0x45, 0xe1, 0xc0, + 0x25, 0x0d, 0xcd, 0xc0, 0x3a, 0x55, 0xa5, 0xc0, 0x22, 0x6d, 0xc7, 0xc0, 0x08, 0x62, 0x7c, 0xc0, + 0xd7, 0x00, 0x41, 0xc0, 0x87, 0x16, 0x02, 0xc1, 0x00, 0x00, 0x00, 0x00, 0x4c, 0xe1, 0xd6, 0xc0, + 0xdd, 0xbb, 0xd5, 0xc0, 0x9c, 0xde, 0xcb, 0xc0, 0x33, 0xc9, 0xdd, 0xc0, 0x47, 0x82, 0x8c, 0xc0, + 0xd2, 0xad, 0xb4, 0xc0, 0x00, 0x36, 0xd6, 0xc0, 0x43, 0x56, 0x81, 0xc0, 0xff, 0xd5, 0x4e, 0xc0, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa6, 0x90, 0xc5, 0xc0, + 0xf9, 0x90, 0xdb, 0xc0, 0x61, 0xe7, 0xcd, 0xc0, 0x35, 0xcd, 0xc4, 0xc0, 0x67, 0x57, 0xc6, 0xc0, + 0x04, 0x70, 0xad, 0xc0, 0xf6, 0x7e, 0x84, 0xc0, 0xa9, 0xad, 0xbd, 0xc0, 0xc9, 0x27, 0xbc, 0xc0, + 0x00, 0x00, 0x00, 0x00, 0xdd, 0x52, 0xc9, 0xc0, 0xf7, 0x97, 0xee, 0xc0, 0x6b, 0x62, 0xdb, 0xc0, + 0xb7, 0x49, 0xd1, 0xc0, 0xf6, 0x97, 0xfd, 0xc0, 0xaf, 0xd0, 0x18, 0xc0, 0xae, 0x47, 0xdc, 0xc0, + 0xcd, 0x25, 0xa7, 0xc0, 0x73, 0x7f, 0x9b, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x7d, 0x45, 0xcd, 0xc0, + 0x20, 0xb3, 0xe6, 0xc0, 0x3a, 0xc8, 0xc2, 0xc0, 0xd7, 0xa7, 0xd8, 0xc0, 0xca, 0x02, 0xc9, 0xc0, + 0x3d, 0xeb, 0xa5, 0xc0, 0x6d, 0xcc, 0xa2, 0xc0, 0x17, 0xf8, 0xfc, 0xc0, 0xc2, 0x60, 0x0a, 0xc1}; +unsigned char maxpool2x2s2_p1_fp16_in[] = { + 0x44, 0xc4, 0x87, 0xc6, 0x2f, 0xc8, 0x6f, 0xc6, 0x59, 0xc6, 0xd0, 0xc5, 0xe4, 0xc5, 0x49, 0xc8, + 0x54, 0xc8, 0x5a, 0xc6, 0x51, 0xc7, 0x38, 0xc4, 0xab, 0xc8, 0xeb, 0xc6, 0xc1, 0xc8, 0x9f, 0xc4, + 0xec, 0xc9, 0x17, 0xc5, 0x03, 0xc9, 0xbe, 0xc8, 0x1c, 0xc5, 0x1f, 0xc8, 0x50, 0xc6, 0x5f, 0xc7, + 0x83, 0xc7, 0x32, 0xc9, 0x85, 0xc9, 0x80, 0xc6, 0x6d, 0xc6, 0xae, 0xc9, 0x39, 0xc8, 0x28, 0xc7, + 0xb7, 0xc7, 0xa5, 0xc7, 0xca, 0xc9, 0x46, 0xc8, 0x5e, 0xc5, 0x52, 0xc8, 0x91, 0xc6, 0xa2, 0xc4, + 0x50, 0xc7, 0x5d, 0xc8, 0x0c, 0xc5, 0xf5, 0xc8, 0x26, 0xc8, 0x78, 0xc5, 0x38, 0xc8, 0x69, 0xc8, + 0x72, 0xc6, 0x98, 0xc8, 0x62, 0xc7, 0xbe, 0xc8, 0x8e, 0xc8, 0x65, 0xc8, 0x7d, 0xc7, 0x90, 0xc3, + 0x4d, 0xc5, 0xf3, 0xc6, 0x8c, 0xc6, 0xab, 0xc9, 0x64, 0xc6, 0x61, 0xc7, 0x2d, 0xc7, 0x0a, 0xc7, + 0x7b, 0xc8, 0xcd, 0xc7, 0x6f, 0xc8, 0x44, 0xc9, 0xdd, 0xc8, 0xba, 0xc9, 0x58, 0xc9, 0x96, 0xc7, + 0xf3, 0xc5, 0x08, 0xc2, 0x78, 0xc8, 0x10, 0xc8, 0xca, 0xc6, 0xdd, 0xc7, 0xee, 0xc8, 0x5c, 0xc8, + 0xad, 0xc8, 0xed, 0xc9, 0x26, 0xc8, 0x68, 0xc6, 0xd4, 0xc6, 0x11, 0xc6, 0x2a, 0xc5, 0x3b, 0xc6, + 0x1e, 0xc8, 0x34, 0xc8, 0xe3, 0xc3, 0x4a, 0xc9, 0x80, 0xc3, 0xf0, 0xc8, 0x9f, 0xc8, 0xef, 0xc6, + 0xb7, 0xc6, 0x17, 0xc8, 0xad, 0xc6, 0x65, 0xc8, 0x5e, 0xc6, 0xc6, 0xc7, 0xc3, 0xc7, 0xee, 0xc6, + 0x64, 0xc4, 0x42, 0xc9, 0xb7, 0xc6, 0xa5, 0xc5, 0xb1, 0xc6, 0x95, 0xc9, 0xb7, 0xc6, 0x86, 0xc8, + 0x76, 0xc2, 0xb0, 0xc7, 0x08, 0xc7, 0xe7, 0xc8, 0xc3, 0xc9, 0xa9, 0xc8, 0x97, 0xc7, 0x08, 0xc9, + 0x98, 0xc7, 0x2f, 0xc9, 0x34, 0xc9, 0x73, 0xc8, 0x12, 0xc8, 0x3a, 0xc8, 0x37, 0xc9, 0xfc, 0xc8, + 0x05, 0xc7, 0xe0, 0xc7, 0x0a, 0xc4, 0x15, 0xc9, 0x24, 0xc8, 0x38, 0xc8, 0x3e, 0xc6, 0xb8, 0xc7, + 0x8c, 0xc9, 0xcf, 0xc6, 0xeb, 0xc5, 0xbf, 0xc8, 0xcd, 0xc7, 0x10, 0xc8, 0x86, 0xc8, 0x0d, 0xc5, + 0x7b, 0xc9, 0xdc, 0xc9, 0x0b, 0xc5, 0x7f, 0xc8, 0x38, 0xc7, 0xff, 0xc8, 0xde, 0xc6, 0xb7, 0xc8, + 0x8b, 0xc8, 0xa7, 0xc6, 0x2c, 0xc6, 0xdc, 0xc6, 0x93, 0xc8, 0xed, 0xc7, 0xca, 0xc6, 0xb7, 0xc6, + 0x66, 0xc8, 0x32, 0xc6, 0x96, 0xc6, 0x6b, 0xc5, 0x89, 0xc9, 0xda, 0xc6, 0x23, 0xc4, 0xe8, 0xc8, + 0x9d, 0xc7, 0x5e, 0xc8, 0xe1, 0xc5, 0xd2, 0xc9, 0x06, 0xca, 0x45, 0xc8, 0x5f, 0xc8, 0x2a, 0xc7, + 0x6f, 0xc6, 0xca, 0xc8, 0x26, 0xc6, 0xc0, 0xc8, 0x8d, 0xc7, 0xf4, 0xc8, 0x34, 0xc7, 0x66, 0xc7, + 0x7a, 0xc8, 0x6b, 0xc6, 0xed, 0xc5, 0x85, 0xc9, 0x4f, 0xc8, 0xf7, 0xc7, 0x7e, 0xc6, 0x76, 0xc8, + 0x4a, 0xc6, 0x74, 0xc7, 0x0f, 0xca, 0x00, 0xc7, 0xdd, 0xc8, 0x8a, 0xc6, 0x55, 0xc7, 0x82, 0xc9, + 0x13, 0xc8, 0xb7, 0xc7, 0xc6, 0xc0, 0x76, 0xc7, 0x63, 0xc8, 0xf0, 0xc5, 0x39, 0xc5, 0x7e, 0xc6, + 0x16, 0xc5, 0x07, 0xc9, 0x4d, 0xc8, 0x69, 0xc9, 0x5b, 0xc8, 0x13, 0xc9, 0x4a, 0xc9, 0xdb, 0xc6, + 0x81, 0xc9, 0x32, 0xc8, 0x03, 0xc8, 0xec, 0xc7, 0xee, 0xc6, 0x5d, 0xc7, 0xe2, 0xc6, 0x10, 0xc8, + 0x92, 0xc9, 0xbd, 0xc7, 0xdb, 0xc4, 0xb5, 0xc9, 0x24, 0xc8, 0xa3, 0xca, 0x95, 0xc7, 0x35, 0xc7, + 0x69, 0xc7, 0x16, 0xc6, 0xaf, 0xc7, 0xce, 0xc7, 0x4c, 0xc7, 0x48, 0xc6, 0x85, 0xc9, 0x62, 0xc8, + 0x83, 0xc8, 0xa1, 0xc8, 0x42, 0xc7, 0x85, 0xc8, 0xdd, 0xc8, 0x53, 0xc8, 0xb3, 0xc8, 0x30, 0xc5, + 0x19, 0xc8, 0x6a, 0xc6, 0x13, 0xc8, 0xb8, 0xc7, 0x10, 0xc8, 0x21, 0xc9, 0xc5, 0xc6, 0xd5, 0xc7, + 0xb2, 0xc7, 0xa7, 0xc9, 0x35, 0xc5, 0x2f, 0xc5, 0xf7, 0xc5, 0x16, 0xc5, 0xe7, 0xc7, 0x0e, 0xc8, + 0x87, 0xc8, 0x77, 0xc8}; +unsigned char maxpool2x2s2_p1_fp16_out[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa2, 0xc4, 0x0c, 0xc5, 0x83, 0xc7, 0x78, 0xc5, 0x6d, 0xc6, + 0x28, 0xc7, 0xa5, 0xc7, 0x7d, 0xc7, 0x90, 0xc3, 0x00, 0x00, 0x8c, 0xc6, 0x64, 0xc6, 0x0a, 0xc7, + 0x68, 0xc6, 0x2a, 0xc5, 0x3b, 0xc6, 0xe3, 0xc3, 0x08, 0xc2, 0x10, 0xc8, 0x00, 0x00, 0xb7, 0xc6, + 0xad, 0xc6, 0x5e, 0xc6, 0xee, 0xc6, 0x64, 0xc4, 0xa5, 0xc5, 0xb1, 0xc6, 0x0a, 0xc4, 0x76, 0xc2, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0xc6, 0xdc, 0xc6, 0x6f, 0xc6, 0x26, 0xc6, 0x32, 0xc6, + 0x6b, 0xc5, 0x23, 0xc4, 0xed, 0xc5, 0xe1, 0xc5, 0x00, 0x00, 0x4a, 0xc6, 0x74, 0xc7, 0xdb, 0xc6, + 0x8a, 0xc6, 0xec, 0xc7, 0xc6, 0xc0, 0xe2, 0xc6, 0x39, 0xc5, 0xdb, 0xc4, 0x00, 0x00, 0x6a, 0xc6, + 0x35, 0xc7, 0x16, 0xc6, 0xc5, 0xc6, 0x48, 0xc6, 0x2f, 0xc5, 0x16, 0xc5, 0xe7, 0xc7, 0x53, 0xc8}; +unsigned char maxpool2x2s2_p1_int8_in[] = { + 0xd7, 0xc1, 0xb0, 0xc2, 0xc3, 0xc8, 0xc7, 0xae, 0xad, 0xc3, 0xba, 0xd7, 0xa6, 0xbe, 0xa5, 0xd4, + 0x8e, 0xcf, 0xa0, 0xa5, 0xcf, 0xb1, 0xc3, 0xb9, 0xb8, 0x9c, 0x96, 0xc2, 0xc2, 0x93, 0xaf, 0xbb, + 0xb6, 0xb7, 0x91, 0xae, 0xcc, 0xad, 0xc1, 0xd4, 0xba, 0xac, 0xd0, 0xa1, 0xb0, 0xcb, 0xaf, 0xab, + 0xc2, 0xa8, 0xb9, 0xa5, 0xa8, 0xac, 0xb8, 0xdc, 0xcd, 0xbd, 0xc1, 0x93, 0xc3, 0xb9, 0xbb, 0xbc, + 0xaa, 0xb5, 0xab, 0x9b, 0xa3, 0x92, 0x99, 0xb7, 0xc7, 0xe3, 0xaa, 0xb2, 0xbf, 0xb4, 0xa1, 0xac, + 0xa6, 0x8e, 0xb0, 0xc2, 0xbe, 0xc6, 0xce, 0xc4, 0xb1, 0xaf, 0xda, 0x9a, 0xdc, 0xa1, 0xa7, 0xbd, + 0xc0, 0xb1, 0xc0, 0xac, 0xc3, 0xb5, 0xb5, 0xbd, 0xd6, 0x9b, 0xc0, 0xca, 0xc0, 0x95, 0xbf, 0xa9, + 0xe1, 0xb6, 0xbc, 0xa2, 0x91, 0xa7, 0xb7, 0x9f, 0xb7, 0x9c, 0x9c, 0xab, 0xb2, 0xaf, 0x9c, 0xa0, + 0xbd, 0xb4, 0xd9, 0x9e, 0xb0, 0xaf, 0xc4, 0xb6, 0x95, 0xbf, 0xc7, 0xa5, 0xb5, 0xb2, 0xa9, 0xcf, + 0x97, 0x8f, 0xd0, 0xaa, 0xbb, 0xa0, 0xbe, 0xa5, 0xa9, 0xc0, 0xc5, 0xbe, 0xa8, 0xb4, 0xbf, 0xbf, + 0xab, 0xc4, 0xc1, 0xcc, 0x96, 0xbe, 0xd8, 0xa2, 0xb7, 0xac, 0xc8, 0x90, 0x8c, 0xae, 0xac, 0xbb, + 0xc2, 0xa4, 0xc5, 0xa5, 0xb7, 0xa1, 0xbb, 0xb9, 0xaa, 0xc2, 0xc7, 0x96, 0xad, 0xb4, 0xc2, 0xaa, + 0xc4, 0xb8, 0x8c, 0xbd, 0xa3, 0xc1, 0xba, 0x96, 0xb2, 0xb6, 0xe9, 0xb8, 0xac, 0xc7, 0xce, 0xc2, + 0xcf, 0x9f, 0xad, 0x98, 0xac, 0x9f, 0x9a, 0xbe, 0x96, 0xaf, 0xb3, 0xb4, 0xbd, 0xb9, 0xbe, 0xb2, + 0x95, 0xb6, 0xd1, 0x92, 0xb0, 0x80, 0xb7, 0xbb, 0xb9, 0xc6, 0xb6, 0xb5, 0xba, 0xc4, 0x96, 0xac, + 0xa9, 0xa7, 0xba, 0xa9, 0xa3, 0xad, 0xa6, 0xce, 0xb1, 0xc2, 0xb2, 0xb6, 0xb2, 0x9d, 0xbf, 0xb5, + 0xb6, 0x93, 0xce, 0xce, 0xc7, 0xcf, 0xb4, 0xb2, 0xa9, 0xaa}; +unsigned char maxpool2x2s2_p1_int8_out[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd4, 0xd0, 0xb8, 0xcb, 0xc2, + 0xbb, 0xb7, 0xb8, 0xdc, 0x00, 0xc1, 0xc3, 0xbc, 0xc2, 0xce, 0xc4, 0xda, 0xe3, 0xb2, 0x00, 0xc0, + 0xc0, 0xc3, 0xbd, 0xd6, 0xca, 0xc0, 0xd9, 0xe1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc5, 0xbe, 0xc2, 0xc5, 0xc4, 0xcc, 0xd8, 0xc7, 0xc8, 0x00, 0xc4, 0xb8, 0xbe, + 0xc1, 0xb4, 0xe9, 0xbe, 0xce, 0xd1, 0x00, 0xc2, 0xbb, 0xc6, 0xbf, 0xc4, 0xce, 0xcf, 0xb4, 0xad}; + +// [c h w]: [2 7 19] +unsigned char maxpool3x3s2_fp32_in[] = { + 0x76, 0x88, 0xe4, 0xc0, 0x36, 0xf6, 0xcd, 0xc0, 0x0d, 0xed, 0x02, 0xc1, 0x0b, 0x46, 0xcb, 0xc0, + 0xc4, 0x23, 0xbe, 0xc0, 0xe6, 0x9c, 0xd4, 0xc0, 0x33, 0x52, 0xb0, 0xc0, 0x48, 0x09, 0xba, 0xc0, + 0x33, 0x60, 0xbc, 0xc0, 0xcf, 0x98, 0xdb, 0xc0, 0x5e, 0xfc, 0xc0, 0xc0, 0x82, 0xbd, 0xa3, 0xc0, + 0x6f, 0x3a, 0xe2, 0xc0, 0xaf, 0x11, 0xa5, 0xc0, 0x78, 0xcb, 0x93, 0xc0, 0xa0, 0xd0, 0xe0, 0xc0, + 0xab, 0x67, 0xab, 0xc0, 0xdc, 0x55, 0x7e, 0xc0, 0xbe, 0xec, 0x54, 0xc0, 0xb2, 0xb8, 0xbd, 0xc0, + 0x49, 0x72, 0xc3, 0xc0, 0x8b, 0x8b, 0xab, 0xc0, 0x8d, 0xc5, 0xa9, 0xc0, 0x8a, 0x6a, 0xb9, 0xc0, + 0x6a, 0xfe, 0xa3, 0xc0, 0x57, 0xa3, 0xc0, 0xc0, 0xe8, 0x01, 0xd7, 0xc0, 0x6e, 0x0d, 0xca, 0xc0, + 0x28, 0xb0, 0xcb, 0xc0, 0x7a, 0x02, 0xc6, 0xc0, 0xa0, 0xf7, 0xdf, 0xc0, 0x3d, 0x5d, 0x99, 0xc0, + 0x53, 0x5e, 0xc1, 0xc0, 0xd2, 0xbb, 0xc7, 0xc0, 0xbd, 0xee, 0xc5, 0xc0, 0xf0, 0xe1, 0xcb, 0xc0, + 0x77, 0xe7, 0xd3, 0xc0, 0x36, 0x81, 0xa8, 0xc0, 0x58, 0x21, 0xdf, 0xc0, 0x79, 0xad, 0xbb, 0xc0, + 0x4a, 0x83, 0xe1, 0xc0, 0x0c, 0x1f, 0xb6, 0xc0, 0x1b, 0xd2, 0xad, 0xc0, 0x5b, 0xbb, 0xa3, 0xc0, + 0x82, 0xd9, 0xc0, 0xc0, 0xa1, 0xc1, 0xff, 0xc0, 0x62, 0x90, 0x98, 0xc0, 0x96, 0xa0, 0xd0, 0xc0, + 0x92, 0x0c, 0x8d, 0xc0, 0xbe, 0x24, 0xeb, 0xc0, 0x50, 0xb6, 0xaa, 0xc0, 0x3f, 0x8e, 0xd3, 0xc0, + 0x80, 0x8c, 0xd7, 0xc0, 0x74, 0x73, 0xb6, 0xc0, 0x8d, 0x71, 0x90, 0xc0, 0xb2, 0xab, 0xfe, 0xc0, + 0x7a, 0xb5, 0xcb, 0xc0, 0x64, 0x6c, 0xea, 0xc0, 0xf0, 0x60, 0xb9, 0xc0, 0x86, 0xdb, 0xfb, 0xc0, + 0x63, 0xc2, 0x9e, 0xc0, 0x3e, 0x30, 0xc9, 0xc0, 0x01, 0xeb, 0x9c, 0xc0, 0x87, 0xda, 0xa3, 0xc0, + 0x1d, 0x41, 0xe4, 0xc0, 0x44, 0x6a, 0x96, 0xc0, 0x37, 0x47, 0xda, 0xc0, 0x48, 0xed, 0xd6, 0xc0, + 0x16, 0x1a, 0xd6, 0xc0, 0x4d, 0xe7, 0xe1, 0xc0, 0x20, 0xd9, 0xc7, 0xc0, 0x8f, 0x87, 0x8b, 0xc0, + 0x95, 0x74, 0x91, 0xc0, 0x71, 0xf3, 0x84, 0xc0, 0x69, 0x09, 0x89, 0xc0, 0x2b, 0x96, 0xc9, 0xc0, + 0x7b, 0xda, 0x93, 0xc0, 0x7f, 0x05, 0xed, 0xc0, 0x99, 0x5f, 0xe8, 0xc0, 0x19, 0x6b, 0xa7, 0xc0, + 0xcc, 0x6f, 0xc2, 0xc0, 0x49, 0x5e, 0xd7, 0xc0, 0x5e, 0xd5, 0xa7, 0xc0, 0x98, 0xca, 0x85, 0xc0, + 0x27, 0x64, 0x8c, 0xc0, 0x50, 0x68, 0x34, 0xc0, 0xf0, 0x8d, 0x99, 0xc0, 0xe7, 0x63, 0xbf, 0xc0, + 0xe2, 0xd2, 0xd8, 0xc0, 0x7b, 0xdb, 0x02, 0xc1, 0x66, 0xfd, 0xb7, 0xc0, 0x01, 0xe0, 0xce, 0xc0, + 0xfd, 0x45, 0xdb, 0xc0, 0xc4, 0x88, 0xdb, 0xc0, 0x59, 0xb1, 0xb6, 0xc0, 0x58, 0x62, 0xb5, 0xc0, + 0xbd, 0x51, 0x61, 0xc0, 0xc5, 0x9d, 0xb2, 0xc0, 0xa4, 0x0a, 0xf3, 0xc0, 0xfd, 0x25, 0xb8, 0xc0, + 0x48, 0x9a, 0xac, 0xc0, 0xdf, 0x71, 0xc5, 0xc0, 0x7b, 0xd9, 0xac, 0xc0, 0xff, 0xd2, 0xa7, 0xc0, + 0x65, 0x85, 0xb1, 0xc0, 0xe4, 0x5a, 0xe8, 0xc0, 0x00, 0x3b, 0xad, 0xc0, 0x79, 0xe2, 0xaa, 0xc0, + 0xab, 0x1d, 0xf8, 0xc0, 0x1b, 0x1e, 0x95, 0xc0, 0xe2, 0x8a, 0xe0, 0xc0, 0xed, 0xa5, 0x92, 0xc0, + 0xe0, 0x6c, 0x9f, 0xc0, 0x7a, 0x39, 0xaf, 0xc0, 0xd8, 0x59, 0xd8, 0xc0, 0x72, 0x45, 0xbd, 0xc0, + 0x38, 0x7b, 0x93, 0xc0, 0x0c, 0xfe, 0x99, 0xc0, 0x03, 0xd8, 0xc1, 0xc0, 0x8f, 0x41, 0xd3, 0xc0, + 0x77, 0x09, 0x69, 0xc0, 0xf2, 0xb7, 0xe9, 0xc0, 0x45, 0x83, 0xd6, 0xc0, 0x14, 0x4b, 0xad, 0xc0, + 0x8b, 0xe7, 0xba, 0xc0, 0x61, 0x1a, 0xc6, 0xc0, 0x56, 0x49, 0xc2, 0xc0, 0x89, 0xbc, 0xc1, 0xc0, + 0xfa, 0x86, 0xb9, 0xc0, 0xbb, 0xc9, 0xb3, 0xc0, 0x4d, 0x43, 0xc8, 0xc0, 0x5f, 0x54, 0xbb, 0xc0, + 0x1c, 0x76, 0x82, 0xc0, 0x00, 0xb1, 0xe0, 0xc0, 0x4d, 0x67, 0xbf, 0xc0, 0xb0, 0xae, 0xc4, 0xc0, + 0xb1, 0x22, 0xd1, 0xc0, 0x5e, 0x0a, 0xbd, 0xc0, 0x40, 0x1c, 0xda, 0xc0, 0xb5, 0xae, 0xe9, 0xc0, + 0x4c, 0x8c, 0xdf, 0xc0, 0x2c, 0xe3, 0xd2, 0xc0, 0x65, 0x8c, 0xac, 0xc0, 0xa2, 0x04, 0xc0, 0xc0, + 0xc2, 0xe5, 0xdd, 0xc0, 0xe8, 0x40, 0xdf, 0xc0, 0xc0, 0xde, 0xa9, 0xc0, 0xd1, 0x26, 0xd7, 0xc0, + 0xb7, 0x13, 0xbf, 0xc0, 0x86, 0xa0, 0x9b, 0xc0, 0xad, 0x4b, 0x83, 0xc0, 0x1f, 0x7b, 0xc9, 0xc0, + 0xc3, 0xd3, 0xbc, 0xc0, 0x36, 0x72, 0x9e, 0xc0, 0xd9, 0x85, 0xe2, 0xc0, 0x6c, 0x0a, 0xee, 0xc0, + 0xfc, 0xa8, 0x3f, 0xc0, 0xc0, 0xa4, 0xb2, 0xc0, 0x47, 0x35, 0xc5, 0xc0, 0x1a, 0x08, 0xf0, 0xc0, + 0x6d, 0x67, 0xe3, 0xc0, 0x66, 0xad, 0xd3, 0xc0, 0xef, 0xea, 0xbe, 0xc0, 0x0a, 0xbe, 0xbe, 0xc0, + 0xb2, 0x31, 0xdf, 0xc0, 0x92, 0xf8, 0xc8, 0xc0, 0x43, 0x63, 0x84, 0xc0, 0xf9, 0xf4, 0xad, 0xc0, + 0xb5, 0xf2, 0xdc, 0xc0, 0xa7, 0xb3, 0x71, 0xc0, 0x13, 0x03, 0xbb, 0xc0, 0xbc, 0x94, 0xc8, 0xc0, + 0x88, 0x04, 0xe9, 0xc0, 0xe4, 0x6b, 0xd0, 0xc0, 0x2d, 0x45, 0x95, 0xc0, 0xdf, 0xeb, 0xc9, 0xc0, + 0xf3, 0xc8, 0x8b, 0xc0, 0x25, 0xda, 0xe0, 0xc0, 0x69, 0x04, 0xda, 0xc0, 0x68, 0x0d, 0xe7, 0xc0, + 0xd1, 0xb6, 0xce, 0xc0, 0x51, 0x38, 0x98, 0xc0, 0x43, 0xc8, 0xd6, 0xc0, 0xe2, 0xfe, 0xe1, 0xc0, + 0x3a, 0xe1, 0xa0, 0xc0, 0x8e, 0xd0, 0xc0, 0xc0, 0x4b, 0x4b, 0xdf, 0xc0, 0x4d, 0x33, 0xa0, 0xc0, + 0x3e, 0xb5, 0xb1, 0xc0, 0x35, 0x76, 0xab, 0xc0, 0xef, 0x56, 0xd6, 0xc0, 0x16, 0x72, 0xd9, 0xc0, + 0x19, 0x5e, 0xea, 0xc0, 0xc3, 0xc3, 0xbb, 0xc0, 0xe5, 0x06, 0xf3, 0xc0, 0xd2, 0xc5, 0xfe, 0xc0, + 0xb0, 0x2a, 0x96, 0xc0, 0x40, 0x0c, 0xe8, 0xc0, 0x1c, 0x50, 0xa5, 0xc0, 0xe4, 0xeb, 0xe3, 0xc0, + 0x78, 0x90, 0xe6, 0xc0, 0x8e, 0xdf, 0xd0, 0xc0, 0x0c, 0xc5, 0xfe, 0xc0, 0xb3, 0xb9, 0x9d, 0xc0, + 0xc7, 0xc3, 0x84, 0xc0, 0x35, 0x8c, 0xbf, 0xc0, 0xa4, 0xe2, 0x4a, 0xc0, 0xc8, 0xf6, 0xb5, 0xc0, + 0xad, 0x34, 0xf7, 0xc0, 0xb7, 0x8b, 0x63, 0xc0, 0x65, 0x82, 0xa3, 0xc0, 0x48, 0x39, 0xba, 0xc0, + 0xf1, 0x4c, 0x94, 0xc0, 0xf9, 0x4b, 0xbc, 0xc0, 0xd1, 0xcf, 0xd2, 0xc0, 0x63, 0x75, 0x84, 0xc0, + 0x55, 0xfd, 0xa6, 0xc0, 0x1f, 0x7a, 0xf6, 0xc0, 0xdd, 0xc2, 0xfb, 0xc0, 0x93, 0x08, 0xde, 0xc0, + 0xe0, 0x1a, 0x5f, 0xc0, 0xaa, 0x36, 0x9a, 0xc0, 0x83, 0xfa, 0xc9, 0xc0, 0xfc, 0x4e, 0xc6, 0xc0, + 0x1a, 0x28, 0xe0, 0xc0, 0xda, 0x32, 0xab, 0xc0, 0x27, 0xc5, 0x88, 0xc0, 0xad, 0xe6, 0x95, 0xc0, + 0xdc, 0x43, 0xc7, 0xc0, 0x7c, 0x5a, 0xf0, 0xc0, 0xa0, 0xa3, 0xbd, 0xc0, 0xe5, 0x14, 0xcd, 0xc0, + 0x5c, 0xa9, 0xc7, 0xc0, 0xc9, 0x97, 0x8e, 0xc0, 0xc6, 0x1f, 0xdd, 0xc0, 0xce, 0x1f, 0xfb, 0xc0, + 0x69, 0x39, 0xc3, 0xc0, 0x4b, 0xc7, 0xec, 0xc0, 0xb8, 0x42, 0xbc, 0xc0, 0x2f, 0xf4, 0xa4, 0xc0, + 0xb9, 0x68, 0xdd, 0xc0, 0x30, 0xad, 0x98, 0xc0, 0x33, 0xed, 0xac, 0xc0, 0x70, 0xae, 0xcd, 0xc0, + 0xb5, 0x40, 0xba, 0xc0, 0x0e, 0x86, 0xd3, 0xc0, 0xc5, 0xe5, 0xe7, 0xc0, 0x43, 0x63, 0xc1, 0xc0, + 0xeb, 0xf8, 0x9d, 0xc0, 0xe2, 0x9c, 0xcc, 0xc0, 0x38, 0xdd, 0x97, 0xc0, 0xe9, 0xd7, 0xcc, 0xc0, + 0x8f, 0xe4, 0xcc, 0xc0, 0x64, 0x65, 0xf4, 0xc0, 0x34, 0xba, 0xc0, 0xc0, 0xda, 0x21, 0xb1, 0xc0, + 0x7c, 0xcb, 0xc4, 0xc0, 0x46, 0xd5, 0xb5, 0xc0, 0x4a, 0x1a, 0xc6, 0xc0, 0x71, 0x6b, 0x78, 0xc0, + 0x86, 0x6d, 0xc0, 0xc0, 0xc6, 0xaa, 0x03, 0xc1, 0xd2, 0x91, 0x99, 0xc0, 0xc8, 0xd8, 0xe2, 0xc0, + 0x53, 0x49, 0xe2, 0xc0, 0xf4, 0xb1, 0xab, 0xc0}; +// [c h w]: [2 3 9] +unsigned char maxpool3x3s2_fp32_out[] = { + 0x8b, 0x8b, 0xab, 0xc0, 0x8d, 0xc5, 0xa9, 0xc0, 0x5b, 0xbb, 0xa3, 0xc0, 0x62, 0x90, 0x98, 0xc0, + 0x92, 0x0c, 0x8d, 0xc0, 0x92, 0x0c, 0x8d, 0xc0, 0x78, 0xcb, 0x93, 0xc0, 0x8d, 0x71, 0x90, 0xc0, + 0xbe, 0xec, 0x54, 0xc0, 0x7b, 0xda, 0x93, 0xc0, 0x63, 0xc2, 0x9e, 0xc0, 0x01, 0xeb, 0x9c, 0xc0, + 0x98, 0xca, 0x85, 0xc0, 0x50, 0x68, 0x34, 0xc0, 0x92, 0x0c, 0x8d, 0xc0, 0x8f, 0x87, 0x8b, 0xc0, + 0x71, 0xf3, 0x84, 0xc0, 0x71, 0xf3, 0x84, 0xc0, 0xbd, 0x51, 0x61, 0xc0, 0x38, 0x7b, 0x93, 0xc0, + 0x77, 0x09, 0x69, 0xc0, 0x77, 0x09, 0x69, 0xc0, 0x50, 0x68, 0x34, 0xc0, 0xf0, 0x8d, 0x99, 0xc0, + 0x1b, 0x1e, 0x95, 0xc0, 0xed, 0xa5, 0x92, 0xc0, 0x1c, 0x76, 0x82, 0xc0, 0x36, 0x72, 0x9e, 0xc0, + 0xfc, 0xa8, 0x3f, 0xc0, 0xfc, 0xa8, 0x3f, 0xc0, 0x47, 0x35, 0xc5, 0xc0, 0x51, 0x38, 0x98, 0xc0, + 0x51, 0x38, 0x98, 0xc0, 0x43, 0x63, 0x84, 0xc0, 0x43, 0x63, 0x84, 0xc0, 0xa7, 0xb3, 0x71, 0xc0, + 0xb7, 0x8b, 0x63, 0xc0, 0xf1, 0x4c, 0x94, 0xc0, 0x63, 0x75, 0x84, 0xc0, 0x63, 0x75, 0x84, 0xc0, + 0x51, 0x38, 0x98, 0xc0, 0xe0, 0x1a, 0x5f, 0xc0, 0xc7, 0xc3, 0x84, 0xc0, 0xa4, 0xe2, 0x4a, 0xc0, + 0xa4, 0xe2, 0x4a, 0xc0, 0xb7, 0x8b, 0x63, 0xc0, 0xf1, 0x4c, 0x94, 0xc0, 0x63, 0x75, 0x84, 0xc0, + 0x63, 0x75, 0x84, 0xc0, 0xda, 0x21, 0xb1, 0xc0, 0xe0, 0x1a, 0x5f, 0xc0, 0x71, 0x6b, 0x78, 0xc0, + 0xd2, 0x91, 0x99, 0xc0, 0x27, 0xc5, 0x88, 0xc0}; +unsigned char maxpool3x3s2_fp16_in[] = { + 0x24, 0xc7, 0x6f, 0xc6, 0x17, 0xc8, 0x5a, 0xc6, 0xf1, 0xc5, 0xa4, 0xc6, 0x82, 0xc5, 0xd0, 0xc5, + 0xe3, 0xc5, 0xdc, 0xc6, 0x07, 0xc6, 0x1d, 0xc5, 0x11, 0xc7, 0x28, 0xc5, 0x9e, 0xc4, 0x06, 0xc7, + 0x5b, 0xc5, 0xf2, 0xc3, 0xa7, 0xc2, 0xed, 0xc5, 0x1b, 0xc6, 0x5c, 0xc5, 0x4e, 0xc5, 0xcb, 0xc5, + 0x1f, 0xc5, 0x05, 0xc6, 0xb8, 0xc6, 0x50, 0xc6, 0x5d, 0xc6, 0x30, 0xc6, 0xff, 0xc6, 0xca, 0xc4, + 0x0a, 0xc6, 0x3d, 0xc6, 0x2f, 0xc6, 0x5f, 0xc6, 0x9f, 0xc6, 0x44, 0xc5, 0xf9, 0xc6, 0xdd, 0xc5, + 0x0c, 0xc7, 0xb0, 0xc5, 0x6e, 0xc5, 0x1d, 0xc5, 0x06, 0xc6, 0xfe, 0xc7, 0xc4, 0xc4, 0x85, 0xc6, + 0x68, 0xc4, 0x59, 0xc7, 0x55, 0xc5, 0x9c, 0xc6, 0xbc, 0xc6, 0xb3, 0xc5, 0x83, 0xc4, 0xf5, 0xc7, + 0x5d, 0xc6, 0x53, 0xc7, 0xcb, 0xc5, 0xde, 0xc7, 0xf6, 0xc4, 0x49, 0xc6, 0xe7, 0xc4, 0x1e, 0xc5, + 0x22, 0xc7, 0xb3, 0xc4, 0xd2, 0xc6, 0xb7, 0xc6, 0xb0, 0xc6, 0x0f, 0xc7, 0x3e, 0xc6, 0x5c, 0xc4, + 0x8b, 0xc4, 0x27, 0xc4, 0x48, 0xc4, 0x4c, 0xc6, 0x9e, 0xc4, 0x68, 0xc7, 0x42, 0xc7, 0x3b, 0xc5, + 0x13, 0xc6, 0xba, 0xc6, 0x3e, 0xc5, 0x2e, 0xc4, 0x63, 0xc4, 0xa3, 0xc1, 0xcc, 0xc4, 0xfb, 0xc5, + 0xc6, 0xc6, 0x16, 0xc8, 0xbf, 0xc5, 0x77, 0xc6, 0xda, 0xc6, 0xdc, 0xc6, 0xb5, 0xc5, 0xab, 0xc5, + 0x0a, 0xc3, 0x94, 0xc5, 0x98, 0xc7, 0xc1, 0xc5, 0x64, 0xc5, 0x2b, 0xc6, 0x66, 0xc5, 0x3e, 0xc5, + 0x8c, 0xc5, 0x42, 0xc7, 0x69, 0xc5, 0x57, 0xc5, 0xc0, 0xc7, 0xa8, 0xc4, 0x04, 0xc7, 0x95, 0xc4, + 0xfb, 0xc4, 0x79, 0xc5, 0xc2, 0xc6, 0xea, 0xc5, 0x9b, 0xc4, 0xcf, 0xc4, 0x0e, 0xc6, 0x9a, 0xc6, + 0x48, 0xc3, 0x4d, 0xc7, 0xb4, 0xc6, 0x6a, 0xc5, 0xd7, 0xc5, 0x30, 0xc6, 0x12, 0xc6, 0x0d, 0xc6, + 0xcc, 0xc5, 0x9e, 0xc5, 0x42, 0xc6, 0xda, 0xc5, 0x13, 0xc4, 0x05, 0xc7, 0xfb, 0xc5, 0x25, 0xc6, + 0x89, 0xc6, 0xe8, 0xc5, 0xd0, 0xc6, 0x4d, 0xc7, 0xfc, 0xc6, 0x97, 0xc6, 0x64, 0xc5, 0x00, 0xc6, + 0xef, 0xc6, 0xfa, 0xc6, 0x4e, 0xc5, 0xb9, 0xc6, 0xf8, 0xc5, 0xdd, 0xc4, 0x1a, 0xc4, 0x4b, 0xc6, + 0xe6, 0xc5, 0xf3, 0xc4, 0x14, 0xc7, 0x70, 0xc7, 0xfd, 0xc1, 0x95, 0xc5, 0x29, 0xc6, 0x80, 0xc7, + 0x1b, 0xc7, 0x9d, 0xc6, 0xf7, 0xc5, 0xf5, 0xc5, 0xf9, 0xc6, 0x47, 0xc6, 0x23, 0xc4, 0x6f, 0xc5, + 0xe7, 0xc6, 0x8d, 0xc3, 0xd8, 0xc5, 0x44, 0xc6, 0x48, 0xc7, 0x83, 0xc6, 0xaa, 0xc4, 0x4f, 0xc6, + 0x5e, 0xc4, 0x06, 0xc7, 0xd0, 0xc6, 0x38, 0xc7, 0x75, 0xc6, 0xc1, 0xc4, 0xb6, 0xc6, 0x0f, 0xc7, + 0x07, 0xc5, 0x06, 0xc6, 0xfa, 0xc6, 0x01, 0xc5, 0x8d, 0xc5, 0x5b, 0xc5, 0xb2, 0xc6, 0xcb, 0xc6, + 0x52, 0xc7, 0xde, 0xc5, 0x98, 0xc7, 0xf6, 0xc7, 0xb1, 0xc4, 0x40, 0xc7, 0x2a, 0xc5, 0x1f, 0xc7, + 0x34, 0xc7, 0x86, 0xc6, 0xf6, 0xc7, 0xed, 0xc4, 0x26, 0xc4, 0xfc, 0xc5, 0x57, 0xc2, 0xaf, 0xc5, + 0xb9, 0xc7, 0x1c, 0xc3, 0x1c, 0xc5, 0xd1, 0xc5, 0xa2, 0xc4, 0xe2, 0xc5, 0x96, 0xc6, 0x23, 0xc4, + 0x37, 0xc5, 0xb3, 0xc7, 0xde, 0xc7, 0xf0, 0xc6, 0xf8, 0xc2, 0xd1, 0xc4, 0x4f, 0xc6, 0x32, 0xc6, + 0x01, 0xc7, 0x59, 0xc5, 0x46, 0xc4, 0xaf, 0xc4, 0x3a, 0xc6, 0x82, 0xc7, 0xed, 0xc5, 0x68, 0xc6, + 0x3d, 0xc6, 0x74, 0xc4, 0xe8, 0xc6, 0xd8, 0xc7, 0x19, 0xc6, 0x66, 0xc7, 0xe2, 0xc5, 0x27, 0xc5, + 0xeb, 0xc6, 0xc5, 0xc4, 0x67, 0xc5, 0x6d, 0xc6, 0xd2, 0xc5, 0x9c, 0xc6, 0x3f, 0xc7, 0x0b, 0xc6, + 0xef, 0xc4, 0x64, 0xc6, 0xbe, 0xc4, 0x66, 0xc6, 0x67, 0xc6, 0xa3, 0xc7, 0x05, 0xc6, 0x89, 0xc5, + 0x26, 0xc6, 0xae, 0xc5, 0x30, 0xc6, 0xc3, 0xc3, 0x03, 0xc6, 0x1d, 0xc8, 0xcc, 0xc4, 0x16, 0xc7, + 0x12, 0xc7, 0x5d, 0xc5}; +unsigned char maxpool3x3s2_fp16_out[] = { + 0x5c, 0xc5, 0x4e, 0xc5, 0x1d, 0xc5, 0xc4, 0xc4, 0x68, 0xc4, 0x68, 0xc4, 0x9e, 0xc4, 0x83, 0xc4, + 0xa7, 0xc2, 0x9e, 0xc4, 0xf6, 0xc4, 0xe7, 0xc4, 0x2e, 0xc4, 0xa3, 0xc1, 0x68, 0xc4, 0x5c, 0xc4, + 0x27, 0xc4, 0x27, 0xc4, 0x0a, 0xc3, 0x9b, 0xc4, 0x48, 0xc3, 0x48, 0xc3, 0xa3, 0xc1, 0xcc, 0xc4, + 0xa8, 0xc4, 0x95, 0xc4, 0x13, 0xc4, 0xf3, 0xc4, 0xfd, 0xc1, 0xfd, 0xc1, 0x29, 0xc6, 0xc1, 0xc4, + 0xc1, 0xc4, 0x23, 0xc4, 0x23, 0xc4, 0x8d, 0xc3, 0x1c, 0xc3, 0xa2, 0xc4, 0x23, 0xc4, 0x23, 0xc4, + 0xc1, 0xc4, 0xf8, 0xc2, 0x26, 0xc4, 0x57, 0xc2, 0x57, 0xc2, 0x1c, 0xc3, 0xa2, 0xc4, 0x23, 0xc4, + 0x23, 0xc4, 0x89, 0xc5, 0xf8, 0xc2, 0xc3, 0xc3, 0xcc, 0xc4, 0x46, 0xc4}; +unsigned char maxpool3x3s2_int8_in[] = { + 0x91, 0x9c, 0x81, 0x9e, 0xa4, 0x99, 0xab, 0xa6, 0xa5, 0x96, 0xa3, 0xb1, 0x92, 0xb0, 0xb8, 0x93, + 0xad, 0xc2, 0xcc, 0xa4, 0xa1, 0xad, 0xae, 0xa6, 0xb1, 0xa3, 0x98, 0x9e, 0x9d, 0xa0, 0x94, 0xb6, + 0xa2, 0x9f, 0xa0, 0x9d, 0x99, 0xae, 0x94, 0xa5, 0x93, 0xa8, 0xac, 0xb1, 0xa3, 0x84, 0xb6, 0x9b, + 0xbc, 0x8e, 0xad, 0x9a, 0x98, 0xa8, 0xba, 0x85, 0x9d, 0x8e, 0xa6, 0x86, 0xb3, 0x9f, 0xb4, 0xb1, + 0x91, 0xb7, 0x96, 0x98, 0x98, 0x93, 0x9f, 0xbc, 0xba, 0xc0, 0xbe, 0x9e, 0xb8, 0x8d, 0x8f, 0xaf, + 0xa2, 0x98, 0xaf, 0xbf, 0xbc, 0xd4, 0xb6, 0xa3, 0x97, 0x81, 0xa7, 0x9c, 0x96, 0x96, 0xa8, 0xa8, + 0xc9, 0xaa, 0x8a, 0xa7, 0xac, 0xa0, 0xac, 0xaf, 0xaa, 0x8f, 0xac, 0xad, 0x88, 0xb8, 0x93, 0xb9, + 0xb3, 0xab, 0x97, 0xa4, 0xb9, 0xb5, 0xa2, 0x9a, 0xc8, 0x8f, 0x98, 0xac, 0xa6, 0xa0, 0xa2, 0xa2, + 0xa6, 0xa9, 0x9f, 0xa5, 0xc1, 0x93, 0xa3, 0xa1, 0x9b, 0xa4, 0x96, 0x8f, 0x94, 0x9a, 0xac, 0xa3, + 0x95, 0x94, 0xae, 0x98, 0xa3, 0xb5, 0xc0, 0x9e, 0xa5, 0xb3, 0x92, 0x8d, 0xd2, 0xaa, 0xa1, 0x8c, + 0x92, 0x9a, 0xa4, 0xa4, 0x94, 0x9f, 0xc0, 0xac, 0x95, 0xc5, 0xa5, 0x9f, 0x8f, 0x9b, 0xb8, 0x9e, + 0xbc, 0x93, 0x96, 0x90, 0x9c, 0xb6, 0x98, 0x93, 0xb2, 0xa3, 0x94, 0xb2, 0xaa, 0xad, 0x98, 0x97, + 0x8f, 0xa5, 0x8a, 0x85, 0xb7, 0x90, 0xb0, 0x92, 0x90, 0x9b, 0x85, 0xb4, 0xc0, 0xa3, 0xcf, 0xa8, + 0x88, 0xc9, 0xb1, 0xa6, 0xb8, 0xa5, 0x9a, 0xc0, 0xaf, 0x89, 0x86, 0x94, 0xca, 0xb5, 0x9e, 0xa0, + 0x93, 0xad, 0xbe, 0xb7, 0xa0, 0x8c, 0xa4, 0x9d, 0x9f, 0xbb, 0x95, 0x86, 0xa1, 0x8d, 0xa5, 0xb0, + 0x95, 0xb6, 0xac, 0x9c, 0xa6, 0x9a, 0x90, 0xa2, 0xb4, 0x9d, 0xb6, 0x9d, 0x9d, 0x8a, 0xa3, 0xaa, + 0xa1, 0xa8, 0xa0, 0xc4, 0xa3, 0x80, 0xb6, 0x92, 0x92, 0xad}; +unsigned char maxpool3x3s2_int8_out[] = { + 0xad, 0xae, 0xb1, 0xb6, 0xbc, 0xbc, 0xb8, 0xba, 0xcc, 0xb8, 0xb3, 0xb4, 0xbf, 0xd4, + 0xbc, 0xbc, 0xc0, 0xc0, 0xc9, 0xb9, 0xc8, 0xc8, 0xd4, 0xb6, 0xb8, 0xb9, 0xc1, 0xb3, + 0xd2, 0xd2, 0xa1, 0xb6, 0xb6, 0xc0, 0xc0, 0xc5, 0xc9, 0xb8, 0xc0, 0xc0, 0xb6, 0xca, + 0xc0, 0xcf, 0xcf, 0xc9, 0xb8, 0xc0, 0xc0, 0xaa, 0xca, 0xc4, 0xb6, 0xbe}; + +// [c h w]: [2 6 18] +unsigned char maxpool3x3s2_p1_fp32_in[] = { + 0x73, 0x36, 0x81, 0x40, 0xc0, 0x7e, 0x8b, 0x40, 0x6c, 0x36, 0x40, 0x40, 0xe5, 0x3b, 0x2c, 0x40, + 0x2e, 0x88, 0x89, 0x40, 0x4a, 0x15, 0x93, 0x40, 0x4e, 0xcf, 0x63, 0x40, 0xd6, 0x33, 0x1a, 0x3e, + 0x9d, 0xd6, 0x48, 0x40, 0x57, 0x9c, 0xe6, 0x3f, 0x11, 0x4e, 0x3b, 0x40, 0x5b, 0x66, 0xfb, 0x3f, + 0x8f, 0x13, 0x80, 0x3f, 0x1a, 0xcc, 0x3f, 0x40, 0xe4, 0xf4, 0x17, 0x40, 0x11, 0x75, 0x64, 0x40, + 0x31, 0x8f, 0x00, 0x40, 0x49, 0xcf, 0xc8, 0x3f, 0xe3, 0xa0, 0x5d, 0x40, 0x6f, 0xb0, 0x6a, 0x40, + 0xbb, 0x53, 0x18, 0x40, 0xce, 0xcb, 0x07, 0x40, 0x69, 0x85, 0x6e, 0x40, 0x60, 0x8d, 0xf4, 0x3f, + 0xcc, 0x9f, 0x38, 0x40, 0x43, 0x73, 0x63, 0x40, 0xf7, 0xa4, 0x0d, 0x40, 0x5b, 0x19, 0x8a, 0x40, + 0xb1, 0x52, 0xf4, 0x3f, 0x07, 0x2c, 0x34, 0x40, 0x39, 0x5a, 0x6e, 0x40, 0xe8, 0x0b, 0x36, 0x40, + 0x07, 0x2b, 0x77, 0x40, 0x09, 0xbc, 0x7c, 0x40, 0x72, 0xf3, 0x31, 0x40, 0x3d, 0xcc, 0x5b, 0x40, + 0xec, 0xc8, 0x29, 0x3f, 0xb0, 0xd9, 0xa5, 0x40, 0x15, 0x56, 0xb3, 0x3f, 0x2b, 0x1d, 0x41, 0x40, + 0x4a, 0xf5, 0x27, 0x40, 0xaf, 0xdf, 0x84, 0x40, 0x01, 0x0c, 0x5e, 0x40, 0xf2, 0xb8, 0x13, 0x40, + 0x59, 0xcd, 0xbd, 0x3f, 0xc2, 0xba, 0x58, 0x40, 0xc6, 0x5b, 0x53, 0x40, 0xf8, 0x63, 0x78, 0x40, + 0x62, 0xa0, 0x4b, 0x40, 0x1c, 0xa6, 0x31, 0x40, 0xf1, 0xae, 0x24, 0x40, 0x77, 0x88, 0x5e, 0x40, + 0xc8, 0xe7, 0xa9, 0x3f, 0xaf, 0xac, 0x79, 0x40, 0xee, 0x90, 0x5f, 0x40, 0xe5, 0x3c, 0x53, 0x40, + 0xb9, 0xb0, 0xf1, 0x3f, 0xf0, 0x40, 0x02, 0x40, 0x86, 0x4d, 0x62, 0x40, 0xf9, 0x05, 0x7a, 0x40, + 0xe2, 0x4a, 0x0d, 0x40, 0xa6, 0xb4, 0x90, 0x40, 0x5a, 0xc5, 0x28, 0x40, 0xd1, 0x59, 0x29, 0x40, + 0xbd, 0xc6, 0xe3, 0x3f, 0x89, 0x82, 0xda, 0x3f, 0x46, 0xc5, 0x0e, 0x40, 0x0c, 0x73, 0x28, 0x40, + 0xf9, 0x3e, 0x8d, 0x40, 0xaa, 0xd9, 0x5c, 0x40, 0xe3, 0x1e, 0x3b, 0x40, 0x8a, 0x0d, 0x31, 0x40, + 0x67, 0x24, 0x43, 0x40, 0x66, 0x6c, 0x56, 0x40, 0xdf, 0x85, 0x50, 0x40, 0x4d, 0x52, 0xf4, 0x3f, + 0xa8, 0x50, 0x2c, 0x40, 0x2a, 0xb2, 0xf6, 0x3f, 0x15, 0x70, 0x16, 0x40, 0x9a, 0xf8, 0x48, 0x40, + 0x59, 0x85, 0xd4, 0x3f, 0x34, 0x41, 0xe2, 0x3f, 0x79, 0x75, 0x08, 0x40, 0xed, 0x6d, 0x82, 0x40, + 0x23, 0xc6, 0x1f, 0x40, 0xcc, 0x13, 0x19, 0x40, 0x33, 0xa9, 0x1f, 0x40, 0x22, 0xcf, 0x8c, 0x40, + 0x69, 0x1a, 0x00, 0x40, 0x62, 0x92, 0x3b, 0x40, 0xb5, 0xae, 0x4b, 0x40, 0x4f, 0x7d, 0x43, 0x40, + 0x78, 0x14, 0x61, 0x40, 0xb9, 0xd9, 0x1d, 0x40, 0x17, 0x90, 0xf0, 0x3f, 0xa9, 0x16, 0x1d, 0x40, + 0x03, 0xff, 0x4b, 0x40, 0xbf, 0x3e, 0xbf, 0x3f, 0xd3, 0x03, 0x27, 0x40, 0xaa, 0xe7, 0x0f, 0x40, + 0xfd, 0x1a, 0x33, 0x40, 0x55, 0x86, 0xf4, 0x3f, 0xaf, 0x71, 0x85, 0x40, 0x02, 0x8d, 0x17, 0x40, + 0x08, 0xc9, 0x91, 0x40, 0x86, 0x07, 0x64, 0x40, 0x5d, 0x29, 0x80, 0x40, 0x8f, 0x62, 0x56, 0x40, + 0xd9, 0xae, 0x84, 0x40, 0xf3, 0xbd, 0x6e, 0x40, 0xb1, 0x82, 0xbe, 0x3f, 0xad, 0x09, 0x75, 0x40, + 0x36, 0x92, 0x2f, 0x40, 0xaf, 0xad, 0x09, 0x40, 0x75, 0x40, 0x2b, 0x40, 0x4b, 0xcf, 0x0c, 0x40, + 0x53, 0x72, 0x8e, 0x40, 0x06, 0x78, 0xf4, 0xbd, 0xd9, 0x24, 0x42, 0x40, 0xe6, 0x56, 0x21, 0x40, + 0x25, 0x9f, 0x54, 0x40, 0x8e, 0x01, 0xde, 0x3f, 0xc5, 0x90, 0x31, 0x40, 0x09, 0x4e, 0x31, 0x40, + 0xda, 0x42, 0x71, 0x40, 0xf7, 0x5b, 0x34, 0x40, 0x83, 0x06, 0x30, 0x40, 0xb8, 0x3f, 0x1a, 0x40, + 0x8d, 0x6a, 0x9a, 0x40, 0xca, 0xb5, 0x3d, 0x40, 0x60, 0x1f, 0x46, 0x40, 0xeb, 0x83, 0x6b, 0x40, + 0xda, 0xf6, 0x25, 0x40, 0x3a, 0xb1, 0x78, 0x40, 0x6a, 0x65, 0x66, 0x40, 0x1c, 0x2a, 0x9d, 0x40, + 0xc6, 0x12, 0x6e, 0x40, 0x62, 0xe5, 0x80, 0x40, 0x87, 0x69, 0x8f, 0x40, 0xcf, 0xca, 0x8b, 0x40, + 0xc8, 0x36, 0x82, 0x3f, 0xb9, 0x86, 0x5c, 0x40, 0x7a, 0x70, 0x38, 0x40, 0xf4, 0xbb, 0xd2, 0x3f, + 0x09, 0xb5, 0x6f, 0x40, 0xac, 0xaa, 0x0f, 0x40, 0x7b, 0x7e, 0x4d, 0x40, 0x80, 0x6a, 0x5e, 0x40, + 0xa6, 0x9c, 0x2b, 0x40, 0xe0, 0x1e, 0x58, 0x40, 0xc8, 0x54, 0xaf, 0x40, 0x08, 0xe9, 0x89, 0x40, + 0xac, 0xc1, 0x1f, 0x40, 0x17, 0x9b, 0x93, 0x40, 0x7c, 0x0c, 0x12, 0x40, 0xc9, 0xed, 0x19, 0x40, + 0x31, 0x70, 0x78, 0x40, 0x9b, 0x9b, 0x27, 0x40, 0x8e, 0x3f, 0x2a, 0x40, 0x6e, 0x31, 0xf5, 0x3f, + 0x93, 0x93, 0x40, 0x40, 0xc3, 0x7a, 0x43, 0x40, 0xea, 0x51, 0x6a, 0x40, 0x1d, 0x8d, 0x4e, 0x40, + 0x5d, 0x53, 0x48, 0x40, 0xb9, 0x1e, 0x8c, 0x40, 0xf0, 0x0a, 0x87, 0x40, 0x2d, 0xb2, 0x10, 0x40, + 0xa9, 0x36, 0x27, 0x40, 0xfd, 0xbe, 0xc2, 0x3f, 0x49, 0xdc, 0x9b, 0x40, 0x48, 0x75, 0x39, 0x40, + 0xba, 0x2a, 0x68, 0x40, 0xac, 0xdc, 0x3b, 0x40, 0x61, 0xbc, 0xad, 0x40, 0x41, 0xa1, 0x0e, 0x40, + 0xe4, 0x20, 0x4a, 0x40, 0x87, 0xed, 0x80, 0x40, 0x3f, 0x79, 0x21, 0x40, 0xa2, 0x32, 0xad, 0x3f, + 0xfc, 0x40, 0x40, 0x40, 0x44, 0xd3, 0x09, 0x40, 0xe9, 0x6a, 0x6d, 0x40, 0xb4, 0x9c, 0x6f, 0x40, + 0x45, 0xc8, 0x7c, 0x40, 0xb8, 0x68, 0x95, 0x40, 0xef, 0x44, 0xa3, 0x40, 0xbd, 0x76, 0x3a, 0x40, + 0x60, 0x24, 0x10, 0x40, 0xb9, 0x00, 0x0e, 0x40, 0x8a, 0xef, 0x61, 0x40, 0x8c, 0x4b, 0x6a, 0x40, + 0xff, 0x2c, 0x1c, 0x40, 0x49, 0x15, 0x59, 0x40, 0x7f, 0x8f, 0x25, 0x40, 0xeb, 0x1c, 0x14, 0x40, + 0xad, 0x07, 0x26, 0x40, 0x70, 0xf8, 0xa6, 0x40, 0xc1, 0x6c, 0x4c, 0x40, 0x76, 0x20, 0x21, 0x40, + 0x91, 0x1c, 0x39, 0x40, 0xdd, 0xa6, 0xa7, 0x40, 0x1d, 0x99, 0x54, 0x40, 0x37, 0x22, 0x80, 0x40, + 0x6a, 0x5d, 0x2c, 0x40, 0x8d, 0x81, 0x65, 0x40, 0x40, 0x95, 0x7b, 0x40, 0x94, 0xcb, 0x49, 0x40, + 0x94, 0x98, 0x3d, 0x40, 0x5b, 0xb8, 0x64, 0x3f, 0xb7, 0xee, 0x1c, 0x40, 0x27, 0xb1, 0x63, 0x40, + 0x26, 0x3e, 0xd6, 0x3f, 0x47, 0x61, 0x2a, 0x40, 0xc8, 0x11, 0x05, 0x40, 0x88, 0x0a, 0x43, 0x40}; +// [c h w]: [2 3 9] +unsigned char maxpool3x3s2_p1_fp32_out[] = { + 0xc0, 0x7e, 0x8b, 0x40, 0xc0, 0x7e, 0x8b, 0x40, 0x4a, 0x15, 0x93, 0x40, 0x4a, 0x15, 0x93, 0x40, + 0x5b, 0x19, 0x8a, 0x40, 0x5b, 0x19, 0x8a, 0x40, 0x39, 0x5a, 0x6e, 0x40, 0x09, 0xbc, 0x7c, 0x40, + 0x09, 0xbc, 0x7c, 0x40, 0xb0, 0xd9, 0xa5, 0x40, 0xb0, 0xd9, 0xa5, 0x40, 0xaf, 0xdf, 0x84, 0x40, + 0xa6, 0xb4, 0x90, 0x40, 0xa6, 0xb4, 0x90, 0x40, 0x5b, 0x19, 0x8a, 0x40, 0xf8, 0x63, 0x78, 0x40, + 0xf9, 0x3e, 0x8d, 0x40, 0x09, 0xbc, 0x7c, 0x40, 0xee, 0x90, 0x5f, 0x40, 0x78, 0x14, 0x61, 0x40, + 0xf9, 0x05, 0x7a, 0x40, 0xa6, 0xb4, 0x90, 0x40, 0xa6, 0xb4, 0x90, 0x40, 0xed, 0x6d, 0x82, 0x40, + 0xaf, 0x71, 0x85, 0x40, 0x08, 0xc9, 0x91, 0x40, 0x22, 0xcf, 0x8c, 0x40, 0xd9, 0xae, 0x84, 0x40, + 0x8d, 0x6a, 0x9a, 0x40, 0xad, 0x09, 0x75, 0x40, 0x3a, 0xb1, 0x78, 0x40, 0x1c, 0x2a, 0x9d, 0x40, + 0x1c, 0x2a, 0x9d, 0x40, 0x87, 0x69, 0x8f, 0x40, 0xcf, 0xca, 0x8b, 0x40, 0xda, 0x42, 0x71, 0x40, + 0x09, 0xb5, 0x6f, 0x40, 0x8d, 0x6a, 0x9a, 0x40, 0xb9, 0x1e, 0x8c, 0x40, 0xc8, 0x54, 0xaf, 0x40, + 0x1c, 0x2a, 0x9d, 0x40, 0x1c, 0x2a, 0x9d, 0x40, 0x61, 0xbc, 0xad, 0x40, 0xcf, 0xca, 0x8b, 0x40, + 0x87, 0xed, 0x80, 0x40, 0xea, 0x51, 0x6a, 0x40, 0xdd, 0xa6, 0xa7, 0x40, 0xdd, 0xa6, 0xa7, 0x40, + 0xef, 0x44, 0xa3, 0x40, 0x49, 0xdc, 0x9b, 0x40, 0x8c, 0x4b, 0x6a, 0x40, 0x61, 0xbc, 0xad, 0x40, + 0x87, 0xed, 0x80, 0x40, 0x70, 0xf8, 0xa6, 0x40}; +unsigned char maxpool3x3s2_p1_fp16_in[] = { + 0x09, 0x44, 0x5b, 0x44, 0x01, 0x42, 0x61, 0x41, 0x4c, 0x44, 0x98, 0x44, 0x1e, 0x43, 0xd1, 0x30, + 0x46, 0x42, 0x34, 0x3f, 0xda, 0x41, 0xdb, 0x3f, 0x00, 0x3c, 0xfe, 0x41, 0xbf, 0x40, 0x23, 0x43, + 0x04, 0x40, 0x46, 0x3e, 0xed, 0x42, 0x55, 0x43, 0xc2, 0x40, 0x3e, 0x40, 0x74, 0x43, 0xa4, 0x3f, + 0xc4, 0x41, 0x1b, 0x43, 0x6d, 0x40, 0x50, 0x44, 0xa2, 0x3f, 0xa1, 0x41, 0x72, 0x43, 0xb0, 0x41, + 0xb9, 0x43, 0xe5, 0x43, 0x8f, 0x41, 0xde, 0x42, 0x4e, 0x39, 0x2e, 0x45, 0x9a, 0x3d, 0x08, 0x42, + 0x3f, 0x41, 0x26, 0x44, 0xf0, 0x42, 0x9d, 0x40, 0xee, 0x3d, 0xc5, 0x42, 0x9a, 0x42, 0xc3, 0x43, + 0x5d, 0x42, 0x8d, 0x41, 0x25, 0x41, 0xf4, 0x42, 0x4f, 0x3d, 0xcd, 0x43, 0xfc, 0x42, 0x99, 0x42, + 0x8d, 0x3f, 0x12, 0x40, 0x12, 0x43, 0xd0, 0x43, 0x6a, 0x40, 0x85, 0x44, 0x46, 0x41, 0x4a, 0x41, + 0x1e, 0x3f, 0xd4, 0x3e, 0x76, 0x40, 0x43, 0x41, 0x69, 0x44, 0xe6, 0x42, 0xd8, 0x41, 0x88, 0x41, + 0x19, 0x42, 0xb3, 0x42, 0x84, 0x42, 0xa2, 0x3f, 0x62, 0x41, 0xb5, 0x3f, 0xb3, 0x40, 0x47, 0x42, + 0xa4, 0x3e, 0x12, 0x3f, 0x43, 0x40, 0x13, 0x44, 0xfe, 0x40, 0xc8, 0x40, 0xfd, 0x40, 0x66, 0x44, + 0x00, 0x40, 0xdc, 0x41, 0x5d, 0x42, 0x1b, 0x42, 0x08, 0x43, 0xee, 0x40, 0x84, 0x3f, 0xe8, 0x40, + 0x5f, 0x42, 0xf9, 0x3d, 0x38, 0x41, 0x7f, 0x40, 0x98, 0x41, 0xa4, 0x3f, 0x2b, 0x44, 0xbc, 0x40, + 0x8e, 0x44, 0x20, 0x43, 0x01, 0x44, 0xb3, 0x42, 0x25, 0x44, 0x75, 0x43, 0xf4, 0x3d, 0xa8, 0x43, + 0x7c, 0x41, 0x4d, 0x40, 0x5a, 0x41, 0x66, 0x40, 0x73, 0x44, 0xa3, 0xaf, 0x11, 0x42, 0x0a, 0x41, + 0xa4, 0x42, 0xf0, 0x3e, 0x8c, 0x41, 0x8a, 0x41, 0x8a, 0x43, 0xa2, 0x41, 0x80, 0x41, 0xd1, 0x40, + 0xd3, 0x44, 0xed, 0x41, 0x30, 0x42, 0x5c, 0x43, 0x2f, 0x41, 0xc5, 0x43, 0x33, 0x43, 0xe9, 0x44, + 0x70, 0x43, 0x07, 0x44, 0x7b, 0x44, 0x5e, 0x44, 0x11, 0x3c, 0xe4, 0x42, 0xc3, 0x41, 0x95, 0x3e, + 0x7d, 0x43, 0x7d, 0x40, 0x6b, 0x42, 0xf3, 0x42, 0x5c, 0x41, 0xc0, 0x42, 0x7a, 0x45, 0x4f, 0x44, + 0xfe, 0x40, 0x9c, 0x44, 0x90, 0x40, 0xcf, 0x40, 0xc3, 0x43, 0x3c, 0x41, 0x51, 0x41, 0xa9, 0x3f, + 0x04, 0x42, 0x1b, 0x42, 0x52, 0x43, 0x74, 0x42, 0x42, 0x42, 0x60, 0x44, 0x38, 0x44, 0x85, 0x40, + 0x39, 0x41, 0x15, 0x3e, 0xde, 0x44, 0xcb, 0x41, 0x41, 0x43, 0xde, 0x41, 0x6d, 0x45, 0x75, 0x40, + 0x51, 0x42, 0x07, 0x44, 0x0b, 0x41, 0x69, 0x3d, 0x02, 0x42, 0x4e, 0x40, 0x6b, 0x43, 0x7c, 0x43, + 0xe6, 0x43, 0xab, 0x44, 0x1a, 0x45, 0xd3, 0x41, 0x81, 0x40, 0x70, 0x40, 0x0f, 0x43, 0x52, 0x43, + 0xe1, 0x40, 0xc8, 0x42, 0x2c, 0x41, 0xa0, 0x40, 0x30, 0x41, 0x37, 0x45, 0x63, 0x42, 0x09, 0x41, + 0xc8, 0x41, 0x3d, 0x45, 0xa4, 0x42, 0x01, 0x44, 0x62, 0x41, 0x2c, 0x43, 0xdc, 0x43, 0x4e, 0x42, + 0xec, 0x41, 0x25, 0x3b, 0xe7, 0x40, 0x1d, 0x43, 0xb1, 0x3e, 0x53, 0x41, 0x28, 0x40, 0x18, 0x42}; +unsigned char maxpool3x3s2_p1_fp16_out[] = { + 0x5b, 0x44, 0x5b, 0x44, 0x98, 0x44, 0x98, 0x44, 0x50, 0x44, 0x50, 0x44, 0x72, 0x43, 0xe5, 0x43, + 0xe5, 0x43, 0x2e, 0x45, 0x2e, 0x45, 0x26, 0x44, 0x85, 0x44, 0x85, 0x44, 0x50, 0x44, 0xc3, 0x43, + 0x69, 0x44, 0xe5, 0x43, 0xfc, 0x42, 0x08, 0x43, 0xd0, 0x43, 0x85, 0x44, 0x85, 0x44, 0x13, 0x44, + 0x2b, 0x44, 0x8e, 0x44, 0x66, 0x44, 0x25, 0x44, 0xd3, 0x44, 0xa8, 0x43, 0xc5, 0x43, 0xe9, 0x44, + 0xe9, 0x44, 0x7b, 0x44, 0x5e, 0x44, 0x8a, 0x43, 0x7d, 0x43, 0xd3, 0x44, 0x60, 0x44, 0x7a, 0x45, + 0xe9, 0x44, 0xe9, 0x44, 0x6d, 0x45, 0x5e, 0x44, 0x07, 0x44, 0x52, 0x43, 0x3d, 0x45, 0x3d, 0x45, + 0x1a, 0x45, 0xde, 0x44, 0x52, 0x43, 0x6d, 0x45, 0x07, 0x44, 0x37, 0x45}; +unsigned char maxpool3x3s2_p1_int8_in[] = { + 0x5e, 0x65, 0x46, 0x3f, 0x64, 0x6b, 0x53, 0x04, 0x49, 0x2a, 0x44, 0x2e, 0x17, 0x46, 0x37, 0x53, + 0x2f, 0x25, 0x51, 0x55, 0x37, 0x31, 0x57, 0x2c, 0x43, 0x53, 0x34, 0x64, 0x2c, 0x42, 0x57, 0x42, + 0x5a, 0x5c, 0x41, 0x50, 0x0f, 0x79, 0x21, 0x46, 0x3d, 0x61, 0x51, 0x36, 0x23, 0x4f, 0x4d, 0x5a, + 0x4a, 0x41, 0x3c, 0x51, 0x1f, 0x5b, 0x51, 0x4d, 0x2c, 0x2f, 0x52, 0x5b, 0x33, 0x69, 0x3d, 0x3e, + 0x29, 0x28, 0x34, 0x3d, 0x67, 0x50, 0x44, 0x40, 0x47, 0x4e, 0x4c, 0x2c, 0x3f, 0x2d, 0x37, 0x49, + 0x27, 0x29, 0x32, 0x5f, 0x3a, 0x38, 0x3a, 0x66, 0x2f, 0x44, 0x4a, 0x47, 0x52, 0x39, 0x2c, 0x39, + 0x4a, 0x23, 0x3d, 0x34, 0x41, 0x2c, 0x61, 0x37, 0x6a, 0x53, 0x5d, 0x4e, 0x60, 0x57, 0x23, 0x59, + 0x40, 0x32, 0x3e, 0x33, 0x68, 0xfd, 0x47, 0x3b, 0x4d, 0x28, 0x41, 0x40, 0x58, 0x42, 0x40, 0x38, + 0x70, 0x45, 0x48, 0x56, 0x3c, 0x5a, 0x54, 0x72, 0x57, 0x5e, 0x68, 0x66, 0x18, 0x50, 0x43, 0x26, + 0x57, 0x34, 0x4b, 0x51, 0x3e, 0x4f, 0x7f, 0x64, 0x3a, 0x6b, 0x35, 0x38, 0x5a, 0x3d, 0x3e, 0x2d, + 0x46, 0x47, 0x55, 0x4b, 0x49, 0x66, 0x62, 0x35, 0x3d, 0x23, 0x71, 0x43, 0x54, 0x44, 0x7e, 0x34, + 0x49, 0x5e, 0x3b, 0x1f, 0x46, 0x32, 0x56, 0x57, 0x5c, 0x6d, 0x77, 0x44, 0x34, 0x34, 0x52, 0x55, + 0x39, 0x4f, 0x3c, 0x36, 0x3c, 0x79, 0x4a, 0x3b, 0x43, 0x7a, 0x4d, 0x5d, 0x3f, 0x53, 0x5b, 0x49, + 0x45, 0x15, 0x39, 0x53, 0x27, 0x3e, 0x30, 0x47}; +unsigned char maxpool3x3s2_p1_int8_out[] = { + 0x65, 0x65, 0x6b, 0x6b, 0x64, 0x64, 0x57, 0x5c, 0x5c, 0x79, 0x79, 0x61, 0x69, 0x69, + 0x64, 0x5a, 0x67, 0x5c, 0x51, 0x52, 0x5b, 0x69, 0x69, 0x5f, 0x61, 0x6a, 0x66, 0x60, + 0x70, 0x59, 0x5a, 0x72, 0x72, 0x68, 0x66, 0x58, 0x57, 0x70, 0x66, 0x7f, 0x72, 0x72, + 0x7e, 0x66, 0x5e, 0x55, 0x7a, 0x7a, 0x77, 0x71, 0x55, 0x7e, 0x5e, 0x79}; + +// [c h w]: [2 3 10] +unsigned char maxpool3x3s1_p1_fp32_in[] = { + 0xec, 0xfb, 0x1f, 0x40, 0xae, 0x07, 0x1c, 0x40, 0xaf, 0x4e, 0x32, 0x40, 0x23, 0x27, 0x35, 0x40, + 0x45, 0x0f, 0x17, 0x40, 0x44, 0x6a, 0xdf, 0x3f, 0x0e, 0xd8, 0x6d, 0x40, 0x3a, 0x88, 0x2d, 0x40, + 0xa2, 0xcc, 0x31, 0x40, 0xfe, 0x32, 0xca, 0x3f, 0x5c, 0x8f, 0x40, 0x40, 0xaa, 0x3d, 0x14, 0x40, + 0xe5, 0xb0, 0x83, 0x40, 0x4d, 0x95, 0x9a, 0x40, 0x1c, 0xc2, 0x51, 0x40, 0x88, 0x5c, 0x89, 0x40, + 0x26, 0x76, 0xb0, 0x3f, 0x78, 0x51, 0xa1, 0x3f, 0xfc, 0x6d, 0x88, 0x40, 0x74, 0x62, 0x6b, 0x40, + 0xe5, 0xa3, 0x0b, 0x40, 0xce, 0x1f, 0x71, 0x3f, 0x48, 0xcb, 0xfb, 0x3f, 0xbd, 0xe8, 0x76, 0x40, + 0xe0, 0xc2, 0xf3, 0x3f, 0x06, 0xe4, 0x3f, 0x3f, 0x3c, 0xd9, 0x4f, 0x40, 0x62, 0xf4, 0x81, 0x40, + 0x9d, 0xfd, 0xeb, 0x3f, 0xa2, 0xb0, 0x8f, 0x40, 0xbf, 0x88, 0xa5, 0x40, 0x8c, 0xe2, 0x3e, 0x40, + 0x62, 0x28, 0x42, 0x40, 0x19, 0x42, 0x47, 0x40, 0x34, 0xa5, 0x82, 0x40, 0xdc, 0x64, 0xf5, 0x3f, + 0x94, 0x23, 0x5d, 0x40, 0xb9, 0xc2, 0x48, 0x40, 0xa3, 0xc2, 0x77, 0x40, 0x68, 0xb4, 0x52, 0x40, + 0x9b, 0x35, 0x89, 0x40, 0xac, 0x39, 0x00, 0x40, 0xfe, 0xa0, 0x47, 0x40, 0xe0, 0xf9, 0xf8, 0x3f, + 0x65, 0xf4, 0xae, 0x3f, 0xad, 0x86, 0xa0, 0x40, 0xed, 0x0e, 0xf2, 0x3f, 0xed, 0xee, 0x52, 0x40, + 0x3f, 0xd6, 0x5b, 0x40, 0x18, 0x23, 0x20, 0x40, 0x63, 0x52, 0x4e, 0x40, 0xb7, 0xf9, 0x0a, 0x40, + 0x89, 0xcb, 0x7f, 0x40, 0x19, 0xb9, 0x4e, 0x40, 0xcd, 0x85, 0x42, 0x40, 0x82, 0xd9, 0x10, 0x40, + 0xee, 0x20, 0x8d, 0x40, 0x9c, 0xbf, 0xdf, 0x3f, 0xdf, 0x76, 0xa6, 0x3f, 0xfa, 0x15, 0x0a, 0x40}; +// [c h w]: [2 3 10] +unsigned char maxpool3x3s1_p1_fp32_out[] = { + 0x5c, 0x8f, 0x40, 0x40, 0xe5, 0xb0, 0x83, 0x40, 0x4d, 0x95, 0x9a, 0x40, 0x4d, 0x95, 0x9a, 0x40, + 0x4d, 0x95, 0x9a, 0x40, 0x88, 0x5c, 0x89, 0x40, 0x88, 0x5c, 0x89, 0x40, 0xfc, 0x6d, 0x88, 0x40, + 0xfc, 0x6d, 0x88, 0x40, 0xfc, 0x6d, 0x88, 0x40, 0x5c, 0x8f, 0x40, 0x40, 0xe5, 0xb0, 0x83, 0x40, + 0x4d, 0x95, 0x9a, 0x40, 0x4d, 0x95, 0x9a, 0x40, 0x4d, 0x95, 0x9a, 0x40, 0x88, 0x5c, 0x89, 0x40, + 0x88, 0x5c, 0x89, 0x40, 0xfc, 0x6d, 0x88, 0x40, 0xa2, 0xb0, 0x8f, 0x40, 0xa2, 0xb0, 0x8f, 0x40, + 0x5c, 0x8f, 0x40, 0x40, 0xe5, 0xb0, 0x83, 0x40, 0x4d, 0x95, 0x9a, 0x40, 0x4d, 0x95, 0x9a, 0x40, + 0x4d, 0x95, 0x9a, 0x40, 0x88, 0x5c, 0x89, 0x40, 0x88, 0x5c, 0x89, 0x40, 0xfc, 0x6d, 0x88, 0x40, + 0xa2, 0xb0, 0x8f, 0x40, 0xa2, 0xb0, 0x8f, 0x40, 0xbf, 0x88, 0xa5, 0x40, 0xbf, 0x88, 0xa5, 0x40, + 0xfe, 0xa0, 0x47, 0x40, 0x34, 0xa5, 0x82, 0x40, 0xad, 0x86, 0xa0, 0x40, 0xad, 0x86, 0xa0, 0x40, + 0xad, 0x86, 0xa0, 0x40, 0xa3, 0xc2, 0x77, 0x40, 0xa3, 0xc2, 0x77, 0x40, 0xa3, 0xc2, 0x77, 0x40, + 0xbf, 0x88, 0xa5, 0x40, 0xbf, 0x88, 0xa5, 0x40, 0x89, 0xcb, 0x7f, 0x40, 0x34, 0xa5, 0x82, 0x40, + 0xad, 0x86, 0xa0, 0x40, 0xad, 0x86, 0xa0, 0x40, 0xad, 0x86, 0xa0, 0x40, 0xee, 0x20, 0x8d, 0x40, + 0xa3, 0xc2, 0x77, 0x40, 0xa3, 0xc2, 0x77, 0x40, 0x9b, 0x35, 0x89, 0x40, 0x9b, 0x35, 0x89, 0x40, + 0x89, 0xcb, 0x7f, 0x40, 0x89, 0xcb, 0x7f, 0x40, 0xad, 0x86, 0xa0, 0x40, 0xad, 0x86, 0xa0, 0x40, + 0xad, 0x86, 0xa0, 0x40, 0xee, 0x20, 0x8d, 0x40, 0x3f, 0xd6, 0x5b, 0x40, 0x3f, 0xd6, 0x5b, 0x40}; +unsigned char maxpool3x3s1_p1_fp16_in[] = { + 0xff, 0x40, 0xe0, 0x40, 0x92, 0x41, 0xa9, 0x41, 0xb8, 0x40, 0xfb, 0x3e, 0x6e, 0x43, 0x6c, + 0x41, 0x8e, 0x41, 0x51, 0x3e, 0x04, 0x42, 0xa1, 0x40, 0x1d, 0x44, 0xd4, 0x44, 0x8e, 0x42, + 0x4a, 0x44, 0x83, 0x3d, 0x0a, 0x3d, 0x43, 0x44, 0x5b, 0x43, 0x5d, 0x40, 0x88, 0x3b, 0xde, + 0x3f, 0xb7, 0x43, 0x9e, 0x3f, 0xff, 0x39, 0x7e, 0x42, 0x0f, 0x44, 0x5f, 0x3f, 0x7d, 0x44, + 0x2c, 0x45, 0xf7, 0x41, 0x11, 0x42, 0x3a, 0x42, 0x15, 0x44, 0xab, 0x3f, 0xe9, 0x42, 0x46, + 0x42, 0xbe, 0x43, 0x95, 0x42, 0x49, 0x44, 0x01, 0x40, 0x3d, 0x42, 0xc7, 0x3f, 0x77, 0x3d, + 0x04, 0x45, 0x90, 0x3f, 0x97, 0x42, 0xde, 0x42, 0x01, 0x41, 0x72, 0x42, 0x57, 0x40, 0xfe, + 0x43, 0x75, 0x42, 0x14, 0x42, 0x86, 0x40, 0x69, 0x44, 0xfd, 0x3e, 0x33, 0x3d, 0x50, 0x40}; +unsigned char maxpool3x3s1_p1_fp16_out[] = { + 0x04, 0x42, 0x1d, 0x44, 0xd4, 0x44, 0xd4, 0x44, 0xd4, 0x44, 0x4a, 0x44, 0x4a, 0x44, 0x43, + 0x44, 0x43, 0x44, 0x43, 0x44, 0x04, 0x42, 0x1d, 0x44, 0xd4, 0x44, 0xd4, 0x44, 0xd4, 0x44, + 0x4a, 0x44, 0x4a, 0x44, 0x43, 0x44, 0x7d, 0x44, 0x7d, 0x44, 0x04, 0x42, 0x1d, 0x44, 0xd4, + 0x44, 0xd4, 0x44, 0xd4, 0x44, 0x4a, 0x44, 0x4a, 0x44, 0x43, 0x44, 0x7d, 0x44, 0x7d, 0x44, + 0x2c, 0x45, 0x2c, 0x45, 0x3d, 0x42, 0x15, 0x44, 0x04, 0x45, 0x04, 0x45, 0x04, 0x45, 0xbe, + 0x43, 0xbe, 0x43, 0xbe, 0x43, 0x2c, 0x45, 0x2c, 0x45, 0xfe, 0x43, 0x15, 0x44, 0x04, 0x45, + 0x04, 0x45, 0x04, 0x45, 0x69, 0x44, 0xbe, 0x43, 0xbe, 0x43, 0x49, 0x44, 0x49, 0x44, 0xfe, + 0x43, 0xfe, 0x43, 0x04, 0x45, 0x04, 0x45, 0x04, 0x45, 0x69, 0x44, 0xde, 0x42, 0xde, 0x42}; +unsigned char maxpool3x3s1_p1_int8_in[] = { + 0x3e, 0x3c, 0x45, 0x46, 0x3a, 0x2b, 0x5c, 0x43, 0x44, 0x27, 0x4a, 0x39, 0x65, 0x77, 0x51, + 0x6a, 0x22, 0x1f, 0x69, 0x5b, 0x36, 0x17, 0x30, 0x5f, 0x2f, 0x12, 0x50, 0x64, 0x2d, 0x6f, + 0x7f, 0x4a, 0x4b, 0x4d, 0x65, 0x2f, 0x55, 0x4d, 0x5f, 0x51, 0x6a, 0x31, 0x4d, 0x30, 0x22, + 0x7c, 0x2f, 0x51, 0x55, 0x3e, 0x4f, 0x36, 0x63, 0x50, 0x4b, 0x38, 0x6d, 0x2b, 0x20, 0x35}; +unsigned char maxpool3x3s1_p1_int8_out[] = { + 0x4a, 0x65, 0x77, 0x77, 0x77, 0x6a, 0x6a, 0x69, 0x69, 0x69, 0x4a, 0x65, 0x77, 0x77, 0x77, + 0x6a, 0x6a, 0x69, 0x6f, 0x6f, 0x4a, 0x65, 0x77, 0x77, 0x77, 0x6a, 0x6a, 0x69, 0x6f, 0x6f, + 0x7f, 0x7f, 0x4d, 0x65, 0x7c, 0x7c, 0x7c, 0x5f, 0x5f, 0x5f, 0x7f, 0x7f, 0x63, 0x65, 0x7c, + 0x7c, 0x7c, 0x6d, 0x5f, 0x5f, 0x6a, 0x6a, 0x63, 0x63, 0x7c, 0x7c, 0x7c, 0x6d, 0x55, 0x55}; + +// [c h w]: [3 7 7] +unsigned char global_maxpool_fp32_in[] = { + 0xd9, 0x1b, 0xb4, 0xc0, 0x9d, 0xd5, 0x9f, 0xc0, 0x60, 0xa9, 0xa4, 0xc0, 0x6d, 0x54, 0xbf, 0xc0, + 0x93, 0x12, 0xba, 0xc0, 0x09, 0xe2, 0xd4, 0xc0, 0x66, 0x63, 0xba, 0xc0, 0x75, 0x5d, 0x9c, 0xc0, + 0x37, 0xbb, 0xef, 0xc0, 0x3a, 0x6b, 0xcf, 0xc0, 0x1e, 0x4a, 0xaf, 0xc0, 0x99, 0x5d, 0xde, 0xc0, + 0x7f, 0xb8, 0xbc, 0xc0, 0xfc, 0xae, 0xac, 0xc0, 0x97, 0xe7, 0x85, 0xc0, 0xe6, 0x77, 0xef, 0xc0, + 0x6e, 0x40, 0xad, 0xc0, 0x91, 0xb6, 0xac, 0xc0, 0xf6, 0xea, 0x02, 0xc1, 0xd8, 0x16, 0xd2, 0xc0, + 0x57, 0xcb, 0xba, 0xc0, 0x97, 0x91, 0xcf, 0xc0, 0x49, 0xa2, 0x95, 0xc0, 0xb6, 0x3d, 0xab, 0xc0, + 0x16, 0x67, 0xd1, 0xc0, 0xb3, 0xba, 0xa9, 0xc0, 0xf0, 0x6e, 0xae, 0xc0, 0x2f, 0x2f, 0x01, 0xc1, + 0x47, 0x47, 0x96, 0xc0, 0xdf, 0x36, 0xc2, 0xc0, 0x6c, 0xa5, 0xd2, 0xc0, 0x28, 0x49, 0xcd, 0xc0, + 0x85, 0xb4, 0xbb, 0xc0, 0xaf, 0xd5, 0xe4, 0xc0, 0xac, 0x16, 0x90, 0xc0, 0xa2, 0x8a, 0xa8, 0xc0, + 0x88, 0x36, 0xb0, 0xc0, 0x0f, 0x3a, 0xc1, 0xc0, 0x6e, 0x67, 0xef, 0xc0, 0x37, 0x3d, 0x9c, 0xc0, + 0xb4, 0x7f, 0xb7, 0xc0, 0x94, 0x62, 0xbe, 0xc0, 0x19, 0xb1, 0xc5, 0xc0, 0x3b, 0x37, 0xdf, 0xc0, + 0xd8, 0x41, 0x99, 0xc0, 0x33, 0x1e, 0x01, 0xc1, 0x23, 0x13, 0x91, 0xc0, 0x98, 0x72, 0xa0, 0xc0, + 0xd4, 0xa6, 0xd7, 0xc0, 0x66, 0xf3, 0xa6, 0xc0, 0x2b, 0x48, 0x9a, 0xc0, 0x23, 0x92, 0xba, 0xc0, + 0x3a, 0xe4, 0xa0, 0xc0, 0x41, 0xbe, 0x9b, 0xc0, 0xef, 0x5a, 0x9f, 0xc0, 0x2d, 0x7f, 0xb2, 0xc0, + 0xcc, 0xfa, 0xe9, 0xc0, 0x30, 0x09, 0xb2, 0xc0, 0x15, 0xee, 0xcb, 0xc0, 0xf8, 0x98, 0xac, 0xc0, + 0xdc, 0xed, 0xda, 0xc0, 0x4a, 0xaf, 0x06, 0xc1, 0x76, 0xb5, 0xd7, 0xc0, 0x67, 0x32, 0xe8, 0xc0, + 0xd1, 0x3a, 0xb9, 0xc0, 0x45, 0x43, 0xe3, 0xc0, 0xcd, 0xa6, 0xb5, 0xc0, 0x1e, 0x9f, 0xcd, 0xc0, + 0xac, 0x15, 0xc8, 0xc0, 0xae, 0x07, 0xcf, 0xc0, 0x59, 0xae, 0xaa, 0xc0, 0x6c, 0xa3, 0xd1, 0xc0, + 0x2d, 0x1f, 0xca, 0xc0, 0x01, 0xf2, 0xc9, 0xc0, 0xc5, 0xf9, 0xd2, 0xc0, 0xa5, 0x02, 0xa9, 0xc0, + 0x52, 0xae, 0x55, 0xc0, 0x8a, 0xa2, 0xbd, 0xc0, 0xb0, 0xe1, 0xaf, 0xc0, 0x60, 0xd4, 0xa7, 0xc0, + 0xea, 0x6d, 0x92, 0xc0, 0xa6, 0xf8, 0xb9, 0xc0, 0xd7, 0x84, 0xc7, 0xc0, 0x15, 0x49, 0xa2, 0xc0, + 0x30, 0xec, 0xde, 0xc0, 0x6a, 0x83, 0x93, 0xc0, 0x4f, 0x38, 0xcd, 0xc0, 0x8e, 0xd8, 0xe9, 0xc0, + 0x19, 0x2c, 0xe0, 0xc0, 0xf0, 0x74, 0x06, 0xc1, 0x5b, 0xf0, 0x9e, 0xc0, 0x56, 0x55, 0x9e, 0xc0, + 0x74, 0x07, 0xc1, 0xc0, 0x94, 0xf6, 0xfd, 0xc0, 0x4f, 0xba, 0xbe, 0xc0, 0x30, 0x79, 0xb9, 0xc0, + 0x6b, 0x29, 0xd5, 0xc0, 0x7e, 0xd7, 0xb6, 0xc0, 0xd1, 0xd3, 0x8b, 0xc0, 0x05, 0xf1, 0xe3, 0xc0, + 0xb5, 0xf7, 0x99, 0xc0, 0x4f, 0x9e, 0xbf, 0xc0, 0x4a, 0x12, 0xb9, 0xc0, 0x67, 0x78, 0x98, 0xc0, + 0xee, 0xd3, 0xf8, 0xc0, 0xcd, 0x39, 0x9b, 0xc0, 0xd1, 0x85, 0xc6, 0xc0, 0x62, 0x0f, 0xc8, 0xc0, + 0x3f, 0xc9, 0xbb, 0xc0, 0xd7, 0xff, 0xdb, 0xc0, 0xa4, 0xba, 0xbd, 0xc0, 0x58, 0x80, 0xab, 0xc0, + 0x20, 0xec, 0x8b, 0xc0, 0x2d, 0xc8, 0xd5, 0xc0, 0x1d, 0xe0, 0xef, 0xc0, 0xa7, 0xd0, 0xc9, 0xc0, + 0x8b, 0xc2, 0xbb, 0xc0, 0xfb, 0xf8, 0xba, 0xc0, 0xfd, 0xb3, 0x95, 0xc0, 0x28, 0xa4, 0xac, 0xc0, + 0x3c, 0x15, 0x9f, 0xc0, 0x13, 0x9b, 0xd6, 0xc0, 0xc4, 0x38, 0xac, 0xc0, 0xa4, 0x50, 0xbb, 0xc0, + 0xe6, 0x9f, 0xb7, 0xc0, 0xf3, 0x5a, 0xca, 0xc0, 0x47, 0x4e, 0x08, 0xc1, 0x9c, 0x2d, 0xf3, 0xc0, + 0xe1, 0x90, 0xb3, 0xc0, 0xa4, 0xa7, 0xb5, 0xc0, 0x45, 0x04, 0x90, 0xc0, 0x33, 0xa8, 0xca, 0xc0, + 0x29, 0x17, 0xc8, 0xc0, 0xe7, 0xd7, 0xb5, 0xc0, 0x1d, 0x19, 0xc3, 0xc0, 0x5f, 0x23, 0xd3, 0xc0, + 0x16, 0x41, 0xdc, 0xc0, 0x51, 0x45, 0xcb, 0xc0, 0x76, 0xcc, 0xde, 0xc0, 0xc8, 0x9f, 0x06, 0xc1, + 0xa4, 0xaf, 0xa6, 0xc0, 0x34, 0xed, 0xc9, 0xc0, 0x2e, 0x14, 0xd1, 0xc0, 0x2a, 0x2e, 0xe0, 0xc0, + 0xc7, 0x3d, 0x9b, 0xc0, 0x8b, 0x04, 0xf5, 0xc0, 0x2d, 0x22, 0xa3, 0xc0}; +// [c h w]: [3 1 1] +unsigned char global_maxpool_fp32_out[] = {0x97, 0xe7, 0x85, 0xc0, 0x52, 0xae, + 0x55, 0xc0, 0xd1, 0xd3, 0x8b, 0xc0}; +unsigned char global_maxpool_fp16_in[] = { + 0xa0, 0xc5, 0xfe, 0xc4, 0x25, 0xc5, 0xfa, 0xc5, 0xd0, 0xc5, 0xa7, 0xc6, 0xd3, 0xc5, 0xe2, 0xc4, + 0x7d, 0xc7, 0x7b, 0xc6, 0x7a, 0xc5, 0xf2, 0xc6, 0xe5, 0xc5, 0x65, 0xc5, 0x2f, 0xc4, 0x7b, 0xc7, + 0x6a, 0xc5, 0x65, 0xc5, 0x17, 0xc8, 0x90, 0xc6, 0xd6, 0xc5, 0x7c, 0xc6, 0xad, 0xc4, 0x59, 0xc5, + 0x8b, 0xc6, 0x4d, 0xc5, 0x73, 0xc5, 0x09, 0xc8, 0xb2, 0xc4, 0x11, 0xc6, 0x95, 0xc6, 0x6a, 0xc6, + 0xdd, 0xc5, 0x26, 0xc7, 0x80, 0xc4, 0x44, 0xc5, 0x81, 0xc5, 0x09, 0xc6, 0x7b, 0xc7, 0xe1, 0xc4, + 0xbb, 0xc5, 0xf3, 0xc5, 0x2d, 0xc6, 0xf9, 0xc6, 0xca, 0xc4, 0x08, 0xc8, 0x88, 0xc4, 0x03, 0xc5, + 0xbd, 0xc6, 0x37, 0xc5, 0xd2, 0xc4, 0xd4, 0xc5, 0x07, 0xc5, 0xdd, 0xc4, 0xfa, 0xc4, 0x93, 0xc5, + 0x4f, 0xc7, 0x90, 0xc5, 0x5f, 0xc6, 0x64, 0xc5, 0xd7, 0xc6, 0x35, 0xc8, 0xbd, 0xc6, 0x41, 0xc7, + 0xc9, 0xc5, 0x1a, 0xc7, 0xad, 0xc5, 0x6c, 0xc6, 0x40, 0xc6, 0x78, 0xc6, 0x55, 0xc5, 0x8d, 0xc6, + 0x50, 0xc6, 0x4f, 0xc6, 0x97, 0xc6, 0x48, 0xc5, 0xad, 0xc2, 0xed, 0xc5, 0x7f, 0xc5, 0x3e, 0xc5, + 0x93, 0xc4, 0xcf, 0xc5, 0x3c, 0xc6, 0x12, 0xc5, 0xf7, 0xc6, 0x9c, 0xc4, 0x69, 0xc6, 0x4e, 0xc7, + 0x01, 0xc7, 0x33, 0xc8, 0xf7, 0xc4, 0xf2, 0xc4, 0x08, 0xc6, 0xef, 0xc7, 0xf5, 0xc5, 0xcb, 0xc5, + 0xa9, 0xc6, 0xb6, 0xc5, 0x5e, 0xc4, 0x1f, 0xc7, 0xcf, 0xc4, 0xfc, 0xc5, 0xc8, 0xc5, 0xc3, 0xc4, + 0xc6, 0xc7, 0xd9, 0xc4, 0x34, 0xc6, 0x40, 0xc6, 0xde, 0xc5, 0xdf, 0xc6, 0xed, 0xc5, 0x5c, 0xc5, + 0x5f, 0xc4, 0xae, 0xc6, 0x7f, 0xc7, 0x4e, 0xc6, 0xde, 0xc5, 0xd7, 0xc5, 0xad, 0xc4, 0x65, 0xc5, + 0xf8, 0xc4, 0xb4, 0xc6, 0x61, 0xc5, 0xda, 0xc5, 0xbc, 0xc5, 0x52, 0xc6, 0x42, 0xc8, 0x99, 0xc7, + 0x9c, 0xc5, 0xad, 0xc5, 0x80, 0xc4, 0x55, 0xc6, 0x40, 0xc6, 0xae, 0xc5, 0x18, 0xc6, 0x99, 0xc6, + 0xe2, 0xc6, 0x5a, 0xc6, 0xf6, 0xc6, 0x34, 0xc8, 0x35, 0xc5, 0x4f, 0xc6, 0x88, 0xc6, 0x01, 0xc7, + 0xd9, 0xc4, 0xa8, 0xc7, 0x19, 0xc5}; +unsigned char global_maxpool_fp16_out[] = {0x2f, 0xc4, 0xad, 0xc2, 0x5e, 0xc4}; diff --git a/tests/unit_test/valid_data/pad.dat b/tests/unit_test/valid_data/pad.dat new file mode 100644 index 00000000..43ee1ee3 --- /dev/null +++ b/tests/unit_test/valid_data/pad.dat @@ -0,0 +1,281 @@ +/************************************************************************** + * NCHW layout + **************************************************************************/ +// [c h w]: [3 4 19] +// [c h w]: [3 6 21] +// pad_top = pad_left = pad_down = pad_right +unsigned char pad_fp32_in[] = { + 0x56, 0x49, 0x72, 0x40, 0x8d, 0x4e, 0x13, 0xbf, 0x04, 0x4c, 0xd9, 0x3e, 0x4e, 0xf7, 0x24, 0x40, + 0xa3, 0xe4, 0x2e, 0x40, 0xb7, 0xc8, 0x66, 0xbf, 0x18, 0x9b, 0x99, 0x40, 0xb7, 0x5d, 0x6b, 0x40, + 0x9f, 0x03, 0x5a, 0x40, 0x6b, 0x4f, 0x94, 0x40, 0x12, 0x52, 0x46, 0x40, 0xe7, 0x01, 0xd9, 0x3f, + 0xbf, 0xaa, 0xfb, 0x3f, 0xbf, 0x40, 0xd0, 0x3f, 0x2a, 0x47, 0x3e, 0x40, 0x1d, 0x02, 0x84, 0x40, + 0xc7, 0xa7, 0xb8, 0x3f, 0xa6, 0x11, 0x33, 0x40, 0xb5, 0xab, 0x78, 0x40, 0xfe, 0x6d, 0xdd, 0x3f, + 0xb2, 0xdb, 0x71, 0x40, 0x86, 0x33, 0x14, 0x40, 0x59, 0x05, 0x28, 0x3f, 0x73, 0xc8, 0x0b, 0xbf, + 0x26, 0x2a, 0x72, 0xbe, 0x30, 0x60, 0xae, 0xbf, 0x65, 0x3f, 0x5c, 0x40, 0xdd, 0x6e, 0x40, 0x40, + 0xa5, 0x13, 0xb2, 0xbe, 0xbe, 0x49, 0x64, 0x40, 0xb1, 0x22, 0x8c, 0x40, 0xd0, 0xba, 0xa1, 0xbf, + 0x96, 0xc5, 0x38, 0x40, 0x5a, 0x4c, 0x45, 0x40, 0x5c, 0x96, 0x91, 0x3f, 0x3b, 0x30, 0x03, 0x40, + 0x82, 0x7b, 0xb1, 0x3e, 0xc3, 0x36, 0x8d, 0x40, 0xdb, 0xb5, 0x56, 0x40, 0x58, 0x9b, 0xde, 0xbf, + 0x6e, 0x6f, 0x35, 0x40, 0xe9, 0x4c, 0xeb, 0x3f, 0xa8, 0x1b, 0x76, 0x40, 0x6f, 0xba, 0x21, 0x40, + 0xfd, 0xc6, 0x86, 0x40, 0x89, 0xfe, 0x2c, 0xbe, 0x25, 0x6f, 0xdc, 0xbf, 0x94, 0x58, 0x8c, 0x40, + 0x08, 0x5a, 0x92, 0xbf, 0x7b, 0xb1, 0xcb, 0x3e, 0x2c, 0x4b, 0x16, 0x3f, 0x11, 0x16, 0xe3, 0x3f, + 0x58, 0x82, 0xa6, 0xbf, 0x8b, 0xfa, 0x49, 0x40, 0xeb, 0x01, 0xe3, 0xbf, 0x50, 0xce, 0xc5, 0x3f, + 0x3a, 0x71, 0x34, 0x3f, 0xe9, 0xca, 0xd1, 0x3f, 0xc7, 0xd3, 0x8f, 0x40, 0xe9, 0x5a, 0x47, 0x40, + 0x45, 0xdf, 0x9a, 0x40, 0xf0, 0x3a, 0x26, 0xbd, 0x4b, 0x02, 0x70, 0x3f, 0x2f, 0xe6, 0x06, 0x40, + 0x02, 0x96, 0x72, 0x3f, 0x86, 0x6e, 0xa4, 0x3f, 0x1b, 0xd4, 0x3b, 0x3f, 0xc4, 0xfc, 0x81, 0x40, + 0x92, 0xa5, 0x4d, 0x40, 0xf3, 0x9f, 0x02, 0x40, 0xd4, 0xc7, 0xe8, 0x3e, 0x02, 0xd1, 0x9d, 0x3f, + 0x56, 0xb8, 0x6b, 0x40, 0x48, 0x35, 0x35, 0x40, 0x4f, 0xa5, 0xa3, 0xbf, 0x7e, 0x9a, 0x8c, 0x3d, + 0xbc, 0xf1, 0x78, 0x40, 0x59, 0x85, 0x30, 0xbf, 0xca, 0xa7, 0x38, 0x40, 0x82, 0x0d, 0x57, 0x40, + 0xee, 0xbb, 0x6c, 0x40, 0x4e, 0xa5, 0xba, 0x3e, 0x18, 0xce, 0xda, 0x3f, 0x25, 0x5b, 0x93, 0x40, + 0xdc, 0xeb, 0xca, 0xbc, 0xab, 0x11, 0xd3, 0x3e, 0x57, 0x2b, 0x3a, 0x3f, 0x60, 0x4a, 0xc0, 0x3f, + 0xfb, 0x98, 0x9b, 0x3f, 0xb8, 0xe7, 0x6d, 0x40, 0xb9, 0x9c, 0x13, 0x3f, 0x5e, 0xdb, 0x94, 0x40, + 0x10, 0x73, 0x89, 0x40, 0x21, 0xd6, 0xb8, 0x3f, 0xb7, 0x1c, 0xdd, 0xbf, 0x23, 0x2e, 0x7f, 0x40, + 0xa3, 0x44, 0xdd, 0x3f, 0x82, 0xeb, 0x3b, 0x40, 0xa8, 0x8f, 0x8f, 0x40, 0x98, 0x4d, 0x88, 0x40, + 0x2d, 0xcd, 0x64, 0xbf, 0x43, 0x3f, 0x24, 0x3f, 0x93, 0x2e, 0x83, 0x3f, 0x7f, 0x58, 0x74, 0xbf, + 0x9d, 0x45, 0xf1, 0x3f, 0xcf, 0x8a, 0xf1, 0xbe, 0x62, 0x5d, 0x77, 0x3f, 0x31, 0x58, 0x58, 0x40, + 0xf6, 0x79, 0x8e, 0x40, 0x91, 0xfd, 0x6c, 0x40, 0xc0, 0xf5, 0x3b, 0xbe, 0x0a, 0x3e, 0x4d, 0x3f, + 0xac, 0xbc, 0xba, 0x3f, 0x37, 0xbc, 0x5e, 0x40, 0xac, 0x09, 0xeb, 0xbf, 0x00, 0xcd, 0x68, 0x40, + 0xb3, 0xcd, 0x71, 0x3f, 0xfc, 0x41, 0x18, 0x40, 0xc5, 0xa5, 0x87, 0xbf, 0xda, 0x41, 0xe9, 0xbf, + 0xd1, 0xff, 0x71, 0xbf, 0x06, 0xbb, 0xd7, 0xbf, 0x6b, 0xde, 0x69, 0x40, 0x05, 0x5f, 0xbb, 0x3f, + 0x0b, 0x9d, 0x23, 0x40, 0xa4, 0xbc, 0x98, 0x40, 0x29, 0xa5, 0x70, 0x40, 0x50, 0x58, 0x68, 0x40, + 0x82, 0x78, 0x63, 0x40, 0xb8, 0x89, 0x94, 0x40, 0xa1, 0x92, 0x24, 0xbf, 0x9f, 0x3a, 0xfe, 0xbc, + 0x98, 0x72, 0x6b, 0xbf, 0x16, 0x5e, 0x1b, 0x3f, 0x4f, 0xd0, 0x8a, 0x40, 0xf1, 0xef, 0x91, 0x3d, + 0x28, 0x61, 0x35, 0x40, 0xc6, 0xeb, 0xee, 0x3f, 0xcd, 0x7a, 0x9d, 0x40, 0x0e, 0x00, 0xf0, 0xbf, + 0x6e, 0xa8, 0x91, 0xbf, 0x52, 0x0d, 0x9f, 0xbe, 0xf8, 0x80, 0xca, 0x3f, 0x49, 0x3c, 0x60, 0x40, + 0xbb, 0x0b, 0x6a, 0x3f, 0x33, 0x3a, 0x70, 0x40, 0x86, 0x76, 0xff, 0x3f, 0x9d, 0x03, 0xe8, 0x3f, + 0x4b, 0xc4, 0x9f, 0x3f, 0xc6, 0x95, 0x71, 0xbe, 0x80, 0xcd, 0x57, 0x40, 0xdd, 0xd1, 0xcd, 0x3f, + 0x9d, 0x9f, 0x91, 0x3b, 0x86, 0xcf, 0xee, 0xbf, 0xa7, 0x6d, 0x89, 0x40, 0x41, 0x71, 0x98, 0x40, + 0x2c, 0x5c, 0x52, 0x3f, 0xcd, 0x06, 0x5b, 0x40, 0x98, 0xfb, 0xaa, 0xbe, 0x9c, 0x02, 0xdd, 0x3e, + 0xfd, 0x1d, 0x88, 0xbe, 0x35, 0xfd, 0x90, 0x3f, 0x5d, 0x85, 0xad, 0xbf, 0x15, 0xab, 0xbe, 0x3f, + 0x0c, 0x63, 0x4a, 0x40, 0x52, 0x97, 0x5f, 0x3f, 0x41, 0x39, 0x19, 0x40, 0x2b, 0xe6, 0x93, 0x40, + 0x1f, 0x5b, 0x85, 0x3f, 0xbc, 0x46, 0xc2, 0xbf, 0x25, 0x98, 0x67, 0xbf, 0x2a, 0xfd, 0x5f, 0x40, + 0xf1, 0x27, 0x70, 0xbf, 0xaa, 0x7d, 0xa9, 0x3f, 0x81, 0x30, 0x58, 0x40, 0xac, 0xa1, 0xd5, 0x3f, + 0xb9, 0x78, 0x83, 0x40, 0xcb, 0xb6, 0x54, 0x40, 0x95, 0x7c, 0xfb, 0xbe, 0x45, 0xc1, 0x61, 0xbf, + 0xa6, 0x20, 0xdb, 0xbf, 0xe4, 0xd6, 0xae, 0x3f, 0xbc, 0x7b, 0x61, 0x40, 0xc1, 0xd1, 0xd3, 0x3f, + 0xb3, 0xab, 0xbe, 0xbf, 0x69, 0x8d, 0x75, 0x40, 0xfb, 0xcf, 0x47, 0x40, 0xed, 0x0f, 0x91, 0xbf, + 0x0f, 0xcc, 0xa8, 0x3f, 0xde, 0xe8, 0x7d, 0x40, 0x38, 0xe4, 0x95, 0x40, 0xf0, 0x32, 0x1a, 0x3f, + 0xdb, 0x8c, 0x68, 0x40, 0xef, 0x22, 0x93, 0x40, 0x68, 0x29, 0xa4, 0xbf, 0x3c, 0xcd, 0x32, 0x40, + 0x6f, 0xca, 0xd0, 0xbf, 0x27, 0xe8, 0x0c, 0x40, 0xa0, 0xdc, 0x0a, 0x3f, 0xfc, 0x8c, 0x45, 0x40, + 0x89, 0x98, 0xea, 0x3d, 0x2e, 0xba, 0x77, 0x40, 0x3f, 0x8c, 0x6e, 0x3e, 0x5b, 0x34, 0xcf, 0x3f, + 0x6e, 0x64, 0x93, 0x40, 0xe2, 0x66, 0x69, 0x40, 0x59, 0x73, 0x51, 0x3f, 0xb3, 0xcf, 0x2b, 0x40, + 0xad, 0xb9, 0x14, 0x40, 0x42, 0xf6, 0x33, 0xbf, 0x03, 0x6b, 0x2d, 0x40, 0x87, 0x3c, 0xd7, 0x3d, + 0x62, 0xe4, 0x6d, 0x40, 0x43, 0x41, 0x2f, 0x40, 0x75, 0x19, 0x8f, 0x40, 0x84, 0x6a, 0xc8, 0xbf, + 0xad, 0xf9, 0xde, 0x3f, 0xdb, 0x66, 0x84, 0x40, 0x3d, 0xf4, 0x86, 0xbe, 0x70, 0x94, 0xfb, 0x3e, + 0x78, 0x08, 0x6c, 0x40, 0xe9, 0x3a, 0x80, 0xbc, 0x50, 0x53, 0xa5, 0xbf, 0xf6, 0xe6, 0x25, 0xbf, + 0x70, 0xf4, 0x25, 0x40, 0x69, 0xcf, 0x64, 0x40, 0x25, 0x81, 0x48, 0x40, 0x09, 0x28, 0xd2, 0xbd}; +unsigned char pad_fp32_out[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x56, 0x49, 0x72, 0x40, 0x8d, 0x4e, 0x13, 0xbf, + 0x04, 0x4c, 0xd9, 0x3e, 0x4e, 0xf7, 0x24, 0x40, 0xa3, 0xe4, 0x2e, 0x40, 0xb7, 0xc8, 0x66, 0xbf, + 0x18, 0x9b, 0x99, 0x40, 0xb7, 0x5d, 0x6b, 0x40, 0x9f, 0x03, 0x5a, 0x40, 0x6b, 0x4f, 0x94, 0x40, + 0x12, 0x52, 0x46, 0x40, 0xe7, 0x01, 0xd9, 0x3f, 0xbf, 0xaa, 0xfb, 0x3f, 0xbf, 0x40, 0xd0, 0x3f, + 0x2a, 0x47, 0x3e, 0x40, 0x1d, 0x02, 0x84, 0x40, 0xc7, 0xa7, 0xb8, 0x3f, 0xa6, 0x11, 0x33, 0x40, + 0xb5, 0xab, 0x78, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x6d, 0xdd, 0x3f, + 0xb2, 0xdb, 0x71, 0x40, 0x86, 0x33, 0x14, 0x40, 0x59, 0x05, 0x28, 0x3f, 0x73, 0xc8, 0x0b, 0xbf, + 0x26, 0x2a, 0x72, 0xbe, 0x30, 0x60, 0xae, 0xbf, 0x65, 0x3f, 0x5c, 0x40, 0xdd, 0x6e, 0x40, 0x40, + 0xa5, 0x13, 0xb2, 0xbe, 0xbe, 0x49, 0x64, 0x40, 0xb1, 0x22, 0x8c, 0x40, 0xd0, 0xba, 0xa1, 0xbf, + 0x96, 0xc5, 0x38, 0x40, 0x5a, 0x4c, 0x45, 0x40, 0x5c, 0x96, 0x91, 0x3f, 0x3b, 0x30, 0x03, 0x40, + 0x82, 0x7b, 0xb1, 0x3e, 0xc3, 0x36, 0x8d, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xdb, 0xb5, 0x56, 0x40, 0x58, 0x9b, 0xde, 0xbf, 0x6e, 0x6f, 0x35, 0x40, 0xe9, 0x4c, 0xeb, 0x3f, + 0xa8, 0x1b, 0x76, 0x40, 0x6f, 0xba, 0x21, 0x40, 0xfd, 0xc6, 0x86, 0x40, 0x89, 0xfe, 0x2c, 0xbe, + 0x25, 0x6f, 0xdc, 0xbf, 0x94, 0x58, 0x8c, 0x40, 0x08, 0x5a, 0x92, 0xbf, 0x7b, 0xb1, 0xcb, 0x3e, + 0x2c, 0x4b, 0x16, 0x3f, 0x11, 0x16, 0xe3, 0x3f, 0x58, 0x82, 0xa6, 0xbf, 0x8b, 0xfa, 0x49, 0x40, + 0xeb, 0x01, 0xe3, 0xbf, 0x50, 0xce, 0xc5, 0x3f, 0x3a, 0x71, 0x34, 0x3f, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xe9, 0xca, 0xd1, 0x3f, 0xc7, 0xd3, 0x8f, 0x40, 0xe9, 0x5a, 0x47, 0x40, + 0x45, 0xdf, 0x9a, 0x40, 0xf0, 0x3a, 0x26, 0xbd, 0x4b, 0x02, 0x70, 0x3f, 0x2f, 0xe6, 0x06, 0x40, + 0x02, 0x96, 0x72, 0x3f, 0x86, 0x6e, 0xa4, 0x3f, 0x1b, 0xd4, 0x3b, 0x3f, 0xc4, 0xfc, 0x81, 0x40, + 0x92, 0xa5, 0x4d, 0x40, 0xf3, 0x9f, 0x02, 0x40, 0xd4, 0xc7, 0xe8, 0x3e, 0x02, 0xd1, 0x9d, 0x3f, + 0x56, 0xb8, 0x6b, 0x40, 0x48, 0x35, 0x35, 0x40, 0x4f, 0xa5, 0xa3, 0xbf, 0x7e, 0x9a, 0x8c, 0x3d, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xbc, 0xf1, 0x78, 0x40, 0x59, 0x85, 0x30, 0xbf, 0xca, 0xa7, 0x38, 0x40, 0x82, 0x0d, 0x57, 0x40, + 0xee, 0xbb, 0x6c, 0x40, 0x4e, 0xa5, 0xba, 0x3e, 0x18, 0xce, 0xda, 0x3f, 0x25, 0x5b, 0x93, 0x40, + 0xdc, 0xeb, 0xca, 0xbc, 0xab, 0x11, 0xd3, 0x3e, 0x57, 0x2b, 0x3a, 0x3f, 0x60, 0x4a, 0xc0, 0x3f, + 0xfb, 0x98, 0x9b, 0x3f, 0xb8, 0xe7, 0x6d, 0x40, 0xb9, 0x9c, 0x13, 0x3f, 0x5e, 0xdb, 0x94, 0x40, + 0x10, 0x73, 0x89, 0x40, 0x21, 0xd6, 0xb8, 0x3f, 0xb7, 0x1c, 0xdd, 0xbf, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x23, 0x2e, 0x7f, 0x40, 0xa3, 0x44, 0xdd, 0x3f, 0x82, 0xeb, 0x3b, 0x40, + 0xa8, 0x8f, 0x8f, 0x40, 0x98, 0x4d, 0x88, 0x40, 0x2d, 0xcd, 0x64, 0xbf, 0x43, 0x3f, 0x24, 0x3f, + 0x93, 0x2e, 0x83, 0x3f, 0x7f, 0x58, 0x74, 0xbf, 0x9d, 0x45, 0xf1, 0x3f, 0xcf, 0x8a, 0xf1, 0xbe, + 0x62, 0x5d, 0x77, 0x3f, 0x31, 0x58, 0x58, 0x40, 0xf6, 0x79, 0x8e, 0x40, 0x91, 0xfd, 0x6c, 0x40, + 0xc0, 0xf5, 0x3b, 0xbe, 0x0a, 0x3e, 0x4d, 0x3f, 0xac, 0xbc, 0xba, 0x3f, 0x37, 0xbc, 0x5e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xac, 0x09, 0xeb, 0xbf, 0x00, 0xcd, 0x68, 0x40, + 0xb3, 0xcd, 0x71, 0x3f, 0xfc, 0x41, 0x18, 0x40, 0xc5, 0xa5, 0x87, 0xbf, 0xda, 0x41, 0xe9, 0xbf, + 0xd1, 0xff, 0x71, 0xbf, 0x06, 0xbb, 0xd7, 0xbf, 0x6b, 0xde, 0x69, 0x40, 0x05, 0x5f, 0xbb, 0x3f, + 0x0b, 0x9d, 0x23, 0x40, 0xa4, 0xbc, 0x98, 0x40, 0x29, 0xa5, 0x70, 0x40, 0x50, 0x58, 0x68, 0x40, + 0x82, 0x78, 0x63, 0x40, 0xb8, 0x89, 0x94, 0x40, 0xa1, 0x92, 0x24, 0xbf, 0x9f, 0x3a, 0xfe, 0xbc, + 0x98, 0x72, 0x6b, 0xbf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x5e, 0x1b, 0x3f, + 0x4f, 0xd0, 0x8a, 0x40, 0xf1, 0xef, 0x91, 0x3d, 0x28, 0x61, 0x35, 0x40, 0xc6, 0xeb, 0xee, 0x3f, + 0xcd, 0x7a, 0x9d, 0x40, 0x0e, 0x00, 0xf0, 0xbf, 0x6e, 0xa8, 0x91, 0xbf, 0x52, 0x0d, 0x9f, 0xbe, + 0xf8, 0x80, 0xca, 0x3f, 0x49, 0x3c, 0x60, 0x40, 0xbb, 0x0b, 0x6a, 0x3f, 0x33, 0x3a, 0x70, 0x40, + 0x86, 0x76, 0xff, 0x3f, 0x9d, 0x03, 0xe8, 0x3f, 0x4b, 0xc4, 0x9f, 0x3f, 0xc6, 0x95, 0x71, 0xbe, + 0x80, 0xcd, 0x57, 0x40, 0xdd, 0xd1, 0xcd, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9d, 0x9f, 0x91, 0x3b, 0x86, 0xcf, 0xee, 0xbf, + 0xa7, 0x6d, 0x89, 0x40, 0x41, 0x71, 0x98, 0x40, 0x2c, 0x5c, 0x52, 0x3f, 0xcd, 0x06, 0x5b, 0x40, + 0x98, 0xfb, 0xaa, 0xbe, 0x9c, 0x02, 0xdd, 0x3e, 0xfd, 0x1d, 0x88, 0xbe, 0x35, 0xfd, 0x90, 0x3f, + 0x5d, 0x85, 0xad, 0xbf, 0x15, 0xab, 0xbe, 0x3f, 0x0c, 0x63, 0x4a, 0x40, 0x52, 0x97, 0x5f, 0x3f, + 0x41, 0x39, 0x19, 0x40, 0x2b, 0xe6, 0x93, 0x40, 0x1f, 0x5b, 0x85, 0x3f, 0xbc, 0x46, 0xc2, 0xbf, + 0x25, 0x98, 0x67, 0xbf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0xfd, 0x5f, 0x40, + 0xf1, 0x27, 0x70, 0xbf, 0xaa, 0x7d, 0xa9, 0x3f, 0x81, 0x30, 0x58, 0x40, 0xac, 0xa1, 0xd5, 0x3f, + 0xb9, 0x78, 0x83, 0x40, 0xcb, 0xb6, 0x54, 0x40, 0x95, 0x7c, 0xfb, 0xbe, 0x45, 0xc1, 0x61, 0xbf, + 0xa6, 0x20, 0xdb, 0xbf, 0xe4, 0xd6, 0xae, 0x3f, 0xbc, 0x7b, 0x61, 0x40, 0xc1, 0xd1, 0xd3, 0x3f, + 0xb3, 0xab, 0xbe, 0xbf, 0x69, 0x8d, 0x75, 0x40, 0xfb, 0xcf, 0x47, 0x40, 0xed, 0x0f, 0x91, 0xbf, + 0x0f, 0xcc, 0xa8, 0x3f, 0xde, 0xe8, 0x7d, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x38, 0xe4, 0x95, 0x40, 0xf0, 0x32, 0x1a, 0x3f, 0xdb, 0x8c, 0x68, 0x40, 0xef, 0x22, 0x93, 0x40, + 0x68, 0x29, 0xa4, 0xbf, 0x3c, 0xcd, 0x32, 0x40, 0x6f, 0xca, 0xd0, 0xbf, 0x27, 0xe8, 0x0c, 0x40, + 0xa0, 0xdc, 0x0a, 0x3f, 0xfc, 0x8c, 0x45, 0x40, 0x89, 0x98, 0xea, 0x3d, 0x2e, 0xba, 0x77, 0x40, + 0x3f, 0x8c, 0x6e, 0x3e, 0x5b, 0x34, 0xcf, 0x3f, 0x6e, 0x64, 0x93, 0x40, 0xe2, 0x66, 0x69, 0x40, + 0x59, 0x73, 0x51, 0x3f, 0xb3, 0xcf, 0x2b, 0x40, 0xad, 0xb9, 0x14, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x42, 0xf6, 0x33, 0xbf, 0x03, 0x6b, 0x2d, 0x40, 0x87, 0x3c, 0xd7, 0x3d, + 0x62, 0xe4, 0x6d, 0x40, 0x43, 0x41, 0x2f, 0x40, 0x75, 0x19, 0x8f, 0x40, 0x84, 0x6a, 0xc8, 0xbf, + 0xad, 0xf9, 0xde, 0x3f, 0xdb, 0x66, 0x84, 0x40, 0x3d, 0xf4, 0x86, 0xbe, 0x70, 0x94, 0xfb, 0x3e, + 0x78, 0x08, 0x6c, 0x40, 0xe9, 0x3a, 0x80, 0xbc, 0x50, 0x53, 0xa5, 0xbf, 0xf6, 0xe6, 0x25, 0xbf, + 0x70, 0xf4, 0x25, 0x40, 0x69, 0xcf, 0x64, 0x40, 0x25, 0x81, 0x48, 0x40, 0x09, 0x28, 0xd2, 0xbd, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +unsigned char pad_fp16_in[] = { + 0x3e, 0x41, 0x6c, 0x3d, 0x15, 0x3d, 0x00, 0x45, 0x52, 0x43, 0xfc, 0x42, 0x8b, 0x41, 0x00, 0x40, + 0xc1, 0x41, 0xcc, 0x3d, 0xe9, 0x3d, 0x1e, 0x3e, 0xa6, 0x40, 0x01, 0x3e, 0x04, 0x44, 0x75, 0x3f, + 0x0e, 0x41, 0xb8, 0x3d, 0x0f, 0x44, 0x31, 0x44, 0xbd, 0x3e, 0xd8, 0x3d, 0xc6, 0x42, 0x1a, 0x40, + 0x8f, 0x42, 0xe1, 0x42, 0xc4, 0x44, 0x55, 0x42, 0x12, 0x40, 0x54, 0x42, 0xa7, 0x3d, 0x68, 0x44, + 0xa6, 0x44, 0x3f, 0x3d, 0xdf, 0x44, 0x0b, 0x43, 0x9d, 0x44, 0x69, 0x43, 0xe1, 0x44, 0x13, 0x42, + 0x6e, 0x44, 0x64, 0x43, 0xa7, 0x41, 0x93, 0x43, 0x7a, 0x42, 0xf6, 0x3e, 0xac, 0x43, 0x3b, 0x44, + 0x02, 0x44, 0x84, 0x3e, 0x1d, 0x44, 0xc8, 0x42, 0xb5, 0x44, 0xa5, 0x43, 0x4d, 0x40, 0x43, 0x40, + 0x16, 0x41, 0x15, 0x44, 0xbb, 0x44, 0xd5, 0x3c, 0xe0, 0x41, 0xb9, 0x3e, 0xc0, 0x44, 0xa5, 0x41, + 0xc0, 0x3c, 0xf5, 0x44, 0x7c, 0x42, 0x31, 0x3c, 0x84, 0x40, 0xaf, 0x44, 0xea, 0x44, 0xbe, 0x40, + 0x1e, 0x41, 0x89, 0x42, 0xb8, 0x3f, 0x2d, 0x44, 0xa4, 0x40, 0x77, 0x44, 0xb0, 0x41, 0x8c, 0x41, + 0x00, 0x44, 0x78, 0x44, 0x1d, 0x40, 0x47, 0x44, 0xbd, 0x44, 0xbd, 0x41, 0x9f, 0x44, 0xd5, 0x44, + 0x72, 0x41, 0xde, 0x41, 0xf3, 0x42, 0xae, 0x3d, 0x9d, 0x3e, 0xd8, 0x44, 0x79, 0x42, 0x27, 0x43, + 0x42, 0x44, 0xa2, 0x3e, 0xcc, 0x43, 0x76, 0x3d, 0xbe, 0x40, 0xdb, 0x40, 0xd2, 0x3f, 0xd8, 0x41, + 0x3f, 0x44, 0x3a, 0x44, 0xe2, 0x3d, 0xb9, 0x40, 0xbe, 0x44, 0x70, 0x43, 0x02, 0x40, 0xdc, 0x42, + 0xf1, 0x42, 0x61, 0x40, 0xab, 0x41, 0x7a, 0x40, 0x79, 0x3d, 0xae, 0x41, 0xe2, 0x3f, 0x5a, 0x42, + 0x56, 0x44, 0xf9, 0x43, 0x66, 0x44, 0xce, 0x44, 0x4b, 0x43, 0xae, 0x3e, 0xcb, 0x44, 0x2a, 0x3c, + 0x83, 0x3f, 0x66, 0x42, 0x64, 0x44, 0x3a, 0x44, 0x91, 0x44, 0x16, 0x44, 0xcd, 0x41, 0x45, 0x3d, + 0xf0, 0x40, 0x91, 0x41, 0x46, 0x43, 0xbf, 0x44, 0x43, 0x43, 0xd5, 0x40, 0x98, 0x44, 0x36, 0x44, + 0xca, 0x41, 0x07, 0x41, 0x68, 0x3d, 0xa9, 0x41, 0x20, 0x42, 0xb7, 0x3c, 0xc5, 0x44, 0x16, 0x44, + 0x49, 0x44, 0x39, 0x43, 0x2a, 0x44, 0xcb, 0x3f, 0x3c, 0x3e, 0xd6, 0x40, 0x56, 0x3c, 0x1c, 0x44, + 0x26, 0x44, 0x6b, 0x3e, 0xee, 0x44, 0x9b, 0x42, 0x2b, 0x3e, 0x81, 0x42, 0x50, 0x3f, 0xac, 0x42, + 0xaf, 0x3c, 0x17, 0x44, 0xe6, 0x42, 0x7e, 0x3d, 0x47, 0x43, 0x8f, 0x41, 0x1b, 0x42, 0xf6, 0x43, + 0xd3, 0x3c, 0x16, 0x40, 0x3f, 0x40, 0xd1, 0x44, 0xb4, 0x3c, 0x68, 0x43, 0xd6, 0x44, 0x72, 0x3e, + 0x74, 0x43, 0x0b, 0x44, 0x2d, 0x3e, 0x1b, 0x3d, 0x22, 0x44, 0xab, 0x43, 0x0e, 0x42, 0x25, 0x44, + 0x7e, 0x3e, 0x61, 0x44, 0xca, 0x3d, 0x4a, 0x3c, 0xa3, 0x3f, 0x45, 0x3e, 0xa4, 0x41, 0x9e, 0x42, + 0x22, 0x3d, 0xbf, 0x40, 0x9c, 0x41, 0xa2, 0x40, 0x3b, 0x42, 0xd2, 0x41, 0x48, 0x44, 0xb8, 0x43, + 0x3d, 0x44, 0xcd, 0x43, 0x65, 0x3e, 0x85, 0x3f, 0x2f, 0x43, 0x8b, 0x41, 0x6b, 0x41, 0x6d, 0x40, + 0xd5, 0x40, 0xfa, 0x42, 0xf0, 0x3c, 0x86, 0x44, 0x8e, 0x41, 0x55, 0x40, 0x54, 0x42, 0xe3, 0x41, + 0x04, 0x43, 0xf6, 0x41, 0xd5, 0x3f, 0x6a, 0x3e}; +unsigned char pad_fp16_out[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x41, 0x6c, 0x3d, + 0x15, 0x3d, 0x00, 0x45, 0x52, 0x43, 0xfc, 0x42, 0x8b, 0x41, 0x00, 0x40, 0xc1, 0x41, 0xcc, 0x3d, + 0xe9, 0x3d, 0x1e, 0x3e, 0xa6, 0x40, 0x01, 0x3e, 0x04, 0x44, 0x75, 0x3f, 0x0e, 0x41, 0xb8, 0x3d, + 0x0f, 0x44, 0x00, 0x00, 0x00, 0x00, 0x31, 0x44, 0xbd, 0x3e, 0xd8, 0x3d, 0xc6, 0x42, 0x1a, 0x40, + 0x8f, 0x42, 0xe1, 0x42, 0xc4, 0x44, 0x55, 0x42, 0x12, 0x40, 0x54, 0x42, 0xa7, 0x3d, 0x68, 0x44, + 0xa6, 0x44, 0x3f, 0x3d, 0xdf, 0x44, 0x0b, 0x43, 0x9d, 0x44, 0x69, 0x43, 0x00, 0x00, 0x00, 0x00, + 0xe1, 0x44, 0x13, 0x42, 0x6e, 0x44, 0x64, 0x43, 0xa7, 0x41, 0x93, 0x43, 0x7a, 0x42, 0xf6, 0x3e, + 0xac, 0x43, 0x3b, 0x44, 0x02, 0x44, 0x84, 0x3e, 0x1d, 0x44, 0xc8, 0x42, 0xb5, 0x44, 0xa5, 0x43, + 0x4d, 0x40, 0x43, 0x40, 0x16, 0x41, 0x00, 0x00, 0x00, 0x00, 0x15, 0x44, 0xbb, 0x44, 0xd5, 0x3c, + 0xe0, 0x41, 0xb9, 0x3e, 0xc0, 0x44, 0xa5, 0x41, 0xc0, 0x3c, 0xf5, 0x44, 0x7c, 0x42, 0x31, 0x3c, + 0x84, 0x40, 0xaf, 0x44, 0xea, 0x44, 0xbe, 0x40, 0x1e, 0x41, 0x89, 0x42, 0xb8, 0x3f, 0x2d, 0x44, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa4, 0x40, 0x77, 0x44, 0xb0, 0x41, 0x8c, 0x41, + 0x00, 0x44, 0x78, 0x44, 0x1d, 0x40, 0x47, 0x44, 0xbd, 0x44, 0xbd, 0x41, 0x9f, 0x44, 0xd5, 0x44, + 0x72, 0x41, 0xde, 0x41, 0xf3, 0x42, 0xae, 0x3d, 0x9d, 0x3e, 0xd8, 0x44, 0x79, 0x42, 0x00, 0x00, + 0x00, 0x00, 0x27, 0x43, 0x42, 0x44, 0xa2, 0x3e, 0xcc, 0x43, 0x76, 0x3d, 0xbe, 0x40, 0xdb, 0x40, + 0xd2, 0x3f, 0xd8, 0x41, 0x3f, 0x44, 0x3a, 0x44, 0xe2, 0x3d, 0xb9, 0x40, 0xbe, 0x44, 0x70, 0x43, + 0x02, 0x40, 0xdc, 0x42, 0xf1, 0x42, 0x61, 0x40, 0x00, 0x00, 0x00, 0x00, 0xab, 0x41, 0x7a, 0x40, + 0x79, 0x3d, 0xae, 0x41, 0xe2, 0x3f, 0x5a, 0x42, 0x56, 0x44, 0xf9, 0x43, 0x66, 0x44, 0xce, 0x44, + 0x4b, 0x43, 0xae, 0x3e, 0xcb, 0x44, 0x2a, 0x3c, 0x83, 0x3f, 0x66, 0x42, 0x64, 0x44, 0x3a, 0x44, + 0x91, 0x44, 0x00, 0x00, 0x00, 0x00, 0x16, 0x44, 0xcd, 0x41, 0x45, 0x3d, 0xf0, 0x40, 0x91, 0x41, + 0x46, 0x43, 0xbf, 0x44, 0x43, 0x43, 0xd5, 0x40, 0x98, 0x44, 0x36, 0x44, 0xca, 0x41, 0x07, 0x41, + 0x68, 0x3d, 0xa9, 0x41, 0x20, 0x42, 0xb7, 0x3c, 0xc5, 0x44, 0x16, 0x44, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x49, 0x44, 0x39, 0x43, 0x2a, 0x44, 0xcb, 0x3f, 0x3c, 0x3e, 0xd6, 0x40, + 0x56, 0x3c, 0x1c, 0x44, 0x26, 0x44, 0x6b, 0x3e, 0xee, 0x44, 0x9b, 0x42, 0x2b, 0x3e, 0x81, 0x42, + 0x50, 0x3f, 0xac, 0x42, 0xaf, 0x3c, 0x17, 0x44, 0xe6, 0x42, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x3d, + 0x47, 0x43, 0x8f, 0x41, 0x1b, 0x42, 0xf6, 0x43, 0xd3, 0x3c, 0x16, 0x40, 0x3f, 0x40, 0xd1, 0x44, + 0xb4, 0x3c, 0x68, 0x43, 0xd6, 0x44, 0x72, 0x3e, 0x74, 0x43, 0x0b, 0x44, 0x2d, 0x3e, 0x1b, 0x3d, + 0x22, 0x44, 0xab, 0x43, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x42, 0x25, 0x44, 0x7e, 0x3e, 0x61, 0x44, + 0xca, 0x3d, 0x4a, 0x3c, 0xa3, 0x3f, 0x45, 0x3e, 0xa4, 0x41, 0x9e, 0x42, 0x22, 0x3d, 0xbf, 0x40, + 0x9c, 0x41, 0xa2, 0x40, 0x3b, 0x42, 0xd2, 0x41, 0x48, 0x44, 0xb8, 0x43, 0x3d, 0x44, 0x00, 0x00, + 0x00, 0x00, 0xcd, 0x43, 0x65, 0x3e, 0x85, 0x3f, 0x2f, 0x43, 0x8b, 0x41, 0x6b, 0x41, 0x6d, 0x40, + 0xd5, 0x40, 0xfa, 0x42, 0xf0, 0x3c, 0x86, 0x44, 0x8e, 0x41, 0x55, 0x40, 0x54, 0x42, 0xe3, 0x41, + 0x04, 0x43, 0xf6, 0x41, 0xd5, 0x3f, 0x6a, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00}; +// pad_value = 0 +unsigned char pad_int8_in[] = { + 0xac, 0x21, 0x22, 0x40, 0x8a, 0xe0, 0x2d, 0x3f, 0xbb, 0x77, 0x1d, 0x40, 0xa0, 0xcc, 0x8f, 0xbf, + 0x4d, 0x1d, 0x9d, 0x40, 0xb1, 0xb1, 0x14, 0x40, 0xfb, 0xf3, 0x96, 0xbf, 0x76, 0x8b, 0x3f, 0xbf, + 0x98, 0x1f, 0x8d, 0x40, 0x93, 0x3a, 0xc8, 0x3e, 0x8f, 0x8c, 0x94, 0x40, 0x7e, 0xeb, 0x8e, 0x40, + 0x67, 0xb2, 0x8c, 0x3f, 0x6b, 0xda, 0x7e, 0x40, 0x24, 0x07, 0xa6, 0xbf, 0xcb, 0x27, 0x9c, 0x40, + 0x1c, 0x3f, 0x9a, 0xbf, 0x75, 0x01, 0xd4, 0x3f, 0xce, 0xac, 0xdd, 0xbf, 0x65, 0x66, 0x83, 0x3f, + 0x4c, 0xa9, 0x80, 0x40, 0x75, 0xed, 0xa9, 0x3e, 0x79, 0x94, 0x86, 0xbf, 0x96, 0xa9, 0x42, 0x40, + 0x4f, 0x4e, 0x4b, 0x40, 0x9a, 0x1e, 0x82, 0xbf, 0x51, 0x81, 0xbc, 0x3f, 0x84, 0x86, 0x98, 0x40, + 0xbe, 0x5e, 0xbf, 0xbf, 0x78, 0x59, 0x3a, 0x40, 0xf4, 0x53, 0x26, 0xbf, 0xd7, 0xc0, 0x88, 0x40, + 0x7d, 0x5d, 0xb4, 0xbf, 0x09, 0xef, 0x73, 0x40, 0xd7, 0x9c, 0x9f, 0x40, 0x0a, 0x7a, 0x88, 0x3f, + 0x3e, 0xb2, 0x29, 0x3f, 0xde, 0x96, 0xdc, 0xbf, 0x22, 0xfb, 0x3c, 0x40, 0x11, 0x1c, 0x49, 0x40, + 0xab, 0x3e, 0x8a, 0x40, 0x56, 0xbe, 0x8a, 0x3f, 0x10, 0xcb, 0x81, 0xbf, 0xdf, 0xe7, 0x92, 0x3f, + 0x68, 0x84, 0xd5, 0xbf, 0x6a, 0x95, 0xd3, 0x3f, 0x6d, 0xcd, 0x7f, 0x40, 0x66, 0x38, 0x3d, 0x40, + 0x37, 0x7e, 0xc2, 0x3e, 0xb0, 0x00, 0xc3, 0xbe, 0x02, 0x4b, 0x16, 0x40, 0xfb, 0xe7, 0x8d, 0x3e, + 0x1e, 0x41, 0xd5, 0x3f, 0xe5, 0x1f, 0x55, 0x40, 0x42, 0x51, 0x78, 0x40, 0x1c, 0xed, 0x7b, 0x40, + 0x2c, 0x13, 0x27, 0x40}; +unsigned char pad_int8_out[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xac, 0x21, 0x22, 0x40, 0x8a, 0xe0, 0x2d, 0x3f, 0xbb, 0x77, + 0x1d, 0x40, 0xa0, 0xcc, 0x8f, 0xbf, 0x4d, 0x1d, 0x9d, 0x00, 0x00, 0x40, 0xb1, 0xb1, 0x14, 0x40, + 0xfb, 0xf3, 0x96, 0xbf, 0x76, 0x8b, 0x3f, 0xbf, 0x98, 0x1f, 0x8d, 0x40, 0x93, 0x3a, 0x00, 0x00, + 0xc8, 0x3e, 0x8f, 0x8c, 0x94, 0x40, 0x7e, 0xeb, 0x8e, 0x40, 0x67, 0xb2, 0x8c, 0x3f, 0x6b, 0xda, + 0x7e, 0x40, 0x24, 0x00, 0x00, 0x07, 0xa6, 0xbf, 0xcb, 0x27, 0x9c, 0x40, 0x1c, 0x3f, 0x9a, 0xbf, + 0x75, 0x01, 0xd4, 0x3f, 0xce, 0xac, 0xdd, 0xbf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x65, 0x66, 0x83, 0x3f, 0x4c, 0xa9, 0x80, 0x40, 0x75, 0xed, 0xa9, 0x3e, + 0x79, 0x94, 0x86, 0xbf, 0x96, 0xa9, 0x42, 0x00, 0x00, 0x40, 0x4f, 0x4e, 0x4b, 0x40, 0x9a, 0x1e, + 0x82, 0xbf, 0x51, 0x81, 0xbc, 0x3f, 0x84, 0x86, 0x98, 0x40, 0xbe, 0x5e, 0x00, 0x00, 0xbf, 0xbf, + 0x78, 0x59, 0x3a, 0x40, 0xf4, 0x53, 0x26, 0xbf, 0xd7, 0xc0, 0x88, 0x40, 0x7d, 0x5d, 0xb4, 0xbf, + 0x09, 0x00, 0x00, 0xef, 0x73, 0x40, 0xd7, 0x9c, 0x9f, 0x40, 0x0a, 0x7a, 0x88, 0x3f, 0x3e, 0xb2, + 0x29, 0x3f, 0xde, 0x96, 0xdc, 0xbf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x22, 0xfb, 0x3c, 0x40, 0x11, 0x1c, 0x49, 0x40, 0xab, 0x3e, 0x8a, 0x40, 0x56, 0xbe, + 0x8a, 0x3f, 0x10, 0xcb, 0x81, 0x00, 0x00, 0xbf, 0xdf, 0xe7, 0x92, 0x3f, 0x68, 0x84, 0xd5, 0xbf, + 0x6a, 0x95, 0xd3, 0x3f, 0x6d, 0xcd, 0x7f, 0x40, 0x66, 0x38, 0x00, 0x00, 0x3d, 0x40, 0x37, 0x7e, + 0xc2, 0x3e, 0xb0, 0x00, 0xc3, 0xbe, 0x02, 0x4b, 0x16, 0x40, 0xfb, 0xe7, 0x8d, 0x3e, 0x1e, 0x00, + 0x00, 0x41, 0xd5, 0x3f, 0xe5, 0x1f, 0x55, 0x40, 0x42, 0x51, 0x78, 0x40, 0x1c, 0xed, 0x7b, 0x40, + 0x2c, 0x13, 0x27, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; \ No newline at end of file diff --git a/tests/utils/math_snr.c b/tests/utils/math_snr.c index 7cb5580b..a2ec95f9 100644 --- a/tests/utils/math_snr.c +++ b/tests/utils/math_snr.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ /* ---------------------------------------------------------------------- * Include project header files diff --git a/tests/utils/math_snr.h b/tests/utils/math_snr.h index 7fb36b33..474c4ca1 100644 --- a/tests/utils/math_snr.h +++ b/tests/utils/math_snr.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include #include diff --git a/tests/utils/test_utils.c b/tests/utils/test_utils.c index 444b29c5..d191537e 100644 --- a/tests/utils/test_utils.c +++ b/tests/utils/test_utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,14 +16,15 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" -#include "stdint.h" -#include "stdio.h" -#include "math.h" #include "float.h" +#include "math.h" #include "math_snr.h" -#include "test_utils.h" +#include "stdint.h" +#include "stdio.h" int test_number = 0; int failures = 0; @@ -31,27 +32,26 @@ int failures = 0; int done_testing(void) { if (0 < failures) { - printf("Failed %d tests\n", failures); - exit(EXIT_FAILURE); - } else { - printf("All functions tested sucessfully\n"); - exit(EXIT_SUCCESS); - } - return failures; + printf("Failed %d tests\n", failures); + exit(EXIT_FAILURE); + } else { + printf("All functions tested sucessfully\n"); + exit(EXIT_SUCCESS); + } + return failures; } -void init_testsuite(const char* testname) +void init_testsuite(const char *testname) { - printf("%s", testname); - test_number = 0; - failures = 0; + printf("%s", testname); + test_number = 0; + failures = 0; } - int *read_input_data_f32(char *path) { FILE *fp = fopen(path, "rb"); - if(fp == NULL) { + if (fp == NULL) { printf("Invalid input file: %s\n", path); return NULL; } @@ -59,8 +59,8 @@ int *read_input_data_f32(char *path) int size; fread(&size, 4, 1, fp); - int *buffer = malloc(size* sizeof(int)); - if(buffer == NULL) { + int *buffer = malloc(size * sizeof(int)); + if (buffer == NULL) { printf("Malloc fail.\n"); return NULL; } @@ -74,7 +74,7 @@ int *read_input_data_f32(char *path) char *read_input_data_fp16(char *path, int int_size) { FILE *fp = fopen(path, "rb"); - if(fp == NULL) { + if (fp == NULL) { printf("Invalid input file: %s\n", path); return NULL; } @@ -86,7 +86,7 @@ char *read_input_data_fp16(char *path, int int_size) fread(buffer, 4, int_size, fp); - fread(buffer+int_size*4, 2, size-int_size, fp); + fread(buffer + int_size * 4, 2, size - int_size, fp); fclose(fp); return buffer; @@ -123,9 +123,9 @@ float compute_kl(float *p, float *q, uint32_t size) // calculate cosine similarity float compute_cs(float *a, float *b, uint32_t size) { - float dot_sum = 0.0; - float a_norm = 0.0; - float b_norm = 0.0; + double dot_sum = 0.0; + double a_norm = 0.0; + double b_norm = 0.0; float res = 0.0; for (int i = 0; i < size; i++) { @@ -150,38 +150,37 @@ void result_verify_int32(int *reference, int *output, int *input, float gap, int failures++; } #ifdef BASIC_DEBUG - if (error > gap) - { - printf("i = %d :%d, %d, %d\n", i, reference[i], output[i], input[i]); + if (error > gap) { + printf("i = %d :%d, %d, %d\n", i, reference[i], output[i], input[i]); } #endif } } -void result_verify_f32(float *reference, float *output, float *input, float gap, int size, bool save) +void result_verify_f32(float *reference, float *output, float *input, float gap, int size, + bool save) { int i; float error, snr; float max_error = 0; for (i = 0; i < size; i++) { - if(isinf(reference[i]) && isinf(output[i]) || isnan(reference[i]) && isnan(output[i])){ + if (isinf(reference[i]) && isinf(output[i]) || isnan(reference[i]) && isnan(output[i])) { error = 0; } else { error = fabs(reference[i] - output[i]); - if(error > gap) { - error = fabs(reference[i] - output[i])/fabs(reference[i] + 1e-9); + if (error > gap) { + error = fabs(reference[i] - output[i]) / fabs(reference[i] + 1e-9); } } - if(error > max_error) { + if (error > max_error) { max_error = error; } test_number++; #ifdef BASIC_DEBUG - if (error > gap) - { - printf("i = %d :%.6f, %.6f, %.6f\n", i, reference[i], output[i], input[i]); + if (error > gap) { + printf("i = %d :%.6f, %.6f, %.6f\n", i, reference[i], output[i], input[i]); } #endif } @@ -214,7 +213,8 @@ float compute_cs_fp16(__fp16 *a, __fp16 *b, uint32_t size) return res; } -void result_verify_fp16(__fp16 *reference, __fp16 *output, __fp16 *input, float gap, int size, bool save) +void result_verify_fp16(__fp16 *reference, __fp16 *output, __fp16 *input, float gap, int size, + bool save) { int i; __fp16 error = 0; @@ -222,18 +222,17 @@ void result_verify_fp16(__fp16 *reference, __fp16 *output, __fp16 *input, float for (i = 0; i < size; i++) { error = fabs(reference[i] - output[i]); - if(error > gap) { - error = fabs(reference[i] - output[i])/fabs(reference[i] + 1e-9); + if (error > gap) { + error = fabs(reference[i] - output[i]) / fabs(reference[i] + 1e-9); } - if(error > max_error) { + if (error > max_error) { max_error = error; } test_number++; #ifdef BASIC_DEBUG - if (error > gap) - { - printf("i = %d :%.6f, %.6f, %.6f\n", i, reference[i], output[i], input[i]); + if (error > gap) { + printf("i = %d :%.6f, %.6f, %.6f\n", i, reference[i], output[i], input[i]); } #endif } @@ -241,7 +240,6 @@ void result_verify_fp16(__fp16 *reference, __fp16 *output, __fp16 *input, float float cs = compute_cs_fp16(output, reference, size); printf("The cos sim is %f.\n", cs); - } #endif @@ -252,8 +250,8 @@ void result_verify_bool(bool *reference, bool *output, float *input, float gap, for (i = 0; i < size; i++) { error = fabs(reference[i] - output[i]); - if(error > gap) { - error = fabs(reference[i] - output[i])/fabs(reference[i] + 1e-9); + if (error > gap) { + error = fabs(reference[i] - output[i]) / fabs(reference[i] + 1e-9); } test_number++; @@ -261,15 +259,15 @@ void result_verify_bool(bool *reference, bool *output, float *input, float gap, failures++; } #ifdef BASIC_DEBUG - if (error > gap) - { - printf("i = %d, %d, %.6f\n", i, reference[i], output[i], input[i]); + if (error > gap) { + printf("i = %d, %d, %.6f\n", i, reference[i], output[i], input[i]); } #endif } } -void result_verify_8(float *reference, struct csi_tensor *output, int8_t *input, float gap, int size, bool save) +void result_verify_8(float *reference, struct csi_tensor *output, int8_t *input, float gap, + int size, bool save) { int i; float error; @@ -280,26 +278,27 @@ void result_verify_8(float *reference, struct csi_tensor *output, int8_t *input, for (i = 0; i < size; i++) { if (output->dtype == CSINN_DTYPE_UINT8) { - output_tmp[i] = csi_ref_dequantize_u8_to_f32(*((uint8_t *)output_data + i), output->qinfo); + output_tmp[i] = + csi_ref_dequantize_u8_to_f32(*((uint8_t *)output_data + i), output->qinfo); } else if (output->dtype == CSINN_DTYPE_INT8) { - output_tmp[i] = csi_ref_dequantize_i8_to_f32(*((int8_t *)output_data + i), output->qinfo); + output_tmp[i] = + csi_ref_dequantize_i8_to_f32(*((int8_t *)output_data + i), output->qinfo); } - if(isinf(reference[i]) || isnan(reference[i])){ + if (isinf(reference[i]) || isnan(reference[i])) { error = 0; } else { error = fabs(reference[i] - output_tmp[i]); - if(error > gap) { - error = fabs(reference[i] - output_tmp[i])/fabs(reference[i] + 1e-9); + if (error > gap) { + error = fabs(reference[i] - output_tmp[i]) / fabs(reference[i] + 1e-9); } } - if(error > max_error) { + if (error > max_error) { max_error = error; } test_number++; #ifdef BASIC_DEBUG - if (error > gap) - { + if (error > gap) { printf("i = %d :%.6f, %.6f, %.6f\n", i, reference[i], output_tmp[i], input[i]); } #endif @@ -317,8 +316,8 @@ void result_verify_8(float *reference, struct csi_tensor *output, int8_t *input, free(output_tmp); } - -void result_verify_q7(int8_t *reference, int8_t *output, int8_t *input, float gap, int size, bool save) +void result_verify_q7(int8_t *reference, int8_t *output, int8_t *input, float gap, int size, + bool save) { int i; float error, snr; @@ -331,17 +330,16 @@ void result_verify_q7(int8_t *reference, int8_t *output, int8_t *input, float ga failures++; } #ifdef BASIC_DEBUG - if (error > gap) - { - printf("i = %d :%#x, %#x, %#x\n", i, reference[i], output[i], input[i]); + if (error > gap) { + printf("i = %d :%#x, %#x, %#x\n", i, reference[i], output[i], input[i]); } #endif } printf("/====== total = %6d(size=%5d) || error = %5d =======/\n", test_number, size, failures); } - -void result_verify_q15(int16_t *reference, int16_t *output, int16_t *input, float gap, int size, bool save) +void result_verify_q15(int16_t *reference, int16_t *output, int16_t *input, float gap, int size, + bool save) { int i; float error, snr; @@ -354,16 +352,14 @@ void result_verify_q15(int16_t *reference, int16_t *output, int16_t *input, floa failures++; } #ifdef BASIC_DEBUG - if (error > gap) - { - printf("i = %d :%d, %d, %d\n", i, reference[i], output[i], input[i]); + if (error > gap) { + printf("i = %d :%d, %d, %d\n", i, reference[i], output[i], input[i]); } #endif } printf("/====== total = %6d(size=%5d) || error = %5d =======/\n", test_number, size, failures); } - void get_scale_and_zp(float max_value, float min_value, float *scale, int *zp) { int valid_range = 255; @@ -374,11 +370,11 @@ void get_scale_and_zp(float max_value, float min_value, float *scale, int *zp) scale_tmp = (max_value - min_value) / (float)valid_range; - if (scale_tmp){ + if (scale_tmp) { zp_tmp = 0 - min_value / scale_tmp; } else { scale_tmp = 1; - zp_tmp = max_value; + zp_tmp = max_value; } zp_tmp = zp_tmp > 255 ? 255 : zp_tmp; zp_tmp = zp_tmp < 0 ? 0 : zp_tmp; @@ -397,11 +393,11 @@ void get_scale_and_zp_i8_asym(float max_value, float min_value, float *scale, in scale_tmp = (max_value - min_value) / (float)valid_range; - if (scale_tmp){ + if (scale_tmp) { zp_tmp = -128 - min_value / scale_tmp; } else { scale_tmp = 1; - zp_tmp = 0; + zp_tmp = 0; } *zp = (int)round(zp_tmp); @@ -413,15 +409,15 @@ void get_scale_and_zp_i8(float max_value, float min_value, float *scale, int *zp int valid_range = 255; float scale_tmp, zp_tmp, max_tmp; - if (fabs(max_value) >= fabs(min_value)){ + if (fabs(max_value) >= fabs(min_value)) { max_tmp = fabs(max_value); - } else{ + } else { max_tmp = fabs(min_value); } scale_tmp = 2 * max_tmp / (float)valid_range; zp_tmp = 0; - if (scale_tmp == 0){ + if (scale_tmp == 0) { scale_tmp = 1; } @@ -459,12 +455,12 @@ void find_min_max(float *input, float *max_value, float *min_value, int size) float max_tmp = -FLT_MAX; float min_tmp = FLT_MAX; - for(i = 0; i < size; i++) { - if(input[i] != -FLT_MAX && input[i] != FLT_MAX) { - if(input[i] > max_tmp) { + for (i = 0; i < size; i++) { + if (input[i] != -FLT_MAX && input[i] != FLT_MAX) { + if (input[i] > max_tmp) { max_tmp = input[i]; } - if(input[i] < min_tmp) { + if (input[i] < min_tmp) { min_tmp = input[i]; } } @@ -538,7 +534,7 @@ void get_quant_info(struct csi_tensor *tensor) } int size = csi_tensor_size(tensor); find_min_max(tensor->data, &max, &min, size); - if (tensor->sess->base_api == CSINN_LIGHT) { + if ((tensor->sess != NULL) && (tensor->sess->base_api == CSINN_LIGHT)) { get_scale_and_zp_power2_i8(max, min, &scale, &zp); tensor->qinfo->max = max; if (min >= 0 && max > 0) { @@ -587,8 +583,103 @@ struct csi_tensor *convert_f32_input(struct csi_tensor *tensor, int dtype, struc return ret; } +struct csi_tensor *convert_f32_layer(struct csi_tensor *tensor, enum csinn_quant_enum qtype, + enum csinn_api_enum api) +{ + set_quant_info(tensor, qtype, api); + struct csi_tensor *ret = csi_alloc_tensor(NULL); + csi_tensor_copy(ret, tensor); + if ((qtype == CSINN_QUANT_INT8_SYM) || (qtype == CSINN_QUANT_INT8_ASYM)) { + ret->dtype = CSINN_DTYPE_INT8; + } else if (qtype == CSINN_QUANT_UINT8_ASYM) { + ret->dtype = CSINN_DTYPE_UINT8; + } else if (qtype == CSINN_QUANT_INT16_SYM) { + ret->dtype = CSINN_DTYPE_INT16; + } else if (qtype == CSINN_QUANT_FLOAT16) { + ret->dtype = CSINN_DTYPE_FLOAT16; + } else if (qtype == CSINN_QUANT_FLOAT32) { + ret->dtype = CSINN_DTYPE_FLOAT32; + } else { + printf("unsupport qinfo\n"); + } + + ret->data = malloc(csi_tensor_byte_size(ret)); + csi_tensor_data_convert(ret, tensor); + + return ret; +} + void free_input(struct csi_tensor *tensor) { csi_mem_free(tensor->data); csi_free_tensor(tensor); } + +struct csi_tensor *fuse_zp_to_bias(struct csi_tensor *input, struct csi_tensor *weight, + struct csi_tensor *bias, enum csinn_api_enum api) +{ + set_quant_info(input, CSINN_QUANT_INT8_ASYM, api); + set_quant_info(weight, CSINN_QUANT_INT8_SYM, api); + int b_size = csi_tensor_size(bias); + struct csi_tensor *ret = csi_alloc_tensor(NULL); + csi_tensor_copy(ret, bias); + ret->qinfo->scale = input->qinfo->scale * weight->qinfo->scale; + ret->qinfo->zero_point = 0; + ret->dtype = CSINN_DTYPE_INT32; + ret->data = malloc(csi_tensor_byte_size(ret)); + int32_t *ret_data = ret->data; + + int b_length = b_size ? bias->dim[0] : weight->dim[0]; + int inner_size = 1; + float new_b = 0.0; + for (int i = 1; i < weight->dim_count; i++) { + inner_size *= weight->dim[i]; + } + + float *bias_data = (float *)bias->data; + float *weight_data = (float *)weight->data; + + float sp = input->qinfo->scale * input->qinfo->zero_point; + + for (int i = 0; i < b_length; i++) { + new_b = b_size ? bias_data[i] : 0.0; + for (int j = 0; j < inner_size; j++) { + int w_index = i * inner_size + j; + new_b -= weight_data[w_index] * sp; + } + ret_data[i] = new_b / ret->qinfo->scale; + } + + return ret; +} + +void evaluate_error(void *out, void *ref, int size, enum csinn_dtype_enum dtype) +{ + float *output = csi_mem_alloc(size * sizeof(float)); + float *reference = csi_mem_alloc(size * sizeof(float)); + if (dtype == CSINN_DTYPE_FLOAT32) { + memcpy(output, out, size * sizeof(float)); + memcpy(reference, ref, size * sizeof(float)); + } else if (dtype == CSINN_DTYPE_FLOAT16) { + for (int i = 0; i < size; i++) { + output[i] = *((__fp16 *)out + i); + reference[i] = *((__fp16 *)ref + i); + } + } else if (dtype == CSINN_DTYPE_INT8) { + for (int i = 0; i < size; i++) { + output[i] = *((int8_t *)out + i); + reference[i] = *((int8_t *)ref + i); + } + } + float kl = compute_kl(output, reference, size); + printf("The kl diver is %f.\n", kl); + + float cs = compute_cs(output, reference, size); + printf("The cos sim is %f.\n", cs); + + if (kl > 0.01f || cs < 0.99f) { + failures++; + } + csi_mem_free(output); + csi_mem_free(reference); +} diff --git a/tests/utils/test_utils.h b/tests/utils/test_utils.h index 9310d048..b397f2b9 100644 --- a/tests/utils/test_utils.h +++ b/tests/utils/test_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,41 +16,64 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #ifndef TEST_UTILS_H #define TEST_UTILS_H - +#include +#include #include #include -#include -#include + #include "csi_nn.h" #include "csi_ref.h" +#ifdef __cplusplus +extern "C" { +#endif + int *read_input_data_f32(char *path); char *read_input_data_fp16(char *path, int int_size); float compute_kl(float *p, float *q, uint32_t size); float compute_cs(float *a, float *b, uint32_t size); void result_verify_int32(int *reference, int *output, int *input, float gap, int size, bool save); -void result_verify_f32(float *reference, float *output, float *input, float gap, int size, bool save); -void result_verify_bool(bool *reference, bool *output, float *input, float gap, int size, bool save); -void result_verify_8(float *reference, struct csi_tensor *output, int8_t *input, float gap, int size, bool save); -void result_verify_q7(int8_t *reference, int8_t *output, int8_t *input, float gap, int size, bool save); -void result_verify_q15(int16_t *reference, int16_t *output, int16_t *input, float gap, int size, bool save); +void result_verify_f32(float *reference, float *output, float *input, float gap, int size, + bool save); +void result_verify_bool(bool *reference, bool *output, float *input, float gap, int size, + bool save); +void result_verify_8(float *reference, struct csi_tensor *output, int8_t *input, float gap, + int size, bool save); +void result_verify_q7(int8_t *reference, int8_t *output, int8_t *input, float gap, int size, + bool save); +void result_verify_q15(int16_t *reference, int16_t *output, int16_t *input, float gap, int size, + bool save); void get_scale_and_zp(float max_value, float min_value, float *scale, int *zp); void get_scale_and_zp_i8(float max_value, float min_value, float *scale, int *zp); void find_min_max(float *input, float *max_value, float *min_value, int size); void get_quant_info(struct csi_tensor *tensor); -void set_quant_info(struct csi_tensor *tensor, enum csinn_quant_enum qtype, enum csinn_api_enum api); +void set_quant_info(struct csi_tensor *tensor, enum csinn_quant_enum qtype, + enum csinn_api_enum api); struct csi_tensor *convert_input(struct csi_tensor *tensor, int dtype); -struct csi_tensor *convert_f32_input(struct csi_tensor *tensor, int dtype, struct csi_session *sess); +struct csi_tensor *convert_f32_input(struct csi_tensor *tensor, int dtype, + struct csi_session *sess); +struct csi_tensor *convert_f32_layer(struct csi_tensor *tensor, enum csinn_quant_enum qtype, + enum csinn_api_enum api); +struct csi_tensor *fuse_zp_to_bias(struct csi_tensor *input, struct csi_tensor *weight, + struct csi_tensor *bias, enum csinn_api_enum api); void free_input(struct csi_tensor *tensor); -extern void init_testsuite(const char* testname); -extern int done_testing(void); +extern void init_testsuite(const char *testname); +extern int done_testing(void); #ifdef RISCV_TEST float compute_cs_fp16(__fp16 *a, __fp16 *b, uint32_t size); -void result_verify_fp16(__fp16 *reference, __fp16 *output, __fp16 *input, float gap, int size, bool save); +void result_verify_fp16(__fp16 *reference, __fp16 *output, __fp16 *input, float gap, int size, + bool save); +#endif + +void evaluate_error(void *out, void *ref, int size, enum csinn_dtype_enum dtype); + +#ifdef __cplusplus +} #endif -#endif /* TEST_UTILS_H */ + +#endif /* TEST_UTILS_H */ diff --git a/tests/validation/Makefile.c906 b/tests/validation/Makefile.c906 index d051276b..2e2c8a32 100644 --- a/tests/validation/Makefile.c906 +++ b/tests/validation/Makefile.c906 @@ -142,7 +142,7 @@ test_objs += maxpool3d_f32.o test_objs += maxpool_nchw_f32.o test_objs += averagepool_nchw_f32.o -test_objs += ./riscv_xt9xx/relu_fp16.o +# test_objs += ./riscv_xt9xx/relu_fp16.o utils_objs = diff --git a/tests/validation/abs_f32.c b/tests/validation/abs_f32.c index 8da10a4f..3181eb0c 100644 --- a/tests/validation/abs_f32.c +++ b/tests/validation/abs_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/abs_i8.c b/tests/validation/abs_i8.c index 6182d29a..9f3a8b01 100644 --- a/tests/validation/abs_i8.c +++ b/tests/validation/abs_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/abs_u8.c b/tests/validation/abs_u8.c index c07d9cba..57d9c949 100644 --- a/tests/validation/abs_u8.c +++ b/tests/validation/abs_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/acos_f32.c b/tests/validation/acos_f32.c index 25b5cadf..d7bd5557 100644 --- a/tests/validation/acos_f32.c +++ b/tests/validation/acos_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/acos_i8.c b/tests/validation/acos_i8.c index 314a29d5..d077513e 100644 --- a/tests/validation/acos_i8.c +++ b/tests/validation/acos_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/acos_u8.c b/tests/validation/acos_u8.c index 8b2ea6a6..4c5e83c5 100644 --- a/tests/validation/acos_u8.c +++ b/tests/validation/acos_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/acosh_f32.c b/tests/validation/acosh_f32.c index 60a1d4b4..a6ee3a28 100644 --- a/tests/validation/acosh_f32.c +++ b/tests/validation/acosh_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/acosh_i8.c b/tests/validation/acosh_i8.c index 94819fbb..dffb1000 100644 --- a/tests/validation/acosh_i8.c +++ b/tests/validation/acosh_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/acosh_u8.c b/tests/validation/acosh_u8.c index 3ce1cfc5..dec25e2b 100644 --- a/tests/validation/acosh_u8.c +++ b/tests/validation/acosh_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/add_f32.c b/tests/validation/add_f32.c index 6a52a40d..a9932df1 100644 --- a/tests/validation/add_f32.c +++ b/tests/validation/add_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/add_i8.c b/tests/validation/add_i8.c index 1ebd7bd7..24207a1b 100644 --- a/tests/validation/add_i8.c +++ b/tests/validation/add_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/add_u8.c b/tests/validation/add_u8.c index fea5f788..43fca61b 100644 --- a/tests/validation/add_u8.c +++ b/tests/validation/add_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/and_u32.c b/tests/validation/and_u32.c index 28537cfe..b00a3210 100644 --- a/tests/validation/and_u32.c +++ b/tests/validation/and_u32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/arange_f32.c b/tests/validation/arange_f32.c index 066de7a0..d14f0e3c 100644 --- a/tests/validation/arange_f32.c +++ b/tests/validation/arange_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/arange_i8.c b/tests/validation/arange_i8.c index a36aebbd..42842c52 100644 --- a/tests/validation/arange_i8.c +++ b/tests/validation/arange_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/arange_u8.c b/tests/validation/arange_u8.c index 56b0018d..09651ea2 100644 --- a/tests/validation/arange_u8.c +++ b/tests/validation/arange_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/argmax_stride_f32.c b/tests/validation/argmax_stride_f32.c index 1a330007..d5152a10 100644 --- a/tests/validation/argmax_stride_f32.c +++ b/tests/validation/argmax_stride_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/argmax_stride_u8.c b/tests/validation/argmax_stride_u8.c index 3fd2605f..db9c9f6d 100644 --- a/tests/validation/argmax_stride_u8.c +++ b/tests/validation/argmax_stride_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/argmin_stride_f32.c b/tests/validation/argmin_stride_f32.c index d5c140be..f7413fd8 100644 --- a/tests/validation/argmin_stride_f32.c +++ b/tests/validation/argmin_stride_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/argmin_stride_u8.c b/tests/validation/argmin_stride_u8.c index b6c2c122..de82f431 100644 --- a/tests/validation/argmin_stride_u8.c +++ b/tests/validation/argmin_stride_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/asin_f32.c b/tests/validation/asin_f32.c index 274a3d3c..cbd02916 100644 --- a/tests/validation/asin_f32.c +++ b/tests/validation/asin_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/asin_i8.c b/tests/validation/asin_i8.c index b36bb7e4..53275bf0 100644 --- a/tests/validation/asin_i8.c +++ b/tests/validation/asin_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/asin_u8.c b/tests/validation/asin_u8.c index 6adff1bb..486f7cc8 100644 --- a/tests/validation/asin_u8.c +++ b/tests/validation/asin_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/asinh_f32.c b/tests/validation/asinh_f32.c index c9475b65..16c3de0c 100644 --- a/tests/validation/asinh_f32.c +++ b/tests/validation/asinh_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/asinh_i8.c b/tests/validation/asinh_i8.c index 3575eac7..ee8d9dd0 100644 --- a/tests/validation/asinh_i8.c +++ b/tests/validation/asinh_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/asinh_u8.c b/tests/validation/asinh_u8.c index 7dbcb4c0..5f989c38 100644 --- a/tests/validation/asinh_u8.c +++ b/tests/validation/asinh_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/atan_f32.c b/tests/validation/atan_f32.c index 9bc3b2ae..0d17d612 100644 --- a/tests/validation/atan_f32.c +++ b/tests/validation/atan_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/atan_i8.c b/tests/validation/atan_i8.c index af789f91..72bd32f1 100644 --- a/tests/validation/atan_i8.c +++ b/tests/validation/atan_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/atan_u8.c b/tests/validation/atan_u8.c index 4b3583b5..5c348568 100644 --- a/tests/validation/atan_u8.c +++ b/tests/validation/atan_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/atanh_f32.c b/tests/validation/atanh_f32.c index 8d577957..ee0fa35a 100644 --- a/tests/validation/atanh_f32.c +++ b/tests/validation/atanh_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/atanh_i8.c b/tests/validation/atanh_i8.c index d4fbeaca..36e3e2f0 100644 --- a/tests/validation/atanh_i8.c +++ b/tests/validation/atanh_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/atanh_u8.c b/tests/validation/atanh_u8.c index 741d2846..5483eab5 100644 --- a/tests/validation/atanh_u8.c +++ b/tests/validation/atanh_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/averagepool3d_f32.c b/tests/validation/averagepool3d_f32.c index fedad21b..f9b2601b 100644 --- a/tests/validation/averagepool3d_f32.c +++ b/tests/validation/averagepool3d_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/averagepool3d_i8.c b/tests/validation/averagepool3d_i8.c index 73f79ed8..381b55b9 100644 --- a/tests/validation/averagepool3d_i8.c +++ b/tests/validation/averagepool3d_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/averagepool3d_u8.c b/tests/validation/averagepool3d_u8.c index 672cc766..7692de6b 100644 --- a/tests/validation/averagepool3d_u8.c +++ b/tests/validation/averagepool3d_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/averagepool_f32.c b/tests/validation/averagepool_f32.c index 42f59e34..44f7cc9a 100644 --- a/tests/validation/averagepool_f32.c +++ b/tests/validation/averagepool_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/averagepool_i8.c b/tests/validation/averagepool_i8.c index a0d02617..2d3762eb 100644 --- a/tests/validation/averagepool_i8.c +++ b/tests/validation/averagepool_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/averagepool_nchw_f32.c b/tests/validation/averagepool_nchw_f32.c index be691a43..4fe417ee 100644 --- a/tests/validation/averagepool_nchw_f32.c +++ b/tests/validation/averagepool_nchw_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/averagepool_nchw_i8.c b/tests/validation/averagepool_nchw_i8.c index c5f951cb..d9389fdd 100644 --- a/tests/validation/averagepool_nchw_i8.c +++ b/tests/validation/averagepool_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/averagepool_nchw_u8.c b/tests/validation/averagepool_nchw_u8.c index 719cad85..4d6b11d6 100644 --- a/tests/validation/averagepool_nchw_u8.c +++ b/tests/validation/averagepool_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/averagepool_u8.c b/tests/validation/averagepool_u8.c index 549e9144..41df8b2e 100644 --- a/tests/validation/averagepool_u8.c +++ b/tests/validation/averagepool_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/batch_norm_f32.c b/tests/validation/batch_norm_f32.c index 9658c1e8..9629b576 100644 --- a/tests/validation/batch_norm_f32.c +++ b/tests/validation/batch_norm_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/batch_norm_i8.c b/tests/validation/batch_norm_i8.c index c7976a4e..bc8a9f0c 100644 --- a/tests/validation/batch_norm_i8.c +++ b/tests/validation/batch_norm_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/batch_norm_u8.c b/tests/validation/batch_norm_u8.c index 043ffa42..14e0361d 100644 --- a/tests/validation/batch_norm_u8.c +++ b/tests/validation/batch_norm_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/batch_to_space_f32.c b/tests/validation/batch_to_space_f32.c index e259d393..1d0be8df 100644 --- a/tests/validation/batch_to_space_f32.c +++ b/tests/validation/batch_to_space_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/batch_to_space_i8.c b/tests/validation/batch_to_space_i8.c index 1027296a..b7fb96f9 100644 --- a/tests/validation/batch_to_space_i8.c +++ b/tests/validation/batch_to_space_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/batch_to_space_u8.c b/tests/validation/batch_to_space_u8.c index fc71bbad..e3f7ebbb 100644 --- a/tests/validation/batch_to_space_u8.c +++ b/tests/validation/batch_to_space_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/broadcast_to_f32.c b/tests/validation/broadcast_to_f32.c index 1dd269ed..7bd718f2 100644 --- a/tests/validation/broadcast_to_f32.c +++ b/tests/validation/broadcast_to_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/broadcast_to_i8.c b/tests/validation/broadcast_to_i8.c index f9f5b7a6..a07fdd49 100644 --- a/tests/validation/broadcast_to_i8.c +++ b/tests/validation/broadcast_to_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/broadcast_to_u8.c b/tests/validation/broadcast_to_u8.c index 5c9b820e..733cfe07 100644 --- a/tests/validation/broadcast_to_u8.c +++ b/tests/validation/broadcast_to_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/ceil_f32.c b/tests/validation/ceil_f32.c index 878e0567..c2110d45 100644 --- a/tests/validation/ceil_f32.c +++ b/tests/validation/ceil_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/ceil_i8.c b/tests/validation/ceil_i8.c index 71b761b8..5d272e46 100644 --- a/tests/validation/ceil_i8.c +++ b/tests/validation/ceil_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/ceil_u8.c b/tests/validation/ceil_u8.c index ff7b427b..76d4fc56 100644 --- a/tests/validation/ceil_u8.c +++ b/tests/validation/ceil_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/clip_f32.c b/tests/validation/clip_f32.c index 2a0b2e54..2932932c 100644 --- a/tests/validation/clip_f32.c +++ b/tests/validation/clip_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/clip_i8.c b/tests/validation/clip_i8.c index bd0b7e7c..db802698 100644 --- a/tests/validation/clip_i8.c +++ b/tests/validation/clip_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/clip_u8.c b/tests/validation/clip_u8.c index 33efde2a..24b5dad8 100644 --- a/tests/validation/clip_u8.c +++ b/tests/validation/clip_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/concat_f32.c b/tests/validation/concat_f32.c index e4753e32..ffd2a41a 100644 --- a/tests/validation/concat_f32.c +++ b/tests/validation/concat_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/concat_i8.c b/tests/validation/concat_i8.c index 7c701cfd..dcf2f197 100644 --- a/tests/validation/concat_i8.c +++ b/tests/validation/concat_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/concat_u8.c b/tests/validation/concat_u8.c index 3aaffa3d..8e413b9e 100644 --- a/tests/validation/concat_u8.c +++ b/tests/validation/concat_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution3d_f32.c b/tests/validation/convolution3d_f32.c index 527f5400..c46f9ac6 100644 --- a/tests/validation/convolution3d_f32.c +++ b/tests/validation/convolution3d_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution3d_i8.c b/tests/validation/convolution3d_i8.c index de38bcea..819e2f5b 100644 --- a/tests/validation/convolution3d_i8.c +++ b/tests/validation/convolution3d_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution3d_u8.c b/tests/validation/convolution3d_u8.c index bb816a70..fe1b3b2d 100644 --- a/tests/validation/convolution3d_u8.c +++ b/tests/validation/convolution3d_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_channel_nchw_i8.c b/tests/validation/convolution_channel_nchw_i8.c index f99044d4..a1766874 100644 --- a/tests/validation/convolution_channel_nchw_i8.c +++ b/tests/validation/convolution_channel_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_channel_nchw_u8.c b/tests/validation/convolution_channel_nchw_u8.c index 36a4e5c0..3cb5a0d0 100644 --- a/tests/validation/convolution_channel_nchw_u8.c +++ b/tests/validation/convolution_channel_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_f32.c b/tests/validation/convolution_f32.c index 49fedab4..93762a2a 100644 --- a/tests/validation/convolution_f32.c +++ b/tests/validation/convolution_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_i8.c b/tests/validation/convolution_i8.c index 5a597b01..2ad9ed11 100644 --- a/tests/validation/convolution_i8.c +++ b/tests/validation/convolution_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_nchw_f32.c b/tests/validation/convolution_nchw_f32.c index bf0e2d6d..ea4f5cb6 100644 --- a/tests/validation/convolution_nchw_f32.c +++ b/tests/validation/convolution_nchw_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_nchw_i8.c b/tests/validation/convolution_nchw_i8.c index 103bbb86..66c143c3 100644 --- a/tests/validation/convolution_nchw_i8.c +++ b/tests/validation/convolution_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_nchw_u8.c b/tests/validation/convolution_nchw_u8.c index c8fa3864..132332cb 100644 --- a/tests/validation/convolution_nchw_u8.c +++ b/tests/validation/convolution_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_relu6_i8.c b/tests/validation/convolution_relu6_i8.c index eafae4fe..befadf58 100644 --- a/tests/validation/convolution_relu6_i8.c +++ b/tests/validation/convolution_relu6_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_relu6_nchw_i8.c b/tests/validation/convolution_relu6_nchw_i8.c index 153f48f9..db52ae8d 100644 --- a/tests/validation/convolution_relu6_nchw_i8.c +++ b/tests/validation/convolution_relu6_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_relu6_nchw_u8.c b/tests/validation/convolution_relu6_nchw_u8.c index f774cae6..d6d15bf8 100644 --- a/tests/validation/convolution_relu6_nchw_u8.c +++ b/tests/validation/convolution_relu6_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_relu6_u8.c b/tests/validation/convolution_relu6_u8.c index 5016f95a..febbbcf8 100644 --- a/tests/validation/convolution_relu6_u8.c +++ b/tests/validation/convolution_relu6_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_relu_i8.c b/tests/validation/convolution_relu_i8.c index 1a97d5ec..92d80e65 100644 --- a/tests/validation/convolution_relu_i8.c +++ b/tests/validation/convolution_relu_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_relu_nchw_i8.c b/tests/validation/convolution_relu_nchw_i8.c index 2717c5e9..c715d944 100644 --- a/tests/validation/convolution_relu_nchw_i8.c +++ b/tests/validation/convolution_relu_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_relu_nchw_u8.c b/tests/validation/convolution_relu_nchw_u8.c index 6de5f7e9..72f6465a 100644 --- a/tests/validation/convolution_relu_nchw_u8.c +++ b/tests/validation/convolution_relu_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_relu_u8.c b/tests/validation/convolution_relu_u8.c index d3f4ae29..a6e0a0c5 100644 --- a/tests/validation/convolution_relu_u8.c +++ b/tests/validation/convolution_relu_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/convolution_u8.c b/tests/validation/convolution_u8.c index 72e9f5ed..bb2fc183 100644 --- a/tests/validation/convolution_u8.c +++ b/tests/validation/convolution_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cos_f32.c b/tests/validation/cos_f32.c index f8267e77..c8dcede9 100644 --- a/tests/validation/cos_f32.c +++ b/tests/validation/cos_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cos_i8.c b/tests/validation/cos_i8.c index 10eaff2e..941a640b 100644 --- a/tests/validation/cos_i8.c +++ b/tests/validation/cos_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cos_u8.c b/tests/validation/cos_u8.c index 1818d26a..de00bd6d 100644 --- a/tests/validation/cos_u8.c +++ b/tests/validation/cos_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cosh_f32.c b/tests/validation/cosh_f32.c index 16d99951..122de8f1 100644 --- a/tests/validation/cosh_f32.c +++ b/tests/validation/cosh_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cosh_i8.c b/tests/validation/cosh_i8.c index 26db50e0..3ab765ef 100644 --- a/tests/validation/cosh_i8.c +++ b/tests/validation/cosh_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cosh_u8.c b/tests/validation/cosh_u8.c index f2b8b16c..a63eb9f7 100644 --- a/tests/validation/cosh_u8.c +++ b/tests/validation/cosh_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cumprod_f32.c b/tests/validation/cumprod_f32.c index 382a0aa5..7458b2a9 100644 --- a/tests/validation/cumprod_f32.c +++ b/tests/validation/cumprod_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cumprod_i8.c b/tests/validation/cumprod_i8.c index 22317e32..424a55a7 100644 --- a/tests/validation/cumprod_i8.c +++ b/tests/validation/cumprod_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cumprod_u8.c b/tests/validation/cumprod_u8.c index 0fc409ee..1bd713a1 100644 --- a/tests/validation/cumprod_u8.c +++ b/tests/validation/cumprod_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cumsum_f32.c b/tests/validation/cumsum_f32.c index 7aba2351..fa1a6044 100644 --- a/tests/validation/cumsum_f32.c +++ b/tests/validation/cumsum_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cumsum_i8.c b/tests/validation/cumsum_i8.c index bdeb14bc..2fddd428 100644 --- a/tests/validation/cumsum_i8.c +++ b/tests/validation/cumsum_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/cumsum_u8.c b/tests/validation/cumsum_u8.c index 61cc88b4..22cbd59b 100644 --- a/tests/validation/cumsum_u8.c +++ b/tests/validation/cumsum_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/deconvolution3d_f32.c b/tests/validation/deconvolution3d_f32.c index 6801781e..96b755f6 100644 --- a/tests/validation/deconvolution3d_f32.c +++ b/tests/validation/deconvolution3d_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/deconvolution3d_u8.c b/tests/validation/deconvolution3d_u8.c index ef8df9b1..7cbc7947 100644 --- a/tests/validation/deconvolution3d_u8.c +++ b/tests/validation/deconvolution3d_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/deconvolution_f32.c b/tests/validation/deconvolution_f32.c index d8bd0714..499333ef 100644 --- a/tests/validation/deconvolution_f32.c +++ b/tests/validation/deconvolution_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/deconvolution_i8.c b/tests/validation/deconvolution_i8.c index cc069d97..1aae0eab 100644 --- a/tests/validation/deconvolution_i8.c +++ b/tests/validation/deconvolution_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/deconvolution_nchw_f32.c b/tests/validation/deconvolution_nchw_f32.c index 3e9bf57b..eff93163 100644 --- a/tests/validation/deconvolution_nchw_f32.c +++ b/tests/validation/deconvolution_nchw_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/deconvolution_nchw_i8.c b/tests/validation/deconvolution_nchw_i8.c index 19dd9d9d..45090777 100644 --- a/tests/validation/deconvolution_nchw_i8.c +++ b/tests/validation/deconvolution_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/deconvolution_nchw_u8.c b/tests/validation/deconvolution_nchw_u8.c index 8e4aff80..e9dd2105 100644 --- a/tests/validation/deconvolution_nchw_u8.c +++ b/tests/validation/deconvolution_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/deconvolution_u8.c b/tests/validation/deconvolution_u8.c index 18cc32fb..0e4ac548 100644 --- a/tests/validation/deconvolution_u8.c +++ b/tests/validation/deconvolution_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depth_to_space_f32.c b/tests/validation/depth_to_space_f32.c index 0aa42253..9022ddf1 100644 --- a/tests/validation/depth_to_space_f32.c +++ b/tests/validation/depth_to_space_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depth_to_space_i8.c b/tests/validation/depth_to_space_i8.c index 27e65815..00c28046 100644 --- a/tests/validation/depth_to_space_i8.c +++ b/tests/validation/depth_to_space_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depth_to_space_u8.c b/tests/validation/depth_to_space_u8.c index c4efbfcd..cddaaebc 100644 --- a/tests/validation/depth_to_space_u8.c +++ b/tests/validation/depth_to_space_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_f32.c b/tests/validation/depthwise_convolution_f32.c index cc929538..8fbd1a00 100644 --- a/tests/validation/depthwise_convolution_f32.c +++ b/tests/validation/depthwise_convolution_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_i8.c b/tests/validation/depthwise_convolution_i8.c index ecf196fb..e7e43b2a 100644 --- a/tests/validation/depthwise_convolution_i8.c +++ b/tests/validation/depthwise_convolution_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_nchw_f32.c b/tests/validation/depthwise_convolution_nchw_f32.c index 7a2490d7..5678dea4 100644 --- a/tests/validation/depthwise_convolution_nchw_f32.c +++ b/tests/validation/depthwise_convolution_nchw_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_nchw_i8.c b/tests/validation/depthwise_convolution_nchw_i8.c index c721e419..f3089733 100644 --- a/tests/validation/depthwise_convolution_nchw_i8.c +++ b/tests/validation/depthwise_convolution_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_nchw_u8.c b/tests/validation/depthwise_convolution_nchw_u8.c index 19b02c92..1d225355 100644 --- a/tests/validation/depthwise_convolution_nchw_u8.c +++ b/tests/validation/depthwise_convolution_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_relu6_i8.c b/tests/validation/depthwise_convolution_relu6_i8.c index 61b71629..5e1f831a 100644 --- a/tests/validation/depthwise_convolution_relu6_i8.c +++ b/tests/validation/depthwise_convolution_relu6_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_relu6_nchw_i8.c b/tests/validation/depthwise_convolution_relu6_nchw_i8.c index 71a13fb9..2be157b6 100644 --- a/tests/validation/depthwise_convolution_relu6_nchw_i8.c +++ b/tests/validation/depthwise_convolution_relu6_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_relu6_nchw_u8.c b/tests/validation/depthwise_convolution_relu6_nchw_u8.c index a852a5db..a8e04516 100644 --- a/tests/validation/depthwise_convolution_relu6_nchw_u8.c +++ b/tests/validation/depthwise_convolution_relu6_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_relu6_u8.c b/tests/validation/depthwise_convolution_relu6_u8.c index 256e34b0..5b8b656f 100644 --- a/tests/validation/depthwise_convolution_relu6_u8.c +++ b/tests/validation/depthwise_convolution_relu6_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_relu_i8.c b/tests/validation/depthwise_convolution_relu_i8.c index 502524dd..d0102fe8 100644 --- a/tests/validation/depthwise_convolution_relu_i8.c +++ b/tests/validation/depthwise_convolution_relu_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_relu_nchw_i8.c b/tests/validation/depthwise_convolution_relu_nchw_i8.c index 6ec4fc56..0c4bddec 100644 --- a/tests/validation/depthwise_convolution_relu_nchw_i8.c +++ b/tests/validation/depthwise_convolution_relu_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_relu_nchw_u8.c b/tests/validation/depthwise_convolution_relu_nchw_u8.c index 3a6fea3b..b1609629 100644 --- a/tests/validation/depthwise_convolution_relu_nchw_u8.c +++ b/tests/validation/depthwise_convolution_relu_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_relu_u8.c b/tests/validation/depthwise_convolution_relu_u8.c index eb096677..c8e9e832 100644 --- a/tests/validation/depthwise_convolution_relu_u8.c +++ b/tests/validation/depthwise_convolution_relu_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_convolution_u8.c b/tests/validation/depthwise_convolution_u8.c index 75be9237..96241f47 100644 --- a/tests/validation/depthwise_convolution_u8.c +++ b/tests/validation/depthwise_convolution_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_deconvolution_f32.c b/tests/validation/depthwise_deconvolution_f32.c index 73dbf558..edff7e40 100644 --- a/tests/validation/depthwise_deconvolution_f32.c +++ b/tests/validation/depthwise_deconvolution_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_deconvolution_i8.c b/tests/validation/depthwise_deconvolution_i8.c index ddf6cf2b..b06cf6fd 100644 --- a/tests/validation/depthwise_deconvolution_i8.c +++ b/tests/validation/depthwise_deconvolution_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_deconvolution_nchw_f32.c b/tests/validation/depthwise_deconvolution_nchw_f32.c index 92d8a2bd..5811f3b8 100644 --- a/tests/validation/depthwise_deconvolution_nchw_f32.c +++ b/tests/validation/depthwise_deconvolution_nchw_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_deconvolution_nchw_u8.c b/tests/validation/depthwise_deconvolution_nchw_u8.c index 8ddea4b4..8f7eee18 100644 --- a/tests/validation/depthwise_deconvolution_nchw_u8.c +++ b/tests/validation/depthwise_deconvolution_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/depthwise_deconvolution_u8.c b/tests/validation/depthwise_deconvolution_u8.c index 35be13af..884655a4 100644 --- a/tests/validation/depthwise_deconvolution_u8.c +++ b/tests/validation/depthwise_deconvolution_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/dequantize_f32.c b/tests/validation/dequantize_f32.c index 658457f8..e946e95d 100644 --- a/tests/validation/dequantize_f32.c +++ b/tests/validation/dequantize_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_c860.h" diff --git a/tests/validation/div_f32.c b/tests/validation/div_f32.c index 5467eb3c..eac3b2f4 100644 --- a/tests/validation/div_f32.c +++ b/tests/validation/div_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/div_i8.c b/tests/validation/div_i8.c index 7162ead8..eaeaab41 100644 --- a/tests/validation/div_i8.c +++ b/tests/validation/div_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/div_u8.c b/tests/validation/div_u8.c index cf6d4094..a0b374dc 100644 --- a/tests/validation/div_u8.c +++ b/tests/validation/div_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/elu_f32.c b/tests/validation/elu_f32.c index 7f242c01..d8b19a9e 100644 --- a/tests/validation/elu_f32.c +++ b/tests/validation/elu_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/elu_i8.c b/tests/validation/elu_i8.c index 7ceffd88..cba90917 100644 --- a/tests/validation/elu_i8.c +++ b/tests/validation/elu_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/elu_u8.c b/tests/validation/elu_u8.c index 0188e5d6..eb545673 100644 --- a/tests/validation/elu_u8.c +++ b/tests/validation/elu_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/equal_f32.c b/tests/validation/equal_f32.c index e9fe9760..0acea4af 100644 --- a/tests/validation/equal_f32.c +++ b/tests/validation/equal_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/equal_i8.c b/tests/validation/equal_i8.c index cd26b651..29527fef 100644 --- a/tests/validation/equal_i8.c +++ b/tests/validation/equal_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/equal_u8.c b/tests/validation/equal_u8.c index ec1c6aef..57c6b54d 100644 --- a/tests/validation/equal_u8.c +++ b/tests/validation/equal_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/erf_f32.c b/tests/validation/erf_f32.c index a7973335..8d216e12 100644 --- a/tests/validation/erf_f32.c +++ b/tests/validation/erf_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/erf_i8.c b/tests/validation/erf_i8.c index 04a448cc..6c85ea58 100644 --- a/tests/validation/erf_i8.c +++ b/tests/validation/erf_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/erf_u8.c b/tests/validation/erf_u8.c index 49faf5f5..eab7ef2f 100644 --- a/tests/validation/erf_u8.c +++ b/tests/validation/erf_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/exp_f32.c b/tests/validation/exp_f32.c index 38c6838d..ea32e20f 100644 --- a/tests/validation/exp_f32.c +++ b/tests/validation/exp_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/exp_i8.c b/tests/validation/exp_i8.c index b718776f..6e4a0806 100644 --- a/tests/validation/exp_i8.c +++ b/tests/validation/exp_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/exp_u8.c b/tests/validation/exp_u8.c index ff8aff2e..b34219d3 100644 --- a/tests/validation/exp_u8.c +++ b/tests/validation/exp_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/expand_dims_f32.c b/tests/validation/expand_dims_f32.c index 005a3df9..ab9804ee 100644 --- a/tests/validation/expand_dims_f32.c +++ b/tests/validation/expand_dims_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/expand_dims_i8.c b/tests/validation/expand_dims_i8.c index 456e25d9..b1380387 100644 --- a/tests/validation/expand_dims_i8.c +++ b/tests/validation/expand_dims_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/expand_dims_u8.c b/tests/validation/expand_dims_u8.c index ba473a30..98410380 100644 --- a/tests/validation/expand_dims_u8.c +++ b/tests/validation/expand_dims_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/expm1_f32.c b/tests/validation/expm1_f32.c index 974c79a6..bfde48b4 100644 --- a/tests/validation/expm1_f32.c +++ b/tests/validation/expm1_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/expm1_i8.c b/tests/validation/expm1_i8.c index 0a442fc3..b53f8dea 100644 --- a/tests/validation/expm1_i8.c +++ b/tests/validation/expm1_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/expm1_u8.c b/tests/validation/expm1_u8.c index ece725b7..da5dcf7e 100644 --- a/tests/validation/expm1_u8.c +++ b/tests/validation/expm1_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/flatten_f32.c b/tests/validation/flatten_f32.c index 258ba64a..a77c4c1b 100644 --- a/tests/validation/flatten_f32.c +++ b/tests/validation/flatten_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/flatten_i8.c b/tests/validation/flatten_i8.c index 168edad0..db7d1d48 100644 --- a/tests/validation/flatten_i8.c +++ b/tests/validation/flatten_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/flatten_u8.c b/tests/validation/flatten_u8.c index f8c81f27..a6ce4266 100644 --- a/tests/validation/flatten_u8.c +++ b/tests/validation/flatten_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/floor_div_f32.c b/tests/validation/floor_div_f32.c index 6c2daa03..70390391 100644 --- a/tests/validation/floor_div_f32.c +++ b/tests/validation/floor_div_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/floor_div_i8.c b/tests/validation/floor_div_i8.c index e5a456fa..c23f86e6 100644 --- a/tests/validation/floor_div_i8.c +++ b/tests/validation/floor_div_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/floor_div_u8.c b/tests/validation/floor_div_u8.c index 84bcec72..20f6831b 100644 --- a/tests/validation/floor_div_u8.c +++ b/tests/validation/floor_div_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/floor_f32.c b/tests/validation/floor_f32.c index 4e489646..43afe982 100644 --- a/tests/validation/floor_f32.c +++ b/tests/validation/floor_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/floor_i8.c b/tests/validation/floor_i8.c index cf4ce8dc..833f376f 100644 --- a/tests/validation/floor_i8.c +++ b/tests/validation/floor_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/floor_mod_f32.c b/tests/validation/floor_mod_f32.c index e6204422..7bf814c9 100644 --- a/tests/validation/floor_mod_f32.c +++ b/tests/validation/floor_mod_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/floor_mod_i8.c b/tests/validation/floor_mod_i8.c index c8a16399..661f9593 100644 --- a/tests/validation/floor_mod_i8.c +++ b/tests/validation/floor_mod_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/floor_mod_u8.c b/tests/validation/floor_mod_u8.c index 3c8d5743..fba11778 100644 --- a/tests/validation/floor_mod_u8.c +++ b/tests/validation/floor_mod_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/floor_u8.c b/tests/validation/floor_u8.c index 4133a7d6..4258448c 100644 --- a/tests/validation/floor_u8.c +++ b/tests/validation/floor_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/fullyconnected_f32.c b/tests/validation/fullyconnected_f32.c index 0410e010..0b1e1737 100644 --- a/tests/validation/fullyconnected_f32.c +++ b/tests/validation/fullyconnected_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/fullyconnected_i8.c b/tests/validation/fullyconnected_i8.c index f5310787..26036b08 100644 --- a/tests/validation/fullyconnected_i8.c +++ b/tests/validation/fullyconnected_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/fullyconnected_u8.c b/tests/validation/fullyconnected_u8.c index a04d3cc2..3948d1f3 100644 --- a/tests/validation/fullyconnected_u8.c +++ b/tests/validation/fullyconnected_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/gather_f32.c b/tests/validation/gather_f32.c index 43d11e60..0f65cfe6 100644 --- a/tests/validation/gather_f32.c +++ b/tests/validation/gather_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/gather_i8.c b/tests/validation/gather_i8.c index 3e29320b..a8d44c3b 100644 --- a/tests/validation/gather_i8.c +++ b/tests/validation/gather_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/gather_nd_f32.c b/tests/validation/gather_nd_f32.c index 33a953b3..57873852 100644 --- a/tests/validation/gather_nd_f32.c +++ b/tests/validation/gather_nd_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/gather_nd_i8.c b/tests/validation/gather_nd_i8.c index ea1cd8c3..f2775fa7 100644 --- a/tests/validation/gather_nd_i8.c +++ b/tests/validation/gather_nd_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/gather_nd_u8.c b/tests/validation/gather_nd_u8.c index fdbba698..c9d0bce1 100644 --- a/tests/validation/gather_nd_u8.c +++ b/tests/validation/gather_nd_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/gather_u8.c b/tests/validation/gather_u8.c index e1b29b85..62349a3b 100644 --- a/tests/validation/gather_u8.c +++ b/tests/validation/gather_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/global_avgpool_i8.c b/tests/validation/global_avgpool_i8.c index 8c18dfff..84d48900 100644 --- a/tests/validation/global_avgpool_i8.c +++ b/tests/validation/global_avgpool_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/global_avgpool_nchw_i8.c b/tests/validation/global_avgpool_nchw_i8.c index c6434475..574bdc92 100644 --- a/tests/validation/global_avgpool_nchw_i8.c +++ b/tests/validation/global_avgpool_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/global_avgpool_nchw_u8.c b/tests/validation/global_avgpool_nchw_u8.c index a218ca64..50b106a6 100644 --- a/tests/validation/global_avgpool_nchw_u8.c +++ b/tests/validation/global_avgpool_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/global_avgpool_u8.c b/tests/validation/global_avgpool_u8.c index 53eb6aaf..8f7f9662 100644 --- a/tests/validation/global_avgpool_u8.c +++ b/tests/validation/global_avgpool_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/global_maxpool_i8.c b/tests/validation/global_maxpool_i8.c index 6628bf0b..9989c9a6 100644 --- a/tests/validation/global_maxpool_i8.c +++ b/tests/validation/global_maxpool_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/global_maxpool_nchw_i8.c b/tests/validation/global_maxpool_nchw_i8.c index 14d907a6..1abb1d25 100644 --- a/tests/validation/global_maxpool_nchw_i8.c +++ b/tests/validation/global_maxpool_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/global_maxpool_nchw_u8.c b/tests/validation/global_maxpool_nchw_u8.c index a3ce5734..e86b1950 100644 --- a/tests/validation/global_maxpool_nchw_u8.c +++ b/tests/validation/global_maxpool_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/global_maxpool_u8.c b/tests/validation/global_maxpool_u8.c index 3b92db9d..2ae18904 100644 --- a/tests/validation/global_maxpool_u8.c +++ b/tests/validation/global_maxpool_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/greater_equal_f32.c b/tests/validation/greater_equal_f32.c index 37e62831..0345f5e9 100644 --- a/tests/validation/greater_equal_f32.c +++ b/tests/validation/greater_equal_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/greater_equal_i8.c b/tests/validation/greater_equal_i8.c index 257e815b..98233020 100644 --- a/tests/validation/greater_equal_i8.c +++ b/tests/validation/greater_equal_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/greater_equal_u8.c b/tests/validation/greater_equal_u8.c index 597f4584..a5970369 100644 --- a/tests/validation/greater_equal_u8.c +++ b/tests/validation/greater_equal_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/greater_f32.c b/tests/validation/greater_f32.c index 37e62831..0345f5e9 100644 --- a/tests/validation/greater_f32.c +++ b/tests/validation/greater_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/greater_i8.c b/tests/validation/greater_i8.c index 24887004..534d0208 100644 --- a/tests/validation/greater_i8.c +++ b/tests/validation/greater_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/greater_u8.c b/tests/validation/greater_u8.c index ac492563..520b4cd6 100644 --- a/tests/validation/greater_u8.c +++ b/tests/validation/greater_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_f32.c b/tests/validation/group_convolution_f32.c index 13f31f74..502f13fd 100644 --- a/tests/validation/group_convolution_f32.c +++ b/tests/validation/group_convolution_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_i8.c b/tests/validation/group_convolution_i8.c index 04e70323..fbdc106b 100644 --- a/tests/validation/group_convolution_i8.c +++ b/tests/validation/group_convolution_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_nchw_f32.c b/tests/validation/group_convolution_nchw_f32.c index 89687e8f..ee59af16 100644 --- a/tests/validation/group_convolution_nchw_f32.c +++ b/tests/validation/group_convolution_nchw_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_nchw_i8.c b/tests/validation/group_convolution_nchw_i8.c index b4623c84..5f9e136b 100644 --- a/tests/validation/group_convolution_nchw_i8.c +++ b/tests/validation/group_convolution_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_nchw_u8.c b/tests/validation/group_convolution_nchw_u8.c index 93c3414f..3da0b7c5 100644 --- a/tests/validation/group_convolution_nchw_u8.c +++ b/tests/validation/group_convolution_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_relu6_i8.c b/tests/validation/group_convolution_relu6_i8.c index 348b35ca..1654d0db 100644 --- a/tests/validation/group_convolution_relu6_i8.c +++ b/tests/validation/group_convolution_relu6_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_relu6_nchw_i8.c b/tests/validation/group_convolution_relu6_nchw_i8.c index 5168d66b..12c61035 100644 --- a/tests/validation/group_convolution_relu6_nchw_i8.c +++ b/tests/validation/group_convolution_relu6_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_relu6_nchw_u8.c b/tests/validation/group_convolution_relu6_nchw_u8.c index 8cea134b..774b5657 100644 --- a/tests/validation/group_convolution_relu6_nchw_u8.c +++ b/tests/validation/group_convolution_relu6_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_relu6_u8.c b/tests/validation/group_convolution_relu6_u8.c index 28d6a4c0..0fbcfefa 100644 --- a/tests/validation/group_convolution_relu6_u8.c +++ b/tests/validation/group_convolution_relu6_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_relu_i8.c b/tests/validation/group_convolution_relu_i8.c index b32bd443..52e626a0 100644 --- a/tests/validation/group_convolution_relu_i8.c +++ b/tests/validation/group_convolution_relu_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_relu_nchw_i8.c b/tests/validation/group_convolution_relu_nchw_i8.c index 73eadab7..bbc75957 100644 --- a/tests/validation/group_convolution_relu_nchw_i8.c +++ b/tests/validation/group_convolution_relu_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_relu_nchw_u8.c b/tests/validation/group_convolution_relu_nchw_u8.c index 67a46096..688285d5 100644 --- a/tests/validation/group_convolution_relu_nchw_u8.c +++ b/tests/validation/group_convolution_relu_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_relu_u8.c b/tests/validation/group_convolution_relu_u8.c index 63c3ba36..cde197e7 100644 --- a/tests/validation/group_convolution_relu_u8.c +++ b/tests/validation/group_convolution_relu_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/group_convolution_u8.c b/tests/validation/group_convolution_u8.c index 74b37f8c..209f7b35 100644 --- a/tests/validation/group_convolution_u8.c +++ b/tests/validation/group_convolution_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/hard_sigmoid_f32.c b/tests/validation/hard_sigmoid_f32.c index 1600bfe7..2d3c685a 100644 --- a/tests/validation/hard_sigmoid_f32.c +++ b/tests/validation/hard_sigmoid_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/hard_sigmoid_i8.c b/tests/validation/hard_sigmoid_i8.c index 75bd44bd..2e8ec6be 100644 --- a/tests/validation/hard_sigmoid_i8.c +++ b/tests/validation/hard_sigmoid_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/hard_sigmoid_u8.c b/tests/validation/hard_sigmoid_u8.c index 326c3663..38035f4c 100644 --- a/tests/validation/hard_sigmoid_u8.c +++ b/tests/validation/hard_sigmoid_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/im2col_f32.c b/tests/validation/im2col_f32.c index 61afa8a8..28116902 100644 --- a/tests/validation/im2col_f32.c +++ b/tests/validation/im2col_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/im2col_i8.c b/tests/validation/im2col_i8.c index e9b5efaf..32f4078d 100644 --- a/tests/validation/im2col_i8.c +++ b/tests/validation/im2col_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/im2col_u8.c b/tests/validation/im2col_u8.c index 22ac5e4d..c85de842 100644 --- a/tests/validation/im2col_u8.c +++ b/tests/validation/im2col_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/is_nan_f32.c b/tests/validation/is_nan_f32.c index 8152aca6..a73591a0 100644 --- a/tests/validation/is_nan_f32.c +++ b/tests/validation/is_nan_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/l2_norm_f32.c b/tests/validation/l2_norm_f32.c index 39509b03..cb4bcdde 100644 --- a/tests/validation/l2_norm_f32.c +++ b/tests/validation/l2_norm_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/l2_norm_i8.c b/tests/validation/l2_norm_i8.c index 556c753b..2d20bbb3 100644 --- a/tests/validation/l2_norm_i8.c +++ b/tests/validation/l2_norm_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/l2_norm_u8.c b/tests/validation/l2_norm_u8.c index 151cdb7b..9e4aed4c 100644 --- a/tests/validation/l2_norm_u8.c +++ b/tests/validation/l2_norm_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/leaky_relu_f32.c b/tests/validation/leaky_relu_f32.c index 82ad9b71..f85eeff7 100644 --- a/tests/validation/leaky_relu_f32.c +++ b/tests/validation/leaky_relu_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/leaky_relu_i8.c b/tests/validation/leaky_relu_i8.c index a8ba8e3a..2722e59a 100644 --- a/tests/validation/leaky_relu_i8.c +++ b/tests/validation/leaky_relu_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/leaky_relu_u8.c b/tests/validation/leaky_relu_u8.c index 5055d3c7..3140eba8 100644 --- a/tests/validation/leaky_relu_u8.c +++ b/tests/validation/leaky_relu_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/less_equal_f32.c b/tests/validation/less_equal_f32.c index b55120e6..499ee073 100644 --- a/tests/validation/less_equal_f32.c +++ b/tests/validation/less_equal_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/less_equal_i8.c b/tests/validation/less_equal_i8.c index a0fcd1c5..8e589762 100644 --- a/tests/validation/less_equal_i8.c +++ b/tests/validation/less_equal_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/less_equal_u8.c b/tests/validation/less_equal_u8.c index 5e2e21f7..4281eaf5 100644 --- a/tests/validation/less_equal_u8.c +++ b/tests/validation/less_equal_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/less_f32.c b/tests/validation/less_f32.c index 08848aae..f655e34b 100644 --- a/tests/validation/less_f32.c +++ b/tests/validation/less_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/less_i8.c b/tests/validation/less_i8.c index 2fed1978..326bf87b 100644 --- a/tests/validation/less_i8.c +++ b/tests/validation/less_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/less_u8.c b/tests/validation/less_u8.c index 2c0c0f49..3f97a658 100644 --- a/tests/validation/less_u8.c +++ b/tests/validation/less_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/log1p_f32.c b/tests/validation/log1p_f32.c index 5dab5c95..2b39ff25 100644 --- a/tests/validation/log1p_f32.c +++ b/tests/validation/log1p_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/log1p_i8.c b/tests/validation/log1p_i8.c index f26e7d14..300c54a5 100644 --- a/tests/validation/log1p_i8.c +++ b/tests/validation/log1p_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/log1p_u8.c b/tests/validation/log1p_u8.c index b3635250..98b0a730 100644 --- a/tests/validation/log1p_u8.c +++ b/tests/validation/log1p_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/log_f32.c b/tests/validation/log_f32.c index 53ce36a4..68b8e5ed 100644 --- a/tests/validation/log_f32.c +++ b/tests/validation/log_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/log_i8.c b/tests/validation/log_i8.c index 93242999..111503df 100644 --- a/tests/validation/log_i8.c +++ b/tests/validation/log_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/log_softmax_f32.c b/tests/validation/log_softmax_f32.c index 3c76ccdc..92ff200c 100644 --- a/tests/validation/log_softmax_f32.c +++ b/tests/validation/log_softmax_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/log_softmax_i8.c b/tests/validation/log_softmax_i8.c index 17dc1976..0e41c331 100644 --- a/tests/validation/log_softmax_i8.c +++ b/tests/validation/log_softmax_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/log_softmax_u8.c b/tests/validation/log_softmax_u8.c index 96b1059c..184c8cee 100644 --- a/tests/validation/log_softmax_u8.c +++ b/tests/validation/log_softmax_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/log_u8.c b/tests/validation/log_u8.c index f86aaae2..c382436b 100644 --- a/tests/validation/log_u8.c +++ b/tests/validation/log_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_and_f32.c b/tests/validation/logical_and_f32.c index dceb267b..a0147d70 100644 --- a/tests/validation/logical_and_f32.c +++ b/tests/validation/logical_and_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_and_i8.c b/tests/validation/logical_and_i8.c index 194803d6..1653a366 100644 --- a/tests/validation/logical_and_i8.c +++ b/tests/validation/logical_and_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_and_u8.c b/tests/validation/logical_and_u8.c index 6c2fb2e7..84c44d1c 100644 --- a/tests/validation/logical_and_u8.c +++ b/tests/validation/logical_and_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_not_f32.c b/tests/validation/logical_not_f32.c index b5d81d0d..e67d9c4b 100644 --- a/tests/validation/logical_not_f32.c +++ b/tests/validation/logical_not_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_not_i8.c b/tests/validation/logical_not_i8.c index 8839cf06..adb0975f 100644 --- a/tests/validation/logical_not_i8.c +++ b/tests/validation/logical_not_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_not_u8.c b/tests/validation/logical_not_u8.c index c58d411d..0a03332a 100644 --- a/tests/validation/logical_not_u8.c +++ b/tests/validation/logical_not_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_or_f32.c b/tests/validation/logical_or_f32.c index 37a06d18..cf4c3c27 100644 --- a/tests/validation/logical_or_f32.c +++ b/tests/validation/logical_or_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_or_i8.c b/tests/validation/logical_or_i8.c index aa8b0784..c7ef28af 100644 --- a/tests/validation/logical_or_i8.c +++ b/tests/validation/logical_or_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_or_u8.c b/tests/validation/logical_or_u8.c index bff738eb..b6754616 100644 --- a/tests/validation/logical_or_u8.c +++ b/tests/validation/logical_or_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_xor_f32.c b/tests/validation/logical_xor_f32.c index e0dd4cac..92301851 100644 --- a/tests/validation/logical_xor_f32.c +++ b/tests/validation/logical_xor_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_xor_i8.c b/tests/validation/logical_xor_i8.c index 5c836d20..04603aba 100644 --- a/tests/validation/logical_xor_i8.c +++ b/tests/validation/logical_xor_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/logical_xor_u8.c b/tests/validation/logical_xor_u8.c index 0dae9547..1acb1663 100644 --- a/tests/validation/logical_xor_u8.c +++ b/tests/validation/logical_xor_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/lrn_f32.c b/tests/validation/lrn_f32.c index b0bac0e7..ff393658 100644 --- a/tests/validation/lrn_f32.c +++ b/tests/validation/lrn_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/lrn_i8.c b/tests/validation/lrn_i8.c index bc96408f..460296e9 100644 --- a/tests/validation/lrn_i8.c +++ b/tests/validation/lrn_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/lrn_u8.c b/tests/validation/lrn_u8.c index ea5b497d..a1f8820d 100644 --- a/tests/validation/lrn_u8.c +++ b/tests/validation/lrn_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/matmul_f32.c b/tests/validation/matmul_f32.c index 95caaa12..74681de8 100644 --- a/tests/validation/matmul_f32.c +++ b/tests/validation/matmul_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/matmul_i8.c b/tests/validation/matmul_i8.c index f7b6633b..69ffd9b2 100644 --- a/tests/validation/matmul_i8.c +++ b/tests/validation/matmul_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/matmul_u8.c b/tests/validation/matmul_u8.c index 6f8d580b..2a88dd2e 100644 --- a/tests/validation/matmul_u8.c +++ b/tests/validation/matmul_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/max_stride_f32.c b/tests/validation/max_stride_f32.c index 0b5fc030..0736dd45 100644 --- a/tests/validation/max_stride_f32.c +++ b/tests/validation/max_stride_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/max_stride_u8.c b/tests/validation/max_stride_u8.c index fd1ac6d0..3db21059 100644 --- a/tests/validation/max_stride_u8.c +++ b/tests/validation/max_stride_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/maximum_f32.c b/tests/validation/maximum_f32.c index 37cbe457..66182bf7 100644 --- a/tests/validation/maximum_f32.c +++ b/tests/validation/maximum_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/maximum_i8.c b/tests/validation/maximum_i8.c index d69a8353..8f00f7e8 100644 --- a/tests/validation/maximum_i8.c +++ b/tests/validation/maximum_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/maximum_u8.c b/tests/validation/maximum_u8.c index e99439dd..be045c01 100644 --- a/tests/validation/maximum_u8.c +++ b/tests/validation/maximum_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/maxpool3d_f32.c b/tests/validation/maxpool3d_f32.c index 88f95fa8..522910a7 100644 --- a/tests/validation/maxpool3d_f32.c +++ b/tests/validation/maxpool3d_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/maxpool3d_i8.c b/tests/validation/maxpool3d_i8.c index 80d51722..93d7e7dc 100644 --- a/tests/validation/maxpool3d_i8.c +++ b/tests/validation/maxpool3d_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/maxpool3d_u8.c b/tests/validation/maxpool3d_u8.c index 37dd5689..c4f1cb9a 100644 --- a/tests/validation/maxpool3d_u8.c +++ b/tests/validation/maxpool3d_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/maxpool_f32.c b/tests/validation/maxpool_f32.c index 5af2b64a..76b3bfd4 100644 --- a/tests/validation/maxpool_f32.c +++ b/tests/validation/maxpool_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/maxpool_nchw_f32.c b/tests/validation/maxpool_nchw_f32.c index a3488f85..3a147919 100644 --- a/tests/validation/maxpool_nchw_f32.c +++ b/tests/validation/maxpool_nchw_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/maxpool_u8.c b/tests/validation/maxpool_u8.c index 8174520c..81528a3d 100644 --- a/tests/validation/maxpool_u8.c +++ b/tests/validation/maxpool_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/mean_stride_f32.c b/tests/validation/mean_stride_f32.c index e0cc0e40..1d73e12b 100644 --- a/tests/validation/mean_stride_f32.c +++ b/tests/validation/mean_stride_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/mean_stride_u8.c b/tests/validation/mean_stride_u8.c index d0445939..d73bc410 100644 --- a/tests/validation/mean_stride_u8.c +++ b/tests/validation/mean_stride_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/min_stride_f32.c b/tests/validation/min_stride_f32.c index f098417c..00466290 100644 --- a/tests/validation/min_stride_f32.c +++ b/tests/validation/min_stride_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/min_stride_u8.c b/tests/validation/min_stride_u8.c index cabf7f71..9c72841f 100644 --- a/tests/validation/min_stride_u8.c +++ b/tests/validation/min_stride_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/minimum_f32.c b/tests/validation/minimum_f32.c index 60750435..f724c2b0 100644 --- a/tests/validation/minimum_f32.c +++ b/tests/validation/minimum_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/minimum_i8.c b/tests/validation/minimum_i8.c index fe99980a..99a888b7 100644 --- a/tests/validation/minimum_i8.c +++ b/tests/validation/minimum_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/minimum_u8.c b/tests/validation/minimum_u8.c index 5b61203b..879c6cf4 100644 --- a/tests/validation/minimum_u8.c +++ b/tests/validation/minimum_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/mod_f32.c b/tests/validation/mod_f32.c index 7a9fe6b4..b2057c14 100644 --- a/tests/validation/mod_f32.c +++ b/tests/validation/mod_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/mod_i8.c b/tests/validation/mod_i8.c index 9a273c2d..60cddfdc 100644 --- a/tests/validation/mod_i8.c +++ b/tests/validation/mod_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/mod_u8.c b/tests/validation/mod_u8.c index fbfb71c9..4ad679f7 100644 --- a/tests/validation/mod_u8.c +++ b/tests/validation/mod_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/mul_f32.c b/tests/validation/mul_f32.c index f87682c1..e30287c1 100644 --- a/tests/validation/mul_f32.c +++ b/tests/validation/mul_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/mul_i8.c b/tests/validation/mul_i8.c index 214f3f20..a64fbdda 100644 --- a/tests/validation/mul_i8.c +++ b/tests/validation/mul_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/mul_u8.c b/tests/validation/mul_u8.c index 320beec5..7e7a8042 100644 --- a/tests/validation/mul_u8.c +++ b/tests/validation/mul_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/ndarray_size_f32.c b/tests/validation/ndarray_size_f32.c index eb33e2a9..6beda6d3 100644 --- a/tests/validation/ndarray_size_f32.c +++ b/tests/validation/ndarray_size_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/ndarray_size_i8.c b/tests/validation/ndarray_size_i8.c index 999f3eed..cbc2209b 100644 --- a/tests/validation/ndarray_size_i8.c +++ b/tests/validation/ndarray_size_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/ndarray_size_u8.c b/tests/validation/ndarray_size_u8.c index cf2ee479..2a857f6d 100644 --- a/tests/validation/ndarray_size_u8.c +++ b/tests/validation/ndarray_size_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/negative_f32.c b/tests/validation/negative_f32.c index 38caa46a..c676f2e1 100644 --- a/tests/validation/negative_f32.c +++ b/tests/validation/negative_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/negative_i8.c b/tests/validation/negative_i8.c index 40b1d1c2..817599a6 100644 --- a/tests/validation/negative_i8.c +++ b/tests/validation/negative_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/negative_u8.c b/tests/validation/negative_u8.c index 8bcba397..731e001b 100644 --- a/tests/validation/negative_u8.c +++ b/tests/validation/negative_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/non_max_suppression_f32.c b/tests/validation/non_max_suppression_f32.c index 6cce1c49..df3644fd 100644 --- a/tests/validation/non_max_suppression_f32.c +++ b/tests/validation/non_max_suppression_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/not_equal_f32.c b/tests/validation/not_equal_f32.c index 79b38287..0840a9d0 100644 --- a/tests/validation/not_equal_f32.c +++ b/tests/validation/not_equal_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/not_equal_i8.c b/tests/validation/not_equal_i8.c index 4ae20833..7dfe9457 100644 --- a/tests/validation/not_equal_i8.c +++ b/tests/validation/not_equal_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/not_equal_u8.c b/tests/validation/not_equal_u8.c index be5b1b09..080ff77c 100644 --- a/tests/validation/not_equal_u8.c +++ b/tests/validation/not_equal_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/not_f32.c b/tests/validation/not_f32.c index 1c84209b..68e6a84d 100644 --- a/tests/validation/not_f32.c +++ b/tests/validation/not_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/not_u32.c b/tests/validation/not_u32.c index fecb2f4c..597a4af9 100644 --- a/tests/validation/not_u32.c +++ b/tests/validation/not_u32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/or_u32.c b/tests/validation/or_u32.c index aebad554..12e05f4e 100644 --- a/tests/validation/or_u32.c +++ b/tests/validation/or_u32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/pad_f32.c b/tests/validation/pad_f32.c index bddf86ef..c2fae295 100644 --- a/tests/validation/pad_f32.c +++ b/tests/validation/pad_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/pad_nchw_f32.c b/tests/validation/pad_nchw_f32.c index 66696ff4..8a386eba 100644 --- a/tests/validation/pad_nchw_f32.c +++ b/tests/validation/pad_nchw_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/pad_nchw_u8.c b/tests/validation/pad_nchw_u8.c index 69e5947a..e2ad2861 100644 --- a/tests/validation/pad_nchw_u8.c +++ b/tests/validation/pad_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/pad_u8.c b/tests/validation/pad_u8.c index 80947781..06e55e77 100644 --- a/tests/validation/pad_u8.c +++ b/tests/validation/pad_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/pow_f32.c b/tests/validation/pow_f32.c index 40f8c5b7..135210f5 100644 --- a/tests/validation/pow_f32.c +++ b/tests/validation/pow_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/pow_i8.c b/tests/validation/pow_i8.c index 1f73aefe..f589632c 100644 --- a/tests/validation/pow_i8.c +++ b/tests/validation/pow_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/pow_u8.c b/tests/validation/pow_u8.c index 1dcbacc4..e1075b81 100644 --- a/tests/validation/pow_u8.c +++ b/tests/validation/pow_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/prelu_f32.c b/tests/validation/prelu_f32.c index 0a1cb5f5..ee33ce26 100644 --- a/tests/validation/prelu_f32.c +++ b/tests/validation/prelu_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/prelu_i8.c b/tests/validation/prelu_i8.c index 8ebccb1b..7168a2cf 100644 --- a/tests/validation/prelu_i8.c +++ b/tests/validation/prelu_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/prelu_nhwc_f32.c b/tests/validation/prelu_nhwc_f32.c index 8f8c5ef5..ef1410a3 100644 --- a/tests/validation/prelu_nhwc_f32.c +++ b/tests/validation/prelu_nhwc_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/prelu_nhwc_i8.c b/tests/validation/prelu_nhwc_i8.c index 649ccd11..f4943ba9 100644 --- a/tests/validation/prelu_nhwc_i8.c +++ b/tests/validation/prelu_nhwc_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/prelu_nhwc_u8.c b/tests/validation/prelu_nhwc_u8.c index 52a34a66..7e6181d5 100644 --- a/tests/validation/prelu_nhwc_u8.c +++ b/tests/validation/prelu_nhwc_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/prelu_u8.c b/tests/validation/prelu_u8.c index a1f26cf1..23536091 100644 --- a/tests/validation/prelu_u8.c +++ b/tests/validation/prelu_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/prod_stride_f32.c b/tests/validation/prod_stride_f32.c index 7821e299..d0614aae 100644 --- a/tests/validation/prod_stride_f32.c +++ b/tests/validation/prod_stride_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/prod_stride_u8.c b/tests/validation/prod_stride_u8.c index 7636bf28..3bb864b3 100644 --- a/tests/validation/prod_stride_u8.c +++ b/tests/validation/prod_stride_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/psroipooling_f32.c b/tests/validation/psroipooling_f32.c index 152af1ca..8cb73906 100644 --- a/tests/validation/psroipooling_f32.c +++ b/tests/validation/psroipooling_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/psroipooling_u8.c b/tests/validation/psroipooling_u8.c index 9c8830ee..86a24d4d 100644 --- a/tests/validation/psroipooling_u8.c +++ b/tests/validation/psroipooling_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_logsumexp_f32.c b/tests/validation/reduce_logsumexp_f32.c index 13ad1a7c..91d10763 100644 --- a/tests/validation/reduce_logsumexp_f32.c +++ b/tests/validation/reduce_logsumexp_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_logsumexp_i8.c b/tests/validation/reduce_logsumexp_i8.c index 9cfaebd3..08bb0e42 100644 --- a/tests/validation/reduce_logsumexp_i8.c +++ b/tests/validation/reduce_logsumexp_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_logsumexp_u8.c b/tests/validation/reduce_logsumexp_u8.c index 8b8b12e4..f614c78e 100644 --- a/tests/validation/reduce_logsumexp_u8.c +++ b/tests/validation/reduce_logsumexp_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_max_f32.c b/tests/validation/reduce_max_f32.c index 4a9edc6f..5e62f1b9 100644 --- a/tests/validation/reduce_max_f32.c +++ b/tests/validation/reduce_max_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_max_i8.c b/tests/validation/reduce_max_i8.c index 2817f5bd..e106d306 100644 --- a/tests/validation/reduce_max_i8.c +++ b/tests/validation/reduce_max_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_max_u8.c b/tests/validation/reduce_max_u8.c index fd9388e6..bc97d2c5 100644 --- a/tests/validation/reduce_max_u8.c +++ b/tests/validation/reduce_max_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_mean_f32.c b/tests/validation/reduce_mean_f32.c index 9fc08dc2..52414efd 100644 --- a/tests/validation/reduce_mean_f32.c +++ b/tests/validation/reduce_mean_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_mean_i8.c b/tests/validation/reduce_mean_i8.c index f861e82d..01ad72f6 100644 --- a/tests/validation/reduce_mean_i8.c +++ b/tests/validation/reduce_mean_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_mean_u8.c b/tests/validation/reduce_mean_u8.c index 6aea3abc..fd200918 100644 --- a/tests/validation/reduce_mean_u8.c +++ b/tests/validation/reduce_mean_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_min_f32.c b/tests/validation/reduce_min_f32.c index 838fc40d..3852fe1d 100644 --- a/tests/validation/reduce_min_f32.c +++ b/tests/validation/reduce_min_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_min_i8.c b/tests/validation/reduce_min_i8.c index 7a4ee19f..f1368ae7 100644 --- a/tests/validation/reduce_min_i8.c +++ b/tests/validation/reduce_min_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_min_u8.c b/tests/validation/reduce_min_u8.c index 8c84aa5c..0c71895b 100644 --- a/tests/validation/reduce_min_u8.c +++ b/tests/validation/reduce_min_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_prod_f32.c b/tests/validation/reduce_prod_f32.c index 79fadccc..2caa4032 100644 --- a/tests/validation/reduce_prod_f32.c +++ b/tests/validation/reduce_prod_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_prod_i8.c b/tests/validation/reduce_prod_i8.c index 4bae5cc7..388b518f 100644 --- a/tests/validation/reduce_prod_i8.c +++ b/tests/validation/reduce_prod_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_prod_u8.c b/tests/validation/reduce_prod_u8.c index 39e9a3e9..0d25cef3 100644 --- a/tests/validation/reduce_prod_u8.c +++ b/tests/validation/reduce_prod_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_sum_f32.c b/tests/validation/reduce_sum_f32.c index 6fb182a5..158ac2f7 100644 --- a/tests/validation/reduce_sum_f32.c +++ b/tests/validation/reduce_sum_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_sum_i8.c b/tests/validation/reduce_sum_i8.c index 723f531e..b904bbb6 100644 --- a/tests/validation/reduce_sum_i8.c +++ b/tests/validation/reduce_sum_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reduce_sum_u8.c b/tests/validation/reduce_sum_u8.c index 36a11561..278159d8 100644 --- a/tests/validation/reduce_sum_u8.c +++ b/tests/validation/reduce_sum_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relu1_f32.c b/tests/validation/relu1_f32.c index 5120acdb..13355cb0 100644 --- a/tests/validation/relu1_f32.c +++ b/tests/validation/relu1_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relu1_i8.c b/tests/validation/relu1_i8.c index 467d6dfe..f53f53dd 100644 --- a/tests/validation/relu1_i8.c +++ b/tests/validation/relu1_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relu1_u8.c b/tests/validation/relu1_u8.c index 7bfcb12a..b2dd5476 100644 --- a/tests/validation/relu1_u8.c +++ b/tests/validation/relu1_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relu6_f32.c b/tests/validation/relu6_f32.c index 2def1a53..57fc87f5 100644 --- a/tests/validation/relu6_f32.c +++ b/tests/validation/relu6_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relu6_i8.c b/tests/validation/relu6_i8.c index 59d468b8..e0179f5b 100644 --- a/tests/validation/relu6_i8.c +++ b/tests/validation/relu6_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relu6_u8.c b/tests/validation/relu6_u8.c index 25015d8e..1d6f801a 100644 --- a/tests/validation/relu6_u8.c +++ b/tests/validation/relu6_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relu_f32.c b/tests/validation/relu_f32.c index 1d3f337a..baa13cdf 100644 --- a/tests/validation/relu_f32.c +++ b/tests/validation/relu_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relu_i8.c b/tests/validation/relu_i8.c index f00a3c24..8a48d4ac 100644 --- a/tests/validation/relu_i8.c +++ b/tests/validation/relu_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relu_u8.c b/tests/validation/relu_u8.c index 372b39bc..129ae7d9 100644 --- a/tests/validation/relu_u8.c +++ b/tests/validation/relu_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relun_f32.c b/tests/validation/relun_f32.c index 14bd2f00..f0eda3a9 100644 --- a/tests/validation/relun_f32.c +++ b/tests/validation/relun_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relun_i8.c b/tests/validation/relun_i8.c index 9c7dee39..839ad6e5 100644 --- a/tests/validation/relun_i8.c +++ b/tests/validation/relun_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/relun_u8.c b/tests/validation/relun_u8.c index f36f02f0..2f18e669 100644 --- a/tests/validation/relun_u8.c +++ b/tests/validation/relun_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reshape_f32.c b/tests/validation/reshape_f32.c index e4012936..87293d3b 100644 --- a/tests/validation/reshape_f32.c +++ b/tests/validation/reshape_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/resize_bilinear_f32.c b/tests/validation/resize_bilinear_f32.c index 693b82d9..d1d03c99 100644 --- a/tests/validation/resize_bilinear_f32.c +++ b/tests/validation/resize_bilinear_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/resize_bilinear_i8.c b/tests/validation/resize_bilinear_i8.c index dfa67bea..cb824924 100644 --- a/tests/validation/resize_bilinear_i8.c +++ b/tests/validation/resize_bilinear_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/resize_bilinear_u8.c b/tests/validation/resize_bilinear_u8.c index 955d2d3e..d582f53f 100644 --- a/tests/validation/resize_bilinear_u8.c +++ b/tests/validation/resize_bilinear_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/resize_nearestneighbor_f32.c b/tests/validation/resize_nearestneighbor_f32.c index 6cf5d957..2a0afbe4 100644 --- a/tests/validation/resize_nearestneighbor_f32.c +++ b/tests/validation/resize_nearestneighbor_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/resize_nearestneighbor_i8.c b/tests/validation/resize_nearestneighbor_i8.c index 210169d9..d13aeb32 100644 --- a/tests/validation/resize_nearestneighbor_i8.c +++ b/tests/validation/resize_nearestneighbor_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/resize_nearestneighbor_nchw_f32.c b/tests/validation/resize_nearestneighbor_nchw_f32.c index 28643931..cc8cd775 100644 --- a/tests/validation/resize_nearestneighbor_nchw_f32.c +++ b/tests/validation/resize_nearestneighbor_nchw_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/resize_nearestneighbor_nchw_i8.c b/tests/validation/resize_nearestneighbor_nchw_i8.c index 70bcaa17..2b5e2e25 100644 --- a/tests/validation/resize_nearestneighbor_nchw_i8.c +++ b/tests/validation/resize_nearestneighbor_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/resize_nearestneighbor_nchw_u8.c b/tests/validation/resize_nearestneighbor_nchw_u8.c index 9493e70a..ee461bfa 100644 --- a/tests/validation/resize_nearestneighbor_nchw_u8.c +++ b/tests/validation/resize_nearestneighbor_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/resize_nearestneighbor_u8.c b/tests/validation/resize_nearestneighbor_u8.c index 17d45365..c198b595 100644 --- a/tests/validation/resize_nearestneighbor_u8.c +++ b/tests/validation/resize_nearestneighbor_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reverse_f32.c b/tests/validation/reverse_f32.c index 81bc2800..28a66c56 100644 --- a/tests/validation/reverse_f32.c +++ b/tests/validation/reverse_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reverse_i8.c b/tests/validation/reverse_i8.c index 032ceab5..9f3d53e4 100644 --- a/tests/validation/reverse_i8.c +++ b/tests/validation/reverse_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/reverse_u8.c b/tests/validation/reverse_u8.c index afaa7019..01b79b4b 100644 --- a/tests/validation/reverse_u8.c +++ b/tests/validation/reverse_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/riscv_xt9xx/relu_fp16.c b/tests/validation/riscv_xt9xx/relu_fp16.c index 214dbb31..72f199d5 100644 --- a/tests/validation/riscv_xt9xx/relu_fp16.c +++ b/tests/validation/riscv_xt9xx/relu_fp16.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/roialign_f32.c b/tests/validation/roialign_f32.c index f2cf4e75..13396400 100644 --- a/tests/validation/roialign_f32.c +++ b/tests/validation/roialign_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/roipooling_f32.c b/tests/validation/roipooling_f32.c index 77e48b2e..594ed45e 100644 --- a/tests/validation/roipooling_f32.c +++ b/tests/validation/roipooling_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/roipooling_u8.c b/tests/validation/roipooling_u8.c index 6f581bee..c9d2e9fe 100644 --- a/tests/validation/roipooling_u8.c +++ b/tests/validation/roipooling_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/round_f32.c b/tests/validation/round_f32.c index 8ac5ce02..f2a49637 100644 --- a/tests/validation/round_f32.c +++ b/tests/validation/round_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/round_i8.c b/tests/validation/round_i8.c index 8322f89a..62908cf0 100644 --- a/tests/validation/round_i8.c +++ b/tests/validation/round_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/round_u8.c b/tests/validation/round_u8.c index 74e7aef2..73468b27 100644 --- a/tests/validation/round_u8.c +++ b/tests/validation/round_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/rsqrt_f32.c b/tests/validation/rsqrt_f32.c index 39f63dda..d3c66b74 100644 --- a/tests/validation/rsqrt_f32.c +++ b/tests/validation/rsqrt_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/rsqrt_i8.c b/tests/validation/rsqrt_i8.c index 6110b06f..54187dc1 100644 --- a/tests/validation/rsqrt_i8.c +++ b/tests/validation/rsqrt_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/rsqrt_u8.c b/tests/validation/rsqrt_u8.c index cf426b72..6e5637c6 100644 --- a/tests/validation/rsqrt_u8.c +++ b/tests/validation/rsqrt_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_max_f32.c b/tests/validation/segment_max_f32.c index 95ba8b03..4aa028c4 100644 --- a/tests/validation/segment_max_f32.c +++ b/tests/validation/segment_max_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_max_i8.c b/tests/validation/segment_max_i8.c index b908d91f..79a97ab3 100644 --- a/tests/validation/segment_max_i8.c +++ b/tests/validation/segment_max_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_max_u8.c b/tests/validation/segment_max_u8.c index bc29f126..84162633 100644 --- a/tests/validation/segment_max_u8.c +++ b/tests/validation/segment_max_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_mean_f32.c b/tests/validation/segment_mean_f32.c index b303b09c..f6396a1a 100644 --- a/tests/validation/segment_mean_f32.c +++ b/tests/validation/segment_mean_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_mean_i8.c b/tests/validation/segment_mean_i8.c index b79e40c2..238a6cef 100644 --- a/tests/validation/segment_mean_i8.c +++ b/tests/validation/segment_mean_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_mean_u8.c b/tests/validation/segment_mean_u8.c index 2b70111d..cdf06cee 100644 --- a/tests/validation/segment_mean_u8.c +++ b/tests/validation/segment_mean_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_min_f32.c b/tests/validation/segment_min_f32.c index d3a86c75..c531263e 100644 --- a/tests/validation/segment_min_f32.c +++ b/tests/validation/segment_min_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_min_i8.c b/tests/validation/segment_min_i8.c index be82af04..1b59745f 100644 --- a/tests/validation/segment_min_i8.c +++ b/tests/validation/segment_min_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_min_u8.c b/tests/validation/segment_min_u8.c index aa0846c2..06271951 100644 --- a/tests/validation/segment_min_u8.c +++ b/tests/validation/segment_min_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_prod_f32.c b/tests/validation/segment_prod_f32.c index 5621ef29..04109a83 100644 --- a/tests/validation/segment_prod_f32.c +++ b/tests/validation/segment_prod_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_prod_i8.c b/tests/validation/segment_prod_i8.c index 6556b692..1567dfa4 100644 --- a/tests/validation/segment_prod_i8.c +++ b/tests/validation/segment_prod_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_prod_u8.c b/tests/validation/segment_prod_u8.c index aeb06faf..850beabb 100644 --- a/tests/validation/segment_prod_u8.c +++ b/tests/validation/segment_prod_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_sum_f32.c b/tests/validation/segment_sum_f32.c index 6e371c91..dcab8e5f 100644 --- a/tests/validation/segment_sum_f32.c +++ b/tests/validation/segment_sum_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_sum_i8.c b/tests/validation/segment_sum_i8.c index 82bcd310..134e156b 100644 --- a/tests/validation/segment_sum_i8.c +++ b/tests/validation/segment_sum_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/segment_sum_u8.c b/tests/validation/segment_sum_u8.c index 119bbc3f..4781f597 100644 --- a/tests/validation/segment_sum_u8.c +++ b/tests/validation/segment_sum_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/select_f32.c b/tests/validation/select_f32.c index 84ac101e..7d202d04 100644 --- a/tests/validation/select_f32.c +++ b/tests/validation/select_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/select_i8.c b/tests/validation/select_i8.c index fea56560..219d72ec 100644 --- a/tests/validation/select_i8.c +++ b/tests/validation/select_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/select_u8.c b/tests/validation/select_u8.c index e5493334..c500fe9d 100644 --- a/tests/validation/select_u8.c +++ b/tests/validation/select_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/shuffle_channel_f32.c b/tests/validation/shuffle_channel_f32.c index cc630905..6f5e42fa 100644 --- a/tests/validation/shuffle_channel_f32.c +++ b/tests/validation/shuffle_channel_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/shuffle_channel_i8.c b/tests/validation/shuffle_channel_i8.c index 7e7bcc39..fb68f372 100644 --- a/tests/validation/shuffle_channel_i8.c +++ b/tests/validation/shuffle_channel_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/shuffle_channel_nchw_f32.c b/tests/validation/shuffle_channel_nchw_f32.c index 7cc53d84..cf1e3052 100644 --- a/tests/validation/shuffle_channel_nchw_f32.c +++ b/tests/validation/shuffle_channel_nchw_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/shuffle_channel_nchw_i8.c b/tests/validation/shuffle_channel_nchw_i8.c index 5f9fe7eb..56ab3599 100644 --- a/tests/validation/shuffle_channel_nchw_i8.c +++ b/tests/validation/shuffle_channel_nchw_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/shuffle_channel_nchw_u8.c b/tests/validation/shuffle_channel_nchw_u8.c index 1e219c70..584e4d7d 100644 --- a/tests/validation/shuffle_channel_nchw_u8.c +++ b/tests/validation/shuffle_channel_nchw_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/shuffle_channel_u8.c b/tests/validation/shuffle_channel_u8.c index 958c1b4f..cb58c074 100644 --- a/tests/validation/shuffle_channel_u8.c +++ b/tests/validation/shuffle_channel_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sigmoid_f32.c b/tests/validation/sigmoid_f32.c index c39df0fd..0ce02e38 100644 --- a/tests/validation/sigmoid_f32.c +++ b/tests/validation/sigmoid_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sigmoid_i8.c b/tests/validation/sigmoid_i8.c index c65984c9..3fff55a1 100644 --- a/tests/validation/sigmoid_i8.c +++ b/tests/validation/sigmoid_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sigmoid_u8.c b/tests/validation/sigmoid_u8.c index ed868518..c31868aa 100644 --- a/tests/validation/sigmoid_u8.c +++ b/tests/validation/sigmoid_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sign_f32.c b/tests/validation/sign_f32.c index bfaad0e9..3e378a8a 100644 --- a/tests/validation/sign_f32.c +++ b/tests/validation/sign_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sin_f32.c b/tests/validation/sin_f32.c index e218e877..a24c5311 100644 --- a/tests/validation/sin_f32.c +++ b/tests/validation/sin_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sin_i8.c b/tests/validation/sin_i8.c index b9538df1..3c3dd779 100644 --- a/tests/validation/sin_i8.c +++ b/tests/validation/sin_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sin_u8.c b/tests/validation/sin_u8.c index dc90b8f7..bab3d3ff 100644 --- a/tests/validation/sin_u8.c +++ b/tests/validation/sin_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sinh_f32.c b/tests/validation/sinh_f32.c index 1663d15f..2b7fe837 100644 --- a/tests/validation/sinh_f32.c +++ b/tests/validation/sinh_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sinh_i8.c b/tests/validation/sinh_i8.c index dbdb3ba2..05605e37 100644 --- a/tests/validation/sinh_i8.c +++ b/tests/validation/sinh_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sinh_u8.c b/tests/validation/sinh_u8.c index 8f685e4f..66c0f384 100644 --- a/tests/validation/sinh_u8.c +++ b/tests/validation/sinh_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/slice_f32.c b/tests/validation/slice_f32.c index 48822038..eb5014e0 100644 --- a/tests/validation/slice_f32.c +++ b/tests/validation/slice_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/slice_i8.c b/tests/validation/slice_i8.c index 281bfb1a..7d02fb3d 100644 --- a/tests/validation/slice_i8.c +++ b/tests/validation/slice_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/slice_u8.c b/tests/validation/slice_u8.c index f0dc75f0..f1b0ecdf 100644 --- a/tests/validation/slice_u8.c +++ b/tests/validation/slice_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softmax_f32.c b/tests/validation/softmax_f32.c index 58178f56..a3ef1fd1 100644 --- a/tests/validation/softmax_f32.c +++ b/tests/validation/softmax_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softmax_i8.c b/tests/validation/softmax_i8.c index 87f1c9df..d3b9d115 100644 --- a/tests/validation/softmax_i8.c +++ b/tests/validation/softmax_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softmax_u8.c b/tests/validation/softmax_u8.c index d2153d95..5e335e60 100644 --- a/tests/validation/softmax_u8.c +++ b/tests/validation/softmax_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softplus_f32.c b/tests/validation/softplus_f32.c index 59d313ae..a021c80c 100644 --- a/tests/validation/softplus_f32.c +++ b/tests/validation/softplus_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softplus_i8.c b/tests/validation/softplus_i8.c index ff855414..b7a20a55 100644 --- a/tests/validation/softplus_i8.c +++ b/tests/validation/softplus_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softplus_u8.c b/tests/validation/softplus_u8.c index c2991be1..81f4cfab 100644 --- a/tests/validation/softplus_u8.c +++ b/tests/validation/softplus_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softrelu_f32.c b/tests/validation/softrelu_f32.c index 088e1c11..2e7db9d2 100644 --- a/tests/validation/softrelu_f32.c +++ b/tests/validation/softrelu_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softrelu_i8.c b/tests/validation/softrelu_i8.c index f39aa0e5..69cb40c7 100644 --- a/tests/validation/softrelu_i8.c +++ b/tests/validation/softrelu_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softrelu_u8.c b/tests/validation/softrelu_u8.c index 8b7c17f7..7e45da63 100644 --- a/tests/validation/softrelu_u8.c +++ b/tests/validation/softrelu_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softsign_f32.c b/tests/validation/softsign_f32.c index 90f701ac..ff19bdf2 100644 --- a/tests/validation/softsign_f32.c +++ b/tests/validation/softsign_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softsign_i8.c b/tests/validation/softsign_i8.c index fff5ecb1..86dad06a 100644 --- a/tests/validation/softsign_i8.c +++ b/tests/validation/softsign_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/softsign_u8.c b/tests/validation/softsign_u8.c index e678de15..c9e26f5b 100644 --- a/tests/validation/softsign_u8.c +++ b/tests/validation/softsign_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/space_to_batch_f32.c b/tests/validation/space_to_batch_f32.c index 2e528eb9..617f9a25 100644 --- a/tests/validation/space_to_batch_f32.c +++ b/tests/validation/space_to_batch_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/space_to_batch_i8.c b/tests/validation/space_to_batch_i8.c index 8d6d2639..8dc5db12 100644 --- a/tests/validation/space_to_batch_i8.c +++ b/tests/validation/space_to_batch_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/space_to_batch_u8.c b/tests/validation/space_to_batch_u8.c index 0539cb88..905a88cb 100644 --- a/tests/validation/space_to_batch_u8.c +++ b/tests/validation/space_to_batch_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/space_to_depth_f32.c b/tests/validation/space_to_depth_f32.c index abc2da4c..da3fd537 100644 --- a/tests/validation/space_to_depth_f32.c +++ b/tests/validation/space_to_depth_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/space_to_depth_i8.c b/tests/validation/space_to_depth_i8.c index fec7a87b..27f3ad66 100644 --- a/tests/validation/space_to_depth_i8.c +++ b/tests/validation/space_to_depth_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/space_to_depth_u8.c b/tests/validation/space_to_depth_u8.c index 0553c958..57c0284d 100644 --- a/tests/validation/space_to_depth_u8.c +++ b/tests/validation/space_to_depth_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/split_f32.c b/tests/validation/split_f32.c index 212a7871..8238f640 100644 --- a/tests/validation/split_f32.c +++ b/tests/validation/split_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sqrt_f32.c b/tests/validation/sqrt_f32.c index 8f137ad4..8b588aa0 100644 --- a/tests/validation/sqrt_f32.c +++ b/tests/validation/sqrt_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sqrt_i8.c b/tests/validation/sqrt_i8.c index f6d5f5fc..ec531a37 100644 --- a/tests/validation/sqrt_i8.c +++ b/tests/validation/sqrt_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sqrt_u8.c b/tests/validation/sqrt_u8.c index c11285b3..be0c6d63 100644 --- a/tests/validation/sqrt_u8.c +++ b/tests/validation/sqrt_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/square_f32.c b/tests/validation/square_f32.c index 99fc4354..f3a01439 100644 --- a/tests/validation/square_f32.c +++ b/tests/validation/square_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/squeeze_f32.c b/tests/validation/squeeze_f32.c index 04d3eaf8..aede108a 100644 --- a/tests/validation/squeeze_f32.c +++ b/tests/validation/squeeze_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/squeeze_i8.c b/tests/validation/squeeze_i8.c index 32ce1749..dfc6120c 100644 --- a/tests/validation/squeeze_i8.c +++ b/tests/validation/squeeze_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/squeeze_u8.c b/tests/validation/squeeze_u8.c index 38c09909..1575b447 100644 --- a/tests/validation/squeeze_u8.c +++ b/tests/validation/squeeze_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/stack_f32.c b/tests/validation/stack_f32.c index df28adb4..cefe4a95 100644 --- a/tests/validation/stack_f32.c +++ b/tests/validation/stack_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/stack_i8.c b/tests/validation/stack_i8.c index f2518f34..eb64e567 100644 --- a/tests/validation/stack_i8.c +++ b/tests/validation/stack_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/stack_u8.c b/tests/validation/stack_u8.c index 34ce6570..dd3f6248 100644 --- a/tests/validation/stack_u8.c +++ b/tests/validation/stack_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/strided_slice_f32.c b/tests/validation/strided_slice_f32.c index ecf4e464..c32268bb 100644 --- a/tests/validation/strided_slice_f32.c +++ b/tests/validation/strided_slice_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/strided_slice_i8.c b/tests/validation/strided_slice_i8.c index 2c350baf..a06ac00b 100644 --- a/tests/validation/strided_slice_i8.c +++ b/tests/validation/strided_slice_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/strided_slice_u8.c b/tests/validation/strided_slice_u8.c index 48b2b9f4..5bf79d97 100644 --- a/tests/validation/strided_slice_u8.c +++ b/tests/validation/strided_slice_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sub_f32.c b/tests/validation/sub_f32.c index f7408a73..d950a3c7 100644 --- a/tests/validation/sub_f32.c +++ b/tests/validation/sub_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sub_i8.c b/tests/validation/sub_i8.c index ad75587d..c04ee3cb 100644 --- a/tests/validation/sub_i8.c +++ b/tests/validation/sub_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sub_u8.c b/tests/validation/sub_u8.c index 204a9c46..6847c74e 100644 --- a/tests/validation/sub_u8.c +++ b/tests/validation/sub_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sum_stride_f32.c b/tests/validation/sum_stride_f32.c index b3d94fde..d61e2649 100644 --- a/tests/validation/sum_stride_f32.c +++ b/tests/validation/sum_stride_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/sum_stride_u8.c b/tests/validation/sum_stride_u8.c index be64d3e5..f7fb2fbf 100644 --- a/tests/validation/sum_stride_u8.c +++ b/tests/validation/sum_stride_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/tan_f32.c b/tests/validation/tan_f32.c index f64e0296..4a1124a5 100644 --- a/tests/validation/tan_f32.c +++ b/tests/validation/tan_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/tan_i8.c b/tests/validation/tan_i8.c index d65f02a3..903386c0 100644 --- a/tests/validation/tan_i8.c +++ b/tests/validation/tan_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/tan_u8.c b/tests/validation/tan_u8.c index d823c676..ff854708 100644 --- a/tests/validation/tan_u8.c +++ b/tests/validation/tan_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/tanh_f32.c b/tests/validation/tanh_f32.c index 035d27fa..58aed6f1 100644 --- a/tests/validation/tanh_f32.c +++ b/tests/validation/tanh_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/tanh_i8.c b/tests/validation/tanh_i8.c index 1bf0231b..7f990b3d 100644 --- a/tests/validation/tanh_i8.c +++ b/tests/validation/tanh_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/tanh_u8.c b/tests/validation/tanh_u8.c index 952c6f20..18bdc788 100644 --- a/tests/validation/tanh_u8.c +++ b/tests/validation/tanh_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/threshold_relu_f32.c b/tests/validation/threshold_relu_f32.c index cd9a1a68..8f5ebcda 100644 --- a/tests/validation/threshold_relu_f32.c +++ b/tests/validation/threshold_relu_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/threshold_relu_i8.c b/tests/validation/threshold_relu_i8.c index ce9ab05c..f2d14bdb 100644 --- a/tests/validation/threshold_relu_i8.c +++ b/tests/validation/threshold_relu_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/threshold_relu_u8.c b/tests/validation/threshold_relu_u8.c index d00c1ea5..17deff20 100644 --- a/tests/validation/threshold_relu_u8.c +++ b/tests/validation/threshold_relu_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/tile_f32.c b/tests/validation/tile_f32.c index df717a37..8ef5641a 100644 --- a/tests/validation/tile_f32.c +++ b/tests/validation/tile_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/tile_i8.c b/tests/validation/tile_i8.c index 2d104bee..9c0a8031 100644 --- a/tests/validation/tile_i8.c +++ b/tests/validation/tile_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/tile_u8.c b/tests/validation/tile_u8.c index 90dd9e55..a1bd90ec 100644 --- a/tests/validation/tile_u8.c +++ b/tests/validation/tile_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/topk_f32.c b/tests/validation/topk_f32.c index e019f14f..3db6a576 100644 --- a/tests/validation/topk_f32.c +++ b/tests/validation/topk_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/topk_i8.c b/tests/validation/topk_i8.c index 2cbfbb66..83eabced 100644 --- a/tests/validation/topk_i8.c +++ b/tests/validation/topk_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/topk_u8.c b/tests/validation/topk_u8.c index aa19e1ec..dbcb317d 100644 --- a/tests/validation/topk_u8.c +++ b/tests/validation/topk_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/transpose_f32.c b/tests/validation/transpose_f32.c index 74e2f019..788ff335 100644 --- a/tests/validation/transpose_f32.c +++ b/tests/validation/transpose_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/transpose_i8.c b/tests/validation/transpose_i8.c index 2d2aae4f..ceb62bee 100644 --- a/tests/validation/transpose_i8.c +++ b/tests/validation/transpose_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/transpose_u8.c b/tests/validation/transpose_u8.c index d2b28bf9..ce86a52f 100644 --- a/tests/validation/transpose_u8.c +++ b/tests/validation/transpose_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/trunc_f32.c b/tests/validation/trunc_f32.c index dcc343b5..b137c48e 100644 --- a/tests/validation/trunc_f32.c +++ b/tests/validation/trunc_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/trunc_i8.c b/tests/validation/trunc_i8.c index 5896ede5..a2c40b82 100644 --- a/tests/validation/trunc_i8.c +++ b/tests/validation/trunc_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/trunc_u8.c b/tests/validation/trunc_u8.c index aabe665e..79468421 100644 --- a/tests/validation/trunc_u8.c +++ b/tests/validation/trunc_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_max_f32.c b/tests/validation/unsorted_segment_max_f32.c index 6f801605..76743186 100644 --- a/tests/validation/unsorted_segment_max_f32.c +++ b/tests/validation/unsorted_segment_max_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_max_i8.c b/tests/validation/unsorted_segment_max_i8.c index 492525ca..099d168d 100644 --- a/tests/validation/unsorted_segment_max_i8.c +++ b/tests/validation/unsorted_segment_max_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_max_u8.c b/tests/validation/unsorted_segment_max_u8.c index 6821e3da..d2eabbe4 100644 --- a/tests/validation/unsorted_segment_max_u8.c +++ b/tests/validation/unsorted_segment_max_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_mean_f32.c b/tests/validation/unsorted_segment_mean_f32.c index c87cba70..ea437560 100644 --- a/tests/validation/unsorted_segment_mean_f32.c +++ b/tests/validation/unsorted_segment_mean_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_mean_i8.c b/tests/validation/unsorted_segment_mean_i8.c index e16fdaac..4dbf2bb8 100644 --- a/tests/validation/unsorted_segment_mean_i8.c +++ b/tests/validation/unsorted_segment_mean_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_mean_u8.c b/tests/validation/unsorted_segment_mean_u8.c index c2c8f1b4..5cd6241e 100644 --- a/tests/validation/unsorted_segment_mean_u8.c +++ b/tests/validation/unsorted_segment_mean_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_min_f32.c b/tests/validation/unsorted_segment_min_f32.c index 58f71d05..80e7b685 100644 --- a/tests/validation/unsorted_segment_min_f32.c +++ b/tests/validation/unsorted_segment_min_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_min_i8.c b/tests/validation/unsorted_segment_min_i8.c index d052ad09..70dc34bb 100644 --- a/tests/validation/unsorted_segment_min_i8.c +++ b/tests/validation/unsorted_segment_min_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_min_u8.c b/tests/validation/unsorted_segment_min_u8.c index 03dbee34..60c1ce74 100644 --- a/tests/validation/unsorted_segment_min_u8.c +++ b/tests/validation/unsorted_segment_min_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_prod_f32.c b/tests/validation/unsorted_segment_prod_f32.c index 6d164249..0ee065ea 100644 --- a/tests/validation/unsorted_segment_prod_f32.c +++ b/tests/validation/unsorted_segment_prod_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_prod_i8.c b/tests/validation/unsorted_segment_prod_i8.c index 620e8fe9..a22b9a49 100644 --- a/tests/validation/unsorted_segment_prod_i8.c +++ b/tests/validation/unsorted_segment_prod_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_prod_u8.c b/tests/validation/unsorted_segment_prod_u8.c index 5ae73756..5d4f468c 100644 --- a/tests/validation/unsorted_segment_prod_u8.c +++ b/tests/validation/unsorted_segment_prod_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_sum_f32.c b/tests/validation/unsorted_segment_sum_f32.c index 5297477e..84c365fc 100644 --- a/tests/validation/unsorted_segment_sum_f32.c +++ b/tests/validation/unsorted_segment_sum_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_sum_i8.c b/tests/validation/unsorted_segment_sum_i8.c index c69e1e2d..911dc5e8 100644 --- a/tests/validation/unsorted_segment_sum_i8.c +++ b/tests/validation/unsorted_segment_sum_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unsorted_segment_sum_u8.c b/tests/validation/unsorted_segment_sum_u8.c index 32b938ee..8ae7236f 100644 --- a/tests/validation/unsorted_segment_sum_u8.c +++ b/tests/validation/unsorted_segment_sum_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unstack_f32.c b/tests/validation/unstack_f32.c index ef9466d1..a260065f 100644 --- a/tests/validation/unstack_f32.c +++ b/tests/validation/unstack_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unstack_i8.c b/tests/validation/unstack_i8.c index 9a3dc6a5..5c7711f3 100644 --- a/tests/validation/unstack_i8.c +++ b/tests/validation/unstack_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/unstack_u8.c b/tests/validation/unstack_u8.c index 6c3d0f26..785c3376 100644 --- a/tests/validation/unstack_u8.c +++ b/tests/validation/unstack_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/xor_u32.c b/tests/validation/xor_u32.c index ebb34b55..4bfae44d 100644 --- a/tests/validation/xor_u32.c +++ b/tests/validation/xor_u32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/yuv_rgb_scale_f32.c b/tests/validation/yuv_rgb_scale_f32.c index d354259f..df6d4b90 100644 --- a/tests/validation/yuv_rgb_scale_f32.c +++ b/tests/validation/yuv_rgb_scale_f32.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/yuv_rgb_scale_i8.c b/tests/validation/yuv_rgb_scale_i8.c index 2736025f..58c5449b 100644 --- a/tests/validation/yuv_rgb_scale_i8.c +++ b/tests/validation/yuv_rgb_scale_i8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation/yuv_rgb_scale_u8.c b/tests/validation/yuv_rgb_scale_u8.c index 1f72d4e5..97e28016 100644 --- a/tests/validation/yuv_rgb_scale_u8.c +++ b/tests/validation/yuv_rgb_scale_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/add.c b/tests/validation_graph/add.c index a0944d2e..1d60f93e 100644 --- a/tests/validation_graph/add.c +++ b/tests/validation_graph/add.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/anole/abs.c b/tests/validation_graph/anole/abs.c index df5ca3f5..9edc066c 100644 --- a/tests/validation_graph/anole/abs.c +++ b/tests/validation_graph/anole/abs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/add.c b/tests/validation_graph/anole/add.c index 73b6c5fb..684aca13 100644 --- a/tests/validation_graph/anole/add.c +++ b/tests/validation_graph/anole/add.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/anole/and.c b/tests/validation_graph/anole/and.c index 4db21f49..de8878e0 100644 --- a/tests/validation_graph/anole/and.c +++ b/tests/validation_graph/anole/and.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/argmax.c b/tests/validation_graph/anole/argmax.c index 51977113..495d31e8 100644 --- a/tests/validation_graph/anole/argmax.c +++ b/tests/validation_graph/anole/argmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/argmin.c b/tests/validation_graph/anole/argmin.c index 04c6d656..06bae27e 100644 --- a/tests/validation_graph/anole/argmin.c +++ b/tests/validation_graph/anole/argmin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/avgpool.c b/tests/validation_graph/anole/avgpool.c index 437eb102..12d870a3 100644 --- a/tests/validation_graph/anole/avgpool.c +++ b/tests/validation_graph/anole/avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/batch_normalization.c b/tests/validation_graph/anole/batch_normalization.c index 9fc0f005..372ecab6 100644 --- a/tests/validation_graph/anole/batch_normalization.c +++ b/tests/validation_graph/anole/batch_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/batch_to_space.c b/tests/validation_graph/anole/batch_to_space.c index b35e1777..5977b340 100644 --- a/tests/validation_graph/anole/batch_to_space.c +++ b/tests/validation_graph/anole/batch_to_space.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/batch_to_space_nd.c b/tests/validation_graph/anole/batch_to_space_nd.c index 7c0036af..bc6fba4a 100644 --- a/tests/validation_graph/anole/batch_to_space_nd.c +++ b/tests/validation_graph/anole/batch_to_space_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/clip.c b/tests/validation_graph/anole/clip.c index 5b72ffdc..e3d2a09d 100644 --- a/tests/validation_graph/anole/clip.c +++ b/tests/validation_graph/anole/clip.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/concat.c b/tests/validation_graph/anole/concat.c index 2930ed97..b8eccd74 100644 --- a/tests/validation_graph/anole/concat.c +++ b/tests/validation_graph/anole/concat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/convolution.c b/tests/validation_graph/anole/convolution.c index db9c082f..debe97f4 100644 --- a/tests/validation_graph/anole/convolution.c +++ b/tests/validation_graph/anole/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/anole/convolution_relu.c b/tests/validation_graph/anole/convolution_relu.c index 34d3c871..d70ff6e9 100644 --- a/tests/validation_graph/anole/convolution_relu.c +++ b/tests/validation_graph/anole/convolution_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/convolution_relu6.c b/tests/validation_graph/anole/convolution_relu6.c index 535914ff..059b4b1f 100644 --- a/tests/validation_graph/anole/convolution_relu6.c +++ b/tests/validation_graph/anole/convolution_relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/deconvolution.c b/tests/validation_graph/anole/deconvolution.c index 6a5539fc..0c375232 100644 --- a/tests/validation_graph/anole/deconvolution.c +++ b/tests/validation_graph/anole/deconvolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/depth_to_space.c b/tests/validation_graph/anole/depth_to_space.c index 74dd72b4..d9db4361 100644 --- a/tests/validation_graph/anole/depth_to_space.c +++ b/tests/validation_graph/anole/depth_to_space.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/depthwise_convolution.c b/tests/validation_graph/anole/depthwise_convolution.c index 6e9940e7..d70a2d76 100644 --- a/tests/validation_graph/anole/depthwise_convolution.c +++ b/tests/validation_graph/anole/depthwise_convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/anole/depthwise_deconvolution.c b/tests/validation_graph/anole/depthwise_deconvolution.c index f4012ddd..d0b9b822 100644 --- a/tests/validation_graph/anole/depthwise_deconvolution.c +++ b/tests/validation_graph/anole/depthwise_deconvolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/div.c b/tests/validation_graph/anole/div.c index 61792eea..c61a0ec5 100644 --- a/tests/validation_graph/anole/div.c +++ b/tests/validation_graph/anole/div.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/anole/elu.c b/tests/validation_graph/anole/elu.c index 09fdcf74..bba4935e 100644 --- a/tests/validation_graph/anole/elu.c +++ b/tests/validation_graph/anole/elu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/equal.c b/tests/validation_graph/anole/equal.c index b98e800e..d4de737c 100644 --- a/tests/validation_graph/anole/equal.c +++ b/tests/validation_graph/anole/equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/exp.c b/tests/validation_graph/anole/exp.c index dbb101b8..eb179e98 100644 --- a/tests/validation_graph/anole/exp.c +++ b/tests/validation_graph/anole/exp.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/expand_dims.c b/tests/validation_graph/anole/expand_dims.c index 64bb2a24..3637bfd9 100644 --- a/tests/validation_graph/anole/expand_dims.c +++ b/tests/validation_graph/anole/expand_dims.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/flatten.c b/tests/validation_graph/anole/flatten.c index 51d0f725..450ec47c 100644 --- a/tests/validation_graph/anole/flatten.c +++ b/tests/validation_graph/anole/flatten.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/floor.c b/tests/validation_graph/anole/floor.c index 56407034..ff72a01e 100644 --- a/tests/validation_graph/anole/floor.c +++ b/tests/validation_graph/anole/floor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/floor_divide.c b/tests/validation_graph/anole/floor_divide.c index d53e0552..8173ad2a 100644 --- a/tests/validation_graph/anole/floor_divide.c +++ b/tests/validation_graph/anole/floor_divide.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/fullyconnected.c b/tests/validation_graph/anole/fullyconnected.c index 5f1be3d8..898bc289 100644 --- a/tests/validation_graph/anole/fullyconnected.c +++ b/tests/validation_graph/anole/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/gather.c b/tests/validation_graph/anole/gather.c index 91f0e285..9325e735 100644 --- a/tests/validation_graph/anole/gather.c +++ b/tests/validation_graph/anole/gather.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/gather_nd.c b/tests/validation_graph/anole/gather_nd.c index 3af2ef33..cd65c2d7 100644 --- a/tests/validation_graph/anole/gather_nd.c +++ b/tests/validation_graph/anole/gather_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/global_avgpool.c b/tests/validation_graph/anole/global_avgpool.c index 33c8729a..b3710986 100644 --- a/tests/validation_graph/anole/global_avgpool.c +++ b/tests/validation_graph/anole/global_avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/global_maxpool.c b/tests/validation_graph/anole/global_maxpool.c index 755f9848..803c6890 100644 --- a/tests/validation_graph/anole/global_maxpool.c +++ b/tests/validation_graph/anole/global_maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/greater.c b/tests/validation_graph/anole/greater.c index 288be99f..25d66c2a 100644 --- a/tests/validation_graph/anole/greater.c +++ b/tests/validation_graph/anole/greater.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/greater_equal.c b/tests/validation_graph/anole/greater_equal.c index 1a5ce517..1d331a84 100644 --- a/tests/validation_graph/anole/greater_equal.c +++ b/tests/validation_graph/anole/greater_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/group_convolution.c b/tests/validation_graph/anole/group_convolution.c index ffda0155..216020c1 100644 --- a/tests/validation_graph/anole/group_convolution.c +++ b/tests/validation_graph/anole/group_convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/l2_normalization.c b/tests/validation_graph/anole/l2_normalization.c index cacff641..86d9a531 100644 --- a/tests/validation_graph/anole/l2_normalization.c +++ b/tests/validation_graph/anole/l2_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/l2_pool.c b/tests/validation_graph/anole/l2_pool.c index f95ae867..659db72f 100644 --- a/tests/validation_graph/anole/l2_pool.c +++ b/tests/validation_graph/anole/l2_pool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/leaky_relu.c b/tests/validation_graph/anole/leaky_relu.c index ac332ea7..f147d4ad 100644 --- a/tests/validation_graph/anole/leaky_relu.c +++ b/tests/validation_graph/anole/leaky_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/less.c b/tests/validation_graph/anole/less.c index b4aee943..680d004e 100644 --- a/tests/validation_graph/anole/less.c +++ b/tests/validation_graph/anole/less.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/less_equal.c b/tests/validation_graph/anole/less_equal.c index 5b3036c8..4668bd39 100644 --- a/tests/validation_graph/anole/less_equal.c +++ b/tests/validation_graph/anole/less_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/log.c b/tests/validation_graph/anole/log.c index 5712f81a..25b741fb 100644 --- a/tests/validation_graph/anole/log.c +++ b/tests/validation_graph/anole/log.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/log_softmax.c b/tests/validation_graph/anole/log_softmax.c index 291b9322..d6ab3a2f 100644 --- a/tests/validation_graph/anole/log_softmax.c +++ b/tests/validation_graph/anole/log_softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/lrn.c b/tests/validation_graph/anole/lrn.c index a57104fb..21eeae5b 100644 --- a/tests/validation_graph/anole/lrn.c +++ b/tests/validation_graph/anole/lrn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/matmul.c b/tests/validation_graph/anole/matmul.c index d606c5b5..ab253f7b 100644 --- a/tests/validation_graph/anole/matmul.c +++ b/tests/validation_graph/anole/matmul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/max.c b/tests/validation_graph/anole/max.c index 43812756..cca81149 100644 --- a/tests/validation_graph/anole/max.c +++ b/tests/validation_graph/anole/max.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/maximum.c b/tests/validation_graph/anole/maximum.c index fa3f6719..1b7970b1 100644 --- a/tests/validation_graph/anole/maximum.c +++ b/tests/validation_graph/anole/maximum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/maxpool.c b/tests/validation_graph/anole/maxpool.c index 39794917..606a4ea4 100644 --- a/tests/validation_graph/anole/maxpool.c +++ b/tests/validation_graph/anole/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/maxpool2d_locat.c b/tests/validation_graph/anole/maxpool2d_locat.c index a2cb5b32..51ba11e7 100644 --- a/tests/validation_graph/anole/maxpool2d_locat.c +++ b/tests/validation_graph/anole/maxpool2d_locat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/mean.c b/tests/validation_graph/anole/mean.c index 87a03e3f..119eef94 100644 --- a/tests/validation_graph/anole/mean.c +++ b/tests/validation_graph/anole/mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/min.c b/tests/validation_graph/anole/min.c index e85eb224..96f18020 100644 --- a/tests/validation_graph/anole/min.c +++ b/tests/validation_graph/anole/min.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/minimum.c b/tests/validation_graph/anole/minimum.c index 39423204..4dbb6042 100644 --- a/tests/validation_graph/anole/minimum.c +++ b/tests/validation_graph/anole/minimum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/mul.c b/tests/validation_graph/anole/mul.c index 1d2efe9b..a95d0fc9 100644 --- a/tests/validation_graph/anole/mul.c +++ b/tests/validation_graph/anole/mul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/not_equal.c b/tests/validation_graph/anole/not_equal.c index 250d63d4..6106fa63 100644 --- a/tests/validation_graph/anole/not_equal.c +++ b/tests/validation_graph/anole/not_equal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/or.c b/tests/validation_graph/anole/or.c index 91424a53..c2f0df7c 100644 --- a/tests/validation_graph/anole/or.c +++ b/tests/validation_graph/anole/or.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/pad.c b/tests/validation_graph/anole/pad.c index 7f1cc369..98076684 100644 --- a/tests/validation_graph/anole/pad.c +++ b/tests/validation_graph/anole/pad.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/pow.c b/tests/validation_graph/anole/pow.c index 18945df3..15253962 100644 --- a/tests/validation_graph/anole/pow.c +++ b/tests/validation_graph/anole/pow.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/prelu.c b/tests/validation_graph/anole/prelu.c index efd764bd..add42f03 100644 --- a/tests/validation_graph/anole/prelu.c +++ b/tests/validation_graph/anole/prelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/prod.c b/tests/validation_graph/anole/prod.c index b18c03d9..a8da8dca 100644 --- a/tests/validation_graph/anole/prod.c +++ b/tests/validation_graph/anole/prod.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/psroipooling.c b/tests/validation_graph/anole/psroipooling.c index 250d2c86..1c79b6c7 100644 --- a/tests/validation_graph/anole/psroipooling.c +++ b/tests/validation_graph/anole/psroipooling.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/relu.c b/tests/validation_graph/anole/relu.c index 1d05640c..b341bec7 100644 --- a/tests/validation_graph/anole/relu.c +++ b/tests/validation_graph/anole/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/relu1.c b/tests/validation_graph/anole/relu1.c index 8aa37585..684634cd 100644 --- a/tests/validation_graph/anole/relu1.c +++ b/tests/validation_graph/anole/relu1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/relu6.c b/tests/validation_graph/anole/relu6.c index a08bc491..2504c6b2 100644 --- a/tests/validation_graph/anole/relu6.c +++ b/tests/validation_graph/anole/relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/relun.c b/tests/validation_graph/anole/relun.c index e29b320e..5191fefa 100644 --- a/tests/validation_graph/anole/relun.c +++ b/tests/validation_graph/anole/relun.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/reorg.c b/tests/validation_graph/anole/reorg.c index 04a71c9f..229cd081 100644 --- a/tests/validation_graph/anole/reorg.c +++ b/tests/validation_graph/anole/reorg.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/reshape.c b/tests/validation_graph/anole/reshape.c index a726e89f..11c06ae9 100644 --- a/tests/validation_graph/anole/reshape.c +++ b/tests/validation_graph/anole/reshape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/reverse.c b/tests/validation_graph/anole/reverse.c index 277e3a6b..c4dafac0 100644 --- a/tests/validation_graph/anole/reverse.c +++ b/tests/validation_graph/anole/reverse.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/roipooling.c b/tests/validation_graph/anole/roipooling.c index 1299573a..d2098b92 100644 --- a/tests/validation_graph/anole/roipooling.c +++ b/tests/validation_graph/anole/roipooling.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/rsqrt.c b/tests/validation_graph/anole/rsqrt.c index 8c71796e..4bb8648d 100644 --- a/tests/validation_graph/anole/rsqrt.c +++ b/tests/validation_graph/anole/rsqrt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/select.c b/tests/validation_graph/anole/select.c index f67a8244..1f9a3f1e 100644 --- a/tests/validation_graph/anole/select.c +++ b/tests/validation_graph/anole/select.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/shuffle_channel.c b/tests/validation_graph/anole/shuffle_channel.c index 26fa105e..334d8b4b 100644 --- a/tests/validation_graph/anole/shuffle_channel.c +++ b/tests/validation_graph/anole/shuffle_channel.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/sigmoid.c b/tests/validation_graph/anole/sigmoid.c index 98a555d1..29f2f8f6 100644 --- a/tests/validation_graph/anole/sigmoid.c +++ b/tests/validation_graph/anole/sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/sin.c b/tests/validation_graph/anole/sin.c index 83b7e186..a08058e9 100644 --- a/tests/validation_graph/anole/sin.c +++ b/tests/validation_graph/anole/sin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/slice.c b/tests/validation_graph/anole/slice.c index c220a4b7..93feabcd 100644 --- a/tests/validation_graph/anole/slice.c +++ b/tests/validation_graph/anole/slice.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/softmax.c b/tests/validation_graph/anole/softmax.c index 76e4f307..d8ea5584 100644 --- a/tests/validation_graph/anole/softmax.c +++ b/tests/validation_graph/anole/softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/softplus.c b/tests/validation_graph/anole/softplus.c index d3700f20..1a736f37 100644 --- a/tests/validation_graph/anole/softplus.c +++ b/tests/validation_graph/anole/softplus.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/softrelu.c b/tests/validation_graph/anole/softrelu.c index 81c19880..00c83318 100644 --- a/tests/validation_graph/anole/softrelu.c +++ b/tests/validation_graph/anole/softrelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/space_to_batch.c b/tests/validation_graph/anole/space_to_batch.c index 1a270567..649ac24e 100644 --- a/tests/validation_graph/anole/space_to_batch.c +++ b/tests/validation_graph/anole/space_to_batch.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/split.c b/tests/validation_graph/anole/split.c index dba4e2a5..bf77f7f0 100644 --- a/tests/validation_graph/anole/split.c +++ b/tests/validation_graph/anole/split.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/sqrt.c b/tests/validation_graph/anole/sqrt.c index b30f10e2..7594baa4 100644 --- a/tests/validation_graph/anole/sqrt.c +++ b/tests/validation_graph/anole/sqrt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/square.c b/tests/validation_graph/anole/square.c index 44c5cf3e..1642c4f2 100644 --- a/tests/validation_graph/anole/square.c +++ b/tests/validation_graph/anole/square.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/squeeze.c b/tests/validation_graph/anole/squeeze.c index 82cc41f7..e3860466 100644 --- a/tests/validation_graph/anole/squeeze.c +++ b/tests/validation_graph/anole/squeeze.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/stack.c b/tests/validation_graph/anole/stack.c index b1b212da..686e58d5 100644 --- a/tests/validation_graph/anole/stack.c +++ b/tests/validation_graph/anole/stack.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/sub.c b/tests/validation_graph/anole/sub.c index af793e0e..f672f81f 100644 --- a/tests/validation_graph/anole/sub.c +++ b/tests/validation_graph/anole/sub.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/sum.c b/tests/validation_graph/anole/sum.c index f32bc675..e86e2e9d 100644 --- a/tests/validation_graph/anole/sum.c +++ b/tests/validation_graph/anole/sum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/tanh.c b/tests/validation_graph/anole/tanh.c index 26d45218..22dc2e71 100644 --- a/tests/validation_graph/anole/tanh.c +++ b/tests/validation_graph/anole/tanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/tile.c b/tests/validation_graph/anole/tile.c index c7afc9c6..81138777 100644 --- a/tests/validation_graph/anole/tile.c +++ b/tests/validation_graph/anole/tile.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/topk.c b/tests/validation_graph/anole/topk.c index 67ffab11..cb60800e 100644 --- a/tests/validation_graph/anole/topk.c +++ b/tests/validation_graph/anole/topk.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/transpose.c b/tests/validation_graph/anole/transpose.c index 6c3136a8..bff3a514 100644 --- a/tests/validation_graph/anole/transpose.c +++ b/tests/validation_graph/anole/transpose.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/unpooling.c b/tests/validation_graph/anole/unpooling.c index e8fd9ff0..d1e25bf9 100644 --- a/tests/validation_graph/anole/unpooling.c +++ b/tests/validation_graph/anole/unpooling.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/anole/unstack.c b/tests/validation_graph/anole/unstack.c index 412cac13..491d63b9 100644 --- a/tests/validation_graph/anole/unstack.c +++ b/tests/validation_graph/anole/unstack.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/argmax.c b/tests/validation_graph/argmax.c index 51977113..495d31e8 100644 --- a/tests/validation_graph/argmax.c +++ b/tests/validation_graph/argmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/avgpool.c b/tests/validation_graph/avgpool.c index 56071a35..ac8eb428 100644 --- a/tests/validation_graph/avgpool.c +++ b/tests/validation_graph/avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/batch_normalization.c b/tests/validation_graph/batch_normalization.c index 244f841b..970faa36 100644 --- a/tests/validation_graph/batch_normalization.c +++ b/tests/validation_graph/batch_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/batch_to_space_nd.c b/tests/validation_graph/batch_to_space_nd.c index fecbeff6..dbadb1d0 100644 --- a/tests/validation_graph/batch_to_space_nd.c +++ b/tests/validation_graph/batch_to_space_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/add.c b/tests/validation_graph/c906/add.c index 0b83312c..9c9ad745 100644 --- a/tests/validation_graph/c906/add.c +++ b/tests/validation_graph/c906/add.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/c906/avgpool.c b/tests/validation_graph/c906/avgpool.c index 83d4234e..9190fa0c 100644 --- a/tests/validation_graph/c906/avgpool.c +++ b/tests/validation_graph/c906/avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/concat.c b/tests/validation_graph/c906/concat.c index ef3c2939..7673a6e1 100644 --- a/tests/validation_graph/c906/concat.c +++ b/tests/validation_graph/c906/concat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/convolution.c b/tests/validation_graph/c906/convolution.c index c0ab9c90..040f0637 100644 --- a/tests/validation_graph/c906/convolution.c +++ b/tests/validation_graph/c906/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/c906/deconvolution.c b/tests/validation_graph/c906/deconvolution.c index 7adf9d41..b87aada3 100644 --- a/tests/validation_graph/c906/deconvolution.c +++ b/tests/validation_graph/c906/deconvolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/depth_to_space.c b/tests/validation_graph/c906/depth_to_space.c index 16bb9ccf..63d35028 100644 --- a/tests/validation_graph/c906/depth_to_space.c +++ b/tests/validation_graph/c906/depth_to_space.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/depthwise_convolution.c b/tests/validation_graph/c906/depthwise_convolution.c index 4792e8d4..1302dfce 100644 --- a/tests/validation_graph/c906/depthwise_convolution.c +++ b/tests/validation_graph/c906/depthwise_convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/c906/div.c b/tests/validation_graph/c906/div.c index 2220d71c..1ca03d47 100644 --- a/tests/validation_graph/c906/div.c +++ b/tests/validation_graph/c906/div.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/c906/flatten.c b/tests/validation_graph/c906/flatten.c index a195c981..7587b0f0 100644 --- a/tests/validation_graph/c906/flatten.c +++ b/tests/validation_graph/c906/flatten.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/fullyconnected.c b/tests/validation_graph/c906/fullyconnected.c index 46cf9610..8e02cef2 100644 --- a/tests/validation_graph/c906/fullyconnected.c +++ b/tests/validation_graph/c906/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/global_avgpool.c b/tests/validation_graph/c906/global_avgpool.c index bb571878..336eb093 100644 --- a/tests/validation_graph/c906/global_avgpool.c +++ b/tests/validation_graph/c906/global_avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/global_maxpool.c b/tests/validation_graph/c906/global_maxpool.c index c63724ed..d725f713 100644 --- a/tests/validation_graph/c906/global_maxpool.c +++ b/tests/validation_graph/c906/global_maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/group_convolution.c b/tests/validation_graph/c906/group_convolution.c index f487c539..14561fa0 100644 --- a/tests/validation_graph/c906/group_convolution.c +++ b/tests/validation_graph/c906/group_convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/leaky_relu.c b/tests/validation_graph/c906/leaky_relu.c index 1d65263a..74aafa5e 100644 --- a/tests/validation_graph/c906/leaky_relu.c +++ b/tests/validation_graph/c906/leaky_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/maximum.c b/tests/validation_graph/c906/maximum.c index 916c8a14..f1dc2e28 100644 --- a/tests/validation_graph/c906/maximum.c +++ b/tests/validation_graph/c906/maximum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/maxpool.c b/tests/validation_graph/c906/maxpool.c index dea26b12..ff314459 100644 --- a/tests/validation_graph/c906/maxpool.c +++ b/tests/validation_graph/c906/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/minimum.c b/tests/validation_graph/c906/minimum.c index 86867fc7..087bb8f2 100644 --- a/tests/validation_graph/c906/minimum.c +++ b/tests/validation_graph/c906/minimum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/pad.c b/tests/validation_graph/c906/pad.c index b2930d4e..b46011aa 100644 --- a/tests/validation_graph/c906/pad.c +++ b/tests/validation_graph/c906/pad.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/relu.c b/tests/validation_graph/c906/relu.c index ac2c3134..59c8c71d 100644 --- a/tests/validation_graph/c906/relu.c +++ b/tests/validation_graph/c906/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/relu1.c b/tests/validation_graph/c906/relu1.c index 4c1778a6..3b8165ec 100644 --- a/tests/validation_graph/c906/relu1.c +++ b/tests/validation_graph/c906/relu1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/relu6.c b/tests/validation_graph/c906/relu6.c index 8c4ff709..d14f0653 100644 --- a/tests/validation_graph/c906/relu6.c +++ b/tests/validation_graph/c906/relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/reshape.c b/tests/validation_graph/c906/reshape.c index 19d6a85a..632916ba 100644 --- a/tests/validation_graph/c906/reshape.c +++ b/tests/validation_graph/c906/reshape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/resize_bilinear.c b/tests/validation_graph/c906/resize_bilinear.c index 6517f8dd..e151a4cc 100644 --- a/tests/validation_graph/c906/resize_bilinear.c +++ b/tests/validation_graph/c906/resize_bilinear.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/resize_nearest_neighbor.c b/tests/validation_graph/c906/resize_nearest_neighbor.c index c40c0755..4b51edae 100644 --- a/tests/validation_graph/c906/resize_nearest_neighbor.c +++ b/tests/validation_graph/c906/resize_nearest_neighbor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/sigmoid.c b/tests/validation_graph/c906/sigmoid.c index 7fcac743..91d85342 100644 --- a/tests/validation_graph/c906/sigmoid.c +++ b/tests/validation_graph/c906/sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/space_to_depth.c b/tests/validation_graph/c906/space_to_depth.c index a41e9afa..9bfe7e1b 100644 --- a/tests/validation_graph/c906/space_to_depth.c +++ b/tests/validation_graph/c906/space_to_depth.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/split.c b/tests/validation_graph/c906/split.c index c24ef3b5..125b1d88 100644 --- a/tests/validation_graph/c906/split.c +++ b/tests/validation_graph/c906/split.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/squeeze.c b/tests/validation_graph/c906/squeeze.c index 2cedcaf0..efcaad66 100644 --- a/tests/validation_graph/c906/squeeze.c +++ b/tests/validation_graph/c906/squeeze.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/sub.c b/tests/validation_graph/c906/sub.c index 09e3e47f..50aadf4a 100644 --- a/tests/validation_graph/c906/sub.c +++ b/tests/validation_graph/c906/sub.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/tanh.c b/tests/validation_graph/c906/tanh.c index d04fb41b..7bddbef1 100644 --- a/tests/validation_graph/c906/tanh.c +++ b/tests/validation_graph/c906/tanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/c906/transpose.c b/tests/validation_graph/c906/transpose.c index 0375dc77..12d954de 100644 --- a/tests/validation_graph/c906/transpose.c +++ b/tests/validation_graph/c906/transpose.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/concat.c b/tests/validation_graph/concat.c index e7647811..392cd17a 100644 --- a/tests/validation_graph/concat.c +++ b/tests/validation_graph/concat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/convolution.c b/tests/validation_graph/convolution.c index 4f77425b..7b6c57f6 100644 --- a/tests/validation_graph/convolution.c +++ b/tests/validation_graph/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/crop.c b/tests/validation_graph/crop.c index 8d9b9868..da67da9f 100644 --- a/tests/validation_graph/crop.c +++ b/tests/validation_graph/crop.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/deconvolution.c b/tests/validation_graph/deconvolution.c index a5342b02..e72f59f6 100644 --- a/tests/validation_graph/deconvolution.c +++ b/tests/validation_graph/deconvolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/depth_to_space.c b/tests/validation_graph/depth_to_space.c index c2e33db4..7059a8fe 100644 --- a/tests/validation_graph/depth_to_space.c +++ b/tests/validation_graph/depth_to_space.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/depthwise_convolution.c b/tests/validation_graph/depthwise_convolution.c index 1d2840ee..8472f6d7 100644 --- a/tests/validation_graph/depthwise_convolution.c +++ b/tests/validation_graph/depthwise_convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/div.c b/tests/validation_graph/div.c index 95571fb6..a88396df 100644 --- a/tests/validation_graph/div.c +++ b/tests/validation_graph/div.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/flatten.c b/tests/validation_graph/flatten.c index 091e50aa..fd640534 100644 --- a/tests/validation_graph/flatten.c +++ b/tests/validation_graph/flatten.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/fullyconnected.c b/tests/validation_graph/fullyconnected.c index 35818d43..8ba7d304 100644 --- a/tests/validation_graph/fullyconnected.c +++ b/tests/validation_graph/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/global_avgpool.c b/tests/validation_graph/global_avgpool.c index 1afa105f..a14a31b3 100644 --- a/tests/validation_graph/global_avgpool.c +++ b/tests/validation_graph/global_avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/global_maxpool.c b/tests/validation_graph/global_maxpool.c index d121c72d..834fc5ee 100644 --- a/tests/validation_graph/global_maxpool.c +++ b/tests/validation_graph/global_maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/group_convolution.c b/tests/validation_graph/group_convolution.c index d906effa..1bbed89b 100644 --- a/tests/validation_graph/group_convolution.c +++ b/tests/validation_graph/group_convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/hlight/Makefile b/tests/validation_graph/hlight/Makefile new file mode 100644 index 00000000..f47c93b3 --- /dev/null +++ b/tests/validation_graph/hlight/Makefile @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Makefile Example to deploy TVM modules. + +ROOT_DIR=$(shell cd ../../..; pwd) +INCLUDE += -I${ROOT_DIR}/include +LDFLAGS = -L${ROOT_DIR}/x86_build -Wl,-unresolved-symbols=ignore-in-shared-libs + +CC = gcc + +CFLAGS += -O0 -g3 ${INCLUDE} + +LDFLAGS += -lstdc++ -lcsi_nn2_hlight_x86 -lm + +.PHONY: clean all + +all: test_subgraph + +test_subgraph: test_subgraph.c + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) + +clean: + -rm subgraph \ No newline at end of file diff --git a/tests/validation_graph/hlight/run.sh b/tests/validation_graph/hlight/run.sh new file mode 100755 index 00000000..4408330d --- /dev/null +++ b/tests/validation_graph/hlight/run.sh @@ -0,0 +1,3 @@ +export LD_LIBRARY_PATH=../../../module/nna_ddk_install/x86:../../../install_nn2/lib +export SIM_VISION_PATH=../../../module/nna_ddk_install/x86/sim_nna.so +./test_subgraph \ No newline at end of file diff --git a/tests/validation_graph/hlight/test_subgraph.c b/tests/validation_graph/hlight/test_subgraph.c new file mode 100644 index 00000000..f656f6d7 --- /dev/null +++ b/tests/validation_graph/hlight/test_subgraph.c @@ -0,0 +1,275 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_gref.h" +#include "csi_nn.h" + +struct csi_session *create_session_base(int input_num, int output_num) +{ + struct csi_session *sess = csi_alloc_session(); + sess->base_run_mode = CSINN_RM_CPU_GRAPH; + sess->model_name = "csi.mbs.bin"; + sess->base_quant_type = CSINN_QUANT_INT8_ASYM; + sess->base_api = CSINN_REF; + sess->base_dtype = CSINN_DTYPE_INT8; + // sess->debug_level = CSI_DEBUG_LEVEL_INFO; + csi_session_init(sess); + csi_set_input_number(input_num, sess); + csi_set_output_number(output_num, sess); + + return sess; +} + +struct csi_tensor *create_tensor_base(char *name, struct csi_session *sess, int *shape, + int shape_len) +{ + struct csi_tensor *tensor = csi_alloc_tensor(sess); + tensor->name = name; + tensor->layout = CSINN_LAYOUT_NCHW; + for (int i = 0; i < shape_len; i++) { + tensor->dim[i] = shape[i]; + } + tensor->dim_count = shape_len; + tensor->qinfo = (struct csi_quant_info *)malloc(sizeof(struct csi_quant_info)); + tensor->qinfo->max = 1.0; + tensor->qinfo->min = 0; + tensor->quant_channel = 1; + + return tensor; +} + +struct csi_ref_graph *convert_graph2subgraph(struct csi_ref_graph *ograph) +{ + if (csi_debug_get_level() <= CSI_DEBUG_LEVEL_INFO) { + csi_debug_info("\nOriginal graph:\n"); + csi_gref_post_dfs(ograph, csi_subgraph_fvisit_print); + csi_gref_reset_graph_visit(ograph); + } + + struct csi_ref_graph *subgraph = csi_subgraph_generate(ograph); + + csi_debug_info("\nGenerated subgraph:\n"); + for (int i = 0; i < subgraph->layer_index; i++) { + if (subgraph->layer[i]->type == CSINN_SUBGRAPH) { + struct csi_ref_graph *s_subgraph = subgraph->layer[i]->data; + if (s_subgraph->layer_size == 0) continue; + csi_gref_update_input_output(subgraph, i); + if (csi_debug_get_level() <= CSI_DEBUG_LEVEL_INFO) { + csi_debug_info("\n---- subgraph_%d: ----\n", i); + csi_gref_reset_graph_visit(s_subgraph); + csi_gref_post_dfs(s_subgraph, csi_subgraph_fvisit_print); + csi_gref_reset_graph_visit(s_subgraph); + csi_debug_info("----subgraph_%d end.----\n\n", i); + } + } else { + csi_debug_info("%s\n", subgraph->layer[i]->name); + } + } + + struct csi_ref_graph *ggraph = csi_subgraph_rebuild(subgraph); + + return ggraph; +} + +/** Normal structure + * input -> relu ->relu -> softmax(cpu) -> relu + * + * Results: + * subgraph 1: relu->relu + * subgraph 2: softmax + * subgraph 3: relu + */ +void test_model1() +{ + printf("Start to test test_model1:\n"); + struct csi_session *sess = create_session_base(1, 1); + + int input_shape[] = {1, 3, 32, 32}; + struct csi_tensor *input = create_tensor_base("input", sess, input_shape, 4); + struct relu_params *params_1 = csi_alloc_params(sizeof(struct relu_params), sess); + params_1->base.layout = CSINN_LAYOUT_NCHW; + params_1->base.name = "params_1"; + params_1->base.api = CSINN_LIGHT; + struct csi_tensor *relu1_out = create_tensor_base("relu1_out", sess, input_shape, 4); + csi_relu_init(input, relu1_out, params_1); + + struct relu_params *params_2 = csi_alloc_params(sizeof(struct relu_params), sess); + params_2->base.layout = CSINN_LAYOUT_NCHW; + params_2->base.name = "params_2"; + params_2->base.api = CSINN_LIGHT; + struct csi_tensor *relu2_out = create_tensor_base("relu2_out", sess, input_shape, 4); + csi_relu_init(relu1_out, relu2_out, params_2); + + struct softmax_params *params_3 = csi_alloc_params(sizeof(struct softmax_params), sess); + params_3->axis = 1; + params_3->base.layout = CSINN_LAYOUT_NCHW; + params_3->base.name = "params_3"; + struct csi_tensor *softmax_out = create_tensor_base("softmax_out", sess, input_shape, 4); + csi_softmax_init(relu2_out, softmax_out, params_3); + + struct relu_params *params_4 = csi_alloc_params(sizeof(struct relu_params), sess); + params_4->base.layout = CSINN_LAYOUT_NCHW; + params_4->base.name = "params_4"; + params_4->base.api = CSINN_LIGHT; + struct csi_tensor *relu3_out = create_tensor_base("relu3_out", sess, input_shape, 4); + csi_relu_init(softmax_out, relu3_out, params_4); + + csi_set_tensor_entry(input, sess); + csi_set_input(0, input, sess); + + csi_relu(input, relu1_out, params_1); + csi_relu(relu1_out, relu2_out, params_2); + csi_softmax(relu2_out, softmax_out, params_3); + csi_relu(softmax_out, relu3_out, params_4); + + csi_set_output(0, relu3_out, sess); + + struct csi_ref_graph *graph = csi_gref_get_graph(sess); + struct csi_ref_graph *ggraph = convert_graph2subgraph(graph); + + // check results + int fail = 0; + if (ggraph->layer_index != 3) { + printf("Actual subgraph number: %d, Reference: 3\n", ggraph->layer_index); + fail = 1; + } + if (ggraph->layer[0]->type != CSINN_SUBGRAPH) { + printf("0-th layer's type is %d, should be CSINN_SUBGRAPH(197)\n", ggraph->layer[0]->type); + fail = 1; + } + if (ggraph->layer[1]->type != CSINN_OP_SOFTMAX) { + printf("1-th layer's type is %d, should be CSINN_OP_SOFTMAX(153)\n", + ggraph->layer[1]->type); + fail = 1; + } + if (ggraph->layer[2]->type != CSINN_SUBGRAPH) { + printf("2-th layer's type is %d, should be CSINN_SUBGRAPH(197)\n", ggraph->layer[2]->type); + fail = 1; + } + if (fail) { + printf("Test test_model1 fails.\n"); + } else { + printf("Test test_model1 succeed.\n"); + } +} + +/** Multi-branch structure: + * input + * | + * relu + * / \ + * relu softmax(cpu) + * \ / + * add + * | + * output + * + * Results: + * subgraph 1: relu relu + * subgraph 2: softmax + * subgraph 3: add + */ +void test_model2() +{ + printf("Start to test test_model2:\n"); + struct csi_session *sess = create_session_base(1, 1); + + int input_shape[] = {1, 3, 32, 32}; + struct csi_tensor *input = create_tensor_base("input", sess, input_shape, 4); + struct relu_params *params_1 = csi_alloc_params(sizeof(struct relu_params), sess); + params_1->base.layout = CSINN_LAYOUT_NCHW; + params_1->base.name = "params_1"; + params_1->base.api = CSINN_LIGHT; + struct csi_tensor *relu1_out = create_tensor_base("relu1_out", sess, input_shape, 4); + csi_relu_init(input, relu1_out, params_1); + + struct relu_params *params_2 = csi_alloc_params(sizeof(struct relu_params), sess); + params_2->base.layout = CSINN_LAYOUT_NCHW; + params_2->base.name = "params_2"; + params_2->base.api = CSINN_LIGHT; + struct csi_tensor *relu2_out = create_tensor_base("relu2_out", sess, input_shape, 4); + csi_relu_init(relu1_out, relu2_out, params_2); + + struct softmax_params *params_3 = csi_alloc_params(sizeof(struct softmax_params), sess); + params_3->axis = 1; + params_3->base.layout = CSINN_LAYOUT_NCHW; + params_3->base.name = "params_3"; + struct csi_tensor *softmax_out = create_tensor_base("softmax_out", sess, input_shape, 4); + csi_softmax_init(relu1_out, softmax_out, params_3); + + struct diso_params *params_4 = csi_alloc_params(sizeof(struct diso_params), sess); + params_4->base.name = "params_4"; + params_4->base.api = CSINN_LIGHT; + struct csi_tensor *add_out = create_tensor_base("add_out", sess, input_shape, 4); + csi_add_init(relu2_out, softmax_out, add_out, params_4); + + csi_set_tensor_entry(input, sess); + csi_set_input(0, input, sess); + + csi_relu(input, relu1_out, params_1); + csi_relu(relu1_out, relu2_out, params_2); + csi_softmax(relu2_out, softmax_out, params_3); + csi_add(relu2_out, softmax_out, add_out, params_4); + + csi_set_output(0, add_out, sess); + + struct csi_ref_graph *graph = csi_gref_get_graph(sess); + struct csi_ref_graph *ggraph = convert_graph2subgraph(graph); + + // check results + int fail = 0; + if (ggraph->layer_index != 3) { + printf("Actual subgraph number: %d, Reference: 3\n", ggraph->layer_index); + fail = 1; + } + if (ggraph->layer[0]->type != CSINN_SUBGRAPH) { + printf("0-th layer's type is %d, should be CSINN_SUBGRAPH(197)\n", ggraph->layer[0]->type); + fail = 1; + } + if (ggraph->layer[1]->type != CSINN_OP_SOFTMAX) { + printf("1-th layer's type is %d, should be CSINN_OP_SOFTMAX(153)\n", + ggraph->layer[1]->type); + fail = 1; + } + if (ggraph->layer[2]->type != CSINN_SUBGRAPH) { + printf("2-th layer's type is %d, should be CSINN_SUBGRAPH(197)\n", ggraph->layer[2]->type); + fail = 1; + } + if (fail) { + printf("Test test_model2 fails.\n"); + } else { + printf("Test test_model2 succeed.\n"); + } +} + +void test_subgraph() +{ + test_model1(); + test_model2(); +} + +int main(int argc, char **argv) +{ + printf("Testing function of subgraph fusion.\n"); + + test_subgraph(); + + return 0; +} \ No newline at end of file diff --git a/tests/validation_graph/l2_normalization.c b/tests/validation_graph/l2_normalization.c index cacff641..86d9a531 100644 --- a/tests/validation_graph/l2_normalization.c +++ b/tests/validation_graph/l2_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/leaky_relu.c b/tests/validation_graph/leaky_relu.c index 31d0abb4..38b0d469 100644 --- a/tests/validation_graph/leaky_relu.c +++ b/tests/validation_graph/leaky_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/add.c b/tests/validation_graph/light/add.c index a1c0eeb3..abc8b631 100644 --- a/tests/validation_graph/light/add.c +++ b/tests/validation_graph/light/add.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/light/argmax.c b/tests/validation_graph/light/argmax.c index 33d1fa9e..d9155bf6 100644 --- a/tests/validation_graph/light/argmax.c +++ b/tests/validation_graph/light/argmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/avgpool.c b/tests/validation_graph/light/avgpool.c index 61654e60..80e6ea53 100644 --- a/tests/validation_graph/light/avgpool.c +++ b/tests/validation_graph/light/avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/batch_normalization.c b/tests/validation_graph/light/batch_normalization.c index 9fc0f005..372ecab6 100644 --- a/tests/validation_graph/light/batch_normalization.c +++ b/tests/validation_graph/light/batch_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/batch_to_space_nd.c b/tests/validation_graph/light/batch_to_space_nd.c index 7c0036af..bc6fba4a 100644 --- a/tests/validation_graph/light/batch_to_space_nd.c +++ b/tests/validation_graph/light/batch_to_space_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/concat.c b/tests/validation_graph/light/concat.c index 886ea692..c2a5ad9f 100644 --- a/tests/validation_graph/light/concat.c +++ b/tests/validation_graph/light/concat.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/convolution.c b/tests/validation_graph/light/convolution.c index 62270d7e..dce044cb 100644 --- a/tests/validation_graph/light/convolution.c +++ b/tests/validation_graph/light/convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/light/crop.c b/tests/validation_graph/light/crop.c index 9fc0f005..372ecab6 100644 --- a/tests/validation_graph/light/crop.c +++ b/tests/validation_graph/light/crop.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/deconvolution.c b/tests/validation_graph/light/deconvolution.c index 7dfdc09a..dfe8bd4a 100644 --- a/tests/validation_graph/light/deconvolution.c +++ b/tests/validation_graph/light/deconvolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/depth_to_space.c b/tests/validation_graph/light/depth_to_space.c index c8458433..4657f674 100644 --- a/tests/validation_graph/light/depth_to_space.c +++ b/tests/validation_graph/light/depth_to_space.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/depthwise_convolution.c b/tests/validation_graph/light/depthwise_convolution.c index 3c2ba61a..ec637576 100644 --- a/tests/validation_graph/light/depthwise_convolution.c +++ b/tests/validation_graph/light/depthwise_convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/light/div.c b/tests/validation_graph/light/div.c index dd59ca90..15440670 100644 --- a/tests/validation_graph/light/div.c +++ b/tests/validation_graph/light/div.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "csi_nn.h" #include "math_snr.h" diff --git a/tests/validation_graph/light/flatten.c b/tests/validation_graph/light/flatten.c index 917822af..f2edb3b8 100644 --- a/tests/validation_graph/light/flatten.c +++ b/tests/validation_graph/light/flatten.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/fullyconnected.c b/tests/validation_graph/light/fullyconnected.c index db422fab..22571197 100644 --- a/tests/validation_graph/light/fullyconnected.c +++ b/tests/validation_graph/light/fullyconnected.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/global_avgpool.c b/tests/validation_graph/light/global_avgpool.c index 67d0c697..f33b2f01 100644 --- a/tests/validation_graph/light/global_avgpool.c +++ b/tests/validation_graph/light/global_avgpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/global_maxpool.c b/tests/validation_graph/light/global_maxpool.c index 2c6e3a48..29002b68 100644 --- a/tests/validation_graph/light/global_maxpool.c +++ b/tests/validation_graph/light/global_maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/group_convolution.c b/tests/validation_graph/light/group_convolution.c index 4de7293c..43977b5d 100644 --- a/tests/validation_graph/light/group_convolution.c +++ b/tests/validation_graph/light/group_convolution.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/l2_normalization.c b/tests/validation_graph/light/l2_normalization.c index cacff641..86d9a531 100644 --- a/tests/validation_graph/light/l2_normalization.c +++ b/tests/validation_graph/light/l2_normalization.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/leaky_relu.c b/tests/validation_graph/light/leaky_relu.c index bb7bcd9d..a8fcaf62 100644 --- a/tests/validation_graph/light/leaky_relu.c +++ b/tests/validation_graph/light/leaky_relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/lrn.c b/tests/validation_graph/light/lrn.c index b5fd862e..0a62acb8 100644 --- a/tests/validation_graph/light/lrn.c +++ b/tests/validation_graph/light/lrn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/maximum.c b/tests/validation_graph/light/maximum.c index 58354934..fcab4ffb 100644 --- a/tests/validation_graph/light/maximum.c +++ b/tests/validation_graph/light/maximum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/maxpool.c b/tests/validation_graph/light/maxpool.c index 121ee00f..6f816c20 100644 --- a/tests/validation_graph/light/maxpool.c +++ b/tests/validation_graph/light/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/mean.c b/tests/validation_graph/light/mean.c index 80787108..d222c9c6 100644 --- a/tests/validation_graph/light/mean.c +++ b/tests/validation_graph/light/mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/minimum.c b/tests/validation_graph/light/minimum.c index badcbeb5..4bd0da60 100644 --- a/tests/validation_graph/light/minimum.c +++ b/tests/validation_graph/light/minimum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/mul.c b/tests/validation_graph/light/mul.c index ee480116..259acacb 100644 --- a/tests/validation_graph/light/mul.c +++ b/tests/validation_graph/light/mul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/negative.c b/tests/validation_graph/light/negative.c index c8858b8c..0b477fb1 100644 --- a/tests/validation_graph/light/negative.c +++ b/tests/validation_graph/light/negative.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/pad.c b/tests/validation_graph/light/pad.c index 91dd9e61..34766ec7 100644 --- a/tests/validation_graph/light/pad.c +++ b/tests/validation_graph/light/pad.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/prelu.c b/tests/validation_graph/light/prelu.c index 1307bb34..09e0f5b4 100644 --- a/tests/validation_graph/light/prelu.c +++ b/tests/validation_graph/light/prelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/relu.c b/tests/validation_graph/light/relu.c index 3d27fab0..3fcd983e 100644 --- a/tests/validation_graph/light/relu.c +++ b/tests/validation_graph/light/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/relu1.c b/tests/validation_graph/light/relu1.c index 7296ae89..b94e7e0a 100644 --- a/tests/validation_graph/light/relu1.c +++ b/tests/validation_graph/light/relu1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/relu6.c b/tests/validation_graph/light/relu6.c index baca78cb..56fcbf24 100644 --- a/tests/validation_graph/light/relu6.c +++ b/tests/validation_graph/light/relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/reshape.c b/tests/validation_graph/light/reshape.c index 790a3e99..b03a32b8 100644 --- a/tests/validation_graph/light/reshape.c +++ b/tests/validation_graph/light/reshape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/resize_bilinear.c b/tests/validation_graph/light/resize_bilinear.c index 9f6de8f2..52f89222 100644 --- a/tests/validation_graph/light/resize_bilinear.c +++ b/tests/validation_graph/light/resize_bilinear.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/resize_nearest_neighbor.c b/tests/validation_graph/light/resize_nearest_neighbor.c index 07f47ff4..2bca9f63 100644 --- a/tests/validation_graph/light/resize_nearest_neighbor.c +++ b/tests/validation_graph/light/resize_nearest_neighbor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/sigmoid.c b/tests/validation_graph/light/sigmoid.c index df7c4f26..814b71cb 100644 --- a/tests/validation_graph/light/sigmoid.c +++ b/tests/validation_graph/light/sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/softmax.c b/tests/validation_graph/light/softmax.c index bb4b4141..0d0ea6c2 100644 --- a/tests/validation_graph/light/softmax.c +++ b/tests/validation_graph/light/softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/space_to_batch_nd.c b/tests/validation_graph/light/space_to_batch_nd.c index b1ade6e1..85e0c07f 100644 --- a/tests/validation_graph/light/space_to_batch_nd.c +++ b/tests/validation_graph/light/space_to_batch_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/space_to_depth.c b/tests/validation_graph/light/space_to_depth.c index 8782ce50..55b3e6cd 100644 --- a/tests/validation_graph/light/space_to_depth.c +++ b/tests/validation_graph/light/space_to_depth.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/split.c b/tests/validation_graph/light/split.c index b6a1ca47..51a40937 100644 --- a/tests/validation_graph/light/split.c +++ b/tests/validation_graph/light/split.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/squeeze.c b/tests/validation_graph/light/squeeze.c index 6e158cf2..f9c02fd0 100644 --- a/tests/validation_graph/light/squeeze.c +++ b/tests/validation_graph/light/squeeze.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/strided_slice.c b/tests/validation_graph/light/strided_slice.c index 18674d15..18174212 100644 --- a/tests/validation_graph/light/strided_slice.c +++ b/tests/validation_graph/light/strided_slice.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/sub.c b/tests/validation_graph/light/sub.c index 9774da95..2ec6a411 100644 --- a/tests/validation_graph/light/sub.c +++ b/tests/validation_graph/light/sub.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/tanh.c b/tests/validation_graph/light/tanh.c index 9b283d00..96c5ac86 100644 --- a/tests/validation_graph/light/tanh.c +++ b/tests/validation_graph/light/tanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/light/transpose.c b/tests/validation_graph/light/transpose.c index 759d543d..98da5fbe 100644 --- a/tests/validation_graph/light/transpose.c +++ b/tests/validation_graph/light/transpose.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/lrn.c b/tests/validation_graph/lrn.c index c38654e3..57818ac9 100644 --- a/tests/validation_graph/lrn.c +++ b/tests/validation_graph/lrn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/maximum.c b/tests/validation_graph/maximum.c index 09077db1..dc83520b 100644 --- a/tests/validation_graph/maximum.c +++ b/tests/validation_graph/maximum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/maxpool.c b/tests/validation_graph/maxpool.c index 6e65326f..d228b980 100644 --- a/tests/validation_graph/maxpool.c +++ b/tests/validation_graph/maxpool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/mean.c b/tests/validation_graph/mean.c index 1a6d1014..9e6f5c15 100644 --- a/tests/validation_graph/mean.c +++ b/tests/validation_graph/mean.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/minimum.c b/tests/validation_graph/minimum.c index 13f6b3a5..85bbcba5 100644 --- a/tests/validation_graph/minimum.c +++ b/tests/validation_graph/minimum.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/mul.c b/tests/validation_graph/mul.c index f1c5d811..e4913c2b 100644 --- a/tests/validation_graph/mul.c +++ b/tests/validation_graph/mul.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/negative.c b/tests/validation_graph/negative.c index c8858b8c..0b477fb1 100644 --- a/tests/validation_graph/negative.c +++ b/tests/validation_graph/negative.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/pad.c b/tests/validation_graph/pad.c index 4b4e3a5d..9c7ce11a 100644 --- a/tests/validation_graph/pad.c +++ b/tests/validation_graph/pad.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/prelu.c b/tests/validation_graph/prelu.c index a4155a9c..bc33153f 100644 --- a/tests/validation_graph/prelu.c +++ b/tests/validation_graph/prelu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/relu.c b/tests/validation_graph/relu.c index 510f0e68..ec336f6c 100644 --- a/tests/validation_graph/relu.c +++ b/tests/validation_graph/relu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/relu1.c b/tests/validation_graph/relu1.c index 7edf6a0d..dd9c51cf 100644 --- a/tests/validation_graph/relu1.c +++ b/tests/validation_graph/relu1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/relu6.c b/tests/validation_graph/relu6.c index 9eed2432..53594b5a 100644 --- a/tests/validation_graph/relu6.c +++ b/tests/validation_graph/relu6.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/reshape.c b/tests/validation_graph/reshape.c index c8162437..44721bcd 100644 --- a/tests/validation_graph/reshape.c +++ b/tests/validation_graph/reshape.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/resize_bilinear.c b/tests/validation_graph/resize_bilinear.c index 519c631d..100198b5 100644 --- a/tests/validation_graph/resize_bilinear.c +++ b/tests/validation_graph/resize_bilinear.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/resize_nearest_neighbor.c b/tests/validation_graph/resize_nearest_neighbor.c index 850c784b..86ddad93 100644 --- a/tests/validation_graph/resize_nearest_neighbor.c +++ b/tests/validation_graph/resize_nearest_neighbor.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/sigmoid.c b/tests/validation_graph/sigmoid.c index 3e002cc4..6cbe9c64 100644 --- a/tests/validation_graph/sigmoid.c +++ b/tests/validation_graph/sigmoid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/softmax.c b/tests/validation_graph/softmax.c index 5013f1e4..ccd060fd 100644 --- a/tests/validation_graph/softmax.c +++ b/tests/validation_graph/softmax.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/space_to_batch_nd.c b/tests/validation_graph/space_to_batch_nd.c index b1ade6e1..85e0c07f 100644 --- a/tests/validation_graph/space_to_batch_nd.c +++ b/tests/validation_graph/space_to_batch_nd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/space_to_depth.c b/tests/validation_graph/space_to_depth.c index 4e58afe3..d48ec375 100644 --- a/tests/validation_graph/space_to_depth.c +++ b/tests/validation_graph/space_to_depth.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/split.c b/tests/validation_graph/split.c index c03a836a..793dada9 100644 --- a/tests/validation_graph/split.c +++ b/tests/validation_graph/split.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/squeeze.c b/tests/validation_graph/squeeze.c index 47eb61fd..164ac35e 100644 --- a/tests/validation_graph/squeeze.c +++ b/tests/validation_graph/squeeze.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/strided_slice.c b/tests/validation_graph/strided_slice.c index 18674d15..18174212 100644 --- a/tests/validation_graph/strided_slice.c +++ b/tests/validation_graph/strided_slice.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/sub.c b/tests/validation_graph/sub.c index b1e66a2b..bab9ff54 100644 --- a/tests/validation_graph/sub.c +++ b/tests/validation_graph/sub.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/tanh.c b/tests/validation_graph/tanh.c index 214634fc..91e9c244 100644 --- a/tests/validation_graph/tanh.c +++ b/tests/validation_graph/tanh.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_graph/transpose.c b/tests/validation_graph/transpose.c index 17189a20..d3ecab37 100644 --- a/tests/validation_graph/transpose.c +++ b/tests/validation_graph/transpose.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_layer/Makefile.c906 b/tests/validation_layer/Makefile.c906 new file mode 100644 index 00000000..f31a4774 --- /dev/null +++ b/tests/validation_layer/Makefile.c906 @@ -0,0 +1,192 @@ +LIB_DIR = ../../riscv_build +INCLUDE = -I../../include -I../utils -I./layer +CFLAGS = -O0 -g3 -static +CFLAGS += -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d +CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections +CFLAGS += -DCSINN_API=3 +LIB_NAME = csi_nn2_c906 +CC = riscv64-unknown-linux-gnu-gcc + +test_objs = + +# test_objs += abs.o +test_objs += acos.o +test_objs += acosh.o +test_objs += asin.o +test_objs += asinh.o +test_objs += atan.o +test_objs += atanh.o +test_objs += ceil.o +test_objs += cos.o +test_objs += cosh.o +test_objs += erf.o +test_objs += exp.o +test_objs += expm1.o +test_objs += floor.o +test_objs += log.o +test_objs += log1p.o +test_objs += logical_not.o +# test_objs += not.o +test_objs += round.o +test_objs += rsqrt.o +test_objs += sign.o +test_objs += negative.o +test_objs += sin.o +test_objs += sinh.o +test_objs += softplus.o +test_objs += softsign.o +test_objs += sqrt.o +test_objs += square.o +test_objs += tan.o +test_objs += tanh.o +test_objs += trunc.o +test_objs += yuv_rgb_scale.o +test_objs += broadcast_to.o +test_objs += arange.o + + +# test_objs += add.o +# test_objs += and.o +test_objs += div.o +test_objs += equal.o ### output is bool +test_objs += floor_div.o +test_objs += floor_mod.o +test_objs += greater_equal.o +test_objs += greater.o +test_objs += less_equal.o +test_objs += less.o +test_objs += logical_and.o +test_objs += logical_or.o +test_objs += logical_xor.o +test_objs += maximum.o +test_objs += minimum.o +test_objs += mod.o +# test_objs += mul.o +test_objs += not_equal.o +test_objs += power.o +test_objs += sub.o +# test_objs += xor.o +test_objs += argmax.o +test_objs += argmin.o + +# test_objs += averagepool.o +test_objs += averagepool3d.o +test_objs += batch_norm.o +test_objs += clip.o +# test_objs += concat.o +# test_objs += convolution.o +test_objs += convolution3d.o +test_objs += convolution_relu.o +test_objs += convolution_relu6.o +test_objs += batch_to_space.o +test_objs += cumprod.o +test_objs += cumsum.o +test_objs += deconvolution.o +test_objs += deconvolution3d.o +test_objs += depth_to_space.o +# test_objs += depthwise_convolution.o +test_objs += depthwise_convolution_relu.o +test_objs += depthwise_convolution_relu6.o +test_objs += depthwise_deconvolution.o +test_objs += elu.o + +test_objs += expand_dims.o +test_objs += flatten.o +# test_objs += fullyconnected.o +test_objs += gather_nd.o ###TODO +test_objs += gather.o +# test_objs += global_avgpool.o +# test_objs += global_maxpool.o +# test_objs += group_convolution_relu.o +# test_objs += group_convolution_relu6.o +# test_objs += group_convolution.o +test_objs += hard_sigmoid.o + +test_objs += im2col.o +test_objs += l2_norm.o +# test_objs += leaky_relu.o +test_objs += log_softmax.o +test_objs += lrn.o + +test_objs += matmul.o +test_objs += max_stride.o +# test_objs += maxpool.o +test_objs += maxpool3d.o +test_objs += mean_stride.o +test_objs += min_stride.o +test_objs += non_max_suppression.o +# test_objs += pad.o +test_objs += prelu.o +test_objs += prod_stride.o +test_objs += psroipooling.o +test_objs += reduce_logsumexp.o +test_objs += reduce_max.o +test_objs += reduce_mean.o +test_objs += reduce_min.o +test_objs += reduce_prod.o +test_objs += reduce_sum.o +# test_objs += relu.o +test_objs += relu1.o +test_objs += relu6.o +test_objs += relun.o +test_objs += reshape.o +test_objs += resize_bilinear.o +test_objs += resize_nearestneighbor.o +test_objs += reverse.o +test_objs += roialign.o +test_objs += roipooling.o + + + +test_objs += segment_max.o +test_objs += segment_mean.o +test_objs += segment_min.o +test_objs += segment_prod.o +test_objs += segment_sum.o +test_objs += select.o +test_objs += shuffle_channel.o +# test_objs += sigmoid.o +test_objs += slice.o +# test_objs += softmax.o +test_objs += softrelu.o +test_objs += space_to_batch.o +test_objs += space_to_depth.o +test_objs += split.o + +test_objs += squeeze.o +test_objs += stack.o +test_objs += strided_slice.o +# test_objs += sum_stride.o +test_objs += threshold_relu.o + +test_objs += tile.o +test_objs += topk.o +test_objs += transpose.o +test_objs += unsorted_segment_max.o +test_objs += unsorted_segment_mean.o +test_objs += unsorted_segment_min.o +test_objs += unsorted_segment_prod.o +test_objs += unsorted_segment_sum.o +test_objs += unstack.o + + + +utils_objs = + +utils_objs += ../utils/math_snr.o +utils_objs += ../utils/test_utils.o +utils_objs += ./layer/common.o + +all: csi + +csi: $(utils_objs) $(test_objs) + +$(utils_objs): %.o: %.c + $(CC) -c $(CFLAGS) $(INCLUDE) $< -o $@ + +$(test_objs): %.o: %.c + $(CC) -c $(CFLAGS) $(INCLUDE) $< -o $@ + $(CC) $@ $(CFLAGS) $(BOARD) $(utils_objs) -L$(LIB_DIR) -l$(LIB_NAME) -lc -lm -o $@.elf -lgcov + +clean: + rm -rf $(test_objs) $(utils_objs) *.a *.asm *.elf *.bin *.asm diff --git a/tests/validation_layer/Makefile.rvv b/tests/validation_layer/Makefile.rvv new file mode 100644 index 00000000..d5e1dc23 --- /dev/null +++ b/tests/validation_layer/Makefile.rvv @@ -0,0 +1,52 @@ +LIB_DIR = ../../riscv_build +INCLUDE = -I../../include -I../utils -I./layer +CFLAGS = -O0 -g3 -static +CFLAGS += -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d +CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections +CFLAGS += -DCSINN_API=15 +LIB_NAME = csi_nn2_rvv +CC = riscv64-unknown-linux-gnu-gcc +CPLUS = riscv64-unknown-linux-gnu-g++ +RVV=1 + +test_objs = + +test_objs += add.o +test_objs += averagepool.o +test_objs += concat.o +test_objs += convolution.o +test_objs += group_convolution.o +test_objs += depthwise_convolution.o +test_objs += fullyconnected.o +test_objs += global_avgpool.o +test_objs += global_maxpool.o +test_objs += leaky_relu.o +test_objs += maxpool.o +test_objs += mul.o +test_objs += relu.o +test_objs += sigmoid.o +test_objs += softmax.o +test_objs += sum_stride.o + + + + +utils_objs = + +utils_objs += ../utils/math_snr.o +utils_objs += ../utils/test_utils.o +# template_objs += ./layer/common.o + +all: csi + +csi: $(utils_objs) $(test_objs) + +$(utils_objs): %.o: %.c + $(CC) -c $(CFLAGS) $(INCLUDE) $< -o $@ + +$(test_objs): %.o: %.cpp + $(CPLUS) -c $(CFLAGS) $(INCLUDE) -D THEAD_RVV=$(RVV) $< -o $@ + $(CPLUS) $@ $(CFLAGS) $(BOARD) $(utils_objs) $(template_objs) -L$(LIB_DIR) -l$(LIB_NAME) -lc -lm -o $@.elf -lgcov + +clean: + rm -rf $(test_objs) $(utils_objs) *.a *.asm *.elf *.asm diff --git a/tests/validation_layer/abs.cpp b/tests/validation_layer/abs.cpp new file mode 100644 index 00000000..7ef09536 --- /dev/null +++ b/tests/validation_layer/abs.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of abs(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_abs_init, csi_abs, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_UINT8_ASYM, csi_abs_init, csi_abs, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_abs_init, csi_abs, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/acos.c b/tests/validation_layer/acos.c new file mode 100644 index 00000000..5881b2fb --- /dev/null +++ b/tests/validation_layer/acos.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of acos(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_acos_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_acos_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_acos_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/acosh.c b/tests/validation_layer/acosh.c new file mode 100644 index 00000000..f72d5042 --- /dev/null +++ b/tests/validation_layer/acosh.c @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of acosh(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_acosh_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_acosh_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_acosh_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/add.cpp b/tests/validation_layer/add.cpp new file mode 100644 index 00000000..5d5302fa --- /dev/null +++ b/tests/validation_layer/add.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of add(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size0, in_size1, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input0->dim[0] = buffer[0]; // batch + input0->dim[1] = buffer[1]; // height + input0->dim[2] = buffer[2]; // width + input0->dim[3] = buffer[3]; // channel + in_size0 = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + input0->dim_count = 4; + input0->layout = CSINN_LAYOUT_NCHW; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->is_const = 0; + input0->quant_channel = 1; + if (flag) { + input1->dim[0] = input0->dim[3]; + input1->dim_count = 1; + in_size1 = input1->dim[0]; + } else { + input1->dim[0] = input0->dim[0]; + input1->dim[1] = input0->dim[1]; + input1->dim[2] = input0->dim[2]; + input1->dim[3] = input0->dim[3]; + input1->dim_count = 4; + in_size1 = in_size0; + } + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + output->dim_count = 4; + + out_size = in_size0; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 5); + input1->data = (float *)(buffer + 5 + in_size0); + reference->data = (float *)(buffer + 5 + in_size0 + in_size1); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.9; + +#if THEAD_RVV + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_FLOAT32, csi_add_init, + csi_nn_rvv_add_fp32, &difference); + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_FLOAT16, csi_add_init, + csi_nn_rvv_add_fp16, &difference); + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_add_init, + csi_nn_rvv_add_int8, &difference); +#else + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_FLOAT32, csi_add_init, csi_add, + &difference); + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_UINT8_ASYM, csi_add_init, csi_add, + &difference); + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_add_init, csi_add, + &difference); +#endif + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/and.c b/tests/validation_layer/and.c new file mode 100644 index 00000000..6511d898 --- /dev/null +++ b/tests/validation_layer/and.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of and u32.\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input0->dim_count = buffer[0]; + input1->dim_count = buffer[0]; + output->dim_count = input0->dim_count; + for(int i = 0; i < input0->dim_count; i++) { + input0->dim[i] = buffer[i + 1]; + input1->dim[i] = buffer[i + 1]; + output->dim[i] = input0->dim[i]; + in_size *= input0->dim[i]; + } + + out_size = in_size; + input0->dtype = CSINN_DTYPE_UINT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_UINT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_UINT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (uint32_t *)(buffer + 1 + input0->dim_count); + input1->data = (uint32_t *)(buffer + 1 + input0->dim_count + in_size); + reference->data = (uint32_t *)(buffer + 1 + input0->dim_count + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_and_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_and_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_and_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/arange.c b/tests/validation_layer/arange.c new file mode 100644 index 00000000..196b33eb --- /dev/null +++ b/tests/validation_layer/arange.c @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "math_snr.h" +#include "test_utils.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of arange(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct arange_params params; + int out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + + out_size = buffer[3]; + params.start = buffer[0]; + params.stop = buffer[1]; + params.step = buffer[2]; + output->dim_count = 1; + output->dim[0] = out_size; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + input->data = 0; + + reference->data = (float *)(buffer + 4); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_arange_CSINN_QUANT_FLOAT32(output, ¶ms, &difference); + test_arange_CSINN_QUANT_UINT8_ASYM(output, ¶ms, &difference); + test_arange_CSINN_QUANT_INT8_SYM(output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/argmax.c b/tests/validation_layer/argmax.c new file mode 100644 index 00000000..b7d67e8f --- /dev/null +++ b/tests/validation_layer/argmax.c @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + + + init_testsuite("Testing function of argmax(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + input->dim_count = 4; + int axis = buffer[4]; + int m = buffer[5]; + int n = buffer[6]; + + for(int i = 0; i < input->dim_count; i++) { + if(i < axis){ + output->dim[i] = input->dim[i]; + } + else if(i > axis){ + output->dim[i-1] = input->dim[i]; + } + } + + + int32_t *out_strides_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *out_extents_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *inner_strides_0 = (int32_t *)malloc(m * sizeof(int32_t)); + int32_t *inner_extents_0 = (int32_t *)malloc(m * sizeof(int32_t)); + + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size / input->dim[axis]; + output->dim_count = 3; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + + input->data = (float *)(buffer + 7); + out_strides_0 = (int32_t *)(buffer + 7 + in_size); + out_extents_0 = (int32_t *)(buffer + 7 + in_size + n); + inner_strides_0 = (int32_t *)(buffer + 7 + in_size + 2 * n); + inner_extents_0 = (int32_t *)(buffer + 7 + in_size + 2 * n + m); + reference->data = (float *)(buffer + 7 + in_size + 2 * n + 2 * m); + // output->data = malloc(out_size * sizeof(float)); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + + params.axis = &axis; + params.axis_count = 1; // must be 1 + params.m = m; + params.n = n; + params.out_strides = out_strides_0; + params.out_extents = out_extents_0; + params.inner_strides = inner_strides_0; + params.inner_extents = inner_extents_0; + params.base.api = CSINN_API; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + + test_argmax_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_argmax_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_argmax_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/argmin.c b/tests/validation_layer/argmin.c new file mode 100644 index 00000000..46ec6c87 --- /dev/null +++ b/tests/validation_layer/argmin.c @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + + + init_testsuite("Testing function of argmin(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + input->dim_count = 4; + int axis = buffer[4]; + int m = buffer[5]; + int n = buffer[6]; + + for(int i = 0; i < input->dim_count; i++) { + if(i < axis){ + output->dim[i] = input->dim[i]; + } + else if(i > axis){ + output->dim[i-1] = input->dim[i]; + } + } + + + int32_t *out_strides_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *out_extents_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *inner_strides_0 = (int32_t *)malloc(m * sizeof(int32_t)); + int32_t *inner_extents_0 = (int32_t *)malloc(m * sizeof(int32_t)); + + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size / input->dim[axis]; + output->dim_count = 3; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + + input->data = (float *)(buffer + 7); + out_strides_0 = (int32_t *)(buffer + 7 + in_size); + out_extents_0 = (int32_t *)(buffer + 7 + in_size + n); + inner_strides_0 = (int32_t *)(buffer + 7 + in_size + 2 * n); + inner_extents_0 = (int32_t *)(buffer + 7 + in_size + 2 * n + m); + reference->data = (float *)(buffer + 7 + in_size + 2 * n + 2 * m); + // output->data = malloc(out_size * sizeof(float)); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + + params.axis = &axis; + params.axis_count = 1; // must be 1 + params.m = m; + params.n = n; + params.out_strides = out_strides_0; + params.out_extents = out_extents_0; + params.inner_strides = inner_strides_0; + params.inner_extents = inner_extents_0; + params.base.api = CSINN_API; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + + test_argmin_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_argmin_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_argmin_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/asin.c b/tests/validation_layer/asin.c new file mode 100644 index 00000000..eeafb769 --- /dev/null +++ b/tests/validation_layer/asin.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of asin(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_asin_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_asin_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_asin_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/asinh.c b/tests/validation_layer/asinh.c new file mode 100644 index 00000000..9b6f6a17 --- /dev/null +++ b/tests/validation_layer/asinh.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of asinh(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_asinh_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_asinh_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_asinh_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/atan.c b/tests/validation_layer/atan.c new file mode 100644 index 00000000..67d067b6 --- /dev/null +++ b/tests/validation_layer/atan.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of atan(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_atan_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_atan_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_atan_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/atanh.c b/tests/validation_layer/atanh.c new file mode 100644 index 00000000..f2194bad --- /dev/null +++ b/tests/validation_layer/atanh.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of atanh(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_atanh_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_atanh_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_atanh_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/averagepool.cpp b/tests/validation_layer/averagepool.cpp new file mode 100644 index 00000000..821b8d03 --- /dev/null +++ b/tests/validation_layer/averagepool.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of avgpool2d(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct pool_params params; + int in_size = 1; + int out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + output->dim[0] = buffer[0]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[12]; + output->dim[3] = buffer[13]; + + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.filter_height = buffer[6]; + params.filter_width = buffer[7]; + + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.base.layout = CSINN_LAYOUT_NCHW; + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + input->dim_count = 4; + output->dim_count = 4; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + params.count_include_pad = 1; + params.ceil_mode = 0; + + input->data = (float *)(buffer + 15); + reference->data = (float *)(buffer + 15 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_avgpool2d_init, csi_avgpool2d, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT16, csi_avgpool2d_init, csi_avgpool2d, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_avgpool2d_init, csi_avgpool2d, + &difference); + + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/averagepool3d.c b/tests/validation_layer/averagepool3d.c new file mode 100644 index 00000000..d1bc927f --- /dev/null +++ b/tests/validation_layer/averagepool3d.c @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of avgpool3d(layer).\n"); + + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct pool_params params; + int in_size = 1; + int out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; //batch + input->dim[1] = buffer[1]; //channel + input->dim[2] = buffer[2]; //depth + input->dim[3] = buffer[3]; //height + input->dim[4] = buffer[4]; //width + + output->dim[0] = buffer[0]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[17]; + output->dim[3] = buffer[18]; + output->dim[4] = buffer[19]; + + params.stride_depth = buffer[5]; + params.stride_height = buffer[6]; + params.stride_width = buffer[7]; + params.filter_depth = buffer[8]; + params.filter_height = buffer[9]; + params.filter_width = buffer[10]; + + params.pad_left = buffer[11]; + params.pad_right = buffer[12]; + params.pad_top = buffer[13]; + params.pad_down = buffer[14]; + params.pad_front = buffer[15]; + params.pad_back = buffer[16]; + params.count_include_pad = buffer[20]; + params.base.layout = CSINN_LAYOUT_NCDHW; + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCDHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCDHW; + output->is_const = 0; + output->quant_channel = 1; + input->dim_count = 5; + output->dim_count = 5; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3] * input->dim[4]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3] * output->dim[4]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 21); + reference->data = (float *)(buffer + 21 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_avgpool3d_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_avgpool3d_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_avgpool3d_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/batch_norm.c b/tests/validation_layer/batch_norm.c new file mode 100644 index 00000000..073a019a --- /dev/null +++ b/tests/validation_layer/batch_norm.c @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of batch normalization(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *mean = csi_alloc_tensor(NULL); + struct csi_tensor *variance = csi_alloc_tensor(NULL); + struct csi_tensor *beta = csi_alloc_tensor(NULL); + struct csi_tensor *gamma = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct bn_params params; + int size = 1; + + int *buffer = read_input_data_f32(argv[1]); + /* get the dim para */ + output->dim_count = input->dim_count = buffer[0]; + for (int i = 0; i < input->dim_count; ++i) { + output->dim[i] = input->dim[i] = buffer[1 + i]; + } + + for (int i = 0; i < input->dim_count; ++i) { + size *= input->dim[i]; + } + + mean->dim_count = 1; + variance->dim_count = 1; + gamma->dim_count = 1; + beta->dim_count = 1; + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NHWC; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NHWC; + output->is_const = 0; + output->quant_channel = 1; + mean->dtype = CSINN_DTYPE_FLOAT32; + mean->layout = CSINN_LAYOUT_O; + mean->is_const = 0; + mean->quant_channel = 1; + variance->dtype = CSINN_DTYPE_INT8; + variance->layout = CSINN_LAYOUT_O; + variance->is_const = 0; + variance->quant_channel = 1; + gamma->dtype = CSINN_DTYPE_FLOAT32; + gamma->layout = CSINN_LAYOUT_O; + gamma->is_const = 0; + gamma->quant_channel = 1; + beta->dtype = CSINN_DTYPE_FLOAT32; + beta->layout = CSINN_LAYOUT_O; + beta->is_const = 0; + beta->quant_channel = 1; + params.base.layout = CSINN_LAYOUT_NHWC; + params.epsilon = *((float *)buffer + 1 + input->dim_count); + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 2 + input->dim_count); + mean->data = (float *)(buffer + 2 + input->dim_count + size); + variance->data = (float *)(buffer + 2 + input->dim_count + size + input->dim[input->dim_count - 1]); + gamma->data = (float *)(buffer + 2 + input->dim_count + size + 2 * input->dim[input->dim_count - 1]); + beta->data = (float *)(buffer + 2 + input->dim_count + size + 3 * input->dim[input->dim_count - 1]); + reference->data = (float *)(buffer + 2 + input->dim_count + size + 4 * input->dim[input->dim_count - 1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_batch_normalization_CSINN_QUANT_FLOAT32(input, mean, variance, gamma, beta, output, ¶ms, &difference); + test_batch_normalization_CSINN_QUANT_UINT8_ASYM(input, mean, variance, gamma, beta, output, ¶ms, &difference); + test_batch_normalization_CSINN_QUANT_INT8_SYM(input, mean, variance, gamma, beta, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/batch_to_space.c b/tests/validation_layer/batch_to_space.c new file mode 100644 index 00000000..ed4be8e5 --- /dev/null +++ b/tests/validation_layer/batch_to_space.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of batch_to_space(laver).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct batch_to_space_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; //in_batch + input->dim[1] = buffer[1]; //in_channel + input->dim[2] = buffer[2]; //in_height + input->dim[3] = buffer[3]; //in_width + params.block_size = buffer[4]; + params.crop_top = buffer[5]; + params.crop_bottom = buffer[6]; + params.crop_left = buffer[7]; + params.crop_right = buffer[8]; + + output->dim[0] = input->dim[0] / (params.block_size * params.block_size); + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2] * params.block_size - params.crop_top - params.crop_bottom; + output->dim[3] = input->dim[3] * params.block_size - params.crop_left - params.crop_right; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 9); + reference->data = (float *)(buffer + 9 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + + test_batch_to_space_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_batch_to_space_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_batch_to_space_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/broadcast_to.c b/tests/validation_layer/broadcast_to.c new file mode 100644 index 00000000..5180c575 --- /dev/null +++ b/tests/validation_layer/broadcast_to.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "math_snr.h" +#include "test_utils.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of broadcast_to(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct broadcast_to_params params; + int in_size = 1; + int out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim_count = buffer[0]; + params.shape_count = buffer[1]; + output->dim_count = buffer[1]; + + for (int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[2 + i]; + in_size = in_size * input->dim[i]; + } + + params.shape = (int *)malloc(params.shape_count * sizeof(int)); + + for (int i = 0; i < params.shape_count; i++) { + output->dim[i] = buffer[2 + input->dim_count + i]; + out_size = out_size * output->dim[i]; + params.shape[i] = output->dim[i]; + } + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 2 + input->dim_count + params.shape_count); + reference->data = (float *)(buffer + 2 + input->dim_count + params.shape_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_broadcast_to_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_broadcast_to_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_broadcast_to_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/ceil.c b/tests/validation_layer/ceil.c new file mode 100644 index 00000000..07141e14 --- /dev/null +++ b/tests/validation_layer/ceil.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of ceil(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_ceil_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_ceil_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_ceil_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/clip.c b/tests/validation_layer/clip.c new file mode 100644 index 00000000..b251eecb --- /dev/null +++ b/tests/validation_layer/clip.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of clip(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct clip_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + params.min_value = buffer[4]; + params.max_value = buffer[5]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 6); + reference->data = (float *)(buffer + 6 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_clip_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_clip_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_clip_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/concat.cpp b/tests/validation_layer/concat.cpp new file mode 100644 index 00000000..24a5e2e3 --- /dev/null +++ b/tests/validation_layer/concat.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of concat(layer).\n"); + int in_size = 1; + int out_size = 1; + int *buffer = read_input_data_f32(argv[1]); + + struct concat_params params; + + params.inputs_count = buffer[4]; + + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *input[params.inputs_count]; + + for (int i = 0; i < params.inputs_count; i++) { + input[i] = csi_alloc_tensor(NULL); + } + + params.axis = buffer[5]; + output->dim_count = 4; + + for (int i = 0; i < output->dim_count; i++) { + if (i == params.axis) { + output->dim[i] = params.inputs_count * buffer[i]; + } else { + output->dim[i] = buffer[i]; + } + out_size *= output->dim[i]; + } + in_size = out_size / params.inputs_count; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + for (int i = 0; i < params.inputs_count; i++) { + input[i]->data = (float *)(buffer + 6 + in_size * i); + input[i]->dim[0] = buffer[0]; // batch + input[i]->dim[1] = buffer[1]; // height + input[i]->dim[2] = buffer[2]; // width + input[i]->dim[3] = buffer[3]; // channel + input[i]->dim_count = 4; + input[i]->dtype = CSINN_DTYPE_FLOAT32; + input[i]->layout = CSINN_LAYOUT_NCHW; + input[i]->is_const = 0; + input[i]->quant_channel = 1; + } + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + reference->data = (float *)(buffer + 6 + in_size * params.inputs_count); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + +#if THEAD_RVV + test_concat_op((struct csi_tensor **)input, output, ¶ms, CSINN_QUANT_FLOAT32, + csi_concat_init, csi_nn_rvv_concat_fp32, &difference); + test_concat_op((struct csi_tensor **)input, output, ¶ms, CSINN_QUANT_FLOAT16, + csi_concat_init, csi_nn_rvv_concat_fp16, &difference); + test_concat_op((struct csi_tensor **)input, output, ¶ms, CSINN_QUANT_INT8_SYM, + csi_concat_init, csi_nn_rvv_concat_int8, &difference); +#else + test_concat_op((struct csi_tensor **)input, output, ¶ms, CSINN_QUANT_FLOAT32, + csi_concat_init, csi_concat, &difference); + test_concat_op((struct csi_tensor **)input, output, ¶ms, CSINN_QUANT_UINT8_ASYM, + csi_concat_init, csi_concat, &difference); + test_concat_op((struct csi_tensor **)input, output, ¶ms, CSINN_QUANT_INT8_SYM, + csi_concat_init, csi_concat, &difference); +#endif + + return done_testing(); +} diff --git a/tests/validation_layer/convolution.cpp b/tests/validation_layer/convolution.cpp new file mode 100644 index 00000000..e383b461 --- /dev/null +++ b/tests/validation_layer/convolution.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of convolution(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, kernel_size; + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + kernel->dim[1] = buffer[1]; + kernel->dim[2] = buffer[6]; + kernel->dim[3] = buffer[7]; + kernel->dim[0] = buffer[12]; + bias->dim[0] = buffer[12]; + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[12]; // out_channel + output->dim[2] = buffer[16]; // height + output->dim[3] = buffer[15]; // width + + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[13]; + params.dilation_height = buffer[14]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = 1; + + input->dim_count = 4; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + kernel->dim_count = 4; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + + bias->dim_count = 1; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + + output->dim_count = 4; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + input->dtype = CSINN_DTYPE_FLOAT32; + kernel->dtype = CSINN_DTYPE_FLOAT32; + bias->dtype = CSINN_DTYPE_FLOAT32; + output->dtype = CSINN_DTYPE_FLOAT32; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + kernel_size = kernel->dim[0] * kernel->dim[1] * kernel->dim[2] * kernel->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 17); + kernel->data = (float *)(buffer + 17 + in_size); + bias->data = (float *)(buffer + 17 + in_size + kernel_size); + reference->data = (float *)(buffer + 17 + in_size + kernel_size + output->dim[1]); + // output->data = malloc(out_size * sizeof(float)); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + + test_conv2d_op(input, output, kernel, bias, ¶ms, CSINN_QUANT_FLOAT32, + csi_conv2d_init, csi_conv2d, &difference); + test_conv2d_op(input, output, kernel, bias, ¶ms, CSINN_QUANT_FLOAT16, + csi_conv2d_init, csi_conv2d, &difference); + + if (params.base.api != CSINN_RVV && params.base.api != CSINN_C908 && params.base.api != CSINN_C906 && params.base.api != CSINN_C910) { + test_conv2d_op(input, output, kernel, bias, ¶ms, CSINN_QUANT_INT8_ASYM, + csi_conv2d_init, csi_conv2d, &difference); + } + + return done_testing(); +} diff --git a/tests/validation_layer/convolution3d.c b/tests/validation_layer/convolution3d.c new file mode 100644 index 00000000..1f77fdc8 --- /dev/null +++ b/tests/validation_layer/convolution3d.c @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "math_snr.h" +#include "test_utils.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of convolution3d(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv3d_params params; + int in_size, out_size, weight_size, bias_size; + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // in_depth + input->dim[3] = buffer[3]; // in_height + input->dim[4] = buffer[4]; // in_width + + kernel->dim[0] = buffer[5]; // out_channel + kernel->dim[1] = buffer[1]; // in_channel + kernel->dim[2] = buffer[6]; // filter_depth + kernel->dim[3] = buffer[7]; // filter_height + kernel->dim[4] = buffer[8]; // filter_width + + bias->dim[0] = buffer[5]; + + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[5]; // out_channel + output->dim[2] = buffer[9]; // out_depth + output->dim[3] = buffer[10]; // out_height + output->dim[4] = buffer[11]; // out_width + + params.stride_depth = buffer[12]; + params.stride_height = buffer[13]; + params.stride_width = buffer[14]; + params.pad_left = buffer[15]; + params.pad_right = buffer[16]; + params.pad_top = buffer[17]; + params.pad_down = buffer[18]; + params.pad_front = buffer[19]; + params.pad_back = buffer[20]; + + params.dilation_depth = buffer[21]; + params.dilation_height = buffer[22]; + params.dilation_width = buffer[23]; + params.base.layout = CSINN_LAYOUT_NCDHW; + params.group = 1; + + input->dim_count = 5; + kernel->dim_count = 5; + bias->dim_count = 1; + output->dim_count = 5; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCDHW; + input->is_const = 0; + input->quant_channel = 1; + + kernel->dtype = CSINN_DTYPE_FLOAT32; + kernel->layout = CSINN_LAYOUT_OIDHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCDHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3] * input->dim[4]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3] * output->dim[4]; + weight_size = + kernel->dim[0] * kernel->dim[1] * kernel->dim[2] * kernel->dim[3] * kernel->dim[4]; + bias_size = output->dim[1]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 24); + kernel->data = (float *)(buffer + 24 + in_size); + bias->data = (float *)(buffer + 24 + in_size + weight_size); + reference->data = (float *)(buffer + 24 + in_size + weight_size + bias_size); + + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_conv3d_CSINN_QUANT_FLOAT32(input, output, kernel, bias, ¶ms, &difference); + test_conv3d_CSINN_QUANT_UINT8_ASYM(input, output, kernel, bias, ¶ms, &difference); + test_conv3d_CSINN_QUANT_INT8_SYM(input, output, kernel, bias, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/convolution_relu.c b/tests/validation_layer/convolution_relu.c new file mode 100644 index 00000000..8c824fa0 --- /dev/null +++ b/tests/validation_layer/convolution_relu.c @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of convolution relu(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, weight_size; + + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + kernel->dim[1] = buffer[1]; + kernel->dim[2] = buffer[6]; + kernel->dim[3] = buffer[7]; + kernel->dim[0] = buffer[12]; + bias->dim[0] = buffer[12]; + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[12]; // out_channel + output->dim[2] = buffer[16]; // height + output->dim[3] = buffer[15]; // width + + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[13]; + params.dilation_height = buffer[14]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = 1; + + input->dim_count = 4; + kernel->dim_count = 4; + bias->dim_count = 1; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + kernel->dim_count = 4; + kernel->dtype = CSINN_DTYPE_FLOAT32; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + + bias->dim_count = 1; + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + + output->dim_count = 4; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + weight_size = output->dim[1] * input->dim[1] * kernel->dim[2] * kernel->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 17); + kernel->data = (float *)(buffer + 17 + in_size); + bias->data = (float *)(buffer + 17 + in_size + weight_size); + reference->data = (float *)(buffer + 17 + in_size + weight_size + output->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_conv2d_relu_CSINN_QUANT_FLOAT32(input, output, kernel, bias, ¶ms, &difference); + // test_conv2d_relu_CSINN_QUANT_UINT8_ASYM(input, output, kernel, bias, ¶ms, &difference); + // test_conv2d_relu_CSINN_QUANT_INT8_SYM(input, output, kernel, bias, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/convolution_relu6.c b/tests/validation_layer/convolution_relu6.c new file mode 100644 index 00000000..319fdcf1 --- /dev/null +++ b/tests/validation_layer/convolution_relu6.c @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of convolution relu6(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, weight_size; + + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + kernel->dim[1] = buffer[1]; + kernel->dim[2] = buffer[6]; + kernel->dim[3] = buffer[7]; + kernel->dim[0] = buffer[12]; + bias->dim[0] = buffer[12]; + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[12]; // out_channel + output->dim[2] = buffer[16]; // height + output->dim[3] = buffer[15]; // width + + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[13]; + params.dilation_height = buffer[14]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = 1; + + input->dim_count = 4; + kernel->dim_count = 4; + bias->dim_count = 1; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + kernel->dim_count = 4; + kernel->dtype = CSINN_DTYPE_FLOAT32; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + + bias->dim_count = 1; + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + + output->dim_count = 4; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + weight_size = output->dim[1] * input->dim[1] * kernel->dim[2] * kernel->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 17); + kernel->data = (float *)(buffer + 17 + in_size); + bias->data = (float *)(buffer + 17 + in_size + weight_size); + reference->data = (float *)(buffer + 17 + in_size + weight_size + output->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_conv2d_relu6_CSINN_QUANT_FLOAT32(input, output, kernel, bias, ¶ms, &difference); + test_conv2d_relu6_CSINN_QUANT_UINT8_ASYM(input, output, kernel, bias, ¶ms, &difference); + test_conv2d_relu6_CSINN_QUANT_INT8_SYM(input, output, kernel, bias, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/cos.c b/tests/validation_layer/cos.c new file mode 100644 index 00000000..0b42fdfc --- /dev/null +++ b/tests/validation_layer/cos.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of cos(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_cos_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_cos_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_cos_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/cosh.c b/tests/validation_layer/cosh.c new file mode 100644 index 00000000..5da416d2 --- /dev/null +++ b/tests/validation_layer/cosh.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of cosh(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_cosh_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_cosh_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_cosh_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/cumprod.c b/tests/validation_layer/cumprod.c new file mode 100644 index 00000000..91e84914 --- /dev/null +++ b/tests/validation_layer/cumprod.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of cumprod(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct cumprod_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + params.axis = buffer[4]; + params.exclusive = buffer[5]; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + input->dim_count = 4; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dim_count = 4; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + input->dtype = CSINN_DTYPE_FLOAT32; + output->dtype = CSINN_DTYPE_FLOAT32; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 6); + reference->data = (float *)(buffer + 6 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_cumprod_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_cumprod_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_cumprod_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/cumsum.c b/tests/validation_layer/cumsum.c new file mode 100644 index 00000000..35e51d46 --- /dev/null +++ b/tests/validation_layer/cumsum.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of cumsum(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct cumsum_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + params.axis = buffer[4]; + params.exclusive = buffer[5]; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + input->dim_count = 4; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dim_count = 4; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + input->dtype = CSINN_DTYPE_FLOAT32; + output->dtype = CSINN_DTYPE_FLOAT32; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 6); + reference->data = (float *)(buffer + 6 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_cumsum_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_cumsum_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_cumsum_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/deconvolution.c b/tests/validation_layer/deconvolution.c new file mode 100644 index 00000000..5cc696ed --- /dev/null +++ b/tests/validation_layer/deconvolution.c @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of deconvolution(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, weight_size; + + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + kernel->dim[0] = buffer[1]; // i + kernel->dim[1] = buffer[14]; // o + kernel->dim[2] = buffer[6]; // h + kernel->dim[3] = buffer[7]; // w + bias->dim[0] = buffer[14]; + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[14]; // out_channel + output->dim[2] = buffer[16]; // height + output->dim[3] = buffer[15]; // width + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[12]; + params.dilation_height = buffer[13]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = 1; + + input->dim_count = 4; + kernel->dim_count = 4; + bias->dim_count = 1; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + kernel->dtype = CSINN_DTYPE_FLOAT32; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + weight_size = kernel->dim[0] * kernel->dim[1] * kernel->dim[2] * kernel->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + + input->data = (float *)(buffer + 17); + kernel->data = (float *)(buffer + 17 + in_size); + bias->data = (float *)(buffer + 17 + in_size + weight_size); + reference->data = (float *)(buffer + 17 + in_size + weight_size + output->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_deconv2d_CSINN_QUANT_FLOAT32(input, output, kernel, bias, ¶ms, &difference); + test_deconv2d_CSINN_QUANT_UINT8_ASYM(input, output, kernel, bias, ¶ms, &difference); + test_deconv2d_CSINN_QUANT_INT8_SYM(input, output, kernel, bias, ¶ms, &difference); + + + return done_testing(); +} + diff --git a/tests/validation_layer/deconvolution3d.c b/tests/validation_layer/deconvolution3d.c new file mode 100644 index 00000000..6620d02e --- /dev/null +++ b/tests/validation_layer/deconvolution3d.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "math_snr.h" +#include "test_utils.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of deconvolution3d(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv3d_params params; + int in_size, out_size, weight_size, bias_size; + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // in_depth + input->dim[3] = buffer[3]; // in_height + input->dim[4] = buffer[4]; // in_width + + kernel->dim[0] = buffer[1]; // in_channel + kernel->dim[1] = buffer[5]; // out_channel + kernel->dim[2] = buffer[6]; // filter_depth + kernel->dim[3] = buffer[7]; // filter_height + kernel->dim[4] = buffer[8]; // filter_width + + bias->dim[0] = buffer[5]; // out_channel + + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[5]; // out_channel + output->dim[2] = buffer[9]; // out_depth + output->dim[3] = buffer[10]; // out_height + output->dim[4] = buffer[11]; // out_width + + params.stride_depth = buffer[12]; + params.stride_height = buffer[13]; + params.stride_width = buffer[14]; + params.pad_left = buffer[15]; + params.pad_right = buffer[16]; + params.pad_top = buffer[17]; + params.pad_down = buffer[18]; + params.pad_front = buffer[19]; + params.pad_back = buffer[20]; + + params.out_pad_depth = buffer[21]; + params.out_pad_height = buffer[22]; + params.out_pad_width = buffer[23]; + + params.dilation_depth = buffer[24]; + params.dilation_height = buffer[25]; + params.dilation_width = buffer[26]; + params.base.layout = CSINN_LAYOUT_NCDHW; + params.group = 1; + + input->dim_count = 5; + kernel->dim_count = 5; + bias->dim_count = 1; + output->dim_count = 5; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCDHW; + input->is_const = 0; + input->quant_channel = 1; + + kernel->dtype = CSINN_DTYPE_FLOAT32; + kernel->layout = CSINN_LAYOUT_OIDHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCDHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3] * input->dim[4]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3] * output->dim[4]; + weight_size = + kernel->dim[0] * kernel->dim[1] * kernel->dim[2] * kernel->dim[3] * kernel->dim[4]; + bias_size = bias->dim[0]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 27); + kernel->data = (float *)(buffer + 27 + in_size); + bias->data = (float *)(buffer + 27 + in_size + weight_size); + reference->data = (float *)(buffer + 27 + in_size + weight_size + bias_size); + + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_deconv3d_CSINN_QUANT_FLOAT32(input, output, kernel, bias, ¶ms, &difference); + test_deconv3d_CSINN_QUANT_UINT8_ASYM(input, output, kernel, bias, ¶ms, &difference); + test_deconv3d_CSINN_QUANT_INT8_SYM(input, output, kernel, bias, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/depth_to_space.c b/tests/validation_layer/depth_to_space.c new file mode 100644 index 00000000..8f21bb09 --- /dev/null +++ b/tests/validation_layer/depth_to_space.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of depth_to_space(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct depth_to_space_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; //batch + input->dim[1] = buffer[1]; //in_channel + input->dim[2] = buffer[2]; //in_height + input->dim[3] = buffer[3]; //in_width + + params.block_size = buffer[4]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1] / (params.block_size * params.block_size); + output->dim[2] = input->dim[2] * params.block_size; + output->dim[3] = input->dim[3] * params.block_size; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_depth_to_space_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_depth_to_space_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_depth_to_space_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/depthwise_convolution.cpp b/tests/validation_layer/depthwise_convolution.cpp new file mode 100644 index 00000000..299fa5e0 --- /dev/null +++ b/tests/validation_layer/depthwise_convolution.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of depthwise convolution(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, weight_size; + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + kernel->dim[0] = buffer[1]; + kernel->dim[1] = 1; + kernel->dim[2] = buffer[6]; + kernel->dim[3] = buffer[7]; + + bias->dim[0] = buffer[12]; + + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[12]; // out_channel + output->dim[2] = buffer[15]; // height + output->dim[3] = buffer[16]; // width + + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[14]; + params.dilation_height = buffer[13]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = buffer[1]; + + + input->dim_count = 4; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + kernel->dim_count = 4; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + bias->dim_count = 1; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + output->dim_count = 4; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + input->dtype = CSINN_DTYPE_FLOAT32; + kernel->dtype = CSINN_DTYPE_FLOAT32; + bias->dtype = CSINN_DTYPE_FLOAT32; + output->dtype = CSINN_DTYPE_FLOAT32; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + weight_size = kernel->dim[3] * kernel->dim[2] * kernel->dim[1] * kernel->dim[0]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 17); + kernel->data = (float *)(buffer + 17 + in_size); + bias->data = (float *)(buffer + 17 + in_size + weight_size); + reference->data = (float *)(buffer + 17 + in_size + weight_size + output->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_conv2d_op(input, output, kernel, bias, ¶ms, CSINN_QUANT_FLOAT32, + csi_conv2d_init, csi_conv2d, &difference); + test_conv2d_op(input, output, kernel, bias, ¶ms, CSINN_QUANT_FLOAT16, + csi_conv2d_init, csi_conv2d, &difference); + test_conv2d_op(input, output, kernel, bias, ¶ms, CSINN_QUANT_INT8_SYM, + csi_conv2d_init, csi_conv2d, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/depthwise_convolution_relu.c b/tests/validation_layer/depthwise_convolution_relu.c new file mode 100644 index 00000000..033b9daa --- /dev/null +++ b/tests/validation_layer/depthwise_convolution_relu.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of depthwise convolution relu(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, weight_size; + + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + kernel->dim[0] = buffer[1]; + kernel->dim[1] = 1; + kernel->dim[2] = buffer[6]; + kernel->dim[3] = buffer[7]; + + bias->dim[0] = buffer[12]; + + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[12]; // out_channel + output->dim[2] = buffer[15]; // height + output->dim[3] = buffer[16]; // width + + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[14]; + params.dilation_height = buffer[13]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = buffer[1]; + + + input->dim_count = 4; + kernel->dim_count = 4; + bias->dim_count = 1; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + kernel->dtype = CSINN_DTYPE_FLOAT32; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 0; + bias->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + weight_size = kernel->dim[3] * kernel->dim[2] * kernel->dim[1] * kernel->dim[0]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 17); + kernel->data = (float *)(buffer + 17 + in_size); + bias->data = (float *)(buffer + 17 + in_size + weight_size); + reference->data = (float *)(buffer + 17 + in_size + weight_size + output->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.90; + + test_conv2d_relu_CSINN_QUANT_FLOAT32(input, output, kernel, bias, ¶ms, &difference); + // test_conv2d_relu_CSINN_QUANT_UINT8_ASYM(input, output, kernel, bias, ¶ms, &difference); + // test_conv2d_relu_CSINN_QUANT_INT8_SYM(input, output, kernel, bias, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/depthwise_convolution_relu6.c b/tests/validation_layer/depthwise_convolution_relu6.c new file mode 100644 index 00000000..fe108643 --- /dev/null +++ b/tests/validation_layer/depthwise_convolution_relu6.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of depthwise convolution relu6(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, weight_size; + + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + kernel->dim[0] = buffer[1]; + kernel->dim[1] = 1; + kernel->dim[2] = buffer[6]; + kernel->dim[3] = buffer[7]; + + bias->dim[0] = buffer[12]; + + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[12]; // out_channel + output->dim[2] = buffer[15]; // height + output->dim[3] = buffer[16]; // width + + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[14]; + params.dilation_height = buffer[13]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = buffer[1]; + + + input->dim_count = 4; + kernel->dim_count = 4; + bias->dim_count = 1; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + kernel->dtype = CSINN_DTYPE_FLOAT32; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 0; + bias->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + weight_size = kernel->dim[3] * kernel->dim[2] * kernel->dim[1] * kernel->dim[0]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 17); + kernel->data = (float *)(buffer + 17 + in_size); + bias->data = (float *)(buffer + 17 + in_size + weight_size); + reference->data = (float *)(buffer + 17 + in_size + weight_size + output->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_conv2d_relu6_CSINN_QUANT_FLOAT32(input, output, kernel, bias, ¶ms, &difference); + test_conv2d_relu6_CSINN_QUANT_UINT8_ASYM(input, output, kernel, bias, ¶ms, &difference); + test_conv2d_relu6_CSINN_QUANT_INT8_SYM(input, output, kernel, bias, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/depthwise_deconvolution.c b/tests/validation_layer/depthwise_deconvolution.c new file mode 100644 index 00000000..87b54af8 --- /dev/null +++ b/tests/validation_layer/depthwise_deconvolution.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of depthwise deconvolution(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, weight_size; + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + kernel->dim[0] = buffer[1]; + kernel->dim[1] = 1; + kernel->dim[2] = buffer[6]; + kernel->dim[3] = buffer[7]; + + bias->dim[0] = buffer[12]; + + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[12]; // out_channel + output->dim[2] = buffer[15]; // height + output->dim[3] = buffer[16]; // width + + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[14]; + params.dilation_height = buffer[13]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = buffer[1]; + + + input->dim_count = 4; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + kernel->dim_count = 4; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + bias->dim_count = 1; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + output->dim_count = 4; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + input->dtype = CSINN_DTYPE_FLOAT32; + kernel->dtype = CSINN_DTYPE_FLOAT32; + bias->dtype = CSINN_DTYPE_FLOAT32; + output->dtype = CSINN_DTYPE_FLOAT32; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + weight_size = kernel->dim[3] * kernel->dim[2] * kernel->dim[1] * kernel->dim[0]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 17); + kernel->data = (float *)(buffer + 17 + in_size); + bias->data = (float *)(buffer + 17 + in_size + weight_size); + reference->data = (float *)(buffer + 17 + in_size + weight_size + output->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_deconv2d_CSINN_QUANT_FLOAT32(input, output, kernel, bias, ¶ms, &difference); + test_deconv2d_CSINN_QUANT_UINT8_ASYM(input, output, kernel, bias, ¶ms, &difference); + test_deconv2d_CSINN_QUANT_INT8_SYM(input, output, kernel, bias, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/div.c b/tests/validation_layer/div.c new file mode 100644 index 00000000..6cff02b9 --- /dev/null +++ b/tests/validation_layer/div.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of div(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_div_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_div_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_div_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/elu.c b/tests/validation_layer/elu.c new file mode 100644 index 00000000..80e288ee --- /dev/null +++ b/tests/validation_layer/elu.c @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of elu(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct relu_params params; + int in_size = 0; + int out_size =0; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_elu_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_elu_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_elu_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/equal.c b/tests/validation_layer/equal.c new file mode 100644 index 00000000..bb480fd6 --- /dev/null +++ b/tests/validation_layer/equal.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of equal(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size = 1; + int out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input0->dim_count = input1->dim_count = buffer[0]; + output->dim_count = input0->dim_count; + + for (int i = 0; i < input0->dim_count; i++ ) { + input0->dim[i] = buffer[1 + i]; + input1->dim[i] = input0->dim[i]; + output->dim[i] = input0->dim[i]; + in_size = in_size * input0->dim[i]; + } + + out_size = in_size; + + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 1 + input0->dim_count); + input1->data = (float *)(buffer + 1 + input0->dim_count + in_size); + reference->data = (float *)(buffer + 1 + input0->dim_count + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_equal_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_equal_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_equal_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/erf.c b/tests/validation_layer/erf.c new file mode 100644 index 00000000..a5b20c4d --- /dev/null +++ b/tests/validation_layer/erf.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of erf(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_erf_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_erf_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_erf_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/exp.c b/tests/validation_layer/exp.c new file mode 100644 index 00000000..095b88d6 --- /dev/null +++ b/tests/validation_layer/exp.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of exp(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_exp_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_exp_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_exp_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/expand_dims.c b/tests/validation_layer/expand_dims.c new file mode 100644 index 00000000..db8f17f5 --- /dev/null +++ b/tests/validation_layer/expand_dims.c @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of expand_dims(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct expand_dims_params params; + int in_size = 1; + int out_size = 1; + int *buffer = read_input_data_f32(argv[1]); + + int dim_count = buffer[0]; + int axis = buffer[1]; + for(int i = 0; i < dim_count; i++) { + input->dim[i] = buffer[2 + i]; + in_size *= input->dim[i]; + } + input->dim_count = dim_count; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dim_count = input->dim_count + 1; // axis is 0-D scalar + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + for(int i = 0; i < output->dim_count; i++) { + if(i < axis) { + output->dim[i] = input->dim[i]; + } else if(i == axis) { + output->dim[i] = 1; + } else { + output->dim[i] = input->dim[i - 1]; + } + } + + input->dtype = CSINN_DTYPE_FLOAT32; + output->dtype = CSINN_DTYPE_FLOAT32; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 2 + dim_count); + reference->data = (float *)(buffer + 2 + dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_expand_dims_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_expand_dims_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_expand_dims_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/expm1.c b/tests/validation_layer/expm1.c new file mode 100644 index 00000000..89b0a01c --- /dev/null +++ b/tests/validation_layer/expm1.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of expm1(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_expm1_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_expm1_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_expm1_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/flatten.c b/tests/validation_layer/flatten.c new file mode 100644 index 00000000..1718e603 --- /dev/null +++ b/tests/validation_layer/flatten.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of flatten(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct flatten_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + in_size *= input->dim[i]; + } + output->dim[0] = in_size; + output->dim_count = 1; + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_flatten_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_flatten_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_flatten_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/floor.c b/tests/validation_layer/floor.c new file mode 100644 index 00000000..c74c398c --- /dev/null +++ b/tests/validation_layer/floor.c @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of floor(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_floor_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_floor_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_floor_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/floor_div.c b/tests/validation_layer/floor_div.c new file mode 100644 index 00000000..d79e1f9b --- /dev/null +++ b/tests/validation_layer/floor_div.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of floor_divide(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_floor_divide_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_floor_divide_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_floor_divide_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/floor_mod.c b/tests/validation_layer/floor_mod.c new file mode 100644 index 00000000..75ae5486 --- /dev/null +++ b/tests/validation_layer/floor_mod.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of floor_mod(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_floor_mod_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_floor_mod_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_floor_mod_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/fullyconnected.cpp b/tests/validation_layer/fullyconnected.cpp new file mode 100644 index 00000000..7f621f17 --- /dev/null +++ b/tests/validation_layer/fullyconnected.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of fullyconnected(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *weight = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct fc_params params; + int in_size0, in_size1, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_size + weight->dim[0] = buffer[2]; // out_size + weight->dim[1] = buffer[1]; // in_size + bias->dim[0] = buffer[2]; + output->dim[0] = buffer[0]; + output->dim[1] = buffer[2]; + input->dim_count = 2; + weight->dim_count = 2; + bias->dim_count = 1; + output->dim_count = 2; + in_size0 = input->dim[0] * input->dim[1]; + in_size1 = weight->dim[0] * weight->dim[1]; + out_size = output->dim[0] * output->dim[1]; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NC; + input->is_const = 0; + input->quant_channel = 1; + weight->dtype = CSINN_DTYPE_FLOAT32; + weight->layout = CSINN_LAYOUT_OI; + weight->is_const = 1; + weight->quant_channel = 1; + + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NC; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 3); + weight->data = (float *)(buffer + 3 + in_size0); + bias->data = (float *)(buffer + 3 + in_size0 + in_size1); + reference->data = (float *)(buffer + 3 + in_size0 + in_size1 + buffer[2]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + +#if THEAD_RVV + test_conv2d_op(input, output, weight, bias, ¶ms, CSINN_QUANT_FLOAT32, csi_fullyconnected_init, + csi_nn_rvv_fullyconnected_packn_fp32, &difference); + test_conv2d_op(input, output, weight, bias, ¶ms, CSINN_QUANT_FLOAT16, csi_fullyconnected_init, + csi_nn_rvv_fullyconnected_packn_fp16, &difference); + test_conv2d_op(input, output, weight, bias, ¶ms, CSINN_QUANT_INT8_SYM, csi_fullyconnected_init, + csi_nn_rvv_fullyconnected_packn_int8, &difference); +#else + test_conv2d_op(input, output, weight, bias, ¶ms, CSINN_QUANT_FLOAT32, + csi_fullyconnected_init, csi_fullyconnected, &difference); + test_conv2d_op(input, output, weight, bias, ¶ms, CSINN_QUANT_FLOAT16, + csi_fullyconnected_init, csi_fullyconnected, &difference); + test_conv2d_op(input, output, weight, bias, ¶ms, CSINN_QUANT_INT8_SYM, + csi_fullyconnected_init, csi_fullyconnected, &difference); +#endif + + return done_testing(); +} diff --git a/tests/validation_layer/gather.c b/tests/validation_layer/gather.c new file mode 100644 index 00000000..4d0bef15 --- /dev/null +++ b/tests/validation_layer/gather.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of gather(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *indices = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct gather_params params; + int in_size = 1, indices_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + int axis = buffer[0]; + input->dim_count = buffer[1]; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 2]; + in_size *= input->dim[i]; + } + + indices->dim_count = buffer[2 + input->dim_count]; + for(int i = 0; i < indices->dim_count; i++) { + indices->dim[i] = buffer[3 + input->dim_count + i]; + indices_size *= indices->dim[i]; + } + + output->dim_count = input->dim_count + indices->dim_count - 1; + int j = 0; + for (int i = 0; i < axis; i++) { + output->dim[j] = input->dim[i]; + out_size *= output->dim[j]; + j++; + } + for (int i = 0; i < indices->dim_count; i++) { + output->dim[j] = indices->dim[i]; + out_size *= output->dim[j]; + j++; + } + for (int i = axis + 1; i < input->dim_count; i++) { + output->dim[j] = input->dim[i]; + out_size *= output->dim[j]; + j++; + } + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + indices->dtype = CSINN_DTYPE_FLOAT32; + indices->layout = CSINN_LAYOUT_NCHW; + indices->is_const = 0; + indices->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + params.axis = axis; + + input->data = (float *)(buffer + 3 + input->dim_count + indices->dim_count); + indices->data = (int32_t *)(buffer + 3 + input->dim_count + indices->dim_count + in_size); + reference->data = (float *)(buffer + 3 + input->dim_count + indices->dim_count + in_size + indices_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_gather_CSINN_QUANT_FLOAT32(input, indices, output, ¶ms, &difference); + test_gather_CSINN_QUANT_UINT8_ASYM(input, indices, output, ¶ms, &difference); + test_gather_CSINN_QUANT_INT8_SYM(input, indices, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/gather_nd.c b/tests/validation_layer/gather_nd.c new file mode 100644 index 00000000..27db94d7 --- /dev/null +++ b/tests/validation_layer/gather_nd.c @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of gather_nd(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *indices = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct gather_nd_params params; + int in_size = 1, out_size = 1, indices_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = 0; // init output->dim_count = 0 + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + in_size *= input->dim[i]; + } + indices->dim_count = buffer[1 + input->dim_count]; + for(int i = 0; i < indices->dim_count; i++) { + indices->dim[i] = buffer[i + 2 + input->dim_count]; + indices_size *= indices->dim[i]; + if(i < indices->dim_count - 1) { + output->dim_count++; + output->dim[i] = indices->dim[i]; + } + } + + int axis = indices->dim[indices->dim_count - 1]; + + int indices_outer_size = 1; + indices_outer_size = indices_size / indices->dim[indices->dim_count - 1]; + + int input_inner_size = 1; + for(int i = axis; i < input->dim_count; i++) { + input_inner_size *= input->dim[i]; + output->dim[output->dim_count] = input->dim[i]; + output->dim_count++; + } + + out_size = indices_outer_size * input_inner_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + indices->dtype = CSINN_DTYPE_INT32; + indices->layout = CSINN_LAYOUT_NCHW; + indices->is_const = 0; + indices->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + indices->data = (uint32_t *)(buffer + 2 + input->dim_count + indices->dim_count); + input->data = (float *)(buffer + 2 + input->dim_count + indices->dim_count + indices_size); + reference->data = (float *)(buffer + 2 + input->dim_count + indices->dim_count + indices_size + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_gather_nd_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_gather_nd_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_gather_nd_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/global_avgpool.cpp b/tests/validation_layer/global_avgpool.cpp new file mode 100644 index 00000000..b51f4cb6 --- /dev/null +++ b/tests/validation_layer/global_avgpool.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of global avgpool(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct pool_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[1]; // in_channel + output->dim[2] = buffer[4]; // out_height + output->dim[3] = buffer[5]; // out_width + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 6); + reference->data = (float *)(buffer + 6 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + +#if THEAD_RVV + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_global_avgpool2d_init, + csi_nn_rvv_global_avgpool2d_fp32, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT16, csi_global_avgpool2d_init, + csi_nn_rvv_global_avgpool2d_fp16, &difference); +#else + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_global_avgpool2d_init, + csi_global_avgpool2d, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_UINT8_ASYM, csi_global_avgpool2d_init, + csi_global_avgpool2d, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_global_avgpool2d_init, + csi_global_avgpool2d, &difference); +#endif + + return done_testing(); +} diff --git a/tests/validation_layer/global_maxpool.cpp b/tests/validation_layer/global_maxpool.cpp new file mode 100644 index 00000000..193463dc --- /dev/null +++ b/tests/validation_layer/global_maxpool.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of global maxpool(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct pool_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[1]; // in_channel + output->dim[2] = buffer[4]; // out_height + output->dim[3] = buffer[5]; // out_width + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 6); + reference->data = (float *)(buffer + 6 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + +#if THEAD_RVV + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_global_maxpool2d_init, + csi_nn_rvv_global_maxpool2d_fp32, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT16, csi_global_maxpool2d_init, + csi_nn_rvv_global_maxpool2d_fp16, &difference); +#else + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_global_maxpool2d_init, + csi_global_maxpool2d, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_UINT8_ASYM, csi_global_maxpool2d_init, + csi_global_maxpool2d, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_global_maxpool2d_init, + csi_global_maxpool2d, &difference); +#endif + + return done_testing(); +} diff --git a/tests/validation_layer/greater.c b/tests/validation_layer/greater.c new file mode 100644 index 00000000..e1cbbac9 --- /dev/null +++ b/tests/validation_layer/greater.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of greater(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_greater_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_greater_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_greater_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/greater_equal.c b/tests/validation_layer/greater_equal.c new file mode 100644 index 00000000..15abf322 --- /dev/null +++ b/tests/validation_layer/greater_equal.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of greater_equal(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_greater_equal_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_greater_equal_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_greater_equal_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/group_convolution.cpp b/tests/validation_layer/group_convolution.cpp new file mode 100644 index 00000000..270cb106 --- /dev/null +++ b/tests/validation_layer/group_convolution.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of group convolution(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, weight_size; + + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + int group = buffer[17]; + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + kernel->dim[0] = buffer[12]; + kernel->dim[1] = buffer[1] / group; + kernel->dim[2] = buffer[6]; + kernel->dim[3] = buffer[7]; + + bias->dim[0] = buffer[12]; + + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[12]; // out_channel + output->dim[2] = buffer[16]; // height + output->dim[3] = buffer[15]; // width + + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[13]; + params.dilation_height = buffer[14]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = group; + + input->dim_count = 4; + kernel->dim_count = 4; + bias->dim_count = 1; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + kernel->dtype = CSINN_DTYPE_FLOAT32; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + weight_size = kernel->dim[0] * kernel->dim[1] * kernel->dim[2] * kernel->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 18); + kernel->data = (float *)(buffer + 18 + in_size); + bias->data = (float *)(buffer + 18 + in_size + weight_size); + reference->data = (float *)(buffer + 18 + in_size + weight_size + output->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_conv2d_op(input, output, kernel, bias, ¶ms, CSINN_QUANT_FLOAT32, + csi_conv2d_init, csi_conv2d, &difference); + test_conv2d_op(input, output, kernel, bias, ¶ms, CSINN_QUANT_FLOAT16, + csi_conv2d_init, csi_conv2d, &difference); + test_conv2d_op(input, output, kernel, bias, ¶ms, CSINN_QUANT_INT8_SYM, + csi_conv2d_init, csi_conv2d, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/group_convolution_relu.c b/tests/validation_layer/group_convolution_relu.c new file mode 100644 index 00000000..af5973b7 --- /dev/null +++ b/tests/validation_layer/group_convolution_relu.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of group convolution relu(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, weight_size; + + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + int group = buffer[17]; + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[3]; // in_channel + input->dim[2] = buffer[1]; // height + input->dim[3] = buffer[2]; // width + input->dim_count = 4; + kernel->dim[0] = buffer[12]; + kernel->dim[1] = buffer[3] / group; + kernel->dim[2] = buffer[6]; + kernel->dim[3] = buffer[7]; + bias->dim[0] = buffer[12]; + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[12]; // out_channel + output->dim[2] = buffer[16]; // height + output->dim[3] = buffer[15]; // width + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[13]; + params.dilation_height = buffer[14]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = group; + + input->dim_count = 4; + kernel->dim_count = 4; + bias->dim_count = 1; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + kernel->dtype = CSINN_DTYPE_FLOAT32; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + weight_size = (output->dim[1] * input->dim[1] * kernel->dim[2] * kernel->dim[3]) / group; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 18); + kernel->data = (float *)(buffer + 18 + in_size); + bias->data = (float *)(buffer + 18 + in_size + weight_size); + reference->data = (float *)(buffer + 18 + in_size + weight_size + output->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_conv2d_relu_CSINN_QUANT_FLOAT32(input, output, kernel, bias, ¶ms, &difference); + test_conv2d_relu_CSINN_QUANT_UINT8_ASYM(input, output, kernel, bias, ¶ms, &difference); + test_conv2d_relu_CSINN_QUANT_INT8_SYM(input, output, kernel, bias, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/group_convolution_relu6.c b/tests/validation_layer/group_convolution_relu6.c new file mode 100644 index 00000000..eaa345f2 --- /dev/null +++ b/tests/validation_layer/group_convolution_relu6.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of group convolution relu6(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *kernel = csi_alloc_tensor(NULL); + struct csi_tensor *bias = csi_alloc_tensor(NULL); + struct conv2d_params params; + int in_size, out_size, weight_size; + + if (argc == 1) { + printf("please assign the input data.\n"); + return 0; + } + + int *buffer = read_input_data_f32(argv[1]); + int group = buffer[17]; + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[3]; // in_channel + input->dim[2] = buffer[1]; // height + input->dim[3] = buffer[2]; // width + input->dim_count = 4; + kernel->dim[0] = buffer[12]; + kernel->dim[1] = buffer[3] / group; + kernel->dim[2] = buffer[6]; + kernel->dim[3] = buffer[7]; + bias->dim[0] = buffer[12]; + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[12]; // out_channel + output->dim[2] = buffer[16]; // height + output->dim[3] = buffer[15]; // width + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.dilation_width = buffer[13]; + params.dilation_height = buffer[14]; + params.base.layout = CSINN_LAYOUT_NCHW; + params.group = group; + + input->dim_count = 4; + kernel->dim_count = 4; + bias->dim_count = 1; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + kernel->dtype = CSINN_DTYPE_FLOAT32; + kernel->layout = CSINN_LAYOUT_OIHW; + kernel->is_const = 1; + kernel->quant_channel = 1; + + bias->dtype = CSINN_DTYPE_FLOAT32; + bias->layout = CSINN_LAYOUT_O; + bias->is_const = 1; + bias->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + weight_size = (output->dim[1] * input->dim[1] * kernel->dim[2] * kernel->dim[3]) / group; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 18); + kernel->data = (float *)(buffer + 18 + in_size); + bias->data = (float *)(buffer + 18 + in_size + weight_size); + reference->data = (float *)(buffer + 18 + in_size + weight_size + output->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]):0.99; + + test_conv2d_relu6_CSINN_QUANT_FLOAT32(input, output, kernel, bias, ¶ms, &difference); + test_conv2d_relu6_CSINN_QUANT_UINT8_ASYM(input, output, kernel, bias, ¶ms, &difference); + test_conv2d_relu6_CSINN_QUANT_INT8_SYM(input, output, kernel, bias, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/hard_sigmoid.c b/tests/validation_layer/hard_sigmoid.c new file mode 100644 index 00000000..c2062a38 --- /dev/null +++ b/tests/validation_layer/hard_sigmoid.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of hard_sigmoid(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct sigmoid_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_hard_sigmoid_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_hard_sigmoid_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_hard_sigmoid_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/im2col.c b/tests/validation_layer/im2col.c new file mode 100644 index 00000000..2cb65ca0 --- /dev/null +++ b/tests/validation_layer/im2col.c @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of im2col(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct im2col_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; //batch + input->dim[1] = buffer[1]; //in_channel + input->dim[2] = buffer[2]; //in_height + input->dim[3] = buffer[3]; //in_width + input->dim_count = 4; + + params.kernel_h = buffer[4]; + params.kernel_w = buffer[5]; + params.stride_h = buffer[6]; + params.stride_w = buffer[7]; + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + + for(int i = 0; i < input->dim_count; i++) { + in_size *= input->dim[i]; + } + + int out_h = (input->dim[2] + params.pad_top + params.pad_down - params.kernel_h) / params.stride_h + 1; + int out_w = (input->dim[3] + params.pad_left + params.pad_right - params.kernel_w) / params.stride_w + 1; + + output->dim[0] = input->dim[1] * params.kernel_h * params.kernel_w; + output->dim[1] = input->dim[0] * out_h * out_w; + output->dim_count = 2; + + out_size = input->dim[0] * input->dim[1] * params.kernel_h * params.kernel_w * out_h * out_w; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 12); + reference->data = (float *)(buffer + 12 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_im2col_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_im2col_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_im2col_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} + diff --git a/tests/validation_layer/l2_norm.c b/tests/validation_layer/l2_norm.c new file mode 100644 index 00000000..b4cb46b3 --- /dev/null +++ b/tests/validation_layer/l2_norm.c @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of l2 normalization(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct l2n_params params; + int size = 1; + + int *buffer = read_input_data_f32(argv[1]); + /* get the dim para */ + output->dim_count = input->dim_count = buffer[0]; + params.epsilon = *(float *)&buffer[1]; + int32_t axis[] = {1}; + params.axis = axis; + params.n = 1; + + for (int i = 0; i < input->dim_count; ++i) { + output->dim[i] = input->dim[i] = buffer[2 + i]; + } + + for (int i = 0; i < input->dim_count; ++i) { + size *= input->dim[i]; + } + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + //params.epsilon = *(float *)&buffer[1 + input->dim_count]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 2 + input->dim_count); + reference->data = (float *)(buffer + 2 + input->dim_count + size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_l2_normalization_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_l2_normalization_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_l2_normalization_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/layer/common.c b/tests/validation_layer/layer/common.c new file mode 100644 index 00000000..cabd0c37 --- /dev/null +++ b/tests/validation_layer/layer/common.c @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "common.h" + +#include +#include + +#include "csi_nn.h" +#include "math_snr.h" +#include "test_utils.h" + +#define LAYER_TEST_DISO(OP, STYPE, SPARAMS) \ + void test_##OP##_##STYPE(struct csi_tensor *input0, struct csi_tensor *input1, \ + struct csi_tensor *output, struct SPARAMS *params, float *difference) \ + { \ + enum csinn_dtype_enum test_dtype = STYPE; \ + enum csinn_api_enum test_api = params->base.api; \ + struct csi_tensor *qinput0 = convert_f32_layer(input0, test_dtype, test_api); \ + struct csi_tensor *qinput1 = convert_f32_layer(input1, test_dtype, test_api); \ + struct csi_tensor *qoutput = convert_f32_layer(output, test_dtype, test_api); \ + if (csi_##OP##_init(qinput0, qinput1, qoutput, params) == CSINN_TRUE) { \ + csi_##OP(qinput0, qinput1, qoutput, params); \ + } \ + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); \ + result_verify_f32(output->data, foutput->data, input0->data, *difference, \ + csi_tensor_size(output), false); \ + csi_ref_tensor_transform_free_f32(foutput); \ + } + +#define LAYER_TEST_SEGMENT(OP, STYPE, SPARAMS) \ + void test_##OP##_##STYPE(struct csi_tensor *input0, struct csi_tensor *segment, \ + struct csi_tensor *output, struct SPARAMS *params, float *difference) \ + { \ + enum csinn_dtype_enum test_dtype = STYPE; \ + enum csinn_api_enum test_api = params->base.api; \ + struct csi_tensor *qinput0 = convert_f32_layer(input0, test_dtype, test_api); \ + struct csi_tensor *qoutput = convert_f32_layer(output, test_dtype, test_api); \ + if (csi_##OP##_init(qinput0, segment, qoutput, params) == CSINN_TRUE) { \ + csi_##OP(qinput0, segment, qoutput, params); \ + } \ + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); \ + result_verify_f32(output->data, foutput->data, input0->data, *difference, \ + csi_tensor_size(output), false); \ + csi_ref_tensor_transform_free_f32(foutput); \ + } + +#define LAYER_TEST_SISO(OP, STYPE, SPARAMS) \ + void test_##OP##_##STYPE(struct csi_tensor *input, struct csi_tensor *output, \ + struct SPARAMS *params, float *difference) \ + { \ + enum csinn_dtype_enum test_dtype = STYPE; \ + enum csinn_api_enum test_api = params->base.api; \ + struct csi_tensor *qinput = convert_f32_layer(input, test_dtype, test_api); \ + struct csi_tensor *qoutput = convert_f32_layer(output, test_dtype, test_api); \ + if (csi_##OP##_init(qinput, qoutput, params) == CSINN_TRUE) { \ + csi_##OP(qinput, qoutput, params); \ + } \ + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); \ + result_verify_f32(output->data, foutput->data, input->data, *difference, \ + csi_tensor_size(output), false); \ + csi_ref_tensor_transform_free_f32(foutput); \ + } + +#define LAYER_TEST_CONCAT(OP, STYPE, SPARAMS) \ + void test_##OP##_##STYPE(struct csi_tensor **input, struct csi_tensor *output, \ + struct SPARAMS *params, float *difference) \ + { \ + enum csinn_dtype_enum test_dtype = STYPE; \ + enum csinn_api_enum test_api = params->base.api; \ + struct csi_tensor *qinput[params->inputs_count]; \ + for (int i = 0; i < params->inputs_count; i++) { \ + qinput[i] = convert_f32_layer(input[i], test_dtype, test_api); \ + } \ + struct csi_tensor *qoutput = convert_f32_layer(output, test_dtype, test_api); \ + if (csi_##OP##_init((struct csi_tensor **)qinput, qoutput, params) == CSINN_TRUE) { \ + csi_##OP((struct csi_tensor **)qinput, qoutput, params); \ + } \ + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); \ + result_verify_f32(output->data, foutput->data, input[0]->data, *difference, \ + csi_tensor_size(output), false); \ + csi_ref_tensor_transform_free_f32(foutput); \ + } + +#define LAYER_TEST_SPLIT(OP, STYPE, SPARAMS) \ + void test_##OP##_##STYPE(struct csi_tensor *input, struct csi_tensor **output, \ + struct SPARAMS *params, float *difference) \ + { \ + enum csinn_dtype_enum test_dtype = STYPE; \ + enum csinn_api_enum test_api = params->base.api; \ + struct csi_tensor *qoutput[params->output_num]; \ + int num = params->output_num; \ + struct csi_tensor *qinput = convert_f32_layer(input, test_dtype, test_api); \ + for (int i = 0; i < num; i++) { \ + qoutput[i] = convert_f32_layer(output[i], test_dtype, test_api); \ + } \ + if (csi_##OP##_init(qinput, (struct csi_tensor **)qoutput, params) == CSINN_TRUE) { \ + csi_##OP(qinput, (struct csi_tensor **)qoutput, params); \ + } \ + for (int i = 0; i < num; i++) { \ + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput[i]); \ + result_verify_f32(output[i]->data, foutput->data, input->data, *difference, \ + csi_tensor_size(output[i]), false); \ + csi_ref_tensor_transform_free_f32(foutput); \ + } \ + } + +#define LAYER_TEST_UNSTACK(OP, STYPE, SPARAMS) \ + void test_##OP##_##STYPE(struct csi_tensor *input, struct csi_tensor **output, \ + struct SPARAMS *params, float *difference) \ + { \ + enum csinn_dtype_enum test_dtype = STYPE; \ + enum csinn_api_enum test_api = params->base.api; \ + struct csi_tensor *qoutput[params->outputs_count]; \ + int num = params->outputs_count; \ + struct csi_tensor *qinput = convert_f32_layer(input, test_dtype, test_api); \ + for (int i = 0; i < num; i++) { \ + qoutput[i] = convert_f32_layer(output[i], test_dtype, test_api); \ + } \ + if (csi_##OP##_init(qinput, (struct csi_tensor **)qoutput, params) == CSINN_TRUE) { \ + csi_##OP(qinput, (struct csi_tensor **)qoutput, params); \ + } \ + for (int i = 0; i < num; i++) { \ + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput[i]); \ + result_verify_f32(output[i]->data, foutput->data, input->data, *difference, \ + csi_tensor_size(output[i]), false); \ + csi_ref_tensor_transform_free_f32(foutput); \ + } \ + } + +#define LAYER_TEST_CONV2D(OP, STYPE, SPARAMS) \ + void test_##OP##_##STYPE(struct csi_tensor *input, struct csi_tensor *output, \ + struct csi_tensor *kernel, struct csi_tensor *bias, \ + struct SPARAMS *params, float *difference) \ + { \ + enum csinn_dtype_enum test_dtype = STYPE; \ + enum csinn_api_enum test_api = params->base.api; \ + struct csi_tensor *qinput = convert_f32_layer(input, test_dtype, test_api); \ + struct csi_tensor *qoutput = convert_f32_layer(output, test_dtype, test_api); \ + struct csi_tensor *qkernel = convert_f32_layer(kernel, test_dtype, test_api); \ + struct csi_tensor *qbias = convert_f32_layer(bias, test_dtype, test_api); \ + if (csi_##OP##_init(qinput, qoutput, qkernel, qbias, params) == CSINN_TRUE) { \ + csi_##OP(qinput, qoutput, qkernel, qbias, params); \ + } \ + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); \ + result_verify_f32(output->data, foutput->data, input->data, *difference, \ + csi_tensor_size(output), false); \ + csi_ref_tensor_transform_free_f32(foutput); \ + } + +#define LAYER_TEST_BATCHNORM(OP, STYPE, SPARAMS) \ + void test_##OP##_##STYPE(struct csi_tensor *input, struct csi_tensor *mean, \ + struct csi_tensor *variance, struct csi_tensor *gamma, \ + struct csi_tensor *beta, struct csi_tensor *output, \ + struct SPARAMS *params, float *difference) \ + { \ + enum csinn_dtype_enum test_dtype = STYPE; \ + enum csinn_api_enum test_api = params->base.api; \ + struct csi_tensor *qinput = convert_f32_layer(input, test_dtype, test_api); \ + struct csi_tensor *qmean = convert_f32_layer(mean, test_dtype, test_api); \ + struct csi_tensor *qvariance = convert_f32_layer(variance, test_dtype, test_api); \ + struct csi_tensor *qgamma = convert_f32_layer(gamma, test_dtype, test_api); \ + struct csi_tensor *qbeta = convert_f32_layer(beta, test_dtype, test_api); \ + struct csi_tensor *qoutput = convert_f32_layer(output, test_dtype, test_api); \ + if (csi_##OP##_init(qinput, qmean, qvariance, qgamma, qbeta, qoutput, params) == \ + CSINN_TRUE) { \ + csi_##OP(qinput, qmean, qvariance, qgamma, qbeta, qoutput, params); \ + } \ + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); \ + result_verify_f32(output->data, foutput->data, input->data, *difference, \ + csi_tensor_size(output), false); \ + csi_ref_tensor_transform_free_f32(foutput); \ + } + +#define LAYER_TEST_TISO(OP, STYPE, SPARAMS) \ + void test_##OP##_##STYPE(struct csi_tensor *input0, struct csi_tensor *input1, \ + struct csi_tensor *input2, struct csi_tensor *output, \ + struct SPARAMS *params, float *difference) \ + { \ + enum csinn_dtype_enum test_dtype = STYPE; \ + enum csinn_api_enum test_api = params->base.api; \ + struct csi_tensor *qinput0 = convert_f32_layer(input0, test_dtype, test_api); \ + struct csi_tensor *qinput1 = convert_f32_layer(input1, test_dtype, test_api); \ + struct csi_tensor *qinput2 = convert_f32_layer(input2, test_dtype, test_api); \ + struct csi_tensor *qoutput = convert_f32_layer(output, test_dtype, test_api); \ + if (csi_##OP##_init(qinput0, qinput1, qinput2, qoutput, params) == CSINN_TRUE) { \ + csi_##OP(qinput0, qinput1, qinput2, qoutput, params); \ + } \ + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); \ + result_verify_f32(output->data, foutput->data, input1->data, *difference, \ + csi_tensor_size(output), false); \ + csi_ref_tensor_transform_free_f32(foutput); \ + } + +#define LAYER_TEST_ARANGE(OP, STYPE, SPARAMS) \ + void test_##OP##_##STYPE(struct csi_tensor *output, struct SPARAMS *params, float *difference) \ + { \ + enum csinn_dtype_enum test_dtype = STYPE; \ + enum csinn_api_enum test_api = params->base.api; \ + struct csi_tensor *qoutput = convert_f32_layer(output, test_dtype, test_api); \ + if (csi_##OP##_init(qoutput, params) == CSINN_TRUE) { \ + csi_##OP(qoutput, params); \ + } \ + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); \ + result_verify_f32(output->data, foutput->data, output->data, *difference, \ + csi_tensor_size(output), false); \ + csi_ref_tensor_transform_free_f32(foutput); \ + } + +LAYER_QUANT_TEST_DISO(LAYER_TEST_DISO) +LAYER_QUANT_TEST_SISO(LAYER_TEST_SISO) +LAYER_QUANT_TEST_BATCHNORM(LAYER_TEST_BATCHNORM) +LAYER_QUANT_TEST_CONCAT(LAYER_TEST_CONCAT) +LAYER_QUANT_TEST_CONV2D(LAYER_TEST_CONV2D) +LAYER_QUANT_TEST_TISO(LAYER_TEST_TISO) +LAYER_QUANT_TEST_SEGMENT(LAYER_TEST_SEGMENT) +LAYER_QUANT_TEST_SPLIT(LAYER_TEST_SPLIT) +LAYER_QUANT_TEST_UNSTACK(LAYER_TEST_UNSTACK) +LAYER_QUANT_TEST_ARANGE(LAYER_TEST_ARANGE) diff --git a/tests/validation_layer/layer/common.h b/tests/validation_layer/layer/common.h new file mode 100644 index 00000000..94fb381f --- /dev/null +++ b/tests/validation_layer/layer/common.h @@ -0,0 +1,444 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include + +#include "csi_nn.h" +#include "math_snr.h" +#include "test_utils.h" + +#define LAYER_QUANT_TEST_SISO(MACRO) \ + MACRO(abs, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(abs, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(abs, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(acos, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(acos, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(acos, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(acosh, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(acosh, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(acosh, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(asin, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(asin, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(asin, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(asinh, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(asinh, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(asinh, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(atan, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(atan, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(atan, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(atanh, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(atanh, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(atanh, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(ceil, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(ceil, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(ceil, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(cos, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(cos, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(cos, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(cosh, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(cosh, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(cosh, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(erf, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(erf, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(erf, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(exp, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(exp, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(exp, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(expm1, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(expm1, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(expm1, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(floor, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(floor, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(floor, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(log, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(log, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(log, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(log1p, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(log1p, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(log1p, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(logical_not, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(logical_not, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(logical_not, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(round, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(round, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(round, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(rsqrt, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(rsqrt, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(rsqrt, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(sign, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(sign, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(sign, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(negative, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(negative, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(negative, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(sin, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(sin, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(sin, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(sinh, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(sinh, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(sinh, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(softplus, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(softplus, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(softplus, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(softsign, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(softsign, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(softsign, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(sqrt, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(sqrt, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(sqrt, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(square, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(square, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(square, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(tan, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(tan, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(tan, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(tanh, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(tanh, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(tanh, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(trunc, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(trunc, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(trunc, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(yuv_rgb_scale, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(yuv_rgb_scale, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(yuv_rgb_scale, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(not, CSINN_QUANT_FLOAT32, siso_params) \ + MACRO(not, CSINN_QUANT_UINT8_ASYM, siso_params) \ + MACRO(not, CSINN_QUANT_INT8_SYM, siso_params) \ + MACRO(avgpool2d, CSINN_QUANT_FLOAT32, pool_params) \ + MACRO(avgpool2d, CSINN_QUANT_UINT8_ASYM, pool_params) \ + MACRO(avgpool2d, CSINN_QUANT_INT8_SYM, pool_params) \ + MACRO(avgpool3d, CSINN_QUANT_FLOAT32, pool_params) \ + MACRO(avgpool3d, CSINN_QUANT_UINT8_ASYM, pool_params) \ + MACRO(avgpool3d, CSINN_QUANT_INT8_SYM, pool_params) \ + MACRO(clip, CSINN_QUANT_FLOAT32, clip_params) \ + MACRO(clip, CSINN_QUANT_UINT8_ASYM, clip_params) \ + MACRO(clip, CSINN_QUANT_INT8_SYM, clip_params) \ + MACRO(batch_to_space, CSINN_QUANT_FLOAT32, batch_to_space_params) \ + MACRO(batch_to_space, CSINN_QUANT_UINT8_ASYM, batch_to_space_params) \ + MACRO(batch_to_space, CSINN_QUANT_INT8_SYM, batch_to_space_params) \ + MACRO(cumprod, CSINN_QUANT_FLOAT32, cumprod_params) \ + MACRO(cumprod, CSINN_QUANT_UINT8_ASYM, cumprod_params) \ + MACRO(cumprod, CSINN_QUANT_INT8_SYM, cumprod_params) \ + MACRO(cumsum, CSINN_QUANT_FLOAT32, cumsum_params) \ + MACRO(cumsum, CSINN_QUANT_UINT8_ASYM, cumsum_params) \ + MACRO(cumsum, CSINN_QUANT_INT8_SYM, cumsum_params) \ + MACRO(depth_to_space, CSINN_QUANT_FLOAT32, depth_to_space_params) \ + MACRO(depth_to_space, CSINN_QUANT_UINT8_ASYM, depth_to_space_params) \ + MACRO(depth_to_space, CSINN_QUANT_INT8_SYM, depth_to_space_params) \ + MACRO(elu, CSINN_QUANT_FLOAT32, relu_params) \ + MACRO(elu, CSINN_QUANT_UINT8_ASYM, relu_params) \ + MACRO(elu, CSINN_QUANT_INT8_SYM, relu_params) \ + MACRO(expand_dims, CSINN_QUANT_FLOAT32, expand_dims_params) \ + MACRO(expand_dims, CSINN_QUANT_UINT8_ASYM, expand_dims_params) \ + MACRO(expand_dims, CSINN_QUANT_INT8_SYM, expand_dims_params) \ + MACRO(flatten, CSINN_QUANT_FLOAT32, flatten_params) \ + MACRO(flatten, CSINN_QUANT_UINT8_ASYM, flatten_params) \ + MACRO(flatten, CSINN_QUANT_INT8_SYM, flatten_params) \ + MACRO(global_avgpool2d, CSINN_QUANT_FLOAT32, pool_params) \ + MACRO(global_avgpool2d, CSINN_QUANT_UINT8_ASYM, pool_params) \ + MACRO(global_avgpool2d, CSINN_QUANT_INT8_SYM, pool_params) \ + MACRO(global_maxpool2d, CSINN_QUANT_FLOAT32, pool_params) \ + MACRO(global_maxpool2d, CSINN_QUANT_UINT8_ASYM, pool_params) \ + MACRO(global_maxpool2d, CSINN_QUANT_INT8_SYM, pool_params) \ + MACRO(hard_sigmoid, CSINN_QUANT_FLOAT32, sigmoid_params) \ + MACRO(hard_sigmoid, CSINN_QUANT_UINT8_ASYM, sigmoid_params) \ + MACRO(hard_sigmoid, CSINN_QUANT_INT8_SYM, sigmoid_params) \ + MACRO(im2col, CSINN_QUANT_FLOAT32, im2col_params) \ + MACRO(im2col, CSINN_QUANT_UINT8_ASYM, im2col_params) \ + MACRO(im2col, CSINN_QUANT_INT8_SYM, im2col_params) \ + MACRO(l2_normalization, CSINN_QUANT_FLOAT32, l2n_params) \ + MACRO(l2_normalization, CSINN_QUANT_UINT8_ASYM, l2n_params) \ + MACRO(l2_normalization, CSINN_QUANT_INT8_SYM, l2n_params) \ + MACRO(leaky_relu, CSINN_QUANT_FLOAT32, relu_params) \ + MACRO(leaky_relu, CSINN_QUANT_UINT8_ASYM, relu_params) \ + MACRO(leaky_relu, CSINN_QUANT_INT8_SYM, relu_params) \ + MACRO(log_softmax, CSINN_QUANT_FLOAT32, softmax_params) \ + MACRO(log_softmax, CSINN_QUANT_UINT8_ASYM, softmax_params) \ + MACRO(log_softmax, CSINN_QUANT_INT8_SYM, softmax_params) \ + MACRO(lrn, CSINN_QUANT_FLOAT32, lrn_params) \ + MACRO(lrn, CSINN_QUANT_UINT8_ASYM, lrn_params) \ + MACRO(lrn, CSINN_QUANT_INT8_SYM, lrn_params) \ + MACRO(max, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(max, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(max, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(maxpool2d, CSINN_QUANT_FLOAT32, pool_params) \ + MACRO(maxpool2d, CSINN_QUANT_UINT8_ASYM, pool_params) \ + MACRO(maxpool2d, CSINN_QUANT_INT8_SYM, pool_params) \ + MACRO(maxpool3d, CSINN_QUANT_FLOAT32, pool_params) \ + MACRO(maxpool3d, CSINN_QUANT_UINT8_ASYM, pool_params) \ + MACRO(maxpool3d, CSINN_QUANT_INT8_SYM, pool_params) \ + MACRO(mean, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(mean, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(mean, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(min, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(min, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(min, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(pad, CSINN_QUANT_FLOAT32, pad_params) \ + MACRO(pad, CSINN_QUANT_UINT8_ASYM, pad_params) \ + MACRO(pad, CSINN_QUANT_INT8_SYM, pad_params) \ + MACRO(prod, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(prod, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(prod, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(reduce_logsumexp, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(reduce_logsumexp, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(reduce_logsumexp, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(reduce_max, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(reduce_max, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(reduce_max, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(reduce_mean, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(reduce_mean, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(reduce_mean, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(reduce_min, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(reduce_min, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(reduce_min, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(reduce_prod, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(reduce_prod, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(reduce_prod, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(reduce_sum, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(reduce_sum, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(reduce_sum, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(relu, CSINN_QUANT_FLOAT32, relu_params) \ + MACRO(relu, CSINN_QUANT_UINT8_ASYM, relu_params) \ + MACRO(relu, CSINN_QUANT_INT8_SYM, relu_params) \ + MACRO(relu1, CSINN_QUANT_FLOAT32, relu_params) \ + MACRO(relu1, CSINN_QUANT_UINT8_ASYM, relu_params) \ + MACRO(relu1, CSINN_QUANT_INT8_SYM, relu_params) \ + MACRO(relu6, CSINN_QUANT_FLOAT32, relu_params) \ + MACRO(relu6, CSINN_QUANT_UINT8_ASYM, relu_params) \ + MACRO(relu6, CSINN_QUANT_INT8_SYM, relu_params) \ + MACRO(relun, CSINN_QUANT_FLOAT32, relu_params) \ + MACRO(relun, CSINN_QUANT_UINT8_ASYM, relu_params) \ + MACRO(relun, CSINN_QUANT_INT8_SYM, relu_params) \ + MACRO(reshape, CSINN_QUANT_FLOAT32, reshape_params) \ + MACRO(reshape, CSINN_QUANT_UINT8_ASYM, reshape_params) \ + MACRO(reshape, CSINN_QUANT_INT8_SYM, reshape_params) \ + MACRO(resize, CSINN_QUANT_FLOAT32, resize_params) \ + MACRO(resize, CSINN_QUANT_UINT8_ASYM, resize_params) \ + MACRO(resize, CSINN_QUANT_INT8_SYM, resize_params) \ + MACRO(reverse, CSINN_QUANT_FLOAT32, reverse_params) \ + MACRO(reverse, CSINN_QUANT_UINT8_ASYM, reverse_params) \ + MACRO(reverse, CSINN_QUANT_INT8_SYM, reverse_params) \ + MACRO(shuffle_channel, CSINN_QUANT_FLOAT32, shuffle_channel_params) \ + MACRO(shuffle_channel, CSINN_QUANT_UINT8_ASYM, shuffle_channel_params) \ + MACRO(shuffle_channel, CSINN_QUANT_INT8_SYM, shuffle_channel_params) \ + MACRO(sigmoid, CSINN_QUANT_FLOAT32, sigmoid_params) \ + MACRO(sigmoid, CSINN_QUANT_UINT8_ASYM, sigmoid_params) \ + MACRO(sigmoid, CSINN_QUANT_INT8_SYM, sigmoid_params) \ + MACRO(slice, CSINN_QUANT_FLOAT32, slice_params) \ + MACRO(slice, CSINN_QUANT_UINT8_ASYM, slice_params) \ + MACRO(slice, CSINN_QUANT_INT8_SYM, slice_params) \ + MACRO(softmax, CSINN_QUANT_FLOAT32, softmax_params) \ + MACRO(softmax, CSINN_QUANT_UINT8_ASYM, softmax_params) \ + MACRO(softmax, CSINN_QUANT_INT8_SYM, softmax_params) \ + MACRO(softrelu, CSINN_QUANT_FLOAT32, relu_params) \ + MACRO(softrelu, CSINN_QUANT_UINT8_ASYM, relu_params) \ + MACRO(softrelu, CSINN_QUANT_INT8_SYM, relu_params) \ + MACRO(space_to_batch, CSINN_QUANT_FLOAT32, space_to_batch_params) \ + MACRO(space_to_batch, CSINN_QUANT_UINT8_ASYM, space_to_batch_params) \ + MACRO(space_to_batch, CSINN_QUANT_INT8_SYM, space_to_batch_params) \ + MACRO(space_to_depth, CSINN_QUANT_FLOAT32, space_to_depth_params) \ + MACRO(space_to_depth, CSINN_QUANT_UINT8_ASYM, space_to_depth_params) \ + MACRO(space_to_depth, CSINN_QUANT_INT8_SYM, space_to_depth_params) \ + MACRO(squeeze, CSINN_QUANT_FLOAT32, squeeze_params) \ + MACRO(squeeze, CSINN_QUANT_UINT8_ASYM, squeeze_params) \ + MACRO(squeeze, CSINN_QUANT_INT8_SYM, squeeze_params) \ + MACRO(strided_slice, CSINN_QUANT_FLOAT32, strided_slice_params) \ + MACRO(strided_slice, CSINN_QUANT_UINT8_ASYM, strided_slice_params) \ + MACRO(strided_slice, CSINN_QUANT_INT8_SYM, strided_slice_params) \ + MACRO(sum, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(sum, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(sum, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(threshold_relu, CSINN_QUANT_FLOAT32, relu_params) \ + MACRO(threshold_relu, CSINN_QUANT_UINT8_ASYM, relu_params) \ + MACRO(threshold_relu, CSINN_QUANT_INT8_SYM, relu_params) \ + MACRO(tile, CSINN_QUANT_FLOAT32, tile_params) \ + MACRO(tile, CSINN_QUANT_UINT8_ASYM, tile_params) \ + MACRO(tile, CSINN_QUANT_INT8_SYM, tile_params) \ + MACRO(transpose, CSINN_QUANT_FLOAT32, transpose_params) \ + MACRO(transpose, CSINN_QUANT_UINT8_ASYM, transpose_params) \ + MACRO(transpose, CSINN_QUANT_INT8_SYM, transpose_params) \ + MACRO(argmax, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(argmax, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(argmax, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(argmin, CSINN_QUANT_FLOAT32, reduce_params) \ + MACRO(argmin, CSINN_QUANT_UINT8_ASYM, reduce_params) \ + MACRO(argmin, CSINN_QUANT_INT8_SYM, reduce_params) \ + MACRO(broadcast_to, CSINN_QUANT_FLOAT32, broadcast_to_params) \ + MACRO(broadcast_to, CSINN_QUANT_UINT8_ASYM, broadcast_to_params) \ + MACRO(broadcast_to, CSINN_QUANT_INT8_SYM, broadcast_to_params) + +#define LAYER_QUANT_TEST_DISO(MACRO) \ + MACRO(add, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(add, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(add, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(div, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(div, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(div, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(equal, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(equal, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(equal, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(floor_divide, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(floor_divide, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(floor_divide, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(floor_mod, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(floor_mod, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(floor_mod, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(greater_equal, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(greater_equal, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(greater_equal, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(greater, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(greater, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(greater, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(less_equal, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(less_equal, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(less_equal, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(less, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(less, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(less, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(logical_and, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(logical_and, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(logical_and, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(logical_or, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(logical_or, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(logical_or, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(logical_xor, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(logical_xor, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(logical_xor, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(mod, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(mod, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(mod, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(mul, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(mul, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(mul, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(not_equal, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(not_equal, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(not_equal, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(power, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(power, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(power, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(sub, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(sub, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(sub, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(maximum, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(maximum, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(maximum, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(minimum, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(minimum, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(minimum, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(and, CSINN_QUANT_FLOAT32, diso_params) \ + MACRO(and, CSINN_QUANT_UINT8_ASYM, diso_params) \ + MACRO(and, CSINN_QUANT_INT8_SYM, diso_params) \ + MACRO(matmul, CSINN_QUANT_FLOAT32, matmul_params) \ + MACRO(matmul, CSINN_QUANT_UINT8_ASYM, matmul_params) \ + MACRO(matmul, CSINN_QUANT_INT8_SYM, matmul_params) \ + MACRO(prelu, CSINN_QUANT_FLOAT32, prelu_params) \ + MACRO(prelu, CSINN_QUANT_UINT8_ASYM, prelu_params) \ + MACRO(prelu, CSINN_QUANT_INT8_SYM, prelu_params) \ + MACRO(non_max_suppression, CSINN_QUANT_FLOAT32, non_max_suppression_params) \ + MACRO(non_max_suppression, CSINN_QUANT_UINT8_ASYM, non_max_suppression_params) \ + MACRO(non_max_suppression, CSINN_QUANT_INT8_SYM, non_max_suppression_params) \ + MACRO(psroipooling, CSINN_QUANT_FLOAT32, psroipooling_params) \ + MACRO(psroipooling, CSINN_QUANT_UINT8_ASYM, psroipooling_params) \ + MACRO(psroipooling, CSINN_QUANT_INT8_SYM, psroipooling_params) \ + MACRO(roi_align, CSINN_QUANT_FLOAT32, roi_align_params) \ + MACRO(roi_align, CSINN_QUANT_UINT8_ASYM, roi_align_params) \ + MACRO(roi_align, CSINN_QUANT_INT8_SYM, roi_align_params) \ + MACRO(roipool, CSINN_QUANT_FLOAT32, roi_pool_params) \ + MACRO(roipool, CSINN_QUANT_UINT8_ASYM, roi_pool_params) \ + MACRO(roipool, CSINN_QUANT_INT8_SYM, roi_pool_params) \ + MACRO(gather_nd, CSINN_QUANT_FLOAT32, gather_nd_params) \ + MACRO(gather_nd, CSINN_QUANT_UINT8_ASYM, gather_nd_params) \ + MACRO(gather_nd, CSINN_QUANT_INT8_SYM, gather_nd_params) \ + MACRO(gather, CSINN_QUANT_FLOAT32, gather_params) \ + MACRO(gather, CSINN_QUANT_UINT8_ASYM, gather_params) \ + MACRO(gather, CSINN_QUANT_INT8_SYM, gather_params) + +#define LAYER_QUANT_TEST_SEGMENT(MACRO) \ + MACRO(segment_max, CSINN_QUANT_FLOAT32, segment_params) \ + MACRO(segment_max, CSINN_QUANT_UINT8_ASYM, segment_params) \ + MACRO(segment_max, CSINN_QUANT_INT8_SYM, segment_params) \ + MACRO(segment_mean, CSINN_QUANT_FLOAT32, segment_params) \ + MACRO(segment_mean, CSINN_QUANT_UINT8_ASYM, segment_params) \ + MACRO(segment_mean, CSINN_QUANT_INT8_SYM, segment_params) \ + MACRO(segment_min, CSINN_QUANT_FLOAT32, segment_params) \ + MACRO(segment_min, CSINN_QUANT_UINT8_ASYM, segment_params) \ + MACRO(segment_min, CSINN_QUANT_INT8_SYM, segment_params) \ + MACRO(segment_prod, CSINN_QUANT_FLOAT32, segment_params) \ + MACRO(segment_prod, CSINN_QUANT_UINT8_ASYM, segment_params) \ + MACRO(segment_prod, CSINN_QUANT_INT8_SYM, segment_params) \ + MACRO(segment_sum, CSINN_QUANT_FLOAT32, segment_params) \ + MACRO(segment_sum, CSINN_QUANT_UINT8_ASYM, segment_params) \ + MACRO(segment_sum, CSINN_QUANT_INT8_SYM, segment_params) + +#define LAYER_QUANT_TEST_BATCHNORM(MACRO) \ + MACRO(batch_normalization, CSINN_QUANT_FLOAT32, bn_params) \ + MACRO(batch_normalization, CSINN_QUANT_UINT8_ASYM, bn_params) \ + MACRO(batch_normalization, CSINN_QUANT_INT8_SYM, bn_params) + +#define LAYER_QUANT_TEST_CONCAT(MACRO) \ + MACRO(concat, CSINN_QUANT_FLOAT32, concat_params) \ + MACRO(concat, CSINN_QUANT_UINT8_ASYM, concat_params) \ + MACRO(concat, CSINN_QUANT_INT8_SYM, concat_params) \ + MACRO(stack, CSINN_QUANT_FLOAT32, stack_params) \ + MACRO(stack, CSINN_QUANT_UINT8_ASYM, stack_params) \ + MACRO(stack, CSINN_QUANT_INT8_SYM, stack_params) + +#define LAYER_QUANT_TEST_CONV2D(MACRO) \ + MACRO(conv2d, CSINN_QUANT_FLOAT32, conv2d_params) \ + MACRO(conv2d, CSINN_QUANT_UINT8_ASYM, conv2d_params) \ + MACRO(conv2d, CSINN_QUANT_INT8_SYM, conv2d_params) \ + MACRO(conv3d, CSINN_QUANT_FLOAT32, conv3d_params) \ + MACRO(conv3d, CSINN_QUANT_UINT8_ASYM, conv3d_params) \ + MACRO(conv3d, CSINN_QUANT_INT8_SYM, conv3d_params) \ + MACRO(conv2d_relu, CSINN_QUANT_FLOAT32, conv2d_params) \ + MACRO(conv2d_relu, CSINN_QUANT_UINT8_ASYM, conv2d_params) \ + MACRO(conv2d_relu, CSINN_QUANT_INT8_SYM, conv2d_params) \ + MACRO(conv2d_relu6, CSINN_QUANT_FLOAT32, conv2d_params) \ + MACRO(conv2d_relu6, CSINN_QUANT_UINT8_ASYM, conv2d_params) \ + MACRO(conv2d_relu6, CSINN_QUANT_INT8_SYM, conv2d_params) \ + MACRO(deconv2d, CSINN_QUANT_FLOAT32, conv2d_params) \ + MACRO(deconv2d, CSINN_QUANT_UINT8_ASYM, conv2d_params) \ + MACRO(deconv2d, CSINN_QUANT_INT8_SYM, conv2d_params) \ + MACRO(deconv3d, CSINN_QUANT_FLOAT32, conv3d_params) \ + MACRO(deconv3d, CSINN_QUANT_UINT8_ASYM, conv3d_params) \ + MACRO(deconv3d, CSINN_QUANT_INT8_SYM, conv3d_params) \ + MACRO(fullyconnected, CSINN_QUANT_FLOAT32, fc_params) \ + MACRO(fullyconnected, CSINN_QUANT_UINT8_ASYM, fc_params) \ + MACRO(fullyconnected, CSINN_QUANT_INT8_SYM, fc_params) + +#define LAYER_QUANT_TEST_TISO(MACRO) \ + MACRO(select, CSINN_QUANT_FLOAT32, select_params) \ + MACRO(select, CSINN_QUANT_UINT8_ASYM, select_params) \ + MACRO(select, CSINN_QUANT_INT8_SYM, select_params) + +#define LAYER_QUANT_TEST_SPLIT(MACRO) \ + MACRO(split, CSINN_QUANT_FLOAT32, split_params) \ + MACRO(split, CSINN_QUANT_UINT8_ASYM, split_params) \ + MACRO(split, CSINN_QUANT_INT8_SYM, split_params) + +#define LAYER_QUANT_TEST_UNSTACK(MACRO) \ + MACRO(unstack, CSINN_QUANT_FLOAT32, unstack_params) \ + MACRO(unstack, CSINN_QUANT_UINT8_ASYM, unstack_params) \ + MACRO(unstack, CSINN_QUANT_INT8_SYM, unstack_params) + +#define LAYER_QUANT_TEST_ARANGE(MACRO) \ + MACRO(arange, CSINN_QUANT_FLOAT32, arange_params) \ + MACRO(arange, CSINN_QUANT_UINT8_ASYM, arange_params) \ + MACRO(arange, CSINN_QUANT_INT8_SYM, arange_params) diff --git a/tests/validation_layer/leaky_relu.cpp b/tests/validation_layer/leaky_relu.cpp new file mode 100644 index 00000000..ba022e6f --- /dev/null +++ b/tests/validation_layer/leaky_relu.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of leaky_relu(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct relu_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.n = *((float *)buffer + 4); + csi_quantize_multiplier(params.n, &(params.n_multiplier), &(params.n_shift)); + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + params.base.layout = CSINN_LAYOUT_NCHW; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + +#if THEAD_RVV + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_leaky_relu_init, + csi_nn_rvv_leaky_relu_fp32, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT16, csi_leaky_relu_init, + csi_nn_rvv_leaky_relu_fp16, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_ASYM, csi_leaky_relu_init, + csi_nn_rvv_leaky_relu_int8, &difference); +#else + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_leaky_relu_init, csi_leaky_relu, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_UINT8_ASYM, csi_leaky_relu_init, + csi_leaky_relu, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_leaky_relu_init, csi_leaky_relu, + &difference); +#endif + + return done_testing(); +} diff --git a/tests/validation_layer/less.c b/tests/validation_layer/less.c new file mode 100644 index 00000000..7023cbfb --- /dev/null +++ b/tests/validation_layer/less.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of less(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_less_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_less_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_less_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/less_equal.c b/tests/validation_layer/less_equal.c new file mode 100644 index 00000000..3bcb370b --- /dev/null +++ b/tests/validation_layer/less_equal.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of less_equal(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_less_equal_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_less_equal_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_less_equal_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/log.c b/tests/validation_layer/log.c new file mode 100644 index 00000000..3b1ad97e --- /dev/null +++ b/tests/validation_layer/log.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of log(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_log_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_log_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_log_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/log1p.c b/tests/validation_layer/log1p.c new file mode 100644 index 00000000..1b8e0db5 --- /dev/null +++ b/tests/validation_layer/log1p.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of log1p(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_log1p_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_log1p_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_log1p_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/log_softmax.c b/tests/validation_layer/log_softmax.c new file mode 100644 index 00000000..21158c59 --- /dev/null +++ b/tests/validation_layer/log_softmax.c @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of log_softmax(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct softmax_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + + params.axis = buffer[0]; + input->dim_count = buffer[1]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 2]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 2 + input->dim_count); + reference->data = (float *)(buffer + 2 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_log_softmax_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_log_softmax_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_log_softmax_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/logical_and.c b/tests/validation_layer/logical_and.c new file mode 100644 index 00000000..365212e2 --- /dev/null +++ b/tests/validation_layer/logical_and.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of logical_and(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_logical_and_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_logical_and_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_logical_and_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/logical_not.c b/tests/validation_layer/logical_not.c new file mode 100644 index 00000000..7ca50337 --- /dev/null +++ b/tests/validation_layer/logical_not.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of logical_not(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_logical_not_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_logical_not_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_logical_not_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/logical_or.c b/tests/validation_layer/logical_or.c new file mode 100644 index 00000000..84fd18cb --- /dev/null +++ b/tests/validation_layer/logical_or.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of logical_or(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_logical_or_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_logical_or_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_logical_or_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/logical_xor.c b/tests/validation_layer/logical_xor.c new file mode 100644 index 00000000..b26b2f52 --- /dev/null +++ b/tests/validation_layer/logical_xor.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of logical_xor(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_logical_xor_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_logical_xor_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_logical_xor_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/lrn.c b/tests/validation_layer/lrn.c new file mode 100644 index 00000000..d11cb98b --- /dev/null +++ b/tests/validation_layer/lrn.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of lrn(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct lrn_params params; + int in_size = 1; + int out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + params.range = buffer[4] * 2 + 1; + params.bias = *(float *)(buffer + 5); + params.alpha = *(float *)(buffer + 6); + params.beta = *(float *)(buffer + 7); + + params.base.layout = CSINN_LAYOUT_NCHW; + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + input->dim_count = 4; + output->dim_count = 4; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 8); + reference->data = (float *)(buffer + 8 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_lrn_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_lrn_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_lrn_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/matmul.c b/tests/validation_layer/matmul.c new file mode 100644 index 00000000..9e4c492e --- /dev/null +++ b/tests/validation_layer/matmul.c @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of matmul(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct matmul_params params; + int in_size0, in_size1, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input0->dim_count = input1->dim_count = buffer[2]; + output->dim_count = input0->dim_count; + params.trans_a = buffer[0]; + params.trans_b = buffer[1]; + for (int i = 0; i < input0->dim_count; ++i) { + input0->dim[i] = buffer[3 + i]; + input1->dim[i] = buffer[3 + input0->dim_count + i]; + output->dim[i] = buffer[3 + 2 * input0->dim_count + i]; + } + + in_size0 = 1; + for (int i = 0; i < input0->dim_count; ++i) { + in_size0 *= input0->dim[i]; + } + + in_size1 = 1; + for (int i = 0; i < input1->dim_count; ++i) { + in_size1 *= input1->dim[i]; + } + + out_size = 1; + for (int i = 0; i < output->dim_count; ++i) { + out_size *= output->dim[i]; + } + + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 3 + 3 * input0->dim_count); + input1->data = (float *)(buffer + 3 + 3 * input0->dim_count + in_size0); + reference->data = (float *)(buffer + 3 + 3 * input0->dim_count + in_size0 + in_size1); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + + test_matmul_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_matmul_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_matmul_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/max_stride.c b/tests/validation_layer/max_stride.c new file mode 100644 index 00000000..2633432c --- /dev/null +++ b/tests/validation_layer/max_stride.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of max(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + input->dim_count = 4; + int axis = buffer[4]; + int m = buffer[5]; + int n = buffer[6]; + + for(int i = 0; i < input->dim_count; i++) { + if(i < axis){ + output->dim[i] = input->dim[i]; + } + else if(i > axis){ + output->dim[i-1] = input->dim[i]; + } + } + + + int32_t *out_strides_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *out_extents_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *inner_strides_0 = (int32_t *)malloc(m * sizeof(int32_t)); + int32_t *inner_extents_0 = (int32_t *)malloc(m * sizeof(int32_t)); + + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size / input->dim[axis]; + output->dim_count = 3; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + input->data = (float *)(buffer + 7); + out_strides_0 = (int32_t *)(buffer + 7 + in_size); + out_extents_0 = (int32_t *)(buffer + 7 + in_size + n); + inner_strides_0 = (int32_t *)(buffer + 7 + in_size + 2 * n); + inner_extents_0 = (int32_t *)(buffer + 7 + in_size + 2 * n + m); + reference->data = (float *)(buffer + 7 + in_size + 2 * n + 2 * m); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.9; + + params.axis = &axis; + params.axis_count = 1; // must be 1 + params.m = m; + params.n = n; + params.out_strides = out_strides_0; + params.out_extents = out_extents_0; + params.inner_strides = inner_strides_0; + params.inner_extents = inner_extents_0; + params.base.api = CSINN_API; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + + + test_max_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_max_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_max_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/maximum.c b/tests/validation_layer/maximum.c new file mode 100644 index 00000000..6be40a02 --- /dev/null +++ b/tests/validation_layer/maximum.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of maximum(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input0->dim_count = input1->dim_count = buffer[0]; + output->dim_count = input0->dim_count; + for(int i = 0; i < input0->dim_count; i++) { + input0->dim[i] = input1->dim[i] = buffer[i + 1]; + output->dim[i] = input0->dim[i]; + in_size *= input0->dim[i]; + } + + out_size = in_size; + + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 1 + input0->dim_count); + input1->data = (float *)(buffer + 1 + input0->dim_count + in_size); + reference->data = (float *)(buffer + 1 + input0->dim_count + 2*in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + + test_maximum_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_maximum_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_maximum_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/maxpool.cpp b/tests/validation_layer/maxpool.cpp new file mode 100644 index 00000000..2cbb381b --- /dev/null +++ b/tests/validation_layer/maxpool.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of maxpool(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct pool_params params; + int in_size = 1; + int out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // in_channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + output->dim[0] = buffer[0]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[12]; + output->dim[3] = buffer[13]; + + params.stride_height = buffer[4]; + params.stride_width = buffer[5]; + params.filter_height = buffer[6]; + params.filter_width = buffer[7]; + + params.pad_left = buffer[8]; + params.pad_right = buffer[9]; + params.pad_top = buffer[10]; + params.pad_down = buffer[11]; + params.base.layout = CSINN_LAYOUT_NCHW; + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + input->dim_count = 4; + output->dim_count = 4; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 14); + reference->data = (float *)(buffer + 14 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_maxpool2d_init, csi_maxpool2d, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT16, csi_maxpool2d_init, csi_maxpool2d, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_maxpool2d_init, csi_maxpool2d, + &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/maxpool3d.c b/tests/validation_layer/maxpool3d.c new file mode 100644 index 00000000..05b93f88 --- /dev/null +++ b/tests/validation_layer/maxpool3d.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of maxpool3d(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct pool_params params; + int in_size = 1; + int out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; //batch + input->dim[1] = buffer[1]; //channel + input->dim[2] = buffer[2]; //depth + input->dim[3] = buffer[3]; //height + input->dim[4] = buffer[4]; //width + + output->dim[0] = buffer[0]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[17]; + output->dim[3] = buffer[18]; + output->dim[4] = buffer[19]; + + params.stride_depth = buffer[5]; + params.stride_height = buffer[6]; + params.stride_width = buffer[7]; + params.filter_depth = buffer[8]; + params.filter_height = buffer[9]; + params.filter_width = buffer[10]; + + params.pad_left = buffer[11]; + params.pad_right = buffer[12]; + params.pad_top = buffer[13]; + params.pad_down = buffer[14]; + params.pad_front = buffer[15]; + params.pad_back = buffer[16]; + params.base.layout = CSINN_LAYOUT_NCDHW; + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCDHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCDHW; + output->is_const = 0; + output->quant_channel = 1; + input->dim_count = 5; + output->dim_count = 5; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3] * input->dim[4]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3] * output->dim[4]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 20); + reference->data = (float *)(buffer + 20 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_maxpool3d_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_maxpool3d_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_maxpool3d_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/mean_stride.c b/tests/validation_layer/mean_stride.c new file mode 100644 index 00000000..7a06dddb --- /dev/null +++ b/tests/validation_layer/mean_stride.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of mean(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + input->dim_count = 4; + int axis = buffer[4]; + int m = buffer[5]; + int n = buffer[6]; + + for(int i = 0; i < input->dim_count; i++) { + if(i < axis){ + output->dim[i] = input->dim[i]; + } + else if(i > axis){ + output->dim[i-1] = input->dim[i]; + } + } + + + int32_t *out_strides_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *out_extents_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *inner_strides_0 = (int32_t *)malloc(m * sizeof(int32_t)); + int32_t *inner_extents_0 = (int32_t *)malloc(m * sizeof(int32_t)); + + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size / input->dim[axis]; + output->dim_count = 3; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + + input->data = (float *)(buffer + 7); + out_strides_0 = (int32_t *)(buffer + 7 + in_size); + out_extents_0 = (int32_t *)(buffer + 7 + in_size + n); + inner_strides_0 = (int32_t *)(buffer + 7 + in_size + 2 * n); + inner_extents_0 = (int32_t *)(buffer + 7 + in_size + 2 * n + m); + reference->data = (float *)(buffer + 7 + in_size + 2 * n + 2 * m); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.9; + + params.axis = &axis; + params.axis_count = 1; // must be 1 + params.m = m; + params.n = n; + params.out_strides = out_strides_0; + params.out_extents = out_extents_0; + params.inner_strides = inner_strides_0; + params.inner_extents = inner_extents_0; + params.base.api = CSINN_API; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + + test_mean_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_mean_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_mean_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/min_stride.c b/tests/validation_layer/min_stride.c new file mode 100644 index 00000000..9f905dda --- /dev/null +++ b/tests/validation_layer/min_stride.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of min(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + input->dim_count = 4; + int axis = buffer[4]; + int m = buffer[5]; + int n = buffer[6]; + + for(int i = 0; i < input->dim_count; i++) { + if(i < axis){ + output->dim[i] = input->dim[i]; + } + else if(i > axis){ + output->dim[i-1] = input->dim[i]; + } + } + + + int32_t *out_strides_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *out_extents_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *inner_strides_0 = (int32_t *)malloc(m * sizeof(int32_t)); + int32_t *inner_extents_0 = (int32_t *)malloc(m * sizeof(int32_t)); + + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size / input->dim[axis]; + output->dim_count = 3; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + + input->data = (float *)(buffer + 7); + out_strides_0 = (int32_t *)(buffer + 7 + in_size); + out_extents_0 = (int32_t *)(buffer + 7 + in_size + n); + inner_strides_0 = (int32_t *)(buffer + 7 + in_size + 2 * n); + inner_extents_0 = (int32_t *)(buffer + 7 + in_size + 2 * n + m); + reference->data = (float *)(buffer + 7 + in_size + 2 * n + 2 * m); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.9; + + params.axis = &axis; + params.axis_count = 1; // must be 1 + params.m = m; + params.n = n; + params.out_strides = out_strides_0; + params.out_extents = out_extents_0; + params.inner_strides = inner_strides_0; + params.inner_extents = inner_extents_0; + params.base.api = CSINN_API; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + + test_min_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_min_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_min_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/minimum.c b/tests/validation_layer/minimum.c new file mode 100644 index 00000000..ba7c096d --- /dev/null +++ b/tests/validation_layer/minimum.c @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of minimum(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input0->dim_count = buffer[0]; + output->dim_count = input0->dim_count; + for(int i = 0; i < input0->dim_count; i++) { + input0->dim[i] = buffer[i + 1]; + output->dim[i] = input0->dim[i]; + in_size *= input0->dim[i]; + } + + out_size = in_size; + + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 1 + input0->dim_count); + input1->data = (float *)(buffer + 1 + input0->dim_count + in_size); + reference->data = (float *)(buffer + 1 + input0->dim_count + 2*in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_minimum_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_minimum_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_minimum_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/mod.c b/tests/validation_layer/mod.c new file mode 100644 index 00000000..30892ed9 --- /dev/null +++ b/tests/validation_layer/mod.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of mod(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size0, in_size1, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input0->dim[0] = buffer[0]; // batch + input0->dim[1] = buffer[1]; // height + input0->dim[2] = buffer[2]; // width + input0->dim[3] = buffer[3]; // channel + in_size0 = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + input0->dim_count = 4; + input0->layout = CSINN_LAYOUT_NCHW; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->is_const = 0; + input0->quant_channel = 1; + if(flag) { + input1->dim[0] = input0->dim[3]; + input1->dim_count = 1; + in_size1 = input1->dim[0]; + } else { + input1->dim[0] = input0->dim[0]; + input1->dim[1] = input0->dim[1]; + input1->dim[2] = input0->dim[2]; + input1->dim[3] = input0->dim[3]; + input1->dim_count = 4; + in_size1 = in_size0; + } + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + output->dim_count = 4; + + out_size = in_size0; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 5); + input1->data = (float *)(buffer + 5 + in_size0); + reference->data = (float *)(buffer + 5 + in_size0 + in_size1); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.9; + + test_mod_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_mod_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_mod_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/mul.cpp b/tests/validation_layer/mul.cpp new file mode 100644 index 00000000..b6905801 --- /dev/null +++ b/tests/validation_layer/mul.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of mul(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size0, in_size1, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input0->dim[0] = buffer[0]; // batch + input0->dim[1] = buffer[1]; // height + input0->dim[2] = buffer[2]; // width + input0->dim[3] = buffer[3]; // channel + in_size0 = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + input0->dim_count = 4; + input0->layout = CSINN_LAYOUT_NCHW; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->is_const = 0; + input0->quant_channel = 1; + if (flag) { + input1->dim[0] = input0->dim[3]; + input1->dim_count = 1; + in_size1 = input1->dim[0]; + } else { + input1->dim[0] = input0->dim[0]; + input1->dim[1] = input0->dim[1]; + input1->dim[2] = input0->dim[2]; + input1->dim[3] = input0->dim[3]; + input1->dim_count = 4; + in_size1 = in_size0; + } + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + output->dim_count = 4; + + out_size = in_size0; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 5); + input1->data = (float *)(buffer + 5 + in_size0); + reference->data = (float *)(buffer + 5 + in_size0 + in_size1); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.9; + +#if THEAD_RVV + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_FLOAT32, csi_mul_init, + csi_nn_rvv_mul_fp32, &difference); + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_FLOAT16, csi_mul_init, + csi_nn_rvv_mul_fp16, &difference); + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_mul_init, + csi_nn_rvv_mul_int8, &difference); +#else + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_FLOAT32, csi_mul_init, csi_mul, + &difference); + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_UINT8_ASYM, csi_mul_init, csi_mul, + &difference); + test_binary_op(input0, input1, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_mul_init, csi_mul, + &difference); +#endif + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/negative.c b/tests/validation_layer/negative.c new file mode 100644 index 00000000..88c1ec26 --- /dev/null +++ b/tests/validation_layer/negative.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of negative(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_negative_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_negative_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_negative_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/non_max_suppression.c b/tests/validation_layer/non_max_suppression.c new file mode 100644 index 00000000..ecb87d32 --- /dev/null +++ b/tests/validation_layer/non_max_suppression.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of non_max_suppression(layer).\n"); + + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct non_max_suppression_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input0->dim_count = 2; + input1->dim_count = 1; + input0->dim[0] = buffer[0]; + input0->dim[1] = 4; + input1->dim[0] = buffer[0]; + + params.max_output_size = buffer[1]; + params.iou_threshold = *((float *)buffer + 3); + + output->dim_count = 2; + output->dim[0] = params.max_output_size; + output->dim[1] = 4; + + in_size = input0->dim[0] * 4; + out_size = buffer[2]; + + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (int *)(buffer + 4 + in_size + in_size / 4); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_non_max_suppression_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_non_max_suppression_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_non_max_suppression_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/not.c b/tests/validation_layer/not.c new file mode 100644 index 00000000..b86fccad --- /dev/null +++ b/tests/validation_layer/not.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +void op_test_run(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params, float *output_data, float diff) +{ + + if (csi_not_init(input, output, params) == CSINN_TRUE) { + csi_not(input, output, params); + } + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(output); + result_verify_f32(output_data, foutput->data, input->data, diff, csi_tensor_size(output), + false); + + csi_ref_tensor_transform_free_f32(foutput); +} + +void test_not(struct csi_tensor *input, struct csi_tensor *output, + struct siso_params *params, float &difference); + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of not(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_not(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/not_equal.c b/tests/validation_layer/not_equal.c new file mode 100644 index 00000000..a5902279 --- /dev/null +++ b/tests/validation_layer/not_equal.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of not_equal(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_not_equal_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_not_equal_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_not_equal_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/pad.cpp b/tests/validation_layer/pad.cpp new file mode 100644 index 00000000..841b639d --- /dev/null +++ b/tests/validation_layer/pad.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of pad(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct pad_params params; + int in_size = 0, out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + input->dim_count = 4; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2] + buffer[6] + buffer[7]; + output->dim[3] = input->dim[3] + buffer[4] + buffer[5]; + output->dim_count = 4; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + params.pad_mode = CSINN_PAD_CONSTANT; + params.pad_value = 0.0f; + params.pad_num = input->dim_count; + + int32_t pad_left = buffer[4]; + int32_t pad_right = buffer[5]; + int32_t pad_top = buffer[6]; + int32_t pad_down = buffer[7]; + + int32_t pad_before[4] = {0, 0, pad_top, pad_left}; + int32_t pad_after[4] = {0, 0, pad_down, pad_right}; + + params.pad_before = pad_before; + params.pad_after = pad_after; + + input->data = (float *)(buffer + 8); + reference->data = (float *)(buffer + 8 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + +#if THEAD_RVV + return 0 +#else + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_pad_init, csi_pad, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_UINT8_ASYM, csi_pad_init, csi_pad, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_pad_init, csi_pad, &difference); +#endif + + return done_testing(); +} diff --git a/tests/validation_layer/power.c b/tests/validation_layer/power.c new file mode 100644 index 00000000..341a45f6 --- /dev/null +++ b/tests/validation_layer/power.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of power(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_power_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_power_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_power_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/prelu.c b/tests/validation_layer/prelu.c new file mode 100644 index 00000000..0bfd650f --- /dev/null +++ b/tests/validation_layer/prelu.c @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of prelu(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *alpha_data = csi_alloc_tensor(NULL); + struct prelu_params params; + int in_size = 1; + int out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + output->dim[0] = input->dim[0] = buffer[0]; // batch + output->dim[1] = input->dim[1] = buffer[1]; // channel + output->dim[2] = input->dim[2] = buffer[2]; // height + output->dim[3] = input->dim[3] = buffer[3]; // width + alpha_data->dim[0] = buffer[1]; + input->dim_count = 4; + alpha_data->dim_count = 1; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + alpha_data->dtype = CSINN_DTYPE_FLOAT32; + alpha_data->layout = CSINN_LAYOUT_O; + alpha_data->is_const = 0; + alpha_data->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.layout = CSINN_LAYOUT_NCHW; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + alpha_data->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + in_size + input->dim[1]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_prelu_CSINN_QUANT_FLOAT32(input, alpha_data, output, ¶ms, &difference); + test_prelu_CSINN_QUANT_UINT8_ASYM(input, alpha_data, output, ¶ms, &difference); + test_prelu_CSINN_QUANT_INT8_SYM(input, alpha_data, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/prod_stride.c b/tests/validation_layer/prod_stride.c new file mode 100644 index 00000000..35a50917 --- /dev/null +++ b/tests/validation_layer/prod_stride.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of prod(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + input->dim_count = 4; + int axis = buffer[4]; + int m = buffer[5]; + int n = buffer[6]; + + for(int i = 0; i < input->dim_count; i++) { + if(i < axis){ + output->dim[i] = input->dim[i]; + } + else if(i > axis){ + output->dim[i-1] = input->dim[i]; + } + } + + + int32_t *out_strides_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *out_extents_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *inner_strides_0 = (int32_t *)malloc(m * sizeof(int32_t)); + int32_t *inner_extents_0 = (int32_t *)malloc(m * sizeof(int32_t)); + + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size / input->dim[axis]; + output->dim_count = 3; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + + input->data = (float *)(buffer + 7); + out_strides_0 = (int32_t *)(buffer + 7 + in_size); + out_extents_0 = (int32_t *)(buffer + 7 + in_size + n); + inner_strides_0 = (int32_t *)(buffer + 7 + in_size + 2 * n); + inner_extents_0 = (int32_t *)(buffer + 7 + in_size + 2 * n + m); + reference->data = (float *)(buffer + 7 + in_size + 2 * n + 2 * m); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.9; + + params.axis = &axis; + params.axis_count = 1; // must be 1 + params.m = m; + params.n = n; + params.out_strides = out_strides_0; + params.out_extents = out_extents_0; + params.inner_strides = inner_strides_0; + params.inner_extents = inner_extents_0; + params.base.api = CSINN_API; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + + test_prod_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_prod_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_prod_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/psroipooling.c b/tests/validation_layer/psroipooling.c new file mode 100644 index 00000000..f89010c9 --- /dev/null +++ b/tests/validation_layer/psroipooling.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of psropooling(layer).\n"); + + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *spatial_scale = csi_alloc_tensor(NULL); + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct psroipooling_params params; + int in0_size = 0, in1_size = 0, out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input0->dim[0] = buffer[0]; // batch + input0->dim[1] = buffer[1]; // channel + input0->dim[2] = buffer[2]; // height + input0->dim[3] = buffer[3]; // width + input0->dim_count = 4; + in0_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input0->name = "input0"; + input0->data = (float *)(buffer + 10); + + + + input1->dim[0] = buffer[6]; + input1->dim[1] = 5; + input1->dim_count = 2; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + in1_size = input1->dim[0] * input1->dim[1]; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->name = "input1"; + input1->data = (float *)(buffer + 10 + in0_size); + + + output->dim[0] = input1->dim[0]; // num_rois + output->dim[1] = buffer[7]; // output_dim + output->dim[2] = buffer[4]; + output->dim[3] = buffer[5]; + output->dim_count = 4; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + reference->data = (float *)(buffer + 10 + in0_size + in1_size); + output->data = reference->data; + output->name = "output"; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + params.spatial_scale = *((float *)buffer + 9); + params.output_dim = buffer[7]; + params.group_size = buffer[8]; + params.base.api = CSINN_API; + params.base.name = "params"; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + + + test_psroipooling_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_psroipooling_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_psroipooling_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/reduce_logsumexp.c b/tests/validation_layer/reduce_logsumexp.c new file mode 100644 index 00000000..3af665ad --- /dev/null +++ b/tests/validation_layer/reduce_logsumexp.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of reduce_logsumexp(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + params.axis_count = 1; + params.axis = (int *)malloc(sizeof(int) * params.axis_count); + params.axis[0] = buffer[4]; + + in_size0 = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + input->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size0 ); + if(params.axis[0]==-1) { + out_size = 1; + output->dim_count = 1; + output->dim[0] = 1; + } else { + out_size = in_size0/input->dim[params.axis[0]]; + output->dim_count = 4; // keep_dim = 1 + for(int i = 0; i < output->dim_count; i++) { + if(params.axis[0] == i) { + output->dim[i] = 1; + } else { + output->dim[i] = input->dim[i]; + } + } + } + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_reduce_logsumexp_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_reduce_logsumexp_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_reduce_logsumexp_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/reduce_max.c b/tests/validation_layer/reduce_max.c new file mode 100644 index 00000000..2dbd4b5b --- /dev/null +++ b/tests/validation_layer/reduce_max.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of reduce_max(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + reference->dim[0] = input->dim[0] = buffer[0]; // batch + reference->dim[1] = input->dim[1] = buffer[1]; // height + reference->dim[2] = input->dim[2] = buffer[2]; // width + reference->dim[3] = input->dim[3] = buffer[3]; // channel + + params.axis_count = 1; + params.axis = (int *)malloc(sizeof(int) * params.axis_count); + params.axis[0] = buffer[4]; + + in_size0 = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + input->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size0 ); + if(params.axis[0]==-1) { + out_size = 1; + output->dim_count = 1; + output->dim[0] = 1; + } else { + out_size = in_size0/input->dim[params.axis[0]]; + output->dim_count = 4; // keep_dim = 1 + for(int i = 0; i < output->dim_count; i++) { + if(params.axis[0] == i) { + output->dim[i] = 1; + } else { + output->dim[i] = input->dim[i]; + } + } + } + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_reduce_max_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_reduce_max_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_reduce_max_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/reduce_mean.c b/tests/validation_layer/reduce_mean.c new file mode 100644 index 00000000..b2b57c40 --- /dev/null +++ b/tests/validation_layer/reduce_mean.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of reduce_mean(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + reference->dim[0] = input->dim[0] = buffer[0]; // batch + reference->dim[1] = input->dim[1] = buffer[1]; // height + reference->dim[2] = input->dim[2] = buffer[2]; // width + reference->dim[3] = input->dim[3] = buffer[3]; // channel + + params.axis_count = 1; + params.axis = (int *)malloc(sizeof(int) * params.axis_count); + params.axis[0] = buffer[4]; + + in_size0 = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + input->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size0 ); + if(params.axis[0]==-1) { + out_size = 1; + output->dim_count = 1; + output->dim[0] = 1; + } else { + out_size = in_size0/input->dim[params.axis[0]]; + output->dim_count = 4; // keep_dim = 1 + for(int i = 0; i < output->dim_count; i++) { + if(params.axis[0] == i) { + output->dim[i] = 1; + } else { + output->dim[i] = input->dim[i]; + } + } + } + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_reduce_mean_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_reduce_mean_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_reduce_mean_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/reduce_min.c b/tests/validation_layer/reduce_min.c new file mode 100644 index 00000000..09509701 --- /dev/null +++ b/tests/validation_layer/reduce_min.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of reduce_min(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + reference->dim[0] = input->dim[0] = buffer[0]; // batch + reference->dim[1] = input->dim[1] = buffer[1]; // height + reference->dim[2] = input->dim[2] = buffer[2]; // width + reference->dim[3] = input->dim[3] = buffer[3]; // channel + + params.axis_count = 1; + params.axis = (int *)malloc(sizeof(int) * params.axis_count); + params.axis[0] = buffer[4]; + + in_size0 = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + input->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size0 ); + if(params.axis[0]==-1) { + out_size = 1; + output->dim_count = 1; + output->dim[0] = 1; + } else { + out_size = in_size0/input->dim[params.axis[0]]; + output->dim_count = 4; // keep_dim = 1 + for(int i = 0; i < output->dim_count; i++) { + if(params.axis[0] == i) { + output->dim[i] = 1; + } else { + output->dim[i] = input->dim[i]; + } + } + } + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_reduce_min_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_reduce_min_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_reduce_min_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/reduce_prod.c b/tests/validation_layer/reduce_prod.c new file mode 100644 index 00000000..543f8e4e --- /dev/null +++ b/tests/validation_layer/reduce_prod.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of reduce_prod(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + reference->dim[0] = input->dim[0] = buffer[0]; + reference->dim[1] = input->dim[1] = buffer[1]; + reference->dim[2] = input->dim[2] = buffer[2]; + reference->dim[3] = input->dim[3] = buffer[3]; + + params.axis_count = 1; + params.axis = (int *)malloc(sizeof(int) * params.axis_count); + params.axis[0] = buffer[4]; + + in_size0 = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + input->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size0); + if(params.axis[0]==-1) { + out_size = 1; + output->dim_count = 1; + output->dim[0] = 1; + } else { + out_size = in_size0/input->dim[params.axis[0]]; + output->dim_count = 4; // keep_dim = 1 + for(int i = 0; i < output->dim_count; i++) { + if(params.axis[0] == i) { + output->dim[i] = 1; + } else { + output->dim[i] = input->dim[i]; + } + } + } + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_reduce_prod_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_reduce_prod_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_reduce_prod_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/reduce_sum.c b/tests/validation_layer/reduce_sum.c new file mode 100644 index 00000000..a9c59978 --- /dev/null +++ b/tests/validation_layer/reduce_sum.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of reduce_sum(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + reference->dim[0] = input->dim[0] = buffer[0]; // batch + reference->dim[1] = input->dim[1] = buffer[1]; // height + reference->dim[2] = input->dim[2] = buffer[2]; // width + reference->dim[3] = input->dim[3] = buffer[3]; // channel + + params.axis_count = 1; + params.axis = (int *)malloc(sizeof(int) * params.axis_count); + params.axis[0] = buffer[4]; + + in_size0 = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + input->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size0 ); + if(params.axis[0]==-1) { + out_size = 1; + output->dim_count = 1; + output->dim[0] = 1; + } else { + out_size = in_size0/input->dim[params.axis[0]]; + output->dim_count = 4; // keep_dim = 1 + for(int i = 0; i < output->dim_count; i++) { + if(params.axis[0] == i) { + output->dim[i] = 1; + } else { + output->dim[i] = input->dim[i]; + } + } + } + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_reduce_sum_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_reduce_sum_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_reduce_sum_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/relu.cpp b/tests/validation_layer/relu.cpp new file mode 100644 index 00000000..7aabad6f --- /dev/null +++ b/tests/validation_layer/relu.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of relu(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct relu_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + +#if THEAD_RVV + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_relu_init, csi_nn_rvv_relu_fp32, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT16, csi_relu_init, csi_nn_rvv_relu_fp16, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_ASYM, csi_relu_init, csi_nn_rvv_relu_int8, + &difference); +#else + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_relu_init, csi_relu, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_UINT8_ASYM, csi_relu_init, csi_relu, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_relu_init, csi_relu, + &difference); +#endif + + return done_testing(); +} diff --git a/tests/validation_layer/relu1.c b/tests/validation_layer/relu1.c new file mode 100644 index 00000000..b0417359 --- /dev/null +++ b/tests/validation_layer/relu1.c @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of relu1(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct relu_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_relu1_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_relu1_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_relu1_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/relu6.c b/tests/validation_layer/relu6.c new file mode 100644 index 00000000..4960d6dd --- /dev/null +++ b/tests/validation_layer/relu6.c @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of relu6(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct relu_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_relu6_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_relu6_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_relu6_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/relun.c b/tests/validation_layer/relun.c new file mode 100644 index 00000000..9b94be51 --- /dev/null +++ b/tests/validation_layer/relun.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of relun(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct relu_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + params.n = buffer[4]; + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_relun_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_relun_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_relun_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/reshape.c b/tests/validation_layer/reshape.c new file mode 100644 index 00000000..50b6f9f8 --- /dev/null +++ b/tests/validation_layer/reshape.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of reshape(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reshape_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int reshape_count = buffer[4]; + int *reshape = (int *)malloc(reshape_count * sizeof(int)); + for(int i = 0; i < reshape_count; i++) { + reshape[i] = buffer[5 + i]; + } + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + input->dim_count = 4; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + input->name = "input"; + float *input_data = (float *)(buffer + 5 + reshape_count); + input->data = input_data; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dim_count = reshape_count; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + out_size = in_size; + for(int i = 0; i < output->dim_count; i++) { + output->dim[i] = reshape[i]; + // out_size *= output->dim[i]; + } + + reference->data = (float *)(buffer + 5 + reshape_count + in_size); + output->data = reference->data; + output->name = "output"; + output->dtype = CSINN_DTYPE_FLOAT32; + + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + params.base.layout = CSINN_LAYOUT_NCHW; + params.shape = reshape; + params.shape_num = output->dim_count; + + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_reshape_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_reshape_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_reshape_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/resize_bilinear.c b/tests/validation_layer/resize_bilinear.c new file mode 100644 index 00000000..86e456d6 --- /dev/null +++ b/tests/validation_layer/resize_bilinear.c @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of resize bilinear f32.\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct resize_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[4]; // height + output->dim[2] = buffer[5]; // width + output->dim[3] = buffer[3]; // channel + input->dim_count = 4; + output->dim_count = 4; + params.resize_mode = CSINN_RESIZE_BILINEAR; + params.align_corners = buffer[6]; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + params.base.layout = CSINN_LAYOUT_NHWC; + + input->data = (float *)(buffer + 7); + reference->data = (float *)(buffer + 7 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_resize_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_resize_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_resize_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/resize_nearestneighbor.c b/tests/validation_layer/resize_nearestneighbor.c new file mode 100644 index 00000000..8758eba7 --- /dev/null +++ b/tests/validation_layer/resize_nearestneighbor.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of resize nearestneighbor(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct resize_params params; + int in_size, out_size; + int zp, quantized_multiplier, shift; + float scale, min_value, max_value; + float max_error = 0.0f; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + output->dim[0] = buffer[0]; // batch + output->dim[1] = buffer[1]; // channel + output->dim[2] = buffer[4]; // height + output->dim[3] = buffer[5]; // width + input->dim_count = 4; + output->dim_count = 4; + params.resize_mode = CSINN_RESIZE_NEAREST_NEIGHBOR; + params.align_corners = buffer[6]; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.layout = CSINN_LAYOUT_NCHW; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 7); + reference->data = (float *)(buffer + 7 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_resize_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_resize_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_resize_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/reverse.c b/tests/validation_layer/reverse.c new file mode 100644 index 00000000..cb6c5e4d --- /dev/null +++ b/tests/validation_layer/reverse.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of reverse(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reverse_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + params.axis = buffer[4]; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_reverse_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_reverse_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_reverse_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/roialign.c b/tests/validation_layer/roialign.c new file mode 100644 index 00000000..ddab2691 --- /dev/null +++ b/tests/validation_layer/roialign.c @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of roialign(layer).\n"); + + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct roi_align_params params; + int in0_size = 0, in1_size = 0, out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input0->dim[0] = buffer[0]; // batch + input0->dim[1] = buffer[1]; // channel + input0->dim[2] = buffer[2]; // height + input0->dim[3] = buffer[3]; // width + input0->dim_count = 4; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + in0_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->name = "input0"; + input0->data = (float *)(buffer + 11); + + + input1->dim[0] = buffer[6]; + input1->dim[1] = 5; + input1->dim_count = 2; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + in1_size = input1->dim[0] * input1->dim[1]; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->name = "input1"; + input1->data = (float *)(buffer + 11 + in0_size); + + + output->dim[0] = input1->dim[0]; // num_rois + output->dim[1] = input0->dim[1]; // channel + output->dim[2] = buffer[4]; + output->dim[3] = buffer[5]; + output->dim_count = 4; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + reference->data = (float *)(buffer + 11 + in0_size + in1_size); + output->data = reference->data; + output->name = "output"; + output->dtype = CSINN_DTYPE_FLOAT32; + float difference = argc > 2 ? atof(argv[2]) : 0.9; + + params.spatial_scale = *((float *)buffer + 9); + params.sample_ratio = *((int32_t *)buffer + 10); + params.pooled_size_h = buffer[7]; + params.pooled_size_w = buffer[8]; + params.base.api = CSINN_API; + params.base.name = "params"; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + + test_roi_align_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_roi_align_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_roi_align_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/roipooling.c b/tests/validation_layer/roipooling.c new file mode 100644 index 00000000..dd41e1a8 --- /dev/null +++ b/tests/validation_layer/roipooling.c @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of roipooling(layer).\n"); + + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct roi_pool_params params; + int in0_size = 0, in1_size = 0, out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input0->dim[0] = buffer[0]; // batch + input0->dim[1] = buffer[1]; // channel + input0->dim[2] = buffer[2]; // height + input0->dim[3] = buffer[3]; // width + input0->dim_count = 4; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + in0_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->name = "input0"; + input0->data = (float *)(buffer + 10); + + + input1->dim[0] = buffer[6]; + input1->dim[1] = 5; + input1->dim_count = 2; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + in1_size = input1->dim[0] * input1->dim[1]; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->name = "input1"; + input1->data = (float *)(buffer + 10 + in0_size); + + + output->dim[0] = input1->dim[0]; // num_rois + output->dim[1] = input0->dim[1]; // channel + output->dim[2] = buffer[4]; + output->dim[3] = buffer[5]; + output->dim_count = 4; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + reference->data = (float *)(buffer + 10 + in0_size + in1_size); + output->data = reference->data; + output->name = "output"; + output->dtype = CSINN_DTYPE_FLOAT32; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + params.spatial_scale = *((float *)buffer + 9); + params.pooled_size_h = buffer[7]; + params.pooled_size_w = buffer[8]; + params.base.api = CSINN_API; + params.base.name = "params"; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + + test_roipool_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_roipool_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_roipool_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/round.c b/tests/validation_layer/round.c new file mode 100644 index 00000000..3d189f59 --- /dev/null +++ b/tests/validation_layer/round.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of round(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_round_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_round_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_round_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/rsqrt.c b/tests/validation_layer/rsqrt.c new file mode 100644 index 00000000..40558fc7 --- /dev/null +++ b/tests/validation_layer/rsqrt.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of rsqrt(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_rsqrt_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_rsqrt_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_rsqrt_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/segment_max.c b/tests/validation_layer/segment_max.c new file mode 100644 index 00000000..505f0566 --- /dev/null +++ b/tests/validation_layer/segment_max.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of segment max(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *segment = csi_alloc_tensor(NULL); + struct segment_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + output->dim[0] = buffer[4]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[2]; + output->dim[3] = buffer[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.num_segments = buffer[4]; + params.unsorted = CSINN_FALSE; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->data = (float *)(buffer + 5); + segment->data = (int *)(buffer + 5 + in_size); + reference->data = (float *)(buffer + 5 + in_size + buffer[0]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_segment_max_CSINN_QUANT_FLOAT32(input, segment, output, ¶ms, &difference); + test_segment_max_CSINN_QUANT_UINT8_ASYM(input, segment, output, ¶ms, &difference); + test_segment_max_CSINN_QUANT_INT8_SYM(input, segment, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/segment_mean.c b/tests/validation_layer/segment_mean.c new file mode 100644 index 00000000..8ab5e699 --- /dev/null +++ b/tests/validation_layer/segment_mean.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of segment mean(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *segment = csi_alloc_tensor(NULL); + struct segment_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + output->dim[0] = buffer[4]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[2]; + output->dim[3] = buffer[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + params.num_segments = buffer[4]; + params.unsorted = CSINN_FALSE; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + params.base.layout = CSINN_LAYOUT_NCHW; + + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->data = (float *)(buffer + 5); + segment->data = (int *)(buffer + 5 + in_size); + reference->data = (float *)(buffer + 5 + in_size + buffer[0]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + + test_segment_mean_CSINN_QUANT_FLOAT32(input, segment, output, ¶ms, &difference); + test_segment_mean_CSINN_QUANT_UINT8_ASYM(input, segment, output, ¶ms, &difference); + test_segment_mean_CSINN_QUANT_INT8_SYM(input, segment, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/segment_min.c b/tests/validation_layer/segment_min.c new file mode 100644 index 00000000..80434685 --- /dev/null +++ b/tests/validation_layer/segment_min.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of segment min(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *segment = csi_alloc_tensor(NULL); + struct segment_params params; + int in_size, out_size; + + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + output->dim[0] = buffer[4]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[2]; + output->dim[3] = buffer[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + params.num_segments = buffer[4]; + params.unsorted = CSINN_FALSE; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + + input->data = (float *)(buffer + 5); + segment->data = (int *)(buffer + 5 + in_size); + reference->data = (float *)(buffer + 5 + in_size + buffer[0]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_segment_min_CSINN_QUANT_FLOAT32(input, segment, output, ¶ms, &difference); + test_segment_min_CSINN_QUANT_UINT8_ASYM(input, segment, output, ¶ms, &difference); + test_segment_min_CSINN_QUANT_INT8_SYM(input, segment, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/segment_prod.c b/tests/validation_layer/segment_prod.c new file mode 100644 index 00000000..983dbbf3 --- /dev/null +++ b/tests/validation_layer/segment_prod.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of segment prod(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *segment = csi_alloc_tensor(NULL); + struct segment_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + output->dim[0] = buffer[4]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[2]; + output->dim[3] = buffer[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.num_segments = buffer[4]; + params.unsorted = CSINN_FALSE; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->data = (float *)(buffer + 5); + segment->data = (int *)(buffer + 5 + in_size); + reference->data = (float *)(buffer + 5 + in_size + buffer[0]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_segment_prod_CSINN_QUANT_FLOAT32(input, segment, output, ¶ms, &difference); + test_segment_prod_CSINN_QUANT_UINT8_ASYM(input, segment, output, ¶ms, &difference); + test_segment_prod_CSINN_QUANT_INT8_SYM(input, segment, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/segment_sum.c b/tests/validation_layer/segment_sum.c new file mode 100644 index 00000000..8300229d --- /dev/null +++ b/tests/validation_layer/segment_sum.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of segment sum(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *segment = csi_alloc_tensor(NULL); + struct segment_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + output->dim[0] = buffer[4]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[2]; + output->dim[3] = buffer[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.num_segments = buffer[4]; + params.unsorted = CSINN_FALSE; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->data = (float *)(buffer + 5); + segment->data = (int *)(buffer + 5 + in_size); + reference->data = (float *)(buffer + 5 + in_size + buffer[0]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_segment_sum_CSINN_QUANT_FLOAT32(input, segment, output, ¶ms, &difference); + test_segment_sum_CSINN_QUANT_UINT8_ASYM(input, segment, output, ¶ms, &difference); + test_segment_sum_CSINN_QUANT_INT8_SYM(input, segment, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/select.c b/tests/validation_layer/select.c new file mode 100644 index 00000000..d83afe30 --- /dev/null +++ b/tests/validation_layer/select.c @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of select(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *condition = csi_alloc_tensor(NULL); + struct select_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input0->dim[0] = input1->dim[0] = buffer[0]; + input0->dim[1] = input1->dim[1] = buffer[1]; + input0->dim[2] = input1->dim[2] = buffer[2]; + input0->dim[3] = input1->dim[3] = buffer[3]; + + condition->dim[0] = buffer[0]; + condition->dim[1] = buffer[1]; + condition->dim[2] = buffer[2]; + condition->dim[3] = buffer[3]; + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + input0->dim_count = 4; + input1->dim_count = 4; + condition->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input1->dtype = CSINN_DTYPE_FLOAT32; + condition->dtype = CSINN_DTYPE_FLOAT32; + output->dtype = CSINN_DTYPE_FLOAT32; + + input0->layout = CSINN_LAYOUT_NCHW; + input1->layout = CSINN_LAYOUT_NCHW; + condition->layout = CSINN_LAYOUT_NCHW; + output->layout = CSINN_LAYOUT_NCHW; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + condition->data = (float *)(buffer + 4 + 2 * in_size); + reference->data = (float *)(buffer + 4 + 3 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_select_CSINN_QUANT_FLOAT32(condition, input0, input1, output, ¶ms, &difference); + test_select_CSINN_QUANT_UINT8_ASYM(condition, input0, input1, output, ¶ms, &difference); + test_select_CSINN_QUANT_INT8_SYM(condition, input0, input1, output, ¶ms, &difference); + + return done_testing(); +} + + diff --git a/tests/validation_layer/shuffle_channel.c b/tests/validation_layer/shuffle_channel.c new file mode 100644 index 00000000..7c98bab9 --- /dev/null +++ b/tests/validation_layer/shuffle_channel.c @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of shuffle_channel(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct shuffle_channel_params params; + int in_size = 1, out_size = 1; + + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + params.group = buffer[4]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + output->dim_count = 4; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; //out_size = in_size; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + + test_shuffle_channel_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_shuffle_channel_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_shuffle_channel_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/sigmoid.cpp b/tests/validation_layer/sigmoid.cpp new file mode 100644 index 00000000..0b87f0f3 --- /dev/null +++ b/tests/validation_layer/sigmoid.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of sigmoid(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct sigmoid_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_sigmoid_init, + csi_sigmoid, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT16, csi_sigmoid_init, + csi_sigmoid, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_sigmoid_init, + csi_sigmoid, &difference); + + + return done_testing(); +} diff --git a/tests/validation_layer/sign.c b/tests/validation_layer/sign.c new file mode 100644 index 00000000..778a1670 --- /dev/null +++ b/tests/validation_layer/sign.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of sign(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_sign_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_sign_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_sign_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/sin.c b/tests/validation_layer/sin.c new file mode 100644 index 00000000..ce2dc168 --- /dev/null +++ b/tests/validation_layer/sin.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of sin(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_sin_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_sin_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_sin_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/sinh.c b/tests/validation_layer/sinh.c new file mode 100644 index 00000000..2bbdfc5c --- /dev/null +++ b/tests/validation_layer/sinh.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of sinh(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_sinh_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_sinh_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_sinh_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/slice.c b/tests/validation_layer/slice.c new file mode 100644 index 00000000..c3c2725f --- /dev/null +++ b/tests/validation_layer/slice.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of slice(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct slice_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + + params.slice_num = 4; + params.begin = (int *)malloc(4 * sizeof(int)); + params.end = (int *)malloc(4 * sizeof(int)); + for(int i = 0; i < 4; i++) { + params.begin[i] = buffer[4+i]; + params.end[i] = buffer[8+i]; + } + + output->dim[0] = params.end[0] - params.begin[0]; + output->dim[1] = params.end[1] - params.begin[1]; + output->dim[2] = params.end[2] - params.begin[2]; + output->dim[3] = params.end[3] - params.begin[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 12); + reference->data = (float *)(buffer + 12 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_slice_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_slice_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_slice_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/softmax.cpp b/tests/validation_layer/softmax.cpp new file mode 100644 index 00000000..280f886a --- /dev/null +++ b/tests/validation_layer/softmax.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of softmax(layer)\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct softmax_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + params.axis = buffer[4]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + params.base.layout = CSINN_LAYOUT_NCHW; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_softmax_init, + csi_softmax, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT16, csi_softmax_init, + csi_softmax, &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_softmax_init, + csi_softmax, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/softplus.c b/tests/validation_layer/softplus.c new file mode 100644 index 00000000..ee9c432f --- /dev/null +++ b/tests/validation_layer/softplus.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of softplus(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_softplus_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_softplus_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_softplus_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/softrelu.c b/tests/validation_layer/softrelu.c new file mode 100644 index 00000000..1f0877c5 --- /dev/null +++ b/tests/validation_layer/softrelu.c @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of softrelu(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct relu_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + params.n = buffer[4]; + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_softrelu_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_softrelu_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_softrelu_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/softsign.c b/tests/validation_layer/softsign.c new file mode 100644 index 00000000..8b371210 --- /dev/null +++ b/tests/validation_layer/softsign.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of softsign(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_softsign_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_softsign_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_softsign_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/space_to_batch.c b/tests/validation_layer/space_to_batch.c new file mode 100644 index 00000000..9c090c09 --- /dev/null +++ b/tests/validation_layer/space_to_batch.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of space_to_depth(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct space_to_batch_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; //batch + input->dim[1] = buffer[1]; //in_channel + input->dim[2] = buffer[2]; //in_height + input->dim[3] = buffer[3]; //in_width + + params.block_size = buffer[4]; + params.pad_top = buffer[5]; + params.pad_bottom = buffer[6]; + params.pad_left = buffer[7]; + params.pad_right = buffer[8]; + + output->dim[0] = input->dim[0] * params.block_size * params.block_size; + output->dim[1] = input->dim[1]; + output->dim[2] = (input->dim[2] + params.pad_top + params.pad_bottom) / params.block_size; + output->dim[3] = (input->dim[3] + params.pad_left + params.pad_right) / params.block_size; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 9); + reference->data = (float *)(buffer + 9 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_space_to_batch_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_space_to_batch_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_space_to_batch_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/space_to_depth.c b/tests/validation_layer/space_to_depth.c new file mode 100644 index 00000000..d3d4aecc --- /dev/null +++ b/tests/validation_layer/space_to_depth.c @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of space_to_depth(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct space_to_depth_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; //batch + input->dim[1] = buffer[1]; //in_channel + input->dim[2] = buffer[2]; //in_height + input->dim[3] = buffer[3]; //in_width + + params.block_size = buffer[4]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1] * params.block_size * params.block_size; + output->dim[2] = input->dim[2] / params.block_size; + output->dim[3] = input->dim[3] / params.block_size; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_space_to_depth_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_space_to_depth_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_space_to_depth_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/split.c b/tests/validation_layer/split.c new file mode 100644 index 00000000..0a6753f3 --- /dev/null +++ b/tests/validation_layer/split.c @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of split(layer).\n"); + + int *buffer = read_input_data_f32(argv[1]); + int axis = buffer[4]; + int output_cnt = buffer[5]; + int32_t *split_index = (int32_t *)malloc(output_cnt * sizeof(int32_t)); + for(int i = 0; i < output_cnt; i++) { + split_index[i] = buffer[axis] / output_cnt; + } + + struct csi_tensor *reference[output_cnt]; + for(int i = 0; i < output_cnt; i++) { + reference[i] = csi_alloc_tensor(NULL); + } + int in_size = 0; + int out_size[output_cnt]; + int acc_out_size = 0; + + + struct csi_tensor *input = csi_alloc_tensor(NULL); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + input->dim_count = 4; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + + input->data = (float *)(buffer + 6); + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + struct csi_tensor *output[output_cnt]; + for(int i = 0; i < output_cnt; i++) { + output[i] = csi_alloc_tensor(NULL); + for(int j = 0; j < 4; j++) { + if(j == axis) { + output[i]->dim[j] = split_index[i]; + } else { + output[i]->dim[j] = input->dim[j]; + } + } + output[i]->dim_count = 4; + out_size[i] = output[i]->dim[0] * output[i]->dim[1] * output[i]->dim[2] * output[i]->dim[3]; + + reference[i]->data = (float *)(buffer + 6 + in_size + acc_out_size); + output[i]->data = reference[i]->data; + acc_out_size += out_size[i]; + output[i]->dtype = CSINN_DTYPE_FLOAT32; + output[i]->is_const = 0; + output[i]->layout = CSINN_LAYOUT_NCHW; + output[i]->quant_channel = 1; + } + + struct split_params params; + params.base.api = CSINN_API; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + params.axis = axis; + params.output_num = output_cnt; + + int temp = 0; + for(int i = 0; i < output_cnt; i++) { + temp += split_index[i]; + split_index[i] = temp; + printf("%d\n", split_index[i]); + } + params.split_index = split_index; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_split_CSINN_QUANT_FLOAT32(input, (struct csi_tensor **)output, ¶ms, &difference); + test_split_CSINN_QUANT_UINT8_ASYM(input, (struct csi_tensor **)output, ¶ms, &difference); + test_split_CSINN_QUANT_INT8_SYM(input, (struct csi_tensor **)output, ¶ms, &difference); + + + return done_testing(); +} diff --git a/tests/validation_layer/sqrt.c b/tests/validation_layer/sqrt.c new file mode 100644 index 00000000..e18bca51 --- /dev/null +++ b/tests/validation_layer/sqrt.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of sqrt(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_sqrt_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_sqrt_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_sqrt_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/square.c b/tests/validation_layer/square.c new file mode 100644 index 00000000..cef086ba --- /dev/null +++ b/tests/validation_layer/square.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of square(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_square_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_square_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_square_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/squeeze.c b/tests/validation_layer/squeeze.c new file mode 100644 index 00000000..aa790fd4 --- /dev/null +++ b/tests/validation_layer/squeeze.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of squeeze(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct squeeze_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + int axis_len = buffer[3]; + int32_t *axis = (int32_t *)malloc(axis_len * sizeof(int32_t)); + for(int i = 0; i < axis_len; i++) { + axis[i] = buffer[4 + i]; + } + + output->dim[0] = input->dim[0] = buffer[0]; // batch + output->dim[1] = input->dim[1] = buffer[1]; // height + output->dim[2] = input->dim[2] = buffer[2]; // width + input->dim[3] = 1; + input->dim[4] = 1; + input->dim[5] = 1; + input->dim_count = 6; + output->dim_count = input->dim_count - axis_len; + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + params.axis = axis; + params.axis_num = axis_len; + params.base.layout = CSINN_LAYOUT_NCHW; + in_size = input->dim[0] * input->dim[1] * input->dim[2]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + + input->data = (float *)(buffer + 4 + axis_len); + reference->data = (float *)(buffer + 4 + axis_len + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_squeeze_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_squeeze_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_squeeze_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/stack.c b/tests/validation_layer/stack.c new file mode 100644 index 00000000..628119da --- /dev/null +++ b/tests/validation_layer/stack.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of stack(layer).\n"); + + int in_size = 1; + int out_size = 1; + int *buffer = read_input_data_f32(argv[1]); + + struct stack_params params; + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + + params.inputs_count = buffer[0]; + params.axis = buffer[1]; + output->dim_count = buffer[2]; + for(int i = 0; i < output->dim_count; i++) { + output->dim[i] = buffer[3+i]; + out_size *= output->dim[i]; + } + in_size = out_size / params.inputs_count; + + struct csi_tensor *input[params.inputs_count]; + for (int i = 0; i < params.inputs_count; i++) { + input[i] = csi_alloc_tensor(NULL); + input[i]->data = (float *)(buffer + 3 + output->dim_count + in_size * i); + input[i]->dim_count = buffer[2] - 1; + input[i]->layout = CSINN_LAYOUT_NCHW; + input[i]->is_const = 0; + input[i]->quant_channel = 1; + input[i]->dtype = CSINN_DTYPE_FLOAT32; + for (int j = 0; j < input[i]->dim_count; j++) { + if (j < params.axis) { + input[i]->dim[j] = buffer[3+j]; // input[i]->dim[j] = output->dim[j] + } else { + input[i]->dim[j] = buffer[3+j+1]; // input[i]->dim[j] = output->dim[j + 1] + } + } + } + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + reference->data = (float *)(buffer + 3 + output->dim_count + in_size * params.inputs_count); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_stack_CSINN_QUANT_FLOAT32((struct csi_tensor **)input, output, ¶ms, &difference); + test_stack_CSINN_QUANT_UINT8_ASYM((struct csi_tensor **)input, output, ¶ms, &difference); + test_stack_CSINN_QUANT_INT8_SYM((struct csi_tensor **)input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/strided_slice.c b/tests/validation_layer/strided_slice.c new file mode 100644 index 00000000..92540d5c --- /dev/null +++ b/tests/validation_layer/strided_slice.c @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of strided_slice(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct strided_slice_params params; + int in_size = 1; + int out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i+1]; + in_size *= input->dim[i]; + } + params.slice_count = buffer[1+input->dim_count]; + params.begin = (int *)malloc(params.slice_count * sizeof(int)); + params.end = (int *)malloc(params.slice_count * sizeof(int)); + params.stride = (int *)malloc(params.slice_count * sizeof(int)); + for(int i = 0; i < params.slice_count; i++) { + params.begin[i] = buffer[2+input->dim_count+3*i]; + params.end[i] = buffer[3+input->dim_count+3*i]; + params.stride[i] = buffer[4+input->dim_count+3*i]; + } + output->dim_count = input->dim_count; + for(int i = 0; i < output->dim_count; i++) { + if(i < params.slice_count) { + output->dim[i] = ceil((float)(params.end[i] - params.begin[i]) / params.stride[i]); + } else { + output->dim[i] = input->dim[i]; + } + } + out_size = buffer[2+input->dim_count+3*params.slice_count]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + + input->data = (float *)(buffer + 3 + input->dim_count + 3*params.slice_count); + reference->data = (float *)(buffer + 3 + input->dim_count + 3*params.slice_count + in_size); //input->data + in_size + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_strided_slice_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_strided_slice_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_strided_slice_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/sub.c b/tests/validation_layer/sub.c new file mode 100644 index 00000000..d43fa487 --- /dev/null +++ b/tests/validation_layer/sub.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of sub(layer).\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + int flag = buffer[4]; + input1->dim[0] = input0->dim[0] = buffer[0]; // batch + input1->dim[1] = input0->dim[1] = buffer[1]; // height + input1->dim[2] = input0->dim[2] = buffer[2]; // width + input1->dim[3] = input0->dim[3] = buffer[3]; // channel + + output->dim[0] = input0->dim[0]; + output->dim[1] = input0->dim[1]; + output->dim[2] = input0->dim[2]; + output->dim[3] = input0->dim[3]; + + in_size = input0->dim[0] * input0->dim[1] * input0->dim[2] * input0->dim[3]; + out_size = in_size; + input0->dim_count = 4; + input1->dim_count = 4; + output->dim_count = 4; + input0->dtype = CSINN_DTYPE_FLOAT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_FLOAT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (float *)(buffer + 4); + input1->data = (float *)(buffer + 4 + in_size); + reference->data = (float *)(buffer + 4 + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_sub_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_sub_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_sub_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/sum_stride.cpp b/tests/validation_layer/sum_stride.cpp new file mode 100644 index 00000000..237dbad5 --- /dev/null +++ b/tests/validation_layer/sum_stride.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "csi_nn.h" +#include "csi_thead_rvv.h" +#include "csi_utils.h" +#include "math_snr.h" +#include "test_utils.h" +#include "testutil.h" + +int main(int argc, char **argv) +{ + init_testsuite("Testing function of sum(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct reduce_params params; + int in_size = 0; + int out_size = 0; + + int *buffer = read_input_data_f32(argv[1]); + + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // channel + input->dim[2] = buffer[2]; // height + input->dim[3] = buffer[3]; // width + input->dim_count = 4; + int axis = buffer[4]; + int m = buffer[5]; + int n = buffer[6]; + + for (int i = 0; i < input->dim_count; i++) { + if (i < axis) { + output->dim[i] = input->dim[i]; + } else if (i > axis) { + output->dim[i - 1] = input->dim[i]; + } + } + + int32_t *out_strides_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *out_extents_0 = (int32_t *)malloc(n * sizeof(int32_t)); + int32_t *inner_strides_0 = (int32_t *)malloc(m * sizeof(int32_t)); + int32_t *inner_extents_0 = (int32_t *)malloc(m * sizeof(int32_t)); + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size / input->dim[axis]; + output->dim_count = 3; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + input->data = (float *)(buffer + 7); + out_strides_0 = (int32_t *)(buffer + 7 + in_size); + out_extents_0 = (int32_t *)(buffer + 7 + in_size + n); + inner_strides_0 = (int32_t *)(buffer + 7 + in_size + 2 * n); + inner_extents_0 = (int32_t *)(buffer + 7 + in_size + 2 * n + m); + reference->data = (float *)(buffer + 7 + in_size + 2 * n + 2 * m); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + params.axis = &axis; + params.axis_count = 1; // must be 1 + params.m = m; + params.n = n; + params.out_strides = out_strides_0; + params.out_extents = out_extents_0; + params.inner_strides = inner_strides_0; + params.inner_extents = inner_extents_0; + params.base.api = CSINN_API; + params.base.layout = CSINN_LAYOUT_NCHW; + params.base.run_mode = CSINN_RM_LAYER; + + + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT32, csi_sum_init, csi_sum, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_FLOAT16, csi_sum_init, csi_sum, + &difference); + test_unary_op(input, output, ¶ms, CSINN_QUANT_INT8_SYM, csi_sum_init, csi_sum, + &difference); + + + return done_testing(); +} diff --git a/tests/validation_layer/tan.c b/tests/validation_layer/tan.c new file mode 100644 index 00000000..ae7639da --- /dev/null +++ b/tests/validation_layer/tan.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of tan(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_tan_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_tan_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_tan_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/tanh.c b/tests/validation_layer/tanh.c new file mode 100644 index 00000000..fb7232a3 --- /dev/null +++ b/tests/validation_layer/tanh.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of tanh(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + output->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 1 + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_tanh_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_tanh_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_tanh_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/testutil.h b/tests/validation_layer/testutil.h new file mode 100644 index 00000000..076f66de --- /dev/null +++ b/tests/validation_layer/testutil.h @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +// #include "common.h" + +#include +#include + +#include "csi_nn.h" +#include "math_snr.h" +#include "test_utils.h" + +template +void test_unary_op(struct csi_tensor *input, struct csi_tensor *output, T *params, + enum csinn_quant_enum quant_dtype, + int (*init_op)(struct csi_tensor *, struct csi_tensor *, T *), + int (*unary_op)(struct csi_tensor *, struct csi_tensor *, T *), + float *difference) +{ + enum csinn_quant_enum test_dtype = quant_dtype; + int test_api = params->base.api; + struct csi_tensor *qinput = convert_f32_layer(input, test_dtype, (enum csinn_api_enum)test_api); + struct csi_tensor *qoutput = + convert_f32_layer(output, test_dtype, (enum csinn_api_enum)test_api); + if (init_op(qinput, qoutput, params) == CSINN_TRUE) { + unary_op(qinput, qoutput, params); + } + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); + result_verify_f32((float *)output->data, (float *)foutput->data, (float *)input->data, + *difference, csi_tensor_size(output), false); + csi_ref_tensor_transform_free_f32(foutput); +} + +template +void test_binary_op(struct csi_tensor *input0, struct csi_tensor *input1, struct csi_tensor *output, + T *params, enum csinn_quant_enum quant_dtype, + int (*init_op)(struct csi_tensor *, struct csi_tensor *, struct csi_tensor *, + T *), + int (*binary_op)(struct csi_tensor *, struct csi_tensor *, struct csi_tensor *, + T *), + float *difference) +{ + enum csinn_quant_enum test_dtype = quant_dtype; + int test_api = params->base.api; + struct csi_tensor *qinput0 = + convert_f32_layer(input0, test_dtype, (enum csinn_api_enum)test_api); + struct csi_tensor *qinput1 = + convert_f32_layer(input1, test_dtype, (enum csinn_api_enum)test_api); + struct csi_tensor *qoutput = + convert_f32_layer(output, test_dtype, (enum csinn_api_enum)test_api); + if (init_op(qinput0, qinput1, qoutput, params) == CSINN_TRUE) { + binary_op(qinput0, qinput1, qoutput, params); + } + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); + result_verify_f32((float *)output->data, (float *)foutput->data, (float *)input0->data, + *difference, csi_tensor_size(output), false); + csi_ref_tensor_transform_free_f32(foutput); +} + +template +void test_concat_op(struct csi_tensor **input, struct csi_tensor *output, T *params, + enum csinn_quant_enum quant_dtype, + int (*init_op)(struct csi_tensor **, struct csi_tensor *, T *), + int (*unary_op)(struct csi_tensor **, struct csi_tensor *, T *), + float *difference) +{ + enum csinn_quant_enum test_dtype = quant_dtype; + int test_api = params->base.api; + struct csi_tensor *qinput[params->inputs_count]; + for (int i = 0; i < params->inputs_count; i++) { + qinput[i] = convert_f32_layer(input[i], test_dtype, (enum csinn_api_enum)test_api); + } + struct csi_tensor *qoutput = + convert_f32_layer(output, test_dtype, (enum csinn_api_enum)test_api); + if (init_op((struct csi_tensor **)qinput, qoutput, params) == CSINN_TRUE) { + unary_op((struct csi_tensor **)qinput, qoutput, params); + } + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); + result_verify_f32((float *)output->data, (float *)foutput->data, (float *)input[0]->data, + *difference, csi_tensor_size(output), false); + csi_ref_tensor_transform_free_f32(foutput); +} + +template +void test_conv2d_op(struct csi_tensor *input, struct csi_tensor *output, struct csi_tensor *kernel, + struct csi_tensor *bias, T *params, enum csinn_quant_enum quant_dtype, + int (*init_op)(struct csi_tensor *, struct csi_tensor *, struct csi_tensor *, + struct csi_tensor *, T *), + int (*conv2d_op)(struct csi_tensor *, struct csi_tensor *, struct csi_tensor *, + struct csi_tensor *, T *), + float *difference) +{ + enum csinn_quant_enum test_dtype = quant_dtype; + int test_api = params->base.api; + struct csi_tensor *qbias; + struct csi_tensor *qinput; + + if (test_dtype == CSINN_QUANT_INT8_SYM) { + qbias = fuse_zp_to_bias(input, kernel, bias, (enum csinn_api_enum)test_api); + qinput = convert_f32_layer(input, CSINN_QUANT_INT8_ASYM, (enum csinn_api_enum)test_api); + qinput->qinfo->zero_point = 0; + } else { + qbias = convert_f32_layer(bias, test_dtype, (enum csinn_api_enum)test_api); + qinput = convert_f32_layer(input, test_dtype, (enum csinn_api_enum)test_api); + } + + struct csi_tensor *qoutput = + convert_f32_layer(output, test_dtype, (enum csinn_api_enum)test_api); + struct csi_tensor *qkernel = + convert_f32_layer(kernel, test_dtype, (enum csinn_api_enum)test_api); + + if (init_op(qinput, qoutput, qkernel, qbias, params) == CSINN_TRUE) { + conv2d_op(qinput, qoutput, qkernel, qbias, params); + } + struct csi_tensor *foutput = csi_ref_tensor_transform_f32(qoutput); + result_verify_f32((float *)output->data, (float *)foutput->data, (float *)input->data, + *difference, csi_tensor_size(output), false); + csi_ref_tensor_transform_free_f32(foutput); +} \ No newline at end of file diff --git a/tests/validation_layer/threshold_relu.c b/tests/validation_layer/threshold_relu.c new file mode 100644 index 00000000..96b90c68 --- /dev/null +++ b/tests/validation_layer/threshold_relu.c @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of threshold relu(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct relu_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + params.n = *(float *)&buffer[4]; // theta + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + + input->data = (float *)(buffer + 5); + reference->data = (float *)(buffer + 5 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_threshold_relu_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_threshold_relu_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_threshold_relu_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/tile.c b/tests/validation_layer/tile.c new file mode 100644 index 00000000..ae2b292f --- /dev/null +++ b/tests/validation_layer/tile.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of tile(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct tile_params params; + int in_size = 1; + int out_size = 1; + + + int *buffer = read_input_data_f32(argv[1]); + + input->dim_count = buffer[0]; + output->dim_count = input->dim_count; + params.reps_num = buffer[0]; + + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i+1]; + in_size *= input->dim[i]; + } + params.reps = (int *)malloc(params.reps_num * sizeof(int)); + for(int i = 0; i < params.reps_num; i++) { + params.reps[i] = buffer[i+1+input->dim_count]; + output->dim[i] = input->dim[i] * params.reps[i]; + out_size *= params.reps[i]; + } + out_size = out_size * in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + + input->data = (float *)(buffer + 1 + input->dim_count + input->dim_count); + reference->data = (float *)(buffer + 1 + input->dim_count + input->dim_count + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_tile_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_tile_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_tile_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/topk.c b/tests/validation_layer/topk.c new file mode 100644 index 00000000..a3d69e64 --- /dev/null +++ b/tests/validation_layer/topk.c @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of topk(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output1 = csi_alloc_tensor(NULL); + struct csi_tensor *output2 = csi_alloc_tensor(NULL); + struct csi_tensor *reference1 = csi_alloc_tensor(NULL); + struct csi_tensor *reference2 = csi_alloc_tensor(NULL); + struct topk_params params; + int in_size = 1, out_size = 1; + float error = 0.0f; + + int *buffer = read_input_data_f32(argv[1]); + params.k = buffer[0]; + input->dim_count = buffer[1]; + output1->dim_count = input->dim_count; + output2->dim_count = input->dim_count; + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 2]; + output1->dim[i] = input->dim[i]; + output2->dim[i] = input->dim[i]; + in_size *= input->dim[i]; + } + + out_size = in_size / input->dim[input->dim_count - 1] * params.k; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output1->dtype = CSINN_DTYPE_FLOAT32; + output1->layout = CSINN_LAYOUT_NCHW; + output1->is_const = 0; + output1->quant_channel = 1; + + + output2->dtype = CSINN_DTYPE_INT32; + output2->layout = CSINN_LAYOUT_NCHW; + output2->is_const = 0; + output2->quant_channel = 1; + + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + float *src_in_data = (float *)(buffer + 2 + input->dim_count); + float *ref_data1 = (float *)(buffer + 2 + input->dim_count + in_size); + int *ref_data2 = (int *)(buffer + 2 + input->dim_count + in_size + out_size); + + uint8_t *input_data = (uint8_t *)malloc(in_size * sizeof(uint8_t)); + + input->data = src_in_data; + get_quant_info(input); + + for(int i = 0; i < in_size; i++) { + input_data[i] = csi_ref_quantize_f32_to_u8(src_in_data[i], input->qinfo); + } + + /* compute the max quantize error */ + for(int i = 0; i < in_size; i++) { + float error1; + float output_tmp = csi_ref_dequantize_u8_to_f32(input_data[i], input->qinfo); + if(isinf(src_in_data[i]) && isinf(output_tmp) || isnan(src_in_data[i]) && isnan(output_tmp)) { + continue; + } else { + error1 = fabs(src_in_data[i] - output_tmp); + if(error1 > 1e-6) { + error1 = fabs(src_in_data[i] - output_tmp)/fabs(src_in_data[i] + 1e-9); + } + } + if(error1 > error) { + error = error1; + } + } + // if (input->dim_count == 1 && params.k == 1) Follow the input scale and zero_point + if(input->dim_count != 1 || params.k != 1) { + output1->data= ref_data1; + get_quant_info(output1); + } else { + output1->qinfo = input->qinfo; + } + + input->data = input_data; + reference1->data = ref_data1; + reference2->data = ref_data2; + output1->data = (uint8_t *)malloc(out_size * sizeof(uint8_t)); + output2->data = (int *)malloc(out_size * sizeof(int)); + + float difference1 = argc > 2 ? atof(argv[2]) : 2 * error; + float difference2 = argc > 3 ? atof(argv[3]) : 0; + printf("The max error is %.6lf.\n", error); + + if (csi_topk_init(input, output1, output2, ¶ms) == CSINN_TRUE) { + csi_topk(input, output1, output2, ¶ms); + } + + result_verify_8(reference1->data, output1, input->data, difference1, out_size, false); + /* + when inputs: such as [5.0001, 5.0000] + they all quantized by [200, 200] + so their output_indices are reversed + */ + // result_verify_int32(reference2->data, output2->data, input->data, difference2, out_size, false); + + free(buffer); + free(output1->data); + free(output2->data); + free(input_data); + return done_testing(); +} diff --git a/tests/validation_layer/transpose.c b/tests/validation_layer/transpose.c new file mode 100644 index 00000000..8a716a74 --- /dev/null +++ b/tests/validation_layer/transpose.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of transpose(layer).\n"); + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct transpose_params params; + int in_size = 1, out_size = 1; + float max_error = 0.0f; + + int *buffer = read_input_data_f32(argv[1]); + input->dim_count = buffer[0]; // input->dim_count == 4 + output->dim_count = input->dim_count; + + int32_t *perm = (int32_t *)malloc(input->dim_count * sizeof(int32_t)); + + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[i + 1]; + perm[i] = buffer[input->dim_count + i + 1]; + output->dim[i] = buffer[2 * input->dim_count + i + 1]; + in_size *= input->dim[i]; + } + out_size = in_size; + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + params.permute = perm; + params.permute_num = input->dim_count; + params.base.layout = CSINN_LAYOUT_NCHW; + + input->data = (float *)(buffer + 1 + input->dim_count * 3); + reference->data = (float *)(buffer + 1 + input->dim_count * 3 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_transpose_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_transpose_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_transpose_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/trunc.c b/tests/validation_layer/trunc.c new file mode 100644 index 00000000..3299ff42 --- /dev/null +++ b/tests/validation_layer/trunc.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of trunc(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = buffer[3]; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = in_size; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 4); + reference->data = (float *)(buffer + 4 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_trunc_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_trunc_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_trunc_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_layer/unsorted_segment_max.c b/tests/validation_layer/unsorted_segment_max.c new file mode 100644 index 00000000..ba9e9afa --- /dev/null +++ b/tests/validation_layer/unsorted_segment_max.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of unsorted segment max(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *segment = csi_alloc_tensor(NULL); + struct segment_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + output->dim[0] = buffer[4]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[2]; + output->dim[3] = buffer[3]; + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.num_segments = buffer[4]; + params.unsorted = CSINN_TRUE; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->data = (float *)(buffer + 5); + segment->data = (int *)(buffer + 5 + in_size); + reference->data = (float *)(buffer + 5 + in_size + buffer[0]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_segment_max_CSINN_QUANT_FLOAT32(input, segment, output, ¶ms, &difference); + test_segment_max_CSINN_QUANT_UINT8_ASYM(input, segment, output, ¶ms, &difference); + test_segment_max_CSINN_QUANT_INT8_SYM(input, segment, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/unsorted_segment_mean.c b/tests/validation_layer/unsorted_segment_mean.c new file mode 100644 index 00000000..8d37ade8 --- /dev/null +++ b/tests/validation_layer/unsorted_segment_mean.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of unsorted segment mean(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *segment = csi_alloc_tensor(NULL); + struct segment_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + output->dim[0] = buffer[4]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[2]; + output->dim[3] = buffer[3]; + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.num_segments = buffer[4]; + params.unsorted = CSINN_TRUE; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->data = (float *)(buffer + 5); + segment->data = (int *)(buffer + 5 + in_size); + reference->data = (float *)(buffer + 5 + in_size + buffer[0]);; + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_segment_mean_CSINN_QUANT_FLOAT32(input, segment, output, ¶ms, &difference); + test_segment_mean_CSINN_QUANT_UINT8_ASYM(input, segment, output, ¶ms, &difference); + test_segment_mean_CSINN_QUANT_INT8_SYM(input, segment, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/unsorted_segment_min.c b/tests/validation_layer/unsorted_segment_min.c new file mode 100644 index 00000000..4faae05a --- /dev/null +++ b/tests/validation_layer/unsorted_segment_min.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of unsorted segment min(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *segment = csi_alloc_tensor(NULL); + struct segment_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + output->dim[0] = buffer[4]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[2]; + output->dim[3] = buffer[3]; + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.num_segments = buffer[4]; + params.unsorted = CSINN_TRUE; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->data = (float *)(buffer + 5); + segment->data = (int *)(buffer + 5 + in_size); + reference->data = (float *)(buffer + 5 + in_size + buffer[0]);; + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_segment_min_CSINN_QUANT_FLOAT32(input, segment, output, ¶ms, &difference); + test_segment_min_CSINN_QUANT_UINT8_ASYM(input, segment, output, ¶ms, &difference); + test_segment_min_CSINN_QUANT_INT8_SYM(input, segment, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/unsorted_segment_prod.c b/tests/validation_layer/unsorted_segment_prod.c new file mode 100644 index 00000000..5b356918 --- /dev/null +++ b/tests/validation_layer/unsorted_segment_prod.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of unsorted segment prod(laye).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *segment = csi_alloc_tensor(NULL); + struct segment_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + output->dim[0] = buffer[4]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[2]; + output->dim[3] = buffer[3]; + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.num_segments = buffer[4]; + params.unsorted = CSINN_TRUE; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->data = (float *)(buffer + 5); + segment->data = (int *)(buffer + 5 + in_size); + reference->data = (float *)(buffer + 5 + in_size + buffer[0]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_segment_prod_CSINN_QUANT_FLOAT32(input, segment, output, ¶ms, &difference); + test_segment_prod_CSINN_QUANT_UINT8_ASYM(input, segment, output, ¶ms, &difference); + test_segment_prod_CSINN_QUANT_INT8_SYM(input, segment, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/unsorted_segment_sum.c b/tests/validation_layer/unsorted_segment_sum.c new file mode 100644 index 00000000..91888e92 --- /dev/null +++ b/tests/validation_layer/unsorted_segment_sum.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of unsorted segment sum(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct csi_tensor *segment = csi_alloc_tensor(NULL); + struct segment_params params; + int in_size, out_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; + input->dim[1] = buffer[1]; + input->dim[2] = buffer[2]; + input->dim[3] = buffer[3]; + output->dim[0] = buffer[4]; + output->dim[1] = buffer[1]; + output->dim[2] = buffer[2]; + output->dim[3] = buffer[3]; + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.num_segments = buffer[4]; + params.unsorted = CSINN_TRUE; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + out_size = output->dim[0] * output->dim[1] * output->dim[2] * output->dim[3]; + + input->data = (float *)(buffer + 5); + segment->data = (int *)(buffer + 5 + in_size); + reference->data = (float *)(buffer + 5 + in_size + buffer[0]); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_segment_sum_CSINN_QUANT_FLOAT32(input, segment, output, ¶ms, &difference); + test_segment_sum_CSINN_QUANT_UINT8_ASYM(input, segment, output, ¶ms, &difference); + test_segment_sum_CSINN_QUANT_INT8_SYM(input, segment, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/unstack.c b/tests/validation_layer/unstack.c new file mode 100644 index 00000000..cd76944a --- /dev/null +++ b/tests/validation_layer/unstack.c @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of unstack(layer).\n"); + + int in_size = 1; + int out_size = 1; + + + int *buffer = read_input_data_f32(argv[1]); + struct unstack_params params; + struct csi_tensor *input = csi_alloc_tensor(NULL); + params.axis = buffer[0]; + input->dim_count = buffer[1]; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + + for(int i = 0; i < input->dim_count; i++) { + input->dim[i] = buffer[2+i]; + in_size *= input->dim[i]; + } + params.outputs_count = input->dim[params.axis]; + + struct csi_tensor *reference[params.outputs_count]; + for(int i = 0; i < params.outputs_count; i++) { + reference[i] = csi_alloc_tensor(NULL); + } + + out_size = in_size / params.outputs_count; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + input->data = (float *)(buffer + 2 + input->dim_count); + + struct csi_tensor *output[params.outputs_count]; + for (int i = 0; i < params.outputs_count; i++) { + output[i] = csi_alloc_tensor(NULL); + output[i]->dim_count = input->dim_count - 1; + output[i]->dtype = CSINN_DTYPE_FLOAT32; + output[i]->layout = CSINN_LAYOUT_NCHW; + output[i]->is_const = 0; + output[i]->quant_channel = 1; + for(int j = 0; j < input->dim_count; j++) { + if(j < params.axis) { + output[i]->dim[j] = input->dim[j]; + } else if(j > params.axis) { + output[i]->dim[j-1] = input->dim[j]; + } + } + + reference[i]->data = (float *)(buffer + 2 + input->dim_count + in_size + out_size * i); + output[i]->data = reference[i]->data; + } + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_unstack_CSINN_QUANT_FLOAT32(input, (struct csi_tensor **)output, ¶ms, &difference); + test_unstack_CSINN_QUANT_UINT8_ASYM(input, (struct csi_tensor **)output, ¶ms, &difference); + test_unstack_CSINN_QUANT_INT8_SYM(input, (struct csi_tensor **)output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/xor.c b/tests/validation_layer/xor.c new file mode 100644 index 00000000..7c693ec6 --- /dev/null +++ b/tests/validation_layer/xor.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of xor u32.\n"); + + struct csi_tensor *input0 = csi_alloc_tensor(NULL); + struct csi_tensor *input1 = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct diso_params params; + int in_size = 1, out_size = 1; + + int *buffer = read_input_data_f32(argv[1]); + input0->dim_count = buffer[0]; + input1->dim_count = buffer[0]; + output->dim_count = input0->dim_count; + for(int i = 0; i < input0->dim_count; i++) { + input0->dim[i] = buffer[i + 1]; + input1->dim[i] = buffer[i + 1]; + output->dim[i] = input0->dim[i]; + in_size *= input0->dim[i]; + } + + out_size = in_size; + input0->dtype = CSINN_DTYPE_UINT32; + input0->layout = CSINN_LAYOUT_NCHW; + input0->is_const = 0; + input0->quant_channel = 1; + input1->dtype = CSINN_DTYPE_UINT32; + input1->layout = CSINN_LAYOUT_NCHW; + input1->is_const = 0; + input1->quant_channel = 1; + output->dtype = CSINN_DTYPE_UINT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input0->data = (uint32_t *)(buffer + 1 + input0->dim_count); + input1->data = (uint32_t *)(buffer + 1 + input0->dim_count + in_size); + reference->data = (uint32_t *)(buffer + 1 + input0->dim_count + 2 * in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_xor_CSINN_QUANT_FLOAT32(input0, input1, output, ¶ms, &difference); + test_xor_CSINN_QUANT_UINT8_ASYM(input0, input1, output, ¶ms, &difference); + test_xor_CSINN_QUANT_INT8_SYM(input0, input1, output, ¶ms, &difference); + + return done_testing(); +} diff --git a/tests/validation_layer/yuv_rgb_scale.c b/tests/validation_layer/yuv_rgb_scale.c new file mode 100644 index 00000000..54796744 --- /dev/null +++ b/tests/validation_layer/yuv_rgb_scale.c @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* CSI-NN2 version 1.12.x */ + +#include "test_utils.h" +#include "csi_nn.h" +#include "math_snr.h" + +int main(int argc, char** argv) +{ + init_testsuite("Testing function of yuv_rgb_scale(layer).\n"); + + struct csi_tensor *input = csi_alloc_tensor(NULL); + struct csi_tensor *output = csi_alloc_tensor(NULL); + struct csi_tensor *reference = csi_alloc_tensor(NULL); + struct siso_params params; + int in_size; + + int *buffer = read_input_data_f32(argv[1]); + input->dim[0] = buffer[0]; // batch + input->dim[1] = buffer[1]; // height + input->dim[2] = buffer[2]; // width + input->dim[3] = 3; // channel + + output->dim[0] = input->dim[0]; + output->dim[1] = input->dim[1]; + output->dim[2] = input->dim[2]; + output->dim[3] = input->dim[3]; + + input->dim_count = 4; + output->dim_count = 4; + input->dtype = CSINN_DTYPE_FLOAT32; + input->layout = CSINN_LAYOUT_NCHW; + input->is_const = 0; + input->quant_channel = 1; + output->dtype = CSINN_DTYPE_FLOAT32; + output->layout = CSINN_LAYOUT_NCHW; + output->is_const = 0; + output->quant_channel = 1; + in_size = input->dim[0] * input->dim[1] * input->dim[2] * input->dim[3]; + params.base.api = CSINN_API; + params.base.run_mode = CSINN_RM_LAYER; + + input->data = (float *)(buffer + 3); + reference->data = (float *)(buffer + 3 + in_size); + output->data = reference->data; + float difference = argc > 2 ? atof(argv[2]) : 0.99; + + test_yuv_rgb_scale_CSINN_QUANT_FLOAT32(input, output, ¶ms, &difference); + test_yuv_rgb_scale_CSINN_QUANT_UINT8_ASYM(input, output, ¶ms, &difference); + test_yuv_rgb_scale_CSINN_QUANT_INT8_SYM(input, output, ¶ms, &difference); + + return done_testing(); +} \ No newline at end of file diff --git a/tests/validation_xt800/Makefile.e804 b/tests/validation_xt800/Makefile.e804 index fbdb8c74..bc2595a6 100644 --- a/tests/validation_xt800/Makefile.e804 +++ b/tests/validation_xt800/Makefile.e804 @@ -1,9 +1,9 @@ -LIB_DIR = ../../lib -INCLUDE = -I../../include -I../utils +LIB_DIR = ../../csky_elf_build +INCLUDE = -I../../include -I../utils CFLAGS = -O0 -g3 -static CFLAGS += -mcpu=e804d CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -CFLAGS += -DCSINN_API=10 -DCSI_BUILD_E804 +CFLAGS += -DCSINN_API=10 -DCSI_BUILD_E804 -DCSI_BUILD_RTOS LIB_NAME = csi_nn2_e804 CC = csky-abiv2-elf-gcc BOARD = ./board/smartl/crt0.o -T./board/smartl/ckcpu.ld ./board/smartl/uart.o diff --git a/tests/validation_xt800/Makefile.i805 b/tests/validation_xt800/Makefile.i805 index 2c0cfe59..15eced16 100644 --- a/tests/validation_xt800/Makefile.i805 +++ b/tests/validation_xt800/Makefile.i805 @@ -1,9 +1,9 @@ -LIB_DIR = ../../lib +LIB_DIR = ../../csky_elf_build INCLUDE = -I../../include -I../utils CFLAGS = -O0 -g3 -static CFLAGS += -mcpu=ck805ef -mhard-float CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -CFLAGS += -DCSINN_API=9 -DCSI_BUILD_I805 +CFLAGS += -DCSINN_API=9 -DCSI_BUILD_I805 -DCSI_BUILD_RTOS LIB_NAME = csi_nn2_i805 CC = csky-abiv2-elf-gcc #BOARD = ./board/smartl/crt0.o -T./board/smartl/ckcpu.ld ./board/smartl/uart.o diff --git a/tests/validation_xt800/Makefile.ref_i805 b/tests/validation_xt800/Makefile.ref_i805 index 85d7105d..911d0d09 100644 --- a/tests/validation_xt800/Makefile.ref_i805 +++ b/tests/validation_xt800/Makefile.ref_i805 @@ -1,9 +1,9 @@ -LIB_DIR = ../../lib -INCLUDE = -I../../include -I../utils +LIB_DIR = ../../csky_elf_build +INCLUDE = -I../../include -I../utils CFLAGS = -O0 -g3 -static CFLAGS += -mcpu=i805 CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections -CFLAGS += -DCSINN_API=11 -DCSI_BUILD_REF_I805 +CFLAGS += -DCSINN_API=11 -DCSI_BUILD_REF_I805 -DCSI_BUILD_RTOS LIB_NAME = csi_nn2_ref_i805 CC = csky-abiv2-elf-gcc BOARD = ./board/smartl/crt0.o -T./board/smartl/ckcpu.ld ./board/smartl/uart.o diff --git a/tests/validation_xt800/avgpool_nonsquare_q7_1.c b/tests/validation_xt800/avgpool_nonsquare_q7_1.c index 2a9bd727..fe0f466b 100644 --- a/tests/validation_xt800/avgpool_nonsquare_q7_1.c +++ b/tests/validation_xt800/avgpool_nonsquare_q7_1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/avgpool_nonsquare_q7_2.c b/tests/validation_xt800/avgpool_nonsquare_q7_2.c index 10a99380..029d313a 100644 --- a/tests/validation_xt800/avgpool_nonsquare_q7_2.c +++ b/tests/validation_xt800/avgpool_nonsquare_q7_2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/avgpool_q7_1.c b/tests/validation_xt800/avgpool_q7_1.c index e7daa6e6..3fe300d6 100644 --- a/tests/validation_xt800/avgpool_q7_1.c +++ b/tests/validation_xt800/avgpool_q7_1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/avgpool_q7_2.c b/tests/validation_xt800/avgpool_q7_2.c index 755c6891..c2ae610c 100644 --- a/tests/validation_xt800/avgpool_q7_2.c +++ b/tests/validation_xt800/avgpool_q7_2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/board/smartl/Makefile b/tests/validation_xt800/board/smartl/Makefile index 82704e00..9f7d4127 100644 --- a/tests/validation_xt800/board/smartl/Makefile +++ b/tests/validation_xt800/board/smartl/Makefile @@ -1,7 +1,7 @@ SRS = $(wildcard *.S) SRC = $(wildcard *.c) -INCLUDE = -I../../../../include/include_xt800/csi_core_include/ +INCLUDE = -I./csi_core_include/ CFLAGS += $(INCLUDE) diff --git a/tests/validation_xt800/board/smartl/crt0.S b/tests/validation_xt800/board/smartl/crt0.S index 17aa6d87..307c2cb8 100755 --- a/tests/validation_xt800/board/smartl/crt0.S +++ b/tests/validation_xt800/board/smartl/crt0.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/include/include_xt800/csi_core_include/core_804.h b/tests/validation_xt800/board/smartl/csi_core_include/core_804.h similarity index 100% rename from include/include_xt800/csi_core_include/core_804.h rename to tests/validation_xt800/board/smartl/csi_core_include/core_804.h diff --git a/include/include_xt800/csi_core_include/core_805.h b/tests/validation_xt800/board/smartl/csi_core_include/core_805.h similarity index 100% rename from include/include_xt800/csi_core_include/core_805.h rename to tests/validation_xt800/board/smartl/csi_core_include/core_805.h diff --git a/include/include_xt800/csi_core_include/csi_core.h b/tests/validation_xt800/board/smartl/csi_core_include/csi_core.h similarity index 100% rename from include/include_xt800/csi_core_include/csi_core.h rename to tests/validation_xt800/board/smartl/csi_core_include/csi_core.h diff --git a/include/include_xt800/csi_core_include/csi_gcc.h b/tests/validation_xt800/board/smartl/csi_core_include/csi_gcc.h similarity index 100% rename from include/include_xt800/csi_core_include/csi_gcc.h rename to tests/validation_xt800/board/smartl/csi_core_include/csi_gcc.h diff --git a/tests/validation_xt800/board/smartl/smartl.h b/tests/validation_xt800/board/smartl/smartl.h index 27da6713..38cc3c42 100644 --- a/tests/validation_xt800/board/smartl/smartl.h +++ b/tests/validation_xt800/board/smartl/smartl.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/tests/validation_xt800/board/smartl/uart.c b/tests/validation_xt800/board/smartl/uart.c index 730674bb..d5935d1f 100644 --- a/tests/validation_xt800/board/smartl/uart.c +++ b/tests/validation_xt800/board/smartl/uart.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/tests/validation_xt800/board/smartl/uart.h b/tests/validation_xt800/board/smartl/uart.h index d1678400..fdaaa18b 100644 --- a/tests/validation_xt800/board/smartl/uart.h +++ b/tests/validation_xt800/board/smartl/uart.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2019 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * diff --git a/tests/validation_xt800/convolution_1x1_q7_1.c b/tests/validation_xt800/convolution_1x1_q7_1.c index 2a1760cf..9957b7c2 100644 --- a/tests/validation_xt800/convolution_1x1_q7_1.c +++ b/tests/validation_xt800/convolution_1x1_q7_1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/convolution_1x1_q7_2.c b/tests/validation_xt800/convolution_1x1_q7_2.c index 34b942b2..e2087a79 100644 --- a/tests/validation_xt800/convolution_1x1_q7_2.c +++ b/tests/validation_xt800/convolution_1x1_q7_2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/convolution_RGB_q7.c b/tests/validation_xt800/convolution_RGB_q7.c index a7cab395..11164e16 100644 --- a/tests/validation_xt800/convolution_RGB_q7.c +++ b/tests/validation_xt800/convolution_RGB_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/convolution_basic_q7_1.c b/tests/validation_xt800/convolution_basic_q7_1.c index 562569c5..a3809edb 100644 --- a/tests/validation_xt800/convolution_basic_q7_1.c +++ b/tests/validation_xt800/convolution_basic_q7_1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/convolution_basic_q7_2.c b/tests/validation_xt800/convolution_basic_q7_2.c index c02a070b..36a997d5 100644 --- a/tests/validation_xt800/convolution_basic_q7_2.c +++ b/tests/validation_xt800/convolution_basic_q7_2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/convolution_basic_q7_3.c b/tests/validation_xt800/convolution_basic_q7_3.c index 86e9bf2d..ef3f2789 100644 --- a/tests/validation_xt800/convolution_basic_q7_3.c +++ b/tests/validation_xt800/convolution_basic_q7_3.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/convolution_nonsquare_q7_1.c b/tests/validation_xt800/convolution_nonsquare_q7_1.c index 41db7af6..a9afe4ab 100644 --- a/tests/validation_xt800/convolution_nonsquare_q7_1.c +++ b/tests/validation_xt800/convolution_nonsquare_q7_1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/convolution_nonsquare_q7_2.c b/tests/validation_xt800/convolution_nonsquare_q7_2.c index f9b2504f..89df723f 100644 --- a/tests/validation_xt800/convolution_nonsquare_q7_2.c +++ b/tests/validation_xt800/convolution_nonsquare_q7_2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/convolution_nonsquare_q7_3.c b/tests/validation_xt800/convolution_nonsquare_q7_3.c index efe5c625..ce8d8178 100644 --- a/tests/validation_xt800/convolution_nonsquare_q7_3.c +++ b/tests/validation_xt800/convolution_nonsquare_q7_3.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/convolution_q15.c b/tests/validation_xt800/convolution_q15.c index 1df83cbd..73b2a484 100644 --- a/tests/validation_xt800/convolution_q15.c +++ b/tests/validation_xt800/convolution_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/depthwise_convolution_nonsquare_q7.c b/tests/validation_xt800/depthwise_convolution_nonsquare_q7.c index c3369002..a622a895 100644 --- a/tests/validation_xt800/depthwise_convolution_nonsquare_q7.c +++ b/tests/validation_xt800/depthwise_convolution_nonsquare_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/depthwise_convolution_q7.c b/tests/validation_xt800/depthwise_convolution_q7.c index 3c7f89c5..53ed208b 100644 --- a/tests/validation_xt800/depthwise_convolution_q7.c +++ b/tests/validation_xt800/depthwise_convolution_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/fullyconnected_q15.c b/tests/validation_xt800/fullyconnected_q15.c index 01d38ece..68560be2 100644 --- a/tests/validation_xt800/fullyconnected_q15.c +++ b/tests/validation_xt800/fullyconnected_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/fullyconnected_q7.c b/tests/validation_xt800/fullyconnected_q7.c index 33bb2e27..6903ebbb 100644 --- a/tests/validation_xt800/fullyconnected_q7.c +++ b/tests/validation_xt800/fullyconnected_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/maxpool_q7_1.c b/tests/validation_xt800/maxpool_q7_1.c index f849be39..ea884ef2 100644 --- a/tests/validation_xt800/maxpool_q7_1.c +++ b/tests/validation_xt800/maxpool_q7_1.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/maxpool_q7_2.c b/tests/validation_xt800/maxpool_q7_2.c index 53d64005..25eb93ff 100644 --- a/tests/validation_xt800/maxpool_q7_2.c +++ b/tests/validation_xt800/maxpool_q7_2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/relu_q15.c b/tests/validation_xt800/relu_q15.c index 69875f8b..356d464d 100644 --- a/tests/validation_xt800/relu_q15.c +++ b/tests/validation_xt800/relu_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/relu_q7.c b/tests/validation_xt800/relu_q7.c index d02451f8..7e7a5f24 100644 --- a/tests/validation_xt800/relu_q7.c +++ b/tests/validation_xt800/relu_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/sigmoid_q15.c b/tests/validation_xt800/sigmoid_q15.c index 71452422..12ae0b7d 100644 --- a/tests/validation_xt800/sigmoid_q15.c +++ b/tests/validation_xt800/sigmoid_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/sigmoid_q7.c b/tests/validation_xt800/sigmoid_q7.c index f429bd45..e10cfd02 100644 --- a/tests/validation_xt800/sigmoid_q7.c +++ b/tests/validation_xt800/sigmoid_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/softmax_q15.c b/tests/validation_xt800/softmax_q15.c index 12861796..c8447b6b 100644 --- a/tests/validation_xt800/softmax_q15.c +++ b/tests/validation_xt800/softmax_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/softmax_q7.c b/tests/validation_xt800/softmax_q7.c index 7421b97a..553b3c8f 100644 --- a/tests/validation_xt800/softmax_q7.c +++ b/tests/validation_xt800/softmax_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/tanh_q15.c b/tests/validation_xt800/tanh_q15.c index 00c91393..8bf7c4d2 100644 --- a/tests/validation_xt800/tanh_q15.c +++ b/tests/validation_xt800/tanh_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/tanh_q7.c b/tests/validation_xt800/tanh_q7.c index 4602e8ec..dde2ff17 100644 --- a/tests/validation_xt800/tanh_q7.c +++ b/tests/validation_xt800/tanh_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/add_u8.c b/tests/validation_xt800/u8_testcases/add_u8.c index 3f6b714f..00ac5a33 100644 --- a/tests/validation_xt800/u8_testcases/add_u8.c +++ b/tests/validation_xt800/u8_testcases/add_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/clip_u8.c b/tests/validation_xt800/u8_testcases/clip_u8.c index 21fcabf4..d8d5c3d8 100644 --- a/tests/validation_xt800/u8_testcases/clip_u8.c +++ b/tests/validation_xt800/u8_testcases/clip_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/convolution_1x1_u8.c b/tests/validation_xt800/u8_testcases/convolution_1x1_u8.c index 7b9a170f..3a86e77d 100644 --- a/tests/validation_xt800/u8_testcases/convolution_1x1_u8.c +++ b/tests/validation_xt800/u8_testcases/convolution_1x1_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/convolution_u8.c b/tests/validation_xt800/u8_testcases/convolution_u8.c index 84fdd0fe..9175eaa7 100644 --- a/tests/validation_xt800/u8_testcases/convolution_u8.c +++ b/tests/validation_xt800/u8_testcases/convolution_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/depthwise_convolution_u8.c b/tests/validation_xt800/u8_testcases/depthwise_convolution_u8.c index 4a9cae96..4ceaccf7 100644 --- a/tests/validation_xt800/u8_testcases/depthwise_convolution_u8.c +++ b/tests/validation_xt800/u8_testcases/depthwise_convolution_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/fullyconnected_u8.c b/tests/validation_xt800/u8_testcases/fullyconnected_u8.c index cf36bbc1..f20f3eb4 100644 --- a/tests/validation_xt800/u8_testcases/fullyconnected_u8.c +++ b/tests/validation_xt800/u8_testcases/fullyconnected_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/maxpool_u8.c b/tests/validation_xt800/u8_testcases/maxpool_u8.c index a968ad3d..49376826 100644 --- a/tests/validation_xt800/u8_testcases/maxpool_u8.c +++ b/tests/validation_xt800/u8_testcases/maxpool_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/mul_u8.c b/tests/validation_xt800/u8_testcases/mul_u8.c index adfe3ec5..6b9339dc 100644 --- a/tests/validation_xt800/u8_testcases/mul_u8.c +++ b/tests/validation_xt800/u8_testcases/mul_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/relu6_u8.c b/tests/validation_xt800/u8_testcases/relu6_u8.c index dc196b49..e65737c3 100644 --- a/tests/validation_xt800/u8_testcases/relu6_u8.c +++ b/tests/validation_xt800/u8_testcases/relu6_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/relu_u8.c b/tests/validation_xt800/u8_testcases/relu_u8.c index 739aa759..7dd7e8b8 100644 --- a/tests/validation_xt800/u8_testcases/relu_u8.c +++ b/tests/validation_xt800/u8_testcases/relu_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/u8_testcases/reshape_u8.c b/tests/validation_xt800/u8_testcases/reshape_u8.c index 155a507f..8a75c3af 100644 --- a/tests/validation_xt800/u8_testcases/reshape_u8.c +++ b/tests/validation_xt800/u8_testcases/reshape_u8.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/verify_avgpool_q7.c b/tests/validation_xt800/verify_avgpool_q7.c index c7e94095..cc027b8e 100644 --- a/tests/validation_xt800/verify_avgpool_q7.c +++ b/tests/validation_xt800/verify_avgpool_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/verify_convolution_q15.c b/tests/validation_xt800/verify_convolution_q15.c index 8b704cb2..747c4595 100644 --- a/tests/validation_xt800/verify_convolution_q15.c +++ b/tests/validation_xt800/verify_convolution_q15.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/verify_convolution_q7.c b/tests/validation_xt800/verify_convolution_q7.c index 908c802c..f1eef0dd 100644 --- a/tests/validation_xt800/verify_convolution_q7.c +++ b/tests/validation_xt800/verify_convolution_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/verify_depthwise_conv2d_q7.c b/tests/validation_xt800/verify_depthwise_conv2d_q7.c index f80b7230..b5ccdf88 100644 --- a/tests/validation_xt800/verify_depthwise_conv2d_q7.c +++ b/tests/validation_xt800/verify_depthwise_conv2d_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/tests/validation_xt800/verify_maxpool_q7.c b/tests/validation_xt800/verify_maxpool_q7.c index 96cfd15a..8682a717 100644 --- a/tests/validation_xt800/verify_maxpool_q7.c +++ b/tests/validation_xt800/verify_maxpool_q7.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2021 C-SKY Limited. All rights reserved. + * Copyright (C) 2016-2022 T-Head Semiconductor Co., Ltd. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -16,7 +16,7 @@ * limitations under the License. */ -/* CSI-NN2 version 1.10.x */ +/* CSI-NN2 version 1.12.x */ #include "test_utils.h" #include "csi_nn.h" diff --git a/version b/version index 0e73a4b8..393ccdb5 100644 --- a/version +++ b/version @@ -1 +1 @@ -1.10.17 +1.12.10