Skip to content

Commit

Permalink
NN2: version 1.10
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangwm-pt committed Jan 4, 2022
1 parent c775d49 commit 3317645
Show file tree
Hide file tree
Showing 1,624 changed files with 360,760 additions and 25,554 deletions.
47 changes: 11 additions & 36 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,7 @@ endif
export CROSS_COMPILE INSTALL_DIR


all: nn2_ref_x86 nn2_ref

# nn2_ref: run the sub-make in build_script/nn2_ref with DSP_LIB=libcsi_nn2_ref
# and the c860v flags, then delete every .o below source/.
# - $(MAKE) instead of a literal `make`, so flags such as -n reach the sub-make.
# - '*.o' is quoted so the shell cannot expand it before find sees the pattern.
# - find is handed the directory itself; no `cd` is involved, so a failed `cd`
#   can no longer redirect the deletion to the current directory.
nn2_ref:
	DSP_LIB="libcsi_nn2_ref" CFLAGS="-mcpu=c860v -DCSI_BUILD_REF $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="csky-abiv2-linux-" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_ref -j8
	find source -name '*.o' -exec rm -f {} +
all: nn2_ref_x86

nn2_c860:
DSP_LIB="libcsi_nn2_c860" CFLAGS="-mcpu=c860v -DCSI_BUILD_REF $(EXTRA_CFLAGS)" \
Expand All @@ -37,48 +32,28 @@ nn2_ref_x86:
CROSS_COMPILE="" NN2_ROOT=${NN2_ROOT} make -C build_script/nn2_ref nn2_shared -j8
cd source/; find . -name *.o | xargs rm; cd -

# nn2_openvx: two passes through build_script/nn2_openvx.
#   1. default goal with DSP_LIB=libcsi_nn2_openvx.a, then delete every .o
#      below source/ so the second pass recompiles with -fPIC;
#   2. goal nn2_shared with DSP_LIB=libcsi_nn2_openvx.so.
# - $(MAKE) instead of a literal `make`, so flags such as -n reach the sub-make.
# - '*.o' is quoted so the shell cannot expand it before find sees the pattern,
#   and find is handed the directory itself, so a failed `cd` can no longer
#   redirect the deletion to the current directory.
# NOTE(review): unlike nn2_gref, objects are not removed after the shared pass;
# kept as-is here, confirm whether that is intentional.
nn2_openvx:
	DSP_LIB="libcsi_nn2_openvx.a" CFLAGS="-mcpu=c860v -DCSI_BUILD_OPENVX -mhard-float $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="csky-abiv2-linux-" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_openvx -j8
	find source -name '*.o' -exec rm -f {} +
	DSP_LIB="libcsi_nn2_openvx.so" CFLAGS="-mcpu=c860v -fPIC -DCSI_BUILD_OPENVX -mhard-float $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="csky-abiv2-linux-" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_openvx nn2_shared -j8
# nn2_ref_i805: run the sub-make in build_script/nn2_ref_i805 with
# DSP_LIB=libcsi_nn2_ref_i805.a and the csky-abiv2-elf- toolchain prefix,
# then delete every .o below source/.
# - $(MAKE) instead of a literal `make`, so flags such as -n reach the sub-make.
# - '*.o' is quoted so the shell cannot expand it before find sees the pattern,
#   and find is handed the directory itself, so a failed `cd` can no longer
#   redirect the deletion to the current directory.
nn2_ref_i805:
	DSP_LIB="libcsi_nn2_ref_i805.a" CFLAGS="-DCSI_BUILD_REF_I805 -DCSI_MATH_DSP -mcpu=i805 $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="csky-abiv2-elf-" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_ref_i805 -j8
	find source -name '*.o' -exec rm -f {} +

# nn2_pnna: two passes through build_script/nn2_pnna with the
# riscv64-unknown-linux-gnu- toolchain prefix.
#   1. default goal with DSP_LIB=libcsi_nn2_pnna.a, then delete every .o
#      below source/ so the second pass recompiles with -fPIC;
#   2. goal nn2_shared with DSP_LIB=libcsi_nn2_pnna.so.
# - $(MAKE) instead of a literal `make`, so flags such as -n reach the sub-make.
# - '*.o' is quoted so the shell cannot expand it before find sees the pattern,
#   and find is handed the directory itself, so a failed `cd` can no longer
#   redirect the deletion to the current directory.
# NOTE(review): objects are not removed after the shared pass; kept as-is,
# confirm whether that is intentional.
nn2_pnna:
	DSP_LIB="libcsi_nn2_pnna.a" CFLAGS="-DCSI_BUILD_PNNA $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="riscv64-unknown-linux-gnu-" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_pnna -j8
	find source -name '*.o' -exec rm -f {} +
	DSP_LIB="libcsi_nn2_pnna.so" CFLAGS="-fPIC -DCSI_BUILD_PNNA $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="riscv64-unknown-linux-gnu-" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_pnna nn2_shared -j8
# nn2_e804: run the sub-make in build_script/nn2_e804 with
# DSP_LIB=libcsi_nn2_e804.a and the csky-abiv2-elf- toolchain prefix,
# then delete every .o below source/.
# - $(MAKE) instead of a literal `make`, so flags such as -n reach the sub-make.
# - '*.o' is quoted so the shell cannot expand it before find sees the pattern,
#   and find is handed the directory itself, so a failed `cd` can no longer
#   redirect the deletion to the current directory.
nn2_e804:
	DSP_LIB="libcsi_nn2_e804.a" CFLAGS="-DCSI_BUILD_E804 -mcpu=e804d -mno-required-attr-fpu-abi $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="csky-abiv2-elf-" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_e804 -j8
	find source -name '*.o' -exec rm -f {} +

# nn2_pnna_x86: two passes through build_script/nn2_pnna with an empty
# CROSS_COMPILE prefix (host toolchain).
#   1. default goal with DSP_LIB=libcsi_nn2_pnna_x86.a, then delete every .o
#      below source/ so the second pass recompiles with -fPIC;
#   2. goal nn2_shared_x86 with DSP_LIB=libcsi_nn2_pnna_x86.so.
# - $(MAKE) instead of a literal `make`, so flags such as -n reach the sub-make.
# - '*.o' is quoted so the shell cannot expand it before find sees the pattern,
#   and find is handed the directory itself, so a failed `cd` can no longer
#   redirect the deletion to the current directory.
# NOTE(review): objects are not removed after the shared pass; kept as-is,
# confirm whether that is intentional.
nn2_pnna_x86:
	DSP_LIB="libcsi_nn2_pnna_x86.a" CFLAGS="-DCSI_BUILD_PNNA $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_pnna -j8
	find source -name '*.o' -exec rm -f {} +
	DSP_LIB="libcsi_nn2_pnna_x86.so" CFLAGS="-fPIC -DCSI_BUILD_PNNA $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_pnna nn2_shared_x86 -j8
# nn2_i805: run the sub-make in build_script/nn2_i805 with
# DSP_LIB=libcsi_nn2_i805.a and the csky-abiv2-elf- toolchain prefix,
# then delete every .o below source/.
# - $(MAKE) instead of a literal `make`, so flags such as -n reach the sub-make.
# - '*.o' is quoted so the shell cannot expand it before find sees the pattern,
#   and find is handed the directory itself, so a failed `cd` can no longer
#   redirect the deletion to the current directory.
nn2_i805:
	DSP_LIB="libcsi_nn2_i805.a" CFLAGS="-DCSI_BUILD_I805 -DCSI_BUILD_REF -DCSI_BUILD_GREF -mcpu=ck805ef -mhard-float $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="csky-abiv2-elf-" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_i805 -j8
	find source -name '*.o' -exec rm -f {} +

# nn2_gref: two passes through build_script/nn2_gref with an empty
# CROSS_COMPILE prefix (host toolchain); every .o below source/ is deleted
# after each pass.
#   1. default goal with DSP_LIB=libcsi_nn2_gref.a;
#   2. goal nn2_shared with DSP_LIB=libcsi_nn2_gref.so (adds -fPIC).
# - $(MAKE) instead of a literal `make`, so flags such as -n reach the sub-make.
# - '*.o' is quoted so the shell cannot expand it before find sees the pattern,
#   and find is handed the directory itself, so a failed `cd` can no longer
#   redirect the deletion to the current directory.
nn2_gref:
	DSP_LIB="libcsi_nn2_gref.a" CFLAGS="-DCSI_BUILD_REF -DCSI_BUILD_GREF $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_gref -j8
	find source -name '*.o' -exec rm -f {} +
	DSP_LIB="libcsi_nn2_gref.so" CFLAGS="-fPIC -DCSI_BUILD_REF -DCSI_BUILD_GREF $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_gref nn2_shared -j8
	find source -name '*.o' -exec rm -f {} +

# nn2_dp1k: run goal nn2_shared in build_script/nn2_dp1k with
# DSP_LIB=libcsi_nn2_dp1000.so and an empty CROSS_COMPILE prefix, then delete
# every .o below source/.
# - $(MAKE) instead of a literal `make`, so flags such as -n reach the sub-make.
# - '*.o' is quoted so the shell cannot expand it before find sees the pattern,
#   and find is handed the directory itself, so a failed `cd` can no longer
#   redirect the deletion to the current directory.
nn2_dp1k:
	DSP_LIB="libcsi_nn2_dp1000.so" CFLAGS="-fPIC -DCSI_BUILD_DP1K -DCSI_BUILD_REF $(EXTRA_CFLAGS)" \
	CROSS_COMPILE="" NN2_ROOT=${NN2_ROOT} $(MAKE) -C build_script/nn2_dp1k nn2_shared -j8
	find source -name '*.o' -exec rm -f {} +

# install_nn2: stage the headers, the built libcsi_nn2_* libraries and the
# version file under ./install_nn2.
# The target shares its name with the directory the recipe creates, so it is
# declared phony; otherwise the existing directory would make it "up to date".
# cp options are written before the operands: trailing options only work with
# GNU argument permutation and break under POSIXLY_CORRECT or non-GNU cp.
.PHONY: install_nn2
install_nn2: include
	mkdir -p install_nn2/lib
	cp -r include install_nn2
	cp -rf lib/libcsi_nn2_* install_nn2/lib
	cp -rf version install_nn2/


clean:
Expand Down
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@ CSI-NN2 是 T-HEAD 提供的一组针对无剑 SoC 平台的神经网络库 API

CSI-NN2 的特性:

- 开源 c 代码版本的参考实现。
- 提供玄铁 CPU 的汇编优化实现。
- 支持 i8/u8 数据类型的对称和非对称量化。
- C 代码版本的参考实现。
- 提供玄铁系列 CPU 的汇编优化实现。
- 支持对称量化和非对称量化。
- 支持8位定点,16位定点和16位浮点等数据类型。
- 兼容 NCHW 和 NHWC 格式。
- 搭配 HHB 实现代码自动调用。
- 覆盖 CPU,GPU, NPU 体系结构
- 附加一些接口辅助使用,作为参考实现
- 搭配 [HHB](https://occ.t-head.cn/development/series/index?spm=a2cl5.14300690.0.0.4aca475a4yHCxV&id=3865005559921381376&type=kind) 实现代码自动调用。
- 覆盖 CPU,NPU 架构
- 附加一些辅助接口,参考使用

CSI-NN2 只提供接口声明和接口的参考实现,对各个接口的优化工作交由各个设备提供商完成
CSI-NN2 提供了完整的接口声明和接口的参考实现,各个设备提供商可以依此有针对性地完成各个接口的优化工作

## 文档说明

Expand All @@ -25,3 +26,5 @@ CSI-NN2 参考、借鉴了下列项目:
- [Tensorflow](https://github.com/tensorflow/tensorflow)
- [ncnn](https://github.com/Tencent/ncnn)
- [MNN](https://github.com/alibaba/MNN)
- [Tengine](https://github.com/OAID/Tengine)
- [CMSIS_5](https://github.com/ARM-software/CMSIS_5)
1 change: 1 addition & 0 deletions build_script/nn2.objs
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,5 @@ objects += ${NN2_ROOT}/source/nn2/utils.o
objects += ${NN2_ROOT}/source/nn2/setup.o
objects += ${NN2_ROOT}/source/nn2/node.o
objects += ${NN2_ROOT}/source/nn2/scatter.o
objects += ${NN2_ROOT}/source/nn2/memory.o
objects += ${NN2_ROOT}/source/nn2/debug.o
25 changes: 25 additions & 0 deletions build_script/nn2_c906/nn2_c906.objs
Original file line number Diff line number Diff line change
@@ -1,22 +1,47 @@
# Objects compiled from ${NN2_ROOT}/source/c906_opt, appended to `objects`.
# The order below is the order in which they end up in `objects`.
objects += $(addprefix ${NN2_ROOT}/source/c906_opt/, \
    abs.o \
    add.o \
    sub.o \
    mul.o \
    minimum.o \
    broadcast_to.o \
    clip.o \
    concat.o \
    split.o \
    convolution.o \
    convolution_relu.o \
    fullyconnected.o \
    maxpool.o \
    global_maxpool.o \
    avgpool.o \
    global_avgpool.o \
    pad.o \
    prelu.o \
    relu.o \
    relu1.o \
    relu6.o \
    leaky_relu.o \
    utils.o \
    setup.o \
    sgemm.o \
    gemm_fp16.o \
    convolution_1x1.o \
    convolution_1x1_fp16.o \
    convolution_3x3.o \
    convolution_3x3_fp16.o \
    convolution_sgemm.o \
    convolution_gemm_fp16.o \
    depthwise_convolution_3x3.o \
    depthwise_convolution_3x3_fp16.o \
    depthwise_convolution_3x3_pack4.o \
    depthwise_convolution_3x3_pack8_fp16.o \
    depthwise_convolution_5x5.o \
    depthwise_convolution_relu_3x3.o \
    depthwise_convolution_relu_3x3_pack4.o \
    depthwise_convolution_relu_5x5.o)
22 changes: 0 additions & 22 deletions build_script/nn2_dp1k/Makefile

This file was deleted.

2 changes: 0 additions & 2 deletions build_script/nn2_dp1k/nn2.objs

This file was deleted.

21 changes: 21 additions & 0 deletions build_script/nn2_e804/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Compiles the objects listed by the included .objs files and archives them
# into $(INSTALL_DIR)/$(DSP_LIB).
# INSTALL_DIR, DSP_LIB, NN2_ROOT, MACRO and ENDIAN are not set in this file;
# they come from the caller's environment or command line (unset ones expand
# to nothing).
CROSS_COMPILE ?= csky-abiv2-elf-
CC = $(CROSS_COMPILE)gcc
AR = $(CROSS_COMPILE)ar

# NOTE(review): -Wl,--gc-sections is a linker option, while every compiler
# invocation in this file uses -c; it is kept unchanged here.
CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections
INCLUDE = -I../../include/include_xt800/dsp_include/ -I../../include/include_xt800/csi_core_include/ -I../../include/include_xt800/nn_include
INCLUDE += -I../../include

# Neither goal names a file this makefile produces, so both are phony; a stray
# file called `all` or `nn2_e804` must not suppress the build.
.PHONY: all nn2_e804
all: nn2_e804

# Each included file appends to `objects`.
include nn2_dsp2.objs
include ../nn2.objs
include ../nn2_ref/nn2.objs

# `ar r` only inserts/replaces the members named on its command line, so an
# existing archive is removed first to keep members from an earlier object
# list from surviving in the new library.
nn2_e804: $(objects)
	$(RM) $(INSTALL_DIR)/$(DSP_LIB)
	$(AR) rcs $(INSTALL_DIR)/$(DSP_LIB) $^

# C and assembly (.S, run through the preprocessor by gcc) share one command.
%.o: %.c
	$(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@
%.o: %.S
	$(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@
38 changes: 38 additions & 0 deletions build_script/nn2_e804/nn2_dsp2.objs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Objects compiled from ${NN2_ROOT}/source/e804_opt, appended to `objects`
# one source directory at a time, in the order listed.

# activation
objects += $(addprefix ${NN2_ROOT}/source/e804_opt/activation/, \
    csi_xt800p_nn_activations_q15.o \
    csi_xt800p_nn_activations_q7.o \
    csi_xt800p_relu_q15.o \
    csi_xt800p_relu_q7.o)

# convolution
objects += $(addprefix ${NN2_ROOT}/source/e804_opt/convolution/, \
    csi_xt800p_convolve_1x1_HWC_q7_fast.o \
    csi_xt800p_convolve_HWC_q15_basic.o \
    csi_xt800p_convolve_HWC_q7_RGB.o \
    csi_xt800p_convolve_HWC_q7_basic.o \
    csi_xt800p_depthwise_separable_conv_HWC_q7.o)

# fully-connect
objects += $(addprefix ${NN2_ROOT}/source/e804_opt/fully-connect/, \
    csi_xt800p_fully_connected_mat_q7_vec_q15.o \
    csi_xt800p_fully_connected_q15.o \
    csi_xt800p_fully_connected_q7.o)

# nn-support
objects += $(addprefix ${NN2_ROOT}/source/e804_opt/nn-support/, \
    csi_xt800p_nntables.o)

# pooling
objects += $(addprefix ${NN2_ROOT}/source/e804_opt/pooling/, \
    csi_xt800p_pool_q7_HWC.o)

# softmax
objects += $(addprefix ${NN2_ROOT}/source/e804_opt/softmax/, \
    csi_xt800p_softmax_q15.o \
    csi_xt800p_softmax_q7.o)

# files located directly in e804_opt/
objects += $(addprefix ${NN2_ROOT}/source/e804_opt/, \
    convolution.o \
    fullyconnected.o \
    avgpool.o \
    maxpool.o \
    softmax.o \
    relu.o \
    sigmoid.o \
    tanh.o \
    setup.o)
1 change: 1 addition & 0 deletions build_script/nn2_gref/nn2.objs
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,4 @@ objects += ${NN2_ROOT}/source/graph_ref/reorg.o
objects += ${NN2_ROOT}/source/graph_ref/sequence_mask.o
objects += ${NN2_ROOT}/source/graph_ref/where.o
objects += ${NN2_ROOT}/source/graph_ref/space_to_batch_nd.o
objects += ${NN2_ROOT}/source/graph_ref/subgraph.o
22 changes: 22 additions & 0 deletions build_script/nn2_i805/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Compiles the objects listed by the included .objs files and archives them
# into $(INSTALL_DIR)/$(DSP_LIB).
# INSTALL_DIR, DSP_LIB, NN2_ROOT, MACRO and ENDIAN are not set in this file;
# they come from the caller's environment or command line (unset ones expand
# to nothing).
CROSS_COMPILE ?= csky-abiv2-elf-
CC = $(CROSS_COMPILE)gcc
AR = $(CROSS_COMPILE)ar

# NOTE(review): -Wl,--gc-sections is a linker option, while every compiler
# invocation in this file uses -c; it is kept unchanged here.
CFLAGS += -ffunction-sections -fdata-sections -Wl,--gc-sections
INCLUDE = -I../../include/include_xt800/dsp_include/ -I../../include/include_xt800/csi_core_include/ -I../../include/include_xt800/nn_include
INCLUDE += -I../../include

# Neither goal names a file this makefile produces, so both are phony; a stray
# file called `all` or `nn2_i805` must not suppress the build.
.PHONY: all nn2_i805
all: nn2_i805

# Each included file appends to `objects`.
include nn2_vdsp2.objs
include ../nn2.objs
include ../nn2_ref/nn2.objs
include ../nn2_gref/nn2.objs

# `ar r` only inserts/replaces the members named on its command line, so an
# existing archive is removed first to keep members from an earlier object
# list from surviving in the new library.
nn2_i805: $(objects)
	$(RM) $(INSTALL_DIR)/$(DSP_LIB)
	$(AR) rcs $(INSTALL_DIR)/$(DSP_LIB) $^

# C and assembly (.S, run through the preprocessor by gcc) share one command.
%.o: %.c
	$(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@
%.o: %.S
	$(CC) -c $(CFLAGS) $(INCLUDE) $(MACRO) $(ENDIAN) $< -o $@
72 changes: 72 additions & 0 deletions build_script/nn2_i805/nn2_vdsp2.objs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Objects compiled from ${NN2_ROOT}/source/i805_opt, appended to `objects`
# one source directory at a time, in the order listed. Entries marked
# "disabled" were commented out in the original list and are still not built.

# activation
# disabled: csi_xt800v_nn_activations_q15.o, csi_xt800v_nn_activations_q7.o
objects += $(addprefix ${NN2_ROOT}/source/i805_opt/activation/, \
    csi_xt800v_nn_activations_q15_fast.o \
    csi_xt800v_nn_activations_q7_fast.o \
    csi_xt800v_relu_q15.o \
    csi_xt800v_relu_q7.o \
    csi_i805_relu_8.o \
    csi_i805_relu6_8.o \
    csi_i805_clip_8.o)

# convolution
# disabled: csi_i805_depthwise_convolution_3x3_8.o
objects += $(addprefix ${NN2_ROOT}/source/i805_opt/convolution/, \
    csi_xt800v_convolve_1x1_HWC_q7_fast.o \
    csi_xt800v_convolve_HWC_q15_basic.o \
    csi_xt800v_convolve_HWC_q7_RGB.o \
    csi_xt800v_convolve_HWC_q7_basic.o \
    csi_xt800v_convolve_HWC_q7_fast_nonsquare.o \
    csi_xt800v_depthwise_separable_conv_HWC_q7.o \
    csi_xt800v_depthwise_separable_conv_HWC_q7_nonsquare.o \
    csi_i805_convolution_1x1_8.o \
    csi_i805_convolution_8.o \
    csi_i805_depthwise_convolution_8.o)

# fully-connect
# disabled: csi_xt800v_fully_connected_q7x16.o
objects += $(addprefix ${NN2_ROOT}/source/i805_opt/fully-connect/, \
    csi_xt800v_fully_connected_mat_q7_vec_q15.o \
    csi_xt800v_fully_connected_q15.o \
    csi_xt800v_fully_connected_q7x4.o \
    csi_i805_fullyconnected_8.o)

# nn-support
objects += $(addprefix ${NN2_ROOT}/source/i805_opt/nn-support/, \
    csi_xt800v_nntables.o)

# pooling
objects += $(addprefix ${NN2_ROOT}/source/i805_opt/pooling/, \
    csi_xt800v_pool_q7_HWC.o \
    csi_xt800v_avepool_q7_HWC_nonsquare.o \
    csi_i805_maxpool_8.o)

# softmax
# disabled: csi_i805_softmax_8.o
objects += $(addprefix ${NN2_ROOT}/source/i805_opt/softmax/, \
    csi_xt800v_softmax_q15.o \
    csi_xt800v_softmax_q7.o)

# gemm
# disabled: csi_i805_gemm_4x4_8.o, csi_i805_gemm_4x16_8.o
objects += $(addprefix ${NN2_ROOT}/source/i805_opt/gemm/, \
    csi_i805_vec_mat_mult_8.o \
    csi_i805_mat_mult_nt_t_8.o)

# basic_math_func
objects += $(addprefix ${NN2_ROOT}/source/i805_opt/basic_math/, \
    csi_i805_elementwise_add_8.o \
    csi_i805_elementwise_mul_8.o)

# reshape
objects += $(addprefix ${NN2_ROOT}/source/i805_opt/reshape/, \
    csi_i805_reshape_8.o)

# files located directly in i805_opt/
objects += $(addprefix ${NN2_ROOT}/source/i805_opt/, \
    convolution.o \
    fullyconnected.o \
    add.o \
    avgpool.o \
    maxpool.o \
    mul.o \
    softmax.o \
    relu.o \
    relu6.o \
    clip.o \
    reshape.o \
    sigmoid.o \
    tanh.o \
    setup.o)
22 changes: 0 additions & 22 deletions build_script/nn2_openvx/Makefile

This file was deleted.

Loading

0 comments on commit 3317645

Please sign in to comment.