Skip to content

Commit

Permalink
Update to run on Jetson
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Zhao committed May 10, 2017
1 parent 21e559a commit 5eaaafa
Show file tree
Hide file tree
Showing 11 changed files with 522 additions and 838 deletions.
63 changes: 47 additions & 16 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,27 +1,46 @@
cmake_minimum_required (VERSION 2.8)
project (flow)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -std=c++11 -msse4 -fopenmp") #-Wall
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -msse4 -fopenmp") #-Wall
message(STATUS "Architecture: ${ARCH}")

# Architecture-specific configuration, selected with -DARCH=x86 or -DARCH=ARM.
# Any other value, or no value at all, stops configuration with a fatal error.

# On ghc machines
if (ARCH STREQUAL "x86")
# Same flags as the ARM branch below, plus -msse4.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -std=c++11 -msse4 -fopenmp") #-Wall
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -msse4 -fopenmp") #-Wall

# For ghc machines
# Hard-coded AFS locations of the OpenCV and Eigen3 builds.
# NOTE(review): the two paths name the AFS cell differently ("cs" vs
# "cs.cmu.edu") - confirm both resolve on the target machines.
set(OpenCV_DIR "/afs/cs/academic/class/15418-s17/public/sw/opencv/build")
set(Eigen3_DIR "/afs/cs.cmu.edu/academic/class/15418-s17/public/sw/eigen/build")

# On NVIDIA Jetson
elseif (ARCH STREQUAL "ARM")

# No -msse4 here, and OpenCV_DIR / Eigen3_DIR are not overridden in this branch.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -std=c++11 -fopenmp") #-Wall
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -fopenmp") #-Wall

else()
# Print the usage hint first, then abort configuration.
message(STATUS "specify using -DARCH=x86 or -DARCH=ARM")
message(FATAL_ERROR "aborting: missing required architecture")
endif ()

# For ghc machines
set(OpenCV_DIR "/afs/cs/academic/class/15418-s17/public/sw/opencv/build")
set(Eigen3_DIR "/afs/cs.cmu.edu/academic/class/15418-s17/public/sw/eigen/build")

find_package(OpenCV REQUIRED)
find_package(Eigen3 REQUIRED)
find_package(CUDA REQUIRED)

# On ghc, find_package sets the Eigen include directory incorrectly, so override it
set(EIGEN3_INCLUDE_DIR "/afs/cs.cmu.edu/academic/class/15418-s17/public/sw/eigen/include/eigen3")
if (ARCH STREQUAL "x86")
set(EIGEN3_INCLUDE_DIR "/afs/cs.cmu.edu/academic/class/15418-s17/public/sw/eigen/include/eigen3")
endif()
include_directories(${EIGEN3_INCLUDE_DIR})

# Add extra cuda libraries
set(CUDA_CUSOLVER_LIBRARIES
"/usr/local/depot/cuda-8.0/lib64/libcusolver.so"
"/usr/local/depot/cuda-8.0/lib64/libcublas.so.8.0")
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcusolver.so"
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas.so.8.0")
set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUSOLVER_LIBRARIES})

set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_nppi_LIBRARY} ${CUDA_CUSOLVER_LIBRARIES})
set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_nppi_LIBRARY})

message(STATUS "OpenCV library status:")
message(STATUS " version: ${OpenCV_VERSION}")
Expand All @@ -34,6 +53,7 @@ message(STATUS " include path: ${EIGEN3_INCLUDE_DIR} ${EIGEN3_INCLUDE_DIRS}")

message(STATUS "CUDA library status:")
message(STATUS " version: ${CUDA_VERSION}")
message(STATUS " toolkit path: ${CUDA_TOOLKIT_ROOT_DIR}")
message(STATUS " libraries: ${CUDA_LIBRARIES}")
message(STATUS " include path: ${CUDA_INCLUDE_DIRS}")
message(STATUS " toolkit path: ${CUDA_TOOLKIT_ROOT_DIR}")
Expand All @@ -46,7 +66,14 @@ message(STATUS " cuSOLVER path: ${CUDA_CUSOLVER_LIBRARIES}")
################################################################################

set(CUDA_VERBOSE_BUILD ON)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; -g -std=c++11 -arch=compute_61 -code=sm_61)

# Choose the nvcc code-generation target for the selected architecture:
# compute/sm 61 when ARCH is x86, compute/sm 62 when ARCH is ARM.
# The two cases are mutually exclusive, so a single chain is sufficient.
if (ARCH STREQUAL "x86")
  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; -g -arch=compute_61 -code=sm_61)
elseif (ARCH STREQUAL "ARM")
  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; -g -arch=compute_62 -code=sm_62)
endif()

set(KERNELS
kernels/densify.cu
Expand All @@ -59,6 +86,7 @@ set(KERNELS
kernels/resizeGrad.cpp
kernels/resize.cpp)


set(COMMON
common/RgbMat.cpp)

Expand All @@ -70,20 +98,23 @@ set(CODEFILES
refine_variational.cpp
FDF1.0.1/image.c
FDF1.0.1/opticalflow_aux.c
FDF1.0.1/solver.c)
FDF1.0.1/solver.c
)

# RGB, Optical Flow
cuda_add_executable(flow ${COMMON} ${KERNELS} ${CODEFILES})
# GrayScale, Optical Flow
cuda_add_executable(flow ${COMMON} ${CODEFILES} ${KERNELS})
# Preprocessor definitions for the flow target, followed by its OpenCV link step.
# NOTE(review): the meaning of SELECTMODE=1 is not visible here - confirm against the sources.
set_target_properties (flow PROPERTIES COMPILE_DEFINITIONS "SELECTMODE=1")
set_property(TARGET flow APPEND PROPERTY COMPILE_DEFINITIONS "SELECTCHANNEL=3") # use RGB image
set_property(TARGET flow APPEND PROPERTY COMPILE_DEFINITIONS "VECTOR_WIDTH=4") # SIMD vectors of 4 floats
# set_property(TARGET flow APPEND PROPERTY COMPILE_DEFINITIONS "VECTOR_WIDTH=1") # no SIMD
target_link_libraries(flow ${OpenCV_LIBS})

# CUDA sandbox
# # CUDA sandbox
# set(SANDBOX_FILES
# # sandbox/process_sobel.cpp
# # sandbox/process_resize.cpp
# sandbox/process_resize.cpp
# # sandbox/process_resizeGrad.cpp
# sandbox/process_pad.cpp
# # sandbox/process_pad.cpp
# # sandbox/RgbMatTest.cpp
# sandbox/sandbox.cpp)
# cuda_add_executable(sandbox ${COMMON} ${KERNELS} ${SANDBOX_FILES})
Expand Down
42 changes: 36 additions & 6 deletions src/FDF1.0.1/image.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@

#include "image.h"

/*
 * v4sf is the element type the vectorised image routines operate on.
 *
 * The intrinsics header is chosen from the compilation target, so the file
 * builds on ARM (NEON) as well as on x86 (SSE) instead of including
 * <arm_neon.h> unconditionally:
 *   - ARM with NEON: <arm_neon.h>, 4-float vector is float32x4_t
 *   - otherwise:     <xmmintrin.h>, 4-float vector is __v4sf
 *
 * VECTOR_WIDTH == 4 selects the 4-float vector; any other value falls back
 * to a plain float, i.e. no SIMD.
 */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
typedef float32x4_t image_simd4_t;
#else
#include <xmmintrin.h>
typedef __v4sf image_simd4_t;
#endif

#if (VECTOR_WIDTH == 4)
typedef image_simd4_t v4sf;
#else
typedef float v4sf;
#endif

/********** Create/Delete **********/

Expand Down Expand Up @@ -46,8 +54,12 @@ void image_erase(image_t *image){
void image_mul_scalar(image_t *image, const float scalar){
int i;
v4sf* imp = (v4sf*) image->c1;
#if (VECTOR_WIDTH == 4)
const v4sf scalarp = {scalar,scalar,scalar,scalar};
for( i=0 ; i<image->stride/4*image->height ; i++){
#else
const v4sf scalarp = scalar;
#endif
for( i=0 ; i<image->stride/VECTOR_WIDTH*image->height ; i++){
(*imp) *= scalarp;
imp+=1;
}
Expand All @@ -73,7 +85,7 @@ color_image_t *color_image_new(const int width, const int height){
}
image->width = width;
image->height = height;
image->stride = ( (width+3) / 4 ) * 4;
image->stride = ( (width+VECTOR_WIDTH-1) / VECTOR_WIDTH ) * VECTOR_WIDTH;
image->c1 = (float*) memalign(16, 3*image->stride*height*sizeof(float));
if(image->c1 == NULL){
fprintf(stderr, "Error: color_image_new() - not enough memory !\n");
Expand Down Expand Up @@ -374,7 +386,11 @@ convolution_t *convolution_new(const int order, const float *half_coeffs, const
}

static void convolve_vert_fast_3(image_t *dst, const image_t *src, const convolution_t *conv){
#if (VECTOR_WIDTH == 4)
const int iterline = (src->stride>>2)+1;
#else
const int iterline = (src->stride)+1;
#endif
const float *coeff = conv->coeffs;
//const float *coeff_accu = conv->coeffs_accu;
v4sf *srcp = (v4sf*) src->c1, *dstp = (v4sf*) dst->c1;
Expand All @@ -399,7 +415,11 @@ static void convolve_vert_fast_3(image_t *dst, const image_t *src, const convolu
}

static void convolve_vert_fast_5(image_t *dst, const image_t *src, const convolution_t *conv){
#if (VECTOR_WIDTH == 4)
const int iterline = (src->stride>>2)+1;
#else
const int iterline = (src->stride)+1;
#endif
const float *coeff = conv->coeffs;
//const float *coeff_accu = conv->coeffs_accu;
v4sf *srcp = (v4sf*) src->c1, *dstp = (v4sf*) dst->c1;
Expand Down Expand Up @@ -435,7 +455,11 @@ static void convolve_vert_fast_5(image_t *dst, const image_t *src, const convolu

static void convolve_horiz_fast_3(image_t *dst, const image_t *src, const convolution_t *conv){
const int stride_minus_1 = src->stride-1;
#if (VECTOR_WIDTH == 4)
const int iterline = (src->stride>>2);
#else
const int iterline = (src->stride);
#endif
const float *coeff = conv->coeffs;
v4sf *srcp = (v4sf*) src->c1, *dstp = (v4sf*) dst->c1;
// create shifted version of src
Expand Down Expand Up @@ -466,7 +490,11 @@ static void convolve_horiz_fast_3(image_t *dst, const image_t *src, const convol
static void convolve_horiz_fast_5(image_t *dst, const image_t *src, const convolution_t *conv){
const int stride_minus_1 = src->stride-1;
const int stride_minus_2 = src->stride-2;
#if (VECTOR_WIDTH == 4)
const int iterline = (src->stride>>2);
#else
const int iterline = (src->stride);
#endif
const float *coeff = conv->coeffs;
v4sf *srcp = (v4sf*) src->c1, *dstp = (v4sf*) dst->c1;
float *src_p1 = (float*) malloc(sizeof(float)*src->stride*4);
Expand Down Expand Up @@ -506,7 +534,8 @@ void convolve_horiz(image_t *dest, const image_t *src, const convolution_t *conv
if(conv->order==1){
convolve_horiz_fast_3(dest,src,conv);
return;
}else if(conv->order==2){
}
else if(conv->order==2){
convolve_horiz_fast_5(dest,src,conv);
return;
}
Expand Down Expand Up @@ -556,7 +585,8 @@ void convolve_vert(image_t *dest, const image_t *src, const convolution_t *conv)
if(conv->order==1){
convolve_vert_fast_3(dest,src,conv);
return;
}else if(conv->order==2){
}
else if(conv->order==2){
convolve_vert_fast_5(dest,src,conv);
return;
}
Expand Down
Loading

0 comments on commit 5eaaafa

Please sign in to comment.