Skip to content

Commit

Permalink
Merge pull request #12 from zhaorz/pipeline
Browse files Browse the repository at this point in the history
Pipeline
  • Loading branch information
AshwinSekar authored May 12, 2017
2 parents fb0124c + 018ff52 commit 87a758f
Show file tree
Hide file tree
Showing 33 changed files with 2,618 additions and 1,855 deletions.
80 changes: 57 additions & 23 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,27 +1,46 @@
cmake_minimum_required (VERSION 2.8)
project (flow)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -std=c++11 -msse4 -fopenmp") #-Wall
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -msse4 -fopenmp") #-Wall
message(STATUS "Architecture: ${ARCH}")

# On ghc machines
if (ARCH STREQUAL "x86")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -std=c++11 -msse4 -fopenmp") #-Wall
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -msse4 -fopenmp") #-Wall

# For ghc machines
set(OpenCV_DIR "/afs/cs/academic/class/15418-s17/public/sw/opencv/build")
set(Eigen3_DIR "/afs/cs.cmu.edu/academic/class/15418-s17/public/sw/eigen/build")

# On NVIDIA Jetson
elseif (ARCH STREQUAL "ARM")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -std=c++11 -fopenmp") #-Wall
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O3 -Wno-unknown-pragmas -Wall -fopenmp") #-Wall

else()
message(STATUS "specify using -DARCH=x86 or -DARCH=ARM")
message(FATAL_ERROR "aborting: missing required architecture")
endif ()

# For ghc machines
set(OpenCV_DIR "/afs/cs/academic/class/15418-s17/public/sw/opencv/build")
set(Eigen3_DIR "/afs/cs.cmu.edu/academic/class/15418-s17/public/sw/eigen/build")

find_package(OpenCV REQUIRED)
find_package(Eigen3 REQUIRED)
find_package(CUDA REQUIRED)

# On ghc machines, find_package() sets the Eigen3 include directory incorrectly, so override it
set(EIGEN3_INCLUDE_DIR "/afs/cs.cmu.edu/academic/class/15418-s17/public/sw/eigen/include/eigen3")
if (ARCH STREQUAL "x86")
set(EIGEN3_INCLUDE_DIR "/afs/cs.cmu.edu/academic/class/15418-s17/public/sw/eigen/include/eigen3")
endif()
include_directories(${EIGEN3_INCLUDE_DIR})

# Add extra cuda libraries
set(CUDA_CUSOLVER_LIBRARIES
"/usr/local/depot/cuda-8.0/lib64/libcusolver.so"
"/usr/local/depot/cuda-8.0/lib64/libcublas.so.8.0")
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcusolver.so"
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas.so.8.0")
set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUSOLVER_LIBRARIES})

set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_nppi_LIBRARY} ${CUDA_CUSOLVER_LIBRARIES})
set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_nppi_LIBRARY})

message(STATUS "OpenCV library status:")
message(STATUS " version: ${OpenCV_VERSION}")
Expand All @@ -34,6 +53,7 @@ message(STATUS " include path: ${EIGEN3_INCLUDE_DIR} ${EIGEN3_INCLUDE_DIRS}")

message(STATUS "CUDA library status:")
message(STATUS " version: ${CUDA_VERSION}")
message(STATUS " toolkit path: ${CUDA_TOOLKIT_ROOT_DIR}")
message(STATUS " libraries: ${CUDA_LIBRARIES}")
message(STATUS " include path: ${CUDA_INCLUDE_DIRS}")
message(STATUS " toolkit path: ${CUDA_TOOLKIT_ROOT_DIR}")
Expand All @@ -46,45 +66,59 @@ message(STATUS " cuSOLVER path: ${CUDA_CUSOLVER_LIBRARIES}")
################################################################################

set(CUDA_VERBOSE_BUILD ON)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; -g -std=c++11 -arch=compute_61 -code=sm_61)

if (ARCH STREQUAL "x86")
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; -g -arch=compute_61 -code=sm_61)
endif()

if (ARCH STREQUAL "ARM")
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; -g -arch=compute_62 -code=sm_62)
endif()

set(KERNELS
kernels/densify.cu
kernels/interpolate.cu
kernels/extract.cu
kernels/optimize.cu
kernels/warmup.cpp
kernels/sobel.cpp
# kernels/sobel.cpp
kernels/sobelUnifiedMem.cpp
kernels/pyramid.cpp
kernels/pad.cpp
kernels/resizeGrad.cpp
kernels/resize.cpp)


set(COMMON
common/RgbMat.cpp)

set(CODEFILES
run_dense.cpp
oflow.cpp
patch.cpp
# patch.cpp
patchgrid.cpp
refine_variational.cpp
FDF1.0.1/image.c
FDF1.0.1/opticalflow_aux.c
FDF1.0.1/solver.c)
FDF1.0.1/solver.c
)

# GrayScale, Optical Flow
# Main executable: built from the COMMON, CODEFILES and KERNELS source lists
# through cuda_add_executable so the .cu kernels are compiled alongside the C/C++ files.
# NOTE(review): SELECTCHANNEL=3 below is annotated as RGB, so the "GrayScale"
# label above may be stale -- confirm against the sources.
cuda_add_executable(flow ${COMMON} ${CODEFILES} ${KERNELS})
# Compile-time switches passed to the sources as preprocessor definitions.
set_target_properties (flow PROPERTIES COMPILE_DEFINITIONS "SELECTMODE=1")
set_property(TARGET flow APPEND PROPERTY COMPILE_DEFINITIONS "SELECTCHANNEL=3") # use RGB image
set_property(TARGET flow APPEND PROPERTY COMPILE_DEFINITIONS "VECTOR_WIDTH=4") # 4-wide SIMD (4 floats per vector)
# Alternative setting: scalar build (swap with the VECTOR_WIDTH=4 line above).
# set_property(TARGET flow APPEND PROPERTY COMPILE_DEFINITIONS "VECTOR_WIDTH=1") # no SIMD
target_link_libraries(flow ${OpenCV_LIBS})

# CUDA sandbox
set(SANDBOX_FILES
# sandbox/process_sobel.cpp
# sandbox/process_resize.cpp
# sandbox/process_resizeGrad.cpp
sandbox/process_pad.cpp
# sandbox/RgbMatTest.cpp
sandbox/sandbox.cpp)
cuda_add_executable(sandbox ${COMMON} ${KERNELS} ${SANDBOX_FILES})
target_link_libraries(sandbox ${OpenCV_LIBS})

# set(SANDBOX_FILES
# # sandbox/process_sobel.cpp
# sandbox/process_sobel.cpp
# # sandbox/process_resize.cpp
# # sandbox/process_resizeGrad.cpp
# # sandbox/process_pad.cpp
# # sandbox/RgbMatTest.cpp
# sandbox/sandbox.cpp)
# cuda_add_executable(sandbox ${COMMON} ${KERNELS} ${SANDBOX_FILES})
# target_link_libraries(sandbox ${OpenCV_LIBS})
42 changes: 36 additions & 6 deletions src/FDF1.0.1/image.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@

#include "image.h"

#include <xmmintrin.h>
typedef __v4sf v4sf;
// #include <xmmintrin.h>
// typedef __v4sf v4sf;

#include <arm_neon.h>

#if (VECTOR_WIDTH == 4)
typedef float32x4_t v4sf;
#else
typedef float v4sf;
#endif

/********** Create/Delete **********/

Expand Down Expand Up @@ -46,8 +54,12 @@ void image_erase(image_t *image){
void image_mul_scalar(image_t *image, const float scalar){
int i;
v4sf* imp = (v4sf*) image->c1;
#if (VECTOR_WIDTH == 4)
const v4sf scalarp = {scalar,scalar,scalar,scalar};
for( i=0 ; i<image->stride/4*image->height ; i++){
#else
const v4sf scalarp = scalar;
#endif
for( i=0 ; i<image->stride/VECTOR_WIDTH*image->height ; i++){
(*imp) *= scalarp;
imp+=1;
}
Expand All @@ -73,7 +85,7 @@ color_image_t *color_image_new(const int width, const int height){
}
image->width = width;
image->height = height;
image->stride = ( (width+3) / 4 ) * 4;
image->stride = ( (width+VECTOR_WIDTH-1) / VECTOR_WIDTH ) * VECTOR_WIDTH;
image->c1 = (float*) memalign(16, 3*image->stride*height*sizeof(float));
if(image->c1 == NULL){
fprintf(stderr, "Error: color_image_new() - not enough memory !\n");
Expand Down Expand Up @@ -374,7 +386,11 @@ convolution_t *convolution_new(const int order, const float *half_coeffs, const
}

static void convolve_vert_fast_3(image_t *dst, const image_t *src, const convolution_t *conv){
#if (VECTOR_WIDTH == 4)
const int iterline = (src->stride>>2)+1;
#else
const int iterline = (src->stride)+1;
#endif
const float *coeff = conv->coeffs;
//const float *coeff_accu = conv->coeffs_accu;
v4sf *srcp = (v4sf*) src->c1, *dstp = (v4sf*) dst->c1;
Expand All @@ -399,7 +415,11 @@ static void convolve_vert_fast_3(image_t *dst, const image_t *src, const convolu
}

static void convolve_vert_fast_5(image_t *dst, const image_t *src, const convolution_t *conv){
#if (VECTOR_WIDTH == 4)
const int iterline = (src->stride>>2)+1;
#else
const int iterline = (src->stride)+1;
#endif
const float *coeff = conv->coeffs;
//const float *coeff_accu = conv->coeffs_accu;
v4sf *srcp = (v4sf*) src->c1, *dstp = (v4sf*) dst->c1;
Expand Down Expand Up @@ -435,7 +455,11 @@ static void convolve_vert_fast_5(image_t *dst, const image_t *src, const convolu

static void convolve_horiz_fast_3(image_t *dst, const image_t *src, const convolution_t *conv){
const int stride_minus_1 = src->stride-1;
#if (VECTOR_WIDTH == 4)
const int iterline = (src->stride>>2);
#else
const int iterline = (src->stride);
#endif
const float *coeff = conv->coeffs;
v4sf *srcp = (v4sf*) src->c1, *dstp = (v4sf*) dst->c1;
// create shifted version of src
Expand Down Expand Up @@ -466,7 +490,11 @@ static void convolve_horiz_fast_3(image_t *dst, const image_t *src, const convol
static void convolve_horiz_fast_5(image_t *dst, const image_t *src, const convolution_t *conv){
const int stride_minus_1 = src->stride-1;
const int stride_minus_2 = src->stride-2;
#if (VECTOR_WIDTH == 4)
const int iterline = (src->stride>>2);
#else
const int iterline = (src->stride);
#endif
const float *coeff = conv->coeffs;
v4sf *srcp = (v4sf*) src->c1, *dstp = (v4sf*) dst->c1;
float *src_p1 = (float*) malloc(sizeof(float)*src->stride*4);
Expand Down Expand Up @@ -506,7 +534,8 @@ void convolve_horiz(image_t *dest, const image_t *src, const convolution_t *conv
if(conv->order==1){
convolve_horiz_fast_3(dest,src,conv);
return;
}else if(conv->order==2){
}
else if(conv->order==2){
convolve_horiz_fast_5(dest,src,conv);
return;
}
Expand Down Expand Up @@ -556,7 +585,8 @@ void convolve_vert(image_t *dest, const image_t *src, const convolution_t *conv)
if(conv->order==1){
convolve_vert_fast_3(dest,src,conv);
return;
}else if(conv->order==2){
}
else if(conv->order==2){
convolve_vert_fast_5(dest,src,conv);
return;
}
Expand Down
Loading

0 comments on commit 87a758f

Please sign in to comment.