Skip to content

Commit

Permalink
Merge pull request #13 from zhaorz/unified_mem_refine
Browse files Browse the repository at this point in the history
Add unified memory refine_variation
  • Loading branch information
Richard Zhao authored May 12, 2017
2 parents 87a758f + 9c77b9c commit cbe149e
Show file tree
Hide file tree
Showing 10 changed files with 1,163 additions and 861 deletions.
10 changes: 6 additions & 4 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ if (ARCH STREQUAL "ARM")
endif()

set(KERNELS
kernels/flowUtil.cu
kernels/densify.cu
kernels/interpolate.cu
kernels/extract.cu
Expand All @@ -98,16 +99,17 @@ set(CODEFILES
# patch.cpp
patchgrid.cpp
refine_variational.cpp
FDF1.0.1/image.c
FDF1.0.1/opticalflow_aux.c
FDF1.0.1/solver.c
# FDF1.0.1/image.c
FDF1.0.1/image.cpp
FDF1.0.1/opticalflow_aux.cpp
FDF1.0.1/solver.cpp
)

# Optical flow executable "flow".
# NOTE(review): this heading originally said "GrayScale", but SELECTCHANNEL=3 below is
# commented as selecting the RGB image -- confirm which one is intended.
cuda_add_executable(flow ${COMMON} ${CODEFILES} ${KERNELS})
set_target_properties (flow PROPERTIES COMPILE_DEFINITIONS "SELECTMODE=1")
set_property(TARGET flow APPEND PROPERTY COMPILE_DEFINITIONS "SELECTCHANNEL=3") # use RGB image
set_property(TARGET flow APPEND PROPERTY COMPILE_DEFINITIONS "VECTOR_WIDTH=4") # 4-wide SIMD (4 floats per vector)
set_property(TARGET flow APPEND PROPERTY COMPILE_DEFINITIONS "UNIFIED_MEM=1") # zero copy memory: image.cpp allocates image buffers with cudaHostAlloc(cudaHostAllocMapped) instead of memalign
# set_property(TARGET flow APPEND PROPERTY COMPILE_DEFINITIONS "VECTOR_WIDTH=1") # no SIMD
target_link_libraries(flow ${OpenCV_LIBS})

Expand Down
24 changes: 22 additions & 2 deletions src/FDF1.0.1/image.c → src/FDF1.0.1/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

#include "image.h"

#include <cuda.h>
#include <cuda_runtime.h>
#include "../common/cuda_helper.h"

// #include <xmmintrin.h>
// typedef __v4sf v4sf;

Expand All @@ -29,7 +33,11 @@ image_t *image_new(const int width, const int height){
image->width = width;
image->height = height;
image->stride = ( (width+3) / 4 ) * 4;
#if (UNIFIED_MEM)
checkCudaErrors( cudaHostAlloc((void**) &image->c1, image->stride*height*sizeof(float), cudaHostAllocMapped) );
#else
image->c1 = (float*) memalign(16, image->stride*height*sizeof(float));
#endif
if(image->c1== NULL){
fprintf(stderr, "Error: image_new() - not enough memory !\n");
exit(1);
Expand Down Expand Up @@ -70,7 +78,11 @@ void image_delete(image_t *image){
if(image == NULL){
    //fprintf(stderr, "Warning: Delete image --> Ignore action (image not allocated)\n");
}else{
#if (UNIFIED_MEM)
    // image_new() obtains c1 from cudaHostAlloc() (mapped, pinned host memory).
    // Such memory must be released with cudaFreeHost(); cudaFree() is only for
    // device allocations, so calling it here fails and leaks the buffer.
    cudaError_t free_status = cudaFreeHost(image->c1);
    if(free_status != cudaSuccess){
        // Report instead of silently dropping the error; keep going so the
        // struct itself is still released.
        fprintf(stderr, "Warning: image_delete() - cudaFreeHost failed: %s\n", cudaGetErrorString(free_status));
    }
#else
    // Non-unified build: c1 came from memalign(), which is released with free().
    free(image->c1);
#endif
    // The image_t struct itself is released with free() in both builds.
    free(image);
}
}
Expand All @@ -86,7 +98,11 @@ color_image_t *color_image_new(const int width, const int height){
image->width = width;
image->height = height;
image->stride = ( (width+VECTOR_WIDTH-1) / VECTOR_WIDTH ) * VECTOR_WIDTH;
#if (UNIFIED_MEM)
checkCudaErrors( cudaHostAlloc((void**) &image->c1, 3*image->stride*height*sizeof(float), cudaHostAllocMapped) );
#else
image->c1 = (float*) memalign(16, 3*image->stride*height*sizeof(float));
#endif
if(image->c1 == NULL){
fprintf(stderr, "Error: color_image_new() - not enough memory !\n");
exit(1);
Expand All @@ -111,7 +127,11 @@ void color_image_erase(color_image_t *image){
/* Free the memory of a color image: the pixel buffer held in c1 and the
 * color_image_t struct itself. A NULL image is ignored.
 *
 * With UNIFIED_MEM, color_image_new() allocates the buffer with
 * cudaHostAlloc(), so it has to be released with cudaFreeHost(). cudaFree()
 * is only valid for device allocations; using it here fails and leaks the
 * pinned buffer. A failed release is reported on stderr and is not fatal.
 */
void color_image_delete(color_image_t *image){
    if(image){
#if (UNIFIED_MEM)
        cudaError_t free_status = cudaFreeHost(image->c1); // single allocation of 3*stride*height floats
        if(free_status != cudaSuccess){
            // Do not swallow the error, but still release the struct below.
            fprintf(stderr, "Warning: color_image_delete() - cudaFreeHost failed: %s\n", cudaGetErrorString(free_status));
        }
#else
        free(image->c1); // c2 and c3 were allocated at the same moment
#endif
        free(image);
    }
}
Expand Down Expand Up @@ -665,7 +685,7 @@ void color_image_convolve_hv(color_image_t *dst, const color_image_t *src, const
dst_red = {width,height,stride,dst->c1}, dst_green = {width,height,stride,dst->c2}, dst_blue = {width,height,stride,dst->c3};
// horizontal and vertical
if(horiz_conv != NULL && vert_conv != NULL){
float *tmp_data = malloc(sizeof(float)*stride*height);
float *tmp_data = (float*) malloc(sizeof(float)*stride*height);
if(tmp_data == NULL){
fprintf(stderr,"error color_image_convolve_hv(): not enough memory\n");
exit(1);
Expand Down Expand Up @@ -699,7 +719,7 @@ void image_convolve_hv(image_t *dst, const image_t *src, const convolution_t *ho
dst_red = {width,height,stride,dst->c1};
// horizontal and vertical
if(horiz_conv != NULL && vert_conv != NULL){
float *tmp_data = malloc(sizeof(float)*stride*height);
float *tmp_data = (float*) malloc(sizeof(float)*stride*height);
if(tmp_data == NULL){
fprintf(stderr,"error image_convolve_hv(): not enough memory\n");
exit(1);
Expand Down
Loading

0 comments on commit cbe149e

Please sign in to comment.