Skip to content

Commit

Permalink
Use pre alloc'd array for subLaplacian coeffs
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Zhao committed May 12, 2017
1 parent 76d45ed commit c90ee15
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 16 deletions.
20 changes: 9 additions & 11 deletions src/kernels/flowUtil.cu
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ __global__ void kernelSubLaplacianHorizFillCoeffs(
float *src, float *weights, float *coeffs, int height, int width, int stride) {

int tidx = blockIdx.x * blockDim.x + threadIdx.x;
int row = tidx / stride;
int col = tidx % stride;

// Do not calculate the last column
Expand Down Expand Up @@ -591,12 +590,18 @@ namespace cu {

};

// Applies the sub-Laplacian to the first channel (c1) of `src`, with `dst->c1`
// passed as the destination of both passes. The horizontal pass is issued first,
// followed by the vertical pass, on the same source/destination buffers.
//
//   dst          - image whose c1 buffer is handed to both passes as output
//   src          - image whose c1 buffer is the input; also supplies height/width/stride
//   weight_horiz - image whose c1 buffer holds the weights for the horizontal pass
//   weight_vert  - image whose c1 buffer holds the weights for the vertical pass
//   coeffs       - caller-provided coefficient buffer forwarded to the horizontal pass,
//                  so nothing is allocated in this function.
//                  NOTE(review): presumably a device buffer of at least
//                  height * stride floats -- confirm against the caller.
void subLaplacian(
image_t *dst, const image_t *src, const image_t *weight_horiz, const image_t *weight_vert, float *coeffs) {

  // Image geometry is shared by both passes; read it once up front.
  const int height = src->height;
  const int width = src->width;
  const int stride = src->stride;

  // Both passes work on the same input/output channel buffers.
  float *in = src->c1;
  float *out = dst->c1;

  cu::subLaplacianHoriz(in, out, weight_horiz->c1, coeffs, height, width, stride);
  cu::subLaplacianVert(in, out, weight_vert->c1, height, stride);

}

void subLaplacianHoriz(
float *src, float *dst, float *weights, int height, int width, int stride) {
float *src, float *dst, float *weights, float *coeffs, int height, int width, int stride) {

float *pDeviceCoeffs;
checkCudaErrors( cudaMalloc((void**) &pDeviceCoeffs, height * stride * sizeof(float)) );
float *pDeviceCoeffs = coeffs;

// Setup device pointers
float *pDeviceSrc, *pDeviceDst, *pDeviceWeights;
Expand All @@ -621,8 +626,6 @@ namespace cu {
// pDeviceSrc, pDeviceDst, pDeviceWeights, pDeviceCoeffs, height, width, stride);
cudaDeviceSynchronize();
calc_print_elapsed("laplacian horiz", start_horiz);

cudaFree(pDeviceCoeffs);
}

void subLaplacianVert(
Expand Down Expand Up @@ -950,9 +953,4 @@ namespace cu {
color_image_delete(tmp_im2);
}

// Applies the sub-Laplacian to the first channel (c1) of `src`, with `dst->c1`
// passed as the destination: the horizontal pass is issued first, then the
// vertical pass, both on the same source/destination buffers.
// Image geometry (height, width, stride) is taken from `src`.
void subLaplacian(image_t *dst, const image_t *src, const image_t *weight_horiz, const image_t *weight_vert){
  // Geometry is shared by both passes; read it once up front.
  const int height = src->height;
  const int width = src->width;
  const int stride = src->stride;

  // Both passes work on the same input/output channel buffers.
  float *in = src->c1;
  float *out = dst->c1;

  cu::subLaplacianHoriz(in, out, weight_horiz->c1, height, width, stride);
  cu::subLaplacianVert(in, out, weight_vert->c1, height, stride);
}

}
7 changes: 4 additions & 3 deletions src/kernels/flowUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ namespace cu {
color_image_t *Ixz, color_image_t *Iyz,
const float half_delta_over3, const float half_beta, const float half_gamma_over3);

// Runs the horizontal and then the vertical sub-Laplacian pass on the c1 channel
// of `src`, with `dst->c1` passed as the destination of both passes.
// `coeffs` is a caller-supplied buffer forwarded to the horizontal pass for its
// coefficients, so the call itself does not allocate.
// NOTE(review): presumably a device buffer of at least height * stride floats --
// confirm against the allocation at the call site.
void subLaplacian(
image_t *dst, const image_t *src, const image_t *weight_horiz, const image_t *weight_vert, float *coeffs);

void subLaplacianHoriz(
float *src, float *dst, float *weights, int height, int width, int stride);
float *src, float *dst, float *weights, float *coeffs, int height, int width, int stride);

void subLaplacianVert(
float *src, float *dst, float *weights, int height, int stride);
Expand Down Expand Up @@ -79,8 +82,6 @@ namespace cu {
color_image_t *dx, color_image_t *dy, color_image_t *dt,
color_image_t *dxx, color_image_t *dxy, color_image_t *dyy, color_image_t *dxt, color_image_t *dyt);

void subLaplacian(image_t *dst, const image_t *src, const image_t *weight_horiz, const image_t *weight_vert);

}

#endif // end __KERNEL_FLOW_UTIL_H__
Expand Down
8 changes: 6 additions & 2 deletions src/refine_variational.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ namespace OFC {
}
}

checkCudaErrors( cudaMalloc((void**) &pDeviceSubLaplacianCoeffs,
flow_sep[0]->height * flow_sep[0]->stride * sizeof(float)) );

// copy image data into FV structs
color_image_t * I0, * I1;
I0 = color_image_new(i_params->width, i_params->height);
Expand Down Expand Up @@ -200,8 +203,8 @@ namespace OFC {
calc_print_elapsed(("RefLevelOF " + iterStr + " data").c_str(), start_data);

auto start_lapalcian = now();
cu::subLaplacian(b1, wx, smooth_horiz, smooth_vert);
cu::subLaplacian(b2, wy, smooth_horiz, smooth_vert);
cu::subLaplacian(b1, wx, smooth_horiz, smooth_vert, pDeviceSubLaplacianCoeffs);
cu::subLaplacian(b2, wy, smooth_horiz, smooth_vert, pDeviceSubLaplacianCoeffs);
calc_print_elapsed(("RefLevelOF " + iterStr + " laplacian").c_str(), start_lapalcian);

// solve system
Expand Down Expand Up @@ -246,6 +249,7 @@ namespace OFC {
// Releases the device buffers held in this object's pDevice* members.
VarRefClass::~VarRefClass() {
  // Buffers are released in declaration order. The result of each cudaFree
  // call is ignored here.
  float *const deviceBuffers[] = {
    pDeviceColorDerivativeKernel,
    pDeviceDerivativeKernel,
    pDeviceSubLaplacianCoeffs,
  };

  for (float *buffer : deviceBuffers) {
    cudaFree(buffer);
  }
}

}
1 change: 1 addition & 0 deletions src/refine_variational.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ namespace OFC {

float *pDeviceColorDerivativeKernel;
float *pDeviceDerivativeKernel;
float *pDeviceSubLaplacianCoeffs;

};

Expand Down

0 comments on commit c90ee15

Please sign in to comment.