Skip to content

Commit

Permalink
Merge branch 'optimize_before' into optimize_refine
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Zhao committed May 12, 2017
2 parents c90ee15 + ab07014 commit 9e59020
Show file tree
Hide file tree
Showing 9 changed files with 36 additions and 34 deletions.
10 changes: 6 additions & 4 deletions src/kernels/densify.cu
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,14 @@ __global__ void kernelDensifyPatches(


/**
 * Divides the accumulated flow by its per-pixel weight and stores the result
 * in a separate output buffer.
 *
 * @param pDeviceFlowOut  accumulated flow values, N floats (read only here)
 * @param flow            output buffer, N floats; must be addressable from the
 *                        device (the host wrapper passes its pHostFlowOut here)
 * @param pDeviceWeights  accumulation weights, indexed with i / 2 (one weight
 *                        shared by each pair of consecutive flow values)
 * @param N               number of float elements to process
 * @param numBlocks       number of blocks the kernel was launched with; used
 *                        to compute the per-thread stride, so it must match
 *                        the launch configuration
 */
__global__ void kernelNormalizeFlow(
    float* pDeviceFlowOut, float* flow,
    float* pDeviceWeights, int N, int numBlocks) {

  const int stride = blockDim.x * numBlocks;

  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += stride) {
    const float weight = pDeviceWeights[i / 2];
    const float value = pDeviceFlowOut[i];

    // Always write the output element. When the weight is not positive the
    // raw accumulated value is passed through unchanged, which is what the
    // previous in-place normalisation followed by a full copy produced.
    // Skipping the write would leave stale data in `flow`.
    flow[i] = (weight > 0) ? value / weight : value;
  }

}
Expand All @@ -121,13 +122,14 @@ namespace cu {
patchSize, minErrVal);
}

/**
 * Launches kernelNormalizeFlow and waits for it to finish.
 *
 * @param pHostFlowOut    destination for the normalised flow, N floats. It is
 *                        handed to the kernel as-is, so it must be memory the
 *                        device can address directly.
 *                        NOTE(review): callers appear to allocate it with
 *                        cudaHostAlloc(..., cudaHostAllocMapped); using the
 *                        host pointer on the device relies on unified virtual
 *                        addressing -- confirm for all target platforms.
 * @param pDeviceFlowOut  accumulated flow on the device, N floats
 * @param pDeviceWeights  accumulation weights on the device
 * @param N               number of float elements to process
 */
void normalizeFlow(float* pHostFlowOut,
    float* pDeviceFlowOut, float* pDeviceWeights, int N) {

  int nThreadsPerBlock = 64;
  int nBlocks = 10;

  kernelNormalizeFlow<<<nBlocks, nThreadsPerBlock>>>(pDeviceFlowOut,
      pHostFlowOut, pDeviceWeights, N, nBlocks);

  // Kernel launches are asynchronous. The kernel writes straight into the
  // caller's host-visible buffer and no blocking copy follows any more, so
  // wait here; otherwise the host could read the buffer before the kernel
  // has finished. The status is not inspected in this wrapper.
  cudaDeviceSynchronize();
}

void densifyPatches(
Expand Down
2 changes: 1 addition & 1 deletion src/kernels/densify.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ namespace cu {


// Normalises the accumulated device flow by its weights and writes the result
// to pHostFlowOut. Parameter names follow the definition in densify.cu:
// the host-visible output buffer comes first, the device accumulator second.
void normalizeFlow(
    float* pHostFlowOut, float* pDeviceFlowOut, float* pDeviceWeights, int N);

}

Expand Down
6 changes: 3 additions & 3 deletions src/kernels/extract.cu
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ __global__ void kernelExtractPatchesAndHessians(

// TODO: merge this with above kernel?
__global__ void kernelInitCoarserOF(
float* flowPrev, dev_patch_state* states, int width,
int lb, int ub_w, int ub_h) {
const float* flowPrev, dev_patch_state* states,
int width, int lb, int ub_w, int ub_h) {

int patchId = blockIdx.x;
int x = floor(states[patchId].midpoint_orgx / 2);
Expand Down Expand Up @@ -207,7 +207,7 @@ namespace cu {
}


void initCoarserOF(float* flowPrev, dev_patch_state* states,
void initCoarserOF(const float* flowPrev, dev_patch_state* states,
int n_patches, const img_params* i_params) {

int nBlocks = n_patches;
Expand Down
2 changes: 1 addition & 1 deletion src/kernels/extract.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ namespace cu {
dev_patch_state* states, int n_patches,
const opt_params* op, const img_params* i_params);

void initCoarserOF(float* flowPrev, dev_patch_state* states,
void initCoarserOF(const float* flowPrev, dev_patch_state* states,
int n_patches, const img_params* i_params);


Expand Down
2 changes: 1 addition & 1 deletion src/kernels/flowUtil.cu
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ namespace cu {
auto start_vert = now();
kernelSubLaplacianVert<<<nBlocks, nThreadsPerBlock>>>(
d_src, d_src + stride, d_dst, d_dst + stride, d_weights, height, stride);
calc_print_elapsed("laplacian vert", start_vert);
// calc_print_elapsed("laplacian vert", start_vert);

}

Expand Down
7 changes: 5 additions & 2 deletions src/oflow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ namespace OFC {
iparams[i].height_pad = iparams[i].height + 2 * _iparams.padding;
iparams[i].curr_lvl = sl;

flow[i] = new float[2 * iparams[i].width * iparams[i].height];
// flow[i] = new float[2 * iparams[i].width * iparams[i].height];
checkCudaErrors(
cudaHostAlloc((void**) &(flow[i]),
2 * iparams[i].width * iparams[i].height * sizeof(float), cudaHostAllocMapped) );
grid[i] = new OFC::PatGridClass(&(iparams[i]), &op);
}

Expand Down Expand Up @@ -147,7 +150,7 @@ namespace OFC {

for (int sl = op.coarsest_scale; sl >= op.finest_scale; --sl) {

  // The flow buffers are pinned host allocations obtained with cudaHostAlloc,
  // so they must be released with cudaFreeHost. cudaFree is for device
  // allocations and delete[] is for new[]; neither is valid here.
  checkCudaErrors( cudaFreeHost(flow[sl - op.finest_scale]) );
  delete grid[sl - op.finest_scale];

}
Expand Down
12 changes: 2 additions & 10 deletions src/patchgrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,13 +288,9 @@ namespace OFC {

void PatGridClass::InitializeFromCoarserOF(const float * flow_prev) {

int flow_size = i_params->width * i_params->height / 2;
checkCudaErrors( cudaMemcpy(pDevFlowPrev, flow_prev,
flow_size * sizeof(float), cudaMemcpyHostToDevice) );

gettimeofday(&tv_start, nullptr);

cu::initCoarserOF(pDevFlowPrev, pDevicePatchStates,
cu::initCoarserOF(flow_prev, pDevicePatchStates,
n_patches, i_params);

gettimeofday(&tv_end, nullptr);
Expand Down Expand Up @@ -325,16 +321,12 @@ namespace OFC {
gettimeofday(&tv_start, nullptr);

// Normalize all pixels
cu::normalizeFlow(pDeviceFlowOut, pDeviceWeights, 2 * i_params->width * i_params->height);
cu::normalizeFlow(flowout, pDeviceFlowOut, pDeviceWeights, 2 * i_params->width * i_params->height);

gettimeofday(&tv_end, nullptr);
meanTime += (tv_end.tv_sec - tv_start.tv_sec) * 1000.0f +
(tv_end.tv_usec - tv_start.tv_usec) / 1000.0f;

checkCudaErrors(
cudaMemcpy(flowout, pDeviceFlowOut,
i_params->width * i_params->height * 2 * sizeof(float), cudaMemcpyDeviceToHost) );


}

Expand Down
20 changes: 10 additions & 10 deletions src/refine_variational.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ namespace OFC {

copyimage(_I0, I0);
copyimage(_I1, I1);
calc_print_elapsed("refine: flow_sep", start_flow_sep);
// calc_print_elapsed("refine: flow_sep", start_flow_sep);

// Call solver
auto start_solver = now();
RefLevelOF(flow_sep[0], flow_sep[1], I0, I1);
calc_print_elapsed("RefLevelOF [total]", start_solver);
// calc_print_elapsed("RefLevelOF [total]", start_solver);

// Copy flow result back
auto start_copy = now();
Expand All @@ -109,7 +109,7 @@ namespace OFC {

}
}
calc_print_elapsed("refine: copy back", start_copy);
// calc_print_elapsed("refine: copy back", start_copy);

// free FV structs
for (int i = 0; i < noparam; ++i )
Expand Down Expand Up @@ -164,12 +164,12 @@ namespace OFC {
*Ix = color_image_new(width,height), *Iy = color_image_new(width,height), *Iz = color_image_new(width,height), // first order derivatives
*Ixx = color_image_new(width,height), *Ixy = color_image_new(width,height),
*Iyy = color_image_new(width,height), *Ixz = color_image_new(width,height), *Iyz = color_image_new(width,height); // second order derivatives
calc_print_elapsed("RefLevelOF setup", start_setup);
// calc_print_elapsed("RefLevelOF setup", start_setup);

// warp second image
auto start_image_warp = now();
cu::warpImage(w_im2, mask, im2, wx, wy);
calc_print_elapsed("RefLevelOF image_warp", start_image_warp);
// calc_print_elapsed("RefLevelOF image_warp", start_image_warp);

// compute derivatives
auto start_get_derivs = now();
Expand All @@ -180,7 +180,7 @@ namespace OFC {
auto start_image_erase = now();
image_erase(du);
image_erase(dv);
calc_print_elapsed("RefLevelOF image_erase", start_image_erase);
// calc_print_elapsed("RefLevelOF image_erase", start_image_erase);

// initialize uu and vv
memcpy(uu->c1,wx->c1,wx->stride*wx->height*sizeof(float));
Expand Down Expand Up @@ -219,16 +219,16 @@ namespace OFC {
cu::flowUpdate(
uu->c1, vv->c1, wx->c1, wy->c1, du->c1, dv->c1,
height, width, stride);
calc_print_elapsed(("RefLevelOF " + iterStr + " flow update").c_str(), start_flow_update);
// calc_print_elapsed(("RefLevelOF " + iterStr + " flow update").c_str(), start_flow_update);

calc_print_elapsed(("RefLevelOF " + iterStr + " [total]").c_str(), start_iteration);
// calc_print_elapsed(("RefLevelOF " + iterStr + " [total]").c_str(), start_iteration);

}
// add flow increment to current flow
auto start_increment_flow = now();
memcpy(wx->c1,uu->c1,uu->stride*uu->height*sizeof(float));
memcpy(wy->c1,vv->c1,vv->stride*vv->height*sizeof(float));
calc_print_elapsed("RefLevelOF increment flow", start_increment_flow);
// calc_print_elapsed("RefLevelOF increment flow", start_increment_flow);

// free memory
auto start_cleanup = now();
Expand All @@ -242,7 +242,7 @@ namespace OFC {
color_image_delete(w_im2);
color_image_delete(Ix); color_image_delete(Iy); color_image_delete(Iz);
color_image_delete(Ixx); color_image_delete(Ixy); color_image_delete(Iyy); color_image_delete(Ixz); color_image_delete(Iyz);
calc_print_elapsed("RefLevelOF cleanup", start_cleanup);
// calc_print_elapsed("RefLevelOF cleanup", start_cleanup);
}


Expand Down
9 changes: 7 additions & 2 deletions src/run_dense.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,10 +278,15 @@ int main( int argc, char** argv ) {

// Run main optical flow / depth algorithm
float scale_fact = pow(2, op.finest_scale);
cv::Mat flow_mat(iparams.height / scale_fact , iparams.width / scale_fact, CV_32FC2); // Optical Flow
float* outflow;
checkCudaErrors(
cudaHostAlloc((void**) &(outflow), 2 * iparams.height / scale_fact
* iparams.width / scale_fact * sizeof(float), cudaHostAllocMapped) );

ofc.calc(I0, I1, iparams, nullptr, (float*) flow_mat.data);
ofc.calc(I0, I1, iparams, nullptr, outflow);

cv::Mat flow_mat(iparams.height / scale_fact , iparams.width / scale_fact,
CV_32FC2, outflow);

if (op.verbosity > 1) gettimeofday(&start_time, NULL);

Expand Down

0 comments on commit 9e59020

Please sign in to comment.