Skip to content

Commit

Permalink
Batch-fy extract patch (not the mean normal)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ashwin Sekar committed May 10, 2017
1 parent 3fea698 commit 0491dcf
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 39 deletions.
62 changes: 50 additions & 12 deletions src/kernels/extract.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,42 @@ __global__ void kernelExtractPatch(
const float* I0, const float* I0x, const float* I0y, int patch_offset,
int patchSize, int width_pad) {

int patchIdx = threadIdx.x + blockIdx.x * patchSize;
int imgIdx = patch_offset + threadIdx.x + blockIdx.x * width_pad;
int patchIdx = threadIdx.x + blockIdx.x * 3 * patchSize;
int imgIdx = 3 * patch_offset + threadIdx.x + blockIdx.x * 3 * width_pad;

pDevicePatch[3 * patchIdx] = I0[3 * imgIdx];
pDevicePatchX[3 * patchIdx] = I0x[3 * imgIdx];
pDevicePatchY[3 * patchIdx] = I0y[3 * imgIdx];
pDevicePatch[patchIdx] = I0[imgIdx];
pDevicePatchX[patchIdx] = I0x[imgIdx];
pDevicePatchY[patchIdx] = I0y[imgIdx];

}


// Copies one square patch per thread block out of the padded, 3-channel
// interleaved image I0 and its two gradient images I0x / I0y.
//
// Launch layout: blockIdx.x selects the patch. The threads of the block share
// the patch's patch_size * patch_size * 3 floats using a block-stride loop,
// so any 1D block size produces the same result; the host wrapper launches
// 3 * patch_size threads, i.e. one thread per float of a patch row.
//
// patches / patchxs / patchys : tables indexed by patch id, read inside the
//                               kernel, holding the destination buffer of
//                               each patch (patch_size * patch_size * 3
//                               floats are written to each buffer).
// I0 / I0x / I0y              : source images, indexed as
//                               3 * (x + y * width_pad) + channel.
// midpointX / midpointY       : per-patch centre coordinates; rounded to the
//                               nearest integer and shifted by `padding`.
// patch_size                  : side length of the patch in pixels.
// width_pad                   : row length, in pixels, used to step from one
//                               image row to the next.
//
// No bounds checking is done on the image reads: the caller must guarantee
// that every patch lies inside the padded image.
__global__ void kernelExtractPatches(
    float** patches, float** patchxs, float** patchys,
    const float * I0, const float * I0x, const float * I0y,
    float* midpointX, float* midpointY, int padding,
    int patch_size, int width_pad) {

  const int patchId = blockIdx.x;

  float* patch  = patches[patchId];
  float* patchX = patchxs[patchId];
  float* patchY = patchys[patchId];

  // Patch centre in padded-image pixel coordinates.
  const int x = static_cast<int>(roundf(midpointX[patchId])) + padding;
  const int y = static_cast<int>(roundf(midpointY[patchId])) + padding;

  // Offset from the centre to the first row / column of the patch.
  const int lb = -patch_size / 2;

  const int rowLen    = 3 * patch_size;       // floats per patch row
  const int nVals     = rowLen * patch_size;  // floats per patch
  const int imgStride = 3 * width_pad;        // floats per image row
  const int base      = 3 * ((x + lb) + (y + lb) * width_pad);

  // Block-stride loop: with blockDim.x == rowLen, thread t copies column t
  // of every row, which is exactly the original access pattern.
  for (int i = threadIdx.x; i < nVals; i += blockDim.x) {
    const int row = i / rowLen;
    const int col = i - row * rowLen;
    const int j   = base + row * imgStride + col;

    patch[i]  = I0[j];
    patchX[i] = I0x[j];
    patchY[i] = I0y[j];
  }

}


Expand All @@ -48,7 +70,7 @@ namespace cu {
int patchSize, int width_pad) {

int nBlocks = patchSize;
int nThreadsPerBlock = patchSize;
int nThreadsPerBlock = 3 * patchSize;

kernelExtractPatch<<<nBlocks, nThreadsPerBlock>>>(
pDevicePatch, pDevicePatchX, pDevicePatchY,
Expand All @@ -57,4 +79,20 @@ namespace cu {

}


// Host-side launcher for kernelExtractPatches: extracts all reference patches
// (intensity plus both gradient images) in a single kernel launch.
//
// patches / patchxs / patchys : forwarded unchanged to the kernel, which
//                               indexes them by patch id on the device.
// I0 / I0x / I0y              : source image and gradient buffers, forwarded
//                               to the kernel.
// midpointX / midpointY       : per-patch centre coordinates, forwarded to
//                               the kernel.
// n_patches                   : number of patches; one thread block each.
// op                          : supplies patch_size.
// i_params                    : supplies padding and width_pad.
//
// The launch is asynchronous and its status is not checked here; callers that
// need to detect launch failures should query cudaGetLastError() afterwards.
void extractPatches(float** patches, float** patchxs, float** patchys,
    const float * I0, const float * I0x, const float * I0y,
    float* midpointX, float* midpointY, int n_patches,
    const opt_params* op, const img_params* i_params) {

  int nBlocks = n_patches;
  // One thread per float of a patch row (patch_size pixels * 3 channels).
  int nThreadsPerBlock = 3 * op->patch_size;

  // A grid or block dimension of zero is an invalid launch configuration and
  // would leave a CUDA error behind for the next API call to trip over;
  // there is nothing to extract in that case anyway.
  if (nBlocks <= 0 || nThreadsPerBlock <= 0) {
    return;
  }

  kernelExtractPatches<<<nBlocks, nThreadsPerBlock>>>(
      patches, patchxs, patchys,
      I0, I0x, I0y, midpointX, midpointY,
      i_params->padding, op->patch_size, i_params->width_pad);

}

}
9 changes: 9 additions & 0 deletions src/kernels/extract.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <chrono>
#include <string>
#include <stdexcept>
#include <vector>

// CUDA
#include <cuda.h>
Expand All @@ -20,6 +21,9 @@
#include "../common/Exceptions.h"
#include "../common/timer.h"
#include "../sandbox/process.h"
#include "../patch.h"

using namespace OFC;

namespace cu {

Expand All @@ -28,6 +32,11 @@ namespace cu {
const float* I0, const float* I0x, const float* I0y, int patch_offset,
int patch_size, int width_pad);

// Extracts every patch in one kernel launch: n_patches thread blocks of
// 3 * op->patch_size threads each.
//
// patches / patchxs / patchys are tables indexed by patch id that the kernel
// reads on the device to find each patch's destination buffer; I0 / I0x / I0y
// are the source image and its two gradient images; midpointX / midpointY
// hold one centre coordinate per patch. padding and width_pad are taken from
// i_params, patch_size from op.
void extractPatches(float** patches, float** patchxs, float** patchys,
const float * I0, const float * I0x, const float * I0y,
float* midpointX, float* midpointY, int n_patches,
const opt_params* op, const img_params* i_params);

}

#endif // end __KERNEL_EXTRACT_H__
31 changes: 16 additions & 15 deletions src/patch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,14 @@ namespace OFC {
checkCudaErrors(
cudaMalloc ((void**) &pDeviceWeights, 4 * sizeof(float)) );


// Timing
extractTime = 0;
hessianTime = 0;
projectionTime = 0;
costTime = 0;
interpolateTime = 0;
meanTime = 0;

extractCalls = 0;
hessianCalls = 0;
projectionCalls = 0;
costCalls = 0;
Expand All @@ -90,12 +89,19 @@ namespace OFC {

}

void PatClass::InitializePatch(const float * _I0,
const float * _I0x, const float * _I0y, const Eigen::Vector2f _midpoint) {
// void PatClass::InitializePatch(const float * _I0,
// const float * _I0x, const float * _I0y, const Eigen::Vector2f _midpoint) {
void PatClass::InitializePatch(float * _patch,
float * _patchx, float * _patchy,
const Eigen::Vector2f _midpoint) {

// I0 = _I0;
// I0x = _I0x;
// I0y = _I0y;

I0 = _I0;
I0x = _I0x;
I0y = _I0y;
pDevicePatch = _patch;
pDevicePatchX = _patchx;
pDevicePatchY = _patchy;

midpoint = _midpoint;

Expand All @@ -122,6 +128,7 @@ namespace OFC {
p_state->hessian(1,0) = p_state->hessian(0,1);

gettimeofday(&tv_end, nullptr);

hessianTime += (tv_end.tv_sec - tv_start.tv_sec) * 1000.0f +
(tv_end.tv_usec - tv_start.tv_usec) / 1000.0f;
hessianCalls++;
Expand Down Expand Up @@ -325,15 +332,9 @@ namespace OFC {
int lb = -op->patch_size / 2;
int patch_offset = (x + lb) + (y + lb) * i_params->width_pad;

gettimeofday(&tv_start, nullptr);
// Extract patch
cu::extractPatch(pDevicePatch, pDevicePatchX, pDevicePatchY,
I0, I0x, I0y, patch_offset, op->patch_size, i_params->width_pad);

gettimeofday(&tv_end, nullptr);
extractTime += (tv_end.tv_sec - tv_start.tv_sec) * 1000.0f +
(tv_end.tv_usec - tv_start.tv_usec) / 1000.0f;
extractCalls++;
/*cu::extractPatch(pDevicePatch, pDevicePatchX, pDevicePatchY,
I0, I0x, I0y, patch_offset, op->patch_size, i_params->width_pad);*/

gettimeofday(&tv_start, nullptr);
// Mean Normalization
Expand Down
11 changes: 7 additions & 4 deletions src/patch.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,11 @@ namespace OFC {

~PatClass();

void InitializePatch(const float * _I0, const float * _I0x,
const float * _I0y, const Eigen::Vector2f _midpoint);
// void InitializePatch(const float * _I0, const float * _I0x,
// const float * _I0y, const Eigen::Vector2f _midpoint);
void InitializePatch(float * _patch,
float * _patchx, float* _patchy,
const Eigen::Vector2f _midpoint);
void SetTargetImage(const float * _I1);

void OptimizeIter(const Eigen::Vector2f p_prev);
Expand All @@ -62,8 +65,8 @@ namespace OFC {
inline const int GetPatchId() const { return patch_id; }

struct timeval tv_start, tv_end;
double extractTime, hessianTime, projectionTime, costTime, interpolateTime, meanTime;
int extractCalls, hessianCalls, projectionCalls, costCalls, interpolateCalls, meanCalls;
double hessianTime, projectionTime, costTime, interpolateTime, meanTime;
int hessianCalls, projectionCalls, costCalls, interpolateCalls, meanCalls;

private:

Expand Down
83 changes: 75 additions & 8 deletions src/patchgrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "common/cuda_helper.h"
#include "common/timer.h"
#include "kernels/densify.h"
#include "kernels/extract.h"

#include <stdio.h>

Expand Down Expand Up @@ -47,6 +48,9 @@ namespace OFC {
p_init.resize(n_patches);
patches.reserve(n_patches);

midpointX_host = new float[n_patches];
midpointY_host = new float[n_patches];

int patch_id = 0;
for (int x = 0; x < n_patches_width; ++x) {
for (int y = 0; y < n_patches_height; ++y) {
Expand All @@ -55,6 +59,8 @@ namespace OFC {

midpoints_ref[i][0] = x * steps + offsetw;
midpoints_ref[i][1] = y * steps + offseth;
midpointX_host[i] = x * steps + offsetw;
midpointY_host[i] = y * steps + offseth;
p_init[i].setZero();

patches.push_back(new OFC::PatClass(i_params, op, patch_id));
Expand All @@ -63,19 +69,75 @@ namespace OFC {
}
}

// Midpoint
checkCudaErrors(
cudaMalloc ((void**) &pDeviceMidpointX, n_patches * sizeof(float)) );
checkCudaErrors(
cudaMalloc ((void**) &pDeviceMidpointY, n_patches * sizeof(float)) );
checkCudaErrors( cudaMemcpy(pDeviceMidpointX, midpointX_host,
n_patches * sizeof(float), cudaMemcpyHostToDevice) );
checkCudaErrors( cudaMemcpy(pDeviceMidpointY, midpointY_host,
n_patches * sizeof(float), cudaMemcpyHostToDevice) );

// Aggregate flow
checkCudaErrors(
cudaMalloc ((void**) &pDeviceWeights, i_params->width * i_params->height * sizeof(float)) );
checkCudaErrors(
cudaMalloc ((void**) &pDeviceFlowOut, i_params->width * i_params->height * 2 * sizeof(float)) );

// Patches
checkCudaErrors(
cudaMalloc((void**) &pDevicePatches, n_patches * sizeof(float*)) );
checkCudaErrors(
cudaMalloc((void**) &pDevicePatchXs, n_patches * sizeof(float*)) );
checkCudaErrors(
cudaMalloc((void**) &pDevicePatchYs, n_patches * sizeof(float*)) );

pHostDevicePatches = new float*[n_patches];
pHostDevicePatchXs = new float*[n_patches];
pHostDevicePatchYs = new float*[n_patches];
for (int i = 0; i < n_patches; i++) {
checkCudaErrors(
cudaMalloc((void**) &pHostDevicePatches[i], op->n_vals * sizeof(float)) );
checkCudaErrors(
cudaMalloc((void**) &pHostDevicePatchXs[i], op->n_vals * sizeof(float)) );
checkCudaErrors(
cudaMalloc((void**) &pHostDevicePatchYs[i], op->n_vals * sizeof(float)) );
}

checkCudaErrors( cudaMemcpy(pDevicePatches, pHostDevicePatches,
n_patches * sizeof(float*), cudaMemcpyHostToDevice) );
checkCudaErrors( cudaMemcpy(pDevicePatchXs, pHostDevicePatchXs,
n_patches * sizeof(float*), cudaMemcpyHostToDevice) );
checkCudaErrors( cudaMemcpy(pDevicePatchYs, pHostDevicePatchYs,
n_patches * sizeof(float*), cudaMemcpyHostToDevice) );

aggregateTime = 0.0;
meanTime = 0.0;
extractTime = 0.0;
}

// Releases the per-patch objects and buffers, the device-side pointer tables,
// their host mirrors, and the midpoint arrays.
//
// NOTE(review): pDeviceWeights and pDeviceFlowOut are not released here;
// confirm they are freed elsewhere.
PatGridClass::~PatGridClass() {

  // The per-patch device buffers were allocated through the host-side tables
  // (pHostDevicePatches etc.). pDevicePatches / pDevicePatchXs /
  // pDevicePatchYs are cudaMalloc'ed copies of those tables that live in
  // device memory, so they must never be indexed from host code.
  for (int i = 0; i < n_patches; ++i) {
    cudaFree(pHostDevicePatches[i]);
    cudaFree(pHostDevicePatchXs[i]);
    cudaFree(pHostDevicePatchYs[i]);
    delete patches[i];
  }

  // Device-resident pointer tables.
  cudaFree(pDevicePatches);
  cudaFree(pDevicePatchXs);
  cudaFree(pDevicePatchYs);

  // Host mirrors were allocated with new[], so they need delete[].
  delete[] pHostDevicePatches;
  delete[] pHostDevicePatchXs;
  delete[] pHostDevicePatchYs;

  // Midpoint coordinates: host staging arrays (new[]) and device copies.
  delete[] midpointX_host;
  delete[] midpointY_host;
  cudaFree(pDeviceMidpointX);
  cudaFree(pDeviceMidpointY);

}

Expand All @@ -85,8 +147,16 @@ namespace OFC {
I0x = _I0x;
I0y = _I0y;

gettimeofday(&tv_start, nullptr);
cu::extractPatches(pDevicePatches, pDevicePatchXs, pDevicePatchYs,
I0, I0x, I0y, pDeviceMidpointX, pDeviceMidpointY, n_patches, op, i_params);
gettimeofday(&tv_end, nullptr);
extractTime += (tv_end.tv_sec - tv_start.tv_sec) * 1000.0f +
(tv_end.tv_usec - tv_start.tv_usec) / 1000.0f;

for (int i = 0; i < n_patches; ++i) {
patches[i]->InitializePatch(I0, I0x, I0y, midpoints_ref[i]);
patches[i]->InitializePatch(pHostDevicePatches[i],
pHostDevicePatchXs[i], pHostDevicePatchYs[i], midpoints_ref[i]);
p_init[i].setZero();
}

Expand Down Expand Up @@ -171,22 +241,20 @@ namespace OFC {

void PatGridClass::printTimings() {

double tot_extractTime = 0, tot_hessianTime = 0,
double tot_hessianTime = 0,
tot_projectionTime = 0, tot_costTime = 0,
tot_interpolateTime = 0, tot_meanTime = 0;
int tot_extractCalls = 0, tot_hessianCalls = 0,
int tot_hessianCalls = 0,
tot_projectionCalls = 0, tot_costCalls = 0,
tot_interpolateCalls = 0, tot_meanCalls = 0;

for (auto & element : patches) {
tot_extractTime += element->extractTime;
tot_hessianTime += element->hessianTime;
tot_projectionTime += element->projectionTime;
tot_costTime += element->costTime;
tot_interpolateTime += element->interpolateTime;
tot_meanTime += element->meanTime;

tot_extractCalls += element->extractCalls;
tot_hessianCalls += element->hessianCalls;
tot_projectionCalls += element->projectionCalls;
tot_costCalls += element->costCalls;
Expand All @@ -197,8 +265,6 @@ namespace OFC {
cout << endl;
cout << "===============Timings (ms)===============" << endl;
cout << "Avg grad descent iterations: " << float(tot_costCalls) / float(n_patches) << endl;
cout << "[extract] " << tot_extractTime;
cout << " tot => " << tot_extractTime / tot_extractCalls << " avg" << endl;
cout << "[hessian] " << tot_hessianTime;
cout << " tot => " << tot_hessianTime / tot_hessianCalls << " avg" << endl;
cout << "[project] " << tot_projectionTime;
Expand All @@ -209,6 +275,7 @@ namespace OFC {
cout << " tot => " << tot_interpolateTime / tot_interpolateCalls << " avg" << endl;
cout << "[mean norm] " << tot_meanTime;
cout << " tot => " << tot_meanTime / tot_meanCalls << " avg" << endl;
cout << "[extract] " << extractTime << endl;
cout << "[aggregate] " << aggregateTime << endl;
cout << "[flow norm] " << meanTime << endl;
cout << "==========================================" << endl;
Expand Down
7 changes: 7 additions & 0 deletions src/patchgrid.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ namespace OFC {

float* pDeviceWeights, *pDeviceFlowOut;

float** pDevicePatches, ** pDevicePatchXs, ** pDevicePatchYs;
float** pHostDevicePatches, **pHostDevicePatchXs, **pHostDevicePatchYs;
float* pDeviceMidpointX, * pDeviceMidpointY;

const img_params* i_params;
const opt_params* op;

Expand All @@ -56,7 +60,10 @@ namespace OFC {
struct timeval tv_start, tv_end;
double aggregateTime;
double meanTime;
double extractTime;

float* midpointX_host;
float* midpointY_host;
std::vector<OFC::PatClass*> patches; // Patch Objects
std::vector<Eigen::Vector2f> midpoints_ref; // Midpoints for reference patches
std::vector<Eigen::Vector2f> p_init; // starting parameters for query patches
Expand Down

0 comments on commit 0491dcf

Please sign in to comment.