Skip to content

Commit

Permalink
Finish pipeline but it's slow :(
Browse files Browse the repository at this point in the history
  • Loading branch information
Ashwin Sekar committed May 8, 2017
1 parent 4fe277f commit 2c09960
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 39 deletions.
17 changes: 7 additions & 10 deletions src/kernels/interpolate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,21 @@
#include "interpolate.h"

__global__ void kernelInterpolatePatch(
float* pDeviceRawDiff, float* pDeviceI, float* weight,
float* pDeviceRawDiff, const float* pDeviceI, float* weight,
int width_pad, int starty, int startx, int patchSize) {

int x = threadIdx.x + startx;
int y = blockIdx.x + starty;
int patchIdx = threadIdx.x + blockIdx.x * patchSize;

if (x < startx + patchSize && y < starty + patchSize) {
float* img_e = pDeviceI + x * 3;
float* img_a = img_e + y * width_pad * 3;
float* img_c = img_e + (y - 1) * width_pad * 3;
float* img_b = img_a - 3;
float* img_d = img_c - 3;
const float* img_e = pDeviceI + x * 3;
const float* img_a = img_e + y * width_pad * 3;
const float* img_c = img_e + (y - 1) * width_pad * 3;
const float* img_b = img_a - 3;
const float* img_d = img_c - 3;

int diff = x * 3 + y * width_pad * 3;
if (diff < 0 || patchIdx >= patchSize * patchSize) {
printf("DEVICE: uh oh. patchIdx %d, x %d, y %d, startx %d, starty %d\n", patchIdx, x, y, startx, starty);
}
pDeviceRawDiff[3 * patchIdx] =
weight[0] * (*img_a) + weight[1] * (*img_b) + weight[2] * (*img_c) + weight[3] * (*img_d);
++img_a; ++img_b; ++img_c; ++img_d;
Expand Down Expand Up @@ -64,7 +61,7 @@ __global__ void kernelNormalizeMean(
namespace cu {

void interpolatePatch(
float* pDeviceRawDiff, float* pDeviceI, float* weight,
float* pDeviceRawDiff, const float* pDeviceI, float* weight,
int width_pad, int starty, int startx, int patchSize) {

int nBlocks = patchSize;
Expand Down
2 changes: 1 addition & 1 deletion src/kernels/interpolate.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
namespace cu {

void interpolatePatch(
float* pDeviceRawDiff, float* pDeviceI, float* weight,
float* pDeviceRawDiff, const float* pDeviceI, float* weight,
int width_pad, int starty, int startx, int patchSize);

void normalizeMean(
Expand Down
25 changes: 1 addition & 24 deletions src/patch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,7 @@ namespace OFC {

void PatClass::SetTargetImage(const float * _I1) {

I1 = _I1;

int size = i_params->width_pad * i_params->height_pad * 3;
checkCudaErrors(
cudaMalloc ((void**) &pDeviceI, size * sizeof(float)) );

CUBLAS_CHECK (
cublasSetVector(size, sizeof(float),
I1, 1, pDeviceI, 1) );
pDeviceI = _I1;

ResetPatchState();

Expand Down Expand Up @@ -295,21 +287,6 @@ namespace OFC {
int lb = -op->patch_size / 2;
int patch_offset = 3 * ((x + lb) + (y + lb) * i_params->width_pad);

/*float* pDeviceI0, *pDeviceI0x, *pDeviceI0y;
int size = i_params->width_pad * i_params->height_pad * 3;
checkCudaErrors(
cudaMalloc ((void**) &pDeviceI0, size * sizeof(float)) );
checkCudaErrors(
cudaMalloc ((void**) &pDeviceI0x, size * sizeof(float)) );
checkCudaErrors(
cudaMalloc ((void**) &pDeviceI0y, size * sizeof(float)) );
CUBLAS_CHECK (
cublasSetVector(size, sizeof(float), I0, 1, pDeviceI0, 1) );
CUBLAS_CHECK (
cublasSetVector(size, sizeof(float), I0x, 1, pDeviceI0x, 1) );
CUBLAS_CHECK (
cublasSetVector(size, sizeof(float), I0y, 1, pDeviceI0y, 1) );*/

// Extract patch
checkCudaErrors(
cudaMemcpy2D (pDevicePatch, 3 * op->patch_size * sizeof(float),
Expand Down
3 changes: 1 addition & 2 deletions src/patch.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ namespace OFC {
void InterpolatePatch();


float* pDeviceI;
const float* pDeviceI;

float* pDevicePatch;
float* pDevicePatchX;
Expand All @@ -94,7 +94,6 @@ namespace OFC {
Eigen::Matrix<float, Eigen::Dynamic, 1> patch_y;

const float * I0, * I0x, * I0y;
const float * I1;

const img_params* i_params;
const opt_params* op;
Expand Down
2 changes: 0 additions & 2 deletions src/patchgrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,6 @@ namespace OFC {

void PatGridClass::AggregateFlowDense(float *flowout) const {

memset(flowout, 0, sizeof(float) * (2 * i_params->width * i_params->height));

// Device mem
checkCudaErrors(
cudaMemset (pDeviceWeights, 0.0, i_params->width * i_params->height * sizeof(float)) );
Expand Down

0 comments on commit 2c09960

Please sign in to comment.