From 2ae3b3bb177230dd0bd8db6f2d7fe8025dcdd954 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Wed, 10 May 2017 21:03:37 +0000 Subject: [PATCH] Fix Segfault issue (2D GPU free) --- src/patch.cpp | 8 ++++---- src/patchgrid.cpp | 35 ++++++++++++++++++++--------------- src/patchgrid.h | 1 + 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/patch.cpp b/src/patch.cpp index dbffe0d..8c4864c 100644 --- a/src/patch.cpp +++ b/src/patch.cpp @@ -44,12 +44,12 @@ namespace OFC { patch_x.resize(op->n_vals,1); patch_y.resize(op->n_vals,1); - checkCudaErrors( + /*checkCudaErrors( cudaMalloc ((void**) &pDevicePatch, patch.size() * sizeof(float)) ); checkCudaErrors( cudaMalloc ((void**) &pDevicePatchX, patch_x.size() * sizeof(float)) ); checkCudaErrors( - cudaMalloc ((void**) &pDevicePatchY, patch_y.size() * sizeof(float)) ); + cudaMalloc ((void**) &pDevicePatchY, patch_y.size() * sizeof(float)) );*/ checkCudaErrors( cudaMalloc ((void**) &pDeviceRawDiff, patch.size() * sizeof(float)) ); checkCudaErrors( @@ -76,9 +76,9 @@ namespace OFC { PatClass::~PatClass() { - cudaFree(pDevicePatch); + /*cudaFree(pDevicePatch); cudaFree(pDevicePatchX); - cudaFree(pDevicePatchY); + cudaFree(pDevicePatchY);*/ cudaFree(pDeviceRawDiff); cudaFree(pDeviceCostDiff); diff --git a/src/patchgrid.cpp b/src/patchgrid.cpp index ad2c216..42338ae 100644 --- a/src/patchgrid.cpp +++ b/src/patchgrid.cpp @@ -104,9 +104,9 @@ namespace OFC { pHostDevicePatchXs = new float*[n_patches]; pHostDevicePatchYs = new float*[n_patches]; - float* pHostDeviceTempXX[n_patches]; - float* pHostDeviceTempXY[n_patches]; - float* pHostDeviceTempYY[n_patches]; + pHostDeviceTempXX = new float*[n_patches]; + pHostDeviceTempXY = new float*[n_patches]; + pHostDeviceTempYY = new float*[n_patches]; for (int i = 0; i < n_patches; i++) { checkCudaErrors( @@ -156,13 +156,14 @@ namespace OFC { PatGridClass::~PatGridClass() { for (int i = 0; i < n_patches; ++i) { - cudaFree(pDevicePatches[i]); - cudaFree(pDevicePatchXs[i]); - 
cudaFree(pDevicePatchYs[i]); - cudaFree(pDeviceTempXX[i]); - cudaFree(pDeviceTempXY[i]); - cudaFree(pDeviceTempYY[i]); + checkCudaErrors( cudaFree(pHostDevicePatches[i]) ); + checkCudaErrors( cudaFree(pHostDevicePatchXs[i]) ); + checkCudaErrors( cudaFree(pHostDevicePatchYs[i]) ); + + cudaFree(pHostDeviceTempXX[i]); + cudaFree(pHostDeviceTempXY[i]); + cudaFree(pHostDeviceTempYY[i]); delete patches[i]; } @@ -175,6 +176,10 @@ namespace OFC { delete pHostDevicePatchXs; delete pHostDevicePatchYs; + delete[] pHostDeviceTempXX; /* allocated with new float*[n_patches] by this patch, so the array form of delete is required */ + delete[] pHostDeviceTempXY; + delete[] pHostDeviceTempYY; + delete midpointX_host; delete midpointY_host; cudaFree(pDeviceMidpointX); @@ -263,7 +268,7 @@ namespace OFC { void PatGridClass::AggregateFlowDense(float *flowout) { - bool isValid[n_patches]; + /*bool isValid[n_patches]; float flowXs[n_patches]; float flowYs[n_patches]; float* costs[n_patches]; @@ -295,7 +300,7 @@ namespace OFC { checkCudaErrors( cudaMemcpy(deviceFlowYs, flowYs, n_patches * sizeof(float), cudaMemcpyHostToDevice) ); checkCudaErrors( cudaMemcpy(deviceCosts, costs, - n_patches * sizeof(float*), cudaMemcpyHostToDevice) ); + n_patches * sizeof(float*), cudaMemcpyHostToDevice) );*/ gettimeofday(&tv_start, nullptr); @@ -306,12 +311,12 @@ namespace OFC { checkCudaErrors( cudaMemset (pDeviceFlowOut, 0.0, i_params->width * i_params->height * 2 * sizeof(float)) ); - cu::densifyPatches( + /*cu::densifyPatches( deviceCosts, pDeviceFlowOut, pDeviceWeights, deviceFlowXs, deviceFlowYs, deviceIsValid, pDeviceMidpointX, pDeviceMidpointY, n_patches, - op, i_params); - /*for (int ip = 0; ip < n_patches; ++ip) { + op, i_params);*/ + for (int ip = 0; ip < n_patches; ++ip) { if (patches[ip]->IsValid()) { const Eigen::Vector2f* fl = patches[ip]->GetCurP(); // flow displacement of this patch @@ -326,7 +331,7 @@ namespace OFC { op->patch_size, op->min_errval); } - }*/ + } gettimeofday(&tv_end, nullptr); aggregateTime += (tv_end.tv_sec - tv_start.tv_sec) * 1000.0f + diff --git a/src/patchgrid.h 
b/src/patchgrid.h index 36881f8..498a1ba 100644 --- a/src/patchgrid.h +++ b/src/patchgrid.h @@ -53,6 +53,7 @@ namespace OFC { // Hessian // TODO: Can we shared memory? float** pDeviceTempXX, ** pDeviceTempXY, ** pDeviceTempYY; + float** pHostDeviceTempXX, **pHostDeviceTempXY, **pHostDeviceTempYY; /* host-side arrays (new float*[n_patches]) whose elements are passed to cudaFree in ~PatGridClass */ float* pDeviceH00, * pDeviceH01, * pDeviceH11; float* H00, * H01, * H11;