diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1557bcc..04128e1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -102,7 +102,6 @@ set(CODEFILES # FDF1.0.1/image.c FDF1.0.1/image.cpp FDF1.0.1/opticalflow_aux.cpp - FDF1.0.1/solver.cpp ) # GrayScale, Optical Flow diff --git a/src/FDF1.0.1/solver.cpp b/src/FDF1.0.1/solver.cpp deleted file mode 100644 index 1f264b1..0000000 --- a/src/FDF1.0.1/solver.cpp +++ /dev/null @@ -1,381 +0,0 @@ -#include -#include -#include -#include -#include - -#include - -#include "image.h" -#include "solver.h" -#include "../kernels/flowUtil.h" - -// #include -// #if (VECTOR_WIDTH == 4) -// typedef float32x4_t v4sf; -// #else -// typedef float v4sf; -// #endif - -//THIS IS A SLOW VERSION BUT READABLE -//Perform n iterations of the sor_coupled algorithm -//du and dv are used as initial guesses -//The system form is the same as in opticalflow.c -void sor_coupled_slow_but_readable(image_t *du, image_t *dv, image_t *a11, image_t *a12, image_t *a22, const image_t *b1, const image_t *b2, const image_t *dpsis_horiz, const image_t *dpsis_vert, const int iterations, const float omega) -{ - - cu::sor(du->c1, dv->c1, a11->c1, a12->c1, a22->c1, - b1->c1, b2->c1, dpsis_horiz->c1, dpsis_vert->c1, - iterations, omega, du->height, du->width, du->stride); - -} - -// THIS IS A FASTER VERSION BUT UNREADABLE, ONLY OPTICAL FLOW WITHOUT OPENMP PARALLELIZATION -// the first iteration is separated from the other to compute the inverse of the 2x2 block diagonal -// each iteration is split in two first line / middle lines / last line, and the left block is computed separately on each line -// void sor_coupled(image_t *du, image_t *dv, image_t *a11, image_t *a12, image_t *a22, const image_t *b1, const image_t *b2, const image_t *dpsis_horiz, const image_t *dpsis_vert, const int iterations, const float omega){ -// //sor_coupled_slow(du,dv,a11,a12,a22,b1,b2,dpsis_horiz,dpsis_vert,iterations,omega); return; printf("test\n"); -// -// if(du->width<2 || du->height<2 || iterations < 1){ -// sor_coupled_slow_but_readable(du,dv,a11,a12,a22,b1,b2,dpsis_horiz,dpsis_vert,iterations,omega); -// return; -// } -// -// const int stride = du->stride, width = du->width; -// const int iterheight = du->height-1, iterline = (stride)/4, width_minus_1_sizeoffloat = sizeof(float)*(width-1); -// int j,iter,i,k; -// float *floatarray = (float*) memalign(16, stride*sizeof(float)*3); -// if(floatarray==NULL){ -// fprintf(stderr, "error in sor_coupled(): not enough memory\n"); -// exit(1); -// } -// float *f1 = floatarray; -// float *f2 = f1+stride; -// float *f3 = f2+stride; -// f1[0] = 0.0f; -// memset(&f1[width], 0, sizeof(float)*(stride-width)); -// memset(&f2[width-1], 0, sizeof(float)*(stride-width+1)); -// memset(&f3[width-1], 0, sizeof(float)*(stride-width+1)); -// -// { // first iteration -// v4sf *a11p = (v4sf*) a11->c1, *a12p = (v4sf*) a12->c1, *a22p = (v4sf*) a22->c1, *b1p = (v4sf*) b1->c1, *b2p = (v4sf*) b2->c1, *hp = (v4sf*) dpsis_horiz->c1, *vp = (v4sf*) dpsis_vert->c1; -// float *du_ptr = du->c1, *dv_ptr = dv->c1; -// v4sf *dub = (v4sf*) (du_ptr+stride), *dvb = (v4sf*) (dv_ptr+stride); -// -// { // first iteration - first line -// -// memcpy(f1+1, ((float*) hp), width_minus_1_sizeoffloat); -// memcpy(f2, du_ptr+1, width_minus_1_sizeoffloat); -// memcpy(f3, dv_ptr+1, width_minus_1_sizeoffloat); -// v4sf* hpl = (v4sf*) f1, *dur = (v4sf*) f2, *dvr = (v4sf*) f3; -// -// { // left block -// // reverse 2x2 diagonal block -// const v4sf dpsis = (*hpl) + (*hp) + (*vp); -// const v4sf A11 = (*a22p)+dpsis, A22 = (*a11p)+dpsis; -// const v4sf det = A11*A22 - (*a12p)*(*a12p); -// *a11p = A11/det; -// *a22p = A22/det; -// *a12p /= -det; -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vp)*(*dub) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vp)*(*dvb) + (*b2p); -// du_ptr[0] += omega*( a11p[0][0]*s1[0] + a12p[0][0]*s2[0] - du_ptr[0] ); -// dv_ptr[0] += omega*( a12p[0][0]*s1[0] + a22p[0][0]*s2[0] - dv_ptr[0] ); -// for(k=1;k<4;k++){ -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vp+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dub+=1; dvb +=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// for(i=iterline;--i;){ -// // reverse 2x2 diagonal block -// const v4sf dpsis = (*hpl) + (*hp) + (*vp); -// const v4sf A11 = (*a22p)+dpsis, A22 = (*a11p)+dpsis; -// const v4sf det = A11*A22 - (*a12p)*(*a12p); -// *a11p = A11/det; -// *a22p = A22/det; -// *a12p /= -det; -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vp)*(*dub) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vp)*(*dvb) + (*b2p); -// for(k=0;k<4;k++){ -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vp+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dub+=1; dvb +=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// -// } -// -// v4sf *vpt = (v4sf*) dpsis_vert->c1; -// v4sf *dut = (v4sf*) du->c1, *dvt = (v4sf*) dv->c1; -// -// for(j=iterheight;--j;){ // first iteration - middle lines -// memcpy(f1+1, ((float*) hp), width_minus_1_sizeoffloat); -// memcpy(f2, du_ptr+1, width_minus_1_sizeoffloat); -// memcpy(f3, dv_ptr+1, width_minus_1_sizeoffloat); -// v4sf* hpl = (v4sf*) f1, *dur = (v4sf*) f2, *dvr = (v4sf*) f3; -// -// { // left block -// // reverse 2x2 diagonal block -// const v4sf dpsis = (*hpl) + (*hp) + (*vpt) + (*vp); -// const v4sf A11 = (*a22p)+dpsis, A22 = (*a11p)+dpsis; -// const v4sf det = A11*A22 - (*a12p)*(*a12p); -// *a11p = A11/det; -// *a22p = A22/det; -// *a12p /= -det; -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vpt)*(*dut) + (*vp)*(*dub) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vpt)*(*dvt) + (*vp)*(*dvb) + (*b2p); -// du_ptr[0] += omega*( a11p[0][0]*s1[0] + a12p[0][0]*s2[0] - du_ptr[0] ); -// dv_ptr[0] += omega*( a12p[0][0]*s1[0] + a22p[0][0]*s2[0] - dv_ptr[0] ); -// for(k=1;k<4;k++){ -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vpt+=1; vp+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dut+=1; dvt+=1; dub+=1; dvb +=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// for(i=iterline;--i;){ -// // reverse 2x2 diagonal block -// const v4sf dpsis = (*hpl) + (*hp) + (*vpt) + (*vp); -// const v4sf A11 = (*a22p)+dpsis, A22 = (*a11p)+dpsis; -// const v4sf det = A11*A22 - (*a12p)*(*a12p); -// *a11p = A11/det; -// *a22p = A22/det; -// *a12p /= -det; -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vpt)*(*dut) + (*vp)*(*dub) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vpt)*(*dvt) + (*vp)*(*dvb) + (*b2p); -// for(k=0;k<4;k++){ -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vpt+=1; vp+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dut+=1; dvt+=1; dub+=1; dvb +=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// -// } -// -// { // first iteration - last line -// memcpy(f1+1, ((float*) hp), width_minus_1_sizeoffloat); -// memcpy(f2, du_ptr+1, width_minus_1_sizeoffloat); -// memcpy(f3, dv_ptr+1, width_minus_1_sizeoffloat); -// v4sf* hpl = (v4sf*) f1, *dur = (v4sf*) f2, *dvr = (v4sf*) f3; -// -// { // left block -// // reverse 2x2 diagonal block -// const v4sf dpsis = (*hpl) + (*hp) + (*vpt); -// const v4sf A11 = (*a22p)+dpsis, A22 = (*a11p)+dpsis; -// const v4sf det = A11*A22 - (*a12p)*(*a12p); -// *a11p = A11/det; -// *a22p = A22/det; -// *a12p /= -det; -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vpt)*(*dut) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vpt)*(*dvt) + (*b2p); -// du_ptr[0] += omega*( a11p[0][0]*s1[0] + a12p[0][0]*s2[0] - du_ptr[0] ); -// dv_ptr[0] += omega*( a12p[0][0]*s1[0] + a22p[0][0]*s2[0] - dv_ptr[0] ); -// for(k=1;k<4;k++){ -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vpt+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dut+=1; dvt+=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// for(i=iterline;--i;){ -// // reverse 2x2 diagonal block -// const v4sf dpsis = (*hpl) + (*hp) + (*vpt); -// const v4sf A11 = (*a22p)+dpsis, A22 = (*a11p)+dpsis; -// const v4sf det = A11*A22 - (*a12p)*(*a12p); -// *a11p = A11/det; -// *a22p = A22/det; -// *a12p /= -det; -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vpt)*(*dut) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vpt)*(*dvt) + (*b2p); -// for(k=0;k<4;k++){ -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vpt+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dut+=1; dvt+=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// -// } -// } -// -// -// -// for(iter=iterations;--iter;) // other iterations -// { -// v4sf *a11p = (v4sf*) a11->c1, *a12p = (v4sf*) a12->c1, *a22p = (v4sf*) a22->c1, *b1p = (v4sf*) b1->c1, *b2p = (v4sf*) b2->c1, *hp = (v4sf*) dpsis_horiz->c1, *vp = (v4sf*) dpsis_vert->c1; -// float *du_ptr = du->c1, *dv_ptr = dv->c1; -// v4sf *dub = (v4sf*) (du_ptr+stride), *dvb = (v4sf*) (dv_ptr+stride); -// -// { // other iteration - first line -// -// memcpy(f1+1, ((float*) hp), width_minus_1_sizeoffloat); -// memcpy(f2, du_ptr+1, width_minus_1_sizeoffloat); -// memcpy(f3, dv_ptr+1, width_minus_1_sizeoffloat); -// v4sf* hpl = (v4sf*) f1, *dur = (v4sf*) f2, *dvr = (v4sf*) f3; -// -// { // left block -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vp)*(*dub) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vp)*(*dvb) + (*b2p); -// du_ptr[0] += omega*( a11p[0][0]*s1[0] + a12p[0][0]*s2[0] - du_ptr[0] ); -// dv_ptr[0] += omega*( a12p[0][0]*s1[0] + a22p[0][0]*s2[0] - dv_ptr[0] ); -// for(k=1;k<4;k++){ -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vp+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dub+=1; dvb +=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// for(i=iterline;--i;){ -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vp)*(*dub) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vp)*(*dvb) + (*b2p); -// for(k=0;k<4;k++){ -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vp+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dub+=1; dvb +=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// -// } -// -// v4sf *vpt = (v4sf*) dpsis_vert->c1; -// v4sf *dut = (v4sf*) du->c1, *dvt = (v4sf*) dv->c1; -// -// for(j=iterheight;--j;) // other iteration - middle lines -// { -// memcpy(f1+1, ((float*) hp), width_minus_1_sizeoffloat); -// memcpy(f2, du_ptr+1, width_minus_1_sizeoffloat); -// memcpy(f3, dv_ptr+1, width_minus_1_sizeoffloat); -// v4sf* hpl = (v4sf*) f1, *dur = (v4sf*) f2, *dvr = (v4sf*) f3; -// -// { // left block -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vpt)*(*dut) + (*vp)*(*dub) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vpt)*(*dvt) + (*vp)*(*dvb) + (*b2p); -// du_ptr[0] += omega*( a11p[0][0]*s1[0] + a12p[0][0]*s2[0] - du_ptr[0] ); -// dv_ptr[0] += omega*( a12p[0][0]*s1[0] + a22p[0][0]*s2[0] - dv_ptr[0] ); -// for(k=1;k<4;k++) -// { -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vpt+=1; vp+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dut+=1; dvt+=1; dub+=1; dvb +=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// -// for(i=iterline; --i;) -// { -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vpt)*(*dut) + (*vp)*(*dub) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vpt)*(*dvt) + (*vp)*(*dvb) + (*b2p); -// for(k=0;k<4;k++) -// { -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vpt+=1; vp+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dut+=1; dvt+=1; dub+=1; dvb +=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// -// } -// -// { // other iteration - last line -// memcpy(f1+1, ((float*) hp), width_minus_1_sizeoffloat); -// memcpy(f2, du_ptr+1, width_minus_1_sizeoffloat); -// memcpy(f3, dv_ptr+1, width_minus_1_sizeoffloat); -// v4sf* hpl = (v4sf*) f1, *dur = (v4sf*) f2, *dvr = (v4sf*) f3; -// -// { // left block -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vpt)*(*dut) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vpt)*(*dvt) + (*b2p); -// du_ptr[0] += omega*( a11p[0][0]*s1[0] + a12p[0][0]*s2[0] - du_ptr[0] ); -// dv_ptr[0] += omega*( a12p[0][0]*s1[0] + a22p[0][0]*s2[0] - dv_ptr[0] ); -// for(k=1;k<4;k++){ -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vpt+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dut+=1; dvt+=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// for(i=iterline;--i;){ -// // do one iteration -// const v4sf s1 = (*hp)*(*dur) + (*vpt)*(*dut) + (*b1p); -// const v4sf s2 = (*hp)*(*dvr) + (*vpt)*(*dvt) + (*b2p); -// for(k=0;k<4;k++){ -// const float B1 = hpl[0][k]*du_ptr[k-1] + s1[k]; -// const float B2 = hpl[0][k]*dv_ptr[k-1] + s2[k]; -// du_ptr[k] += omega*( a11p[0][k]*B1 + a12p[0][k]*B2 - du_ptr[k] ); -// dv_ptr[k] += omega*( a12p[0][k]*B1 + a22p[0][k]*B2 - dv_ptr[k] ); -// } -// // increment pointer -// hpl+=1; hp+=1; vpt+=1; a11p+=1; a12p+=1; a22p+=1; -// dur+=1; dvr+=1; dut+=1; dvt+=1; b1p+=1; b2p+=1; -// du_ptr += 4; dv_ptr += 4; -// } -// -// } -// } -// -// -// -// free(floatarray); -// -// } - diff --git a/src/FDF1.0.1/solver.h b/src/FDF1.0.1/solver.h deleted file mode 100644 index ee75e7a..0000000 --- a/src/FDF1.0.1/solver.h +++ /dev/null @@ -1,17 +0,0 @@ -#include -#include - -#include "image.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Perform n iterations of the sor_coupled algorithm for a system of the form as described in opticalflow.c -void sor_coupled(image_t *du, image_t *dv, image_t *a11, image_t *a12, image_t *a22, const image_t *b1, const image_t *b2, const image_t *dpsis_horiz, const image_t *dpsis_vert, const int iterations, const float omega); - -void sor_coupled_slow_but_readable(image_t *du, image_t *dv, image_t *a11, image_t *a12, image_t *a22, const image_t *b1, const image_t *b2, const image_t *dpsis_horiz, const image_t *dpsis_vert, const int iterations, const float omega); - -#ifdef __cplusplus -} -#endif diff --git a/src/refine_variational.cpp b/src/refine_variational.cpp index 41d7b46..7b795a3 100644 --- a/src/refine_variational.cpp +++ b/src/refine_variational.cpp @@ -47,11 +47,6 @@ namespace OFC { vr.tmp_half_delta_over3 = vr.delta * 0.5f / 3.0f; vr.tmp_half_beta = vr.beta * 0.5f; - float deriv_filter[3] = {0.0f, -8.0f / 12.0f, 1.0f / 12.0f}; - deriv = convolution_new(2, deriv_filter, 0); - float deriv_filter_flow[2] = {0.0f, -0.5f}; - deriv_flow = convolution_new(1, deriv_filter_flow, 0); - float pHostColorDerivativeKernel[5] = { 1.0 / 12.0, 2.0 / 3.0, 0.0, -2.0 / 3.0, 1.0 / 12.0 }; checkCudaErrors( cudaMalloc((void**) &pDeviceColorDerivativeKernel, 5 * sizeof(float)) ); checkCudaErrors( @@ -117,10 +112,6 @@ namespace OFC { for (int i = 0; i < noparam; ++i ) image_delete(flow_sep[i]); - convolution_delete(deriv); - convolution_delete(deriv_flow); - - color_image_delete(I0); color_image_delete(I1); @@ -213,13 +204,11 @@ namespace OFC { calc_print_elapsed(("RefLevelOF " + iterStr + " laplacian").c_str(), start_lapalcian); // solve system -// #ifdef WITH_OPENMP auto start_sor = now(); - sor_coupled_slow_but_readable(du, dv, a11, a12, a22, b1, b2, smooth_horiz, smooth_vert, vr.solve_iter, vr.sor_omega); // slower but parallelized + cu::sor(du->c1, dv->c1, a11->c1, a12->c1, a22->c1, + b1->c1, b2->c1, smooth_horiz->c1, smooth_vert->c1, + vr.solve_iter, vr.sor_omega, du->height, du->width, du->stride); calc_print_elapsed(("RefLevelOF " + iterStr + " sor").c_str(), start_sor); -// #else -// sor_coupled(du, dv, a11, a12, a22, b1, b2, smooth_horiz, smooth_vert, vr.solve_iter, vr.sor_omega); -// #endif // update flow plus flow increment auto start_flow_update = now(); diff --git a/src/refine_variational.h b/src/refine_variational.h index 8495e55..2c3ad30 100644 --- a/src/refine_variational.h +++ b/src/refine_variational.h @@ -3,7 +3,6 @@ #include "FDF1.0.1/image.h" #include "FDF1.0.1/opticalflow_aux.h" -#include "FDF1.0.1/solver.h" #include "oflow.h"