Fixing tests and OpenCL support working with REAL

pankajp · Apr 25, 2011 · 5e932b2 · 5e932b2
1 parent 8564d3a
commit 5e932b2
Show file tree

Hide file tree

Showing 15 changed files with 205 additions and 142 deletions.
diff --git a/bench/cl_summation_density.py b/bench/cl_summation_density.py
@@ -34,60 +34,69 @@
         print("Device max clock speed:", device.max_clock_frequency, 'MHz')
         print("Device compute units:", device.max_compute_units)
 
-        # create the partice array with doubles as the default
-
-        pa = base.get_particle_array(cl_precision="single",
-                                     name="test", x=x,h=h,m=m,rho=rho)
-
-        particles = base.Particles(arrays=[pa,] ,
-                                   locator_type=NSquareNeighborLocator)
-
-        kernel = base.CubicSplineKernel(dim=1)
+        precision_types = ['single']
+
+        device_extensions = device.get_info(cl.device_info.EXTENSIONS)
+        if 'cl_khr_fp64' in device_extensions:
+            precision_types.append('double')
+
+        for prec in precision_types:
 
-        # create the function
-        func = sph.SPHRho.get_func(pa,pa)
+            print "\n========================================================"
+            print "Summation Density Comparison using %s precision"%(prec)
+
+            pa = base.get_particle_array(cl_precision=prec,
+                                         name="test", x=x,h=h,m=m,rho=rho)
 
-        # create the CLCalc object
-        cl_calc = sph.CLCalc(particles=particles, sources=[pa,], dest=pa,
-                             kernel=kernel, funcs=[func,], updates=['rho'] )
+            particles = base.Particles(arrays=[pa,] ,
+                                       locator_type=NSquareNeighborLocator)
 
-        # create a normal calc object
-        calc = sph.SPHCalc(particles=particles, sources=[pa,], dest=pa,
-                           kernel=kernel, funcs=[func,], updates=['rho'] )
+            kernel = base.CubicSplineKernel(dim=1)
 
+            # create the function
+            func = sph.SPHRho.get_func(pa,pa)
 
-        # setup OpenCL for PySPH
-        cl_calc.setup_cl(ctx)
+            # create the CLCalc object
+            cl_calc = sph.CLCalc(particles=particles, sources=[pa,], dest=pa,
+                                 kernel=kernel, funcs=[func,], updates=['rho'] )
 
-        # evaluate pysph on the OpenCL device!
-        t1 = time.time()
-        cl_calc.sph()
-        cl_elapsed = time.time() - t1
-        print "Execution time for PyOpenCL: %g s" %(cl_elapsed)
+            # create a normal calc object
+            calc = sph.SPHCalc(particles=particles, sources=[pa,], dest=pa,
+                               kernel=kernel, funcs=[func,], updates=['rho'] )
 
-        # Read the buffer contents
-        t1 = time.time()
-        pa.read_from_buffer()
-        read_elapsed = time.time() - t1
+
+            # setup OpenCL for PySPH
+            cl_calc.setup_cl(ctx)
 
-        print "Read from buffer time: %g s "%(read_elapsed)
+            # evaluate pysph on the OpenCL device!
+            t1 = time.time()
+            cl_calc.sph()
+            cl_elapsed = time.time() - t1
+            print "Execution time for PyOpenCL: %g s" %(cl_elapsed)
 
-        cl_rho = pa.get('tmpx').copy()
+            # Read the buffer contents
+            t1 = time.time()
+            pa.read_from_buffer()
+            read_elapsed = time.time() - t1
 
-        # Do the same thing with Cython.
-        t1 = time.time()
-        calc.sph('tmpx')
-        cython_elapsed = time.time() - t1
-        print "Execution time for PySPH Cython: %g s" %(cython_elapsed)
+            print "Read from buffer time: %g s "%(read_elapsed)
+
+            cl_rho = pa.get('tmpx').copy()
+
+            # Do the same thing with Cython.
+            t1 = time.time()
+            calc.sph('tmpx')
+            cython_elapsed = time.time() - t1
+            print "Execution time for PySPH Cython: %g s" %(cython_elapsed)
 
-        # Compare the results
+            # Compare the results
 
-        cython_rho = pa.get('tmpx')
-        diff = sum(abs(cl_rho - cython_rho))
+            cython_rho = pa.get('tmpx')
+            diff = sum(abs(cl_rho - cython_rho))
 
-        if diff/np < 1e-6:
-            print "CL == Cython: True"
-            print "Speedup: %g "%(cython_elapsed/cl_elapsed)
-        else:
-            print "Results Don't Match!"
+            if diff/np < 1e-6:
+                print "CL == Cython: True"
+                print "Speedup: %g "%(cython_elapsed/cl_elapsed)
+            else:
+                print "Results Don't Match!"
 
diff --git a/source/pysph/base/kernels.h b/source/pysph/base/kernels.h
@@ -2,68 +2,68 @@
 
 #define PI 2.0f*acos(0.0f)
 
-float cubic_spline_fac(unsigned int dim, float h)
+REAL cubic_spline_fac(unsigned int dim, REAL h)
 {
-  float fac = 0.0f;
+  REAL fac = 0.0;
 
   if ( dim == 1 )
-    fac = ( 2.0f/3.0f ) / (h);
+    fac = ( 2.0/3.0 ) / (h);
 
   if (dim == 2 )
-    fac = ( 10.0f ) / ( 7.0f*PI ) / ( h*h );
+    fac = ( 10.0 ) / ( 7.0*PI ) / ( h*h );
 
   if ( dim == 3 )
-    fac = 1.0f /( PI*h*h*h );
+    fac = 1.0 /( PI*h*h*h );
 
   return fac;
 }
 
-float cubic_spline_function(float4 pa, float4 pb, unsigned int dim)
+REAL cubic_spline_function(REAL4 pa, REAL4 pb, unsigned int dim)
 {
   // {s0, s1, s2, s3} == {x, y, z, h}
 
-  float4 rab = pa - pb;
+  REAL4 rab = pa - pb;
 
-  float h = 0.5f * ( pa.s3 + pb.s3 );
-  float q = length(rab)/h;
+  REAL h = 0.5 * ( pa.s3 + pb.s3 );
+  REAL q = length(rab)/h;
 
-  float val = 0.0f;
-  float fac = cubic_spline_fac(dim, h);
+  REAL val = 0.0;
+  REAL fac = cubic_spline_fac(dim, h);
 
-  if ( q >= 0.0f )
-    val = 1.0f - 1.5f * (q*q) * (1.0f - 0.5f * q);
+  if ( q >= 0.0 )
+    val = 1.0 - 1.5 * (q*q) * (1.0 - 0.5 * q);
 
-  if ( q >= 1.0f )
-    val = 0.25f * (2.0f-q) * (2.0f-q) * (2.0f-q);
+  if ( q >= 1.0 )
+    val = 0.25 * (2.0-q) * (2.0-q) * (2.0-q);
 
-  if ( q >= 2.0f )
-      val = 0.0f;
+  if ( q >= 2.0 )
+      val = 0.0;
 
   return val * fac;
 }
 
-void cubic_spline_gradient(float4 pa, float4 pb, float4* grad, 
+void cubic_spline_gradient(REAL4 pa, REAL4 pb, REAL4* grad, 
 			   unsigned int dim)
 {
-  float4 rab = pa - pb;
+  REAL4 rab = pa - pb;
 
-  float h = 0.5f * ( pa.s3 + pb.s3 );
-  float q = length(rab)/h;
+  REAL h = 0.5 * ( pa.s3 + pb.s3 );
+  REAL q = length(rab)/h;
 
-  float val = 0.0f;
-  float fac = cubic_spline_fac(dim, h);
+  REAL val = 0.0;
+  REAL fac = cubic_spline_fac(dim, h);
 
-  if ( q == 0.0f )
-    val = 0.0f;
+  if ( q == 0.0 )
+    val = 0.0;
 
-  if ( q > 0.0f )
-    val = 3.0f*(0.75f*q - 1.0f)/(h*h);
+  if ( q > 0.0 )
+    val = 3.0*(0.75*q - 1.0)/(h*h);
 
-  if ( q >= 1.0f )
-    val = -0.75f * (2.0f-q) * (2.0f-q)/(h * length(rab));
+  if ( q >= 1.0 )
+    val = -0.75 * (2.0-q) * (2.0-q)/(h * length(rab));
 
-  if ( q >= 2.0f )
-    val = 0.0f;
+  if ( q >= 2.0 )
+    val = 0.0;
 
   val *= fac;
 

diff --git a/source/pysph/base/nnps.pyx b/source/pysph/base/nnps.pyx
@@ -799,8 +799,9 @@ cdef class NNPSManager:
         
         """
         if dest is None:
-            return NbrParticleLocatorBase(source, self.cell_manager,
-                                          locator_type=self.locator_type)
+            loc = NbrParticleLocatorBase(source, self.cell_manager)
+            loc.set_locator_type(self.locator_type)
+            return loc
         else:
             self.add_interaction(source, dest, radius_scale)
             return self.get_cached_locator(source.name, dest.name, radius_scale)

diff --git a/source/pysph/base/particle_array.pyx b/source/pysph/base/particle_array.pyx
@@ -1173,23 +1173,26 @@ cdef class ParticleArray:
         """ Create device arrays for the device associated with the
         CommandQueue provided.
 
-        Parameters:
-        ------------
-
-        quque -- OpenCL CommandQueue
-
         Notes:
         ------
 
         The default floating point representation is `float` which
         corresponds to numpy.float32
 
         """
+        queue = self.queue
+
         if self.cl_setup_done:
             return
+
+        if self.cl_precision == "double":
+            if 'cl_khr_fp64' not in queue.device.extensions:
+                devname = queue.device.name
+                msg = "Device %s does not support double precision"%(devname)
+                raise RuntimeError(msg)
 
         if not cl_utils.HAS_CL:
-            raise RuntimeWarning, "PyOpenCL not found!"
+            raise RuntimeWarning("PyOpenCL not found!")
 
         np = self.get_number_of_particles()
         self.cl_properties = {}

diff --git a/source/pysph/base/particles.py b/source/pysph/base/particles.py
@@ -329,7 +329,7 @@ def get_particle_array(cl_precision="double", **props):
     # Add the property idx
     if not prop_dict.has_key('idx') and np != 0:
         prop_dict['idx'] = {'name':'idx', 'data':numpy.arange(np),
-                            'type':'int'}
+                            'type':'long'}
 
     #handle the name and particle_type information separately
 

diff --git a/source/pysph/base/tests/test_particle_array_cl.py b/source/pysph/base/tests/test_particle_array_cl.py
@@ -83,12 +83,16 @@ def test_create_cl_buffers(self):
             # read the contents of the OpenCL buffer in a dummy array
             _array = numpy.ones_like(pysph_arr)
 
-            carray = pa.properties[prop]
+            carray = pa.properties[prop] 
             dtype = carray.get_c_type()
-            if dtype == "double":
-                _array = _array.astype(numpy.float32)
-            if dtype == "long":
-                _array = _array.astype(numpy.int32)
+            if pa.cl_precision == "single":            
+
+                if dtype == "double":
+                    _array = _array.astype(numpy.float32)
+                    pysph_arr = pysph_arr.astype(numpy.float32)
+                if dtype == "long":
+                    _array = _array.astype(numpy.int32)
+                    pysph_arr = pysph_arr.astype(numpy.int32)
 
             cl.enqueue_read_buffer(self.queue, buffer, _array).wait()
 
@@ -97,7 +101,7 @@ def test_create_cl_buffers(self):
             np = len(_array)
 
             for i in range(np):
-                self.assertEqual( _array[i], pysph_arr[i] )
+                self.assertAlmostEqual( _array[i], pysph_arr[i], 10 )
 
     def test_read_from_buffer(self):
 
@@ -106,11 +110,20 @@ def test_read_from_buffer(self):
         # create the OpenCL buffers
         pa.setup_cl(self.ctx, self.queue)
 
+        # copy the array contents to local copies 
         copies = {}
         for prop in pa.properties:
-            copies[prop] = pa.get(prop).copy()
+            ctype = pa.properties[prop].get_c_type()
+
+            if pa.cl_precision == "single":
+                if ctype == "double":
+                    copies[prop] = pa.get(prop).astype(numpy.float32)
+                if ctype in ['long', 'int']:
+                    copies[prop] = pa.get(prop).astype(numpy.int32)
+            else:
+                copies[prop] = pa.get(prop).copy()
 
-        # read the contents back into the array
+        # read the buffer contents back into PySPH arrays
 
         pa.read_from_buffer()
 
@@ -120,7 +133,6 @@ def test_read_from_buffer(self):
             orig_array = copies.get(prop)
 
             self.assertEqual( len(buffer_array), len(orig_array) )
-
             for i in range( len(buffer_array) ):
                 self.assertAlmostEqual(buffer_array[i], orig_array[i], 10 )
 

diff --git a/source/pysph/solver/api.py b/source/pysph/solver/api.py
@@ -26,4 +26,5 @@
 from utils import savez, savez_compressed, get_distributed_particles, mkdir, \
     get_pickled_data, get_pysph_root
 
-from cl_utils import HAS_CL, get_cl_devices, get_cl_include, get_scalar_buffer
+from cl_utils import HAS_CL, get_cl_devices, get_cl_include, \
+     get_scalar_buffer, cl_read, get_real
diff --git a/source/pysph/solver/cl_common.cl b/source/pysph/solver/cl_common.cl
@@ -1,16 +1,16 @@
 #include "cl_common.h"
 
 // set the tmpx, tmpy, tmpz to 0.0
-__kernel void set_tmp_to_zero(__global float* tmpx,
-			      __global float* tmpy,
-			      __global float* tmpz)
+__kernel void set_tmp_to_zero(__global REAL* tmpx,
+			      __global REAL* tmpy,
+			      __global REAL* tmpz)
 {
   unsigned int work_dim = get_work_dim();
   unsigned int gid = get_gid(work_dim);
 
-  tmpx[gid] = 0.0f;
-  tmpy[gid] = 0.0f;
-  tmpz[gid] = 0.0f;
+  tmpx[gid] = 0.0;
+  tmpy[gid] = 0.0;
+  tmpz[gid] = 0.0;
 
 }