Skip to content

Commit

Permalink
Fixing tests and OpenCL support working with REAL
Browse files Browse the repository at this point in the history
  • Loading branch information
kunal-puri committed Apr 25, 2011
1 parent 8564d3a commit 5e932b2
Show file tree
Hide file tree
Showing 15 changed files with 205 additions and 142 deletions.
95 changes: 52 additions & 43 deletions bench/cl_summation_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,60 +34,69 @@
print("Device max clock speed:", device.max_clock_frequency, 'MHz')
print("Device compute units:", device.max_compute_units)

# create the partice array with doubles as the default

pa = base.get_particle_array(cl_precision="single",
name="test", x=x,h=h,m=m,rho=rho)

particles = base.Particles(arrays=[pa,] ,
locator_type=NSquareNeighborLocator)

kernel = base.CubicSplineKernel(dim=1)
precision_types = ['single']

device_extensions = device.get_info(cl.device_info.EXTENSIONS)
if 'cl_khr_fp64' in device_extensions:
precision_types.append('double')

for prec in precision_types:

# create the function
func = sph.SPHRho.get_func(pa,pa)
print "\n========================================================"
print "Summation Density Comparison using %s precision"%(prec)

pa = base.get_particle_array(cl_precision=prec,
name="test", x=x,h=h,m=m,rho=rho)

# create the CLCalc object
cl_calc = sph.CLCalc(particles=particles, sources=[pa,], dest=pa,
kernel=kernel, funcs=[func,], updates=['rho'] )
particles = base.Particles(arrays=[pa,] ,
locator_type=NSquareNeighborLocator)

# create a normal calc object
calc = sph.SPHCalc(particles=particles, sources=[pa,], dest=pa,
kernel=kernel, funcs=[func,], updates=['rho'] )
kernel = base.CubicSplineKernel(dim=1)

# create the function
func = sph.SPHRho.get_func(pa,pa)

# setup OpenCL for PySPH
cl_calc.setup_cl(ctx)
# create the CLCalc object
cl_calc = sph.CLCalc(particles=particles, sources=[pa,], dest=pa,
kernel=kernel, funcs=[func,], updates=['rho'] )

# evaluate pysph on the OpenCL device!
t1 = time.time()
cl_calc.sph()
cl_elapsed = time.time() - t1
print "Execution time for PyOpenCL: %g s" %(cl_elapsed)
# create a normal calc object
calc = sph.SPHCalc(particles=particles, sources=[pa,], dest=pa,
kernel=kernel, funcs=[func,], updates=['rho'] )

# Read the buffer contents
t1 = time.time()
pa.read_from_buffer()
read_elapsed = time.time() - t1

# setup OpenCL for PySPH
cl_calc.setup_cl(ctx)

print "Read from buffer time: %g s "%(read_elapsed)
# evaluate pysph on the OpenCL device!
t1 = time.time()
cl_calc.sph()
cl_elapsed = time.time() - t1
print "Execution time for PyOpenCL: %g s" %(cl_elapsed)

cl_rho = pa.get('tmpx').copy()
# Read the buffer contents
t1 = time.time()
pa.read_from_buffer()
read_elapsed = time.time() - t1

# Do the same thing with Cython.
t1 = time.time()
calc.sph('tmpx')
cython_elapsed = time.time() - t1
print "Execution time for PySPH Cython: %g s" %(cython_elapsed)
print "Read from buffer time: %g s "%(read_elapsed)

cl_rho = pa.get('tmpx').copy()

# Do the same thing with Cython.
t1 = time.time()
calc.sph('tmpx')
cython_elapsed = time.time() - t1
print "Execution time for PySPH Cython: %g s" %(cython_elapsed)

# Compare the results
# Compare the results

cython_rho = pa.get('tmpx')
diff = sum(abs(cl_rho - cython_rho))
cython_rho = pa.get('tmpx')
diff = sum(abs(cl_rho - cython_rho))

if diff/np < 1e-6:
print "CL == Cython: True"
print "Speedup: %g "%(cython_elapsed/cl_elapsed)
else:
print "Results Don't Match!"
if diff/np < 1e-6:
print "CL == Cython: True"
print "Speedup: %g "%(cython_elapsed/cl_elapsed)
else:
print "Results Don't Match!"

62 changes: 31 additions & 31 deletions source/pysph/base/kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,68 +2,68 @@

#define PI 2.0f*acos(0.0f)

float cubic_spline_fac(unsigned int dim, float h)
REAL cubic_spline_fac(unsigned int dim, REAL h)
{
float fac = 0.0f;
REAL fac = 0.0;

if ( dim == 1 )
fac = ( 2.0f/3.0f ) / (h);
fac = ( 2.0/3.0 ) / (h);

if (dim == 2 )
fac = ( 10.0f ) / ( 7.0f*PI ) / ( h*h );
fac = ( 10.0 ) / ( 7.0*PI ) / ( h*h );

if ( dim == 3 )
fac = 1.0f /( PI*h*h*h );
fac = 1.0 /( PI*h*h*h );

return fac;
}

float cubic_spline_function(float4 pa, float4 pb, unsigned int dim)
REAL cubic_spline_function(REAL4 pa, REAL4 pb, unsigned int dim)
{
// {s0, s1, s2, s3} == {x, y, z, h}

float4 rab = pa - pb;
REAL4 rab = pa - pb;

float h = 0.5f * ( pa.s3 + pb.s3 );
float q = length(rab)/h;
REAL h = 0.5 * ( pa.s3 + pb.s3 );
REAL q = length(rab)/h;

float val = 0.0f;
float fac = cubic_spline_fac(dim, h);
REAL val = 0.0;
REAL fac = cubic_spline_fac(dim, h);

if ( q >= 0.0f )
val = 1.0f - 1.5f * (q*q) * (1.0f - 0.5f * q);
if ( q >= 0.0 )
val = 1.0 - 1.5 * (q*q) * (1.0 - 0.5 * q);

if ( q >= 1.0f )
val = 0.25f * (2.0f-q) * (2.0f-q) * (2.0f-q);
if ( q >= 1.0 )
val = 0.25 * (2.0-q) * (2.0-q) * (2.0-q);

if ( q >= 2.0f )
val = 0.0f;
if ( q >= 2.0 )
val = 0.0;

return val * fac;
}

void cubic_spline_gradient(float4 pa, float4 pb, float4* grad,
void cubic_spline_gradient(REAL4 pa, REAL4 pb, REAL4* grad,
unsigned int dim)
{
float4 rab = pa - pb;
REAL4 rab = pa - pb;

float h = 0.5f * ( pa.s3 + pb.s3 );
float q = length(rab)/h;
REAL h = 0.5 * ( pa.s3 + pb.s3 );
REAL q = length(rab)/h;

float val = 0.0f;
float fac = cubic_spline_fac(dim, h);
REAL val = 0.0;
REAL fac = cubic_spline_fac(dim, h);

if ( q == 0.0f )
val = 0.0f;
if ( q == 0.0 )
val = 0.0;

if ( q > 0.0f )
val = 3.0f*(0.75f*q - 1.0f)/(h*h);
if ( q > 0.0 )
val = 3.0*(0.75*q - 1.0)/(h*h);

if ( q >= 1.0f )
val = -0.75f * (2.0f-q) * (2.0f-q)/(h * length(rab));
if ( q >= 1.0 )
val = -0.75 * (2.0-q) * (2.0-q)/(h * length(rab));

if ( q >= 2.0f )
val = 0.0f;
if ( q >= 2.0 )
val = 0.0;

val *= fac;

Expand Down
5 changes: 3 additions & 2 deletions source/pysph/base/nnps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -799,8 +799,9 @@ cdef class NNPSManager:
"""
if dest is None:
return NbrParticleLocatorBase(source, self.cell_manager,
locator_type=self.locator_type)
loc = NbrParticleLocatorBase(source, self.cell_manager)
loc.set_locator_type(self.locator_type)
return loc
else:
self.add_interaction(source, dest, radius_scale)
return self.get_cached_locator(source.name, dest.name, radius_scale)
Expand Down
15 changes: 9 additions & 6 deletions source/pysph/base/particle_array.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1173,23 +1173,26 @@ cdef class ParticleArray:
""" Create device arrays for the device associated with the
CommandQueue provided.
Parameters:
------------
quque -- OpenCL CommandQueue
Notes:
------
The default floating point representation is `float` which
corresponds to numpy.float32
"""
queue = self.queue

if self.cl_setup_done:
return

if self.cl_precision == "double":
if 'cl_khr_fp64' not in queue.device.extensions:
devname = queue.device.name
msg = "Device %s does not support double precision"%(devname)
raise RuntimeError(msg)

if not cl_utils.HAS_CL:
raise RuntimeWarning, "PyOpenCL not found!"
raise RuntimeWarning("PyOpenCL not found!")

np = self.get_number_of_particles()
self.cl_properties = {}
Expand Down
2 changes: 1 addition & 1 deletion source/pysph/base/particles.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def get_particle_array(cl_precision="double", **props):
# Add the property idx
if not prop_dict.has_key('idx') and np != 0:
prop_dict['idx'] = {'name':'idx', 'data':numpy.arange(np),
'type':'int'}
'type':'long'}

#handle the name and particle_type information separately

Expand Down
30 changes: 21 additions & 9 deletions source/pysph/base/tests/test_particle_array_cl.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,16 @@ def test_create_cl_buffers(self):
# read the contents of the OpenCL buffer in a dummy array
_array = numpy.ones_like(pysph_arr)

carray = pa.properties[prop]
carray = pa.properties[prop]
dtype = carray.get_c_type()
if dtype == "double":
_array = _array.astype(numpy.float32)
if dtype == "long":
_array = _array.astype(numpy.int32)
if pa.cl_precision == "single":

if dtype == "double":
_array = _array.astype(numpy.float32)
pysph_arr = pysph_arr.astype(numpy.float32)
if dtype == "long":
_array = _array.astype(numpy.int32)
pysph_arr = pysph_arr.astype(numpy.int32)

cl.enqueue_read_buffer(self.queue, buffer, _array).wait()

Expand All @@ -97,7 +101,7 @@ def test_create_cl_buffers(self):
np = len(_array)

for i in range(np):
self.assertEqual( _array[i], pysph_arr[i] )
self.assertAlmostEqual( _array[i], pysph_arr[i], 10 )

def test_read_from_buffer(self):

Expand All @@ -106,11 +110,20 @@ def test_read_from_buffer(self):
# create the OpenCL buffers
pa.setup_cl(self.ctx, self.queue)

# copy the array contents to local copies
copies = {}
for prop in pa.properties:
copies[prop] = pa.get(prop).copy()
ctype = pa.properties[prop].get_c_type()

if pa.cl_precision == "single":
if ctype == "double":
copies[prop] = pa.get(prop).astype(numpy.float32)
if ctype in ['long', 'int']:
copies[prop] = pa.get(prop).astype(numpy.int32)
else:
copies[prop] = pa.get(prop).copy()

# read the contents back into the array
# read the buffer contents back into PySPH arrays

pa.read_from_buffer()

Expand All @@ -120,7 +133,6 @@ def test_read_from_buffer(self):
orig_array = copies.get(prop)

self.assertEqual( len(buffer_array), len(orig_array) )

for i in range( len(buffer_array) ):
self.assertAlmostEqual(buffer_array[i], orig_array[i], 10 )

Expand Down
3 changes: 2 additions & 1 deletion source/pysph/solver/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@
from utils import savez, savez_compressed, get_distributed_particles, mkdir, \
get_pickled_data, get_pysph_root

from cl_utils import HAS_CL, get_cl_devices, get_cl_include, get_scalar_buffer
from cl_utils import HAS_CL, get_cl_devices, get_cl_include, \
get_scalar_buffer, cl_read, get_real
12 changes: 6 additions & 6 deletions source/pysph/solver/cl_common.cl
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
#include "cl_common.h"

// set the tmpx, tmpy, tmpz to 0.0
__kernel void set_tmp_to_zero(__global float* tmpx,
__global float* tmpy,
__global float* tmpz)
__kernel void set_tmp_to_zero(__global REAL* tmpx,
__global REAL* tmpy,
__global REAL* tmpz)
{
unsigned int work_dim = get_work_dim();
unsigned int gid = get_gid(work_dim);

tmpx[gid] = 0.0f;
tmpy[gid] = 0.0f;
tmpz[gid] = 0.0f;
tmpx[gid] = 0.0;
tmpy[gid] = 0.0;
tmpz[gid] = 0.0;

}

Loading

0 comments on commit 5e932b2

Please sign in to comment.