Merge pull request #138 from brian-team/single_precision_support
denisalevi authored Jul 16, 2018
2 parents b00b7fd + 29911fd commit c3eee3b
Showing 13 changed files with 105 additions and 106 deletions.
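The headline change in this merge is single-precision support: the dedicated curand float-type preference is dropped and the generated CUDA code follows Brian2's `core.default_float_dtype` preference instead. A minimal usage sketch, assuming brian2cuda is installed and registers the `cuda_standalone` device on import; the model, initial values and build directory are made up for illustration:

```python
from numpy import float32

from brian2 import NeuronGroup, prefs, run, set_device, ms, mV
import brian2cuda  # assumption: importing brian2cuda registers 'cuda_standalone'

set_device('cuda_standalone', directory='single_precision_demo')

# After this merge, floating-point state variables and the generated
# curand random-number buffers follow this one preference:
prefs['core.default_float_dtype'] = float32

G = NeuronGroup(1000, 'dv/dt = -v / (10*ms) : volt')
G.v = '(i / 1000.0) * mV'  # made-up initial condition
run(100*ms)                # triggers the standalone build
```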
6 changes: 3 additions & 3 deletions .gitmodules
@@ -1,9 +1,9 @@
[submodule "brian2_frozen"]
path = frozen_repos/brian2
url = https://github.com/brian-team/brian2.git
[submodule "genn_frozen"]
path = frozen_repos/genn
url = https://github.com/genn-team/genn.git
[submodule "brian2genn_frozen"]
path = frozen_repos/brian2genn
url = https://github.com/brian-team/brian2genn.git
[submodule "frozen_repos/brian2"]
path = frozen_repos/brian2
url = https://github.com/denisalevi/brian2.git
3 changes: 1 addition & 2 deletions brian2cuda/brianlib/spikequeue.h
@@ -14,7 +14,6 @@ using namespace std;
// variables (delays, dt) are assumed to use the same data type
typedef int32_t DTYPE_int;

template <class scalar>
class CudaSpikeQueue
{
private:
@@ -74,7 +73,7 @@ class CudaSpikeQueue
int tid,
int num_threads,
int _num_blocks,
scalar _dt,
double _dt,
int _neuron_N,
int _syn_N,
int _num_queues,
18 changes: 9 additions & 9 deletions brian2cuda/cuda_generator.py
@@ -33,8 +33,8 @@
types is not type safe. And convertion from 64bit integral types to double precision (64bit)
floating-point types neither. In those cases the closest higher or lower (implementation
defined) representable value will be selected.''',
validator=lambda v: v in ['single_precision', 'double_precision'],
default='double_precision')
validator=lambda v: v in ['float32', 'float64'],
default='float64')
)


@@ -258,7 +258,7 @@ def translate_one_statement_sequence(self, statements, scalar=False):
brian_funcs = re.search('_brian_(' + '|'.join(functions_C99) + ')', line)
if brian_funcs is not None:
for identifier in get_identifiers(line):
if convertion_pref == 'double_precision':
if convertion_pref == 'float64':
# 64bit integer to floating-point conversions are not type safe
int64_type = re.search(r'\bu?int64_t\s*{}\b'.format(identifier), code)
if int64_type is not None:
@@ -269,20 +269,20 @@ def translate_one_statement_sequence(self, statements, scalar=False):
"statement:\n\t{}\nGenerated from abstract code statements:\n\t{}\n".format(line, statements),
once=True)
self.warned_integral_convertion = True
self.previous_convertion_pref = 'double_precision'
else: # convertion_pref = 'single_precision'
self.previous_convertion_pref = 'float64'
else: # convertion_pref = 'float32'
# 32bit and 64bit integer to floating-point conversions are not type safe
int32_64_type = re.search(r'\bu?int(32|64)_t\s*{}\b'.format(identifier), code)
if int32_64_type is not None:
logger.warn("Detected code statement with default function and 32bit or 64bit integer type in the same line and the "
"preference for default_functions_integral_convertion is 'single_precision'. "
"preference for default_functions_integral_convertion is 'float32'. "
"Using 32bit or 64bit integer types as default function arguments is not type safe due to convertion of "
"integer to single-precision floating-point types in device code. (relevant functions: sin, cos, tan, sinh, "
"cosh, tanh, exp, log, log10, sqrt, ceil, floor, arcsin, arccos, arctan)\nDetected code "
"statement:\n\t{}\nGenerated from abstract code statements:\n\t{}\n".format(line, statements),
once=True)
self.warned_integral_convertion = True
self.previous_convertion_pref = 'single_precision'
self.previous_convertion_pref = 'float32'
return stripped_deindented_lines(code)

def denormals_to_zero_code(self):
@@ -374,10 +374,10 @@ def determine_keywords(self):
support_code = ''
hash_defines = ''
# set convertion types for standard C99 functions in device code
if prefs.codegen.generators.cuda.default_functions_integral_convertion == 'double_precision':
if prefs.codegen.generators.cuda.default_functions_integral_convertion == 'float64':
default_func_type = 'double'
other_func_type = 'float'
else: # 'single_precision'
else: # 'float32'
default_func_type = 'float'
other_func_type = 'double'
for varname, variable in self.variables.items():
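The value rename above aligns the `default_functions_integral_convertion` preference with the numpy dtype names used elsewhere in Brian2 ('float32'/'float64' instead of 'single_precision'/'double_precision'). A hedged sketch of setting it; the preference name is taken verbatim from the diff, and I assume the CUDA code-generator preferences become available once brian2cuda is imported:

```python
from brian2 import prefs
import brian2cuda  # assumption: importing brian2cuda registers the CUDA codegen preferences

# Default is 'float64'. With 'float32', applying a C99 default function such as
# sin/exp/sqrt to an int32/int64 variable logs the type-safety warning quoted
# above, because the argument is converted to single precision in device code.
prefs['codegen.generators.cuda.default_functions_integral_convertion'] = 'float32'
```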
43 changes: 18 additions & 25 deletions brian2cuda/device.py
@@ -69,13 +69,6 @@
validator=lambda v: isinstance(v, int) and v >= 0,
default=128),

curand_float_type=BrianPreference(
docs='''
Floating point type of generated random numbers (float/double).
''',
validator=lambda v: v in ['float', 'double'],
default='float'),

launch_bounds=BrianPreference(
docs='''
Weather or not to use `__launch_bounds__` to optimise register usage in kernels.
@@ -266,9 +259,9 @@ def code_object(self, owner, name, abstract_code, variables, template_name,
for varname in variables.iterkeys():
if varname in read_write:
idx = variable_indices[varname]
if idx == '_presynaptic_idx':
if idx == '_presynaptic_idx' or varname == 'i':
self.delete_synaptic_pre[synaptic_pre_array_name] = False
if idx == '_postsynaptic_idx':
if idx == '_postsynaptic_idx' or varname == 'j':
self.delete_synaptic_post[synaptic_post_array_name] = False
if template_name == "synapses":
prepost = template_kwds['pathway'].prepost
@@ -343,7 +336,7 @@ def generate_objects_source(self, writer, arange_arrays, synapses, static_array_
num_parallel_blocks=num_parallel_blocks,
curand_generator_type=curand_generator_type,
curand_generator_ordering=curand_generator_ordering,
curand_float_type=prefs['devices.cuda_standalone.curand_float_type'],
curand_float_type=c_data_type(prefs['core.default_float_dtype']),
eventspace_arrays=self.eventspace_arrays,
multisynaptic_idx_vars=multisyn_vars,
profiled_codeobjects=self.profiled_codeobjects)
@@ -531,13 +524,13 @@ def generate_codeobj_source(self, writer):
cudaMalloc((void**)&dev_array_randn, sizeof({dtype})*{number_elements}*{codeobj.randn_calls})
);
curandGenerateNormal{curand_suffix}(curand_generator, dev_array_randn, {number_elements}*{codeobj.randn_calls}, 0, 1);
'''.format(number_elements=number_elements, codeobj=codeobj, dtype=prefs['devices.cuda_standalone.curand_float_type'],
curand_suffix='Double' if prefs['devices.cuda_standalone.curand_float_type']=='double' else '')
'''.format(number_elements=number_elements, codeobj=codeobj, dtype=c_data_type(prefs['core.default_float_dtype']),
curand_suffix='Double' if prefs['core.default_float_dtype']=='float64' else '')
additional_code.append(code_snippet)
line = "{dtype}* par_array_{name}_randn".format(dtype=prefs['devices.cuda_standalone.curand_float_type'], name=codeobj.name)
line = "{dtype}* par_array_{name}_randn".format(dtype=c_data_type(prefs['core.default_float_dtype']), name=codeobj.name)
device_parameters_lines.append(line)
kernel_variables_lines.append("{dtype}* _ptr_array_{name}_randn = par_array_{name}_randn;".format(dtype=prefs['devices.cuda_standalone.curand_float_type'],
name=codeobj.name))
kernel_variables_lines.append("{dtype}* _ptr_array_{name}_randn = par_array_{name}_randn;".format(dtype=c_data_type(prefs['core.default_float_dtype']),
name=codeobj.name))
host_parameters_lines.append("dev_array_randn")
elif k == "_python_rand" and codeobj.runs_every_tick == False and codeobj.template_name != "synapses_create_generator":
code_snippet = '''
@@ -547,13 +540,13 @@ def generate_codeobj_source(self, writer):
cudaMalloc((void**)&dev_array_rand, sizeof({dtype})*{number_elements}*{codeobj.rand_calls})
);
curandGenerateUniform{curand_suffix}(curand_generator, dev_array_rand, {number_elements}*{codeobj.rand_calls});
'''.format(number_elements=number_elements, codeobj=codeobj, dtype=prefs['devices.cuda_standalone.curand_float_type'],
curand_suffix='Double' if prefs['devices.cuda_standalone.curand_float_type']=='double' else '')
'''.format(number_elements=number_elements, codeobj=codeobj, dtype=c_data_type(prefs['core.default_float_dtype']),
curand_suffix='Double' if prefs['core.default_float_dtype']=='float64' else '')
additional_code.append(code_snippet)
line = "{dtype}* par_array_{name}_rand".format(dtype=prefs['devices.cuda_standalone.curand_float_type'], name=codeobj.name)
line = "{dtype}* par_array_{name}_rand".format(dtype=c_data_type(prefs['core.default_float_dtype']), name=codeobj.name)
device_parameters_lines.append(line)
kernel_variables_lines.append("{dtype}* _ptr_array_{name}_rand = par_array_{name}_rand;".format(dtype=prefs['devices.cuda_standalone.curand_float_type'],
name=codeobj.name))
kernel_variables_lines.append("{dtype}* _ptr_array_{name}_rand = par_array_{name}_rand;".format(dtype=c_data_type(prefs['core.default_float_dtype']),
name=codeobj.name))
host_parameters_lines.append("dev_array_rand")
elif isinstance(v, ArrayVariable):
if k in ['t', 'timestep', '_clock_t', '_clock_timestep', '_source_t', '_source_timestep'] and v.scalar: # monitors have not scalar t variables
@@ -605,15 +598,15 @@ def generate_codeobj_source(self, writer):
# TODO can we just include this in the k == '_python_rand' test above?
if codeobj.rand_calls >= 1 and codeobj.runs_every_tick:
host_parameters_lines.append("dev_{name}_rand".format(name=codeobj.name))
device_parameters_lines.append("{dtype}* par_array_{name}_rand".format(dtype=prefs['devices.cuda_standalone.curand_float_type'],
device_parameters_lines.append("{dtype}* par_array_{name}_rand".format(dtype=c_data_type(prefs['core.default_float_dtype']),
name=codeobj.name))
kernel_variables_lines.append("{dtype}* _ptr_array_{name}_rand = par_array_{name}_rand;".format(dtype=prefs['devices.cuda_standalone.curand_float_type'],
kernel_variables_lines.append("{dtype}* _ptr_array_{name}_rand = par_array_{name}_rand;".format(dtype=c_data_type(prefs['core.default_float_dtype']),
name=codeobj.name))
if codeobj.randn_calls >= 1 and codeobj.runs_every_tick:
host_parameters_lines.append("dev_{name}_randn".format(name=codeobj.name))
device_parameters_lines.append("{dtype}* par_array_{name}_randn".format(dtype=prefs['devices.cuda_standalone.curand_float_type'],
device_parameters_lines.append("{dtype}* par_array_{name}_randn".format(dtype=c_data_type(prefs['core.default_float_dtype']),
name=codeobj.name))
kernel_variables_lines.append("{dtype}* _ptr_array_{name}_randn = par_array_{name}_randn;".format(dtype=prefs['devices.cuda_standalone.curand_float_type'],
kernel_variables_lines.append("{dtype}* _ptr_array_{name}_randn = par_array_{name}_randn;".format(dtype=c_data_type(prefs['core.default_float_dtype']),
name=codeobj.name))

# Sometimes an array is referred to by to different keys in our
@@ -663,7 +656,7 @@ def generate_rand_source(self, writer):
codeobj_with_rand=codeobj_with_rand,
codeobj_with_randn=codeobj_with_randn,
profiled=self.enable_profiling,
curand_float_type=prefs['devices.cuda_standalone.curand_float_type'])
curand_float_type=c_data_type(prefs['core.default_float_dtype']))
writer.write('rand.*', rand_tmp)

def copy_source_files(self, writer, directory):
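In `device.py`, every use of the removed `devices.cuda_standalone.curand_float_type` preference is replaced by deriving the C type from `core.default_float_dtype` via `c_data_type`, and the curand generator suffix is chosen from the same preference. A rough sketch of that mapping; the import path of `c_data_type` is my assumption, the diff only shows the call:

```python
from numpy import float32, float64
from brian2.codegen.generators.cpp_generator import c_data_type  # assumed import path

for dtype in (float32, float64):
    c_type = c_data_type(dtype)                    # expected: 'float' / 'double'
    curand_suffix = 'Double' if dtype is float64 else ''
    print(c_type,
          'curandGenerateUniform{}'.format(curand_suffix),
          'curandGenerateNormal{}'.format(curand_suffix))
```

curandGenerateUniform/curandGenerateNormal produce single-precision numbers and the Double-suffixed variants produce double-precision numbers, which is why the suffix is tied to the selected dtype.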
10 changes: 3 additions & 7 deletions brian2cuda/templates/objects.cu
@@ -75,7 +75,6 @@ const int brian::_num_{{name}} = {{N}};
//////////////// synapses /////////////////
{% for S in synapses | sort(attribute='name') %}
// {{S.name}}
Synapses<double> brian::{{S.name}}({{S.source|length}}, {{S.target|length}});
int32_t {{S.name}}_source_start_index;
int32_t {{S.name}}_source_stop_index;
bool brian::{{S.name}}_multiple_pre_post = false;
@@ -95,7 +94,7 @@ __device__ int32_t** brian::{{path.name}}_synapse_ids_by_pre;
__device__ int32_t* brian::{{path.name}}_synapse_ids;
__device__ int* brian::{{path.name}}_unique_delay_start_idcs;
__device__ int* brian::{{path.name}}_unique_delays_offset_by_pre;
__device__ SynapticPathway<double> brian::{{path.name}};
__device__ SynapticPathway brian::{{path.name}};
int brian::{{path.name}}_eventspace_idx = 0;
int brian::{{path.name}}_delay;
bool brian::{{path.name}}_scalar_delay;
@@ -113,7 +112,6 @@ int brian::num_threads_per_warp;
__global__ void {{path.name}}_init(
int Nsource,
int Ntarget,
double* delays,
int32_t* sources,
int32_t* targets,
double dt,
@@ -123,7 +121,7 @@ __global__ void {{path.name}}_init(
{
using namespace brian;

{{path.name}}.init(Nsource, Ntarget, delays, sources, targets, dt, start, stop);
{{path.name}}.init(Nsource, Ntarget, sources, targets, dt, start, stop);
}
{% endfor %}
{% endfor %}
@@ -186,7 +184,6 @@ void _init_arrays()
{{path.name}}_init<<<1,1>>>(
{{path.source|length}},
{{path.target|length}},
thrust::raw_pointer_cast(&dev{{dynamic_array_specs[path.variables['delay']]}}[0]),
thrust::raw_pointer_cast(&dev{{dynamic_array_specs[path.synapse_sources]}}[0]),
thrust::raw_pointer_cast(&dev{{dynamic_array_specs[path.synapse_targets]}}[0]),
0, //was dt, maybe irrelevant?
@@ -551,7 +548,6 @@ extern const int _num_{{name}};
//////////////// synapses /////////////////
{% for S in synapses | sort(attribute='name') %}
// {{S.name}}
extern Synapses<double> {{S.name}};
extern bool {{S.name}}_multiple_pre_post;
{% for path in S._pathways | sort(attribute='name') %}
extern __device__ int* {{path.name}}_num_synapses_by_pre;
@@ -568,7 +564,7 @@ extern __device__ int32_t** {{path.name}}_synapse_ids_by_pre;
extern __device__ int32_t* {{path.name}}_synapse_ids;
extern __device__ int* {{path.name}}_unique_delay_start_idcs;
extern __device__ int* {{path.name}}_unique_delays_offset_by_pre;
extern __device__ SynapticPathway<double> {{path.name}};
extern __device__ SynapticPathway {{path.name}};
extern int {{path.name}}_eventspace_idx;
extern int {{path.name}}_delay;
extern bool {{path.name}}_scalar_delay;
6 changes: 4 additions & 2 deletions brian2cuda/templates/ratemonitor.cu
@@ -41,8 +41,10 @@ __launch_bounds__(1024, {{sm_multiplier}})
{% endif %}
kernel_{{codeobj_name}}(
int32_t current_iteration,
double* ratemonitor_rate,
double* ratemonitor_t,
{% set c_type = c_data_type(variables['rate'].dtype) %}
{{c_type}}* ratemonitor_rate,
{% set c_type = c_data_type(variables['t'].dtype) %}
{{c_type}}* ratemonitor_t,
///// DEVICE_PARAMETERS /////
%DEVICE_PARAMETERS%
)
39 changes: 5 additions & 34 deletions brian2cuda/templates/synapses_classes.cu
@@ -11,18 +11,13 @@

#include "brianlib/spikequeue.h"

template<class scalar> class Synapses;
template<class scalar> class SynapticPathway;

template <class scalar>
class SynapticPathway
{
public:
// total number of neurons in source and target NeuronGroup / Subgroup
int Nsource;
int Ntarget;

scalar* dev_delay;
int32_t* dev_sources;
int32_t* dev_targets;

@@ -31,23 +26,22 @@ public:
int spikes_start;
int spikes_stop;

scalar dt;
CudaSpikeQueue<scalar>* queue;
double dt;
CudaSpikeQueue* queue;
bool no_or_const_delay_mode;

//our real constructor
__device__ void init(int _Nsource, int _Ntarget, scalar* d_delay, int32_t* _sources,
int32_t* _targets, scalar _dt, int _spikes_start, int _spikes_stop)
__device__ void init(int _Nsource, int _Ntarget, int32_t* _sources,
int32_t* _targets, double _dt, int _spikes_start, int _spikes_stop)
{
Nsource = _Nsource;
Ntarget = _Ntarget;
dev_delay = d_delay;
dev_sources = _sources;
dev_targets = _targets;
dt = _dt;
spikes_start = _spikes_start;
spikes_stop = _spikes_stop;
queue = new CudaSpikeQueue<scalar>;
queue = new CudaSpikeQueue;
};

//our real destructor
@@ -57,29 +51,6 @@ public:
delete queue;
}
};

template <class scalar>
class Synapses
{
public:
int _N_value;
inline double _N() { return _N_value;};
int Nsource;
int Ntarget;
std::vector< std::vector<int> > _pre_synaptic;
std::vector< std::vector<int> > _post_synaptic;

Synapses(int _Nsource, int _Ntarget)
: Nsource(_Nsource), Ntarget(_Ntarget)
{
for(int i=0; i<Nsource; i++)
_pre_synaptic.push_back(std::vector<int>());
for(int i=0; i<Ntarget; i++)
_post_synaptic.push_back(std::vector<int>());
_N_value = 0;
};
};

#endif

{% endmacro %}
4 changes: 0 additions & 4 deletions brian2cuda/tests/features/cuda_configuration.py
@@ -163,10 +163,6 @@ class CUDAStandaloneConfigurationNoAssert(CUDAStandaloneConfigurationBase):
name = 'CUDA standalone (asserts disabled)'
device_kwargs = {'disable_asserts': True}

class CUDAStandaloneConfigurationCurandDouble(CUDAStandaloneConfigurationBase):
name = 'CUDA standalone (curand_float_type = double)'
extra_prefs = {'devices.cuda_standalone.curand_float_type': 'double'}

class CUDAStandaloneConfigurationNoCudaOccupancyAPI(CUDAStandaloneConfigurationBase):
name = 'CUDA standalone (not using cuda occupancy API)'
extra_prefs = {'devices.cuda_standalone.calc_occupancy': False}
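With the curand-specific benchmark configuration removed, an equivalent single-precision configuration would now be expressed through `core.default_float_dtype`. The class below is a hypothetical sketch for illustration, not part of this commit:

```python
from numpy import float32

from brian2cuda.tests.features.cuda_configuration import CUDAStandaloneConfigurationBase


class CUDAStandaloneConfigurationSinglePrecision(CUDAStandaloneConfigurationBase):
    # Hypothetical: runs the standard benchmarks with single-precision state variables.
    name = 'CUDA standalone (single precision)'
    extra_prefs = {'core.default_float_dtype': float32}
```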