Skip to content

Commit

Permalink
Free memory for delay (in sec) on device in synapse init
Browse files Browse the repository at this point in the history
That array in device memory is not used. Freeing it allows simulation of
larger networks in our benchmarks, but introduces a bug when
trying to change delays between multiple `run()` statements, see #83.
The commit includes tests, which should pass as soon as #83 and #86 are
fixed.
  • Loading branch information
denisalevi committed Mar 16, 2018
1 parent 688c14f commit 335c42a
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 11 deletions.
10 changes: 10 additions & 0 deletions brian2cuda/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,16 @@ def build(self, directory='output',
'and "device.activate()". Note that you '
'will have to set build options (e.g. the '
'directory) and defaultclock.dt again.')
# TODO: remove this when #83 is fixed
if not self.build_on_run:
run_count = 0
for func, args in self.main_queue:
if func == 'run_network':
run_count += 1
if run_count > 1:
logger.warn("Multiple run statements are currently error prone. "
"See #83, #85, #86.")

renames = {'project_dir': 'directory',
'compile_project': 'compile',
'run_project': 'run'}
Expand Down
2 changes: 1 addition & 1 deletion brian2cuda/templates/objects.cu
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ void _write_arrays()
{% endfor %}

{% for var, varname in dynamic_array_specs | dictsort(by='value') %}
{% if not var in multisynaptic_idx_vars %}
{% if not var in multisynaptic_idx_vars and not var.name == 'delay' %}
{{varname}} = dev{{varname}};
{% endif %}
ofstream outfile_{{varname}};
Expand Down
25 changes: 15 additions & 10 deletions brian2cuda/templates/synapses_initialise_queue.cu
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,7 @@ void _run_{{pathobj}}_initialise_queue()

// pre neuron IDs, post neuron IDs and delays for all synapses (sorted by synapse IDs)
//TODO: for multiple SynapticPathways for the same Synapses object (on_pre and on_post) the following copy is identical in both pathways initialise templates
{% if no_or_const_delay_mode %}
// delay (on host) was potentially set in main and needs to be copied to device for later use
dev{{_dynamic_delay}} = {{_dynamic_delay}};
{% else %}
{% if not no_or_const_delay_mode %}
// delay (on device) was set in group_variable_set_conditional and needs to be copied to host
{{_dynamic_delay}} = dev{{_dynamic_delay}};
{% endif %}
Expand Down Expand Up @@ -173,6 +170,12 @@ void _run_{{pathobj}}_initialise_queue()
if (scalar_delay)
{{owner.name}}_delay = max_delay;
{% endif %}
// Delete delay (in sec) on device, we don't need it
// TODO: don't copy these delays to the device in first place, see #83
dev{{_dynamic_delay}}.clear();
dev{{_dynamic_delay}}.shrink_to_fit();
CUDA_CHECK_MEMORY();
size_t used_device_memory_after_dealloc = used_device_memory;

///////////////////////////////////////
// Create arrays for device pointers //
Expand Down Expand Up @@ -707,12 +710,14 @@ void _run_{{pathobj}}_initialise_queue()
CUDA_CHECK_MEMORY();
double time_passed = (double)(std::clock() - start_timer) / CLOCKS_PER_SEC;
std::cout << "INFO: {{pathobj}} initialisation took " << time_passed << "s";
if (used_device_memory > used_device_memory_start)
double tot_memory_MB = (used_device_memory - used_device_memory_start) * to_MB;
std::cout << " and used " << tot_memory_MB << "MB of device memory.";
else if (used_device_memory < used_device_memory_start)
double tot_memory_MB = (used_device_memory_start - used_device_memory) * to_MB;
std::cout << " and freed " << tot_memory_MB << "MB of device memory.";
if (used_device_memory_after_dealloc < used_device_memory_start){
size_t freed_bytes = used_device_memory_start - used_device_memory_after_dealloc;
std::cout << ", freed " << freed_bytes * to_MB << "MB";
}
if (used_device_memory > used_device_memory_start){
size_t used_bytes = used_device_memory - used_device_memory_start;
std::cout << " and used " << used_bytes * to_MB << "MB of device memory.";
}
std::cout << std::endl;
}

Expand Down
59 changes: 59 additions & 0 deletions brian2cuda/tests/test_network_multiple_runs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from nose import with_setup
from nose.plugins.attrib import attr
# numpy.testing.utils is a private module path that was deprecated and later
# removed from numpy; the assertion helpers are part of the public
# numpy.testing API in every numpy version, so import them from there.
from numpy.testing import assert_allclose, assert_equal, assert_raises

from brian2 import *
from brian2.devices.device import reinit_devices

import brian2cuda


@attr('standalone-compatible', 'multiple-runs')
@with_setup(teardown=reinit_devices)
def test_changing_delay_scalar():
    # Regression test: changing a homogeneous (scalar) synaptic delay between
    # two run() calls in CUDA standalone mode (see brian2cuda issue #83).
    # Expected to pass once #83 / #86 are fixed.

    # build_on_run=False so both run() calls end up in a single standalone build.
    set_device('cuda_standalone', directory=None, build_on_run=False)
    # Input neuron spikes on every time step (threshold is always True).
    inG = NeuronGroup(1, 'v : 1', threshold='True')
    G = NeuronGroup(1, 'v : 1')
    G.v[:] = 0
    S = Synapses(inG, G, on_pre='v += 1')
    S.connect()
    S.delay[:] = 1*defaultclock.dt
    mon = StateMonitor(G, 'v', record=True)

    run(1*defaultclock.dt)
    # Change the scalar delay between runs -- this is the behaviour under test.
    S.delay[:] = 2*defaultclock.dt
    run(5*defaultclock.dt)

    device.build(direct_call=False, **device.build_options)

    # mon.v[i, t]
    assert_allclose(mon.v[:], [[0, 0, 1, 1, 2, 3]])


@attr('standalone-compatible', 'multiple-runs')
@with_setup(teardown=reinit_devices)
def test_changing_delay_heterogeneous():
    # Regression test: changing heterogeneous (per-synapse) delays between
    # two run() calls in CUDA standalone mode (see brian2cuda issue #83).
    # Expected to pass once #83 / #86 are fixed.

    # build_on_run=False so both run() calls end up in a single standalone build.
    set_device('cuda_standalone', directory=None, build_on_run=False)
    # Input neuron spikes on every time step (threshold is always True).
    inG = NeuronGroup(1, 'v : 1', threshold='True')
    G = NeuronGroup(2, 'v : 1')
    G.v[:] = 0
    S = Synapses(inG, G, on_pre='v += 1')
    S.connect()
    # Delay depends on the postsynaptic index j: 0*dt for j=0, 1*dt for j=1.
    S.delay[:] = '1*j*dt'
    mon = StateMonitor(G, 'v', record=True)

    run(1*defaultclock.dt)
    # Change the per-synapse delays between runs -- the behaviour under test.
    S.delay[:] = '2*j*dt'
    run(5*defaultclock.dt)

    device.build(direct_call=False, **device.build_options)

    # mon.v[i, t]
    assert_allclose(mon.v[0, :], [0, 1, 2, 3, 4, 5])
    assert_allclose(mon.v[1, :], [0, 0, 1, 1, 2, 3])

# Allow running this test module directly (outside the nose test runner).
if __name__ == '__main__':
    #test_changing_delay_scalar()
    test_changing_delay_heterogeneous()

0 comments on commit 335c42a

Please sign in to comment.