From 28d7fd881b909c00b19149d0f15a9fd51e24917b Mon Sep 17 00:00:00 2001
From: Denis Alevi <mail@denisalevi.de>
Date: Wed, 14 Apr 2021 13:38:42 +0200
Subject: [PATCH] Fix bug when indexing StateMonitor with `[:3]` type indexing

Fixes #50. The issue was the same as in PR brian-team/brian2#1119. We
need to update the monitor's `N` variable on the CUDA side, and we need
to update the device variable because it is copied to the host variable
in `write_arrays()`. We also need to use `WRITES_TO_READ_ONLY_VARIABLES`;
otherwise `N` will be cached in brian2 and not loaded after simulation.
---
 brian2cuda/templates/statemonitor.cu | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/brian2cuda/templates/statemonitor.cu b/brian2cuda/templates/statemonitor.cu
index d8418b1c..5f0600c3 100644
--- a/brian2cuda/templates/statemonitor.cu
+++ b/brian2cuda/templates/statemonitor.cu
@@ -39,6 +39,15 @@ if (_num__array_{{owner.name}}__indices > 1024)
 // inefficient, can we keep the t values on the host instead? Do we need them
 // on the device?
 dev_dynamic_array_{{owner.name}}_t.push_back({{owner.clock.name}}.t[0]);
+// Update size variables for Python side indexing to work
+// (Note: Need to update device variable which will be copied to host in write_arrays())
+// TODO: This is one cudaMemcpy per time step, this should be done only once in the last
+// time step, fix when fixing the statemonitor (currently only works for <=1024 threads)
+_array_{{owner.name}}_N[0] += 1;
+CUDA_SAFE_CALL(
+        cudaMemcpy(dev_array_{{owner.name}}_N, _array_{{owner.name}}_N, sizeof(int32_t),
+                   cudaMemcpyHostToDevice)
+        );
 
 int num_iterations = {{owner.clock.name}}.i_end;
 int current_iteration = {{owner.clock.name}}.timestep[0];