
Commit

Optimized taichi customized cpu kernels about event csr matvec and csr matvec

Routhleck committed Dec 14, 2023
1 parent 5e8f2cf commit 2319169
Showing 2 changed files with 128 additions and 101 deletions.
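The change replaces each combined CPU kernel, which branched at run time on `values.shape[0] == 1`, with a dedicated homogeneous (`homo`, a single shared weight) and heterogeneous (`heter`, one weight per stored connection) kernel, mirroring the existing GPU kernel split. The dispatch that picks among the registered primitives is not part of this diff; the sketch below is only an illustration, with a hypothetical helper name, of how such a selection could look, assuming it sits next to the primitive definitions in `_csr_matvec_taichi.py`.

import jax.numpy as jnp

def _pick_event_csrmv_prim(values, events, transpose: bool):
  # Hypothetical selector, not the code BrainPy actually uses.
  # homo primitives take a length-1 `values` array (one weight shared by all
  # connections); heter primitives take one weight per CSR entry.
  # Boolean event vectors map to the *_bool_* kernels, float event vectors to
  # the thresholded (> 0.) kernels.
  is_homo = jnp.size(values) == 1
  is_bool = events.dtype == jnp.bool_
  if transpose:
    if is_bool:
      return _event_csrmv_transpose_bool_homo_p if is_homo else _event_csrmv_transpose_bool_heter_p
    return _event_csrmv_transpose_homo_p if is_homo else _event_csrmv_transpose_heter_p
  if is_bool:
    return _event_csrmv_bool_homo_p if is_homo else _event_csrmv_bool_heter_p
  return _event_csrmv_homo_p if is_homo else _event_csrmv_heter_p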
152 changes: 85 additions & 67 deletions brainpy/_src/math/event/_csr_matvec_taichi.py
@@ -20,101 +20,119 @@
 ]

 ### CPU

+'''
+According to the benchmarks, all transpose kernels should be serialized.
+'''
 @ti.kernel
-def _event_csr_matvec_transpose_bool_cpu(values: ti.types.ndarray(ndim=1),
+def _event_csr_matvec_transpose_bool_homo_cpu(values: ti.types.ndarray(ndim=1),
                                               indices: ti.types.ndarray(ndim=1),
                                               indptr: ti.types.ndarray(ndim=1),
                                               events: ti.types.ndarray(ndim=1),
                                               out: ti.types.ndarray(ndim=1)):
-  if values.shape[0] == 1:
-    value = values[0]
-    ti.loop_config(serialize=True)
-    for row_i in range(indptr.shape[0] - 1):
-      if events[row_i]:
-        for j in range(indptr[row_i], indptr[row_i + 1]):
-          out[indices[j]] += value
+  value = values[0]
+  ti.loop_config(serialize=True)
+  for row_i in range(indptr.shape[0] - 1):
+    if events[row_i]:
+      for j in range(indptr[row_i], indptr[row_i + 1]):
+        out[indices[j]] += value

-  else:
-    ti.loop_config(serialize=True)
-    for row_i in range(indptr.shape[0] - 1):
-      if events[row_i]:
-        for j in range(indptr[row_i], indptr[row_i + 1]):
-          out[indices[j]] += values[j]
+
+@ti.kernel
+def _event_csr_matvec_transpose_bool_heter_cpu(values: ti.types.ndarray(ndim=1),
+                                               indices: ti.types.ndarray(ndim=1),
+                                               indptr: ti.types.ndarray(ndim=1),
+                                               events: ti.types.ndarray(ndim=1),
+                                               out: ti.types.ndarray(ndim=1)):
+  ti.loop_config(serialize=True)
+  for row_i in range(indptr.shape[0] - 1):
+    if events[row_i]:
+      for j in range(indptr[row_i], indptr[row_i + 1]):
+        out[indices[j]] += values[j]


 @ti.kernel
-def _event_csr_matvec_transpose_cpu(values: ti.types.ndarray(ndim=1),
+def _event_csr_matvec_transpose_homo_cpu(values: ti.types.ndarray(ndim=1),
                                          indices: ti.types.ndarray(ndim=1),
                                          indptr: ti.types.ndarray(ndim=1),
                                          events: ti.types.ndarray(ndim=1),
                                          out: ti.types.ndarray(ndim=1)):
-  if values.shape[0] == 1:
-    value = values[0]
-    ti.loop_config(serialize=True)
-    for row_i in range(indptr.shape[0] - 1):
-      if events[row_i] > 0.:
-        for j in range(indptr[row_i], indptr[row_i + 1]):
-          out[indices[j]] += value
+  value = values[0]
+  ti.loop_config(serialize=True)
+  for row_i in range(indptr.shape[0] - 1):
+    if events[row_i] > 0.:
+      for j in range(indptr[row_i], indptr[row_i + 1]):
+        out[indices[j]] += value

-  else:
-    ti.loop_config(serialize=True)
-    for row_i in range(indptr.shape[0] - 1):
-      if events[row_i] > 0.:
-        for j in range(indptr[row_i], indptr[row_i + 1]):
-          out[indices[j]] += values[j]
+
+@ti.kernel
+def _event_csr_matvec_transpose_heter_cpu(values: ti.types.ndarray(ndim=1),
+                                          indices: ti.types.ndarray(ndim=1),
+                                          indptr: ti.types.ndarray(ndim=1),
+                                          events: ti.types.ndarray(ndim=1),
+                                          out: ti.types.ndarray(ndim=1)):
+  ti.loop_config(serialize=True)
+  for row_i in range(indptr.shape[0] - 1):
+    if events[row_i] > 0.:
+      for j in range(indptr[row_i], indptr[row_i + 1]):
+        out[indices[j]] += values[j]


 @ti.kernel
-def _event_csr_matvec_bool_cpu(values: ti.types.ndarray(ndim=1),
+def _event_csr_matvec_bool_homo_cpu(values: ti.types.ndarray(ndim=1),
                                     indices: ti.types.ndarray(ndim=1),
                                     indptr: ti.types.ndarray(ndim=1),
                                     events: ti.types.ndarray(ndim=1),
                                     out: ti.types.ndarray(ndim=1)):
-  if values.shape[0] == 1:
-    value = values[0]
-    # ti.loop_config(serialize=True)
-    for row_i in range(indptr.shape[0] - 1):
-      r = 0.
-      for j in range(indptr[row_i], indptr[row_i + 1]):
-        if events[indices[j]]:
-          r += value
-      out[row_i] = r
+  value = values[0]
+  # ti.loop_config(serialize=True)
+  for row_i in range(indptr.shape[0] - 1):
+    r = 0.
+    for j in range(indptr[row_i], indptr[row_i + 1]):
+      if events[indices[j]]:
+        r += value
+    out[row_i] = r

-  else:
-    # ti.loop_config(serialize=True)
-    for row_i in range(indptr.shape[0] - 1):
-      r = 0.
-      for j in range(indptr[row_i], indptr[row_i + 1]):
-        if events[indices[j]]:
-          r += values[j]
-      out[row_i] = r
+
+@ti.kernel
+def _event_csr_matvec_bool_heter_cpu(values: ti.types.ndarray(ndim=1),
+                                     indices: ti.types.ndarray(ndim=1),
+                                     indptr: ti.types.ndarray(ndim=1),
+                                     events: ti.types.ndarray(ndim=1),
+                                     out: ti.types.ndarray(ndim=1)):
+  # ti.loop_config(serialize=True)
+  for row_i in range(indptr.shape[0] - 1):
+    r = 0.
+    for j in range(indptr[row_i], indptr[row_i + 1]):
+      if events[indices[j]]:
+        r += values[j]
+    out[row_i] = r


 @ti.kernel
-def _event_csr_matvec_cpu(values: ti.types.ndarray(ndim=1),
+def _event_csr_matvec_homo_cpu(values: ti.types.ndarray(ndim=1),
                                indices: ti.types.ndarray(ndim=1),
                                indptr: ti.types.ndarray(ndim=1),
                                events: ti.types.ndarray(ndim=1),
                                out: ti.types.ndarray(ndim=1)):
-  if values.shape[0] == 1:
-    value = values[0]
-    # ti.loop_config(serialize=True)
-    for row_i in range(indptr.shape[0] - 1):
-      r = 0.
-      for j in range(indptr[row_i], indptr[row_i + 1]):
-        if events[indices[j]] > 0.:
-          r += value
-      out[row_i] = r
+  value = values[0]
+  # ti.loop_config(serialize=True)
+  for row_i in range(indptr.shape[0] - 1):
+    r = 0.
+    for j in range(indptr[row_i], indptr[row_i + 1]):
+      if events[indices[j]] > 0.:
+        r += value
+    out[row_i] = r

-  else:
-    # ti.loop_config(serialize=True)
-    for row_i in range(indptr.shape[0] - 1):
-      r = 0.
-      for j in range(indptr[row_i], indptr[row_i + 1]):
-        if events[indices[j]] > 0.:
-          r += values[j]
-      out[row_i] = r
+
+@ti.kernel
+def _event_csr_matvec_heter_cpu(values: ti.types.ndarray(ndim=1),
+                                indices: ti.types.ndarray(ndim=1),
+                                indptr: ti.types.ndarray(ndim=1),
+                                events: ti.types.ndarray(ndim=1),
+                                out: ti.types.ndarray(ndim=1)):
+  # ti.loop_config(serialize=True)
+  for row_i in range(indptr.shape[0] - 1):
+    r = 0.
+    for j in range(indptr[row_i], indptr[row_i + 1]):
+      if events[indices[j]] > 0.:
+        r += values[j]
+    out[row_i] = r

 ### GPU
 # homo
@@ -462,26 +480,26 @@ def _define_op(cpu_kernel, gpu_kernel):


 # transpose bool homo
-_event_csrmv_transpose_bool_homo_p = _define_op(_event_csr_matvec_transpose_bool_cpu, _event_csr_matvec_transpose_bool_homo_gpu)
+_event_csrmv_transpose_bool_homo_p = _define_op(_event_csr_matvec_transpose_bool_homo_cpu, _event_csr_matvec_transpose_bool_homo_gpu)

 # transpose homo
-_event_csrmv_transpose_homo_p = _define_op(_event_csr_matvec_transpose_cpu, _event_csr_matvec_transpose_homo_gpu)
+_event_csrmv_transpose_homo_p = _define_op(_event_csr_matvec_transpose_homo_cpu, _event_csr_matvec_transpose_homo_gpu)

 # not transpose bool homo
-_event_csrmv_bool_homo_p = _define_op(_event_csr_matvec_bool_cpu, _event_csr_matvec_bool_homo_gpu)
+_event_csrmv_bool_homo_p = _define_op(_event_csr_matvec_bool_homo_cpu, _event_csr_matvec_bool_homo_gpu)

 # not transpose homo
-_event_csrmv_homo_p = _define_op(_event_csr_matvec_cpu, _event_csr_matvec_homo_gpu)
+_event_csrmv_homo_p = _define_op(_event_csr_matvec_homo_cpu, _event_csr_matvec_homo_gpu)

 # transpose bool heter
-_event_csrmv_transpose_bool_heter_p = _define_op(_event_csr_matvec_transpose_bool_cpu, _event_csr_matvec_transpose_bool_heter_gpu)
+_event_csrmv_transpose_bool_heter_p = _define_op(_event_csr_matvec_transpose_bool_heter_cpu, _event_csr_matvec_transpose_bool_heter_gpu)

 # transpose heter
-_event_csrmv_transpose_heter_p = _define_op(_event_csr_matvec_transpose_cpu, _event_csr_matvec_transpose_heter_gpu)
+_event_csrmv_transpose_heter_p = _define_op(_event_csr_matvec_transpose_heter_cpu, _event_csr_matvec_transpose_heter_gpu)

 # not transpose bool heter
-_event_csrmv_bool_heter_p = _define_op(_event_csr_matvec_bool_cpu, _event_csr_matvec_bool_heter_gpu)
+_event_csrmv_bool_heter_p = _define_op(_event_csr_matvec_bool_heter_cpu, _event_csr_matvec_bool_heter_gpu)

 # not transpose heter
-_event_csrmv_heter_p = _define_op(_event_csr_matvec_cpu, _event_csr_matvec_heter_gpu)
+_event_csrmv_heter_p = _define_op(_event_csr_matvec_heter_cpu, _event_csr_matvec_heter_gpu)

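The note added at the top of each CPU section ("According to the benchmarks, all transpose kernels should be serialized") reflects that the transpose kernels scatter-accumulate into `out[indices[j]]`, so a parallel outer loop would turn every `+=` into an atomic add; the benchmark itself is not included in the commit. The snippet below is a minimal, self-contained sketch (not from the repository) of how one could time the serialized variant; sizes, density, and iteration count are arbitrary.

import time
import numpy as np
import taichi as ti

ti.init(arch=ti.cpu)

@ti.kernel
def transpose_homo_serial(values: ti.types.ndarray(ndim=1),
                          indices: ti.types.ndarray(ndim=1),
                          indptr: ti.types.ndarray(ndim=1),
                          events: ti.types.ndarray(ndim=1),
                          out: ti.types.ndarray(ndim=1)):
  value = values[0]
  ti.loop_config(serialize=True)  # single-threaded outer loop, no atomic adds needed
  for row_i in range(indptr.shape[0] - 1):
    if events[row_i] > 0.:
      for j in range(indptr[row_i], indptr[row_i + 1]):
        out[indices[j]] += value

# toy CSR connectivity: 1000 rows, 50 random targets per row
rng = np.random.default_rng(0)
n_row, n_col, nnz_per_row = 1000, 1000, 50
indptr = np.arange(0, (n_row + 1) * nnz_per_row, nnz_per_row, dtype=np.int32)
indices = rng.integers(0, n_col, size=n_row * nnz_per_row).astype(np.int32)
events = (rng.random(n_row) < 0.1).astype(np.float32)
values = np.array([1.5], dtype=np.float32)
out = np.zeros(n_col, dtype=np.float32)

transpose_homo_serial(values, indices, indptr, events, out)  # warm-up / compile
t0 = time.perf_counter()
for _ in range(100):
  transpose_homo_serial(values, indices, indptr, events, out)
ti.sync()
print('serialized transpose kernel:', time.perf_counter() - t0, 's')
# To compare, duplicate the kernel without ti.loop_config(serialize=True)
# and time it the same way.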
77 changes: 43 additions & 34 deletions brainpy/_src/math/sparse/_csr_mv_taichi.py
@@ -20,49 +20,58 @@
 ]

 ### CPU

+'''
+According to the benchmarks, all transpose kernels should be serialized.
+'''
 @ti.kernel
-def _sparse_csr_matvec_transpose_cpu(values: ti.types.ndarray(ndim=1),
+def _sparse_csr_matvec_transpose_homo_cpu(values: ti.types.ndarray(ndim=1),
                                           col_indices: ti.types.ndarray(ndim=1),
                                           row_ptr: ti.types.ndarray(ndim=1),
                                           vector: ti.types.ndarray(ndim=1),
                                           out: ti.types.ndarray(ndim=1)):
-  if values.shape[0] == 1:
-    value = values[0]
-    ti.loop_config(serialize=True)
-    for row_i in range(row_ptr.shape[0] - 1):
-      for j in range(row_ptr[row_i], row_ptr[row_i + 1]):
-        out[col_indices[j]] += value * vector[row_i]
+  value = values[0]
+  ti.loop_config(serialize=True)
+  for row_i in range(row_ptr.shape[0] - 1):
+    for j in range(row_ptr[row_i], row_ptr[row_i + 1]):
+      out[col_indices[j]] += value * vector[row_i]

-  else:
-    ti.loop_config(serialize=True)
-    for row_i in range(row_ptr.shape[0] - 1):
-      for j in range(row_ptr[row_i], row_ptr[row_i + 1]):
-        out[col_indices[j]] += vector[row_i] * values[j]
+
+@ti.kernel
+def _sparse_csr_matvec_transpose_heter_cpu(values: ti.types.ndarray(ndim=1),
+                                           col_indices: ti.types.ndarray(ndim=1),
+                                           row_ptr: ti.types.ndarray(ndim=1),
+                                           vector: ti.types.ndarray(ndim=1),
+                                           out: ti.types.ndarray(ndim=1)):
+  ti.loop_config(serialize=True)
+  for row_i in range(row_ptr.shape[0] - 1):
+    for j in range(row_ptr[row_i], row_ptr[row_i + 1]):
+      out[col_indices[j]] += vector[row_i] * values[j]

 @ti.kernel
-def _sparse_csr_matvec_cpu(values: ti.types.ndarray(ndim=1),
+def _sparse_csr_matvec_homo_cpu(values: ti.types.ndarray(ndim=1),
                                 col_indices: ti.types.ndarray(ndim=1),
                                 row_ptr: ti.types.ndarray(ndim=1),
                                 vector: ti.types.ndarray(ndim=1),
                                 out: ti.types.ndarray(ndim=1)):
-  if values.shape[0] == 1:
-    value = values[0]
-    ti.loop_config(serialize=True)
-    for row_i in range(row_ptr.shape[0] - 1):
-      r = 0.
-      for j in range(row_ptr[row_i], row_ptr[row_i + 1]):
-        r += value * vector[col_indices[j]]
-      out[row_i] = r
+  value = values[0]
+  # ti.loop_config(serialize=True)
+  for row_i in range(row_ptr.shape[0] - 1):
+    r = 0.
+    for j in range(row_ptr[row_i], row_ptr[row_i + 1]):
+      r += value * vector[col_indices[j]]
+    out[row_i] = r

-  else:
-    ti.loop_config(serialize=True)
-    for row_i in range(row_ptr.shape[0] - 1):
-      r = 0.
-      for j in range(row_ptr[row_i], row_ptr[row_i + 1]):
-        r += values[j] * vector[col_indices[j]]
-      out[row_i] = r
+
+@ti.kernel
+def _sparse_csr_matvec_heter_cpu(values: ti.types.ndarray(ndim=1),
+                                 col_indices: ti.types.ndarray(ndim=1),
+                                 row_ptr: ti.types.ndarray(ndim=1),
+                                 vector: ti.types.ndarray(ndim=1),
+                                 out: ti.types.ndarray(ndim=1)):
+  # ti.loop_config(serialize=True)
+  for row_i in range(row_ptr.shape[0] - 1):
+    r = 0.
+    for j in range(row_ptr[row_i], row_ptr[row_i + 1]):
+      r += values[j] * vector[col_indices[j]]
+    out[row_i] = r

 ### GPU
 # homo
@@ -274,18 +283,18 @@ def _define_op(cpu_kernel, gpu_kernel):


 # transpose homo
-_csr_matvec_transpose_homo_p = _define_op(cpu_kernel=_sparse_csr_matvec_transpose_cpu,
+_csr_matvec_transpose_homo_p = _define_op(cpu_kernel=_sparse_csr_matvec_transpose_homo_cpu,
                                           gpu_kernel=_sparse_csr_matvec_transpose_homo_gpu)

 # no transpose homo
-_csr_matvec_homo_p = _define_op(cpu_kernel=_sparse_csr_matvec_cpu,
+_csr_matvec_homo_p = _define_op(cpu_kernel=_sparse_csr_matvec_homo_cpu,
                                 gpu_kernel=_sparse_csr_matvec_homo_gpu)

 # transpose heter
-_csr_matvec_transpose_heter_p = _define_op(cpu_kernel=_sparse_csr_matvec_transpose_cpu,
+_csr_matvec_transpose_heter_p = _define_op(cpu_kernel=_sparse_csr_matvec_transpose_heter_cpu,
                                            gpu_kernel=_sparse_csr_matvec_transpose_heter_gpu)

 # no transpose heter
-_csr_matvec_heter_p = _define_op(cpu_kernel=_sparse_csr_matvec_cpu,
+_csr_matvec_heter_p = _define_op(cpu_kernel=_sparse_csr_matvec_heter_cpu,
                                  gpu_kernel=_sparse_csr_matvec_heter_gpu)

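A quick way to sanity-check the new homo/heter split in `_csr_mv_taichi.py` is to compare against `scipy.sparse`: the homogeneous case is simply a CSR matrix whose stored values are all equal to the single weight. A minimal reference check along those lines (not part of the commit, dimensions arbitrary) might look like this:

import numpy as np
import scipy.sparse as sp

rng = np.random.default_rng(42)
A = sp.random(200, 300, density=0.05, format='csr', dtype=np.float64, random_state=0)
v = rng.random(300)
w = 0.25  # the single homogeneous weight

heter_ref = A @ v  # heterogeneous: stored CSR values used as-is
A_homo = sp.csr_matrix((np.full_like(A.data, w), A.indices, A.indptr), shape=A.shape)
homo_ref = A_homo @ v  # homogeneous: every stored value replaced by w

# The Taichi CPU kernels above should reproduce these references, e.g.
#   out = np.zeros(200); _sparse_csr_matvec_heter_cpu(A.data, A.indices, A.indptr, v, out)
#   np.testing.assert_allclose(out, heter_ref)
print(np.allclose(heter_ref, A.toarray() @ v))  # True: CSR matvec matches the dense product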