Skip to content

Commit

Permalink
Add more benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
Routhleck committed Dec 12, 2023
1 parent 3ff5dbc commit 66724b1
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 75 deletions.
59 changes: 54 additions & 5 deletions brainpy/_src/math/event/_csr_matvec_taichi.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,19 +126,34 @@ def _event_csr_matvec_transpose_bool_homo_gpu(values: ti.types.ndarray(ndim=1),
events: ti.types.ndarray(ndim=1),
out: ti.types.ndarray(ndim=1)):
value = values[0]
# total_rows = indptr.shape[0] - 1
# for i in range(total_rows * 32):
# row_i = ti.cast(ti.floor(i / 32), ti.i32)
# index = i % 32
# if events[row_i]:
# for j in range(indptr[row_i], indptr[row_i + 1]):
# if j % 32 == index:
# out[indices[j]] += value
for row_i in ti.ndrange(indptr.shape[0] - 1):
if events[row_i]:
for j in range(indptr[row_i], indptr[row_i + 1]):
out[indices[j]] += value


@ti.kernel
def _event_csr_matvec_transpose_homo_gpu(values: ti.types.ndarray(ndim=1),
indices: ti.types.ndarray(ndim=1),
indptr: ti.types.ndarray(ndim=1),
events: ti.types.ndarray(ndim=1),
out: ti.types.ndarray(ndim=1)):
value = values[0]
# total_rows = indptr.shape[0] - 1
# for i in range(total_rows * 32):
# row_i = ti.cast(ti.floor(i / 32), ti.i32)
# index = i % 32
# if events[row_i] > 0.:
# for j in range(indptr[row_i], indptr[row_i + 1]):
# if j % 32 == index:
# out[indices[j]] += value
for row_i in ti.ndrange(indptr.shape[0] - 1):
if events[row_i] > 0.:
for j in range(indptr[row_i], indptr[row_i + 1]):
Expand All @@ -152,7 +167,16 @@ def _event_csr_matvec_bool_homo_gpu(values: ti.types.ndarray(ndim=1),
events: ti.types.ndarray(ndim=1),
out: ti.types.ndarray(ndim=1)):
value = values[0]
for row_i in ti.ndrange(indptr.shape[0] - 1):
# total_rows = indptr.shape[0] - 1
# for i in ti.ndrange(total_rows * 32):
# row_i = ti.cast(ti.floor(i / 32), ti.i32)
# index = i % 32
# r = 0.
# for j in range(indptr[row_i], indptr[row_i + 1]):
# if j % 32 == index and events[indices[j]]:
# r += value
# out[row_i] += r
for row_i in range(indptr.shape[0] - 1):
r = 0.
for j in range(indptr[row_i], indptr[row_i + 1]):
if events[indices[j]]:
Expand All @@ -166,7 +190,7 @@ def _event_csr_matvec_homo_gpu(values: ti.types.ndarray(ndim=1),
events: ti.types.ndarray(ndim=1),
out: ti.types.ndarray(ndim=1)):
value = values[0]
for row_i in ti.ndrange(indptr.shape[0] - 1):
for row_i in range(indptr.shape[0] - 1):
r = 0.
for j in range(indptr[row_i], indptr[row_i + 1]):
if events[indices[j]] > 0.:
Expand All @@ -181,6 +205,14 @@ def _event_csr_matvec_transpose_bool_heter_gpu(values: ti.types.ndarray(ndim=1),
indptr: ti.types.ndarray(ndim=1),
events: ti.types.ndarray(ndim=1),
out: ti.types.ndarray(ndim=1)):
# total_rows = indptr.shape[0] - 1
# for i in range(total_rows * 32):
# row_i = ti.cast(ti.floor(i / 32), ti.i32)
# index = i % 32
# if events[row_i]:
# for j in range(indptr[row_i], indptr[row_i + 1]):
# if j % 32 == index:
# out[indices[j]] += values[j]
for row_i in ti.ndrange(indptr.shape[0] - 1):
if events[row_i]:
for j in range(indptr[row_i], indptr[row_i + 1]):
Expand All @@ -193,6 +225,14 @@ def _event_csr_matvec_transpose_heter_gpu(values: ti.types.ndarray(ndim=1),
indptr: ti.types.ndarray(ndim=1),
events: ti.types.ndarray(ndim=1),
out: ti.types.ndarray(ndim=1)):
# total_rows = indptr.shape[0] - 1
# for i in range(total_rows * 32):
# row_i = ti.cast(ti.floor(i / 32), ti.i32)
# index = i % 32
# if events[row_i] > 0.:
# for j in range(indptr[row_i], indptr[row_i + 1]):
# if j % 32 == index:
# out[indices[j]] += values[j]
for row_i in ti.ndrange(indptr.shape[0] - 1):
if events[row_i] > 0.:
for j in range(indptr[row_i], indptr[row_i + 1]):
Expand All @@ -205,7 +245,16 @@ def _event_csr_matvec_bool_heter_gpu(values: ti.types.ndarray(ndim=1),
indptr: ti.types.ndarray(ndim=1),
events: ti.types.ndarray(ndim=1),
out: ti.types.ndarray(ndim=1)):
for row_i in ti.ndrange(indptr.shape[0] - 1):
# total_rows = indptr.shape[0] - 1
# for i in ti.ndrange(total_rows * 32):
# row_i = ti.cast(ti.floor(i / 32), ti.i32)
# index = i % 32
# r = 0.
# for j in range(indptr[row_i], indptr[row_i + 1]):
# if j % 32 == index and events[indices[j]]:
# r += values[j]
# out[row_i] += r
for row_i in range(indptr.shape[0] - 1):
r = 0.
for j in range(indptr[row_i], indptr[row_i + 1]):
if events[indices[j]]:
Expand All @@ -218,7 +267,7 @@ def _event_csr_matvec_heter_gpu(values: ti.types.ndarray(ndim=1),
indptr: ti.types.ndarray(ndim=1),
events: ti.types.ndarray(ndim=1),
out: ti.types.ndarray(ndim=1)):
for row_i in ti.ndrange(indptr.shape[0] - 1):
for row_i in range(indptr.shape[0] - 1):
r = 0.
for j in range(indptr[row_i], indptr[row_i + 1]):
if events[indices[j]] > 0.:
Expand Down
91 changes: 49 additions & 42 deletions brainpy/_src/math/event/tests/event_csrmv_taichi_VS_event_csrmv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,18 @@

s = [1000, 2500, 5000, 10000, 25000, 50000]
p = [0.1, 0.2, 0.3, 0.4, 0.5]
values_type = ['homo', 'heter']
events_type = ['bool', 'float']
values_type = ['homo',
'heter']
events_type = ['bool',
'float',
]
transpose = [True,
False]

print(bm.get_platform())


def test_event_ell_cpu(s, p, values_type, events_type):
def test_event_ell_cpu(s, p, values_type, events_type, transpose):
print('s: ', s, 'p: ', p)
k = int(s * p)
bm.random.seed(1234)
Expand All @@ -39,43 +44,42 @@ def test_event_ell_cpu(s, p, values_type, events_type):
dense[pre_indices, csr_indices] = 1.0

if events_type == 'float':
vector = vector.astype(np.float32)
vector[vector == 1.0] = bm.random.rand(bm.sum(vector == 1.0))
vector = vector.astype(bm.float32)
if values_type == 'heter':
heter_data = bm.as_jax(rng.random(csr_indices.shape))
weight = heter_data

# groundtruth = bm.as_jax(vector, dtype=float) @ bm.as_jax(dense)

result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
# time.sleep(2)

time0 = time.time()
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time1 = time.time()
# time.sleep(2)

time2 = time.time()
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time3 = time.time()
# time.sleep(2)

time4 = time.time()
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time5 = time.time()
# time.sleep(2)

time6 = time.time()
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time7 = time.time()

time8 = time.time()
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time9 = time.time()

result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
# print(result1[0])
# print(result2)
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
# print(result1[0])
# print(result2)
# print(groundtruth - result1[0])
# print(groundtruth - result2)

Expand All @@ -85,26 +89,26 @@ def test_event_ell_cpu(s, p, values_type, events_type):
# assert bm.allclose(result1[0], result2)

time12 = time.time()
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time13 = time.time()
# time.sleep(2)

time14 = time.time()
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time15 = time.time()
# time.sleep(2)

time16 = time.time()
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time17 = time.time()
# time.sleep(2)

time18 = time.time()
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time19 = time.time()

time20 = time.time()
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time21 = time.time()

taichi_aot_time1 = (time1 - time0) * 1000
Expand Down Expand Up @@ -136,7 +140,7 @@ def test_event_ell_cpu(s, p, values_type, events_type):
return taichi_aot_time1, taichi_aot_time2, taichi_aot_time3, taichi_aot_time4, taichi_aot_time5,\
brainpy_time1, brainpy_time2, brainpy_time3, brainpy_time4, brainpy_time5, speedup

def test_event_ell_gpu(s, p, values_type, events_type):
def test_event_ell_gpu(s, p, values_type, events_type, transpose):
print('s: ', s, 'p: ', p)
k = int(s * p)
bm.random.seed(1234)
Expand All @@ -152,8 +156,7 @@ def test_event_ell_gpu(s, p, values_type, events_type):
dense[pre_indices, csr_indices] = 1.0

if events_type == 'float':
vector = vector.astype(np.float32)
vector[vector == 1.0] = bm.random.rand(bm.sum(vector == 1.0))
vector = vector.astype(bm.float32)
if values_type == 'heter':
heter_data = bm.as_jax(rng.random(csr_indices.shape))
weight = heter_data
Expand All @@ -162,37 +165,39 @@ def test_event_ell_gpu(s, p, values_type, events_type):



result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
# time.sleep(2)

time0 = time.time()
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time1 = time.time()
# time.sleep(2)

time2 = time.time()
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time3 = time.time()
# time.sleep(2)

time4 = time.time()
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time5 = time.time()
# time.sleep(2)

time6 = time.time()
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time7 = time.time()

time8 = time.time()
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time9 = time.time()

result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
# print('--------------------result1[0]------------------')
# print(result1[0])
# print('--------------------result2------------------')
# print(result2)
# print('--------------------gt------------------')
# print(groundtruth)
# print('--------------------gt - result1[0]------------------')
# print(groundtruth - result1[0])
# print('--------------------gt - result2------------------')
Expand All @@ -204,26 +209,26 @@ def test_event_ell_gpu(s, p, values_type, events_type):
# assert bm.allclose(result1[0], result2)

time12 = time.time()
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time13 = time.time()
# time.sleep(2)

time14 = time.time()
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time15 = time.time()
# time.sleep(2)

time16 = time.time()
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time17 = time.time()
# time.sleep(2)

time18 = time.time()
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time19 = time.time()

time20 = time.time()
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True))
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose))
time21 = time.time()

taichi_aot_time1 = (time1 - time0) * 1000
Expand All @@ -236,7 +241,7 @@ def test_event_ell_gpu(s, p, values_type, events_type):
brainpy_time3 = (time17 - time16) * 1000
brainpy_time4 = (time19 - time18) * 1000
brainpy_time5 = (time21 - time20) * 1000

print('s: ', s, 'p: ', p, 'values_type: ', values_type, 'events_type: ', events_type, 'transpose: ', transpose)
print('taichi_aot_1: ', taichi_aot_time1, 'ms')
print('taichi_aot_2: ', taichi_aot_time2, 'ms')
print('taichi_aot_3: ', taichi_aot_time3, 'ms')
Expand All @@ -257,7 +262,7 @@ def test_event_ell_gpu(s, p, values_type, events_type):
brainpy_time1, brainpy_time2, brainpy_time3, brainpy_time4, brainpy_time5, speedup

# init dataframe
df = pd.DataFrame(columns=['s', 'p', 'backend', 'values type', 'events type',
df = pd.DataFrame(columns=['s', 'p', 'backend', 'values type', 'events type', 'transpose',
'taichi aot time1(ms)', 'taichi aot time2(ms)', 'taichi aot time3(ms)', 'taichi aot time4(ms)', 'taichi aot time5(ms)',
'brainpy time1(ms)', 'brainpy time2(ms)', 'brainpy time3(ms)', 'brainpy time4(ms)', 'brainpy time5(ms)',
'speedup'])
Expand All @@ -267,10 +272,11 @@ def test_event_ell_gpu(s, p, values_type, events_type):
for _p in p:
for _values_type in values_type:
for _events_type in events_type:
taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\
brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_cpu(_s, _p, _values_type, _events_type)
for _transpose in transpose:
taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\
brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_cpu(_s, _p, _values_type, _events_type, _transpose)
# append to dataframe
df.loc[df.shape[0]] = [_s, _p, 'cpu', _values_type, _events_type,
df.loc[df.shape[0]] = [_s, _p, 'cpu', _values_type, _events_type, _transpose,
taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,
brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup]
df.to_csv('event_csrmv_cpu.csv', index=False)
Expand All @@ -280,10 +286,11 @@ def test_event_ell_gpu(s, p, values_type, events_type):
for _p in p:
for _values_type in values_type:
for _events_type in events_type:
taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\
brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_gpu(_s, _p, _values_type, _events_type)
for _transpose in transpose:
taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\
brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_gpu(_s, _p, _values_type, _events_type, _transpose)
# append to dataframe
df.loc[df.shape[0]] = [_s, _p, 'gpu', _values_type, _events_type,
# Append one result row for this GPU run. The 'transpose' column must hold the
# per-iteration flag `_transpose`, not the module-level `transpose` list of
# options, so that it matches the value passed to test_event_ell_gpu above.
df.loc[df.shape[0]] = [_s, _p, 'gpu', _values_type, _events_type, _transpose,
taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,
brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup]
df.to_csv('event_csrmv_gpu.csv', index=False)
Expand Down
Loading

0 comments on commit 66724b1

Please sign in to comment.