From 66724b1ef71a1d55561b0191084843fe0c006665 Mon Sep 17 00:00:00 2001 From: routhleck <1310722434@qq.com> Date: Tue, 12 Dec 2023 13:55:02 +0800 Subject: [PATCH] Add more benchmarks --- brainpy/_src/math/event/_csr_matvec_taichi.py | 59 +++++++++++- .../event_csrmv_taichi_VS_event_csrmv.py | 91 ++++++++++--------- .../sparse/tests/csrmv_taichi_VS_csrmv.py | 59 ++++++------ 3 files changed, 134 insertions(+), 75 deletions(-) diff --git a/brainpy/_src/math/event/_csr_matvec_taichi.py b/brainpy/_src/math/event/_csr_matvec_taichi.py index 829b343aa..4c48f27d6 100644 --- a/brainpy/_src/math/event/_csr_matvec_taichi.py +++ b/brainpy/_src/math/event/_csr_matvec_taichi.py @@ -126,12 +126,19 @@ def _event_csr_matvec_transpose_bool_homo_gpu(values: ti.types.ndarray(ndim=1), events: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): value = values[0] + # total_rows = indptr.shape[0] - 1 + # for i in range(total_rows * 32): + # row_i = ti.cast(ti.floor(i / 32), ti.i32) + # index = i % 32 + # if events[row_i]: + # for j in range(indptr[row_i], indptr[row_i + 1]): + # if j % 32 == index: + # out[indices[j]] += value for row_i in ti.ndrange(indptr.shape[0] - 1): if events[row_i]: for j in range(indptr[row_i], indptr[row_i + 1]): out[indices[j]] += value - @ti.kernel def _event_csr_matvec_transpose_homo_gpu(values: ti.types.ndarray(ndim=1), indices: ti.types.ndarray(ndim=1), @@ -139,6 +146,14 @@ def _event_csr_matvec_transpose_homo_gpu(values: ti.types.ndarray(ndim=1), events: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): value = values[0] + # total_rows = indptr.shape[0] - 1 + # for i in range(total_rows * 32): + # row_i = ti.cast(ti.floor(i / 32), ti.i32) + # index = i % 32 + # if events[row_i] > 0.: + # for j in range(indptr[row_i], indptr[row_i + 1]): + # if j % 32 == index: + # out[indices[j]] += value for row_i in ti.ndrange(indptr.shape[0] - 1): if events[row_i] > 0.: for j in range(indptr[row_i], indptr[row_i + 1]): @@ -152,7 +167,16 @@ def 
_event_csr_matvec_bool_homo_gpu(values: ti.types.ndarray(ndim=1), events: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): value = values[0] - for row_i in ti.ndrange(indptr.shape[0] - 1): + # total_rows = indptr.shape[0] - 1 + # for i in ti.ndrange(total_rows * 32): + # row_i = ti.cast(ti.floor(i / 32), ti.i32) + # index = i % 32 + # r = 0. + # for j in range(indptr[row_i], indptr[row_i + 1]): + # if j % 32 == index and events[indices[j]]: + # r += value + # out[row_i] += r + for row_i in range(indptr.shape[0] - 1): r = 0. for j in range(indptr[row_i], indptr[row_i + 1]): if events[indices[j]]: @@ -166,7 +190,7 @@ def _event_csr_matvec_homo_gpu(values: ti.types.ndarray(ndim=1), events: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): value = values[0] - for row_i in ti.ndrange(indptr.shape[0] - 1): + for row_i in range(indptr.shape[0] - 1): r = 0. for j in range(indptr[row_i], indptr[row_i + 1]): if events[indices[j]] > 0.: @@ -181,6 +205,14 @@ def _event_csr_matvec_transpose_bool_heter_gpu(values: ti.types.ndarray(ndim=1), indptr: ti.types.ndarray(ndim=1), events: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): + # total_rows = indptr.shape[0] - 1 + # for i in range(total_rows * 32): + # row_i = ti.cast(ti.floor(i / 32), ti.i32) + # index = i % 32 + # if events[row_i]: + # for j in range(indptr[row_i], indptr[row_i + 1]): + # if j % 32 == index: + # out[indices[j]] += values[j] for row_i in ti.ndrange(indptr.shape[0] - 1): if events[row_i]: for j in range(indptr[row_i], indptr[row_i + 1]): @@ -193,6 +225,14 @@ def _event_csr_matvec_transpose_heter_gpu(values: ti.types.ndarray(ndim=1), indptr: ti.types.ndarray(ndim=1), events: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): + # total_rows = indptr.shape[0] - 1 + # for i in range(total_rows * 32): + # row_i = ti.cast(ti.floor(i / 32), ti.i32) + # index = i % 32 + # if events[row_i] > 0.: + # for j in range(indptr[row_i], indptr[row_i + 1]): + # if j % 32 == index: + # 
out[indices[j]] += values[j] for row_i in ti.ndrange(indptr.shape[0] - 1): if events[row_i] > 0.: for j in range(indptr[row_i], indptr[row_i + 1]): @@ -205,7 +245,16 @@ def _event_csr_matvec_bool_heter_gpu(values: ti.types.ndarray(ndim=1), indptr: ti.types.ndarray(ndim=1), events: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): - for row_i in ti.ndrange(indptr.shape[0] - 1): + # total_rows = indptr.shape[0] - 1 + # for i in ti.ndrange(total_rows * 32): + # row_i = ti.cast(ti.floor(i / 32), ti.i32) + # index = i % 32 + # r = 0. + # for j in range(indptr[row_i], indptr[row_i + 1]): + # if j % 32 == index and events[indices[j]]: + # r += values[j] + # out[row_i] += r + for row_i in range(indptr.shape[0] - 1): r = 0. for j in range(indptr[row_i], indptr[row_i + 1]): if events[indices[j]]: @@ -218,7 +267,7 @@ def _event_csr_matvec_heter_gpu(values: ti.types.ndarray(ndim=1), indptr: ti.types.ndarray(ndim=1), events: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): - for row_i in ti.ndrange(indptr.shape[0] - 1): + for row_i in range(indptr.shape[0] - 1): r = 0. 
for j in range(indptr[row_i], indptr[row_i + 1]): if events[indices[j]] > 0.: diff --git a/brainpy/_src/math/event/tests/event_csrmv_taichi_VS_event_csrmv.py b/brainpy/_src/math/event/tests/event_csrmv_taichi_VS_event_csrmv.py index 8a8c9a067..e81d2ea1e 100644 --- a/brainpy/_src/math/event/tests/event_csrmv_taichi_VS_event_csrmv.py +++ b/brainpy/_src/math/event/tests/event_csrmv_taichi_VS_event_csrmv.py @@ -16,13 +16,18 @@ s = [1000, 2500, 5000, 10000, 25000, 50000] p = [0.1, 0.2, 0.3, 0.4, 0.5] -values_type = ['homo', 'heter'] -events_type = ['bool', 'float'] +values_type = ['homo', + 'heter'] +events_type = ['bool', + 'float', + ] +transpose = [True, + False] print(bm.get_platform()) -def test_event_ell_cpu(s, p, values_type, events_type): +def test_event_ell_cpu(s, p, values_type, events_type, transpose): print('s: ', s, 'p: ', p) k = int(s * p) bm.random.seed(1234) @@ -39,43 +44,42 @@ def test_event_ell_cpu(s, p, values_type, events_type): dense[pre_indices, csr_indices] = 1.0 if events_type == 'float': - vector = vector.astype(np.float32) - vector[vector == 1.0] = bm.random.rand(bm.sum(vector == 1.0)) + vector = vector.astype(bm.float32) if values_type == 'heter': heter_data = bm.as_jax(rng.random(csr_indices.shape)) weight = heter_data # groundtruth = bm.as_jax(vector, dtype=float) @ bm.as_jax(dense) - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) # time.sleep(2) time0 = time.time() - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time1 = time.time() # time.sleep(2) time2 = time.time() - result1 = 
jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time3 = time.time() # time.sleep(2) time4 = time.time() - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time5 = time.time() # time.sleep(2) time6 = time.time() - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time7 = time.time() time8 = time.time() - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time9 = time.time() - result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) -# print(result1[0]) -# print(result2) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) + # print(result1[0]) + # print(result2) # print(groundtruth - result1[0]) # print(groundtruth - result2) @@ -85,26 +89,26 @@ def test_event_ell_cpu(s, p, values_type, events_type): # assert bm.allclose(result1[0], result2) time12 = time.time() - result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time13 = 
time.time() # time.sleep(2) time14 = time.time() - result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time15 = time.time() # time.sleep(2) time16 = time.time() - result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time17 = time.time() # time.sleep(2) time18 = time.time() - result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time19 = time.time() time20 = time.time() - result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time21 = time.time() taichi_aot_time1 = (time1 - time0) * 1000 @@ -136,7 +140,7 @@ def test_event_ell_cpu(s, p, values_type, events_type): return taichi_aot_time1, taichi_aot_time2, taichi_aot_time3, taichi_aot_time4, taichi_aot_time5,\ brainpy_time1, brainpy_time2, brainpy_time3, brainpy_time4, brainpy_time5, speedup -def test_event_ell_gpu(s, p, values_type, events_type): +def test_event_ell_gpu(s, p, values_type, events_type, transpose): print('s: ', s, 'p: ', p) k = int(s * p) bm.random.seed(1234) @@ -152,8 +156,7 @@ def test_event_ell_gpu(s, p, values_type, events_type): dense[pre_indices, csr_indices] = 1.0 if events_type == 'float': - vector = vector.astype(np.float32) - vector[vector == 1.0] = bm.random.rand(bm.sum(vector == 1.0)) + vector = vector.astype(bm.float32) if values_type == 
'heter': heter_data = bm.as_jax(rng.random(csr_indices.shape)) weight = heter_data @@ -162,37 +165,39 @@ def test_event_ell_gpu(s, p, values_type, events_type): - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) # time.sleep(2) time0 = time.time() - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time1 = time.time() # time.sleep(2) time2 = time.time() - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time3 = time.time() # time.sleep(2) time4 = time.time() - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time5 = time.time() # time.sleep(2) time6 = time.time() - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time7 = time.time() time8 = time.time() - result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.event.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time9 = time.time() - 
result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) # print('--------------------result1[0]------------------') # print(result1[0]) # print('--------------------result2------------------') # print(result2) + # print('--------------------gt------------------') + # print(groundtruth) # print('--------------------gt - result1[0]------------------') # print(groundtruth - result1[0]) # print('--------------------gt - result2------------------') @@ -204,26 +209,26 @@ def test_event_ell_gpu(s, p, values_type, events_type): # assert bm.allclose(result1[0], result2) time12 = time.time() - result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time13 = time.time() # time.sleep(2) time14 = time.time() - result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time15 = time.time() # time.sleep(2) time16 = time.time() - result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time17 = time.time() # time.sleep(2) time18 = time.time() - result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time19 = time.time() time20 = time.time() - result2 = 
jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.event.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time21 = time.time() taichi_aot_time1 = (time1 - time0) * 1000 @@ -236,7 +241,7 @@ def test_event_ell_gpu(s, p, values_type, events_type): brainpy_time3 = (time17 - time16) * 1000 brainpy_time4 = (time19 - time18) * 1000 brainpy_time5 = (time21 - time20) * 1000 - + print('s: ', s, 'p: ', p, 'values_type: ', values_type, 'events_type: ', events_type, 'transpose: ', transpose) print('taichi_aot_1: ', taichi_aot_time1, 'ms') print('taichi_aot_2: ', taichi_aot_time2, 'ms') print('taichi_aot_3: ', taichi_aot_time3, 'ms') @@ -257,7 +262,7 @@ def test_event_ell_gpu(s, p, values_type, events_type): brainpy_time1, brainpy_time2, brainpy_time3, brainpy_time4, brainpy_time5, speedup # init dataframe -df = pd.DataFrame(columns=['s', 'p', 'backend', 'values type', 'events type', +df = pd.DataFrame(columns=['s', 'p', 'backend', 'values type', 'events type', 'transpose', 'taichi aot time1(ms)', 'taichi aot time2(ms)', 'taichi aot time3(ms)', 'taichi aot time4(ms)', 'taichi aot time5(ms)', 'brainpy time1(ms)', 'brainpy time2(ms)', 'brainpy time3(ms)', 'brainpy time4(ms)', 'brainpy time5(ms)', 'speedup']) @@ -267,10 +272,11 @@ def test_event_ell_gpu(s, p, values_type, events_type): for _p in p: for _values_type in values_type: for _events_type in events_type: - taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ - brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_cpu(_s, _p, _values_type, _events_type) + for _transpose in transpose: + taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ + brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_cpu(_s, _p, 
_values_type, _events_type, _transpose) # append to dataframe - df.loc[df.shape[0]] = [_s, _p, 'cpu', _values_type, _events_type, + df.loc[df.shape[0]] = [_s, _p, 'cpu', _values_type, _events_type, _transpose, taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] df.to_csv('event_csrmv_cpu.csv', index=False) @@ -280,10 +286,11 @@ def test_event_ell_gpu(s, p, values_type, events_type): for _p in p: for _values_type in values_type: for _events_type in events_type: - taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ - brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_gpu(_s, _p, _values_type, _events_type) + for _transpose in transpose: + taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ + brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_gpu(_s, _p, _values_type, _events_type, _transpose) # append to dataframe - df.loc[df.shape[0]] = [_s, _p, 'gpu', _values_type, _events_type, + df.loc[df.shape[0]] = [_s, _p, 'gpu', _values_type, _events_type, _transpose, taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] df.to_csv('event_csrmv_gpu.csv', index=False) diff --git a/brainpy/_src/math/sparse/tests/csrmv_taichi_VS_csrmv.py b/brainpy/_src/math/sparse/tests/csrmv_taichi_VS_csrmv.py index 887d501be..b11e98abc 100644 --- a/brainpy/_src/math/sparse/tests/csrmv_taichi_VS_csrmv.py +++ b/brainpy/_src/math/sparse/tests/csrmv_taichi_VS_csrmv.py @@ -18,11 +18,12 @@ p = [0.1, 0.2, 0.3, 0.4, 0.5] values_type = ['homo', 'heter'] events_type = ['float'] +transpose = [True, False] print(bm.get_platform()) -def test_event_ell_cpu(s, 
p, values_type, events_type): +def test_event_ell_cpu(s, p, values_type, events_type, transpose): print('s: ', s, 'p: ', p) k = int(s * p) rng = bm.random.RandomState(seed=1234) @@ -43,33 +44,33 @@ def test_event_ell_cpu(s, p, values_type, events_type): # groundtruth = bm.as_jax(vector, dtype=float) @ bm.as_jax(dense) - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) # time.sleep(2) time0 = time.time() - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time1 = time.time() # time.sleep(2) time2 = time.time() - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time3 = time.time() # time.sleep(2) time4 = time.time() - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time5 = time.time() # time.sleep(2) time6 = time.time() - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time7 = time.time() time8 = time.time() - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), 
transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time9 = time.time() - result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) # print(result1[0]) # print(result2) # print(groundtruth - result1[0]) @@ -81,26 +82,26 @@ def test_event_ell_cpu(s, p, values_type, events_type): # assert bm.allclose(result1[0], result2) time12 = time.time() - result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time13 = time.time() # time.sleep(2) time14 = time.time() - result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time15 = time.time() # time.sleep(2) time16 = time.time() - result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time17 = time.time() # time.sleep(2) time18 = time.time() - result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time19 = time.time() time20 = time.time() - result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = 
jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time21 = time.time() taichi_aot_time1 = (time1 - time0) * 1000 @@ -132,7 +133,7 @@ def test_event_ell_cpu(s, p, values_type, events_type): return taichi_aot_time1, taichi_aot_time2, taichi_aot_time3, taichi_aot_time4, taichi_aot_time5,\ brainpy_time1, brainpy_time2, brainpy_time3, brainpy_time4, brainpy_time5, speedup -def test_event_ell_gpu(s, p, values_type, events_type): +def test_event_ell_gpu(s, p, values_type, events_type, transpose): print('s: ', s, 'p: ', p) k = int(s * p) bm.random.seed(1234) @@ -155,33 +156,33 @@ def test_event_ell_gpu(s, p, values_type, events_type): - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) # time.sleep(2) time0 = time.time() - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time1 = time.time() # time.sleep(2) time2 = time.time() - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time3 = time.time() # time.sleep(2) time4 = time.time() - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time5 = time.time() # time.sleep(2) time6 = time.time() - result1 = 
jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time7 = time.time() time8 = time.time() - result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result1 = jax.block_until_ready(bm.sparse.csrmv_taichi(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) time9 = time.time() - result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=True)) + result2 = jax.block_until_ready(bm.sparse.csrmv(weight, csr_indices, csr_indptr, vector, shape=(s, s), transpose=transpose)) # print('--------------------result1[0]------------------') # print(result1[0]) # print('--------------------result2------------------') @@ -252,7 +253,7 @@ def test_event_ell_gpu(s, p, values_type, events_type): bm.set_platform('cpu') block_dim = 64 # init dataframe -df = pd.DataFrame(columns=['s', 'p', 'backend', 'values type', 'events type', +df = pd.DataFrame(columns=['s', 'p', 'backend', 'values type', 'events type', 'transpose', 'taichi aot time1(ms)', 'taichi aot time2(ms)', 'taichi aot time3(ms)', 'taichi aot time4(ms)', 'taichi aot time5(ms)', 'brainpy time1(ms)', 'brainpy time2(ms)', 'brainpy time3(ms)', 'brainpy time4(ms)', 'brainpy time5(ms)', 'speedup']) @@ -262,10 +263,11 @@ def test_event_ell_gpu(s, p, values_type, events_type): for _p in p: for _values_type in values_type: for _events_type in events_type: - taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ - brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_cpu(_s, _p, _values_type, _events_type) + for _transpose in transpose: + taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, 
taichi_aot_time_4, taichi_aot_time_5,\ + brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_cpu(_s, _p, _values_type, _events_type, _transpose) # append to dataframe - df.loc[df.shape[0]] = [_s, _p, 'cpu', _values_type, _events_type, + df.loc[df.shape[0]] = [_s, _p, 'cpu', _values_type, _events_type, _transpose, taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] df.to_csv('event_csrmv_cpu.csv', index=False) @@ -275,10 +277,11 @@ def test_event_ell_gpu(s, p, values_type, events_type): for _p in p: for _values_type in values_type: for _events_type in events_type: - taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ - brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_gpu(_s, _p, _values_type, _events_type) + for _transpose in transpose: + taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ + brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_gpu(_s, _p, _values_type, _events_type, _transpose) # append to dataframe - df.loc[df.shape[0]] = [_s, _p, 'gpu', _values_type, _events_type, + df.loc[df.shape[0]] = [_s, _p, 'gpu', _values_type, _events_type, _transpose, taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] df.to_csv('event_csrmv_gpu.csv', index=False)