diff --git a/brainpy/_src/math/event/tests/event_csrmv_taichi_VS_event_csrmv.py b/brainpy/_src/math/event/tests/event_csrmv_taichi_VS_event_csrmv.py index 39a813264..47c60b2d6 100644 --- a/brainpy/_src/math/event/tests/event_csrmv_taichi_VS_event_csrmv.py +++ b/brainpy/_src/math/event/tests/event_csrmv_taichi_VS_event_csrmv.py @@ -278,10 +278,10 @@ def test_event_ell_gpu(s, p, values_type, events_type, transpose): for _transpose in transpose: taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_cpu(_s, _p, _values_type, _events_type, _transpose) - # append to dataframe - df.loc[df.shape[0]] = [_s, _p, 'cpu', _values_type, _events_type, _transpose, - taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, - brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] + # append to dataframe + df.loc[df.shape[0]] = [_s, _p, 'cpu', _values_type, _events_type, _transpose, + taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, + brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] df.to_csv(f'{PATH}/event_csrmv_cpu.csv', index=False) if (bm.get_platform() == 'gpu'): @@ -292,10 +292,10 @@ def test_event_ell_gpu(s, p, values_type, events_type, transpose): for _transpose in transpose: taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_gpu(_s, _p, _values_type, _events_type, _transpose) - # append to dataframe - df.loc[df.shape[0]] = [_s, _p, 'gpu', _values_type, _events_type, transpose, - taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, - brainpy_time_1, brainpy_time_2, brainpy_time_3, 
brainpy_time_4, brainpy_time_5, speedup] + # append to dataframe + df.loc[df.shape[0]] = [_s, _p, 'gpu', _values_type, _events_type, _transpose, + taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, + brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] df.to_csv(f'{PATH}/event_csrmv_gpu.csv', index=False) # if (bm.get_platform() == 'gpu'): diff --git a/brainpy/_src/math/sparse/_csr_mv_taichi.py b/brainpy/_src/math/sparse/_csr_mv_taichi.py index 6f739e7bd..d3a6e10b6 100644 --- a/brainpy/_src/math/sparse/_csr_mv_taichi.py +++ b/brainpy/_src/math/sparse/_csr_mv_taichi.py @@ -74,9 +74,18 @@ def _sparse_csr_matvec_transpose_homo_gpu(values: ti.types.ndarray(ndim=1), vector: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): value = values[0] - for row_i in range(row_ptr.shape[0] - 1): - for j in range(row_ptr[row_i], row_ptr[row_i + 1]): + total_rows = row_ptr.shape[0] - 1 + for i in range(total_rows * 32): + row_i = i >> 5 + index = i & 31 + j = row_ptr[row_i] + index + end_index = row_ptr[row_i + 1] + while j < end_index: out[col_indices[j]] += value * vector[row_i] + j += 32 + # for row_i in range(row_ptr.shape[0] - 1): + # for j in range(row_ptr[row_i], row_ptr[row_i + 1]): + # out[col_indices[j]] += value * vector[row_i] @ti.kernel @@ -86,11 +95,22 @@ def _sparse_csr_matvec_homo_gpu(values: ti.types.ndarray(ndim=1), vector: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): value = values[0] - for row_i in range(row_ptr.shape[0] - 1): + total_rows = row_ptr.shape[0] - 1 + for i in range(total_rows * 32): + row_i = i >> 5 + index = i & 31 r = 0. - for j in range(row_ptr[row_i], row_ptr[row_i + 1]): + j = row_ptr[row_i] + index + end_index = row_ptr[row_i + 1] + while j < end_index: r += value * vector[col_indices[j]] - out[row_i] = r + j += 32 + out[row_i] += r # accumulate the partial sum of this lane; assumes out is zero-initialised (TODO confirm) + # for row_i in range(row_ptr.shape[0] - 1): + # r = 0. 
+ # for j in range(row_ptr[row_i], row_ptr[row_i + 1]): + # r += value * vector[col_indices[j]] + # out[row_i] = r # heter @@ -100,9 +120,18 @@ def _sparse_csr_matvec_transpose_heter_gpu(values: ti.types.ndarray(ndim=1), row_ptr: ti.types.ndarray(ndim=1), vector: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): - for row_i in range(row_ptr.shape[0] - 1): - for j in range(row_ptr[row_i], row_ptr[row_i + 1]): + total_rows = row_ptr.shape[0] - 1 + for i in range(total_rows * 32): + row_i = i >> 5 + index = i & 31 + j = row_ptr[row_i] + index + end_index = row_ptr[row_i + 1] + while j < end_index: out[col_indices[j]] += values[j] * vector[row_i] + j += 32 + # for row_i in range(row_ptr.shape[0] - 1): + # for j in range(row_ptr[row_i], row_ptr[row_i + 1]): + # out[col_indices[j]] += values[j] * vector[row_i] @ti.kernel @@ -111,11 +140,22 @@ def _sparse_csr_matvec_heter_gpu(values: ti.types.ndarray(ndim=1), row_ptr: ti.types.ndarray(ndim=1), vector: ti.types.ndarray(ndim=1), out: ti.types.ndarray(ndim=1)): - for row_i in range(row_ptr.shape[0] - 1): + total_rows = row_ptr.shape[0] - 1 + for i in range(total_rows * 32): + row_i = i >> 5 + index = i & 31 r = 0. - for j in range(row_ptr[row_i], row_ptr[row_i + 1]): + j = row_ptr[row_i] + index + end_index = row_ptr[row_i + 1] + while j < end_index: r += values[j] * vector[col_indices[j]] - out[row_i] = r + j += 32 + out[row_i] += r # accumulate the partial sum of this lane; assumes out is zero-initialised (TODO confirm) + # for row_i in range(row_ptr.shape[0] - 1): + # r = 0. 
+ # for j in range(row_ptr[row_i], row_ptr[row_i + 1]): + # r += values[j] * vector[col_indices[j]] + # out[row_i] = r def _sparse_csr_matvec_jvp_values(val_dot, values, col_indices, row_ptr, vector, *, outs, transpose, shape): diff --git a/brainpy/_src/math/sparse/tests/csrmv_taichi_VS_csrmv.py b/brainpy/_src/math/sparse/tests/csrmv_taichi_VS_csrmv.py index 9583743fb..606715e91 100644 --- a/brainpy/_src/math/sparse/tests/csrmv_taichi_VS_csrmv.py +++ b/brainpy/_src/math/sparse/tests/csrmv_taichi_VS_csrmv.py @@ -14,7 +14,7 @@ bm.set_platform('gpu') -s = [1000, 2500, 5000, 10000, 25000, 50000] +s = [1000, 5000, 10000, 15000, 20000, 25000, 30000] p = [0.1, 0.2, 0.3, 0.4, 0.5] values_type = ['homo', 'heter'] events_type = ['float'] @@ -266,11 +266,11 @@ def test_event_ell_gpu(s, p, values_type, events_type, transpose): for _transpose in transpose: taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_cpu(_s, _p, _values_type, _events_type, _transpose) - # append to dataframe - df.loc[df.shape[0]] = [_s, _p, 'cpu', _values_type, _events_type, _transpose, - taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, - brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] - df.to_csv(f'{PATH}/event_csrmv_cpu.csv', index=False) + # append to dataframe + df.loc[df.shape[0]] = [_s, _p, 'cpu', _values_type, _events_type, _transpose, + taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, + brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] + df.to_csv(f'{PATH}/csrmv_cpu.csv', index=False) if (bm.get_platform() == 'gpu'): for _s in s: @@ -280,11 +280,11 @@ def test_event_ell_gpu(s, p, values_type, events_type, transpose): for _transpose in transpose: taichi_aot_time_1, 
taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5,\ brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup = test_event_ell_gpu(_s, _p, _values_type, _events_type, _transpose) - # append to dataframe - df.loc[df.shape[0]] = [_s, _p, 'gpu', _values_type, _events_type, transpose, - taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, - brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] - df.to_csv(f'{PATH}/event_csrmv_gpu.csv', index=False) + # append to dataframe + df.loc[df.shape[0]] = [_s, _p, 'gpu', _values_type, _events_type, _transpose, + taichi_aot_time_1, taichi_aot_time_2, taichi_aot_time_3, taichi_aot_time_4, taichi_aot_time_5, + brainpy_time_1, brainpy_time_2, brainpy_time_3, brainpy_time_4, brainpy_time_5, speedup] + df.to_csv(f'{PATH}/csrmv_gpu.csv', index=False) # if (bm.get_platform() == 'gpu'): # for _s in s: