Skip to content

Commit

Permalink
extra device memory allocation for reduce kernel
Browse files (browse the repository at this point in the history)
  • Loading branch information
ryichando committed Jan 10, 2025
1 parent 757639e commit 457d845
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
12 changes: 6 additions & 6 deletions examples/domino.ipynb
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,11 +11,11 @@
"from frontend import App\n",
"\n",
"app = App.create(\"domino\")\n",
"V, F, T = app.mesh.box().subdivide(2).tetrahedralize().scale(0.1, 0.3, 0.025)\n",
"V, F, T = app.mesh.box().subdivide(1).tetrahedralize().scale(0.1, 0.3, 0.025)\n",
"app.asset.add.tet(\"block\", V, F, T)\n",
"scene = app.scene.create(\"domino\")\n",
"\n",
"R, minR, d, N, C = 1.25, 0.6, 0.15, 4096, 3\n",
"R, minR, d, N, C = 1.0, 0.5, 0.15, 4096, 2\n",
"xz = None\n",
"for i in reversed(range(N)):\n",
" t = 2.0 * C * np.pi * i / N - np.pi / 2\n",
Expand Down Expand Up @@ -44,13 +44,13 @@
"source": [
"param = app.session.param()\n",
"(\n",
" param.set(\"volume-young-mod\", 6000)\n",
" .set(\"volume-poiss-rat\", 0.49)\n",
" param.set(\"volume-young-mod\", 1000)\n",
" .set(\"volume-poiss-rat\", 0.499)\n",
" .set(\"friction\", 0.1)\n",
" .set(\"min-newton-steps\", 32)\n",
" .set(\"dt\", 0.01)\n",
" .set(\"fps\", 15)\n",
" .set(\"frames\", 300)\n",
" .set(\"fps\", 30)\n",
" .set(\"frames\", 200)\n",
")\n",
"\n",
"session = app.session.create(fixed)\n",
Expand Down
13 changes: 7 additions & 6 deletions src/cpp/utility/utility.cu
Original file line number | Diff line number | Diff line change
Expand Up @@ -219,19 +219,20 @@ __global__ void reduce_op_kernel(const T *input, Y *output, Op func, Y init_val,
template <class T, class Y, typename Op>
Y reduce(const T *d_input, Op func, Y init_val, unsigned n) {
unsigned grid_size = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;
const unsigned scale_factor = 2;
static Y *d_output = nullptr;
static Y *h_results = nullptr;
static unsigned max_grid_size = 0;
if (d_output == nullptr) {
cudaMalloc(&d_output, grid_size * sizeof(Y));
h_results = new Y[grid_size];
max_grid_size = grid_size;
max_grid_size = scale_factor * grid_size;
cudaMalloc(&d_output, max_grid_size * sizeof(Y));
h_results = new Y[max_grid_size];
} else if (grid_size > max_grid_size) {
max_grid_size = grid_size;
max_grid_size = scale_factor * grid_size;
cudaFree(d_output);
delete[] h_results;
cudaMalloc(&d_output, grid_size * sizeof(Y));
h_results = new Y[grid_size];
cudaMalloc(&d_output, max_grid_size * sizeof(Y));
h_results = new Y[max_grid_size];
}
size_t shared_mem_size = sizeof(Y) * BLOCK_SIZE;
reduce_op_kernel<T, Y><<<grid_size, BLOCK_SIZE, shared_mem_size>>>(
Expand Down

0 comments on commit 457d845

Please sign in to comment.