-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathgpu.cu
86 lines (71 loc) · 2.4 KB
/
gpu.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include "common.h"
#include <cuda.h>
#define NUM_THREADS 256
// Put any static global variables here that you will use throughout the simulation.
int blks;
__device__ void apply_force_gpu(particle_t& particle, particle_t& neighbor) {
double dx = neighbor.x - particle.x;
double dy = neighbor.y - particle.y;
double r2 = dx * dx + dy * dy;
if (r2 > cutoff * cutoff)
return;
// r2 = fmax( r2, min_r*min_r );
r2 = (r2 > min_r * min_r) ? r2 : min_r * min_r;
double r = sqrt(r2);
//
// very simple short-range repulsive force
//
double coef = (1 - cutoff / r) / r2 / mass;
particle.ax += coef * dx;
particle.ay += coef * dy;
}
__global__ void compute_forces_gpu(particle_t* particles, int num_parts) {
// Get thread (particle) ID
int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid >= num_parts)
return;
particles[tid].ax = particles[tid].ay = 0;
for (int j = 0; j < num_parts; j++)
apply_force_gpu(particles[tid], particles[j]);
}
__global__ void move_gpu(particle_t* particles, int num_parts, double size) {
// Get thread (particle) ID
int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid >= num_parts)
return;
particle_t* p = &particles[tid];
//
// slightly simplified Velocity Verlet integration
// conserves energy better than explicit Euler method
//
p->vx += p->ax * dt;
p->vy += p->ay * dt;
p->x += p->vx * dt;
p->y += p->vy * dt;
//
// bounce from walls
//
while (p->x < 0 || p->x > size) {
p->x = p->x < 0 ? -(p->x) : 2 * size - p->x;
p->vx = -(p->vx);
}
while (p->y < 0 || p->y > size) {
p->y = p->y < 0 ? -(p->y) : 2 * size - p->y;
p->vy = -(p->vy);
}
}
void init_simulation(particle_t* parts, int num_parts, double size) {
// You can use this space to initialize data objects that you may need
// This function will be called once before the algorithm begins
// parts live in GPU memory
// Do not do any particle simulation here
blks = (num_parts + NUM_THREADS - 1) / NUM_THREADS;
}
void simulate_one_step(particle_t* parts, int num_parts, double size) {
// parts live in GPU memory
// Rewrite this function
// Compute forces
compute_forces_gpu<<<blks, NUM_THREADS>>>(parts, num_parts);
// Move particles
move_gpu<<<blks, NUM_THREADS>>>(parts, num_parts, size);
}