-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkernels.cu
137 lines (114 loc) · 4.57 KB
/
kernels.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include <stdio.h>
#include <math.h>
#include <curand.h>
#include <curand_kernel.h>
#include <cuda.h>
#include "kernels.cuh"
#include "brute_force.h"
// Function to initialize the particles in a square with uniform distribution
__global__ void initialize_particles_uni(double *x_pos, double *y_pos, double *x_vel, double *y_vel, double *x_acc, double *y_acc, double *mass)
{
int i = threadIdx.x + blockIdx.x*blockDim.x;
int stride = gridDim.x * blockDim.x;
int offset = 0;
// to initialize the cuda rand
curandState state;
curand_init(clock64(), i, 0, &state);
while (i + offset < N_PARTICLES)
{
mass[i + offset] = (double)PARTICLE_MASS;
x_pos[i + offset] = curand_uniform(&state)*GRID_MAX*2 + GRID_MIN;
y_pos[i + offset] = curand_uniform(&state)*GRID_MAX*2 + GRID_MIN;
// set velocity to 0
x_vel[i + offset] = 0.0;
y_vel[i + offset] = 0.0;
// set acceleration to 0
x_acc[i + offset] = 0.0;
y_acc[i + offset] = 0.0;
offset += stride;
}
}
// Function to initialize the particles in a holed circle
__global__ void initialize_particles_circle(double *x_pos, double *y_pos, double *x_vel, double *y_vel, double *x_acc, double *y_acc, double *mass)
{
int i = threadIdx.x + blockIdx.x*blockDim.x;
int stride = gridDim.x * blockDim.x;
int offset = 0;
// to initialize the cuda rand
curandState state;
curand_init(clock64(), i, 0, &state);
while (i + offset < N_PARTICLES)
{
mass[i + offset] = (double)PARTICLE_MASS;
double r = R_OFFSET + curand_uniform(&state)*R_MAX;
double alpha = curand_uniform(&state)*2*M_PI;
x_pos[i + offset] = r*cos(alpha);
y_pos[i + offset] = r*sin(alpha);
// set velocity to 0
x_vel[i + offset] = 0.0;
y_vel[i + offset] = 0.0;
// set acceleration to 0
x_acc[i + offset] = 0.0;
y_acc[i + offset] = 0.0;
offset += stride;
}
}
// Kernel computing the force and displacement per particle
__global__ void compute_forces(double *x_pos, double *y_pos, double *x_vel, double *y_vel, double *x_acc, double *y_acc, double *mass)
{
int i = threadIdx.x + blockIdx.x*blockDim.x;
int stride = gridDim.x * blockDim.x;
int offset = 0;
double fx=0;
double fy=0;
// loop on given particles if not enough threads
while (i + offset < N_PARTICLES)
{
fx = 0;
fy = 0;
// if outside the defined box, stop taking this particle into consideration
if (x_pos[i + offset] > SIMU_BOUND_X || x_pos[i + offset] < -SIMU_BOUND_X || y_pos[i + offset] > SIMU_BOUND_Y || y_pos[i + offset] < -SIMU_BOUND_Y)
{
x_acc[i + offset] = 0;
y_acc[i + offset] = 0;
}
else
{
// when looping on itself, will just add a 0 composant to the force.
for (int j=0; j<N_PARTICLES; j++)
{
// if target in the defined grid
if (!(x_pos[j] > SIMU_BOUND_X || x_pos[j] < -SIMU_BOUND_X || y_pos[j] > SIMU_BOUND_Y || y_pos[j] < -SIMU_BOUND_Y))
{
double r = sqrt((x_pos[j]-x_pos[i+offset])*(x_pos[j]-x_pos[i+offset]) + (y_pos[j]-y_pos[i+offset])*(y_pos[j]-y_pos[i+offset]));
// if distance between the points smaller than epsilon, fix d = epsilon for computation.
if (r > EPSILON)
{
fx += (G*mass[i+offset]*mass[j]*(x_pos[j]-x_pos[i+offset]))/r;
fy += (G*mass[i+offset]*mass[j]*(y_pos[j]-y_pos[i+offset]))/r;
}
else
{
fx += (G*mass[i+offset]*mass[j]*(x_pos[j]-x_pos[i+offset]))/EPSILON;
fy += (G*mass[i+offset]*mass[j]*(y_pos[j]-y_pos[i+offset]))/EPSILON;
}
}
}
// F = ma -> a = F/m
x_acc[i+offset] = fx / mass[i+offset];
y_acc[i+offset] = fy / mass[i+offset];
}
offset += stride;
}
__syncthreads();
// have to define this second loop otherwise would move particles before having computed the force for all of them
offset = 0;
while (i + offset < N_PARTICLES)
{
x_pos[i+offset] += 0.5*x_acc[i+offset]*TIMESTEP*TIMESTEP + x_vel[i+offset]*TIMESTEP;
y_pos[i+offset] += 0.5*y_acc[i+offset]*TIMESTEP*TIMESTEP + y_vel[i+offset]*TIMESTEP;
x_vel[i+offset] += x_acc[i+offset]*TIMESTEP;
y_vel[i+offset] += y_acc[i+offset]*TIMESTEP;
offset+=stride;
}
}