-
Notifications
You must be signed in to change notification settings - Fork 2
/
saxpy.cu
59 lines (47 loc) · 1.37 KB
/
saxpy.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// Evaluates `ans` (an expression yielding a cudaError_t) and hands the result
// to gpuAssert together with the file and line of the call site.
// The do { } while (0) wrapper makes the expansion a single statement, so
// `gpuErrchk(x);` stays correct inside unbraced if/else bodies.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
/**
 * Reports a failed CUDA runtime call and optionally terminates the process.
 *
 * Does nothing when `code` is cudaSuccess. Otherwise prints the runtime's
 * description of the error plus the given source location to stderr.
 *
 * @param code  Status value returned by a CUDA runtime call.
 * @param file  Name of the source file of the call site (normally __FILE__).
 *              Taken as `const char *` because __FILE__ is a string literal.
 * @param line  Line number of the call site (normally __LINE__).
 * @param abort When true (the default), the process exits with `code` as its
 *              exit status after the message is printed.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code == cudaSuccess)
        return;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort)
        exit(code);
}
/**
 * Single-precision "a*x plus y", updated in place:
 *     y[i] = a * x[i] + y[i]   for every i in [0, n).
 *
 * Each thread starts at its flat 1D global index and then advances by the
 * total number of launched threads (gridDim.x * blockDim.x), so every element
 * is processed regardless of how many blocks the caller launches. Only the
 * x dimension of the grid and block is used for indexing.
 *
 * Indices are computed in size_t: the original int index could wrap negative
 * for n close to INT_MAX, pass the `i < n` test and write out of bounds.
 *
 * @param n Number of elements to process; values <= 0 process nothing.
 * @param a Scalar multiplier applied to x.
 * @param x Input vector, read only; must hold at least n floats.
 * @param y Input/output vector; must hold at least n floats.
 */
__global__
void saxpy(int n, float a, const float *x, float *y) {
    // A negative n must not turn into a huge unsigned element count.
    const size_t count = n > 0 ? (size_t)n : 0;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < count; i += stride)
        y[i] = a * x[i] + y[i];
}
/**
 * Self-check driver for the saxpy kernel.
 *
 * Fills x with 1.0f and y with 2.0f on the host, copies both to the device,
 * launches saxpy with a = 2.0f, copies y back and prints the largest absolute
 * deviation from the expected value 4.0f (2.0f * 1.0f + 2.0f).
 *
 * Every CUDA runtime call is checked through gpuErrchk, which terminates the
 * process on failure.
 *
 * @return 0 after the check has run, EXIT_FAILURE if host allocation fails.
 */
int main(void) {
    const int N = 1 << 3;
    const int threadsPerBlock = 256;
    // Ceiling division so a partially filled last block is still launched.
    const int blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
    const size_t bytes = N * sizeof(float);

    float *x = (float *) malloc(bytes);
    float *y = (float *) malloc(bytes);
    if (x == NULL || y == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
        free(x);  // free(NULL) is a no-op, so this is safe for either pointer
        free(y);
        return EXIT_FAILURE;
    }

    float *d_x = NULL;
    float *d_y = NULL;
    gpuErrchk(cudaMalloc(&d_x, bytes));
    gpuErrchk(cudaMalloc(&d_y, bytes));

    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    gpuErrchk(cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_y, y, bytes, cudaMemcpyHostToDevice));

    saxpy<<<blocks, threadsPerBlock>>>(N, 2.0f, d_x, d_y);
    // A kernel launch returns no status; launch failures are read back here.
    gpuErrchk(cudaGetLastError());

    // Errors raised while the kernel executes are reported by this copy.
    gpuErrchk(cudaMemcpy(y, d_y, bytes, cudaMemcpyDeviceToHost));

    float maxError = 0.0f;
    for (int i = 0; i < N; i++)
        maxError = fmaxf(maxError, fabsf(y[i] - 4.0f));
    printf("Max error: %f\n", maxError);

    gpuErrchk(cudaFree(d_x));
    gpuErrchk(cudaFree(d_y));
    free(x);
    free(y);
    return 0;
}