-
Notifications
You must be signed in to change notification settings - Fork 0
/
sum_array.cu
53 lines (42 loc) · 1.42 KB
/
sum_array.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#include <stdio.h>
#include <stdlib.h>
// Kernel function to add two arrays
// Element-wise vector addition: c[i] = a[i] + b[i] for i in [0, size).
// Expects a 1-D launch whose grid covers at least `size` threads; threads
// past the end of the arrays return immediately, so the grid need not
// divide `size` evenly.
__global__ void addArrays(int *a, int *b, int *c, int size) {
    int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= size) return;  // guard the grid tail
    c[idx] = a[idx] + b[idx];
}
// Abort with a descriptive message if a CUDA runtime call failed.
// `what` identifies the failing call in the error output.
static void checkCuda(cudaError_t err, const char *what) {
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

// Driver: fills two host arrays (a[i] = i, b[i] = 2i), adds them on the
// GPU with addArrays, and prints each element-wise sum (expected: 3i).
int main() {
    const int size = 100;           // element count; const so a[size] is a
                                    // standard C++ array, not a VLA extension
    int a[size], b[size], c[size];  // host arrays
    int *d_a, *d_b, *d_c;           // device arrays
    const size_t bytes = size * sizeof(int);

    // Initialize input arrays.
    for (int i = 0; i < size; i++) {
        a[i] = i;
        b[i] = i * 2;
    }

    // Allocate memory on the GPU — every runtime call is checked, since a
    // failure here would otherwise surface only as garbage output later.
    checkCuda(cudaMalloc((void **)&d_a, bytes), "cudaMalloc d_a");
    checkCuda(cudaMalloc((void **)&d_b, bytes), "cudaMalloc d_b");
    checkCuda(cudaMalloc((void **)&d_c, bytes), "cudaMalloc d_c");

    // Copy inputs from host to device.
    checkCuda(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice), "memcpy a->d_a");
    checkCuda(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice), "memcpy b->d_b");

    // Launch configuration: ceil-div so the grid covers all `size` elements.
    int blockSize = 256;
    int numBlocks = (size + blockSize - 1) / blockSize;

    // Launch kernel. Launches return no status directly, so check
    // cudaGetLastError() for configuration errors, then synchronize to
    // surface any asynchronous execution errors.
    addArrays<<<numBlocks, blockSize>>>(d_a, d_b, d_c, size);
    checkCuda(cudaGetLastError(), "addArrays launch");
    checkCuda(cudaDeviceSynchronize(), "addArrays execution");

    // Copy result from device to host.
    checkCuda(cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost), "memcpy d_c->c");

    // Print result.
    printf("Result:\n");
    for (int i = 0; i < size; i++) {
        printf("%d + %d = %d\n", a[i], b[i], c[i]);
    }

    // Free device memory.
    checkCuda(cudaFree(d_a), "cudaFree d_a");
    checkCuda(cudaFree(d_b), "cudaFree d_b");
    checkCuda(cudaFree(d_c), "cudaFree d_c");
    return 0;
}