-
Notifications
You must be signed in to change notification settings - Fork 48
/
Copy pathdbcsr_acc_test.c
196 lines (179 loc) · 7.76 KB
/
dbcsr_acc_test.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/*------------------------------------------------------------------------------------------------*/
/* Copyright (C) by the DBCSR developers group - All rights reserved */
/* This file is part of the DBCSR library. */
/* */
/* For information on the license, see the LICENSE file. */
/* For further information please visit https://dbcsr.cp2k.org */
/* SPDX-License-Identifier: GPL-2.0+ */
/*------------------------------------------------------------------------------------------------*/
#include "acc/acc.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#if !defined(NDEBUG)
# include <assert.h>
#endif
#if defined(_OPENMP)
# include <omp.h>
#endif
/*
 * Compile-time defaults of the test; each can be overridden on the compiler
 * command line (e.g., -DACC_EVENT_MAXCOUNT=64) since every definition is guarded.
 */
/* NOTE(review): not referenced by the code visible in this file. */
#if !defined(ACC_STRING_MAXLEN)
# define ACC_STRING_MAXLEN 32
#endif
/* Only used below to derive the default of ACC_EVENT_MAXCOUNT. */
#if !defined(ACC_STREAM_MAXCOUNT)
# define ACC_STREAM_MAXCOUNT 16
#endif
/* Number of events handled by main (size of its event and random-number arrays). */
#if !defined(ACC_EVENT_MAXCOUNT)
# define ACC_EVENT_MAXCOUNT (16 * ACC_STREAM_MAXCOUNT)
#endif
/* NOTE(review): not referenced by the code visible in this file. */
#if !defined(ACC_STREAM_MAXNTH_DESTROY)
# define ACC_STREAM_MAXNTH_DESTROY 2
#endif
/*
 * In main, a freshly created event is destroyed right away if
 * ACC_EVENT_MAXNTH_DESTROY * r < ACC_EVENT_MAXCOUNT, where r is a random
 * number in [0, ACC_EVENT_MAXCOUNT); a larger value destroys fewer events.
 */
#if !defined(ACC_EVENT_MAXNTH_DESTROY)
# define ACC_EVENT_MAXNTH_DESTROY 3
#endif
/*
 * ACC_CHECK(RESULT): requires RESULT to equal EXIT_SUCCESS.
 *   - NDEBUG builds: RESULT is evaluated once; any other value terminates the
 *     process via exit() using that value as exit code.
 *   - debug builds: the comparison is wrapped into assert().
 * PRINTF(...): forwards all arguments to printf in debug builds and expands to
 *   nothing with NDEBUG (arguments are then not evaluated). The macro is fully
 *   variadic such that a call with only a format string is valid as well.
 */
#if defined(NDEBUG)
#  define ACC_CHECK(RESULT) \
    do { \
      const int acc_check_result_ = (RESULT); \
      if (EXIT_SUCCESS != acc_check_result_) exit(acc_check_result_); \
    } while (0)
#  define PRINTF(...)
#else /* debug */
#  define ACC_CHECK(RESULT) assert(EXIT_SUCCESS == (RESULT))
#  define PRINTF(...) printf(__VA_ARGS__)
#endif
/**
 * This program tests the ACC interface (include/acc.h) for adhering to expectations.
 * The expected behavior is to match the CUDA based implementation, which was available
 * first. This test program can serve as a specification for other backends such as the
 * OpenMP based backend. It may also be used to stress-test any backend including the
 * CUDA based backend for thread-safety. Thread-safety is an implicit requirement
 * induced by DBCSR (and CP2K). To test any backend (other than the OpenMP backend),
 * the Makefile must be adjusted to link with the desired backend.
 *
 * Command line: [device [nthreads]]
 *   device   - index of the device to activate (default: 0); an index outside of
 *              [0, ndevices) is not an error, the device is simply not set.
 *   nthreads - number of threads used for the parallel sections (default and
 *              upper limit: omp_get_max_threads(), or 1 without OpenMP); values
 *              outside of [1, max] fall back to the maximum.
 *
 * Returns EXIT_SUCCESS if all checks passed. Any failing check ends the program
 * inside of ACC_CHECK.
 */
int main(int argc, char* argv[]) {
  const int device = (1 < argc ? atoi(argv[1]) : 0);
#if defined(_OPENMP)
  const int max_nthreads = omp_get_max_threads();
#else
  const int max_nthreads = 1;
#endif
  const int cli_nthreads = (2 < argc ? atoi(argv[2]) : max_nthreads);
  const int nthreads = ((0 < cli_nthreads && cli_nthreads <= max_nthreads) ? cli_nthreads : max_nthreads);
  int randnums[ACC_EVENT_MAXCOUNT], ndevices, priomin, priomax, i, nt;
  void *event[ACC_EVENT_MAXCOUNT], *s = NULL;
  const size_t mem_alloc = (16 /*MB*/ << 20);
  size_t mem_free, mem_total, mem_chunk;
  void *host_mem = NULL, *dev_mem = NULL;

  /* draw random numbers upfront (serially) rather than calling rand() in parallel regions */
  for (i = 0; i < ACC_EVENT_MAXCOUNT; ++i) {
    randnums[i] = rand();
  }

  /* allow get_ndevices/set_active_device before init */
  ACC_CHECK(c_dbcsr_acc_get_ndevices(&ndevices));
  if (0 <= device && device < ndevices) { /* not an error */
    ACC_CHECK(c_dbcsr_acc_set_active_device(device));
  }
  ACC_CHECK(c_dbcsr_acc_init());
  ACC_CHECK(c_dbcsr_acc_get_ndevices(&ndevices));
  PRINTF("ndevices: %i\n", ndevices);
  /* continue tests even with no device */
  if (0 <= device && device < ndevices) { /* not an error */
    ACC_CHECK(c_dbcsr_acc_set_active_device(device));
  }

  if (0 < ndevices) {
    ACC_CHECK(c_dbcsr_acc_dev_mem_info(&mem_free, &mem_total));
    ACC_CHECK(mem_free <= mem_total ? EXIT_SUCCESS : EXIT_FAILURE);
    PRINTF("device memory: free=%i MB total=%i MB\n", (int)(mem_free >> 20), (int)(mem_total >> 20));
    ACC_CHECK(c_dbcsr_acc_stream_priority_range(&priomin, &priomax));
    PRINTF("stream priority: lowest=%i highest=%i\n", priomin, priomax);
    for (i = 0; i < ACC_EVENT_MAXCOUNT; ++i) {
      event[i] = NULL;
    }

    /* create stream with NULL-name and low priority */
    ACC_CHECK(c_dbcsr_acc_stream_create(&s, NULL /*name*/, priomin));
    ACC_CHECK(c_dbcsr_acc_stream_destroy(s));
    /* create stream with empty name and medium priority */
    ACC_CHECK(c_dbcsr_acc_stream_create(&s, "", (priomin + priomax) / 2));
    ACC_CHECK(c_dbcsr_acc_stream_destroy(s));
    /* destroying NULL-stream shall be valid (just like delete/free) */
    ACC_CHECK(c_dbcsr_acc_stream_destroy(NULL));
    ACC_CHECK(c_dbcsr_acc_event_destroy(NULL));

    /* 1st pass: create every event and destroy a random subset right away */
#if defined(_OPENMP)
#  pragma omp parallel for num_threads(nthreads) private(i)
#endif
    for (i = 0; i < ACC_EVENT_MAXCOUNT; ++i) {
      const int r = randnums[i] % ACC_EVENT_MAXCOUNT;
      ACC_CHECK(c_dbcsr_acc_event_create(event + i));
      if (ACC_EVENT_MAXNTH_DESTROY * r < ACC_EVENT_MAXCOUNT) {
        void* const ei = event[i];
        event[i] = NULL; /* mark slot as empty for the next pass */
        ACC_CHECK(c_dbcsr_acc_event_destroy(ei));
      }
    }

    /* 2nd pass: re-create the events destroyed above, then destroy all events */
#if defined(_OPENMP)
#  pragma omp parallel for num_threads(nthreads) private(i)
#endif
    for (i = 0; i < ACC_EVENT_MAXCOUNT; ++i) {
      if (NULL == event[i]) {
        ACC_CHECK(c_dbcsr_acc_event_create(event + i));
      }
      ACC_CHECK(c_dbcsr_acc_event_destroy(event[i]));
    }

    /* 3rd pass: create all events anew (kept until the end of the test) */
#if defined(_OPENMP)
#  pragma omp parallel for num_threads(nthreads) private(i)
#endif
    for (i = 0; i < ACC_EVENT_MAXCOUNT; ++i) ACC_CHECK(c_dbcsr_acc_event_create(event + i));

    /* an event which was never recorded is expected to report as occurred */
    for (i = 0; i < ACC_EVENT_MAXCOUNT; ++i) {
      c_dbcsr_acc_bool_t has_occurred = 0;
      ACC_CHECK(c_dbcsr_acc_event_query(event[i], &has_occurred));
      ACC_CHECK(has_occurred ? EXIT_SUCCESS : EXIT_FAILURE);
    }

    ACC_CHECK(c_dbcsr_acc_stream_create(&s, "stream", priomax));
    if (NULL != s) {
      ACC_CHECK(c_dbcsr_acc_host_mem_allocate(&host_mem, mem_alloc, s));
      ACC_CHECK(c_dbcsr_acc_stream_sync(s)); /* wait for completion */
      memset(host_mem, 0xFF, mem_alloc); /* non-zero pattern */
    }
    ACC_CHECK(c_dbcsr_acc_dev_mem_allocate(&dev_mem, mem_alloc));

    /* one event per thread is needed below, hence limit the number of threads */
    nt = (nthreads < ACC_EVENT_MAXCOUNT ? nthreads : ACC_EVENT_MAXCOUNT);
    mem_chunk = (mem_alloc + nt - 1) / nt; /* rounded-up share per thread */
#if defined(_OPENMP)
#  pragma omp parallel num_threads(nt)
#endif
    {
#if defined(_OPENMP)
      const int tid = omp_get_thread_num();
#else
      const int tid = 0;
#endif
      /* each thread zeroes its own chunk; the last chunk may be shorter */
      const size_t offset = tid * mem_chunk, mem_rest = mem_alloc - offset;
      const size_t size = (mem_chunk <= mem_rest ? mem_chunk : mem_rest);
      c_dbcsr_acc_bool_t has_occurred = 0;
      ACC_CHECK(c_dbcsr_acc_memset_zero(dev_mem, offset, size, s));
      /* host_mem is only allocated for a non-NULL stream (see above); never hand a
       * NULL host buffer to the transfer.
       */
      if (NULL != host_mem) {
        /* can enqueue multiple/duplicate copies for the same memory region */
        ACC_CHECK(c_dbcsr_acc_memcpy_d2h(dev_mem, host_mem, mem_alloc, s));
      }
      ACC_CHECK(c_dbcsr_acc_event_query(event[tid], &has_occurred));
      /* unrecorded event has no work to wait for, hence it occurred */
      ACC_CHECK(has_occurred ? EXIT_SUCCESS : EXIT_FAILURE);
      ACC_CHECK(c_dbcsr_acc_event_record(event[tid], s));
      ACC_CHECK(c_dbcsr_acc_stream_wait_event(s, event[tid]));
      ACC_CHECK(c_dbcsr_acc_event_synchronize(event[tid]));
      /* after synchronizing the event, it must report as occurred */
      ACC_CHECK(c_dbcsr_acc_event_query(event[tid], &has_occurred));
      ACC_CHECK(has_occurred ? EXIT_SUCCESS : EXIT_FAILURE);
    }

    /* without a host buffer there is nothing that can be validated */
    if (NULL != host_mem) {
      /* validate backwards from where the last transfers occurred */
      for (i = (int)(mem_alloc - 1); 0 <= i; --i) {
        ACC_CHECK(0 == ((char*)host_mem)[i] ? EXIT_SUCCESS : EXIT_FAILURE);
      }
    }

#if defined(_OPENMP)
#  pragma omp parallel for num_threads(nthreads) private(i)
#endif
    for (i = 0; i < ACC_EVENT_MAXCOUNT; ++i) ACC_CHECK(c_dbcsr_acc_event_destroy(event[i]));
  }

  /* cleanup is also reached without any device: dev_mem and s are NULL in this case */
  ACC_CHECK(c_dbcsr_acc_dev_mem_deallocate(dev_mem));
  if (NULL != s) ACC_CHECK(c_dbcsr_acc_host_mem_deallocate(host_mem, s));
  ACC_CHECK(c_dbcsr_acc_stream_destroy(s));
  c_dbcsr_acc_clear_errors(); /* no result code */
  ACC_CHECK(c_dbcsr_acc_finalize());
  return EXIT_SUCCESS;
}