-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathhip_nw_types.h
3676 lines (3227 loc) · 132 KB
/
hip_nw_types.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/** NightWatch MODIFIED header for HIP (version 1.9.0)
* Lines marked with "NWR:" were removed by NightWatch.
* All lines are from the original headers: hip_cpp_bridge.h, hip/hip_runtime.h, hip/hcc_detail/hip_runtime_api.h, hsa_limited.h, stdint.h
*/
#ifndef _HIP_CPP_BRIDGE_H_
#define _HIP_CPP_BRIDGE_H_ 1
#include <stddef.h>
#include <hsa/hsa.h>
#include <hip/hip_runtime.h>
#include <amd_hsa_kernel_code.h>
#ifdef __cplusplus
extern "C" {
#endif
struct nw_kern_info {
uint64_t workgroup_group_segment_byte_size;
uint64_t workitem_private_segment_byte_size;
uint64_t _object;
};
struct hipFuncAttributes;
typedef struct hipFuncAttributes hipFuncAttributes;
hipError_t
__do_c_hipGetDeviceProperties(char* prop, int deviceId);
hsa_status_t HSA_API __do_c_hsa_executable_symbol_get_info(
hsa_executable_symbol_t executable_symbol,
hsa_executable_symbol_info_t attribute, char *value, size_t max_value);
hsa_status_t HSA_API __do_c_hsa_agent_get_info(
hsa_agent_t agent,
hsa_agent_info_t attribute,
void* value,
size_t max_value);
hsa_status_t HSA_API __do_c_query_host_address(
uint64_t kernel_object_,
char *kernel_header_);
int __do_c_load_executable(
const char *file_buf,
size_t file_len,
hsa_executable_t * executable,
hsa_agent_t * agent);
size_t __do_c_get_agents(
hsa_agent_t *agents,
size_t agents_len);
size_t __do_c_get_isas(
hsa_agent_t agent,
hsa_isa_t *isas,
size_t isas_len);
size_t __do_c_get_kerenel_symbols(
const hsa_executable_t *exec,
const hsa_agent_t *agent,
hsa_executable_symbol_t *symbols,
size_t symbols_len);
hipError_t
__do_c_mass_symbol_info(size_t n, const hsa_executable_symbol_t *symbols,
hsa_symbol_kind_t *types, hipFunction_t *descriptors,
uint8_t *agents, /* uint8 to ensure ava works properly */
unsigned *offsets, char *pool,
size_t pool_size);
hipError_t
__do_c_hipHccModuleLaunchMultiKernel(
int numKernels, hsa_kernel_dispatch_packet_t *aql,
hipStream_t stream,
char* all_extra, size_t total_extra_size, size_t* extra_size,
hipEvent_t *start, hipEvent_t *stop);
hipError_t
__do_c_hipHccModuleLaunchMultiKernel_and_memcpy(
int numKernels, hsa_kernel_dispatch_packet_t *aql,
hipStream_t stream,
char* all_extra, size_t total_extra_size, size_t* extra_size,
hipEvent_t *start, hipEvent_t *stop,
void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind);
hipError_t
nw_hipMemcpySync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
hipStream_t stream);
hipError_t
nw_hipMemcpy(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind);
hipError_t
nw_hipMemcpyPeerAsync(void* dst, int dstDeviceId, const void* src, int srcDevice,
size_t sizeBytes, hipStream_t stream);
hipError_t
nw_hipCtxGetDevice(hipDevice_t* device);
hipError_t
nw_hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId);
hipError_t
nw_hipStreamSynchronize(hipStream_t stream);
hipError_t
nw_hipCtxSetCurrent(hipCtx_t ctx);
hipError_t
nw_hipGetDevice(int* deviceId);
hipError_t
nw_hipSetDevice(int deviceId);
hipError_t
nw_hipStreamCreate(hipStream_t* stream, hsa_agent_t *agent);
hipError_t
nw_hipStreamDestroy(hipStream_t stream);
hipError_t
__do_c_hipHccModuleLaunchKernel(hsa_kernel_dispatch_packet_t *aql,
hipStream_t stream, void** kernelParams, char* extra,
size_t extra_size, hipEvent_t start, hipEvent_t stop);
hipError_t
__do_c_get_kernel_descriptor(const hsa_executable_symbol_t *symbol, const char *name, hipFunction_t *f);
hipError_t
nw_lookup_kern_info(hipFunction_t f, struct nw_kern_info *info);
#ifdef __cplusplus
}
#endif
#endif
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
//! HIP = Heterogeneous-compute Interface for Portability
//!
//! Define a extremely thin runtime layer that allows source code to be compiled unmodified
//! through either AMD HCC or NVCC. Key features tend to be in the spirit
//! and terminology of CUDA, but with a portable path to other accelerators as well:
//
//! Both paths support rich C++ features including classes, templates, lambdas, etc.
//! Runtime API is C
//! Memory management is based on pure pointers and resembles malloc/free/copy.
//
//! hip_runtime.h : includes everything in hip_api.h, plus math builtins and kernel launch
//! macros. hip_runtime_api.h : Defines HIP API. This is a C header file and does not use any C++
//! features.
#ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_H
#define HIP_INCLUDE_HIP_HIP_RUNTIME_H
// Some standard header files, these are included by hc.hpp and so want to make them avail on both
// paths to provide a consistent include env and avoid "missing symbol" errors that only appears
// on NVCC path:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#if __cplusplus > 199711L
#include <thread>
#endif
#include <hip/hip_common.h>
#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__)
#include <hip/hcc_detail/hip_runtime.h>
#elif defined(__HIP_PLATFORM_NVCC__) && !defined(__HIP_PLATFORM_HCC__)
#include <hip/nvcc_detail/hip_runtime.h>
#else
#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__");
#endif
#include <hip/hip_runtime_api.h>
#include <hip/hip_vector_types.h>
#endif
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
//#pragma once
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_API_H
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_API_H
/**
* @file hcc_detail/hip_runtime_api.h
* @brief Contains C function APIs for HIP runtime. This file does not use any HCC builtin or
* special language extensions (-hc mode) ; those functions in hip_runtime.h.
*/
#include <stdint.h>
#include <stddef.h>
#ifndef GENERIC_GRID_LAUNCH
#define GENERIC_GRID_LAUNCH 1
#endif
#include <hip/hcc_detail/host_defines.h>
#include <hip/hip_runtime_api.h>
#include <hip/hcc_detail/driver_types.h>
#include <hip/hcc_detail/hip_texture_types.h>
#include <hip/hcc_detail/hip_surface_types.h>
#define DEPRECATED(msg) __attribute__ ((deprecated(msg)))
#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases.For more details please refer https://github.com/ROCm-Developer-Tools/HIP/tree/master/docs/markdown/hip_deprecated_api_list"
#if defined(__HCC__) && (__hcc_workweek__ < 16155)
#error("This version of HIP requires a newer version of HCC.");
#endif
#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
#define HIP_LAUNCH_PARAM_END ((void*)0x03)
#ifdef __cplusplus
#define __dparm(x) \
= x
#else
#define __dparm(x)
#endif
// Structure definitions:
#ifdef __cplusplus
extern "C" {
#endif
//---
// API-visible structures
typedef struct ihipCtx_t* hipCtx_t;
// Note many APIs also use integer deviceIds as an alternative to the device pointer:
typedef int hipDevice_t;
typedef struct ihipStream_t* hipStream_t;
// TODO: IPC implementation
#define hipIpcMemLazyEnablePeerAccess 0
#define HIP_IPC_HANDLE_SIZE 64
typedef struct hipIpcMemHandle_st {
char reserved[HIP_IPC_HANDLE_SIZE];
} hipIpcMemHandle_t;
// TODO: IPC event handle currently unsupported
struct ihipIpcEventHandle_t;
typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t;
// END TODO
typedef struct ihipModule_t* hipModule_t;
typedef struct ihipModuleSymbol_t* hipFunction_t;
struct hipFuncAttributes {
int binaryVersion;
int cacheModeCA;
size_t constSizeBytes;
size_t localSizeBytes;
int maxDynamicSharedSizeBytes;
int maxThreadsPerBlock;
int numRegs;
int preferredShmemCarveout;
int ptxVersion;
size_t sharedSizeBytes;
};
typedef struct ihipEvent_t* hipEvent_t;
enum hipLimit_t {
hipLimitMallocHeapSize = 0x02,
};
/**
* @addtogroup GlobalDefs More
* @{
*/
//! Flags that can be used with hipStreamCreateWithFlags
#define hipStreamDefault \
0x00 ///< Default stream creation flags. These are used with hipStreamCreate().
#define hipStreamNonBlocking 0x01 ///< Stream does not implicitly synchronize with null stream
//! Flags that can be used with hipEventCreateWithFlags:
#define hipEventDefault 0x0 ///< Default flags
#define hipEventBlockingSync \
0x1 ///< Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency.
#define hipEventDisableTiming \
0x2 ///< Disable event's capability to record timing information. May improve performance.
#define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP.
#define hipEventReleaseToDevice \
0x40000000 /// < Use a device-scope release when recording this event. This flag is useful to
/// obtain more precise timings of commands between events. The flag is a no-op on
/// CUDA platforms.
#define hipEventReleaseToSystem \
0x80000000 /// < Use a system-scope release that when recording this event. This flag is
/// useful to make non-coherent host memory visible to the host. The flag is a
/// no-op on CUDA platforms.
//! Flags that can be used with hipHostMalloc
#define hipHostMallocDefault 0x0
#define hipHostMallocPortable 0x1 ///< Memory is considered allocated by all contexts.
#define hipHostMallocMapped \
0x2 ///< Map the allocation into the address space for the current device. The device pointer
///< can be obtained with #hipHostGetDevicePointer.
#define hipHostMallocWriteCombined 0x4
#define hipHostMallocCoherent \
0x40000000 ///< Allocate coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific
///< allocation.
#define hipHostMallocNonCoherent \
0x80000000 ///< Allocate non-coherent memory. Overrides HIP_COHERENT_HOST_ALLOC for specific
///< allocation.
//! Flags that can be used with hipHostRegister
#define hipHostRegisterDefault 0x0 ///< Memory is Mapped and Portable
#define hipHostRegisterPortable 0x1 ///< Memory is considered registered by all contexts.
#define hipHostRegisterMapped \
0x2 ///< Map the allocation into the address space for the current device. The device pointer
///< can be obtained with #hipHostGetDevicePointer.
#define hipHostRegisterIoMemory 0x4 ///< Not supported.
#define hipDeviceScheduleAuto 0x0 ///< Automatically select between Spin and Yield
#define hipDeviceScheduleSpin \
0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and
///< may consume more power.
#define hipDeviceScheduleYield \
0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers
///< power and is friendlier to other threads in the system.
#define hipDeviceScheduleBlockingSync 0x4
#define hipDeviceScheduleMask 0x7
#define hipDeviceMapHost 0x8
#define hipDeviceLmemResizeToMax 0x16
#define hipArrayDefault 0x00 ///< Default HIP array allocation flag
#define hipArrayLayered 0x01
#define hipArraySurfaceLoadStore 0x02
#define hipArrayCubemap 0x04
#define hipArrayTextureGather 0x08
/*
* @brief hipJitOption
* @enum
* @ingroup Enumerations
*/
typedef enum hipJitOption {
hipJitOptionMaxRegisters = 0,
hipJitOptionThreadsPerBlock,
hipJitOptionWallTime,
hipJitOptionInfoLogBuffer,
hipJitOptionInfoLogBufferSizeBytes,
hipJitOptionErrorLogBuffer,
hipJitOptionErrorLogBufferSizeBytes,
hipJitOptionOptimizationLevel,
hipJitOptionTargetFromContext,
hipJitOptionTarget,
hipJitOptionFallbackStrategy,
hipJitOptionGenerateDebugInfo,
hipJitOptionLogVerbose,
hipJitOptionGenerateLineInfo,
hipJitOptionCacheMode,
hipJitOptionSm3xOpt,
hipJitOptionFastCompile,
hipJitOptionNumOptions
} hipJitOption;
/**
* @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
*/
typedef enum hipFuncCache_t {
hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default)
hipFuncCachePreferShared, ///< prefer larger shared memory and smaller L1 cache
hipFuncCachePreferL1, ///< prefer larger L1 cache and smaller shared memory
hipFuncCachePreferEqual, ///< prefer equal size L1 cache and shared memory
} hipFuncCache_t;
/**
* @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
*/
typedef enum hipSharedMemConfig {
hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking.
hipSharedMemBankSizeFourByte, ///< Shared mem is banked at 4-bytes intervals and performs best
///< when adjacent threads access data 4 bytes apart.
hipSharedMemBankSizeEightByte ///< Shared mem is banked at 8-byte intervals and performs best
///< when adjacent threads access data 4 bytes apart.
} hipSharedMemConfig;
/**
* Struct for data in 3D
*
*/
typedef struct dim3 {
uint32_t x; ///< x
uint32_t y; ///< y
uint32_t z; ///< z
#ifdef __cplusplus
dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
#endif
} dim3;
// Doxygen end group GlobalDefs
/** @} */
//-------------------------------------------------------------------------------------------------
// The handle allows the async commands to use the stream even if the parent hipStream_t goes
// out-of-scope.
// typedef class ihipStream_t * hipStream_t;
/*
* Opaque structure allows the true event (pointed at by the handle) to remain "live" even if the
* surrounding hipEvent_t goes out-of-scope. This is handy for cases where the hipEvent_t goes
* out-of-scope but the true event is being written by some async queue or device */
// typedef struct hipEvent_t {
// struct ihipEvent_t *_handle;
//} hipEvent_t;
/**
* @defgroup API HIP API
* @{
*
* Defines the HIP API. See the individual sections for more information.
*/
/**
*-------------------------------------------------------------------------------------------------
*-------------------------------------------------------------------------------------------------
* @defgroup Device Device Management
* @{
*/
/**
* @brief Waits on all active streams on current device
*
* When this command is invoked, the host thread gets blocked until all the commands associated
* with streams associated with the device. HIP does not support multiple blocking modes (yet!).
*
* @returns #hipSuccess
*
* @see hipSetDevice, hipDeviceReset
*/
hipError_t hipDeviceSynchronize(void);
/**
* @brief The state of current device is discarded and updated to a fresh state.
*
* Calling this function deletes all streams created, memory allocated, kernels running, events
* created. Make sure that no other thread is using the device or streams, memory, kernels, events
* associated with the current device.
*
* @returns #hipSuccess
*
* @see hipDeviceSynchronize
*/
hipError_t hipDeviceReset(void);
/**
* @brief Set default device to be used for subsequent hip API calls from this thread.
*
* @param[in] deviceId Valid device in range 0...hipGetDeviceCount().
*
* Sets @p device as the default device for the calling host thread. Valid device id's are 0...
* (hipGetDeviceCount()-1).
*
* Many HIP APIs implicitly use the "default device" :
*
* - Any device memory subsequently allocated from this host thread (using hipMalloc) will be
* allocated on device.
* - Any streams or events created from this host thread will be associated with device.
* - Any kernels launched from this host thread (using hipLaunchKernel) will be executed on device
* (unless a specific stream is specified, in which case the device associated with that stream will
* be used).
*
* This function may be called from any host thread. Multiple host threads may use the same device.
* This function does no synchronization with the previous or new device, and has very little
* runtime overhead. Applications can use hipSetDevice to quickly switch the default device before
* making a HIP runtime call which uses the default device.
*
* The default device is stored in thread-local-storage for each thread.
* Thread-pool implementations may inherit the default device of the previous thread. A good
* practice is to always call hipSetDevice at the start of HIP coding sequency to establish a known
* standard device.
*
* @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorDeviceAlreadyInUse
*
* @see hipGetDevice, hipGetDeviceCount
*/
hipError_t hipSetDevice(int deviceId);
/**
* @brief Return the default device id for the calling host thread.
*
* @param [out] device *device is written with the default device
*
* HIP maintains an default device for each thread using thread-local-storage.
* This device is used implicitly for HIP runtime APIs called by this thread.
* hipGetDevice returns in * @p device the default device for the calling host thread.
*
* @returns #hipSuccess
*
* @see hipSetDevice, hipGetDevicesizeBytes
*/
hipError_t hipGetDevice(int* deviceId);
/**
* @brief Return number of compute-capable devices.
*
* @param [output] count Returns number of compute-capable devices.
*
* @returns #hipSuccess, #hipErrorNoDevice
*
*
* Returns in @p *count the number of devices that have ability to run compute commands. If there
* are no such devices, then @ref hipGetDeviceCount will return #hipErrorNoDevice. If 1 or more
* devices can be found, then hipGetDeviceCount returns #hipSuccess.
*/
hipError_t hipGetDeviceCount(int* count);
/**
* @brief Query for a specific device attribute.
*
* @param [out] pi pointer to value to return
* @param [in] attr attribute to query
* @param [in] deviceId which device to query for information
*
* @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue
*/
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceId);
/**
* @brief Returns device properties.
*
* @param [out] prop written with device properties
* @param [in] deviceId which device to query for information
*
* @return #hipSuccess, #hipErrorInvalidDevice
* @bug HCC always returns 0 for maxThreadsPerMultiProcessor
* @bug HCC always returns 0 for regsPerBlock
* @bug HCC always returns 0 for l2CacheSize
*
* Populates hipGetDeviceProperties with information for the specified device.
*/
hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId);
/**
* @brief Set L1/Shared cache partition.
*
* @param [in] cacheConfig
*
* @returns #hipSuccess, #hipErrorInitializationError
* Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored
* on those architectures.
*
*/
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig);
/**
* @brief Set Cache configuration for a specific function
*
* @param [in] cacheConfig
*
* @returns #hipSuccess, #hipErrorInitializationError
* Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored
* on those architectures.
*
*/
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig);
/**
* @brief Get Resource limits of current device
*
* @param [out] pValue
* @param [in] limit
*
* @returns #hipSuccess, #hipErrorUnsupportedLimit, #hipErrorInvalidValue
* Note: Currently, only hipLimitMallocHeapSize is available
*
*/
hipError_t hipDeviceGetLimit(size_t* pValue, enum hipLimit_t limit);
/**
* @brief Set Cache configuration for a specific function
*
* @param [in] config;
*
* @returns #hipSuccess, #hipErrorInitializationError
* Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored
* on those architectures.
*
*/
hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t config);
/**
* @brief Returns bank width of shared memory for current device
*
* @param [out] pConfig
*
* @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError
*
* Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
* ignored on those architectures.
*
*/
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig);
/**
* @brief The bank width of shared memory on current device is set
*
* @param [in] config
*
* @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError
*
* Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is
* ignored on those architectures.
*
*/
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config);
/**
* @brief The current device behavior is changed according the flags passed.
*
* @param [in] flags
*
* The schedule flags impact how HIP waits for the completion of a command running on a device.
* hipDeviceScheduleSpin : HIP runtime will actively spin in the thread which submitted the
* work until the command completes. This offers the lowest latency, but will consume a CPU core
* and may increase power. hipDeviceScheduleYield : The HIP runtime will yield the CPU to
* system so that other tasks can use it. This may increase latency to detect the completion but
* will consume less power and is friendlier to other tasks in the system.
* hipDeviceScheduleBlockingSync : On ROCm platform, this is a synonym for hipDeviceScheduleYield.
* hipDeviceScheduleAuto : Use a hueristic to select between Spin and Yield modes. If the
* number of HIP contexts is greater than the number of logical processors in the system, use Spin
* scheduling. Else use Yield scheduling.
*
*
* hipDeviceMapHost : Allow mapping host memory. On ROCM, this is always allowed and
* the flag is ignored. hipDeviceLmemResizeToMax : @warning ROCm silently ignores this flag.
*
* @returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorSetOnActiveProcess
*
*
*/
hipError_t hipSetDeviceFlags(unsigned flags);
/**
* @brief Device which matches hipDeviceProp_t is returned
*
* @param [out] device ID
* @param [in] device properties pointer
*
* @returns #hipSuccess, #hipErrorInvalidValue
*/
hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop);
// end doxygen Device
/**
* @}
*/
/**
*-------------------------------------------------------------------------------------------------
*-------------------------------------------------------------------------------------------------
* @defgroup Error Error Handling
* @{
*/
/**
* @brief Return last error returned by any HIP runtime API call and resets the stored error code to
* #hipSuccess
*
* @returns return code from last HIP called from the active host thread
*
* Returns the last error that has been returned by any of the runtime calls in the same host
* thread, and then resets the saved error to #hipSuccess.
*
* @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t
*/
hipError_t hipGetLastError(void);
/**
* @brief Return last error returned by any HIP runtime API call.
*
* @return #hipSuccess
*
* Returns the last error that has been returned by any of the runtime calls in the same host
* thread. Unlike hipGetLastError, this function does not reset the saved error code.
*
* @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t
*/
hipError_t hipPeekAtLastError(void);
/**
* @brief Return name of the specified error code in text form.
*
* @param hip_error Error code to convert to name.
* @return const char pointer to the NULL-terminated error name
*
* @see hipGetErrorString, hipGetLastError, hipPeakAtLastError, hipError_t
*/
const char* hipGetErrorName(hipError_t hip_error);
/**
* @brief Return handy text string message to explain the error which occurred
*
* @param hipError Error code to convert to string.
* @return const char pointer to the NULL-terminated error string
*
* @warning : on HCC, this function returns the name of the error (same as hipGetErrorName)
*
* @see hipGetErrorName, hipGetLastError, hipPeakAtLastError, hipError_t
*/
const char* hipGetErrorString(hipError_t hipError);
// end doxygen Error
/**
* @}
*/
/**
*-------------------------------------------------------------------------------------------------
*-------------------------------------------------------------------------------------------------
* @defgroup Stream Stream Management
* @{
*
* The following Stream APIs are not (yet) supported in HIP:
* - cudaStreamAttachMemAsync
* - cudaStreamCreateWithPriority
* - cudaStreamGetPriority
*/
/**
* @brief Create an asynchronous stream.
*
* @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the
* newly created stream.
* @return #hipSuccess, #hipErrorInvalidValue
*
* Create a new asynchronous stream. @p stream returns an opaque handle that can be used to
* reference the newly created stream in subsequent hipStream* commands. The stream is allocated on
* the heap and will remain allocated even if the handle goes out-of-scope. To release the memory
* used by the stream, applicaiton must call hipStreamDestroy.
*
* @return #hipSuccess, #hipErrorInvalidValue
*
* @see hipStreamCreateWithFlags, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
*/
hipError_t hipStreamCreate(hipStream_t* stream);
/**
* @brief Create an asynchronous stream.
*
* @param[in, out] stream Pointer to new stream
* @param[in ] flags to control stream creation.
* @return #hipSuccess, #hipErrorInvalidValue
*
* Create a new asynchronous stream. @p stream returns an opaque handle that can be used to
* reference the newly created stream in subsequent hipStream* commands. The stream is allocated on
* the heap and will remain allocated even if the handle goes out-of-scope. To release the memory
* used by the stream, applicaiton must call hipStreamDestroy. Flags controls behavior of the
* stream. See #hipStreamDefault, #hipStreamNonBlocking.
*
*
* @see hipStreamCreate, hipStreamSynchronize, hipStreamWaitEvent, hipStreamDestroy
*/
hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags);
/**
* @brief Destroys the specified stream.
*
* @param[in, out] stream Valid pointer to hipStream_t. This function writes the memory with the
* newly created stream.
* @return #hipSuccess #hipErrorInvalidResourceHandle
*
* Destroys the specified stream.
*
* If commands are still executing on the specified stream, some may complete execution before the
* queue is deleted.
*
* The queue may be destroyed while some commands are still inflight, or may wait for all commands
* queued to the stream before destroying it.
*
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamWaitEvent,
* hipStreamSynchronize
*/
hipError_t hipStreamDestroy(hipStream_t stream);
/**
* @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or
* #hipErrorNotReady if not.
*
* @param[in] stream stream to query
*
* @return #hipSuccess, #hipErrorNotReady, #hipErrorInvalidResourceHandle
*
* This is thread-safe and returns a snapshot of the current state of the queue. However, if other
* host threads are sending work to the stream, the status may change immediately after the function
* is called. It is typically used for debug.
*
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamWaitEvent, hipStreamSynchronize,
* hipStreamDestroy
*/
hipError_t hipStreamQuery(hipStream_t stream);
/**
* @brief Wait for all commands in stream to complete.
*
* @param[in] stream stream identifier.
*
* @return #hipSuccess, #hipErrorInvalidResourceHandle
*
* This command is host-synchronous : the host will block until the specified stream is empty.
*
* This command follows standard null-stream semantics. Specifically, specifying the null stream
* will cause the command to wait for other streams on the same device to complete all pending
* operations.
*
* This command honors the hipDeviceLaunchBlocking flag, which controls whether the wait is active
* or blocking.
*
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamWaitEvent, hipStreamDestroy
*
*/
hipError_t hipStreamSynchronize(hipStream_t stream);
/**
* @brief Make the specified compute stream wait for an event
*
* @param[in] stream stream to make wait.
* @param[in] event event to wait on
* @param[in] flags control operation [must be 0]
*
* @return #hipSuccess, #hipErrorInvalidResourceHandle
*
* This function inserts a wait operation into the specified stream.
* All future work submitted to @p stream will wait until @p event reports completion before
* beginning execution.
*
* This function only waits for commands in the current stream to complete. Notably,, this function
* does not impliciy wait for commands in the default stream to complete, even if the specified
* stream is created with hipStreamNonBlocking = 0.
*
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamSynchronize, hipStreamDestroy
*/
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags);
/**
* @brief Return flags associated with this stream.
*
* @param[in] stream stream to be queried
* @param[in,out] flags Pointer to an unsigned integer in which the stream's flags are returned
* @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidResourceHandle
*
* @returns #hipSuccess #hipErrorInvalidValue #hipErrorInvalidResourceHandle
*
* Return flags associated with this stream in *@p flags.
*
* @see hipStreamCreateWithFlags
*/
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags);
/**
* Stream CallBack struct
*/
typedef void (*hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData);
/**
* @brief Adds a callback to be called on the host after all currently enqueued
* items in the stream have completed. For each
* cudaStreamAddCallback call, a callback will be executed exactly once.
* The callback will block later work in the stream until it is finished.
* @param[in] stream - Stream to add callback to
* @param[in] callback - The function to call once preceding stream operations are complete
* @param[in] userData - User specified data to be passed to the callback function
* @param[in] flags - Reserved for future use, must be 0
* @return #hipSuccess, #hipErrorInvalidResourceHandle, #hipErrorNotSupported
*
* @see hipStreamCreate, hipStreamCreateWithFlags, hipStreamQuery, hipStreamSynchronize,
* hipStreamWaitEvent, hipStreamDestroy
*
*/
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData,
unsigned int flags);
// end doxygen Stream
/**
* @}
*/
/**
*-------------------------------------------------------------------------------------------------
*-------------------------------------------------------------------------------------------------
* @defgroup Event Event Management
* @{
*/
/**
* @brief Create an event with the specified flags
*
* @param[in,out] event Returns the newly created event.
* @param[in] flags Flags to control event behavior. Valid values are #hipEventDefault,
#hipEventBlockingSync, #hipEventDisableTiming, #hipEventInterprocess
* #hipEventDefault : Default flag. The event will use active synchronization and will support
timing. Blocking synchronization provides lowest possible latency at the expense of dedicating a
CPU to poll on the eevent.
* #hipEventBlockingSync : The event will use blocking synchronization : if hipEventSynchronize is
called on this event, the thread will block until the event completes. This can increase latency
for the synchroniation but can result in lower power and more resources for other CPU threads.
* #hipEventDisableTiming : Disable recording of timing information. On ROCM platform, timing
information is always recorded and this flag has no performance benefit.
* @warning On HCC platform, hipEventInterprocess support is under development. Use of this flag
will return an error.
*
* @returns #hipSuccess, #hipErrorInitializationError, #hipErrorInvalidValue,
#hipErrorLaunchFailure, #hipErrorMemoryAllocation
*
* @see hipEventCreate, hipEventSynchronize, hipEventDestroy, hipEventElapsedTime
*/
hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags);