diff --git a/clang/examples/DPCT/Runtime/cudaStreamIsCapturing.cu b/clang/examples/DPCT/Runtime/cudaStreamIsCapturing.cu index ba7065231926..3f73b095b6c9 100644 --- a/clang/examples/DPCT/Runtime/cudaStreamIsCapturing.cu +++ b/clang/examples/DPCT/Runtime/cudaStreamIsCapturing.cu @@ -1,3 +1,5 @@ +// Option: --use-experimental-features=graph + void test(cudaStream_t s, enum cudaStreamCaptureStatus *ps) { // Start cudaStreamIsCapturing(s /*cudaStream_t*/, diff --git a/clang/lib/DPCT/APINames.inc b/clang/lib/DPCT/APINames.inc index 3a0c2f1095ba..f92d582cfa72 100644 --- a/clang/lib/DPCT/APINames.inc +++ b/clang/lib/DPCT/APINames.inc @@ -102,7 +102,7 @@ ENTRY(cudaStreamGetCaptureInfo_v3, cudaStreamGetCaptureInfo_v3, false, NO_FLAG, ENTRY(cudaStreamGetFlags, cudaStreamGetFlags, true, NO_FLAG, P4, "DPCT1014") ENTRY(cudaStreamGetId, cudaStreamGetId, false, NO_FLAG, P4, "comment") ENTRY(cudaStreamGetPriority, cudaStreamGetPriority, true, NO_FLAG, P4, "DPCT1014") -ENTRY(cudaStreamIsCapturing, cudaStreamIsCapturing, true, API_CALL_REMOVED, P4, "DPCT1026/DPCT1027") +ENTRY(cudaStreamIsCapturing, cudaStreamIsCapturing, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaStreamQuery, cudaStreamQuery, true, API_CALL_REMOVED, P0, "DPCT1026/DPCT1027") ENTRY(cudaStreamSetAttribute, cudaStreamSetAttribute, true, NO_FLAG, P7, "DPCT1026/DPCT1027") ENTRY(cudaStreamSynchronize, cudaStreamSynchronize, true, NO_FLAG, P0, "Successful") @@ -1429,6 +1429,7 @@ ENTRY(make_ushort4, make_ushort4, true, NO_FLAG, P0, "Successful") ENTRY(__threadfence_block, __threadfence_block, true, NO_FLAG, P0, "Successful: DPCT1078") ENTRY(__threadfence, __threadfence, true, NO_FLAG, P0, "Successful: DPCT1078") ENTRY(__threadfence_system, __threadfence_system, true, NO_FLAG, P0, "Successful: DPCT1078") +ENTRY(__barrier_sync, __barrier_sync, true, NO_FLAG, P0, "Successful: DPCT1078") ENTRY(__syncthreads, __syncthreads, true, NO_FLAG, P0, "Successful: DPCT1078") ENTRY(__syncthreads_count, __syncthreads_count, true, NO_FLAG, P0, "Successful: DPCT1078") ENTRY(__syncthreads_and, __syncthreads_and, true, NO_FLAG, P0, "Successful: DPCT1078") diff --git a/clang/lib/DPCT/APINamesCooperativeGroups.inc b/clang/lib/DPCT/APINamesCooperativeGroups.inc index 4f2cc052ff2b..142bfe30989d 100644 --- a/clang/lib/DPCT/APINamesCooperativeGroups.inc +++ b/clang/lib/DPCT/APINamesCooperativeGroups.inc @@ -970,10 +970,12 @@ MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_group.num_threads", MemberExprBase(), false, "get_local_linear_range") MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_group.get_type", MemberExprBase(), false, "get_type") -MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block.group_index", MemberExprBase(), - false, "get_group_id") -MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block.thread_index", MemberExprBase(), - false, "get_local_id") +CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block.group_index", + CALL(MapNames::getDpctNamespace() + "dim3", + MEMBER_CALL(MemberExprBase(), false, "get_group_id"))) +CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block.thread_index", + CALL(MapNames::getDpctNamespace() + "dim3", + MEMBER_CALL(MemberExprBase(), false, "get_local_id"))) CONDITIONAL_FACTORY_ENTRY( UseNonUniformGroups, diff --git a/clang/lib/DPCT/APINamesMemory.inc b/clang/lib/DPCT/APINamesMemory.inc index f24c1cc0a0bb..8e353282a72e 100644 --- a/clang/lib/DPCT/APINamesMemory.inc +++ b/clang/lib/DPCT/APINamesMemory.inc @@ -417,123 +417,184 @@ ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - CALL_FACTORY_ENTRY( - "cuMemcpyAtoH_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memcpy", - makeCallArgCreatorWithCall(0), - BO(BinaryOperatorKind::BO_Add, - makeCastExprCreator( - makeCharPtrCreator(), - MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(1), true, - "to_pitched_data"), - false, "get_data_ptr"), - true), - makeCallArgCreatorWithCall(2)), - makeCallArgCreatorWithCall(3))))) + CONDITIONAL_FACTORY_ENTRY( + UseExtBindlessImages, + CALL_FACTORY_ENTRY( + "cuMemcpyAtoH_v2", + CALL(MapNames::getDpctNamespace() + "experimental::dpct_memcpy", + makeCallArgCreatorWithCall(0), makeCallArgCreatorWithCall(1), + makeCallArgCreatorWithCall(2), ARG("0"), + makeCallArgCreatorWithCall(3))), + CALL_FACTORY_ENTRY( + "cuMemcpyAtoH_v2", + CALL(MapNames::getDpctNamespace() + "dpct_memcpy", + makeCallArgCreatorWithCall(0), + BO(BinaryOperatorKind::BO_Add, + makeCastExprCreator( + makeCharPtrCreator(), + MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(1), + true, "to_pitched_data"), + false, "get_data_ptr"), + true), + makeCallArgCreatorWithCall(2)), + makeCallArgCreatorWithCall(3)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - CALL_FACTORY_ENTRY( - "cuMemcpyHtoA_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memcpy", - BO(BinaryOperatorKind::BO_Add, - makeCastExprCreator( - makeCharPtrCreator(), - MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(0), true, - "to_pitched_data"), - false, "get_data_ptr"), - true), - makeCallArgCreatorWithCall(1)), - makeCallArgCreatorWithCall(2), makeCallArgCreatorWithCall(3))))) + CONDITIONAL_FACTORY_ENTRY( + UseExtBindlessImages, + CALL_FACTORY_ENTRY( + "cuMemcpyHtoA_v2", + CALL(MapNames::getDpctNamespace() + "experimental::dpct_memcpy", + makeCallArgCreatorWithCall(0), makeCallArgCreatorWithCall(1), + ARG("0"), makeCallArgCreatorWithCall(2), + makeCallArgCreatorWithCall(3))), + CALL_FACTORY_ENTRY( + "cuMemcpyHtoA_v2", + CALL(MapNames::getDpctNamespace() + "dpct_memcpy", + BO(BinaryOperatorKind::BO_Add, + makeCastExprCreator( + makeCharPtrCreator(), + MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(0), + true, "to_pitched_data"), + false, "get_data_ptr"), + true), + makeCallArgCreatorWithCall(1)), + makeCallArgCreatorWithCall(2), + makeCallArgCreatorWithCall(3)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - CALL_FACTORY_ENTRY( - "cuMemcpyAtoHAsync_v2", - CALL(MapNames::getDpctNamespace() + "async_dpct_memcpy", - makeCallArgCreatorWithCall(0), - BO(BinaryOperatorKind::BO_Add, - makeCastExprCreator( - makeCharPtrCreator(), - MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(1), true, - "to_pitched_data"), - false, "get_data_ptr"), - true), - makeCallArgCreatorWithCall(2)), - makeCallArgCreatorWithCall(3), - ARG(MapNames::getDpctNamespace() + "automatic"), - DEREF(makeCallArgCreatorWithCall(4)))))) + CONDITIONAL_FACTORY_ENTRY( + UseExtBindlessImages, + CALL_FACTORY_ENTRY("cuMemcpyAtoHAsync_v2", + CALL(MapNames::getDpctNamespace() + + "experimental::async_dpct_memcpy", + makeCallArgCreatorWithCall(0), + makeCallArgCreatorWithCall(1), + makeCallArgCreatorWithCall(2), ARG("0"), + makeCallArgCreatorWithCall(3), STREAM(4))), + CALL_FACTORY_ENTRY( + "cuMemcpyAtoHAsync_v2", + CALL(MapNames::getDpctNamespace() + "async_dpct_memcpy", + makeCallArgCreatorWithCall(0), + BO(BinaryOperatorKind::BO_Add, + makeCastExprCreator( + makeCharPtrCreator(), + MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(1), + true, "to_pitched_data"), + false, "get_data_ptr"), + true), + makeCallArgCreatorWithCall(2)), + makeCallArgCreatorWithCall(3), + ARG(MapNames::getDpctNamespace() + "automatic"), + DEREF(makeCallArgCreatorWithCall(4))))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - CALL_FACTORY_ENTRY( - "cuMemcpyHtoAAsync_v2", - CALL(MapNames::getDpctNamespace() + "async_dpct_memcpy", - BO(BinaryOperatorKind::BO_Add, - makeCastExprCreator( - makeCharPtrCreator(), - MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(0), true, - "to_pitched_data"), - false, "get_data_ptr"), - true), - makeCallArgCreatorWithCall(1)), - makeCallArgCreatorWithCall(2), makeCallArgCreatorWithCall(3), - ARG(MapNames::getDpctNamespace() + "automatic"), - DEREF(makeCallArgCreatorWithCall(4)))))) + CONDITIONAL_FACTORY_ENTRY( + UseExtBindlessImages, + CALL_FACTORY_ENTRY("cuMemcpyHtoAAsync_v2", + CALL(MapNames::getDpctNamespace() + + "experimental::async_dpct_memcpy", + makeCallArgCreatorWithCall(0), + makeCallArgCreatorWithCall(1), ARG("0"), + makeCallArgCreatorWithCall(2), + makeCallArgCreatorWithCall(3), STREAM(4))), + CALL_FACTORY_ENTRY( + "cuMemcpyHtoAAsync_v2", + CALL(MapNames::getDpctNamespace() + "async_dpct_memcpy", + BO(BinaryOperatorKind::BO_Add, + makeCastExprCreator( + makeCharPtrCreator(), + MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(0), + true, "to_pitched_data"), + false, "get_data_ptr"), + true), + makeCallArgCreatorWithCall(1)), + makeCallArgCreatorWithCall(2), makeCallArgCreatorWithCall(3), + ARG(MapNames::getDpctNamespace() + "automatic"), + DEREF(makeCallArgCreatorWithCall(4))))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - CALL_FACTORY_ENTRY( - "cuMemcpyAtoD_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memcpy", - makeCallArgCreatorWithCall(0), - BO(BinaryOperatorKind::BO_Add, - makeCastExprCreator( - makeCharPtrCreator(), - MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(1), true, - "to_pitched_data"), - false, "get_data_ptr"), - true), - makeCallArgCreatorWithCall(2)), - makeCallArgCreatorWithCall(3))))) + CONDITIONAL_FACTORY_ENTRY( + UseExtBindlessImages, + CALL_FACTORY_ENTRY( + "cuMemcpyAtoD_v2", + CALL(MapNames::getDpctNamespace() + "experimental::dpct_memcpy", + makeCallArgCreatorWithCall(0), makeCallArgCreatorWithCall(1), + makeCallArgCreatorWithCall(2), ARG("0"), + makeCallArgCreatorWithCall(3))), + CALL_FACTORY_ENTRY( + "cuMemcpyAtoD_v2", + CALL(MapNames::getDpctNamespace() + "dpct_memcpy", + makeCallArgCreatorWithCall(0), + BO(BinaryOperatorKind::BO_Add, + makeCastExprCreator( + makeCharPtrCreator(), + MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(1), + true, "to_pitched_data"), + false, "get_data_ptr"), + true), + makeCallArgCreatorWithCall(2)), + makeCallArgCreatorWithCall(3)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - CALL_FACTORY_ENTRY( - "cuMemcpyDtoA_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memcpy", - BO(BinaryOperatorKind::BO_Add, - makeCastExprCreator( - makeCharPtrCreator(), - MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(0), true, - "to_pitched_data"), - false, "get_data_ptr"), - true), - makeCallArgCreatorWithCall(1)), - makeCallArgCreatorWithCall(2), makeCallArgCreatorWithCall(3))))) + CONDITIONAL_FACTORY_ENTRY( + UseExtBindlessImages, + CALL_FACTORY_ENTRY( + "cuMemcpyDtoA_v2", + CALL(MapNames::getDpctNamespace() + "experimental::dpct_memcpy", + makeCallArgCreatorWithCall(0), makeCallArgCreatorWithCall(1), + ARG("0"), makeCallArgCreatorWithCall(2), + makeCallArgCreatorWithCall(3))), + CALL_FACTORY_ENTRY( + "cuMemcpyDtoA_v2", + CALL(MapNames::getDpctNamespace() + "dpct_memcpy", + BO(BinaryOperatorKind::BO_Add, + makeCastExprCreator( + makeCharPtrCreator(), + MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(0), + true, "to_pitched_data"), + false, "get_data_ptr"), + true), + makeCallArgCreatorWithCall(1)), + makeCallArgCreatorWithCall(2), + makeCallArgCreatorWithCall(3)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - CALL_FACTORY_ENTRY( - "cuMemcpyAtoA_v2", - CALL(MapNames::getDpctNamespace() + "dpct_memcpy", - BO(BinaryOperatorKind::BO_Add, - makeCastExprCreator( - makeCharPtrCreator(), - MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(0), true, - "to_pitched_data"), - false, "get_data_ptr"), - true), - makeCallArgCreatorWithCall(1)), - BO(BinaryOperatorKind::BO_Add, - makeCastExprCreator( - makeCharPtrCreator(), - MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(2), true, - "to_pitched_data"), - false, "get_data_ptr"), - true), - makeCallArgCreatorWithCall(3)), - makeCallArgCreatorWithCall(4))))) + CONDITIONAL_FACTORY_ENTRY( + UseExtBindlessImages, + CALL_FACTORY_ENTRY( + "cuMemcpyAtoA_v2", + CALL(MapNames::getDpctNamespace() + "experimental::dpct_memcpy", + makeCallArgCreatorWithCall(0), makeCallArgCreatorWithCall(1), + ARG("0"), makeCallArgCreatorWithCall(2), + makeCallArgCreatorWithCall(3), ARG("0"), + makeCallArgCreatorWithCall(4))), + CALL_FACTORY_ENTRY( + "cuMemcpyAtoA_v2", + CALL(MapNames::getDpctNamespace() + "dpct_memcpy", + BO(BinaryOperatorKind::BO_Add, + makeCastExprCreator( + makeCharPtrCreator(), + MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(0), + true, "to_pitched_data"), + false, "get_data_ptr"), + true), + makeCallArgCreatorWithCall(1)), + BO(BinaryOperatorKind::BO_Add, + makeCastExprCreator( + makeCharPtrCreator(), + MEMBER_CALL(MEMBER_CALL(makeCallArgCreatorWithCall(2), + true, "to_pitched_data"), + false, "get_data_ptr"), + true), + makeCallArgCreatorWithCall(3)), + makeCallArgCreatorWithCall(4)))))) ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, diff --git a/clang/lib/DPCT/APINamesStream.inc b/clang/lib/DPCT/APINamesStream.inc index aa70765acbd0..78d769b06379 100644 --- a/clang/lib/DPCT/APINamesStream.inc +++ b/clang/lib/DPCT/APINamesStream.inc @@ -52,3 +52,13 @@ CONDITIONAL_FACTORY_ENTRY( Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaStreamEndCapture"), ARG("--use-experimental-features=graph"))) + +CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + ASSIGNABLE_FACTORY(ASSIGN_FACTORY_ENTRY( + "cudaStreamIsCapturing", DEREF(1), + MEMBER_CALL(ARG(0), true, "ext_oneapi_get_state"))), + UNSUPPORT_FACTORY_ENTRY("cudaStreamIsCapturing", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaStreamIsCapturing"), + ARG("--use-experimental-features=graph"))) diff --git a/clang/lib/DPCT/APINamesTexture.inc b/clang/lib/DPCT/APINamesTexture.inc index 2f552034ddcb..2821650b71e4 100644 --- a/clang/lib/DPCT/APINamesTexture.inc +++ b/clang/lib/DPCT/APINamesTexture.inc @@ -184,7 +184,11 @@ ENTRY_UNSUPPORTED("cudaGetTextureObjectResourceViewDesc", FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, ASSIGNABLE_FACTORY(ASSIGN_FACTORY_ENTRY( "cuArrayCreate_v2", DEREF(0), - NEW(MapNames::getDpctNamespace() + "image_matrix", + NEW(DpctGlobalInfo::useExtBindlessImages() + ? MapNames::getDpctNamespace() + + "experimental::image_mem_wrapper" + : MapNames::getDpctNamespace() + + "image_matrix", ARG(1))))) ASSIGNABLE_FACTORY(DELETER_FACTORY_ENTRY("cuArrayDestroy", ARG(0))) ENTRY_UNSUPPORTED("cuTexObjectGetResourceViewDesc", Diagnostics::API_NOT_MIGRATED) diff --git a/clang/lib/DPCT/APINames_cuBLAS.inc b/clang/lib/DPCT/APINames_cuBLAS.inc index 9fed09697ba4..990687f8e87c 100644 --- a/clang/lib/DPCT/APINames_cuBLAS.inc +++ b/clang/lib/DPCT/APINames_cuBLAS.inc @@ -655,50 +655,50 @@ ENTRY(cublasStpsv_64, cublasStpsv_v2_64, true, NO_FLAG, P4, "DPCT1020") ENTRY(cublasDtpsv_64, cublasDtpsv_v2_64, true, NO_FLAG, P4, "DPCT1020") ENTRY(cublasCtpsv_64, cublasCtpsv_v2_64, true, NO_FLAG, P4, "DPCT1020") ENTRY(cublasZtpsv_64, cublasZtpsv_v2_64, true, NO_FLAG, P4, "DPCT1020") -ENTRY(cublasStbsv_64, cublasStbsv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasDtbsv_64, cublasDtbsv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasCtbsv_64, cublasCtbsv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZtbsv_64, cublasZtbsv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasSsymv_64, cublasSsymv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasDsymv_64, cublasDsymv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasCsymv_64, cublasCsymv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZsymv_64, cublasZsymv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasChemv_64, cublasChemv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZhemv_64, cublasZhemv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasSsbmv_64, cublasSsbmv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasDsbmv_64, cublasDsbmv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasChbmv_64, cublasChbmv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZhbmv_64, cublasZhbmv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasSspmv_64, cublasSspmv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasDspmv_64, cublasDspmv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasChpmv_64, cublasChpmv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZhpmv_64, cublasZhpmv_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasSger_64, cublasSger_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasDger_64, cublasDger_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasCgeru_64, cublasCgeru_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasCgerc_64, cublasCgerc_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZgeru_64, cublasZgeru_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZgerc_64, cublasZgerc_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasSsyr_64, cublasSsyr_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasDsyr_64, cublasDsyr_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasCsyr_64, cublasCsyr_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZsyr_64, cublasZsyr_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasCher_64, cublasCher_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZher_64, cublasZher_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasSspr_64, cublasSspr_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasDspr_64, cublasDspr_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasChpr_64, cublasChpr_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZhpr_64, cublasZhpr_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasSsyr2_64, cublasSsyr2_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasDsyr2_64, cublasDsyr2_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasCsyr2_64, cublasCsyr2_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZsyr2_64, cublasZsyr2_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasCher2_64, cublasCher2_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZher2_64, cublasZher2_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasSspr2_64, cublasSspr2_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasDspr2_64, cublasDspr2_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasChpr2_64, cublasChpr2_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZhpr2_64, cublasZhpr2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasStbsv_64, cublasStbsv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDtbsv_64, cublasDtbsv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCtbsv_64, cublasCtbsv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZtbsv_64, cublasZtbsv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSsymv_64, cublasSsymv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDsymv_64, cublasDsymv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCsymv_64, cublasCsymv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZsymv_64, cublasZsymv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasChemv_64, cublasChemv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZhemv_64, cublasZhemv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSsbmv_64, cublasSsbmv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDsbmv_64, cublasDsbmv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasChbmv_64, cublasChbmv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZhbmv_64, cublasZhbmv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSspmv_64, cublasSspmv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDspmv_64, cublasDspmv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasChpmv_64, cublasChpmv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZhpmv_64, cublasZhpmv_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSger_64, cublasSger_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDger_64, cublasDger_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgeru_64, cublasCgeru_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgerc_64, cublasCgerc_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZgeru_64, cublasZgeru_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZgerc_64, cublasZgerc_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSsyr_64, cublasSsyr_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDsyr_64, cublasDsyr_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCsyr_64, cublasCsyr_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZsyr_64, cublasZsyr_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCher_64, cublasCher_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZher_64, cublasZher_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSspr_64, cublasSspr_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDspr_64, cublasDspr_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasChpr_64, cublasChpr_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZhpr_64, cublasZhpr_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSsyr2_64, cublasSsyr2_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDsyr2_64, cublasDsyr2_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCsyr2_64, cublasCsyr2_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZsyr2_64, cublasZsyr2_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCher2_64, cublasCher2_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZher2_64, cublasZher2_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSspr2_64, cublasSspr2_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDspr2_64, cublasDspr2_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasChpr2_64, cublasChpr2_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZhpr2_64, cublasZhpr2_v2_64, false, NO_FLAG, P4, "comment") ENTRY(cublasSgemm_64, cublasSgemm_v2_64, true, NO_FLAG, P4, "DPCT1020") ENTRY(cublasDgemm_64, cublasDgemm_v2_64, true, NO_FLAG, P4, "DPCT1020") ENTRY(cublasCgemm_64, cublasCgemm_v2_64, true, NO_FLAG, P4, "DPCT1020") @@ -725,10 +725,10 @@ ENTRY(cublasStrsm_64, cublasStrsm_v2_64, true, NO_FLAG, P4, "DPCT1020") ENTRY(cublasDtrsm_64, cublasDtrsm_v2_64, true, NO_FLAG, P4, "DPCT1020") ENTRY(cublasCtrsm_64, cublasCtrsm_v2_64, true, NO_FLAG, P4, "DPCT1020") ENTRY(cublasZtrsm_64, cublasZtrsm_v2_64, true, NO_FLAG, P4, "DPCT1020") -ENTRY(cublasStrmm_64, cublasStrmm_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasDtrmm_64, cublasDtrmm_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasCtrmm_64, cublasCtrmm_64, false, NO_FLAG, P4, "comment") -ENTRY(cublasZtrmm_64, cublasZtrmm_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasStrmm_64, cublasStrmm_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDtrmm_64, cublasDtrmm_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCtrmm_64, cublasCtrmm_v2_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZtrmm_64, cublasZtrmm_v2_64, false, NO_FLAG, P4, "comment") ENTRY(cublasSgeam_64, cublasSgeam_64, true, NO_FLAG, P4, "DPCT1020") ENTRY(cublasDgeam_64, cublasDgeam_64, true, NO_FLAG, P4, "DPCT1020") ENTRY(cublasCgeam_64, cublasCgeam_64, true, NO_FLAG, P4, "DPCT1020") @@ -745,3 +745,76 @@ ENTRY(cublasSetVectorAsync_64, cublasSetVectorAsync_64, true, NO_FLAG, P4, "DPCT ENTRY(cublasGetVectorAsync_64, cublasGetVectorAsync_64, true, NO_FLAG, P4, "DPCT1018/DPCT1020") ENTRY(cublasSetMatrixAsync_64, cublasSetMatrixAsync_64, true, NO_FLAG, P4, "DPCT1018/DPCT1020") ENTRY(cublasGetMatrixAsync_64, cublasGetMatrixAsync_64, true, NO_FLAG, P4, "DPCT1018/DPCT1020") +ENTRY(cublasNrm2Ex_64, cublasNrm2Ex_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDotEx_64, cublasDotEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDotcEx_64, cublasDotcEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasScalEx_64, cublasScalEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasAxpyEx_64, cublasAxpyEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCopyEx_64, cublasCopyEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSwapEx_64, cublasSwapEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasIamaxEx_64, cublasIamaxEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasIaminEx_64, cublasIaminEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasAsumEx_64, cublasAsumEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasRotEx_64, cublasRotEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasRotmEx_64, cublasRotmEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSgemvBatched_64, cublasSgemvBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDgemvBatched_64, cublasDgemvBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgemvBatched_64, cublasCgemvBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZgemvBatched_64, cublasZgemvBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasHSHgemvBatched_64, cublasHSHgemvBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasHSSgemvBatched_64, cublasHSSgemvBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasTSTgemvBatched_64, cublasTSTgemvBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasTSSgemvBatched_64, cublasTSSgemvBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSgemvStridedBatched_64, cublasSgemvStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDgemvStridedBatched_64, cublasDgemvStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgemvStridedBatched_64, cublasCgemvStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZgemvStridedBatched_64, cublasZgemvStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasHSHgemvStridedBatched_64, cublasHSHgemvStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasHSSgemvStridedBatched_64, cublasHSSgemvStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasTSTgemvStridedBatched_64, cublasTSTgemvStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasTSSgemvStridedBatched_64, cublasTSSgemvStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgemm3m_64, cublasCgemm3m_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgemm3mEx_64, cublasCgemm3mEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZgemm3m_64, cublasZgemm3m_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasHgemm_64, cublasHgemm_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSgemmEx_64, cublasSgemmEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasGemmEx_64, cublasGemmEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgemmEx_64, cublasCgemmEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCsyrkEx_64, cublasCsyrkEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCsyrk3mEx_64, cublasCsyrk3mEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCherkEx_64, cublasCherkEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCherk3mEx_64, cublasCherk3mEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSsyrkx_64, cublasSsyrkx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDsyrkx_64, cublasDsyrkx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCsyrkx_64, cublasCsyrkx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZsyrkx_64, cublasZsyrkx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCherkx_64, cublasCherkx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZherkx_64, cublasZherkx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasHgemmBatched_64, cublasHgemmBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSgemmBatched_64, cublasSgemmBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDgemmBatched_64, cublasDgemmBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgemmBatched_64, cublasCgemmBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgemm3mBatched_64, cublasCgemm3mBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZgemmBatched_64, cublasZgemmBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasHgemmStridedBatched_64, cublasHgemmStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSgemmStridedBatched_64, cublasSgemmStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDgemmStridedBatched_64, cublasDgemmStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgemmStridedBatched_64, cublasCgemmStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCgemm3mStridedBatched_64, cublasCgemm3mStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZgemmStridedBatched_64, cublasZgemmStridedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasGemmBatchedEx_64, cublasGemmBatchedEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasGemmStridedBatchedEx_64, cublasGemmStridedBatchedEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasSgemmGroupedBatched, cublasSgemmGroupedBatched, false, NO_FLAG, P4, "comment") +ENTRY(cublasSgemmGroupedBatched_64, cublasSgemmGroupedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDgemmGroupedBatched, cublasDgemmGroupedBatched, false, NO_FLAG, P4, "comment") +ENTRY(cublasDgemmGroupedBatched_64, cublasDgemmGroupedBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasGemmGroupedBatchedEx, cublasGemmGroupedBatchedEx, false, NO_FLAG, P4, "comment") +ENTRY(cublasGemmGroupedBatchedEx_64, cublasGemmGroupedBatchedEx_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasStrsmBatched_64, cublasStrsmBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasDtrsmBatched_64, cublasDtrsmBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasCtrsmBatched_64, cublasCtrsmBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasZtrsmBatched_64, cublasZtrsmBatched_64, false, NO_FLAG, P4, "comment") +ENTRY(cublasLtHeuristicsCacheGetCapacity, cublasLtHeuristicsCacheGetCapacity, false, NO_FLAG, P4, "comment") +ENTRY(cublasLtHeuristicsCacheSetCapacity, cublasLtHeuristicsCacheSetCapacity, false, NO_FLAG, P4, "comment") +ENTRY(cublasLtDisableCpuInstructionsSetMask, cublasLtDisableCpuInstructionsSetMask, false, NO_FLAG, P4, "comment") + diff --git a/clang/lib/DPCT/APINames_cuFFT.inc b/clang/lib/DPCT/APINames_cuFFT.inc index d68a33053624..cc5737be5966 100644 --- a/clang/lib/DPCT/APINames_cuFFT.inc +++ b/clang/lib/DPCT/APINames_cuFFT.inc @@ -83,3 +83,6 @@ ENTRY(cufftXtExecDescriptor, cufftXtExecDescriptor, false, NO_FLAG, P4, "comment ENTRY(cufftSetStream, cufftSetStream, true, NO_FLAG, P4, "Successful") ENTRY(cufftGetVersion, cufftGetVersion, true, NO_FLAG, P4, "Successful") ENTRY(cufftGetProperty, cufftGetProperty, true, NO_FLAG, P4, "Successful") +ENTRY(cufftSetPlanPropertyInt64, cufftSetPlanPropertyInt64, true, NO_FLAG, P4, "Successful") +ENTRY(cufftGetPlanPropertyInt64, cufftGetPlanPropertyInt64, true, NO_FLAG, P4, "Successful") +ENTRY(cufftResetPlanProperty, cufftResetPlanProperty, true, NO_FLAG, P4, "Successful") diff --git a/clang/lib/DPCT/APINames_cuSOLVER.inc b/clang/lib/DPCT/APINames_cuSOLVER.inc index 17469c304aea..7df66ecc4be5 100644 --- a/clang/lib/DPCT/APINames_cuSOLVER.inc +++ b/clang/lib/DPCT/APINames_cuSOLVER.inc @@ -526,4 +526,136 @@ ENTRY(cusolverSpScsreigsHost, cusolverSpScsreigsHost, false, NO_FLAG, P4, "comme ENTRY(cusolverSpXcsrissym, cusolverSpXcsrissym, false, NO_FLAG, P4, "comment") ENTRY(cusolverSpXcsrqrBatchedHost, cusolverSpXcsrqrBatchedHost, false, NO_FLAG, P4, "comment") ENTRY(cusolverSpXcsrzfdHost, cusolverSpXcsrzfdHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverGetProperty, cusolverGetProperty, false, NO_FLAG, P4, "comment") +ENTRY(cusolverGetVersion, cusolverGetVersion, false, NO_FLAG, P4, "comment") +ENTRY(cusolverDnSetDeterministicMode, cusolverDnSetDeterministicMode, false, NO_FLAG, P4, "comment") +ENTRY(cusolverDnGetDeterministicMode, cusolverDnGetDeterministicMode, false, NO_FLAG, P4, "comment") +ENTRY(cusolverDnXlarft_bufferSize, cusolverDnXlarft_bufferSize, false, NO_FLAG, P4, "comment") +ENTRY(cusolverDnXlarft, cusolverDnXlarft, false, NO_FLAG, P4, "comment") +ENTRY(cusolverDnLoggerSetCallback, cusolverDnLoggerSetCallback, false, NO_FLAG, P4, "comment") +ENTRY(cusolverDnLoggerSetFile, cusolverDnLoggerSetFile, false, NO_FLAG, P4, "comment") +ENTRY(cusolverDnLoggerOpenFile, cusolverDnLoggerOpenFile, false, NO_FLAG, P4, "comment") +ENTRY(cusolverDnLoggerSetLevel, cusolverDnLoggerSetLevel, false, NO_FLAG, P4, "comment") +ENTRY(cusolverDnLoggerSetMask, cusolverDnLoggerSetMask, false, NO_FLAG, P4, "comment") +ENTRY(cusolverDnLoggerForceDisable, cusolverDnLoggerForceDisable, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCreateCsrluInfoHost, cusolverSpCreateCsrluInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDestroyCsrluInfoHost, cusolverSpDestroyCsrluInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpXcsrluAnalysisHost, cusolverSpXcsrluAnalysisHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrluBufferInfoHost, cusolverSpScsrluBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrluBufferInfoHost, cusolverSpDcsrluBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrluBufferInfoHost, cusolverSpCcsrluBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrluBufferInfoHost, cusolverSpZcsrluBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrluFactorHost, cusolverSpScsrluFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrluFactorHost, cusolverSpDcsrluFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrluFactorHost, cusolverSpCcsrluFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrluFactorHost, cusolverSpZcsrluFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrluZeroPivotHost, cusolverSpScsrluZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrluZeroPivotHost, cusolverSpDcsrluZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrluZeroPivotHost, cusolverSpCcsrluZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrluZeroPivotHost, cusolverSpZcsrluZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrluSolveHost, cusolverSpScsrluSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrluSolveHost, cusolverSpDcsrluSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrluSolveHost, cusolverSpCcsrluSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrluSolveHost, cusolverSpZcsrluSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpXcsrluNnzHost, cusolverSpXcsrluNnzHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrluExtractHost, cusolverSpScsrluExtractHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrluExtractHost, cusolverSpDcsrluExtractHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrluExtractHost, cusolverSpCcsrluExtractHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrluExtractHost, cusolverSpZcsrluExtractHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCreateCsrqrInfoHost, cusolverSpCreateCsrqrInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDestroyCsrqrInfoHost, cusolverSpDestroyCsrqrInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpXcsrqrAnalysisHost, cusolverSpXcsrqrAnalysisHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrqrBufferInfoHost, cusolverSpScsrqrBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrqrBufferInfoHost, cusolverSpDcsrqrBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrqrBufferInfoHost, cusolverSpCcsrqrBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrqrBufferInfoHost, cusolverSpZcsrqrBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrqrSetupHost, cusolverSpScsrqrSetupHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrqrSetupHost, cusolverSpDcsrqrSetupHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrqrSetupHost, cusolverSpCcsrqrSetupHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrqrSetupHost, cusolverSpZcsrqrSetupHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrqrFactorHost, cusolverSpScsrqrFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrqrFactorHost, cusolverSpDcsrqrFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrqrFactorHost, cusolverSpCcsrqrFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrqrFactorHost, cusolverSpZcsrqrFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrqrZeroPivotHost, cusolverSpScsrqrZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrqrZeroPivotHost, cusolverSpDcsrqrZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrqrZeroPivotHost, cusolverSpCcsrqrZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrqrZeroPivotHost, cusolverSpZcsrqrZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrqrSolveHost, cusolverSpScsrqrSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrqrSolveHost, cusolverSpDcsrqrSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrqrSolveHost, cusolverSpCcsrqrSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrqrSolveHost, cusolverSpZcsrqrSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpXcsrqrAnalysis, cusolverSpXcsrqrAnalysis, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrqrBufferInfo, cusolverSpScsrqrBufferInfo, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrqrBufferInfo, cusolverSpDcsrqrBufferInfo, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrqrBufferInfo, cusolverSpCcsrqrBufferInfo, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrqrBufferInfo, cusolverSpZcsrqrBufferInfo, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrqrSetup, cusolverSpScsrqrSetup, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrqrSetup, cusolverSpDcsrqrSetup, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrqrSetup, cusolverSpCcsrqrSetup, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrqrSetup, cusolverSpZcsrqrSetup, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrqrFactor, cusolverSpScsrqrFactor, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrqrFactor, cusolverSpDcsrqrFactor, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrqrFactor, cusolverSpCcsrqrFactor, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrqrFactor, cusolverSpZcsrqrFactor, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrqrZeroPivot, cusolverSpScsrqrZeroPivot, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrqrZeroPivot, cusolverSpDcsrqrZeroPivot, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrqrZeroPivot, cusolverSpCcsrqrZeroPivot, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrqrZeroPivot, cusolverSpZcsrqrZeroPivot, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrqrSolve, cusolverSpScsrqrSolve, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrqrSolve, cusolverSpDcsrqrSolve, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrqrSolve, cusolverSpCcsrqrSolve, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrqrSolve, cusolverSpZcsrqrSolve, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCreateCsrcholInfoHost, cusolverSpCreateCsrcholInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDestroyCsrcholInfoHost, cusolverSpDestroyCsrcholInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpXcsrcholAnalysisHost, cusolverSpXcsrcholAnalysisHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrcholBufferInfoHost, cusolverSpScsrcholBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrcholBufferInfoHost, cusolverSpDcsrcholBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrcholBufferInfoHost, cusolverSpCcsrcholBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrcholBufferInfoHost, cusolverSpZcsrcholBufferInfoHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrcholFactorHost, cusolverSpScsrcholFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrcholFactorHost, cusolverSpDcsrcholFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrcholFactorHost, cusolverSpCcsrcholFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrcholFactorHost, cusolverSpZcsrcholFactorHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrcholZeroPivotHost, cusolverSpScsrcholZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrcholZeroPivotHost, cusolverSpDcsrcholZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrcholZeroPivotHost, cusolverSpCcsrcholZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrcholZeroPivotHost, cusolverSpZcsrcholZeroPivotHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrcholSolveHost, cusolverSpScsrcholSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrcholSolveHost, cusolverSpDcsrcholSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrcholSolveHost, cusolverSpCcsrcholSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrcholSolveHost, cusolverSpZcsrcholSolveHost, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCreateCsrcholInfo, cusolverSpCreateCsrcholInfo, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDestroyCsrcholInfo, cusolverSpDestroyCsrcholInfo, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpXcsrcholAnalysis, cusolverSpXcsrcholAnalysis, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrcholBufferInfo, cusolverSpScsrcholBufferInfo, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrcholBufferInfo, cusolverSpDcsrcholBufferInfo, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrcholBufferInfo, cusolverSpCcsrcholBufferInfo, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrcholBufferInfo, cusolverSpZcsrcholBufferInfo, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrcholFactor, cusolverSpScsrcholFactor, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrcholFactor, cusolverSpDcsrcholFactor, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrcholFactor, cusolverSpCcsrcholFactor, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrcholFactor, cusolverSpZcsrcholFactor, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrcholZeroPivot, cusolverSpScsrcholZeroPivot, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrcholZeroPivot, cusolverSpDcsrcholZeroPivot, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrcholZeroPivot, cusolverSpCcsrcholZeroPivot, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrcholZeroPivot, cusolverSpZcsrcholZeroPivot, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrcholSolve, cusolverSpScsrcholSolve, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrcholSolve, cusolverSpDcsrcholSolve, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrcholSolve, cusolverSpCcsrcholSolve, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrcholSolve, cusolverSpZcsrcholSolve, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpScsrcholDiag, cusolverSpScsrcholDiag, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpDcsrcholDiag, cusolverSpDcsrcholDiag, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpCcsrcholDiag, cusolverSpCcsrcholDiag, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpZcsrcholDiag, cusolverSpZcsrcholDiag, false, NO_FLAG, P4, "comment") +ENTRY(cusolverMgCreateMatrixDesc, cusolverMgCreateMatrixDesc, false, NO_FLAG, P4, "comment") +ENTRY(cusolverMgSyevd_bufferSize, cusolverMgSyevd_bufferSize, false, NO_FLAG, P4, "comment") +ENTRY(cusolverMgGetrf_bufferSize, cusolverMgGetrf_bufferSize, false, NO_FLAG, P4, "comment") +ENTRY(cusolverMgGetrs_bufferSize, cusolverMgGetrs_bufferSize, false, NO_FLAG, P4, "comment") +ENTRY(cusolverMgPotrf_bufferSize, cusolverMgPotrf_bufferSize, false, NO_FLAG, P4, "comment") +ENTRY(cusolverMgPotrs_bufferSize, cusolverMgPotrs_bufferSize, false, NO_FLAG, P4, "comment") +ENTRY(cusolverMgPotri_bufferSize, cusolverMgPotri_bufferSize, false, NO_FLAG, P4, "comment") +ENTRY(cusolverRfGetAlgs, cusolverRfGetAlgs, false, NO_FLAG, P4, "comment") +ENTRY(cusolverRfAccessBundledFactorsDevice, cusolverRfAccessBundledFactorsDevice, false, NO_FLAG, P4, "comment") +ENTRY(cusolverSpGetStream, cusolverSpGetStream, false, NO_FLAG, P4, "comment") diff --git a/clang/lib/DPCT/APINames_cuSPARSE.inc b/clang/lib/DPCT/APINames_cuSPARSE.inc index 5c1d6f1503b1..b1ffaf5597b1 100644 --- a/clang/lib/DPCT/APINames_cuSPARSE.inc +++ b/clang/lib/DPCT/APINames_cuSPARSE.inc @@ -714,3 +714,13 @@ ENTRY(cusparseConstDnMatGet, cusparseConstDnMatGet, false, NO_FLAG, P4, "comment ENTRY(cusparseConstDnMatGetValues, cusparseConstDnMatGetValues, false, NO_FLAG, P4, "comment") ENTRY(cusparseSpGEMM_getNumProducts, cusparseSpGEMM_getNumProducts, false, NO_FLAG, P4, "comment") ENTRY(cusparseSpGEMM_estimateMemory, cusparseSpGEMM_estimateMemory, false, NO_FLAG, P4, "comment") +ENTRY(cusparseConstSpVecGet, cusparseConstSpVecGet, false, NO_FLAG, P4, "comment") +ENTRY(cusparseBsrSetStridedBatch, cusparseBsrSetStridedBatch, false, NO_FLAG, P4, "comment") +ENTRY(cusparseCreateBsr, cusparseCreateBsr, false, NO_FLAG, P4, "comment") +ENTRY(cusparseCreateConstBsr, cusparseCreateConstBsr, false, NO_FLAG, P4, "comment") +ENTRY(cusparseConstBlockedEllGet, cusparseConstBlockedEllGet, false, NO_FLAG, P4, "comment") +ENTRY(cusparseCreateSlicedEll, cusparseCreateSlicedEll, false, NO_FLAG, P4, "comment") +ENTRY(cusparseCreateConstSlicedEll, cusparseCreateConstSlicedEll, false, NO_FLAG, P4, "comment") +ENTRY(cusparseSpMV_preprocess, cusparseSpMV_preprocess, false, NO_FLAG, P4, "comment") +ENTRY(cusparseSpSV_updateMatrix, cusparseSpSV_updateMatrix, false, NO_FLAG, P4, "comment") +ENTRY(cusparseSpSM_updateMatrix, cusparseSpSM_updateMatrix, false, NO_FLAG, P4, "comment") diff --git a/clang/lib/DPCT/APINames_removed.inc b/clang/lib/DPCT/APINames_removed.inc index 5009ae6d976f..43f38ba19803 100644 --- a/clang/lib/DPCT/APINames_removed.inc +++ b/clang/lib/DPCT/APINames_removed.inc @@ -29,7 +29,6 @@ ENTRY(cuEventCreate, "this functionality is redundant in SYCL. ENTRY(cudaStreamAttachMemAsync, "SYCL currently does not support associating USM with a specific queue.") ENTRY(cuStreamAttachMemAsync, "SYCL currently does not support associating USM with a specific queue.") ENTRY(cudaStreamQuery, "SYCL currently does not support query operations on queues.") -ENTRY(cudaStreamIsCapturing, "SYCL currently does not support capture operations on queues.") ENTRY(cudaDeviceGetStreamPriorityRange, "SYCL currently does not support get queue priority range.") ENTRY(cudaHostRegister, "SYCL currently does not support registering of existing host memory for use by device. Use USM to allocate memory for use by host and device.") diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 60d969d26d8e..c82d5b7cf550 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -1689,12 +1689,13 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) { MF.addMatcher( typeLoc( loc(qualType(hasDeclaration(namedDecl(hasAnyName( - "cudaError", "curandStatus", "cublasStatus", "CUstream", + "dim3", "cudaError", "curandStatus", "cublasStatus", "CUstream", "CUstream_st", "thrust::complex", "thrust::device_vector", "thrust::device_ptr", "thrust::device_reference", "thrust::host_vector", "cublasHandle_t", "CUevent_st", "__half", "half", "__half2", "half2", "cudaMemoryAdvise", "cudaError_enum", - "cudaDeviceProp", "cudaGraphExecUpdateResult", "cudaPitchedPtr", + "cudaDeviceProp", "cudaStreamCaptureStatus", + "cudaGraphExecUpdateResult", "cudaPitchedPtr", "thrust::counting_iterator", "thrust::transform_iterator", "thrust::permutation_iterator", "thrust::iterator_difference", "cusolverDnHandle_t", "cusolverDnParams_t", "gesvdjInfo_t", @@ -2285,11 +2286,19 @@ void TypeInDeclRule::runRule(const MatchFinder::MatchResult &Result) { std::string CanonicalTypeStr = DpctGlobalInfo::getUnqualifiedTypeName( TL->getType().getCanonicalType()); + if (CanonicalTypeStr == "cudaStreamCaptureStatus") { + if (!DpctGlobalInfo::useExtGraph()) { + report(TL->getBeginLoc(), Diagnostics::TRY_EXPERIMENTAL_FEATURE, false, + "cudaStreamCaptureStatus", "--use-experimental-features=graph"); + } + } + if (CanonicalTypeStr == "cudaGraphExecUpdateResult") { report(TL->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, CanonicalTypeStr); return; } + if (CanonicalTypeStr == "cooperative_groups::__v1::thread_group" || CanonicalTypeStr == "cooperative_groups::__v1::thread_block") { if (auto ETL = TL->getUnqualifiedLoc().getAs()) { @@ -3037,198 +3046,6 @@ void VectorTypeOperatorRule::runRule(const MatchFinder::MatchResult &Result) { REGISTER_RULE(VectorTypeOperatorRule, PassKind::PK_Migration) -void ReplaceDim3CtorRule::registerMatcher(MatchFinder &MF) { - // Find dim3 constructors which are part of different casts (representing - // different syntaxes). This includes copy constructors. All constructors - // will be visited once. - MF.addMatcher(cxxConstructExpr(hasType(namedDecl(hasName("dim3"))), - argumentCountIs(1), - unless(hasAncestor(cxxConstructExpr( - hasType(namedDecl(hasName("dim3"))))))) - .bind("dim3Top"), - this); - - MF.addMatcher(cxxConstructExpr( - hasType(namedDecl(hasName("dim3"))), argumentCountIs(3), - anyOf(hasParent(varDecl()), hasParent(exprWithCleanups())), - unless(hasParent(initListExpr())), - unless(hasAncestor( - cxxConstructExpr(hasType(namedDecl(hasName("dim3"))))))) - .bind("dim3CtorDecl"), - this); - - MF.addMatcher( - cxxConstructExpr(hasType(namedDecl(hasName("dim3"))), argumentCountIs(3), - // skip fields in a struct. The source loc is - // messed up (points to the start of the struct) - unless(hasParent(initListExpr())), - unless(hasAncestor(cxxRecordDecl())), - unless(hasParent(varDecl())), - unless(hasParent(exprWithCleanups())), - unless(hasAncestor(cxxConstructExpr( - hasType(namedDecl(hasName("dim3"))))))) - .bind("dim3CtorNoDecl"), - this); - - MF.addMatcher( - typeLoc(loc(qualType(hasDeclaration(anyOf( - namedDecl(hasAnyName("dim3")), - typedefDecl(hasAnyName("dim3"))))))) - .bind("dim3Type"), - this); -} - -ReplaceDim3Ctor *ReplaceDim3CtorRule::getReplaceDim3Modification( - const MatchFinder::MatchResult &Result) { - if (auto Ctor = getNodeAsType(Result, "dim3CtorDecl")) { - if(getParentKernelCall(Ctor)) - return nullptr; - // dim3 a; or dim3 a(1); - return new ReplaceDim3Ctor(Ctor, true /*isDecl*/); - } else if (auto Ctor = - getNodeAsType(Result, "dim3CtorNoDecl")) { - if(getParentKernelCall(Ctor)) - return nullptr; - // deflt = dim3(3); - return new ReplaceDim3Ctor(Ctor, false /*isDecl*/); - } else if (auto Ctor = getNodeAsType(Result, "dim3Top")) { - if(getParentKernelCall(Ctor)) - return nullptr; - // dim3 d3_6_3 = dim3(ceil(test.x + NUM), NUM + test.y, NUM + test.z + NUM); - if (auto A = ReplaceDim3Ctor::getConstructExpr(Ctor->getArg(0))) { - // strip the top CXXConstructExpr, if there's a CXXConstructExpr further - // down - return new ReplaceDim3Ctor(Ctor, A); - } else { - // Copy constructor case: dim3 a(copyfrom) - // No replacements are needed - return nullptr; - } - } - - return nullptr; -} - -void ReplaceDim3CtorRule::runRule(const MatchFinder::MatchResult &Result) { - ReplaceDim3Ctor *R = getReplaceDim3Modification(Result); - if (R) { - emplaceTransformation(R); - } - - if (auto TL = getNodeAsType(Result, "dim3Type")) { - if (TL->getBeginLoc().isInvalid()) - return; - - auto BeginLoc = - getDefinitionRange(TL->getBeginLoc(), TL->getEndLoc()).getBegin(); - SourceManager *SM = Result.SourceManager; - - // WA for concatenated macro token - if (SM->isWrittenInScratchSpace(SM->getSpellingLoc(TL->getBeginLoc()))) { - BeginLoc = SM->getExpansionLoc(TL->getBeginLoc()); - } - - Token Tok; - auto LOpts = Result.Context->getLangOpts(); - Lexer::getRawToken(BeginLoc, Tok, *SM, LOpts, true); - if (Tok.isAnyIdentifier()) { - if (TL->getType()->isElaboratedTypeSpecifier()) { - // To handle case like "struct cudaExtent extent;" - auto ETC = TL->getUnqualifiedLoc().getAs(); - auto NTL = ETC.getNamedTypeLoc(); - - if (NTL.getTypeLocClass() == clang::TypeLoc::Record) { - auto TSL = NTL.getUnqualifiedLoc().getAs(); - - const std::string TyName = - dpct::DpctGlobalInfo::getTypeName(TSL.getType()); - std::string Str = - MapNames::findReplacedName(MapNames::TypeNamesMap, TyName); - insertHeaderForTypeRule(TyName, BeginLoc); - requestHelperFeatureForTypeNames(TyName); - - if (!Str.empty()) { - emplaceTransformation( - new ReplaceToken(BeginLoc, TSL.getEndLoc(), std::move(Str))); - return; - } - } - } - - std::string TypeName = Tok.getRawIdentifier().str(); - std::string Str = - MapNames::findReplacedName(MapNames::TypeNamesMap, TypeName); - insertHeaderForTypeRule(TypeName, BeginLoc); - requestHelperFeatureForTypeNames(TypeName); - if (auto VD = DpctGlobalInfo::findAncestor(TL)) { - auto TypeStr = VD->getType().getAsString(); - if (VD->getKind() == Decl::Var && TypeStr == "dim3") { - std::string Replacement; - std::string ReplacedType = "range"; - llvm::raw_string_ostream OS(Replacement); - DpctGlobalInfo::printCtadClass( - OS, buildString(MapNames::getClNamespace(), ReplacedType), 3); - Str = OS.str(); - } - } - - if (!Str.empty()) { - SrcAPIStaticsMap[TypeName]++; - emplaceTransformation(new ReplaceToken(BeginLoc, std::move(Str))); - return; - } - } - } -} - -REGISTER_RULE(ReplaceDim3CtorRule, PassKind::PK_Migration) - -// rule for dim3 types member fields replacements. -void Dim3MemberFieldsRule::registerMatcher(MatchFinder &MF) { - // dim3->x/y/z => (*dim3)[0]/[1]/[2] - // dim3.x/y/z => dim3[0]/[1]/[2] - // int64_t{dim3->x/y/z} => int64_t((*dim3)[0]/[1]/[2]) - // int64_t{dim3.x/y/z} => int64_t(dim3[0]/[1]/[2]) - auto Dim3MemberExpr = [&]() { - return memberExpr(anyOf( - has(implicitCastExpr(hasType(pointsTo(typedefDecl(hasName("dim3")))))), - hasObjectExpression(hasType(qualType(hasCanonicalType( - recordType(hasDeclaration(cxxRecordDecl(hasName("dim3")))))))))); - }; - MF.addMatcher(Dim3MemberExpr().bind("Dim3MemberExpr"), this); - MF.addMatcher( - cxxFunctionalCastExpr( - allOf(hasTypeLoc(loc(isSignedInteger())), - hasDescendant( - initListExpr(hasInit(0, ignoringImplicit(Dim3MemberExpr()))) - .bind("InitListExpr")))), - this); -} - -void Dim3MemberFieldsRule::runRule(const MatchFinder::MatchResult &Result) { - // E.g. - // dim3 *pd3, d3; - // pd3->z; d3.z; - // int64_t{d3.x}, int64_t{pd3->x}; - // will migrate to: - // (*pd3)[0]; d3[0]; - // sycl::range<3> *pd3, d3; - // int64_t(d3[0]), int64_t((*pd3)[0]); - ExprAnalysis EA; - if (const auto *ILE = getNodeAsType(Result, "InitListExpr")) { - EA.analyze(ILE); - } else if (const auto *ME = - getNodeAsType(Result, "Dim3MemberExpr")) { - EA.analyze(ME); - } else { - return; - } - emplaceTransformation(EA.getReplacement()); - EA.applyAllSubExprRepl(); -} - -REGISTER_RULE(Dim3MemberFieldsRule, PassKind::PK_Migration) - void DeviceInfoVarRule::registerMatcher(MatchFinder &MF) { MF.addMatcher( memberExpr( @@ -3389,7 +3206,8 @@ void EnumConstantRule::registerMatcher(MatchFinder &MF) { to(enumConstantDecl(anyOf( hasType(enumDecl(hasAnyName( "cudaComputeMode", "cudaMemcpyKind", "cudaMemoryAdvise", - "cudaDeviceAttr", "libraryPropertyType_t", "cudaDataType_t", + "cudaStreamCaptureStatus", "cudaDeviceAttr", + "libraryPropertyType_t", "cudaDataType_t", "cublasComputeType_t", "CUmem_advise_enum", "cufftType_t", "cufftType", "cudaMemoryType", "CUctx_flags_enum"))), matchesName("CUDNN_.*"), matchesName("CUSOLVER_.*"))))) @@ -3458,6 +3276,15 @@ void EnumConstantRule::runRule(const MatchFinder::MatchResult &Result) { EnumName == "cudaComputeModeExclusiveProcess") { handleComputeMode(EnumName, E); return; + } else if ((EnumName == "cudaStreamCaptureStatusActive" || + EnumName == "cudaStreamCaptureStatusNone") && + !DpctGlobalInfo::useExtGraph()) { + report(E->getBeginLoc(), Diagnostics::TRY_EXPERIMENTAL_FEATURE, false, + EnumName, "--use-experimental-features=graph"); + return; + } else if (EnumName == "cudaStreamCaptureStatusInvalidated") { + report(E->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, EnumName); + return; } else if (auto ET = dyn_cast(E->getType())) { if (auto ETD = ET->getDecl()) { auto EnumTypeName = ETD->getName().str(); @@ -8369,7 +8196,6 @@ void StreamAPICallRule::runRule(const MatchFinder::MatchResult &Result) { CE->getCalleeDecl()->getAsFunction()->getNameAsString(); emplaceTransformation(new ReplaceStmt(CE, ReplStr)); } else if (FuncName == "cudaStreamAttachMemAsync" || - FuncName == "cudaStreamIsCapturing" || FuncName == "cudaStreamQuery" || FuncName == "cudaDeviceGetStreamPriorityRange") { @@ -8817,6 +8643,10 @@ void DeviceFunctionDeclRule::runRule( if (!FuncInfo) return; + if (FD->isOverloadedOperator()) { + FuncInfo->setOverloadedOperatorKind(FD->getOverloadedOperator()); + } + if (FD->doesThisDeclarationHaveABody()) { size_t ParamCounter = 0; for (auto &Param : FD->parameters()) { @@ -11947,9 +11777,7 @@ void MathFunctionsRule::registerMatcher(MatchFinder &MF) { internal::Matcher( new internal::HasNameMatcher(MathFunctionsCallExpr)), anyOf(unless(hasDeclContext(namespaceDecl(anything()))), - hasDeclContext(namespaceDecl(hasName("std")))))), - unless(hasAncestor( - cxxConstructExpr(hasType(typedefDecl(hasName("dim3"))))))) + hasDeclContext(namespaceDecl(hasName("std"))))))) .bind("math"), this); @@ -12175,7 +12003,8 @@ void SyncThreadsRule::registerMatcher(MatchFinder &MF) { auto SyncAPI = [&]() { return hasAnyName("__syncthreads", "__threadfence_block", "__threadfence", "__threadfence_system", "__syncthreads_and", - "__syncthreads_or", "__syncthreads_count", "__syncwarp"); + "__syncthreads_or", "__syncthreads_count", "__syncwarp", + "__barrier_sync"); }; MF.addMatcher( callExpr(allOf(callee(functionDecl(SyncAPI())), parentStmt(), @@ -12209,7 +12038,7 @@ void SyncThreadsRule::runRule(const MatchFinder::MatchResult &Result) { std::string FuncName = CE->getDirectCallee()->getNameInfo().getName().getAsString(); - if (FuncName == "__syncthreads") { + if (FuncName == "__syncthreads" || FuncName == "__barrier_sync") { DpctGlobalInfo::registerNDItemUser(CE); const FunctionDecl *FD = nullptr; if (FD = getAssistNodeAsType(Result, "FuncDecl")) { @@ -12289,7 +12118,9 @@ void SyncThreadsRule::runRule(const MatchFinder::MatchResult &Result) { REGISTER_RULE(SyncThreadsRule, PassKind::PK_Analysis) void SyncThreadsMigrationRule::registerMatcher(MatchFinder &MF) { - auto SyncAPI = [&]() { return hasAnyName("__syncthreads"); }; + auto SyncAPI = [&]() { + return hasAnyName("__syncthreads", "__barrier_sync"); + }; MF.addMatcher( callExpr(allOf(callee(functionDecl(SyncAPI())), parentStmt(), hasAncestor(functionDecl(anyOf(hasAttr(attr::CUDADevice), @@ -12336,7 +12167,7 @@ void SyncThreadsMigrationRule::runRule(const MatchFinder::MatchResult &Result) { std::string FuncName = CE->getDirectCallee()->getNameInfo().getName().getAsString(); - if (FuncName == "__syncthreads") { + if (FuncName == "__syncthreads" || FuncName == "__barrier_sync") { BarrierFenceSpaceAnalyzer A; const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate(); if (FTD) { diff --git a/clang/lib/DPCT/ASTTraversal.h b/clang/lib/DPCT/ASTTraversal.h index 57ba49a649b4..6af58b26e34f 100644 --- a/clang/lib/DPCT/ASTTraversal.h +++ b/clang/lib/DPCT/ASTTraversal.h @@ -580,22 +580,6 @@ class VectorTypeOperatorRule static const char NamespaceName[]; }; -class ReplaceDim3CtorRule : public NamedMigrationRule { - ReplaceDim3Ctor *getReplaceDim3Modification( - const ast_matchers::MatchFinder::MatchResult &Result); - -public: - void registerMatcher(ast_matchers::MatchFinder &MF) override; - void runRule(const ast_matchers::MatchFinder::MatchResult &Result); -}; - -/// Migration rule for dim3 types member fields replacements. -class Dim3MemberFieldsRule : public NamedMigrationRule { -public: - void registerMatcher(ast_matchers::MatchFinder &MF) override; - void runRule(const ast_matchers::MatchFinder::MatchResult &Result); -}; - class CudaExtentRule : public NamedMigrationRule { CharSourceRange getConstructorRange(const CXXConstructExpr *Ctor); void replaceConstructor(const CXXConstructExpr *Ctor); diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 8542cba7ce06..d041f24e170d 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -4076,6 +4076,12 @@ void CallFunctionExpr::buildCallExprInfo(const CallExpr *CE) { } auto Info = getFuncInfo(); if (Info) { + if ((Info->getOverloadedOperatorKind() != + OverloadedOperatorKind::OO_None) && + (Info->getOverloadedOperatorKind() != + OverloadedOperatorKind::OO_Call)) { + return; + } if (Info->ParamsNum == 0) { ExtraArgLoc = DpctGlobalInfo::getSourceManager().getFileOffset(CE->getRParenLoc()); diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index 4f883cb928cf..2a9a3df7879a 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -2648,6 +2648,10 @@ class DeviceFunctionInfo { bool IsAlwaysInlineDevFunc() { return AlwaysInlineDevFunc; } void setForceInlineDevFunc() { ForceInlineDevFunc = true; } bool IsForceInlineDevFunc() { return ForceInlineDevFunc; } + void setOverloadedOperatorKind(OverloadedOperatorKind Kind) { + OO_Kind = Kind; + } + OverloadedOperatorKind getOverloadedOperatorKind() { return OO_Kind; } void merge(std::shared_ptr Other); size_t ParamsNum; size_t NonDefaultParamNum; @@ -2693,6 +2697,7 @@ class DeviceFunctionInfo { bool IsKernelInvoked = false; bool CallGroupFunctionInControlFlow = false; bool HasCheckedCallGroupFunctionInControlFlow = false; + OverloadedOperatorKind OO_Kind = OverloadedOperatorKind::OO_None; }; class KernelCallExpr : public CallFunctionExpr { diff --git a/clang/lib/DPCT/BarrierFenceSpaceAnalyzer.cpp b/clang/lib/DPCT/BarrierFenceSpaceAnalyzer.cpp index 691a2c110a61..5d4029a7d818 100644 --- a/clang/lib/DPCT/BarrierFenceSpaceAnalyzer.cpp +++ b/clang/lib/DPCT/BarrierFenceSpaceAnalyzer.cpp @@ -58,7 +58,7 @@ bool clang::dpct::BarrierFenceSpaceAnalyzer::Visit(const CallExpr *CE) { for (const auto &Arg : CE->arguments()) DeviceFunctionCallArgs.insert(Arg); - if (FuncName == "__syncthreads") { + if (FuncName == "__syncthreads" || FuncName == "__barrier_sync") { SyncCallInfo SCI; SCI.Predecessors.push_back( SourceRange(FD->getBody()->getBeginLoc(), CE->getBeginLoc())); diff --git a/clang/lib/DPCT/CallExprRewriterCommon.h b/clang/lib/DPCT/CallExprRewriterCommon.h index caa98cf18cad..11fc6d7750d9 100644 --- a/clang/lib/DPCT/CallExprRewriterCommon.h +++ b/clang/lib/DPCT/CallExprRewriterCommon.h @@ -1235,7 +1235,8 @@ createMemberCallExprRewriterFactory( } template -inline std::shared_ptr +inline std::shared_ptr, CallExprRewriterFactoryBase>> createMemberCallExprRewriterFactory( const std::string &SourceName, BaseT BaseCreator, bool IsArrow, std::string MemberName, diff --git a/clang/lib/DPCT/ExprAnalysis.cpp b/clang/lib/DPCT/ExprAnalysis.cpp index 7310975ce944..347f931103e6 100644 --- a/clang/lib/DPCT/ExprAnalysis.cpp +++ b/clang/lib/DPCT/ExprAnalysis.cpp @@ -600,19 +600,6 @@ void ExprAnalysis::analyzeExpr(const InitListExpr *ILE) { if (QT->isPointerType()) { QT = QT->getPointeeType(); } - if (DpctGlobalInfo::getUnqualifiedTypeName( - QT->getCanonicalTypeUnqualified()) == "dim3") { - // Replace initializer list with explicit type conversion (e.g., - // 'int64_t{d3[2]}' to 'int64_t(d3[2])') to slience narrowing - // error (e.g., 'size_t -> int64_t') for - // non-constant-expression in int64_t initializer list. - // E.g., - // dim3 d3; int64_t{d3.x}; - // will be migratd to - // sycl::range<3> d3; int64_t(d3[2]); - addReplacement(ILE->getLBraceLoc(), "("); - addReplacement(ILE->getRBraceLoc(), ")"); - } } } } @@ -628,44 +615,11 @@ void ExprAnalysis::analyzeExpr(const CXXUnresolvedConstructExpr *Ctor) { } void ExprAnalysis::analyzeExpr(const CXXTemporaryObjectExpr *Temp) { - if (Temp->getConstructor()->getDeclName().getAsString() != "dim3") { - analyzeType(Temp->getTypeSourceInfo()->getTypeLoc()); - } + analyzeType(Temp->getTypeSourceInfo()->getTypeLoc()); analyzeExpr(static_cast(Temp)); } void ExprAnalysis::analyzeExpr(const CXXConstructExpr *Ctor) { - if (Ctor->getConstructor()->getDeclName().getAsString() == "dim3") { - std::string ArgsString; - llvm::raw_string_ostream OS(ArgsString); - DpctGlobalInfo::printCtadClass(OS, MapNames::getClNamespace() + "range", 3) - << "("; - ArgumentAnalysis A; - std::string ArgStr = ""; - for (auto Arg : Ctor->arguments()) { - A.analyze(Arg); - ArgStr = ", " + A.getReplacedString() + ArgStr; - } - ArgStr.replace(0, 2, ""); - OS << ArgStr << ")"; - OS.flush(); - - // Special handling for implicit ctor. - // #define GET_BLOCKS(a) a - // dim3 A = GET_BLOCKS(1); - // Result if using SM.getExpansionRange: - // sycl::range<3> A = sycl::range<3>(1, 1, GET_BLOCKS(1)); - // Result if using addReplacement(E): - // #define GET_BLOCKS(a) sycl::range<3>(1, 1, a) - // sycl::range<3> A = GET_BLOCKS(1); - if (Ctor->getParenOrBraceRange().isInvalid() && isOuterMostMacro(Ctor)) { - return addReplacement( - SM.getExpansionRange(Ctor->getBeginLoc()).getBegin(), - SM.getExpansionRange(Ctor->getEndLoc()).getEnd(), ArgsString); - } - addReplacement(Ctor, ArgsString); - return; - } for (auto It = Ctor->arg_begin(); It != Ctor->arg_end(); It++) { dispatch(*It); } @@ -754,63 +708,6 @@ void ExprAnalysis::analyzeExpr(const MemberExpr *ME) { } } } - } else if (BaseType == "dim3") { - if (ME->isArrow()) { - addReplacement(ME->getBase(), "(" + getDrefName(ME->getBase()) + ")"); - } - addReplacement( - ME->getOperatorLoc(), ME->getMemberLoc(), - MapNames::findReplacedName(MapNames::Dim3MemberNamesMap, - ME->getMemberNameInfo().getAsString())); - - auto needAddTypecast = [](const Expr *E) -> bool { - auto &Context = DpctGlobalInfo::getContext(); - clang::DynTypedNodeList Parents = Context.getParents(*E); - bool hasCast = false; - while (!Parents.empty()) { - auto &Cur = Parents[0]; - if (const auto ICE = Cur.get()) { - CastKind CK = ICE->getCastKind(); - if (CK == CastKind::CK_FloatingCast || - CK == CastKind::CK_IntegralCast) { - hasCast = true; - Parents = Context.getParents(Cur); - continue; - } - } else if (Cur.get() || Cur.get() || - Cur.get()) { - hasCast = true; - Parents = Context.getParents(Cur); - continue; - } else if (const auto CE = Cur.get()) { - if (hasCast) - return false; - auto *Callee = - dyn_cast(CE->getCallee()->IgnoreParenImpCasts()); - if (!Callee) - return false; - if (CE->getDirectCallee()->isTemplateInstantiation()) - return true; - if (!Callee->getQualifier()) - return false; - if (Callee->getQualifier()->getKind() != - NestedNameSpecifier::SpecifierKind::Namespace) - return false; - if (Callee->getQualifier()->getAsNamespace()->getNameAsString() != - "std") - return false; - if (Callee->getNameInfo().getAsString() == "max" || - Callee->getNameInfo().getAsString() == "min") - return true; - return false; - } - Parents = Context.getParents(Cur); - } - return false; - }; - if (needAddTypecast(ME)) { - addReplacement(ME->getBeginLoc(), 0, "(unsigned int)"); - } } else if (BaseType == "cudaDeviceProp") { auto MemberName = ME->getMemberNameInfo().getAsString(); @@ -934,11 +831,6 @@ inline void ExprAnalysis::analyzeExpr(const UnresolvedLookupExpr *ULE) { } void ExprAnalysis::analyzeExpr(const ExplicitCastExpr *Cast) { - if (Cast->getCastKind() == CastKind::CK_ConstructorConversion) { - if (DpctGlobalInfo::getUnqualifiedTypeName(Cast->getTypeAsWritten()) == - "dim3") - return dispatch(Cast->getSubExpr()); - } analyzeType(Cast->getTypeInfoAsWritten(), Cast); dispatch(Cast->getSubExprAsWritten()); } @@ -1031,7 +923,14 @@ void ExprAnalysis::analyzeExpr(const CallExpr *CE) { if (auto FD = DpctGlobalInfo::getParentFunction(CE)) { if (auto F = DpctGlobalInfo::getInstance().findDeviceFunctionDecl(FD)) { - if (auto C = F->getFuncInfo()->findCallee(CE)) { + auto FuncInfo = F->getFuncInfo(); + if ((FuncInfo->getOverloadedOperatorKind() != + OverloadedOperatorKind::OO_None) && + (FuncInfo->getOverloadedOperatorKind() != + OverloadedOperatorKind::OO_Call)) { + return; + } + if (auto C = FuncInfo->findCallee(CE)) { auto Extra = C->getExtraArguments(); if (Extra.empty()) return; @@ -1305,10 +1204,6 @@ void ExprAnalysis::analyzeDecltypeType(DecltypeTypeLoc TL) { return; auto Name = getNestedNameSpecifierString(Qualifier); auto Range = getDefinitionRange(SR.getBegin(), SR.getEnd()); - // Types like 'dim3::x' should be migrated to 'size_t'. - if (Name == "dim3::") { - addReplacement(Range.getBegin(), Range.getEnd(), "size_t"); - } Name.resize(Name.length() - 2); // Remove the "::". if (MapNames::SupportedVectorTypes.count(Name)) { auto ReplacedStr = diff --git a/clang/lib/DPCT/GroupFunctionAnalyzer.cpp b/clang/lib/DPCT/GroupFunctionAnalyzer.cpp index ba6db4ea76e3..00f66fef2b36 100644 --- a/clang/lib/DPCT/GroupFunctionAnalyzer.cpp +++ b/clang/lib/DPCT/GroupFunctionAnalyzer.cpp @@ -86,7 +86,9 @@ bool GroupFunctionCallInControlFlowAnalyzer::isSyncThreadsCallExpr( const CallExpr *CE) const { const auto *FD = CE->getDirectCallee(); - return FD && !isa(FD) && FD->getName() == "__syncthreads" && + return FD && !isa(FD) && + (FD->getName() == "__syncthreads" || + FD->getName() == "__barrier_sync") && (FD->hasAttr() || FD->hasAttr()); } diff --git a/clang/lib/DPCT/MapNames.cpp b/clang/lib/DPCT/MapNames.cpp index 5bc8b25945af..3e245414b42f 100644 --- a/clang/lib/DPCT/MapNames.cpp +++ b/clang/lib/DPCT/MapNames.cpp @@ -111,7 +111,7 @@ void MapNames::setExplicitNamespaceMap() { {"cudaPointerAttributes", std::make_shared(getDpctNamespace() + "pointer_attributes", HelperFeatureEnum::device_ext)}, - {"dim3", std::make_shared(getClNamespace() + "range<3>")}, + {"dim3", std::make_shared(getDpctNamespace() + "dim3")}, {"int2", std::make_shared(getClNamespace() + "int2")}, {"double2", std::make_shared(getClNamespace() + "double2")}, {"__half", std::make_shared(getClNamespace() + "half")}, @@ -364,6 +364,11 @@ void MapNames::setExplicitNamespaceMap() { {"cusparseHandle_t", std::make_shared( getDpctNamespace() + "sparse::descriptor_ptr")}, {"cudaMemoryAdvise", std::make_shared("int")}, + {"cudaStreamCaptureStatus", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? getClNamespace() + "ext::oneapi::experimental::queue_state" + : "cudaStreamCaptureStatus")}, {"CUmem_advise", std::make_shared("int")}, {"cudaPos", std::make_shared(getClNamespace() + "id<3>")}, {"cudaExtent", @@ -373,8 +378,12 @@ void MapNames::setExplicitNamespaceMap() { HelperFeatureEnum::device_ext)}, {"cudaMemcpyKind", std::make_shared(getDpctNamespace() + "memcpy_direction")}, - {"CUDA_ARRAY_DESCRIPTOR", std::make_shared( - getDpctNamespace() + "image_matrix_desc")}, + {"CUDA_ARRAY_DESCRIPTOR", + std::make_shared( + DpctGlobalInfo::useExtBindlessImages() + ? getClNamespace() + + "ext::oneapi::experimental::image_descriptor" + : getDpctNamespace() + "image_matrix_desc")}, {"cudaMemcpy3DParms", std::make_shared(getDpctNamespace() + "memcpy_parameter")}, {"CUDA_MEMCPY3D", @@ -402,11 +411,17 @@ void MapNames::setExplicitNamespaceMap() { HelperFeatureEnum::device_ext)}, {"CUdevice", std::make_shared("int")}, {"CUarray_st", - std::make_shared(getDpctNamespace() + "image_matrix", - HelperFeatureEnum::device_ext)}, + std::make_shared( + DpctGlobalInfo::useExtBindlessImages() + ? getDpctNamespace() + "experimental::image_mem_wrapper" + : getDpctNamespace() + "image_matrix", + HelperFeatureEnum::device_ext)}, {"CUarray", - std::make_shared(getDpctNamespace() + "image_matrix_p", - HelperFeatureEnum::device_ext)}, + std::make_shared( + DpctGlobalInfo::useExtBindlessImages() + ? getDpctNamespace() + "experimental::image_mem_wrapper_ptr" + : getDpctNamespace() + "image_matrix_p", + HelperFeatureEnum::device_ext)}, {"CUarray_format", std::make_shared(getClNamespace() + "image_channel_type")}, {"CUarray_format_enum", @@ -1067,6 +1082,21 @@ void MapNames::setExplicitNamespaceMap() { std::make_shared("0")}, {"cudaMemAdviseSetAccessedBy", std::make_shared("0")}, {"cudaMemAdviseUnsetAccessedBy", std::make_shared("0")}, + // enum cudaStreamCaptureStatus + {"cudaStreamCaptureStatusNone", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? getClNamespace() + + "ext::oneapi::experimental::queue_state::executing" + : "cudaStreamCaptureStatusNone")}, + {"cudaStreamCaptureStatusActive", + std::make_shared( + DpctGlobalInfo::useExtGraph() + ? getClNamespace() + + "ext::oneapi::experimental::queue_state::recording" + : "cudaStreamCaptureStatusActive")}, + {"cudaStreamCaptureStatusInvalidated", + std::make_shared("cudaStreamCaptureStatusInvalidated")}, // enum CUmem_advise_enum {"CU_MEM_ADVISE_SET_READ_MOSTLY", std::make_shared("0")}, {"CU_MEM_ADVISE_UNSET_READ_MOSTLY", std::make_shared("0")}, @@ -4309,7 +4339,7 @@ const MapNames::MapTy MemoryDataTypeRule::ArrayDescMemberNames{ {"Width", "width"}, {"Height", "height"}, {"Format", "channel_type"}, - {"NumChannels", "channel_num"}}; + {"NumChannels", "num_channels"}}; const MapNames::MapTy MemoryDataTypeRule::DirectReplMemberNames{ // cudaMemcpy3DParms fields. diff --git a/clang/lib/DPCT/TextModification.cpp b/clang/lib/DPCT/TextModification.cpp index 0010962e66da..a11c402a744c 100644 --- a/clang/lib/DPCT/TextModification.cpp +++ b/clang/lib/DPCT/TextModification.cpp @@ -523,135 +523,6 @@ ReplaceInclude::getReplacement(const ASTContext &Context) const { this); } -void ReplaceDim3Ctor::setRange() { - auto &SM = DpctGlobalInfo::getSourceManager(); - if (isDecl) { - SourceRange SR = Ctor->getParenOrBraceRange(); - if (SR.isInvalid()) { - // convert to spelling location if the dim3 constructor is in a macro - // otherwise, Lexer::getLocForEndOfToken returns invalid source location - auto CtorLoc = Ctor->getLocation().isMacroID() - ? SM.getSpellingLoc(Ctor->getLocation()) - : Ctor->getLocation(); - // dim3 a; - // MACRO(... dim3 a; ...) - auto CtorEndLoc = Lexer::getLocForEndOfToken( - CtorLoc, 0, SM, DpctGlobalInfo::getContext().getLangOpts()); - CSR = CharSourceRange(SourceRange(CtorEndLoc, CtorEndLoc), false); - } else { - SourceRange SR1 = - SourceRange(SR.getBegin().getLocWithOffset(1), SR.getEnd()); - CSR = CharSourceRange(SR1, false); - } - } else { - // adjust the statement to replace if top-level constructor includes the - // variable being defined - const Stmt *S = getReplaceStmt(Ctor); - if (!S) { - return; - } - if (S->getBeginLoc().isMacroID() && !isOuterMostMacro(S)) { - auto Range = getDefinitionRange(S->getBeginLoc(), S->getEndLoc()); - auto Begin = Range.getBegin(); - auto End = Range.getEnd(); - End = End.getLocWithOffset(Lexer::MeasureTokenLength( - End, SM, dpct::DpctGlobalInfo::getContext().getLangOpts())); - CSR = CharSourceRange::getTokenRange(Begin, End); - } else { - // Use getStmtExpansionSourceRange(S) to support cases like - // dim3 a = MACRO; - auto Range = getStmtExpansionSourceRange(S); - auto Begin = Range.getBegin(); - auto End = Range.getEnd(); - CSR = CharSourceRange::getTokenRange( - Begin, - End.getLocWithOffset(Lexer::MeasureTokenLength( - End, SM, dpct::DpctGlobalInfo::getContext().getLangOpts()))); - } - } -} - -ReplaceInclude *ReplaceDim3Ctor::getEmpty() { - return new ReplaceInclude(CSR, ""); -} - -// Strips possible Materialize and Cast operators from CXXConstructor -const CXXConstructExpr *ReplaceDim3Ctor::getConstructExpr(const Expr *E) { - if (auto C = dyn_cast_or_null(E)) { - return C; - } else if (isa(E)) { - return getConstructExpr( - dyn_cast(E)->getSubExpr()); - } else if (isa(E)) { - return getConstructExpr(dyn_cast(E)->getSubExpr()); - } else { - return nullptr; - } -} - -// Returns the full replacement string for the CXXConstructorExpr -std::string -ReplaceDim3Ctor::getSyclRangeCtor(const CXXConstructExpr *Ctor) const { - ExprAnalysis Analysis(Ctor); - return Analysis.getReplacedString(); -} - -const Stmt *ReplaceDim3Ctor::getReplaceStmt(const Stmt *S) const { - if (auto Ctor = dyn_cast_or_null(S)) { - if (Ctor->getNumArgs() == 1) { - return getConstructExpr(Ctor->getArg(0)); - } - } - return S; -} - -std::string ReplaceDim3Ctor::getReplaceString() const { - if (isDecl) { - // Get the new parameter list for the replaced constructor, without the - // parens - std::string ReplacedString; - llvm::raw_string_ostream OS(ReplacedString); - ArgumentAnalysis AA; - std::string ArgStr = ""; - for (auto Arg : Ctor->arguments()) { - AA.analyze(Arg); - ArgStr = ", " + AA.getReplacedString() + ArgStr; - } - ArgStr.replace(0, 2, ""); - OS << ArgStr; - OS.flush(); - if (Ctor->getParenOrBraceRange().isInvalid()) { - // dim3 = a; - ReplacedString = "(" + ReplacedString + ")"; - } - return ReplacedString; - } else { - std::string S; - if (FinalCtor) { - S = getSyclRangeCtor(FinalCtor); - } else { - S = getSyclRangeCtor(Ctor); - } - return S; - } -} - -std::shared_ptr -ReplaceDim3Ctor::getReplacement(const ASTContext &Context) const { - if (this->isIgnoreTM()) - return nullptr; - // Use getDefinitionRange in general cases, - // For cases like dim3 a = MACRO; - // CSR is already set to the expansion range. - auto &SM = dpct::DpctGlobalInfo::getSourceManager(); - ReplacementString = getReplaceString(); - auto Range = getDefinitionRange(CSR.getBegin(), CSR.getEnd()); - auto Length = SM.getDecomposedLoc(Range.getEnd()).second - - SM.getDecomposedLoc(Range.getBegin()).second; - return std::make_shared(SM, Range.getBegin(), Length, - getReplaceString(), this); -} - std::shared_ptr InsertComment::getReplacement(const ASTContext &Context) const { if (this->isIgnoreTM()) @@ -930,14 +801,6 @@ void ReplaceInclude::print(llvm::raw_ostream &OS, ASTContext &Context, printReplacement(OS, T); } -void ReplaceDim3Ctor::print(llvm::raw_ostream &OS, ASTContext &Context, - const bool PrintDetail) const { - printHeader(OS, getID(), PrintDetail ? getParentRuleName() : StringRef()); - printLocation(OS, CSR.getBegin(), Context, PrintDetail); - Ctor->printPretty(OS, nullptr, PrintingPolicy(Context.getLangOpts())); - printReplacement(OS, ReplacementString); -} - void InsertComment::print(llvm::raw_ostream &OS, ASTContext &Context, const bool PrintDetail) const { printHeader(OS, getID(), PrintDetail ? getParentRuleName() : StringRef()); diff --git a/clang/lib/DPCT/TextModification.h b/clang/lib/DPCT/TextModification.h index 6549bc8be1a4..2435b1fe805f 100644 --- a/clang/lib/DPCT/TextModification.h +++ b/clang/lib/DPCT/TextModification.h @@ -529,39 +529,6 @@ class ReplaceInclude : public TextModification { const bool PrintDetail = true) const override; }; -/// Replace Dim3 constructors -class ReplaceDim3Ctor : public TextModification { - bool isDecl; - const CXXConstructExpr *Ctor; - const CXXConstructExpr *FinalCtor; - CharSourceRange CSR; - mutable std::string ReplacementString; - - void setRange(); - const Stmt *getReplaceStmt(const Stmt *S) const; - std::string getSyclRangeCtor(const CXXConstructExpr *Ctor) const; - std::string getReplaceString() const; - -public: - ReplaceDim3Ctor(const CXXConstructExpr *_Ctor, bool _isDecl = false) - : TextModification(TMID::ReplaceDim3Ctor, G2), isDecl(_isDecl), - Ctor(_Ctor), FinalCtor(nullptr) { - setRange(); - } - ReplaceDim3Ctor(const CXXConstructExpr *_Ctor, - const CXXConstructExpr *_FinalCtor) - : TextModification(TMID::ReplaceDim3Ctor, G2), isDecl(false), Ctor(_Ctor), - FinalCtor(_FinalCtor) { - setRange(); - } - static const CXXConstructExpr *getConstructExpr(const Expr *E); - ReplaceInclude *getEmpty(); - std::shared_ptr - getReplacement(const ASTContext &Context) const override; - void print(llvm::raw_ostream &OS, ASTContext &Context, - const bool PrintDetail = true) const override; -}; - class InsertBeforeStmt : public TextModification { const Stmt *S; std::string T; diff --git a/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp b/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp index df006be25702..9b40e7ecdefa 100644 --- a/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp +++ b/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp @@ -55,6 +55,18 @@ class image_mem_wrapper { image_mem_wrapper(image_channel channel, size_t width, size_t height = 0, size_t depth = 0) : image_mem_wrapper(channel, {width, height, depth}) {} + /// Create bindless image memory wrapper. + /// \param [in] desc The image descriptor used to create bindless image + image_mem_wrapper( + const sycl::ext::oneapi::experimental::image_descriptor *desc) + : _desc(*desc) { + _channel.set_channel_type(desc->channel_type); +#if (__SYCL_COMPILER_VERSION && __SYCL_COMPILER_VERSION >= 20240725) + _channel.set_channel_num(desc->num_channels); +#endif + auto q = get_default_queue(); + _handle = alloc_image_mem(_desc, q); + } image_mem_wrapper(const image_mem_wrapper &) = delete; image_mem_wrapper &operator=(const image_mem_wrapper &) = delete; /// Destroy bindless image memory wrapper. @@ -99,7 +111,7 @@ class image_mem_wrapper { const sycl::ext::oneapi::experimental::image_mem_handle &handle) : _channel(channel), _desc(desc), _handle(handle) {} - const image_channel _channel; + image_channel _channel; const sycl::ext::oneapi::experimental::image_descriptor _desc; sycl::ext::oneapi::experimental::image_mem_handle _handle; image_mem_wrapper *_sub_wrappers{nullptr}; @@ -190,11 +202,11 @@ dpct_memcpy(sycl::ext::oneapi::experimental::image_mem_handle src, dest_extend, copy_extend); } -static inline std::vector -dpct_memcpy(sycl::ext::oneapi::experimental::image_mem_handle src, - const sycl::ext::oneapi::experimental::image_descriptor &desc_src, - size_t w_offset_src, size_t h_offset_src, void *dest, size_t s, - sycl::queue q) { +static inline std::vector dpct_memcpy_to_host( + sycl::ext::oneapi::experimental::image_mem_handle src, + const sycl::ext::oneapi::experimental::image_descriptor &desc_src, + size_t w_offset_src, size_t h_offset_src, void *dest_host_ptr, size_t s, + sycl::queue q) { std::vector event_list; const auto ele_size = get_ele_size(desc_src); const auto w = desc_src.width * ele_size; @@ -206,9 +218,9 @@ dpct_memcpy(sycl::ext::oneapi::experimental::image_mem_handle src, const auto dest_extend = sycl::range<3>(0, 0, 0); const auto copy_extend = sycl::range<3>((w - w_offset_src) / ele_size, 1, 0); - event_list.push_back(q.ext_oneapi_copy(src, src_offset, desc_src, dest, - dest_offset, dest_extend, - copy_extend)); + event_list.push_back(q.ext_oneapi_copy(src, src_offset, desc_src, + dest_host_ptr, dest_offset, + dest_extend, copy_extend)); offset_dest += w - w_offset_src; w_offset_src = 0; ++h_offset_src; @@ -218,11 +230,32 @@ dpct_memcpy(sycl::ext::oneapi::experimental::image_mem_handle src, const auto dest_offset = sycl::range<3>(offset_dest / ele_size, 0, 0); const auto dest_extend = sycl::range<3>(0, 0, 0); const auto copy_extend = sycl::range<3>((s - w_offset_src) / ele_size, 1, 0); - event_list.push_back(q.ext_oneapi_copy( - src, src_offset, desc_src, dest, dest_offset, dest_extend, copy_extend)); + event_list.push_back(q.ext_oneapi_copy(src, src_offset, desc_src, + dest_host_ptr, dest_offset, + dest_extend, copy_extend)); return event_list; } +static inline std::vector +dpct_memcpy(sycl::ext::oneapi::experimental::image_mem_handle src, + const sycl::ext::oneapi::experimental::image_descriptor &desc_src, + size_t w_offset_src, size_t h_offset_src, void *dest, size_t s, + sycl::queue q) { + if (dpct::detail::get_pointer_attribute(q, dest) == + dpct::detail::pointer_access_attribute::device_only) { + std::vector event_list; + dpct::detail::host_buffer buf(s, q, event_list); + auto copy_events = dpct_memcpy_to_host(src, desc_src, w_offset_src, + h_offset_src, buf.get_ptr(), s, q); + event_list.push_back(dpct::detail::dpct_memcpy( + q, dest, buf.get_ptr(), s, memcpy_direction::host_to_device, + copy_events)); + return event_list; + } + return dpct_memcpy_to_host(src, desc_src, w_offset_src, h_offset_src, dest, s, + q); +} + static inline sycl::event dpct_memcpy(const void *src, sycl::ext::oneapi::experimental::image_mem_handle dest, @@ -240,12 +273,11 @@ dpct_memcpy(const void *src, dest, dest_offset, desc_dest, copy_extend); } -static inline std::vector -dpct_memcpy(const void *src, - sycl::ext::oneapi::experimental::image_mem_handle dest, - const sycl::ext::oneapi::experimental::image_descriptor &desc_dest, - size_t w_offset_dest, size_t h_offset_dest, size_t s, - sycl::queue q = get_default_queue()) { +static inline std::vector dpct_memcpy_from_host( + const void *src_host_ptr, + sycl::ext::oneapi::experimental::image_mem_handle dest, + const sycl::ext::oneapi::experimental::image_descriptor &desc_dest, + size_t w_offset_dest, size_t h_offset_dest, size_t s, sycl::queue q) { std::vector event_list; const auto ele_size = get_ele_size(desc_dest); const auto w = desc_dest.width * ele_size; @@ -258,9 +290,9 @@ dpct_memcpy(const void *src, const auto copy_extend = sycl::range<3>((w - w_offset_dest) / ele_size, 1, 0); // TODO: Remove const_cast after refining the signature of ext_oneapi_copy. - event_list.push_back(q.ext_oneapi_copy(const_cast(src), src_offset, - src_extend, dest, dest_offset, - desc_dest, copy_extend)); + event_list.push_back(q.ext_oneapi_copy( + const_cast(src_host_ptr), src_offset, src_extend, dest, + dest_offset, desc_dest, copy_extend)); offset_src += w - w_offset_dest; w_offset_dest = 0; ++h_offset_dest; @@ -272,12 +304,31 @@ dpct_memcpy(const void *src, const auto copy_extend = sycl::range<3>((s - offset_src - w_offset_dest) / ele_size, 1, 0); // TODO: Remove const_cast after refining the signature of ext_oneapi_copy. - event_list.push_back(q.ext_oneapi_copy(const_cast(src), src_offset, - src_extend, dest, dest_offset, - desc_dest, copy_extend)); + event_list.push_back(q.ext_oneapi_copy(const_cast(src_host_ptr), + src_offset, src_extend, dest, + dest_offset, desc_dest, copy_extend)); return event_list; } +static inline std::vector dpct_memcpy( + const void *src, sycl::ext::oneapi::experimental::image_mem_handle dest, + const sycl::ext::oneapi::experimental::image_descriptor &desc_dest, + size_t w_offset_dest, size_t h_offset_dest, size_t s, sycl::queue q) { + if (dpct::detail::get_pointer_attribute(q, src) == + dpct::detail::pointer_access_attribute::device_only) { + std::vector event_list; + dpct::detail::host_buffer buf(s, q, event_list); + event_list.push_back(dpct::detail::dpct_memcpy( + q, buf.get_ptr(), src, s, memcpy_direction::device_to_host)); + auto copy_events = dpct_memcpy_from_host( + buf.get_ptr(), dest, desc_dest, w_offset_dest, h_offset_dest, s, q); + event_list.insert(event_list.end(), copy_events.begin(), copy_events.end()); + return event_list; + } + return dpct_memcpy_from_host(src, dest, desc_dest, w_offset_dest, + h_offset_dest, s, q); +} + static inline sycl::event dpct_memcpy(const image_mem_wrapper *src, const sycl::id<3> &src_id, pitched_data &dest, const sycl::id<3> &dest_id, diff --git a/clang/runtime/dpct-rt/include/dpct/image.hpp b/clang/runtime/dpct-rt/include/dpct/image.hpp index dc7bd291ace5..9daf8965fbd9 100644 --- a/clang/runtime/dpct-rt/include/dpct/image.hpp +++ b/clang/runtime/dpct-rt/include/dpct/image.hpp @@ -144,7 +144,7 @@ struct image_matrix_desc { size_t height = 0; sycl::image_channel_type channel_type = sycl::image_channel_type::signed_int32; - unsigned channel_num = 0; + unsigned num_channels = 0; }; /// Image channel info, include channel number, order, data width and type @@ -340,7 +340,7 @@ class image_matrix { _host_data = std::malloc(_range[0] * _range[1] * _channel.get_total_size()); } image_matrix(const image_matrix_desc *desc) - : image_matrix(desc->channel_type, desc->channel_num, desc->width, + : image_matrix(desc->channel_type, desc->num_channels, desc->width, desc->height) {} /// Construct a new image class with the matrix data. @@ -641,7 +641,7 @@ class image_wrapper_base { void attach(const image_matrix_desc *desc, device_ptr ptr, size_t pitch) { detach(); image_channel channel; - channel.set_channel_num(desc->channel_num); + channel.set_channel_num(desc->num_channels); channel.set_channel_type(desc->channel_type); image_wrapper_base::set_data( image_data(ptr, desc->width, desc->height, pitch, channel)); diff --git a/clang/runtime/dpct-rt/include/dpct/util.hpp b/clang/runtime/dpct-rt/include/dpct/util.hpp index 74c979499d19..e2e21ef42a7e 100644 --- a/clang/runtime/dpct-rt/include/dpct/util.hpp +++ b/clang/runtime/dpct-rt/include/dpct/util.hpp @@ -31,6 +31,22 @@ T __spirv_GroupNonUniformShuffleUp(__spv::Scope::Flag, T, unsigned) noexcept; #endif namespace dpct { +/// dim3 is used to store 3 component dimensions. +class dim3 { +public: + unsigned x, y, z; + + constexpr dim3(unsigned x = 1, unsigned y = 1, unsigned z = 1) + : x(x), y(y), z(z) {} + + dim3(const sycl::id<3> &r) : dim3(r[2], r[1], r[0]) {} + + operator sycl::range<3>() const { return sycl::range<3>(z, y, x); } +}; + +inline dim3 operator*(const dim3 &a, const dim3 &b) { + return dim3{a.x * b.x, a.y * b.y, a.z * b.z}; +} namespace detail { diff --git a/clang/test/dpct/checkFormatAll.cu b/clang/test/dpct/checkFormatAll.cu index f5f870a4b061..f84b301ae225 100644 --- a/clang/test/dpct/checkFormatAll.cu +++ b/clang/test/dpct/checkFormatAll.cu @@ -33,8 +33,8 @@ __global__ void testKernelPtr(const int *L, const int *M, int N) { //CHECK-NEXT: sycl::device dev_ct1; //CHECK-NEXT: sycl::queue q_ct1(dev_ct1, //CHECK-NEXT: sycl::property_list{sycl::property::queue::in_order()}); -//CHECK-NEXT: sycl::range<3> griddim = sycl::range<3>(1, 1, 2); -//CHECK-NEXT: sycl::range<3> threaddim = sycl::range<3>(1, 1, 32); +//CHECK-NEXT: dpct::dim3 griddim = 2; +//CHECK-NEXT: dpct::dim3 threaddim = 32; //CHECK-NEXT: int *karg1, *karg2; //CHECK-NEXT: karg1 = sycl::malloc_device(32, q_ct1); //CHECK-NEXT: karg2 = sycl::malloc_device(32, q_ct1); diff --git a/clang/test/dpct/checkFormatMigrated.cu b/clang/test/dpct/checkFormatMigrated.cu index 730f565dfcbe..c880b2af0d79 100644 --- a/clang/test/dpct/checkFormatMigrated.cu +++ b/clang/test/dpct/checkFormatMigrated.cu @@ -34,8 +34,8 @@ __global__ void testKernelPtr(const int *L, const int *M, int N) { //CHECK:int main() { //CHECK-NEXT: dpct::device_ext &dev_ct1 = dpct::get_current_device(); //CHECK-NEXT: sycl::queue &q_ct1 = dev_ct1.in_order_queue(); -//CHECK-NEXT: sycl::range<3> griddim = sycl::range<3>(1, 1, 2); -//CHECK-NEXT: sycl::range<3> threaddim = sycl::range<3>(1, 1, 32); +//CHECK-NEXT: dpct::dim3 griddim = 2; +//CHECK-NEXT: dpct::dim3 threaddim = 32; //CHECK-NEXT: int *karg1, *karg2; //CHECK-NEXT: karg1 = sycl::malloc_device(32, q_ct1); //CHECK-NEXT: karg2 = sycl::malloc_device(32, q_ct1); diff --git a/clang/test/dpct/cmake_migration/case_001/expected.txt b/clang/test/dpct/cmake_migration/case_001/expected.txt index 0725a51a18e4..e0f2fd9df2f0 100644 --- a/clang/test/dpct/cmake_migration/case_001/expected.txt +++ b/clang/test/dpct/cmake_migration/case_001/expected.txt @@ -53,3 +53,6 @@ if(FOO_OPENMP) endif() endif() endif() + +if(SYCL_COMPILER_EXECUTABLE) +endif() diff --git a/clang/test/dpct/cmake_migration/case_001/input.cmake b/clang/test/dpct/cmake_migration/case_001/input.cmake index bb744b98827c..8ff57289bc86 100644 --- a/clang/test/dpct/cmake_migration/case_001/input.cmake +++ b/clang/test/dpct/cmake_migration/case_001/input.cmake @@ -43,3 +43,6 @@ if(FOO_OPENMP) endif() endif() endif() + +if(CUDA_NVCC_EXECUTABLE) +endif() diff --git a/clang/test/dpct/cmake_migration/case_055/expected.txt b/clang/test/dpct/cmake_migration/case_055/expected.txt index af0eab32d899..a07e2341b50c 100644 --- a/clang/test/dpct/cmake_migration/case_055/expected.txt +++ b/clang/test/dpct/cmake_migration/case_055/expected.txt @@ -8,3 +8,9 @@ set(SYCL_HOST_FLAGS "--extra-warnings -Wdeprecated") set (SYCL_HOST_FLAGS "--extra-warnings -Wdeprecated") set(SYCL_HOST_FLAGS "${host_flags}") + +list(APPEND CMAKE_SYCL_FLAGS "-Wno-float-conversion") +list(APPEND CMAKE_SYCL_FLAGS "-fno-strict-aliasing") +list(APPEND CMAKE_SYCL_FLAGS "") +list(APPEND CMAKE_SYCL_FLAGS "") +list(APPEND CMAKE_SYCL_FLAGS "") diff --git a/clang/test/dpct/cmake_migration/case_055/input.cmake b/clang/test/dpct/cmake_migration/case_055/input.cmake index 2f496e539995..8556305918a1 100644 --- a/clang/test/dpct/cmake_migration/case_055/input.cmake +++ b/clang/test/dpct/cmake_migration/case_055/input.cmake @@ -8,3 +8,9 @@ set(CUDA_HOST_FLAGS "--extra-warnings -Wdeprecated") set (CUDA_HOST_FLAGS "--extra-warnings -Wdeprecated") set(CUDA_HOST_FLAGS "${host_flags}") + +list(APPEND CMAKE_CUDA_FLAGS "-Xcompiler=-Wno-float-conversion") +list(APPEND CMAKE_CUDA_FLAGS "-Xcompiler=-fno-strict-aliasing") +list(APPEND CMAKE_CUDA_FLAGS "-Xcudafe=--diag_suppress=unrecognized_gcc_pragma") +list(APPEND CMAKE_CUDA_FLAGS "--extended-lambda") +list(APPEND CMAKE_CUDA_FLAGS "--expt-relaxed-constexpr") diff --git a/clang/test/dpct/compat_with_clang.cu b/clang/test/dpct/compat_with_clang.cu index bdf14197a321..0cfbdfb2eb44 100644 --- a/clang/test/dpct/compat_with_clang.cu +++ b/clang/test/dpct/compat_with_clang.cu @@ -14,7 +14,7 @@ __device__ inline void foo1(__half2 *array, __half a) { } // CHECK: void foo2(int a, int b) { -// CHECK-NEXT: sycl::range<3> block{1, 1, dpct::min(512, uint32_t(a * b))}; +// CHECK-NEXT: dpct::dim3 block{dpct::min(512, uint32_t(a * b))}; // CHECK-NEXT: } void foo2(int a, int b) { dim3 block{min(512, uint32_t(a * b))}; diff --git a/clang/test/dpct/cooperative_groups2.cu b/clang/test/dpct/cooperative_groups2.cu index b60fc4efabf2..851c56b2d688 100644 --- a/clang/test/dpct/cooperative_groups2.cu +++ b/clang/test/dpct/cooperative_groups2.cu @@ -17,8 +17,8 @@ __device__ void foo() { // CHECK: auto block = item_ct1.get_group(); auto block = cg::this_thread_block(); - // CHECK: auto group_x = block.get_group_id()[2]; - // CHECK-NEXT: auto thread_x = block.get_local_id()[2]; + // CHECK: auto group_x = dpct::dim3(block.get_group_id()).x; + // CHECK-NEXT: auto thread_x = dpct::dim3(block.get_local_id()).x; auto group_x = block.group_index().x; auto thread_x = block.thread_index().x; diff --git a/clang/test/dpct/cooperative_groups_thread_group.cu b/clang/test/dpct/cooperative_groups_thread_group.cu index 8ab84d4ea5dd..d4148da5794f 100644 --- a/clang/test/dpct/cooperative_groups_thread_group.cu +++ b/clang/test/dpct/cooperative_groups_thread_group.cu @@ -23,13 +23,13 @@ __device__ void testThreadGroup(cg::thread_group g) { g.size(); auto block = cg::this_thread_block(); - // CHECK: block.get_local_id(); + // CHECK: dpct::dim3(block.get_local_id()); block.thread_index(); } __global__ void kernelFunc() { auto block = cg::this_thread_block(); - // CHECK: block.get_local_id(); + // CHECK: dpct::dim3(block.get_local_id()); block.thread_index(); // CHECK: auto threadBlockGroup = sycl::ext::oneapi::experimental::this_group<3>(); auto threadBlockGroup = cg::this_thread_block(); diff --git a/clang/test/dpct/cooperative_groups_thread_group_no_free_query.cu b/clang/test/dpct/cooperative_groups_thread_group_no_free_query.cu index 1bdb01c6e256..c28883535bc8 100644 --- a/clang/test/dpct/cooperative_groups_thread_group_no_free_query.cu +++ b/clang/test/dpct/cooperative_groups_thread_group_no_free_query.cu @@ -25,13 +25,13 @@ __device__ void testThreadGroup(cg::thread_group g) { g.size(); auto block = cg::this_thread_block(); - // CHECK: block.get_local_id(); + // CHECK: dpct::dim3(block.get_local_id()); block.thread_index(); } __global__ void kernelFunc() { auto block = cg::this_thread_block(); - // CHECK: block.get_local_id(); + // CHECK: dpct::dim3(block.get_local_id()); block.thread_index(); // CHECK: auto threadBlockGroup = item_ct1.get_group(); auto threadBlockGroup = cg::this_thread_block(); diff --git a/clang/test/dpct/ctad.cu b/clang/test/dpct/ctad.cu index 7da089990679..c5b1abd2ff61 100644 --- a/clang/test/dpct/ctad.cu +++ b/clang/test/dpct/ctad.cu @@ -8,7 +8,7 @@ #define NUM 23 -// CHECK: void func(sycl::range<3> a, sycl::range<3> b, sycl::range<3> c, sycl::range<3> d) { +// CHECK: void func(dpct::dim3 a, dpct::dim3 b, dpct::dim3 c, dpct::dim3 d) { void func(dim3 a, dim3 b, dim3 c, dim3 d) { } @@ -20,7 +20,7 @@ int main() { // CHECK: dpct::device_ext &dev_ct1 = dpct::get_current_device(); // CHECK-NEXT: sycl::queue &q_ct1 = dev_ct1.out_of_order_queue(); // range default constructor does the right thing. - // CHECK: sycl::range deflt(1, 1, 1); + // CHECK: dpct::dim3 deflt; dim3 deflt; // CHECK: sycl::range deflt_1{0, 0, 0}; @@ -28,12 +28,12 @@ int main() { cudaExtent deflt_1; cudaPos deflt_2; - // CHECK: sycl::range round1_1(1, 1, NUM); + // CHECK: dpct::dim3 round1_1(NUM); dim3 round1_1(NUM); cudaExtent exten = make_cudaExtent(1,1,1);; - // CHECK: sycl::range castini = sycl::range(1, 1, 4); + // CHECK: dpct::dim3 castini = (dpct::dim3)4; dim3 castini = (dim3)4; // CHECK: sycl::range castini_1 = exten; @@ -41,14 +41,14 @@ int main() { cudaExtent castini_1 = exten; cudaPos castini_2 = deflt_2; - // CHECK: sycl::range copyctor1 = sycl::range(sycl::range(1, 1, 33)); + // CHECK: dpct::dim3 copyctor1 = dpct::dim3((dpct::dim3)33); dim3 copyctor1 = dim3((dim3)33); - // CHECK: sycl::range copyctor2 = sycl::range(copyctor1); + // CHECK: dpct::dim3 copyctor2 = dpct::dim3(copyctor1); dim3 copyctor2 = dim3(copyctor1); - // CHECK: sycl::range copyctor3(copyctor1); + // CHECK: dpct::dim3 copyctor3(copyctor1); dim3 copyctor3(copyctor1); // CHECK: sycl::range copyctor31(exten); @@ -56,17 +56,17 @@ int main() { cudaExtent copyctor31(exten); cudaPos copyctor32(deflt_2); - // CHECK: func(sycl::range(1, 1, 1), sycl::range(1, 1, 1), sycl::range(1, 1, 2), sycl::range(1, 2, 3)); + // CHECK: func((dpct::dim3)1, dpct::dim3(1), dpct::dim3(2, 1), dpct::dim3(3, 2, 1)); func((dim3)1, dim3(1), dim3(2, 1), dim3(3, 2, 1)); - // CHECK: func(deflt, sycl::range(deflt), sycl::range(deflt), sycl::range(1, 1, 2 + 3 * 3)); + // CHECK: func(deflt, dpct::dim3(deflt), (dpct::dim3)deflt, 2 + 3 * 3); func(deflt, dim3(deflt), (dim3)deflt, 2 + 3 * 3); // CHECK: sycl::range<3> *p_extent = nullptr; cudaExtent *p_extent = nullptr; - // CHECK: sycl::range<3> *p = &deflt; + // CHECK: dpct::dim3 *p = &deflt; dim3 *p = &deflt; - // CHECK: sycl::range<3> **pp = &p; + // CHECK: dpct::dim3 **pp = &p; dim3 **pp = &p; // CHECK: sycl::range<3> *p_1 = &deflt_1; @@ -77,15 +77,15 @@ int main() { struct container { unsigned int x, y, z; - // CHECK: sycl::range<3> w; + // CHECK: dpct::dim3 w; dim3 w; - // CHECK: sycl::range<3> *pw; + // CHECK: dpct::dim3 *pw; dim3 *pw; - // CHECK: sycl::range<3> **ppw; + // CHECK: dpct::dim3 **ppw; dim3 **ppw; }; - // CHECK: sycl::range gpu_blocks(1, 1, 1 / (castini[2] * 200)); + // CHECK: dpct::dim3 gpu_blocks(1 / (castini.x * 200)); dim3 gpu_blocks(1 / (castini.x * 200)); // CHECK: q_ct1.submit( // CHECK-NEXT: [&](sycl::handler &cgh) { diff --git a/clang/test/dpct/cudaGraph_test_default_option.cu b/clang/test/dpct/cudaGraph_test_default_option.cu index 0249cd067a9c..a84d88073de7 100644 --- a/clang/test/dpct/cudaGraph_test_default_option.cu +++ b/clang/test/dpct/cudaGraph_test_default_option.cu @@ -31,6 +31,32 @@ int main() { // CHECK-NEXT: */ cudaStreamEndCapture(stream, &graph); + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaStreamCaptureStatus is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaStreamCaptureStatus captureStatus; + + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaStreamCaptureStatusActive is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + captureStatus = cudaStreamCaptureStatusActive; + + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaStreamCaptureStatusNone is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + captureStatus = cudaStreamCaptureStatusNone; + + // CHECK: /* + // CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cudaStreamCaptureStatusInvalidated is not supported. + // CHECK-NEXT: */ + captureStatus = cudaStreamCaptureStatusInvalidated; + + + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaStreamIsCapturing is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaStreamIsCapturing(stream, &captureStatus); + // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphNode_t is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ diff --git a/clang/test/dpct/cudaStreamCaptureStatus_enum_test.cu b/clang/test/dpct/cudaStreamCaptureStatus_enum_test.cu new file mode 100644 index 000000000000..10428048b329 --- /dev/null +++ b/clang/test/dpct/cudaStreamCaptureStatus_enum_test.cu @@ -0,0 +1,31 @@ +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2 +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2 +// RUN: dpct --use-experimental-features=graph --format-range=none -out-root %T/cudaStreamCaptureStatus_enum_test %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only --std=c++14 +// RUN: FileCheck --input-file %T/cudaStreamCaptureStatus_enum_test/cudaStreamCaptureStatus_enum_test.dp.cpp --match-full-lines %s +// RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/cudaStreamCaptureStatus_enum_test/cudaStreamCaptureStatus_enum_test.dp.cpp -o %T/cudaStreamCaptureStatus_enum_test/cudaStreamCaptureStatus_enum_test.dp.o %} + +#ifndef BUILD_TEST +#include + +int main() { + cudaStream_t stream; + cudaStreamCreate(&stream); + // CHECK: sycl::ext::oneapi::experimental::queue_state captureStatus = sycl::ext::oneapi::experimental::queue_state::executing; + // CHECK-NEXT: captureStatus = sycl::ext::oneapi::experimental::queue_state::recording; + // CHECK-NEXT: /* + // CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cudaStreamCaptureStatusInvalidated is not supported. + // CHECK-NEXT: */ + // CHECK-NEXT: captureStatus = cudaStreamCaptureStatusInvalidated; + cudaStreamCaptureStatus captureStatus = cudaStreamCaptureStatusNone; + captureStatus = cudaStreamCaptureStatusActive; + captureStatus = cudaStreamCaptureStatusInvalidated; + + // CHECK: /* + // CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cudaStreamCaptureStatusInvalidated is not supported. + // CHECK-NEXT: */ + // CHECK-NEXT: if (captureStatus == cudaStreamCaptureStatusInvalidated) { + if (captureStatus == cudaStreamCaptureStatusInvalidated) { + return -1; + } +} +#endif diff --git a/clang/test/dpct/cudaStreamCapture_test.cu b/clang/test/dpct/cudaStreamCapture_test.cu index f39e7d452b8d..1f20669816c3 100644 --- a/clang/test/dpct/cudaStreamCapture_test.cu +++ b/clang/test/dpct/cudaStreamCapture_test.cu @@ -23,5 +23,10 @@ int main() { cudaStreamEndCapture(stream, graph2); cudaStreamEndCapture(stream, *graph3); + cudaStreamCaptureStatus captureStatus; + + // CHECK: captureStatus = stream->ext_oneapi_get_state(); + cudaStreamIsCapturing(stream, &captureStatus); + return 0; } diff --git a/clang/test/dpct/curand-device-usm.cu b/clang/test/dpct/curand-device-usm.cu index 454fef9fd442..a35ad49db26c 100644 --- a/clang/test/dpct/curand-device-usm.cu +++ b/clang/test/dpct/curand-device-usm.cu @@ -110,9 +110,9 @@ int main(int argc, char **argv) { CHECK(cudaMalloc((void **)&dOut, sizeof(int) * 10)); //CHECK: CHECK(DPCT_CHECK_ERROR(RandomStates = (dpct::rng::device::rng_generator> *)sycl::malloc_device(sizeof(dpct::rng::device::rng_generator>) * 10 * 10, q_ct1))); CHECK(cudaMalloc((void **)&RandomStates, sizeof(curandState) * 10 * 10)); - //CHECK: sycl::range<3> grid(1, 1, 10); + //CHECK: dpct::dim3 grid(10, 1); dim3 grid(10, 1); - //CHECK: CHECK(DPCT_CHECK_ERROR(dOut = sycl::malloc_device(grid[2], q_ct1))); + //CHECK: CHECK(DPCT_CHECK_ERROR(dOut = sycl::malloc_device(grid.x, q_ct1))); CHECK(cudaMalloc((void **)&dOut, sizeof(int) * grid.x)); return 0; diff --git a/clang/test/dpct/datatypes_test_part2.cu b/clang/test/dpct/datatypes_test_part2.cu index 9fdaa3f64900..19235fb7c168 100644 --- a/clang/test/dpct/datatypes_test_part2.cu +++ b/clang/test/dpct/datatypes_test_part2.cu @@ -12,10 +12,10 @@ void case_1(void) { { -// CHECK: sycl::range<3> var1(1, 1, 1); -// CHECK-NEXT: sycl::range<3> *var2; -// CHECK-NEXT: sycl::range<3> &var3 = var1; -// CHECK-NEXT: sycl::range<3> &&var4 = std::move(var1); +// CHECK: dpct::dim3 var1; +// CHECK-NEXT: dpct::dim3 *var2; +// CHECK-NEXT: dpct::dim3 &var3 = var1; +// CHECK-NEXT: dpct::dim3 &&var4 = std::move(var1); dim3 var1; dim3 *var2; dim3 &var3 = var1; @@ -240,8 +240,8 @@ CUstream_st *var2; // case 2 void case_2(void) { { -// CHECK: new sycl::range<3>(1, 1, 1); -// CHECK-NEXT: new sycl::range<3> *(); +// CHECK: new dpct::dim3(); +// CHECK-NEXT: new dpct::dim3 *(); new dim3(); new dim3 *(); } @@ -386,9 +386,9 @@ void case_2(void) { } // case 3 -// CHECK: sycl::range<3> foo0(); -// CHECK-NEXT: sycl::range<3> *foo1(); -// CHECK-NEXT: sycl::range<3> &foo2(); +// CHECK: dpct::dim3 foo0(); +// CHECK-NEXT: dpct::dim3 *foo1(); +// CHECK-NEXT: dpct::dim3 &foo2(); dim3 foo0(); dim3 *foo1(); dim3 &foo2(); @@ -535,10 +535,10 @@ CUstream_st *foo_2(); // case 4 template struct S {}; -// CHECK: template <> struct S> {}; -// CHECK-NEXT: template <> struct S *> {}; -// CHECK-NEXT: template <> struct S &> {}; -// CHECK-NEXT: template <> struct S &&> {}; +// CHECK: template <> struct S {}; +// CHECK-NEXT: template <> struct S {}; +// CHECK-NEXT: template <> struct S {}; +// CHECK-NEXT: template <> struct S {}; template <> struct S {}; template <> struct S {}; template <> struct S {}; @@ -710,10 +710,10 @@ template <> struct S {}; template void template_foo() {} void case_5(){ -// CHECK: template_foo>(); -// CHECK-NEXT: template_foo *>(); -// CHECK-NEXT: template_foo &>(); -// CHECK-NEXT: template_foo &&>(); +// CHECK: template_foo(); +// CHECK-NEXT: template_foo(); +// CHECK-NEXT: template_foo(); +// CHECK-NEXT: template_foo(); template_foo(); template_foo(); template_foo(); @@ -903,10 +903,10 @@ template_foo(); // case 6 -// CHECK: using UT0 = sycl::range<3>; -// CHECK-NEXT: using UT1 = sycl::range<3> *; -// CHECK-NEXT: using UT2 = sycl::range<3> &; -// CHECK-NEXT: using UT3 = sycl::range<3> &&; +// CHECK: using UT0 = dpct::dim3; +// CHECK-NEXT: using UT1 = dpct::dim3 *; +// CHECK-NEXT: using UT2 = dpct::dim3 &; +// CHECK-NEXT: using UT3 = dpct::dim3 &&; using UT0 = dim3; using UT1 = dim3 *; using UT2 = dim3 &; @@ -1095,10 +1095,10 @@ using UT_4 = CUstream_st &&; // case 7 -// CHECK: typedef sycl::range<3> T0; -// CHECK-NEXT: typedef sycl::range<3>* T1; -// CHECK-NEXT: typedef sycl::range<3>& T2; -// CHECK-NEXT: typedef sycl::range<3>&& T3; +// CHECK: typedef dpct::dim3 T0; +// CHECK-NEXT: typedef dpct::dim3* T1; +// CHECK-NEXT: typedef dpct::dim3& T2; +// CHECK-NEXT: typedef dpct::dim3&& T3; typedef dim3 T0; typedef dim3* T1; typedef dim3& T2; @@ -1289,11 +1289,11 @@ typedef CUstream_st&& T_4; __device__ void foo_t(){ { -// CHECK: #define T8_0 sycl::range<3> -// CHECK-NEXT: #define T8_1 sycl::range<3> * -// CHECK-NEXT: #define T8_2 sycl::range<3> & -// CHECK-NEXT: #define T8_3 sycl::range<3> && -// CHECK-NEXT: T8_0 a1(1, 1, 1); +// CHECK: #define T8_0 dpct::dim3 +// CHECK-NEXT: #define T8_1 dpct::dim3 * +// CHECK-NEXT: #define T8_2 dpct::dim3 & +// CHECK-NEXT: #define T8_3 dpct::dim3 && +// CHECK-NEXT: T8_0 a1; // CHECK-NEXT: T8_1 a2; // CHECK-NEXT: T8_2 a3=a1; // CHECK-NEXT: T8_3 a4=std::move(a1); @@ -1716,10 +1716,10 @@ template void template_foo(T var) {} #define foo3(DataType) template_foo(DataType & varname) #define foo4(DataType) template_foo(DataType && varname) -// CHECK: template <> void foo1(sycl::range<3>){} -// CHECK-NEXT: template <> void foo2(sycl::range<3>){} -// CHECK-NEXT: template <> void foo3(sycl::range<3>){} -// CHECK-NEXT: template <> void foo4(sycl::range<3>){} +// CHECK: template <> void foo1(dpct::dim3){} +// CHECK-NEXT: template <> void foo2(dpct::dim3){} +// CHECK-NEXT: template <> void foo3(dpct::dim3){} +// CHECK-NEXT: template <> void foo4(dpct::dim3){} template <> void foo1(dim3){} template <> void foo2(dim3){} template <> void foo3(dim3){} diff --git a/clang/test/dpct/decltype_of_vector_type_field.cu b/clang/test/dpct/decltype_of_vector_type_field.cu index f670a837c996..6560c2a0f277 100644 --- a/clang/test/dpct/decltype_of_vector_type_field.cu +++ b/clang/test/dpct/decltype_of_vector_type_field.cu @@ -3,11 +3,11 @@ // RUN: %if build_lit %{icpx -c -fsycl %T/decltype_of_vector_type_field/decltype_of_vector_type_field.dp.cpp -o %T/decltype_of_vector_type_field/decltype_of_vector_type_field.dp.o %} void f() { - // CHECK: using dim3_x_type = size_t; + // CHECK: using dim3_x_type = decltype(dpct::dim3::x); using dim3_x_type = decltype(dim3::x); - // CHECK: using dim3_y_type = size_t; + // CHECK: using dim3_y_type = decltype(dpct::dim3::y); using dim3_y_type = decltype(dim3::y); - // CHECK: using dim3_z_type = size_t; + // CHECK: using dim3_z_type = decltype(dpct::dim3::z); using dim3_z_type = decltype(dim3::z); // CHECK: using int1_x_type = int32_t; using int1_x_type = decltype(int1::x); diff --git a/clang/test/dpct/device001.cu b/clang/test/dpct/device001.cu index ae4cf24fdb5b..2267b885cfcb 100644 --- a/clang/test/dpct/device001.cu +++ b/clang/test/dpct/device001.cu @@ -142,7 +142,7 @@ int main(int argc, char **argv) { // CHECK-NEXT:size_t share_multi_proc_mem_size = deviceProp.get_local_mem_size(); size_t share_multi_proc_mem_size = deviceProp.sharedMemPerMultiprocessor; - // CHECK: sycl::range<3> grid(1, 1, deviceProp.get_max_compute_units() * (deviceProp.get_max_work_items_per_compute_unit() / deviceProp.get_max_sub_group_size())); + // CHECK: dpct::dim3 grid(deviceProp.get_max_compute_units() * (deviceProp.get_max_work_items_per_compute_unit() / deviceProp.get_max_sub_group_size())); dim3 grid(deviceProp.multiProcessorCount * (deviceProp.maxThreadsPerMultiProcessor / deviceProp.warpSize)); // CHECK:/* diff --git a/clang/test/dpct/dim3.cu b/clang/test/dpct/dim3.cu index 544d39526b6a..e93fe3e3bb44 100644 --- a/clang/test/dpct/dim3.cu +++ b/clang/test/dpct/dim3.cu @@ -19,12 +19,12 @@ int main() { // CHECK: /* // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam2 = {0, sycl::range<3>(1, 1, 0)}; + // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam2 = {0, 0}; cudaKernelNodeParams kernelNodeParam2 = {0, 0}; // CHECK: /* // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam3 = {0, sycl::range<3>(1, 1, 0), sycl::range<3>(1, 1, 0)}; + // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam3 = {0, 0, 0}; cudaKernelNodeParams kernelNodeParam3 = {0, 0, 0}; // CHECK: /* @@ -40,11 +40,11 @@ int main() { // CHECK: /* // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam6{0, sycl::range<3>(1, 1, 0)}; + // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam6{0, 0}; cudaKernelNodeParams kernelNodeParam6{0, 0}; // CHECK: /* // CHECK-NEXT: DPCT1082:{{[0-9]+}}: Migration of cudaKernelNodeParams type is not supported. // CHECK-NEXT: */ - // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam7{0, sycl::range<3>(1, 1, 0), sycl::range<3>(1, 1, 0)}; + // CHECK-NEXT: cudaKernelNodeParams kernelNodeParam7{0, 0, 0}; cudaKernelNodeParams kernelNodeParam7{0, 0, 0}; } diff --git a/clang/test/dpct/enable-all-experimental-features.cu b/clang/test/dpct/enable-all-experimental-features.cu index 272253b6950e..0a24e9bb89a8 100644 --- a/clang/test/dpct/enable-all-experimental-features.cu +++ b/clang/test/dpct/enable-all-experimental-features.cu @@ -29,13 +29,13 @@ __device__ void testThreadGroup(cg::thread_group g) { g.size(); auto block = cg::this_thread_block(); - // CHECK: block.get_local_id(); + // CHECK: dpct::dim3(block.get_local_id()); block.thread_index(); } __global__ void kernelFunc() { auto block = cg::this_thread_block(); - // CHECK: block.get_local_id(); + // CHECK: dpct::dim3(block.get_local_id()); block.thread_index(); // CHECK: auto threadBlockGroup = sycl::ext::oneapi::experimental::this_group<3>(); auto threadBlockGroup = cg::this_thread_block(); @@ -94,7 +94,7 @@ namespace cg = cooperative_groups; __global__ void kernelFunc1() { auto block = cg::this_thread_block(); - // CHECK: block.get_local_id(); + // CHECK: dpct::dim3(block.get_local_id()); block.thread_index(); // CHECK: auto threadBlockGroup = sycl::ext::oneapi::experimental::this_group<3>(); auto threadBlockGroup = cg::this_thread_block(); diff --git a/clang/test/dpct/enable-all-extensions.cu b/clang/test/dpct/enable-all-extensions.cu index 7b24ea6f968f..720504a58102 100644 --- a/clang/test/dpct/enable-all-extensions.cu +++ b/clang/test/dpct/enable-all-extensions.cu @@ -67,9 +67,9 @@ void h() { void foo1() { int n; - // CHECK: sycl::range<3> abc(1, 1, 1); - // CHECK-NEXT: abc[1] = std::min(std::max(512 / (unsigned int)abc[2], 1u), (unsigned int)n); - // CHECK-NEXT: abc[0] = std::min(std::max(512 / ((unsigned int)abc[2] * (unsigned int)abc[1]), 1u), (unsigned int)n); + // CHECK: dpct::dim3 abc; + // CHECK-NEXT: abc.y = std::min(std::max(512 / abc.x, 1u), (unsigned int)n); + // CHECK-NEXT: abc.z = std::min(std::max(512 / (abc.x * abc.y), 1u), (unsigned int)n); dim3 abc; abc.y = std::min(std::max(512 / abc.x, 1u), (unsigned int)n); abc.z = std::min(std::max(512 / (abc.x * abc.y), 1u), (unsigned int)n); diff --git a/clang/test/dpct/formatIndent.cu b/clang/test/dpct/formatIndent.cu index 21d988802a53..233f127d6fc7 100644 --- a/clang/test/dpct/formatIndent.cu +++ b/clang/test/dpct/formatIndent.cu @@ -17,7 +17,7 @@ void foo(){ //CHECK:void foo1(){ //CHECK-NEXT: //some comments //CHECK-NEXT: //some comments -//CHECK-NEXT: sycl::range<3> griddim = sycl::range<3>(1, 1, 2); +//CHECK-NEXT: dpct::dim3 griddim = 2; //CHECK-NEXT:} void foo1(){ //some comments @@ -27,7 +27,7 @@ void foo1(){ //CHECK:void foo2(){ //CHECK-NEXT: //some comments -//CHECK-NEXT: sycl::range<3> griddim = sycl::range<3>(1, 1, 2); +//CHECK-NEXT: dpct::dim3 griddim = 2; //CHECK-NEXT:} void foo2(){ //some comments @@ -36,7 +36,7 @@ void foo2(){ //CHECK:void foo3(){ //CHECK-NEXT: int test; -//CHECK-NEXT: sycl::range<3> griddim = sycl::range<3>(1, 1, 2); +//CHECK-NEXT: dpct::dim3 griddim = 2; //CHECK-NEXT:} void foo3(){ int test; diff --git a/clang/test/dpct/formatMigratedExplicitly.cu b/clang/test/dpct/formatMigratedExplicitly.cu index 77bf7b84a15d..2fe138312d2a 100644 --- a/clang/test/dpct/formatMigratedExplicitly.cu +++ b/clang/test/dpct/formatMigratedExplicitly.cu @@ -32,8 +32,8 @@ __global__ void testKernelPtr(const int *L, const int *M, int N) { //CHECK:int main() { //CHECK-NEXT: dpct::device_ext &dev_ct1 = dpct::get_current_device(); //CHECK-NEXT: sycl::queue &q_ct1 = dev_ct1.in_order_queue(); -//CHECK-NEXT: sycl::range<3> griddim = sycl::range<3>(1, 1, 2); -//CHECK-NEXT: sycl::range<3> threaddim = sycl::range<3>(1, 1, 32); +//CHECK-NEXT: dpct::dim3 griddim = 2; +//CHECK-NEXT: dpct::dim3 threaddim = 32; //CHECK-NEXT: int *karg1, *karg2; //CHECK-NEXT: karg1 = sycl::malloc_device(32, q_ct1); //CHECK-NEXT: karg2 = sycl::malloc_device(32, q_ct1); diff --git a/clang/test/dpct/formatMigratedGoogle.cu b/clang/test/dpct/formatMigratedGoogle.cu index 2563eb7d529e..f7a79791a44b 100644 --- a/clang/test/dpct/formatMigratedGoogle.cu +++ b/clang/test/dpct/formatMigratedGoogle.cu @@ -32,8 +32,8 @@ __global__ void testKernelPtr(const int *L, const int *M, int N) { //CHECK:int main() { //CHECK-NEXT: dpct::device_ext &dev_ct1 = dpct::get_current_device(); //CHECK-NEXT: sycl::queue &q_ct1 = dev_ct1.in_order_queue(); -//CHECK-NEXT: sycl::range<3> griddim = sycl::range<3>(1, 1, 2); -//CHECK-NEXT: sycl::range<3> threaddim = sycl::range<3>(1, 1, 32); +//CHECK-NEXT: dpct::dim3 griddim = 2; +//CHECK-NEXT: dpct::dim3 threaddim = 32; //CHECK-NEXT: int *karg1, *karg2; //CHECK-NEXT: karg1 = sycl::malloc_device(32, q_ct1); //CHECK-NEXT: karg2 = sycl::malloc_device(32, q_ct1); diff --git a/clang/test/dpct/formatMigratedLLVM.cu b/clang/test/dpct/formatMigratedLLVM.cu index 3d48029e8708..4d5c25111e97 100644 --- a/clang/test/dpct/formatMigratedLLVM.cu +++ b/clang/test/dpct/formatMigratedLLVM.cu @@ -32,8 +32,8 @@ __global__ void testKernelPtr(const int *L, const int *M, int N) { //CHECK:int main() { //CHECK-NEXT: dpct::device_ext &dev_ct1 = dpct::get_current_device(); //CHECK-NEXT: sycl::queue &q_ct1 = dev_ct1.in_order_queue(); -//CHECK-NEXT: sycl::range<3> griddim = sycl::range<3>(1, 1, 2); -//CHECK-NEXT: sycl::range<3> threaddim = sycl::range<3>(1, 1, 32); +//CHECK-NEXT: dpct::dim3 griddim = 2; +//CHECK-NEXT: dpct::dim3 threaddim = 32; //CHECK-NEXT: int *karg1, *karg2; //CHECK-NEXT: karg1 = sycl::malloc_device(32, q_ct1); //CHECK-NEXT: karg2 = sycl::malloc_device(32, q_ct1); diff --git a/clang/test/dpct/kernel-call-origcode-embedded.cu b/clang/test/dpct/kernel-call-origcode-embedded.cu index 0deffd8d922a..bdb3874e21e8 100644 --- a/clang/test/dpct/kernel-call-origcode-embedded.cu +++ b/clang/test/dpct/kernel-call-origcode-embedded.cu @@ -54,11 +54,11 @@ int main() { // CHECK: dpct::device_ext &dev_ct1 = dpct::get_current_device(); // CHECK-NEXT: sycl::queue &q_ct1 = dev_ct1.out_of_order_queue(); // CHECK: /* DPCT_ORIG dim3 griddim = 2;*/ - // CHECK-NEXT: sycl::range<3> griddim = sycl::range<3>(1, 1, 2); + // CHECK-NEXT: dpct::dim3 griddim = 2; dim3 griddim = 2; // CHECK: /* DPCT_ORIG dim3 threaddim = 32;*/ - // CHECK-NEXT: sycl::range<3> threaddim = sycl::range<3>(1, 1, 32); + // CHECK-NEXT: dpct::dim3 threaddim = 32; dim3 threaddim = 32; void *karg1 = 0; @@ -130,7 +130,7 @@ int main() { // CHECK-NEXT: DPCT1049:{{[0-9]+}}: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. // CHECK-NEXT: */ // CHECK-NEXT: q_ct1.parallel_for>( - // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, griddim[2]) * sycl::range<3>(1, 1, griddim[1] + 2), sycl::range<3>(1, 1, griddim[1] + 2)), + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, griddim.x) * sycl::range<3>(1, 1, griddim.y + 2), sycl::range<3>(1, 1, griddim.y + 2)), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { // CHECK-NEXT: testKernel(karg1int, karg2int, karg3int, item_ct1); // CHECK-NEXT: }); diff --git a/clang/test/dpct/kernel-call.cu b/clang/test/dpct/kernel-call.cu index cf6d04f6426e..9764f08d7d39 100644 --- a/clang/test/dpct/kernel-call.cu +++ b/clang/test/dpct/kernel-call.cu @@ -198,7 +198,7 @@ int main() { // CHECK-NEXT: auto arr_karg3int_ct2 = arr[karg3int]; // CHECK-EMPTY: // CHECK-NEXT: cgh.parallel_for>( - // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, griddim[2]) * sycl::range<3>(1, 1, griddim[1] + 2), sycl::range<3>(1, 1, griddim[1] + 2)), + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, griddim.x) * sycl::range<3>(1, 1, griddim.y + 2), sycl::range<3>(1, 1, griddim.y + 2)), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { // CHECK-NEXT: testKernel(karg1int, karg2int, item_ct1, arr_karg3int_ct2); // CHECK-NEXT: }); @@ -293,7 +293,7 @@ int *g_a; __global__ void foo_kernel3(int *d) { d[0]; } -//CHECK:void run_foo(sycl::range<3> c, sycl::range<3> d) { +//CHECK:void run_foo(dpct::dim3 c, dpct::dim3 d) { //CHECK-NEXT: if (1) //CHECK-NEXT: dpct::get_out_of_order_queue().submit( //CHECK-NEXT: [&](sycl::handler &cgh) { @@ -310,7 +310,7 @@ void run_foo(dim3 c, dim3 d) { if (1) foo_kernel3<<>>(&g_a[0]); } -//CHECK:void run_foo2(sycl::range<3> c, sycl::range<3> d) { +//CHECK:void run_foo2(dpct::dim3 c, dpct::dim3 d) { //CHECK-NEXT: dpct::device_ext &dev_ct1 = dpct::get_current_device(); //CHECK-NEXT: sycl::queue &q_ct1 = dev_ct1.out_of_order_queue(); //CHECK-NEXT: if (1) @@ -345,7 +345,7 @@ void run_foo2(dim3 c, dim3 d) { else foo_kernel3<<>>(g_a); } -//CHECK:void run_foo3(sycl::range<3> c, sycl::range<3> d) { +//CHECK:void run_foo3(dpct::dim3 c, dpct::dim3 d) { //CHECK-NEXT: for (;;) //CHECK-NEXT: /* //CHECK-NEXT: DPCT1049:{{[0-9]+}}: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. @@ -365,7 +365,7 @@ void run_foo3(dim3 c, dim3 d) { for (;;) foo_kernel3<<>>(g_a); } -//CHECK:void run_foo4(sycl::range<3> c, sycl::range<3> d) { +//CHECK:void run_foo4(dpct::dim3 c, dpct::dim3 d) { //CHECK-NEXT: while (1) //CHECK-NEXT: dpct::get_out_of_order_queue().submit( //CHECK-NEXT: [&](sycl::handler &cgh) { diff --git a/clang/test/dpct/kernel-usm.cu b/clang/test/dpct/kernel-usm.cu index 4959a17296c1..670d26d76583 100644 --- a/clang/test/dpct/kernel-usm.cu +++ b/clang/test/dpct/kernel-usm.cu @@ -247,7 +247,7 @@ int *g_a; __global__ void foo_kernel3(int *d) { } -//CHECK:void run_foo(sycl::range<3> c, sycl::range<3> d) { +//CHECK:void run_foo(dpct::dim3 c, dpct::dim3 d) { //CHECK-NEXT: if (1) //CHECK-NEXT: dpct::get_in_order_queue().submit( //CHECK-NEXT: [&](sycl::handler &cgh) { diff --git a/clang/test/dpct/kernel_1d_range.cu b/clang/test/dpct/kernel_1d_range.cu index 76ce2dde45df..df8d01e20e31 100644 --- a/clang/test/dpct/kernel_1d_range.cu +++ b/clang/test/dpct/kernel_1d_range.cu @@ -647,8 +647,8 @@ int query_block(const int x) { void foo7() { int n = 128; - //CHECK:sycl::range<3> block(1, 1, n); - //CHECK-NEXT:sycl::range<3> grid(1, 1, query_block(n)); + //CHECK:dpct::dim3 block(n); + //CHECK-NEXT:dpct::dim3 grid(query_block(n)); dim3 block(n); dim3 grid(query_block(n)); //CHECK:dpct::get_in_order_queue().parallel_for( diff --git a/clang/test/dpct/kernel_without_name.cu b/clang/test/dpct/kernel_without_name.cu index 7c3cc8196a67..f3a484c95ecf 100644 --- a/clang/test/dpct/kernel_without_name.cu +++ b/clang/test/dpct/kernel_without_name.cu @@ -224,7 +224,7 @@ void run_foo6() { dim3 grid; //CHECK:q_ct1.submit( //CHECK-NEXT: [&](sycl::handler &cgh) { - //CHECK-NEXT: auto grid_x_grid_y_ct0 = grid[2] * grid[1]; + //CHECK-NEXT: auto grid_x_grid_y_ct0 = grid.x * grid.y; //CHECK-EMPTY: //CHECK-NEXT: cgh.parallel_for( //CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), @@ -235,7 +235,7 @@ void run_foo6() { foo_kernel5<<<1, 1>>>(grid.x * grid.y); //CHECK:q_ct1.submit( //CHECK-NEXT: [&](sycl::handler &cgh) { - //CHECK-NEXT: auto grid_x_ct0 = ++grid[2]; + //CHECK-NEXT: auto grid_x_ct0 = ++grid.x; //CHECK-EMPTY: //CHECK-NEXT: cgh.parallel_for( //CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), diff --git a/clang/test/dpct/macro_test.cu b/clang/test/dpct/macro_test.cu index ebf7ac0ce590..6862447c1a70 100644 --- a/clang/test/dpct/macro_test.cu +++ b/clang/test/dpct/macro_test.cu @@ -58,7 +58,7 @@ public: #define CALL(x) x; #define EMPTY_MACRO(x) x -//CHECK:#define GET_MEMBER_MACRO(x) x[1] = 5 +//CHECK:#define GET_MEMBER_MACRO(x) x.y = 5 #define GET_MEMBER_MACRO(x) x.y = 5 __global__ void foo_kernel() {} @@ -99,9 +99,9 @@ void foo() { #endif - // CHECK: (*d3.A)[2] = 3; - // CHECK-NEXT: d3.B[2] = 2; - // CHECK-NEXT: EMPTY_MACRO(d3.B[2]); + // CHECK: d3.A->x = 3; + // CHECK-NEXT: d3.B.x = 2; + // CHECK-NEXT: EMPTY_MACRO(d3.B.x); // CHECK-NEXT: GET_MEMBER_MACRO(d3.B); d3.A->x = 3; d3.B.x = 2; @@ -268,7 +268,7 @@ MACRO_KC2(griddim,threaddim,1,0) // CHECK: MACRO_KC2(3,2,1,0) MACRO_KC2(3,2,1,0) -// CHECK: MACRO_KC2(sycl::range<3>(5, 4, 3), 2, 1, 0) +// CHECK: MACRO_KC2(dpct::dim3(5, 4, 3), 2, 1, 0) MACRO_KC2(dim3(5,4,3),2,1,0) int *a; @@ -1355,7 +1355,7 @@ void foo38() { template void foo38(T *t); -//CHECK: #define GRID grid[2] = 3; +//CHECK: #define GRID grid.x = 3; #define GRID grid.x = 3; template diff --git a/clang/test/dpct/math_functions_std.cu b/clang/test/dpct/math_functions_std.cu index 386f7ff98a6f..2d4433cf2dfb 100644 --- a/clang/test/dpct/math_functions_std.cu +++ b/clang/test/dpct/math_functions_std.cu @@ -64,9 +64,9 @@ void h() { void foo1() { int n; - //CHECK: sycl::range<3> abc(1, 1, 1); - //CHECK-NEXT: abc[1] = std::min(std::max(512 / (unsigned int)abc[2], 1u), (unsigned int) n); - //CHECK-NEXT: abc[0] = std::min(std::max(512 / ((unsigned int)abc[2] * (unsigned int)abc[1]), 1u), (unsigned int)n); + //CHECK: dpct::dim3 abc; + //CHECK-NEXT: abc.y = std::min(std::max(512 / abc.x, 1u), (unsigned int) n); + //CHECK-NEXT: abc.z = std::min(std::max(512 / (abc.x * abc.y), 1u), (unsigned int)n); dim3 abc; abc.y = std::min(std::max(512 / abc.x, 1u), (unsigned int) n); abc.z = std::min(std::max(512 / (abc.x * abc.y), 1u), (unsigned int)n); diff --git a/clang/test/dpct/query_api_mapping/Runtime/test-after10.cu b/clang/test/dpct/query_api_mapping/Runtime/test-after10.cu index 6d464785ec7d..46736c156e5a 100644 --- a/clang/test/dpct/query_api_mapping/Runtime/test-after10.cu +++ b/clang/test/dpct/query_api_mapping/Runtime/test-after10.cu @@ -19,5 +19,5 @@ // CUDASTREAMISCAPTURING: CUDA API: // CUDASTREAMISCAPTURING-NEXT: cudaStreamIsCapturing(s /*cudaStream_t*/, // CUDASTREAMISCAPTURING-NEXT: ps /* enum cudaStreamCaptureStatus **/); -// CUDASTREAMISCAPTURING-NEXT: The API is Removed. -// CUDASTREAMISCAPTURING-EMPTY: +// CUDASTREAMISCAPTURING-NEXT: Is migrated to (with the option --use-experimental-features=graph): +// CUDASTREAMISCAPTURING-NEXT: *ps = s->ext_oneapi_get_state(); diff --git a/clang/test/dpct/replace-dim3.cu b/clang/test/dpct/replace-dim3.cu index 268eb166c2f3..8ac34b85e002 100644 --- a/clang/test/dpct/replace-dim3.cu +++ b/clang/test/dpct/replace-dim3.cu @@ -4,33 +4,33 @@ // RUN: FileCheck --input-file %T/replace-dim3/replace-dim3.dp.cpp --match-full-lines %s // RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/replace-dim3/replace-dim3.dp.cpp -o %T/replace-dim3/replace-dim3.dp.o %} -#ifndef BUILD_TEST #include #include +#ifndef BUILD_TEST #define NUM 23 #define CALL_FUNC(func) func() -// CHECK: #define DIM3_DEFAULT_VAR(name) sycl::range<3> name +// CHECK: #define DIM3_DEFAULT_VAR(name) dpct::dim3 name #define DIM3_DEFAULT_VAR(name) dim3 name -// CHECK: void func(sycl::range<3> a, sycl::range<3> b, sycl::range<3> c, sycl::range<3> d) { +// CHECK: void func(dpct::dim3 a, dpct::dim3 b, dpct::dim3 c, dpct::dim3 d) { void func(dim3 a, dim3 b, dim3 c, dim3 d) { } -// CHECK: void test(const sycl::range<3>& a, const sycl::range<3>& b) { +// CHECK: void test(const dpct::dim3& a, const dpct::dim3& b) { void test(const dim3& a, const dim3& b) { } -// CHECK: void test(sycl::range<3>&& a, sycl::range<3>&& b) { +// CHECK: void test(dpct::dim3&& a, dpct::dim3&& b) { void test(dim3&& a, dim3&& b) { } -// CHECK: void test(const sycl::range<3>* a, const sycl::range<3>* b) { +// CHECK: void test(const dpct::dim3* a, const dpct::dim3* b) { void test(const dim3* a, const dim3* b) { } -// CHECK: void test(const sycl::range<3>** a, const sycl::range<3>** b) { +// CHECK: void test(const dpct::dim3** a, const dpct::dim3** b) { void test(const dim3** a, const dim3** b) { } @@ -40,148 +40,148 @@ int main() { // CHECK: dpct::device_ext &dev_ct1 = dpct::get_current_device(); // CHECK-NEXT: sycl::queue &q_ct1 = dev_ct1.out_of_order_queue(); // range default constructor does the right thing. - // CHECK: sycl::range<3> deflt(1, 1, 1); + // CHECK: dpct::dim3 deflt; dim3 deflt; - // CHECK: sycl::range<3> round1(1, 1, 1); + // CHECK: dpct::dim3 round1(1); dim3 round1(1); - // CHECK: sycl::range<3> round1_1(1, 1, NUM); + // CHECK: dpct::dim3 round1_1(NUM); dim3 round1_1(NUM); - // CHECK: sycl::range<3> round2(1, 1, 2); + // CHECK: dpct::dim3 round2(2, 1); dim3 round2(2, 1); - // CHECK: sycl::range<3> round2_1(1, NUM, NUM); + // CHECK: dpct::dim3 round2_1(NUM, NUM); dim3 round2_1(NUM, NUM); - // CHECK: sycl::range<3> assign = sycl::range<3>(1, 1, 32); + // CHECK: dpct::dim3 assign = 32; dim3 assign = 32; - // CHECK: sycl::range<3> assign_1 = sycl::range<3>(1, 1, NUM); + // CHECK: dpct::dim3 assign_1 = NUM; dim3 assign_1 = NUM; - // CHECK: sycl::range<3> castini = sycl::range<3>(1, 1, 4); + // CHECK: dpct::dim3 castini = (dpct::dim3)4; dim3 castini = (dim3)4; - // CHECK: sycl::range<3> castini_1 = sycl::range<3>(1, 1, NUM); + // CHECK: dpct::dim3 castini_1 = (dpct::dim3)NUM; dim3 castini_1 = (dim3)NUM; - // CHECK: sycl::range<3> castini2 = sycl::range<3>(1, 2, 2); + // CHECK: dpct::dim3 castini2 = dpct::dim3(2, 2); dim3 castini2 = dim3(2, 2); - // CHECK: sycl::range<3> castini2_1 = sycl::range<3>(1, NUM, NUM); + // CHECK: dpct::dim3 castini2_1 = dpct::dim3(NUM, NUM); dim3 castini2_1 = dim3(NUM, NUM); - // CHECK: sycl::range<3> castini3 = sycl::range<3>(10, 1, 3); + // CHECK: dpct::dim3 castini3 = dpct::dim3(3, 1, 10); dim3 castini3 = dim3(3, 1, 10); - // CHECK: sycl::range<3> castini3_1 = sycl::range<3>(NUM, NUM, NUM); + // CHECK: dpct::dim3 castini3_1 = dpct::dim3(NUM, NUM, NUM); dim3 castini3_1 = dim3(NUM, NUM, NUM); - // CHECK: deflt = sycl::range<3>(1, 1, 3); + // CHECK: deflt = dpct::dim3(3); deflt = dim3(3); - // CHECK: deflt = sycl::range<3>(1, 1, NUM); + // CHECK: deflt = dpct::dim3(NUM); deflt = dim3(NUM); - // CHECK: deflt = sycl::range<3>(1, 1, 5); + // CHECK: deflt = 5; deflt = 5; - // CHECK: deflt = sycl::range<3>(1, 1, ((NUM%32 == 0) ? NUM/32 : (NUM/32 + 1))); + // CHECK: deflt = ((NUM%32 == 0) ? NUM/32 : (NUM/32 + 1)); deflt = ((NUM%32 == 0) ? NUM/32 : (NUM/32 + 1)); - // CHECK: sycl::range<3> copyctor1 = sycl::range<3>(sycl::range<3>(1, 1, 33)); + // CHECK: dpct::dim3 copyctor1 = dpct::dim3((dpct::dim3)33); dim3 copyctor1 = dim3((dim3)33); - // CHECK: sycl::range<3> copyctor1_1 = sycl::range<3>(sycl::range<3>(1, 1, NUM)); + // CHECK: dpct::dim3 copyctor1_1 = dpct::dim3((dpct::dim3)NUM); dim3 copyctor1_1 = dim3((dim3)NUM); - // CHECK: sycl::range<3> copyctor2 = sycl::range<3>(copyctor1); + // CHECK: dpct::dim3 copyctor2 = dpct::dim3(copyctor1); dim3 copyctor2 = dim3(copyctor1); - // CHECK: sycl::range<3> copyctor3(copyctor1); + // CHECK: dpct::dim3 copyctor3(copyctor1); dim3 copyctor3(copyctor1); - // CHECK: func(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 2), sycl::range<3>(1, 2, 3)); + // CHECK: func((dpct::dim3)1, dpct::dim3(1), dpct::dim3(2, 1), dpct::dim3(3, 2, 1)); func((dim3)1, dim3(1), dim3(2, 1), dim3(3, 2, 1)); - // CHECK: func(sycl::range<3>(1, 1, NUM), sycl::range<3>(1, 1, NUM), sycl::range<3>(1, NUM, NUM), sycl::range<3>(NUM, NUM, NUM)); + // CHECK: func((dpct::dim3)NUM, dpct::dim3(NUM), dpct::dim3(NUM, NUM), dpct::dim3(NUM, NUM, NUM)); func((dim3)NUM, dim3(NUM), dim3(NUM, NUM), dim3(NUM, NUM, NUM)); - // CHECK: func(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 2), sycl::range<3>(1, 1, 3), sycl::range<3>(1, 1, 4)); + // CHECK: func(1, 2, 3, 4); func(1, 2, 3, 4); - // CHECK: func(sycl::range<3>(1, 1, NUM), sycl::range<3>(1, 1, NUM), sycl::range<3>(1, 1, NUM), sycl::range<3>(1, 1, NUM)); + // CHECK: func(NUM, NUM, NUM, NUM); func(NUM, NUM, NUM, NUM); - // CHECK: func(deflt, sycl::range<3>(deflt), sycl::range<3>(deflt), sycl::range<3>(1, 1, 2 + 3 * 3)); + // CHECK: func(deflt, dpct::dim3(deflt), (dpct::dim3)deflt, 2 + 3 * 3); func(deflt, dim3(deflt), (dim3)deflt, 2 + 3 * 3); - // CHECK: func(deflt, sycl::range<3>(deflt), sycl::range<3>(deflt), sycl::range<3>(1, 1, NUM + NUM * NUM)); + // CHECK: func(deflt, dpct::dim3(deflt), (dpct::dim3)deflt, NUM + NUM * NUM); func(deflt, dim3(deflt), (dim3)deflt, NUM + NUM * NUM); - // CHECK: sycl::range<3> test(3, 2, 1); + // CHECK: dpct::dim3 test(1, 2, 3); dim3 test(1, 2, 3); - // CHECK: sycl::range<3> test_1(NUM, NUM, NUM); + // CHECK: dpct::dim3 test_1(NUM, NUM, NUM); dim3 test_1(NUM, NUM, NUM); - // CHECK: int b = test[2] + test[1] + test [0]; + // CHECK: int b = test.x + test. y + test .z; int b = test.x + test. y + test .z; - // CHECK: sycl::range<3> *p = &test; + // CHECK: dpct::dim3 *p = &test; dim3 *p = &test; - // CHECK: sycl::range<3> **pp = &p; + // CHECK: dpct::dim3 **pp = &p; dim3 **pp = &p; - // CHECK: int a = (*p)[2] + (*p)[1] + (*p)[0]; + // CHECK: int a = p->x + p->y + p->z; int a = p->x + p->y + p->z; - // CHECK: int aa = (*(*pp))[2] + (*(*pp))[1] + (*(*pp))[0]; + // CHECK: int aa = (*pp)->x + (*pp)->y + (*pp)->z; int aa = (*pp)->x + (*pp)->y + (*pp)->z; struct container { unsigned int x, y, z; - // CHECK: sycl::range<3> w; + // CHECK: dpct::dim3 w; dim3 w; - // CHECK: sycl::range<3> *pw; + // CHECK: dpct::dim3 *pw; dim3 *pw; - // CHECK: sycl::range<3> **ppw; + // CHECK: dpct::dim3 **ppw; dim3 **ppw; }; typedef struct container container; container t; - // CHECK: int c = t.w[2] + t.w[1] + t.w[0]; + // CHECK: int c = t.w.x + t.w.y + t.w.z; int c = t.w.x + t.w.y + t.w.z; - // CHECK: int c2 = (*t.pw)[2] + (*t.pw)[1] + (*t.pw)[0]; + // CHECK: int c2 = t.pw->x + t.pw->y + t.pw->z; int c2 = t.pw->x + t.pw->y + t.pw->z; - // CHECK: int c3 = (*(*t.ppw))[2] + (*(*t.ppw))[1] + (*(*t.ppw))[0]; + // CHECK: int c3 = (*t.ppw)->x + (*t.ppw)->y + (*t.ppw)->z; int c3 = (*t.ppw)->x + (*t.ppw)->y + (*t.ppw)->z; - // CHECK: sycl::range<3> d3_1(1, 1, test[2]); + // CHECK: dpct::dim3 d3_1(test.x); dim3 d3_1(test.x); - // CHECK: sycl::range<3> d3_2(1, 1, test[2] + 1); + // CHECK: dpct::dim3 d3_2(test.x + 1); dim3 d3_2(test.x + 1); - // CHECK: sycl::range<3> d3_2_1(1, 1, static_cast(test[2] + 32)); + // CHECK: dpct::dim3 d3_2_1(static_cast(test.x + 32)); dim3 d3_2_1(static_cast(test.x + 32)); - // CHECK: sycl::range<3> d3_2_2(1, 1, test[2] + NUM); + // CHECK: dpct::dim3 d3_2_2(test.x + NUM); dim3 d3_2_2(test.x + NUM); - // CHECK: sycl::range<3> d3_3(1, 1, 2 + test[2] + 1); + // CHECK: dpct::dim3 d3_3(2 + test.x + 1); dim3 d3_3(2 + test.x + 1); - // CHECK: sycl::range<3> d3_3_1(1, 1, 32 + test[2] + 64); + // CHECK: dpct::dim3 d3_3_1(32 + test.x + 64); dim3 d3_3_1(32 + test.x + 64); - // CHECK: sycl::range<3> d3_3_2(1, 1, NUM + test[2] + NUM); + // CHECK: dpct::dim3 d3_3_2(NUM + test.x + NUM); dim3 d3_3_2(NUM + test.x + NUM); - // CHECK: sycl::range<3> d3_4(1, test[1], test[2]); + // CHECK: dpct::dim3 d3_4(test.x, test.y); dim3 d3_4(test.x, test.y); - // CHECK: sycl::range<3> d3_5(test[0], test[1], test[2]); + // CHECK: dpct::dim3 d3_5(test.x, test.y, test.z); dim3 d3_5(test.x, test.y, test.z); - // CHECK: sycl::range<3> d3_6 = sycl::range<3>(3 + test[0] + 4, 2 + test[1], test[2] + 1); + // CHECK: dpct::dim3 d3_6 = dpct::dim3(test.x + 1, 2 + test.y, 3 + test.z + 4); dim3 d3_6 = dim3(test.x + 1, 2 + test.y, 3 + test.z + 4); - // CHECK: sycl::range<3> d3_6_1 = sycl::range<3>(113 + test[0] + 114, 112 + test[1], test[2] + 111); + // CHECK: dpct::dim3 d3_6_1 = dpct::dim3(test.x + 111, 112 + test.y, 113 + test.z + 114); dim3 d3_6_1 = dim3(test.x + 111, 112 + test.y, 113 + test.z + 114); - // CHECK: sycl::range<3> d3_6_2 = sycl::range<3>(NUM + test[0] + NUM, NUM + test[1], test[2] + NUM); + // CHECK: dpct::dim3 d3_6_2 = dpct::dim3(test.x + NUM, NUM + test.y, NUM + test.z + NUM); dim3 d3_6_2 = dim3(test.x + NUM, NUM + test.y, NUM + test.z + NUM); - // todoCHECK: sycl::range<3> d3_6_3 = sycl::range<3>(NUM + test[0] + NUM, NUM + test[1], sycl::ceil(test[2] + NUM)); + // todoCHECK: dpct::dim3 d3_6_3 = dpct::dim3(ceil(test.x + NUM), NUM + test.y, NUM + test.z + NUM); dim3 d3_6_3 = dim3(ceil(test.x + NUM), NUM + test.y, NUM + test.z + NUM); - // CHECK: sycl::range<3> gpu_blocks(1, 1, 1 / (d3_6_3[2] * 200)); + // CHECK: dpct::dim3 gpu_blocks(1 / (d3_6_3.x * 200)); dim3 gpu_blocks(1 / (d3_6_3.x * 200)); // CHECK: q_ct1.parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - // CHECK-NEXT: kernel(d3_6[2]); + // CHECK-NEXT: kernel(d3_6.x); // CHECK-NEXT: }); kernel<<<1, 1>>>(d3_6.x); // CHECK: q_ct1.parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, NUM) * sycl::range<3>(1, 1, NUM), sycl::range<3>(1, 1, NUM)), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - // CHECK-NEXT: kernel(d3_6[2]); + // CHECK-NEXT: kernel(d3_6.x); // CHECK-NEXT: }); kernel<<>>(d3_6.x); } @@ -195,11 +195,11 @@ __host__ __device__ T getgriddim(T totallen, T blockdim) template static void memsetCuda(T * d_mem, T v, int n) { - // CHECK: sycl::range<3> dimBlock(1, 1, 256); - // CHECK: sycl::range<3> dimGrid_2(1, 1, std::max(2048, 3)); - // CHECK: sycl::range<3> dimGrid_1(1, 1, std::max(2048, 3)); - // CHECK: std::min(2048, getgriddim(n, dimBlock[2])); - // CHECK: sycl::range<3> dimGrid(1, 1, std::min(2048, getgriddim(n, dimBlock[2]))); + // CHECK: dpct::dim3 dimBlock(256); + // CHECK: dpct::dim3 dimGrid_2(std::max(2048, 3)); + // CHECK: dpct::dim3 dimGrid_1(std::max(2048, 3)); + // CHECK: std::min(2048, getgriddim(n, dimBlock.x)); + // CHECK: dpct::dim3 dimGrid(std::min(2048, getgriddim(n, dimBlock.x))); dim3 dimBlock(256); dim3 dimGrid_2(max(2048, 3)); dim3 dimGrid_1(std::max(2048, 3)); @@ -262,14 +262,14 @@ __global__ void kernel_foo(float *a, wrap *mt, unsigned int N) { } // CHECK: void dim3_foo() { -// CHECK-NEXT: DIM3_DEFAULT_VAR(block0(1, 1, 1)); +// CHECK-NEXT: DIM3_DEFAULT_VAR(block0); // CHECK-NEXT: CALL_FUNC( []() { -// CHECK-NEXT: sycl::range<3> block1(1, 1, 1); -// CHECK-NEXT: sycl::range<3> block2{1, 1, 1}; -// CHECK-NEXT: sycl::range<3> block3(1, 1, 2); -// CHECK-NEXT: sycl::range<3> block4(1, 3, 2); -// CHECK-NEXT: sycl::range<3> block5(4, 3, 2); -// CHECK-NEXT: DIM3_DEFAULT_VAR(block6(1, 1, 1)); +// CHECK-NEXT: dpct::dim3 block1; +// CHECK-NEXT: dpct::dim3 block2{}; +// CHECK-NEXT: dpct::dim3 block3(2); +// CHECK-NEXT: dpct::dim3 block4(2,3); +// CHECK-NEXT: dpct::dim3 block5(2,3,4); +// CHECK-NEXT: DIM3_DEFAULT_VAR(block6); // CHECK-NEXT: }); // CHECK-NEXT: } void dim3_foo() { @@ -284,3 +284,40 @@ void dim3_foo() { }); } #endif + +// CHECK: class Dim3Struct { +// CHECK-NEXT: Dim3Struct() : x(dpct::dim3(1, 2)) {} +// CHECK-NEXT: dpct::dim3 x = dpct::dim3(3, 4); +// CHECK-NEXT: void f() { dpct::dim3(5, 6); } +// CHECK-NEXT: }; +class Dim3Struct { + Dim3Struct() : x(dim3(1, 2)) {} + dim3 x = dim3(3, 4); + void f() { dim3(5, 6); } +}; + +struct A { + int x; + dim3 y; + int z; +}; +struct B { + int x; + A y; + dim3 z; +}; + +int dim3_implicit_ctor() { + dim3 d; + d.x = 5; + // CHECK: B b1 = {}; + B b1 = {}; + // CHECK: B b2 = {0}; + B b2 = {0}; + // CHECK: B b3 = {0, {}}; + B b3 = {0, {}}; + // CHECK: B b4 = {0, {1}}; + B b4 = {0, {1}}; + // CHECK: B b5 = {0, {1, {1}}}; + B b5 = {0, {1, {1}}}; +} diff --git a/clang/test/dpct/sync_api.cu b/clang/test/dpct/sync_api.cu index 2890a843b189..2ed0ccdf9b5e 100644 --- a/clang/test/dpct/sync_api.cu +++ b/clang/test/dpct/sync_api.cu @@ -30,6 +30,8 @@ __global__ void k() { cg::thread_block block = cg::this_thread_block(); // CHECK: item_ct1.barrier(sycl::access::fence_space::local_space); __syncthreads(); + // CHECK: item_ct1.barrier(sycl::access::fence_space::local_space); + __barrier_sync(0); // CHECK: item_ct1.barrier(); block.sync(); // CHECK: item_ct1.barrier(); diff --git a/clang/test/dpct/template-instantiation.cu b/clang/test/dpct/template-instantiation.cu index 858702345dbb..0baa365c1145 100644 --- a/clang/test/dpct/template-instantiation.cu +++ b/clang/test/dpct/template-instantiation.cu @@ -136,11 +136,11 @@ int main() { unsigned u; dim3 dim; - // CHECK: func_2_same_pram(u, (unsigned int)dim[1]); + // CHECK: func_2_same_pram(u, dim.y); func_2_same_pram(u, dim.y); - // CHECK: func_2_same_pram(u, (unsigned int)dim[1] + 1); + // CHECK: func_2_same_pram(u, dim.y + 1); func_2_same_pram(u, dim.y + 1); - // CHECK: func_2_same_pram(u, func_same_return((unsigned int)dim[1])); + // CHECK: func_2_same_pram(u, func_same_return(dim.y)); func_2_same_pram(u, func_same_return(dim.y)); } diff --git a/clang/test/dpct/texture/texture_object_bindless_image.cu b/clang/test/dpct/texture/texture_object_bindless_image.cu index 727e9efa6195..e4adb0efa93f 100644 --- a/clang/test/dpct/texture/texture_object_bindless_image.cu +++ b/clang/test/dpct/texture/texture_object_bindless_image.cu @@ -2,6 +2,8 @@ // RUN: FileCheck --input-file %T/texture/texture_object_bindless_image/texture_object_bindless_image.dp.cpp --match-full-lines %s // RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/texture/texture_object_bindless_image/texture_object_bindless_image.dp.cpp -o %T/texture/texture_object_bindless_image/texture_object_bindless_image.dp.o %} +#include "cuda.h" + // CHECK: template void kernel(sycl::ext::oneapi::experimental::sampled_image_handle tex) { template __global__ void kernel(cudaTextureObject_t tex) { int i; @@ -51,6 +53,161 @@ template __global__ void kernel(cudaTextureObject_t tex) { #endif } +void driverMemoryManagement() { + size_t s, s1, s2; + unsigned u; + void *pV; + // CHECK: sycl::image_channel_type f; + CUarray_format f; + // CHECK: dpct::experimental::image_mem_wrapper_ptr *pArr; + CUarray *pArr; + // CHECK: dpct::device_ptr pD; + CUdeviceptr pD; + // CHECK: dpct::queue_ptr st; + CUstream st; + // CHECK: sycl::ext::oneapi::experimental::image_descriptor pDesc; + CUDA_ARRAY_DESCRIPTOR pDesc; + // CHECK: pDesc.channel_type = f; + pDesc.Format = f; + // CHECK: pDesc.height = s; + pDesc.Height = s; +#ifndef BUILD_TEST // TODO: Need delete later. + // CHECK: pDesc.num_channels = u; + pDesc.NumChannels = u; +#endif + // CHECK: pDesc.width = s; + pDesc.Width = s; + // CHECK: dpct::memcpy_parameter p2d; + CUDA_MEMCPY2D p2d; + // CHECK: p2d.from.pos[0] = s; + p2d.srcXInBytes = s; + // CHECK: p2d.from.pos[1] = s; + p2d.srcY = s; + // CHECK: ; + p2d.srcMemoryType; + // CHECK: p2d.from.pitched.set_data_ptr(pV); + p2d.srcHost = pV; + // CHECK: p2d.from.pitched.set_data_ptr(pD); + p2d.srcDevice = pD; + // CHECK: p2d.from.image_bindless = *pArr; + p2d.srcArray = *pArr; + // CHECK: p2d.from.pitched.set_pitch(s); + p2d.srcPitch = s; + // CHECK: p2d.to.pos[0] = s; + p2d.dstXInBytes = s; + // CHECK: p2d.to.pos[1] = s; + p2d.dstY = s; + // CHECK: ; + p2d.dstMemoryType; + // CHECK: p2d.to.pitched.set_data_ptr(pV); + p2d.dstHost = pV; + // CHECK: p2d.to.pitched.set_data_ptr(pD); + p2d.dstDevice = pD; + // CHECK: p2d.to.image_bindless = *pArr; + p2d.dstArray = *pArr; + // CHECK: p2d.to.pitched.set_pitch(s); + p2d.dstPitch = s; + // CHECK: p2d.size[0] = s; + p2d.WidthInBytes = s; + // CHECK: p2d.size[1] = s; + p2d.Height = s; + // CHECK: dpct::memcpy_parameter p3d; + CUDA_MEMCPY3D p3d; + // CHECK: p3d.from.pos[0] = s; + p3d.srcXInBytes = s; + // CHECK: p3d.from.pos[1] = s; + p3d.srcY = s; + // CHECK: p3d.from.pos[2] = s; + p3d.srcZ = s; + // CHECK: ; + p3d.srcLOD; + // CHECK: ; + p3d.srcMemoryType; + // CHECK: p3d.from.pitched.set_data_ptr(pV); + p3d.srcHost = pV; + // CHECK: p3d.from.pitched.set_data_ptr(pD); + p3d.srcDevice = pD; + // CHECK: p3d.from.image_bindless = *pArr; + p3d.srcArray = *pArr; + // CHECK: p3d.from.pitched.set_pitch(s); + p3d.srcPitch = s; + // CHECK: p3d.from.pitched.set_y(s); + p3d.srcHeight = s; + // CHECK: p3d.to.pos[0] = s; + p3d.dstXInBytes = s; + // CHECK: p3d.to.pos[1] = s; + p3d.dstY = s; + // CHECK: p3d.to.pos[2] = s; + p3d.dstZ = s; + // CHECK: ; + p3d.dstLOD; + // CHECK: ; + p3d.dstMemoryType; + // CHECK: p3d.to.pitched.set_data_ptr(pV); + p3d.dstHost = pV; + // CHECK: p3d.to.pitched.set_data_ptr(pD); + p3d.dstDevice = pD; + // CHECK: p3d.to.image_bindless = *pArr; + p3d.dstArray = *pArr; + // CHECK: p3d.to.pitched.set_pitch(s); + p3d.dstPitch = s; + // CHECK: p3d.to.pitched.set_y(s); + p3d.dstHeight = s; + // CHECK: p3d.size[0] = s; + p3d.WidthInBytes = s; + // CHECK: p3d.size[1] = s; + p3d.Height = s; + // CHECK: p3d.size[2] = s; + p3d.Depth = s; + // CHECK: *pArr = new dpct::experimental::image_mem_wrapper(&pDesc); + cuArrayCreate(pArr, &pDesc); + // CHECK: delete (*pArr); + cuArrayDestroy(*pArr); + // CHECK: dpct::dpct_memcpy(p2d); + cuMemcpy2D(&p2d); + // CHECK: /* + // CHECK-NEXT: DPCT1124:{{[0-9]+}}: cuMemcpy2DAsync_v2 is migrated to asynchronous memcpy API. While the origin API might be synchronous, it depends on the type of operand memory, so you may need to call wait() on event return by memcpy API to ensure synchronization behavior. + // CHECK-NEXT: */ + // CHECK-NEXT: dpct::async_dpct_memcpy(p2d, *st); + cuMemcpy2DAsync(&p2d, st); + // CHECK: dpct::dpct_memcpy(p3d); + cuMemcpy3D(&p3d); + // CHECK: /* + // CHECK-NEXT: DPCT1124:{{[0-9]+}}: cuMemcpy3DAsync_v2 is migrated to asynchronous memcpy API. While the origin API might be synchronous, it depends on the type of operand memory, so you may need to call wait() on event return by memcpy API to ensure synchronization behavior. + // CHECK-NEXT: */ + // CHECK-NEXT: dpct::async_dpct_memcpy(p3d, *st); + cuMemcpy3DAsync(&p3d, st); + // CHECK: dpct::experimental::dpct_memcpy(*pArr, s, 0, *pArr, s1, 0, s2); + cuMemcpyAtoA(*pArr, s, *pArr, s1, s2); + // CHECK: dpct::experimental::dpct_memcpy(pD, *pArr, s, 0, s1); + cuMemcpyAtoD(pD, *pArr, s, s1); + // CHECK: dpct::experimental::dpct_memcpy(pV, *pArr, s, 0, s1); + cuMemcpyAtoH(pV, *pArr, s, s1); + // CHECK: dpct::experimental::async_dpct_memcpy(pV, *pArr, s, 0, s1, *st); + cuMemcpyAtoHAsync(pV, *pArr, s, s1, st); + // CHECK: dpct::experimental::dpct_memcpy(*pArr, s, 0, pD, s1); + cuMemcpyDtoA(*pArr, s, pD, s1); + // CHECK: q_ct1.memcpy(pD, pD, s).wait(); + cuMemcpyDtoD(pD, pD, s); + // CHECK: st->memcpy(pD, pD, s); + cuMemcpyDtoDAsync(pD, pD, s, st); + // CHECK: q_ct1.memcpy(pV, pD, s).wait(); + cuMemcpyDtoH(pV, pD, s); + // CHECK: /* + // CHECK-NEXT: DPCT1124:{{[0-9]+}}: cuMemcpyDtoHAsync_v2 is migrated to asynchronous memcpy API. While the origin API might be synchronous, it depends on the type of operand memory, so you may need to call wait() on event return by memcpy API to ensure synchronization behavior. + // CHECK-NEXT: */ + // CHECK-NEXT: st->memcpy(pV, pD, s); + cuMemcpyDtoHAsync(pV, pD, s, st); + // CHECK: dpct::experimental::dpct_memcpy(*pArr, s, 0, pV, s1); + cuMemcpyHtoA(*pArr, s, pV, s1); + // CHECK: dpct::experimental::async_dpct_memcpy(*pArr, s, 0, pV, s1, *st); + cuMemcpyHtoAAsync(*pArr, s, pV, s1, st); + // CHECK: q_ct1.memcpy(pD, pV, s).wait(); + cuMemcpyHtoD(pD, pV, s); + // CHECK: st->memcpy(pD, pV, s); + cuMemcpyHtoDAsync(pD, pV, s, st); +} + void driver() { // CHECK: sycl::ext::oneapi::experimental::sampled_image_handle o; CUtexObject o; diff --git a/clang/test/dpct/texture_driver.cu b/clang/test/dpct/texture_driver.cu index 65a29658d4ae..3bfa7c943838 100644 --- a/clang/test/dpct/texture_driver.cu +++ b/clang/test/dpct/texture_driver.cu @@ -24,7 +24,7 @@ int main() { // CHECK-NEXT: halfDesc.height = 32; // CHECK-NEXT: halfDesc.width = 64; // CHECK-NEXT: halfDesc.channel_type = sycl::image_channel_type::fp16; - // CHECK-NEXT: halfDesc.channel_num = 1; + // CHECK-NEXT: halfDesc.num_channels = 1; CUDA_ARRAY_DESCRIPTOR halfDesc; halfDesc.Height = 32; halfDesc.Width = 64; @@ -34,7 +34,7 @@ int main() { // CHECK: dpct::image_matrix_desc float4Desc; // CHECK-NEXT: float4Desc.width = 64; // CHECK-NEXT: float4Desc.channel_type = sycl::image_channel_type::fp32; - // CHECK-NEXT: float4Desc.channel_num = 4; + // CHECK-NEXT: float4Desc.num_channels = 4; // CHECK-NEXT: float4Desc.height = 32; CUDA_ARRAY_DESCRIPTOR float4Desc; float4Desc.Width = 64; diff --git a/clang/test/dpct/texture_object_driver.cu b/clang/test/dpct/texture_object_driver.cu index d33b4246ae5d..347b35c9be71 100644 --- a/clang/test/dpct/texture_object_driver.cu +++ b/clang/test/dpct/texture_object_driver.cu @@ -46,7 +46,7 @@ int main() { // CHECK: sycl::float4 *d_data42; // CHECK-NEXT: dpct::image_matrix_p a42; // CHECK-NEXT: dpct::image_matrix_desc desc42; - // CHECK-NEXT: desc42.channel_num = 4; + // CHECK-NEXT: desc42.num_channels = 4; // CHECK-NEXT: desc42.channel_type = sycl::image_channel_type::fp32; // CHECK-NEXT: desc42.width = 32; // CHECK-NEXT: desc42.height = 32; diff --git a/clang/test/dpct/thrust/thrust_testing/source/foo.cu b/clang/test/dpct/thrust/thrust_testing/source/foo.cu index c8efd013b9be..916c26d46242 100644 --- a/clang/test/dpct/thrust/thrust_testing/source/foo.cu +++ b/clang/test/dpct/thrust/thrust_testing/source/foo.cu @@ -33,7 +33,7 @@ void baz(ForwardIterator1 first1, ForwardIterator1 last1, int main() { - // CHECK: sycl::range<3> t(1, 1, 1); + // CHECK: dpct::dim3 t; dim3 t; return 0; } diff --git a/clang/test/dpct/types001.cu b/clang/test/dpct/types001.cu index 84d6eee49838..fda32b0c145a 100644 --- a/clang/test/dpct/types001.cu +++ b/clang/test/dpct/types001.cu @@ -41,11 +41,11 @@ const cudaError *perrors1[23]; // CHECK: const dpct::err0 **pperrors1[23]; const cudaError **pperrors1[23]; -// CHECK: sycl::range<3> dims[23]; +// CHECK: dpct::dim3 dims[23]; dim3 dims[23]; -// CHECK: const sycl::range<3> *pdims[23]; +// CHECK: const dpct::dim3 *pdims[23]; const dim3 *pdims[23]; -// CHECK: const sycl::range<3> **ppdims[23]; +// CHECK: const dpct::dim3 **ppdims[23]; const dim3 **ppdims[23]; struct s { @@ -70,11 +70,11 @@ struct s { // CHECK: const dpct::err0 **pperrors1[23]; const cudaError **pperrors1[23]; - // CHECK: sycl::range<3> dims[23]; + // CHECK: dpct::dim3 dims[23]; dim3 dims[23]; - // CHECK: const sycl::range<3> *pdims[23]; + // CHECK: const dpct::dim3 *pdims[23]; const dim3 *pdims[23]; - // CHECK: const sycl::range<3> **ppdims[23]; + // CHECK: const dpct::dim3 **ppdims[23]; const dim3 **ppdims[23]; }; @@ -111,8 +111,8 @@ void my_error_checker(T ReturnValue, char const *const FuncName) { #define MY_ERROR_CHECKER(CALL) my_error_checker((CALL), #CALL) int main(int argc, char **argv) { - //CHECK:sycl::range<3> d3(1, 1, 1); - //CHECK-NEXT:int a = sizeof(sycl::range<3>); + //CHECK:dpct::dim3 d3; + //CHECK-NEXT:int a = sizeof(dpct::dim3); //CHECK-NEXT:a = sizeof(d3); //CHECK-NEXT:a = sizeof d3; dim3 d3; @@ -609,15 +609,15 @@ void foo_2(cudaDataType_t a1, cudaDataType a2, cublasDataType_t a3) { } __device__ void foo_3() { - // CHECK: sycl::range<3> d3 = {3, 2, 1}, *pd3 = &d3; + // CHECK: dpct::dim3 d3 = {1, 2, 3}, *pd3 = &d3; dim3 d3 = {1, 2, 3}, *pd3 = &d3; int64_t m = 0; - // CHECK: m = std::min(m, int64_t((*pd3)[2])); - // CHECK-NEXT: m = std::min(m, int64_t((*pd3)[1])); - // CHECK-NEXT: m = std::min(m, int64_t((*pd3)[0])); - // CHECK-NEXT: m = std::min(m, int64_t(d3[2])); - // CHECK-NEXT: m = std::min(m, int64_t(d3[1])); - // CHECK-NEXT: m = std::min(m, int64_t(d3[0])); + // CHECK: m = std::min(m, int64_t{pd3->x}); + // CHECK-NEXT: m = std::min(m, int64_t{pd3->y}); + // CHECK-NEXT: m = std::min(m, int64_t{pd3->z}); + // CHECK-NEXT: m = std::min(m, int64_t{d3.x}); + // CHECK-NEXT: m = std::min(m, int64_t{d3.y}); + // CHECK-NEXT: m = std::min(m, int64_t{d3.z}); m = std::min(m, int64_t{pd3->x}); m = std::min(m, int64_t{pd3->y}); m = std::min(m, int64_t{pd3->z}); @@ -634,28 +634,28 @@ constexpr inline integer ceil_div(integer n, integer m) { void foo_4() { const int64_t num_irows = 32; const int64_t num_orows = 32; - // CHECK: sycl::range<3> threads(1, 1, 32); + // CHECK: dpct::dim3 threads(32); dim3 threads(32); int64_t maxGridDim = 1024; - // CHECK: sycl::range<3> grid_1(1, std::min(maxGridDim, ceil_div(num_irows, int64_t(threads[2]))), std::min(maxGridDim, num_orows)); + // CHECK: dpct::dim3 grid_1(std::min(maxGridDim, num_orows), std::min(maxGridDim, ceil_div(num_irows, int64_t{threads.x}))); dim3 grid_1(std::min(maxGridDim, num_orows), std::min(maxGridDim, ceil_div(num_irows, int64_t{threads.x}))); int row_size = 16; - // CHECK: sycl::range<3> grid_2(1, 1, std::min(maxGridDim, ceil_div(row_size, int(threads[1])))); + // CHECK: dpct::dim3 grid_2(std::min(maxGridDim, ceil_div(row_size, int(threads.y)))); dim3 grid_2(std::min(maxGridDim, ceil_div(row_size, int(threads.y)))); - // CHECK: int64_t m = int64_t(threads[1]); + // CHECK: int64_t m = int64_t{threads.y}; int64_t m = int64_t{threads.y}; - // CHECK: m = int64_t(threads[1]); + // CHECK: m = int64_t{threads.y}; m = int64_t{threads.y}; typedef int64_t MY_INT64; - // CHECK: m = std::min(int64_t(threads[2]), MY_INT64(threads[0])); + // CHECK: m = std::min(int64_t{threads.x}, MY_INT64{threads.z}); m = std::min(int64_t{threads.x}, MY_INT64{threads.z}); int num = 1024; // CHECK: m = int64_t{num}; m = int64_t{num}; - // CHECK: m = std::min(int64_t(threads[2]), MY_INT64{num}); + // CHECK: m = std::min(int64_t{threads.x}, MY_INT64{num}); m = std::min(int64_t{threads.x}, MY_INT64{num}); struct CFoo { @@ -665,7 +665,7 @@ void foo_4() { }; // CHECK: CFoo cfoo{num}; CFoo cfoo{num}; - // CHECK: m = std::min(int64_t(threads[2]), int64_t{cfoo}); + // CHECK: m = std::min(int64_t{threads.x}, int64_t{cfoo}); m = std::min(int64_t{threads.x}, int64_t{cfoo}); } diff --git a/clang/tools/dpct/DpctOptRules/cmake_script_migration_rule.yaml b/clang/tools/dpct/DpctOptRules/cmake_script_migration_rule.yaml index a27d5456b725..7e093030aa24 100644 --- a/clang/tools/dpct/DpctOptRules/cmake_script_migration_rule.yaml +++ b/clang/tools/dpct/DpctOptRules/cmake_script_migration_rule.yaml @@ -2571,3 +2571,63 @@ MatchMode: Full In: CMAKE_CUDA_COMPILER_VERSION Out: COMPATIBILITY_VERSION + +- Rule: rule_xcompiler_option_filter + Kind: CMakeRule + Priority: Fallback + CmakeSyntax: xcompiler_option_filter + In: ${func_name}(${value}) + Out: ${func_name}(${value}) + Subrules: + value: + MatchMode: Full + In: -Xcompiler=${arg} + Out: ${arg} + +- Rule: rule_xcudafe_filter + Kind: CMakeRule + Priority: Fallback + CmakeSyntax: xcudafe_filter + In: ${func_name}(${value}) + Out: ${func_name}(${value}) + Subrules: + value: + MatchMode: Full + In: -Xcudafe=${arg} + Out: "" + +- Rule: rule_extended_lambda_remove + Kind: CMakeRule + Priority: Fallback + CmakeSyntax: extended_lambda_remove + In: ${func_name}(${value}) + Out: ${func_name}(${value}) + Subrules: + value: + MatchMode: Full + In: --extended-lambda + Out: "" + +- Rule: rule_expt_relaxed_constexpr_remove + Kind: CMakeRule + Priority: Fallback + CmakeSyntax: expt_relaxed_constexpr_remove + In: ${func_name}(${value}) + Out: ${func_name}(${value}) + Subrules: + value: + MatchMode: Full + In: --expt-relaxed-constexpr + Out: "" + +- Rule: rule_CUDA_NVCC_EXECUTABLE + Kind: CMakeRule + Priority: Fallback + CmakeSyntax: CUDA_NVCC_EXECUTABLE + In: ${func_name}(${value}) + Out: ${func_name}(${value}) + Subrules: + value: + MatchMode: Full + In: CUDA_NVCC_EXECUTABLE + Out: SYCL_COMPILER_EXECUTABLE diff --git a/clang/tools/dpct/cmake/dpct.cmake b/clang/tools/dpct/cmake/dpct.cmake index 1a98ce3a7a90..d202ada0c1e2 100644 --- a/clang/tools/dpct/cmake/dpct.cmake +++ b/clang/tools/dpct/cmake/dpct.cmake @@ -126,3 +126,6 @@ set(SYCL_TOOLKIT_INCLUDE "${SYCL_INCLUDE_DIR}") set(SYCLToolkit_LIBRARY_DIR "${SYCL_INCLUDE_DIR}/../lib") set(SYCL_HOST_COMPILER "icpx") set(SYCL_HOST_FLAGS "") + +# 'SYCL_COMPILER_EXECUTABLE' is used to specify the path to the SYCL Compiler (icpx). +set(SYCL_COMPILER_EXECUTABLE "${SYCL_INCLUDE_DIR}/../bin/icpx") diff --git a/llvm/docs/requirements-hashed.txt b/llvm/docs/requirements-hashed.txt index 9cb18f072bff..3b652e658c8c 100644 --- a/llvm/docs/requirements-hashed.txt +++ b/llvm/docs/requirements-hashed.txt @@ -16,9 +16,9 @@ beautifulsoup4==4.12.3 \ --hash=sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051 \ --hash=sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed # via furo -certifi==2024.6.2 \ - --hash=sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516 \ - --hash=sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56 +certifi==2024.7.4 \ + --hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \ + --hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90 # via requests charset-normalizer==3.3.2 \ --hash=sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027 \ diff --git a/llvm/utils/git/requirements_formatting.txt b/llvm/utils/git/requirements_formatting.txt index 2741c03fa26b..f6b7df5affdf 100644 --- a/llvm/utils/git/requirements_formatting.txt +++ b/llvm/utils/git/requirements_formatting.txt @@ -8,7 +8,7 @@ black==24.3.0 # via # -r llvm/utils/git/requirements_formatting.txt.in # darker -certifi==2024.2.2 +certifi==2024.7.4 # via requests cffi==1.16.0 # via