diff --git a/clang/lib/DPCT/APINames.inc b/clang/lib/DPCT/APINames.inc index f86a76782c7d..3a0c2f1095ba 100644 --- a/clang/lib/DPCT/APINames.inc +++ b/clang/lib/DPCT/APINames.inc @@ -385,14 +385,15 @@ ENTRY(cudaRuntimeGetVersion, cudaRuntimeGetVersion, true, NO_FLAG, P0, "DPCT1043 // Graph Management ENTRY(cudaGraph_t, cudaGraph_t, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphExec_t, cudaGraphExec_t, true, NO_FLAG, P4, "Successful/DPCT1119") +ENTRY(cudaGraphNode_t, cudaGraphNode_t, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaDeviceGetGraphMemAttribute, cudaDeviceGetGraphMemAttribute, false, NO_FLAG, P4, "comment") ENTRY(cudaDeviceGraphMemTrim, cudaDeviceGraphMemTrim, false, NO_FLAG, P4, "comment") ENTRY(cudaDeviceSetGraphMemAttribute, cudaDeviceSetGraphMemAttribute, false, NO_FLAG, P4, "comment") ENTRY(cudaGetCurrentGraphExec, cudaGetCurrentGraphExec, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddChildGraphNode, cudaGraphAddChildGraphNode, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphAddDependencies, cudaGraphAddDependencies, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphAddDependencies, cudaGraphAddDependencies, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphAddDependencies_v2, cudaGraphAddDependencies_v2, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphAddEmptyNode, cudaGraphAddEmptyNode, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphAddEmptyNode, cudaGraphAddEmptyNode, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphAddEventRecordNode, cudaGraphAddEventRecordNode, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddEventWaitNode, cudaGraphAddEventWaitNode, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphAddExternalSemaphoresSignalNode, cudaGraphAddExternalSemaphoresSignalNode, false, NO_FLAG, P4, "comment") @@ -433,7 +434,7 @@ ENTRY(cudaGraphExecMemcpyNodeSetParamsFromSymbol, cudaGraphExecMemcpyNodeSetPara ENTRY(cudaGraphExecMemcpyNodeSetParamsToSymbol, cudaGraphExecMemcpyNodeSetParamsToSymbol, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphExecMemsetNodeSetParams, cudaGraphExecMemsetNodeSetParams, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphExecNodeSetParams, cudaGraphExecNodeSetParams, false, NO_FLAG, P4, "comment") -ENTRY(cudaGraphExecUpdate, cudaGraphExecUpdate, false, NO_FLAG, P4, "comment") +ENTRY(cudaGraphExecUpdate, cudaGraphExecUpdate, true, NO_FLAG, P4, "Successful/DPCT1119") ENTRY(cudaGraphExternalSemaphoresSignalNodeGetParams, cudaGraphExternalSemaphoresSignalNodeGetParams, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphExternalSemaphoresSignalNodeSetParams, cudaGraphExternalSemaphoresSignalNodeSetParams, false, NO_FLAG, P4, "comment") ENTRY(cudaGraphExternalSemaphoresWaitNodeGetParams, cudaGraphExternalSemaphoresWaitNodeGetParams, false, NO_FLAG, P4, "comment") diff --git a/clang/lib/DPCT/APINamesGraph.inc b/clang/lib/DPCT/APINamesGraph.inc index 7e161ac106ec..3d9f49a5bf81 100644 --- a/clang/lib/DPCT/APINamesGraph.inc +++ b/clang/lib/DPCT/APINamesGraph.inc @@ -35,3 +35,34 @@ CONDITIONAL_FACTORY_ENTRY( Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("cudaGraphLaunch"), ARG("--use-experimental-features=graph"))) + +CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + CALL_FACTORY_ENTRY("cudaGraphAddEmptyNode", + CALL(MapNames::getDpctNamespace() + + "experimental::add_empty_node", + ARG(0), ARG(1), ARG(2), ARG(3))), + UNSUPPORT_FACTORY_ENTRY("cudaGraphAddEmptyNode", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphAddEmptyNode"), + ARG("--use-experimental-features=graph"))) + +CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + CALL_FACTORY_ENTRY("cudaGraphAddDependencies", + CALL(MapNames::getDpctNamespace() + + "experimental::add_dependencies", + ARG(0), ARG(1), ARG(2), ARG(3))), + UNSUPPORT_FACTORY_ENTRY("cudaGraphAddDependencies", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphAddDependencies"), + ARG("--use-experimental-features=graph"))) + +CONDITIONAL_FACTORY_ENTRY( + UseExtGraph, + MEMBER_CALL_FACTORY_ENTRY("cudaGraphExecUpdate", ARG(0), true, "update", + DEREF(1)), + UNSUPPORT_FACTORY_ENTRY("cudaGraphExecUpdate", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaGraphExecUpdate"), + ARG("--use-experimental-features=graph"))) diff --git a/clang/lib/DPCT/APINamesTemplateType.inc b/clang/lib/DPCT/APINamesTemplateType.inc index 735fe48ee135..837fd4830eca 100644 --- a/clang/lib/DPCT/APINamesTemplateType.inc +++ b/clang/lib/DPCT/APINamesTemplateType.inc @@ -393,3 +393,12 @@ TYPE_REWRITE_ENTRY( "experimental::command_graph_exec_ptr")), WARNING_FACTORY(Diagnostics::TRY_EXPERIMENTAL_FEATURE, TYPESTR, STR("--use-experimental-features=graph")))) + +TYPE_REWRITE_ENTRY( + "cudaGraphNode_t", + TYPE_CONDITIONAL_FACTORY( + checkEnableGraphForType(), + TYPE_FACTORY(STR(MapNames::getDpctNamespace() + + "experimental::node_ptr")), + WARNING_FACTORY(Diagnostics::TRY_EXPERIMENTAL_FEATURE, TYPESTR, + STR("--use-experimental-features=graph")))) diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 45f277cdf0d2..60d969d26d8e 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -1694,35 +1694,35 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) { "thrust::device_ptr", "thrust::device_reference", "thrust::host_vector", "cublasHandle_t", "CUevent_st", "__half", "half", "__half2", "half2", "cudaMemoryAdvise", "cudaError_enum", - "cudaDeviceProp", "cudaPitchedPtr", "thrust::counting_iterator", - "thrust::transform_iterator", "thrust::permutation_iterator", - "thrust::iterator_difference", "cusolverDnHandle_t", - "cusolverDnParams_t", "gesvdjInfo_t", "syevjInfo_t", - "thrust::device_malloc_allocator", "thrust::divides", - "thrust::tuple", "thrust::maximum", "thrust::multiplies", - "thrust::plus", "cudaDataType_t", "cudaError_t", "CUresult", - "CUdevice", "cudaEvent_t", "cublasStatus_t", "cuComplex", - "cuFloatComplex", "cuDoubleComplex", "CUevent", - "cublasFillMode_t", "cublasDiagType_t", "cublasSideMode_t", - "cublasOperation_t", "cusolverStatus_t", "cusolverEigType_t", - "cusolverEigMode_t", "curandStatus_t", "cudaStream_t", - "cusparseStatus_t", "cusparseDiagType_t", "cusparseFillMode_t", - "cusparseIndexBase_t", "cusparseMatrixType_t", - "cusparseAlgMode_t", "cusparseOperation_t", "cusparseMatDescr_t", - "cusparseHandle_t", "CUcontext", "cublasPointerMode_t", - "cusparsePointerMode_t", "cublasGemmAlgo_t", - "cusparseSolveAnalysisInfo_t", "cudaDataType", "cublasDataType_t", - "curandState_t", "curandState", "curandStateXORWOW_t", - "curandStateXORWOW", "curandStatePhilox4_32_10_t", - "curandStatePhilox4_32_10", "curandStateMRG32k3a_t", - "curandStateMRG32k3a", "thrust::minus", "thrust::negate", - "thrust::logical_or", "thrust::equal_to", "thrust::less", - "cudaSharedMemConfig", "curandGenerator_t", "curandRngType_t", - "curandOrdering_t", "cufftHandle", "cufftReal", "cufftDoubleReal", - "cufftComplex", "cufftDoubleComplex", "cufftResult_t", - "cufftResult", "cufftType_t", "cufftType", "thrust::pair", - "CUdeviceptr", "cudaDeviceAttr", "CUmodule", "CUjit_option", - "CUfunction", "cudaMemcpyKind", "cudaComputeMode", + "cudaDeviceProp", "cudaGraphExecUpdateResult", "cudaPitchedPtr", + "thrust::counting_iterator", "thrust::transform_iterator", + "thrust::permutation_iterator", "thrust::iterator_difference", + "cusolverDnHandle_t", "cusolverDnParams_t", "gesvdjInfo_t", + "syevjInfo_t", "thrust::device_malloc_allocator", + "thrust::divides", "thrust::tuple", "thrust::maximum", + "thrust::multiplies", "thrust::plus", "cudaDataType_t", + "cudaError_t", "CUresult", "CUdevice", "cudaEvent_t", + "cublasStatus_t", "cuComplex", "cuFloatComplex", + "cuDoubleComplex", "CUevent", "cublasFillMode_t", + "cublasDiagType_t", "cublasSideMode_t", "cublasOperation_t", + "cusolverStatus_t", "cusolverEigType_t", "cusolverEigMode_t", + "curandStatus_t", "cudaStream_t", "cusparseStatus_t", + "cusparseDiagType_t", "cusparseFillMode_t", "cusparseIndexBase_t", + "cusparseMatrixType_t", "cusparseAlgMode_t", + "cusparseOperation_t", "cusparseMatDescr_t", "cusparseHandle_t", + "CUcontext", "cublasPointerMode_t", "cusparsePointerMode_t", + "cublasGemmAlgo_t", "cusparseSolveAnalysisInfo_t", "cudaDataType", + "cublasDataType_t", "curandState_t", "curandState", + "curandStateXORWOW_t", "curandStateXORWOW", + "curandStatePhilox4_32_10_t", "curandStatePhilox4_32_10", + "curandStateMRG32k3a_t", "curandStateMRG32k3a", "thrust::minus", + "thrust::negate", "thrust::logical_or", "thrust::equal_to", + "thrust::less", "cudaSharedMemConfig", "curandGenerator_t", + "curandRngType_t", "curandOrdering_t", "cufftHandle", "cufftReal", + "cufftDoubleReal", "cufftComplex", "cufftDoubleComplex", + "cufftResult_t", "cufftResult", "cufftType_t", "cufftType", + "thrust::pair", "CUdeviceptr", "cudaDeviceAttr", "CUmodule", + "CUjit_option", "CUfunction", "cudaMemcpyKind", "cudaComputeMode", "__nv_bfloat16", "cooperative_groups::__v1::thread_group", "cooperative_groups::__v1::thread_block", "libraryPropertyType_t", "libraryPropertyType", "cudaDataType_t", "cudaDataType", @@ -1750,12 +1750,13 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) { .bind("cudaTypeDef"), this); - MF.addMatcher(typeLoc(loc(qualType(hasDeclaration(namedDecl(hasAnyName( - "cooperative_groups::__v1::coalesced_group", - "cooperative_groups::__v1::thread_block_tile", - "cudaGraph_t", "cudaGraphExec_t")))))) - .bind("cudaTypeDefEA"), - this); + MF.addMatcher( + typeLoc(loc(qualType(hasDeclaration(namedDecl(hasAnyName( + "cooperative_groups::__v1::coalesced_group", + "cooperative_groups::__v1::thread_block_tile", "cudaGraph_t", + "cudaGraphExec_t", "cudaGraphNode_t")))))) + .bind("cudaTypeDefEA"), + this); MF.addMatcher(varDecl(hasType(classTemplateSpecializationDecl( hasAnyTemplateArgument(refersToType(hasDeclaration( namedDecl(hasName("use_default")))))))) @@ -2284,6 +2285,11 @@ void TypeInDeclRule::runRule(const MatchFinder::MatchResult &Result) { std::string CanonicalTypeStr = DpctGlobalInfo::getUnqualifiedTypeName( TL->getType().getCanonicalType()); + if (CanonicalTypeStr == "cudaGraphExecUpdateResult") { + report(TL->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, + CanonicalTypeStr); + return; + } if (CanonicalTypeStr == "cooperative_groups::__v1::thread_group" || CanonicalTypeStr == "cooperative_groups::__v1::thread_block") { if (auto ETL = TL->getUnqualifiedLoc().getAs()) { @@ -2883,9 +2889,16 @@ void VectorTypeOperatorRule::registerMatcher(MatchFinder &MF) { this); // Matches call of user overloaded operator - MF.addMatcher(cxxOperatorCallExpr(callee(vectorTypeOverLoadedOperator())) - .bind("callOverloadedOperator"), + MF.addMatcher(cxxOperatorCallExpr(callee(vectorTypeOverLoadedOperator()), + hasAncestor(vectorTypeOverLoadedOperator())) + .bind("callOverloadedOperatorInOverloadedOperator"), this); + + MF.addMatcher( + cxxOperatorCallExpr(callee(vectorTypeOverLoadedOperator()), + unless(hasAncestor(vectorTypeOverLoadedOperator()))) + .bind("callOverloadedOperatorNotInOverloadedOperator"), + this); } const char VectorTypeOperatorRule::NamespaceName[] = @@ -2967,10 +2980,15 @@ void VectorTypeOperatorRule::MigrateOverloadedOperatorDecl( } void VectorTypeOperatorRule::MigrateOverloadedOperatorCall( - const MatchFinder::MatchResult &Result, const CXXOperatorCallExpr *CE) { + const MatchFinder::MatchResult &Result, const CXXOperatorCallExpr *CE, + bool InOverloadedOperator) { if (!CE) return; - + if (!InOverloadedOperator && + (DpctGlobalInfo::findAncestor(CE) || + DpctGlobalInfo::findAncestor(CE))) { + return; + } // Explicitly call user overloaded operator // // For non-assignment operator: @@ -3004,8 +3022,17 @@ void VectorTypeOperatorRule::runRule(const MatchFinder::MatchResult &Result) { Result, getNodeAsType(Result, "overloadedOperatorDecl")); // Explicitly call user overloaded operator - MigrateOverloadedOperatorCall(Result, getNodeAsType( - Result, "callOverloadedOperator")); + MigrateOverloadedOperatorCall( + Result, + getNodeAsType( + Result, "callOverloadedOperatorInOverloadedOperator"), + true); + + MigrateOverloadedOperatorCall( + Result, + getNodeAsType( + Result, "callOverloadedOperatorNotInOverloadedOperator"), + false); } REGISTER_RULE(VectorTypeOperatorRule, PassKind::PK_Migration) @@ -6781,8 +6808,8 @@ EventQueryTraversal::buildCallReplacement(const CallExpr *Call) { static std::string MemberName = "get_info<" + MapNames::getClNamespace() + "info::event::command_execution_status>"; std::string ReplStr; - MemberCallPrinter Printer(Call->getArg(0), true, - MemberName); + MemberCallPrinter Printer(Call->getArg(0), + true, MemberName); llvm::raw_string_ostream OS(ReplStr); Printer.print(OS); return new ReplaceStmt(Call, std::move(OS.str())); @@ -13474,7 +13501,7 @@ void TextureRule::runRule(const MatchFinder::MatchResult &Result) { std::shared_ptr Rewriter = std::make_shared( CE, std::make_shared>>( + const Expr *, RenameWithSuffix, false, StringRef>>>( CE, Name, CE->getArg(0), true, RenameWithSuffix("set", MethodName), Value)); std::optional Result = Rewriter->rewrite(); @@ -13832,8 +13859,8 @@ bool TextureRule::SettersMerger::applyResult() { std::string ReplacedText; llvm::raw_string_ostream OS(ReplacedText); - MemberCallPrinter> Printer( - D->getName(), IsArrow, "set", std::move(ArgsList)); + MemberCallPrinter> + Printer(D->getName(), IsArrow, "set", std::move(ArgsList)); Printer.print(OS); Inserter.success(OS.str()); @@ -15127,7 +15154,8 @@ REGISTER_RULE(AssertRule, PassKind::PK_Migration) void GraphRule::registerMatcher(MatchFinder &MF) { auto functionName = [&]() { return hasAnyName("cudaGraphInstantiate", "cudaGraphLaunch", - "cudaGraphExecDestroy"); + "cudaGraphExecDestroy", "cudaGraphAddEmptyNode", + "cudaGraphAddDependencies", "cudaGraphExecUpdate"); }; MF.addMatcher( callExpr(callee(functionDecl(functionName()))).bind("FunctionCall"), diff --git a/clang/lib/DPCT/ASTTraversal.h b/clang/lib/DPCT/ASTTraversal.h index 88e92fa391ae..57ba49a649b4 100644 --- a/clang/lib/DPCT/ASTTraversal.h +++ b/clang/lib/DPCT/ASTTraversal.h @@ -574,7 +574,7 @@ class VectorTypeOperatorRule const FunctionDecl *FD); void MigrateOverloadedOperatorCall( const ast_matchers::MatchFinder::MatchResult &Result, - const CXXOperatorCallExpr *CE); + const CXXOperatorCallExpr *CE, bool InOverloadedOperator); private: static const char NamespaceName[]; diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index c56015bd3e69..8542cba7ce06 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -4027,11 +4027,12 @@ void CallFunctionExpr::buildCallExprInfo(const CXXConstructExpr *Ctor) { SourceLocation InsertLocation; auto &SM = DpctGlobalInfo::getSourceManager(); - if (FuncInfo) { - if (FuncInfo->NonDefaultParamNum) { - if (Ctor->getNumArgs() >= FuncInfo->NonDefaultParamNum) { + auto Info = getFuncInfo(); + if (Info) { + if (Info->NonDefaultParamNum) { + if (Ctor->getNumArgs() >= Info->NonDefaultParamNum) { InsertLocation = - Ctor->getArg(FuncInfo->NonDefaultParamNum - 1)->getEndLoc(); + Ctor->getArg(Info->NonDefaultParamNum - 1)->getEndLoc(); } else { ExtraArgLoc = 0; return; @@ -4073,12 +4074,12 @@ void CallFunctionExpr::buildCallExprInfo(const CallExpr *CE) { } else { HasArgs = CE->getNumArgs(); } - - if (FuncInfo) { - if (FuncInfo->ParamsNum == 0) { + auto Info = getFuncInfo(); + if (Info) { + if (Info->ParamsNum == 0) { ExtraArgLoc = DpctGlobalInfo::getSourceManager().getFileOffset(CE->getRParenLoc()); - } else if (FuncInfo->NonDefaultParamNum == 0) { + } else if (Info->NonDefaultParamNum == 0) { // if all params have default value if (CE->getNumArgs()) { ExtraArgLoc = DpctGlobalInfo::getSourceManager().getFileOffset( @@ -4090,14 +4091,14 @@ void CallFunctionExpr::buildCallExprInfo(const CallExpr *CE) { } else { // if some params have default value, set ExtraArgLoc to the location // before the comma - if (CE->getNumArgs() > FuncInfo->NonDefaultParamNum - 1) { + if (CE->getNumArgs() > Info->NonDefaultParamNum - 1) { auto &SM = DpctGlobalInfo::getSourceManager(); auto CERange = getDefinitionRange(CE->getBeginLoc(), CE->getEndLoc()); auto TempLoc = Lexer::getLocForEndOfToken( CERange.getEnd(), 0, SM, DpctGlobalInfo::getContext().getLangOpts()); auto PairRange = getRangeInRange( - CE->getArg(FuncInfo->NonDefaultParamNum - 1 + HasImplicitArg), + CE->getArg(Info->NonDefaultParamNum - 1 + HasImplicitArg), CERange.getBegin(), TempLoc); auto RealEnd = PairRange.second; auto IT = dpct::DpctGlobalInfo::getExpansionRangeToMacroRecord().find( @@ -4105,11 +4106,11 @@ void CallFunctionExpr::buildCallExprInfo(const CallExpr *CE) { if (IT != dpct::DpctGlobalInfo::getExpansionRangeToMacroRecord().end() && IT->second->TokenIndex == IT->second->NumTokens) { - RealEnd = SM.getImmediateExpansionRange( - CE->getArg(FuncInfo->NonDefaultParamNum - 1 + - HasImplicitArg) - ->getEndLoc()) - .getEnd(); + RealEnd = + SM.getImmediateExpansionRange( + CE->getArg(Info->NonDefaultParamNum - 1 + HasImplicitArg) + ->getEndLoc()) + .getEnd(); RealEnd = Lexer::getLocForEndOfToken( RealEnd, 0, SM, DpctGlobalInfo::getContext().getLangOpts()); IT = dpct::DpctGlobalInfo::getExpansionRangeToMacroRecord().find( @@ -4181,11 +4182,11 @@ std::string CallFunctionExpr::getTemplateArguments(bool &IsNeedWarning, return (Result.empty()) ? Result : Result.erase(Result.size() - 2); } std::string CallFunctionExpr::getExtraArguments() { - if (!FuncInfo) + auto Info = getFuncInfo(); + if (!Info) return ""; - return getVarMap().getExtraCallArguments(FuncInfo->NonDefaultParamNum, - FuncInfo->ParamsNum - - FuncInfo->NonDefaultParamNum); + return getVarMap().getExtraCallArguments( + Info->NonDefaultParamNum, Info->ParamsNum - Info->NonDefaultParamNum); } std::shared_ptr CallFunctionExpr::addTextureObjectArgInfo( unsigned ArgIdx, std::shared_ptr Info) { @@ -4247,14 +4248,12 @@ std::shared_ptr CallFunctionExpr::addTextureObjectArg( return std::shared_ptr(); } void CallFunctionExpr::setFuncInfo(std::shared_ptr Info) { - if (FuncInfo && Info && (FuncInfo != Info)) { - if (!FuncInfo->getVarMap().isSameAs(Info->getVarMap())) { - DiagnosticsUtils::report(getFilePath(), getOffset(), - Warnings::DEVICE_CALL_DIFFERENT, true, false, - FuncInfo->getFunctionName()); - } + if (!Info) { + return; + } + if (std::find(FuncInfo.begin(), FuncInfo.end(), Info) == FuncInfo.end()) { + FuncInfo.push_back(Info); } - FuncInfo = Info; } void CallFunctionExpr::buildCalleeInfo(const Expr *Callee, std::optional NumArgs) { @@ -4417,12 +4416,13 @@ void CallFunctionExpr::buildTextureObjectArgsInfo(const CallT *C) { ArgItr++; } } -void CallFunctionExpr::mergeTextureObjectInfo() { +void CallFunctionExpr::mergeTextureObjectInfo( + std::shared_ptr Info) { if (BaseTextureObject) - BaseTextureObject->merge(FuncInfo->getBaseTextureObject()); + BaseTextureObject->merge(Info->getBaseTextureObject()); for (unsigned Idx = 0; Idx < TextureObjectList.size(); ++Idx) { if (auto &Obj = TextureObjectList[Idx]) { - Obj->merge(FuncInfo->getTextureObject(Idx)); + Obj->merge(Info->getTextureObject(Idx)); } } } @@ -6484,41 +6484,61 @@ void deduceTemplateArgument(std::vector &TAIList, deduceTemplateArgumentFromType(TAIList, ParmType, ArgType, TL); } +std::shared_ptr CallFunctionExpr::getFuncInfo() { + if (FuncInfo.empty()) { + return std::shared_ptr(); + } + return FuncInfo.front(); +} + void CallFunctionExpr::buildInfo() { - if (!FuncInfo) - return; + for (auto &Info : FuncInfo) { + const clang::tooling::UnifiedPath &DefFilePath = + Info->getDefinitionFilePath(); + // SYCL_EXTERNAL macro is not needed if the device function is lambda + // expression, becuase 'sycl_device' attribute cannot be applied or will be + // ignored. + // + // e.g., + // [] (T a, T b ) -> SYCL_EXTERNAL T { return a * b; } + // [] (T a, T b ) SYCL_EXTERNAL { return a * b; } + // + // Intel(R) oneAPI DPC++ Compiler emits warning of ignoring SYCL_EXTERNAL in + // the first example and emits error when compiling the second example. + // + // TODO: Need to revisit the condition to add SYCL_EXTERNAL macro if issues + // are observed in the future. + if (!DefFilePath.getCanonicalPath().empty() && + DefFilePath != getFilePath() && + !isIncludedFile(getFilePath(), DefFilePath) && !Info->isLambda()) { + Info->setNeedSyclExternMacro(); + } + + if (DpctGlobalInfo::isOptimizeMigration() && !Info->isInlined() && + !Info->IsSyclExternMacroNeeded()) { + if (Info->isKernel()) + Info->setForceInlineDevFunc(); + else + Info->setAlwaysInlineDevFunc(); + } - const clang::tooling::UnifiedPath &DefFilePath = - FuncInfo->getDefinitionFilePath(); - // SYCL_EXTERNAL macro is not needed if the device function is lambda - // expression, becuase 'sycl_device' attribute cannot be applied or will be - // ignored. - // - // e.g., - // [] (T a, T b ) -> SYCL_EXTERNAL T { return a * b; } - // [] (T a, T b ) SYCL_EXTERNAL { return a * b; } - // - // Intel(R) oneAPI DPC++ Compiler emits warning of ignoring SYCL_EXTERNAL in - // the first example and emits error when compiling the second example. - // - // TODO: Need to revisit the condition to add SYCL_EXTERNAL macro if issues - // are observed in the future. - if (!DefFilePath.getCanonicalPath().empty() && DefFilePath != getFilePath() && - !isIncludedFile(getFilePath(), DefFilePath) && !FuncInfo->isLambda()) { - FuncInfo->setNeedSyclExternMacro(); + Info->buildInfo(); } - - if (DpctGlobalInfo::isOptimizeMigration() && !FuncInfo->isInlined() && - !FuncInfo->IsSyclExternMacroNeeded()) { - if (FuncInfo->isKernel()) - FuncInfo->setForceInlineDevFunc(); - else - FuncInfo->setAlwaysInlineDevFunc(); + size_t FuncInfoSize = FuncInfo.size(); + if (FuncInfoSize) { + VarMap.merge(FuncInfo.front()->getVarMap(), TemplateArgs); + mergeTextureObjectInfo(FuncInfo.front()); + } + for (size_t i = 0; i < FuncInfoSize; i++) { + for (size_t j = i + 1; j < FuncInfoSize; j++) { + if (!FuncInfo[i]->getVarMap().isSameAs(FuncInfo[j]->getVarMap())) { + DiagnosticsUtils::report(getFilePath(), getOffset(), + Warnings::DEVICE_CALL_DIFFERENT, true, false, + FuncInfo[i]->getFunctionName()); + return; + } + } } - - FuncInfo->buildInfo(); - VarMap.merge(FuncInfo->getVarMap(), TemplateArgs); - mergeTextureObjectInfo(); } bool isInSameLine(SourceLocation First, SourceLocation Second, diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index ee1e787318ca..4f883cb928cf 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -2381,7 +2381,7 @@ class CallFunctionExpr { virtual std::shared_ptr addTextureObjectArg(unsigned ArgIdx, const ArraySubscriptExpr *TexRef, bool isKernelCall = false); - std::shared_ptr getFuncInfo() { return FuncInfo; } + std::shared_ptr getFuncInfo(); bool IsAllTemplateArgsSpecified = false; virtual ~CallFunctionExpr() = default; @@ -2410,12 +2410,12 @@ class CallFunctionExpr { void buildTextureObjectArgsInfo(const CallExpr *CE); template void buildTextureObjectArgsInfo(const CallT *C); - void mergeTextureObjectInfo(); + void mergeTextureObjectInfo(std::shared_ptr Info); const clang::tooling::UnifiedPath FilePath; unsigned Offset = 0; unsigned ExtraArgLoc = 0; - std::shared_ptr FuncInfo; + std::vector> FuncInfo; std::vector TemplateArgs; // diff --git a/clang/lib/DPCT/CUBAPIMigration.cpp b/clang/lib/DPCT/CUBAPIMigration.cpp index c361befaf209..70d88ed3f85a 100644 --- a/clang/lib/DPCT/CUBAPIMigration.cpp +++ b/clang/lib/DPCT/CUBAPIMigration.cpp @@ -21,6 +21,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/OperationKinds.h" +#include "clang/AST/PrettyPrinter.h" #include "clang/AST/Stmt.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" @@ -39,6 +40,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -862,6 +864,27 @@ void CubRule::processThreadLevelFuncCall(const CallExpr *CE, } } +static std::string GetFunctionName(const CallExpr *CE) { + std::string s; + llvm::raw_string_ostream OS(s); + if (isa(CE)) { + CE->getDirectCallee()->getNameForDiagnostic( + OS, DpctGlobalInfo::getContext().getLangOpts(), /*Qualified=*/true); + } else { + OS << "cub::" << CE->getDirectCallee()->getName(); + } + + OS << '('; + for (unsigned I = 0, E = CE->getNumArgs(); I != E; ++I) { + auto *Arg = CE->getArg(I); + Arg->getType().print(OS, DpctGlobalInfo::getContext().getLangOpts()); + if (I < E - 1) + OS << ", "; + } + OS << ')'; + return s; +} + void CubRule::processWarpLevelFuncCall(const CallExpr *CE, bool FuncCallUsed) { std::string Repl; size_t WarpSize = 32; @@ -894,7 +917,7 @@ void CubRule::processWarpLevelFuncCall(const CallExpr *CE, bool FuncCallUsed) { } } else { report(CE->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(CE)); } } } @@ -971,7 +994,7 @@ void CubRule::processBlockLevelMemberCall(const CXXMemberCallExpr *BlockMC) { IsReferenceOutput = true; } else { report(BlockMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(BlockMC)); return; } } else { @@ -1016,7 +1039,7 @@ void CubRule::processBlockLevelMemberCall(const CXXMemberCallExpr *BlockMC) { HT_DPCT_DPL_Utils); } else { report(BlockMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(BlockMC)); return; } } @@ -1044,7 +1067,7 @@ void CubRule::processBlockLevelMemberCall(const CXXMemberCallExpr *BlockMC) { ->getType() ->isLValueReferenceType()) { report(BlockMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(BlockMC)); return; } GroupOrWorkitem = DpctGlobalInfo::getItem(BlockMC); @@ -1098,7 +1121,7 @@ void CubRule::processBlockLevelMemberCall(const CXXMemberCallExpr *BlockMC) { HT_DPCT_DPL_Utils); } else { report(BlockMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(BlockMC)); return; } } @@ -1126,7 +1149,7 @@ void CubRule::processBlockLevelMemberCall(const CXXMemberCallExpr *BlockMC) { ->getType() ->isLValueReferenceType()) { report(BlockMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(BlockMC)); return; } GroupOrWorkitem = DpctGlobalInfo::getItem(BlockMC); @@ -1140,7 +1163,7 @@ void CubRule::processBlockLevelMemberCall(const CXXMemberCallExpr *BlockMC) { } } else { report(BlockMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(BlockMC)); return; } if (IsReferenceOutput) { @@ -1275,7 +1298,7 @@ void CubRule::processBlockLevelMemberCall(const CXXMemberCallExpr *BlockMC) { ValidItemParamIdx = 1; } else { report(BlockMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(BlockMC)); return; } std::string In; @@ -1352,7 +1375,7 @@ void CubRule::processWarpLevelMemberCall(const CXXMemberCallExpr *WarpMC) { OpRepl = getOpRepl(FuncArgs[3]); } else { report(WarpMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(WarpMC)); return; } NewFuncName = "exclusive_scan_over_group"; @@ -1367,7 +1390,7 @@ void CubRule::processWarpLevelMemberCall(const CXXMemberCallExpr *WarpMC) { NewFuncName = "inclusive_scan_over_group"; } else { report(WarpMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(WarpMC)); return; } ExprAnalysis InEA(InData); @@ -1413,7 +1436,7 @@ void CubRule::processWarpLevelMemberCall(const CXXMemberCallExpr *WarpMC) { } default: report(WarpMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(WarpMC)); return; } emplaceTransformation(new ReplaceStmt(WarpMC, Repl)); @@ -1439,7 +1462,7 @@ void CubRule::processWarpLevelMemberCall(const CXXMemberCallExpr *WarpMC) { ", " + OpRepl + ")"; } else { report(WarpMC->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, - "cub::" + FuncName); + GetFunctionName(WarpMC)); return; } emplaceTransformation(new ReplaceStmt(WarpMC, Repl)); diff --git a/clang/lib/DPCT/CallExprRewriter.h b/clang/lib/DPCT/CallExprRewriter.h index 9fecd3465d95..6bbcbfdaa676 100644 --- a/clang/lib/DPCT/CallExprRewriter.h +++ b/clang/lib/DPCT/CallExprRewriter.h @@ -690,6 +690,10 @@ template void printMemberOp(StreamT &Stream, bool IsArrow) { Stream << "."; } +template void printDisambiguator(StreamT &Stream) { + Stream << "template "; +} + template void printCapture(StreamT &Stream, bool IsCaptureRef) { if (IsCaptureRef) @@ -923,7 +927,7 @@ class ArgsPrinter template void printBase(StreamT &Stream, std::pair P, - bool IsArrow) { + bool IsArrow, bool NeedDisambiguator) { { std::unique_ptr> Paren; if (needExtraParensInMemberExpr(P.second)) @@ -931,10 +935,13 @@ void printBase(StreamT &Stream, std::pair P, print(Stream, P); } printMemberOp(Stream, IsArrow); + if (NeedDisambiguator) + printDisambiguator(Stream); } template -void printBase(StreamT &Stream, const Expr *E, bool IsArrow) { +void printBase(StreamT &Stream, const Expr *E, bool IsArrow, + bool NeedDisambiguator) { { std::unique_ptr> Paren; if (needExtraParensInMemberExpr(E)) @@ -942,15 +949,23 @@ void printBase(StreamT &Stream, const Expr *E, bool IsArrow) { print(Stream, E); } printMemberOp(Stream, IsArrow); + if (NeedDisambiguator) + printDisambiguator(Stream); } template -void printBase(StreamT &Stream, const DerefExpr &D, bool) { +void printBase(StreamT &Stream, const DerefExpr &D, bool, + bool NeedDisambiguator) { D.printMemberBase(Stream); + if (NeedDisambiguator) + printDisambiguator(Stream); } template -void printBase(StreamT &Stream, const T &Val, bool IsArrow) { +void printBase(StreamT &Stream, const T &Val, bool IsArrow, + bool NeedDisambiguator) { print(Stream, Val); printMemberOp(Stream, IsArrow); + if (NeedDisambiguator) + printDisambiguator(Stream); } template class CallExprPrinter { @@ -1009,17 +1024,24 @@ template class TypeNamePrinter { } }; -template class MemberExprPrinter { +template +class MemberExprPrinter { BaseT Base; bool IsArrow; MemberT MemberName; + bool IsBaseDependentType = false; public: MemberExprPrinter(const BaseT &Base, bool IsArrow, MemberT MemberName) - : Base(Base), IsArrow(IsArrow), MemberName(MemberName) {} + : Base(Base), IsArrow(IsArrow), MemberName(MemberName) { + if constexpr (std::is_same_v) { + IsBaseDependentType = Base->getType()->isDependentType(); + } + } template void print(StreamT &Stream) const { - printBase(Stream, Base, IsArrow); + printBase(Stream, Base, IsArrow, + HasExplicitTemplateArg && IsBaseDependentType); dpct::print(Stream, MemberName); } }; @@ -1038,15 +1060,20 @@ template class StaticMemberExprPrinter { } }; -template +template class MemberCallPrinter - : public CallExprPrinter, CallArgsT...> { + : public CallExprPrinter< + MemberExprPrinter, + CallArgsT...> { public: MemberCallPrinter(const BaseT &Base, bool IsArrow, MemberT MemberName, CallArgsT &&...Args) - : CallExprPrinter, CallArgsT...>( - MemberExprPrinter(std::move(Base), IsArrow, - std::move(MemberName)), + : CallExprPrinter< + MemberExprPrinter, + CallArgsT...>( + MemberExprPrinter( + std::move(Base), IsArrow, std::move(MemberName)), std::forward(Args)...) {} }; @@ -1409,32 +1436,35 @@ class TemplatedCallExprRewriter template class MemberExprRewriter - : public PrinterRewriter> { + : public PrinterRewriter> { public: MemberExprRewriter( const CallExpr *C, StringRef Source, const std::function &BaseCreator, bool IsArrow, const std::function &MemberCreator) - : PrinterRewriter>( + : PrinterRewriter>( C, Source, BaseCreator(C), IsArrow, MemberCreator(C)) {} }; -template +template class MemberCallExprRewriter - : public PrinterRewriter> { + : public PrinterRewriter> { public: MemberCallExprRewriter( const CallExpr *C, StringRef Source, const std::function &BaseCreator, bool IsArrow, StringRef Member, const std::function &...ArgsCreator) - : PrinterRewriter>( + : PrinterRewriter>( C, Source, BaseCreator(C), IsArrow, Member, ArgsCreator(C)...) {} MemberCallExprRewriter( const CallExpr *C, StringRef Source, const BaseT &BaseCreator, bool IsArrow, StringRef Member, const std::function &...ArgsCreator) - : PrinterRewriter>( + : PrinterRewriter>( C, Source, BaseCreator, IsArrow, Member, ArgsCreator(C)...) {} }; diff --git a/clang/lib/DPCT/CallExprRewriterCommon.h b/clang/lib/DPCT/CallExprRewriterCommon.h index c186e71ea472..caa98cf18cad 100644 --- a/clang/lib/DPCT/CallExprRewriterCommon.h +++ b/clang/lib/DPCT/CallExprRewriterCommon.h @@ -370,33 +370,32 @@ inline std::function makeDeviceStr() { }; } -template -using MemberCallPrinterCreator = - PrinterCreator, - std::function, bool, std::string, - std::function...>; - -template -inline std::function< - MemberCallPrinter(const CallExpr *)> +template +using MemberCallPrinterCreator = PrinterCreator< + MemberCallPrinter, + std::function, bool, std::string, + std::function...>; + +template +inline std::function(const CallExpr *)> makeMemberCallCreator(std::function BaseFunc, bool IsArrow, std::string Member, std::function... Args) { - return MemberCallPrinterCreator(BaseFunc, IsArrow, - Member, Args...); + return MemberCallPrinterCreator( + BaseFunc, IsArrow, Member, Args...); } -template +template inline std::function< - MemberCallPrinter(const CallExpr *)> + MemberCallPrinter(const CallExpr *)> makeMemberCallCreator(std::function BaseFunc, bool IsArrow, std::function Member) { - - return PrinterCreator, - std::function, bool, - std::function>(BaseFunc, IsArrow, - Member); + return PrinterCreator< + MemberCallPrinter, + std::function, bool, + std::function>(BaseFunc, IsArrow, Member); } template @@ -623,10 +622,10 @@ makeArgWithAddressSpaceCast(int ArgIdx) { } template -inline std::function(const CallExpr *)> +inline std::function(const CallExpr *)> makeMemberExprCreator(std::function Base, bool IsArrow, std::function Member) { - return PrinterCreator, + return PrinterCreator, std::function, bool, std::function>(Base, IsArrow, Member); @@ -1144,7 +1143,8 @@ createNewDeleteRewriterFactory( } template -inline std::shared_ptr createMemberExprRewriterFactory( +inline std::shared_ptr +createMemberExprRewriterFactory( const std::string &SourceName, std::function &&BaseCreator, bool IsArrow, std::function &&MemberCreator) { @@ -1217,7 +1217,7 @@ createTemplatedCallExprRewriterFactory( /// \p BaseCreator use to get base expr from original call expr. /// \p IsArrow the member operator is arrow or dot as default. /// \p ArgsCreator use to get call args from original call expr. -template +template inline std::shared_ptr createMemberCallExprRewriterFactory( const std::string &SourceName, @@ -1225,7 +1225,7 @@ createMemberCallExprRewriterFactory( std::string MemberName, std::function... ArgsCreator) { return std::make_shared, + MemberCallExprRewriter, std::function, bool, std::string, std::function...>>( SourceName, @@ -1234,15 +1234,15 @@ createMemberCallExprRewriterFactory( std::forward>(ArgsCreator)...); } -template +template inline std::shared_ptr createMemberCallExprRewriterFactory( const std::string &SourceName, BaseT BaseCreator, bool IsArrow, std::string MemberName, std::function... ArgsCreator) { return std::make_shared, BaseT, bool, std::string, - std::function...>>( + MemberCallExprRewriter, BaseT, + bool, std::string, std::function...>>( SourceName, BaseCreator, IsArrow, MemberName, std::forward>(ArgsCreator)...); } @@ -1507,19 +1507,19 @@ createBindTextureRewriterFactory(const std::string &Source) { return std::make_shared( makePointerChecker(StartIdx + 0), - createMemberCallExprRewriterFactory( + createMemberCallExprRewriterFactory( Source, makeDerefExprCreator(StartIdx + 0), true, "attach", makeCallArgCreator(StartIdx + 1), makeCallArgCreator(StartIdx + Idx + 1)..., makeDerefExprCreator(StartIdx + 2)), std::make_shared( TypeChecker, - createMemberCallExprRewriterFactory( + createMemberCallExprRewriterFactory( Source, makeCallArgCreatorWithCall(StartIdx + 0), false, "attach", makeCallArgCreatorWithCall(StartIdx + 1), makeCallArgCreatorWithCall(StartIdx + Idx + 1)..., makeCallArgCreatorWithCall(StartIdx + 2)), - createMemberCallExprRewriterFactory( + createMemberCallExprRewriterFactory( Source, makeCallArgCreatorWithCall(StartIdx + 0), false, "attach", makeCallArgCreatorWithCall(StartIdx + 1), makeCallArgCreatorWithCall(StartIdx + Idx)...))); @@ -2038,7 +2038,9 @@ const std::string MipmapNeedBindlessImage = #define UO(Op, E) makeUnaryOperatorCreator(E) #define BO(Op, L, R) makeBinaryOperatorCreator(L, R) #define PAREN(E) makeParenExprCreator(E) -#define MEMBER_CALL(...) makeMemberCallCreator(__VA_ARGS__) +#define MEMBER_CALL(...) makeMemberCallCreator(__VA_ARGS__) +#define MEMBER_CALL_HAS_EXPLICIT_TEMP_ARG(...) \ + makeMemberCallCreator(__VA_ARGS__) #define MEMBER_EXPR(...) makeMemberExprCreator(__VA_ARGS__) #define STATIC_MEMBER_EXPR(...) makeStaticMemberExprCreator(__VA_ARGS__) #define LAMBDA(...) makeLambdaCreator(__VA_ARGS__) @@ -2100,8 +2102,11 @@ const std::string MipmapNeedBindlessImage = #define CALL_FACTORY_ENTRY(FuncName, C) \ std::make_pair(FuncName, createCallExprRewriterFactory(FuncName, C)), #define MEMBER_CALL_FACTORY_ENTRY(FuncName, ...) \ - std::make_pair(FuncName, \ - createMemberCallExprRewriterFactory(FuncName, __VA_ARGS__)), + std::make_pair(FuncName, createMemberCallExprRewriterFactory( \ + FuncName, __VA_ARGS__)), +#define MEMBER_CALL_HAS_EXPLICIT_TEMP_ARG_FACTORY_ENTRY(FuncName, ...) \ + std::make_pair(FuncName, createMemberCallExprRewriterFactory( \ + FuncName, __VA_ARGS__)), #define ARRAYSUBSCRIPT_EXPR_FACTORY_ENTRY(FuncName, ...) \ std::make_pair(FuncName, createArraySubscriptExprRewriterFactory( \ FuncName, __VA_ARGS__)), diff --git a/clang/lib/DPCT/CallExprRewriterTexture.cpp b/clang/lib/DPCT/CallExprRewriterTexture.cpp index a3cc85149c8a..ad3645324d7e 100644 --- a/clang/lib/DPCT/CallExprRewriterTexture.cpp +++ b/clang/lib/DPCT/CallExprRewriterTexture.cpp @@ -22,7 +22,7 @@ class TextureReadRewriterFactory : public CallExprRewriterFactoryBase { std::shared_ptr createRewriter(const CallExpr *C, bool RetAssign, BaseT Base) const { const static std::string MemberName = "read"; - using ReaderPrinter = decltype(makeMemberCallCreator( + using ReaderPrinter = decltype(makeMemberCallCreator( std::declval>(), false, MemberName, makeCallArgCreatorWithCall(Idx)...)(C)); if (RetAssign) { diff --git a/clang/lib/DPCT/MemberExprRewriter.cpp b/clang/lib/DPCT/MemberExprRewriter.cpp index c31e27e40c98..b127c13a9c80 100644 --- a/clang/lib/DPCT/MemberExprRewriter.cpp +++ b/clang/lib/DPCT/MemberExprRewriter.cpp @@ -40,7 +40,7 @@ template class MEMemberExprPrinter { : Base(Base), IsArrow(IsArrow), MemberName(MemberName) {} template void print(StreamT &Stream) const { - printBase(Stream, Base, IsArrow); + printBase(Stream, Base, IsArrow, false); dpct::print(Stream, MemberName); } }; diff --git a/clang/lib/DPCT/Rewriters/Math/RewriterHalfPrecisionConversionAndDataMovement.cpp b/clang/lib/DPCT/Rewriters/Math/RewriterHalfPrecisionConversionAndDataMovement.cpp index c28986414cec..e1f86eb4c88f 100644 --- a/clang/lib/DPCT/Rewriters/Math/RewriterHalfPrecisionConversionAndDataMovement.cpp +++ b/clang/lib/DPCT/Rewriters/Math/RewriterHalfPrecisionConversionAndDataMovement.cpp @@ -30,10 +30,10 @@ RewriterMap dpct::createHalfPrecisionConversionAndDataMovementRewriterMap() { CALL(MapNames::getClNamespace() + "ext::intel::math::float2half_rn", ARRAY_SUBSCRIPT(ARG(0), LITERAL("1")))))), - MEMBER_CALL_FACTORY_ENTRY("__float22half2_rn", ARG(0), false, - "convert<" + MapNames::getClNamespace() + - "half, " + MapNames::getClNamespace() + - "rounding_mode::rte>")) + MEMBER_CALL_HAS_EXPLICIT_TEMP_ARG_FACTORY_ENTRY( + "__float22half2_rn", ARG(0), false, + "convert<" + MapNames::getClNamespace() + "half, " + + MapNames::getClNamespace() + "rounding_mode::rte>")) // __float2half CONDITIONAL_FACTORY_ENTRY( math::UseIntelDeviceMath, @@ -169,10 +169,10 @@ RewriterMap dpct::createHalfPrecisionConversionAndDataMovementRewriterMap() { CALL(MapNames::getClNamespace() + "ext::intel::math::half2float", ARRAY_SUBSCRIPT(ARG(0), LITERAL("1")))))), - MEMBER_CALL_FACTORY_ENTRY("__half22float2", ARG(0), false, - "convert")) + MEMBER_CALL_HAS_EXPLICIT_TEMP_ARG_FACTORY_ENTRY( + "__half22float2", ARG(0), false, + "convert")) // __half2float CONDITIONAL_FACTORY_ENTRY( math::UseIntelDeviceMath, diff --git a/clang/runtime/dpct-rt/include/dpct/graph.hpp b/clang/runtime/dpct-rt/include/dpct/graph.hpp index 1830bf22a806..29371c383d29 100644 --- a/clang/runtime/dpct-rt/include/dpct/graph.hpp +++ b/clang/runtime/dpct-rt/include/dpct/graph.hpp @@ -22,6 +22,8 @@ typedef sycl::ext::oneapi::experimental::command_graph< sycl::ext::oneapi::experimental::graph_state::executable> *command_graph_exec_ptr; +typedef sycl::ext::oneapi::experimental::node *node_ptr; + namespace detail { class graph_mgr { public: @@ -82,12 +84,57 @@ static inline bool begin_recording(sycl::queue *queue_ptr) { /// were recorded. /// \param [out] graph A pointer to a command_graph_ptr pointer where the /// command graph will be assigned. -/// \returns `true` if the recording is successfully ended and the graph is -/// assigned. +/// \returns `true` if the recording is successfully ended and the +/// graph is assigned. static inline bool end_recording(sycl::queue *queue_ptr, dpct::experimental::command_graph_ptr *graph) { return detail::graph_mgr::instance().end_recording(queue_ptr, graph); } +/// Adds an empty node to the command graph with optional +/// dependencies. +/// \param [out] newNode A pointer to the node_ptr that will be +/// added to the graph. +/// \param [in] graph A pointer to the command graph. +/// \param [in] dependenciesArray An array of node pointers +/// representing the dependencies of the new node. +/// \param [in] numberOfDependencies The number of dependencies in +/// the dependenciesArray. +static void +add_empty_node(dpct::experimental::node_ptr *newNode, + dpct::experimental::command_graph_ptr graph, + const dpct::experimental::node_ptr *dependenciesArray, + std::size_t numberOfDependencies) { + if (numberOfDependencies == 0) { + *newNode = new sycl::ext::oneapi::experimental::node(graph->add()); + return; + } + std::vector dependencies; + for (std::size_t i = 0; i < numberOfDependencies; i++) { + dependencies.push_back(*dependenciesArray[i]); + } + *newNode = + new sycl::ext::oneapi::experimental::node(graph->add(sycl::property_list{ + sycl::ext::oneapi::experimental::property::node::depends_on( + dependencies)})); +} + +/// Adds dependencies between nodes in the command graph. +/// \param [in] graph A pointer to the command graph. +/// \param [in] fromNodes An array of node pointers representing +/// the source nodes. +/// \param [in] toNodes An array of node pointers representing +/// the destination nodes. +/// \param [in] numberOfDependencies The number of dependencies +/// to be added. +static void add_dependencies(dpct::experimental::command_graph_ptr graph, + const dpct::experimental::node_ptr *fromNodes, + const dpct::experimental::node_ptr *toNodes, + std::size_t numberOfDependencies) { + for (std::size_t i = 0; i < numberOfDependencies; i++) { + graph->make_edge(*fromNodes[i], *toNodes[i]); + } +} + } // namespace experimental } // namespace dpct diff --git a/clang/test/dpct/cub/warplevel/shuffle.cu b/clang/test/dpct/cub/warplevel/shuffle.cu index d933daa071ee..8a09e40d2a03 100644 --- a/clang/test/dpct/cub/warplevel/shuffle.cu +++ b/clang/test/dpct/cub/warplevel/shuffle.cu @@ -52,7 +52,7 @@ __global__ void ShuffleIndexKernel2(int* data) { int input = data[threadid]; int output = 0; // CHECK: /* -// CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cub::ShuffleIndex is not supported. +// CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cub::ShuffleIndex(int, int, unsigned int) is not supported. // CHECK-NEXT: */ // CHECK-NEXT: output = cub::ShuffleIndex<32>(input, 0, 0xaaaaaaaa); output = cub::ShuffleIndex<32>(input, 0, 0xaaaaaaaa); diff --git a/clang/test/dpct/cudaGraph_test.cu b/clang/test/dpct/cudaGraph_test.cu index f3c0dd057647..71fca9a800f1 100644 --- a/clang/test/dpct/cudaGraph_test.cu +++ b/clang/test/dpct/cudaGraph_test.cu @@ -2,7 +2,7 @@ // UNSUPPORTED: v8.0, v9.0, v9.1, v9.2 // RUN: dpct --use-experimental-features=graph --format-range=none -out-root %T/cudaGraph_test %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only --std=c++14 // RUN: FileCheck --input-file %T/cudaGraph_test/cudaGraph_test.dp.cpp --match-full-lines %s -// RUN: %if build_lit %{icpx -c -fsycl %T/cudaGraph_test/cudaGraph_test.dp.cpp -o %T/cudaGraph_test/cudaGraph_test.dp.o %} +// RUN: %if build_lit %{icpx -c -DBUILD_TEST -fsycl %T/cudaGraph_test/cudaGraph_test.dp.cpp -o %T/cudaGraph_test/cudaGraph_test.dp.o %} #include @@ -33,6 +33,43 @@ int main() { // CHECK: dpct::experimental::command_graph_exec_ptr execGraph5, *execGraph6, **execGraph7; cudaGraphExec_t execGraph5, *execGraph6, **execGraph7; + // CHECK: dpct::experimental::node_ptr node; + // CHECK-NEXT: dpct::experimental::node_ptr *node2; + // CHECK-NEXT: dpct::experimental::node_ptr **node3; + cudaGraphNode_t node; + cudaGraphNode_t *node2; + cudaGraphNode_t **node3; + + // CHECK: dpct::experimental::node_ptr node4[10]; + // CHECK-NEXT: dpct::experimental::node_ptr node5[10]; + cudaGraphNode_t node4[10]; + cudaGraphNode_t node5[10]; + + // CHECK: dpct::experimental::node_ptr node6, *node7, **node8; + cudaGraphNode_t node6, *node7, **node8; + + // CHECK: const dpct::experimental::node_ptr node9 = nullptr; + // CHECK-NEXT: const dpct::experimental::node_ptr node10[1] = {node}; + // CHECK-NEXT: const dpct::experimental::node_ptr node11[1] = {*node2}; + const cudaGraphNode_t node9 = nullptr; + const cudaGraphNode_t node10[1] = {node}; + const cudaGraphNode_t node11[1] = {*node2}; + + // CHECK: dpct::experimental::add_empty_node(&node, graph, node4, 10); + cudaGraphAddEmptyNode(&node, graph, node4, 10); + + // CHECK: dpct::experimental::add_empty_node(node2, *graph2, NULL, 0); + cudaGraphAddEmptyNode(node2, *graph2, NULL, 0); + + // CHECK: dpct::experimental::add_empty_node(&node, graph, node10, 1); + cudaGraphAddEmptyNode(&node, graph, node10, 1); + + // CHECK: dpct::experimental::add_dependencies(graph, node4, node5, 10); + cudaGraphAddDependencies(graph, node4, node5, 10); + + // CHECK: dpct::experimental::add_dependencies(graph, node10, node11, 1); + cudaGraphAddDependencies(graph, node10, node11, 1); + // CHECK: execGraph = new sycl::ext::oneapi::experimental::command_graph((*graph2)->finalize()); // CHECK-NEXT: *execGraph2 = new sycl::ext::oneapi::experimental::command_graph(graph->finalize()); // CHECK-NEXT: **execGraph3 = new sycl::ext::oneapi::experimental::command_graph((*graph2)->finalize()); @@ -50,6 +87,13 @@ int main() { cudaGraphLaunch(execGraph, stream); cudaGraphLaunch(*execGraph2, *stream2); +#ifndef DBUILD_TEST + + // CHECK: execGraph->update(*graph); + cudaGraphExecUpdate(execGraph, graph, nullptr, nullptr); + +#endif + // CHECK: delete (execGraph); // CHECK-NEXT: delete (*execGraph2); // CHECK-NEXT: delete (**execGraph3); diff --git a/clang/test/dpct/cudaGraph_test_default_option.cu b/clang/test/dpct/cudaGraph_test_default_option.cu index 06d5fc101b63..0249cd067a9c 100644 --- a/clang/test/dpct/cudaGraph_test_default_option.cu +++ b/clang/test/dpct/cudaGraph_test_default_option.cu @@ -25,12 +25,27 @@ int main() { // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaStreamBeginCapture is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal); - + // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaStreamEndCapture is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ cudaStreamEndCapture(stream, &graph); + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphNode_t is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaGraphNode_t node; + + // CHECK: /* + // CHECK: DPCT1119:{{[0-9]+}}: Migration of cudaGraphAddEmptyNode is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaGraphAddEmptyNode(&node, graph, NULL, 0); + + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphAddDependencies is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaGraphAddDependencies(graph, NULL, NULL, 0); + // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphInstantiate is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ @@ -41,6 +56,16 @@ int main() { // CHECK-NEXT: */ cudaGraphLaunch(execGraph, stream); + // CHECK: /* + // CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cudaGraphExecUpdateResult is not supported. + // CHECK-NEXT: */ + cudaGraphExecUpdateResult status; + + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecUpdate is not supported, please try to remigrate with option: --use-experimental-features=graph. + // CHECK-NEXT: */ + cudaGraphExecUpdate(execGraph, graph, nullptr, &status); + // CHECK: /* // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaGraphExecDestroy is not supported, please try to remigrate with option: --use-experimental-features=graph. // CHECK-NEXT: */ diff --git a/clang/test/dpct/double2_overloaded_operator.cu b/clang/test/dpct/double2_overloaded_operator.cu index 0accaeb1dfa0..6f4cb811a0e0 100644 --- a/clang/test/dpct/double2_overloaded_operator.cu +++ b/clang/test/dpct/double2_overloaded_operator.cu @@ -520,4 +520,41 @@ void foo(){ int2 i2; A2 a; a - i2; -} \ No newline at end of file +} + +inline __device__ float2 operator+(const float2 & a, const float2 & b) { + return {a.x + b.x, a.y + b.y}; +} + +// CHECK: template +// CHECK: struct Sum { +// CHECK: inline Sum() {} +// CHECK: inline T operator()(const T &a, const T &b) const { +// CHECK: return a + b; +// CHECK: } +// CHECK: }; +template +struct Sum { + inline __device__ Sum() {} + inline __device__ T operator()(const T &a, const T &b) const { + return a + b; + } +}; + +// CHECK: template +// CHECK: void bar() { +// CHECK: T a, b, c; +// CHECK: c = a + b; +// CHECK: } +template +__device__ void bar() { + T a, b, c; + c = a + b; +} + +__global__ void kernel() { + bar(); + bar(); + Sum a; + Sum b; +} diff --git a/clang/test/dpct/math/half/half.cu b/clang/test/dpct/math/half/half.cu index 3b2f30c6959c..084ee4409bd7 100644 --- a/clang/test/dpct/math/half/half.cu +++ b/clang/test/dpct/math/half/half.cu @@ -165,4 +165,15 @@ __global__ void kernelFuncHalfConversion() { h = __ushort_as_half(us); } +template +void __global__ kernel(){ + int idx = threadIdx.x; + half2 local_val1[BS]; + float2 local_val2[BS]; + // CHECK: sycl::float2 tmp_float2 = local_val1[idx].template convert(); + // CHECK-NEXT: sycl::half2 tmp_half2 = local_val2[idx].template convert(); + float2 tmp_float2 = __half22float2(local_val1[idx]); + half2 tmp_half2 = __float22half2_rn(local_val2[idx]); +} + int main() { return 0; } diff --git a/clang/test/dpct/template-kernel-call-cuda12.0-not-support.cu b/clang/test/dpct/template-kernel-call-cuda12.0-not-support.cu index 324cf5498f1c..c40f904cf55c 100644 --- a/clang/test/dpct/template-kernel-call-cuda12.0-not-support.cu +++ b/clang/test/dpct/template-kernel-call-cuda12.0-not-support.cu @@ -26,18 +26,14 @@ struct texReader_dp { } }; -// CHECK: template -// CHECK-NEXT: void compute_lj_force(const sycl::nd_item<3> &item_ct1, -// CHECK-NEXT: dpct::image_accessor_ext posTexture_dp) template __global__ void compute_lj_force() { int idx = blockIdx.x*blockDim.x + threadIdx.x; texReader positionTexReader; // CHECK: /* - // CHECK-NEXT: DPCT1084:{{[0-9]+}}: The function call "texReader_sp::operator()" has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. - // CHECK-NEXT: */ - // CHECK-NEXT: float j = positionTexReader(idx, posTexture_dp).x(); + // CHECK: DPCT1084:{{[0-9]+}}: The function call "texReader_sp::operator()" has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. + // CHECK: */ float j = positionTexReader(idx).x; } diff --git a/clang/test/dpct/template-kernel-call.cu b/clang/test/dpct/template-kernel-call.cu index 6f9d936f916b..b15279e6789f 100644 --- a/clang/test/dpct/template-kernel-call.cu +++ b/clang/test/dpct/template-kernel-call.cu @@ -565,3 +565,29 @@ void test_host() { // CHECK-NEXT: }); test_kernel<<>>(); } + +template class A { +public: + __device__ void foo() { int i = threadIdx.x; }; + __device__ void f1() { foo(); } +}; + +template class A { +public: + __device__ void f1() { foo(); } + __device__ void foo(){}; +}; + +template __global__ void kernel2() { + A a; +//CHECK: /* +//CHECK: DPCT1084:{{[0-9]+}}: The function call "A::f1" has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. +//CHECK: */ + a.f1(); +} + +int main() { + kernel2<<<1, 1>>>(); + kernel2<<<1, 1>>>(); + return 0; +}