diff --git a/.github/workflows/sycl-linux-precommit.yml b/.github/workflows/sycl-linux-precommit.yml index 6d578580a13db..79beaaeb274be 100644 --- a/.github/workflows/sycl-linux-precommit.yml +++ b/.github/workflows/sycl-linux-precommit.yml @@ -242,7 +242,6 @@ jobs: env: '{"LIT_FILTER":"PerformanceTests/"}' extra_lit_opts: -a -j 1 --param enable-perf-tests=True - extra_cmake_args: ${{ matrix.extra_cmake_args }} ref: ${{ github.sha }} merge_ref: '' diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 3e4066fdc1904..6fafe298584e8 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -127,12 +127,9 @@ on: - '["cts-cpu"]' - '["Linux", "build"]' image: - description: | - Use option ending with ":build" for AMDGPU, ":latest" for the rest. type: choice options: - - 'ghcr.io/intel/llvm/sycl_ubuntu2204_nightly:latest' - - 'ghcr.io/intel/llvm/sycl_ubuntu2204_nightly:build' + - 'ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest' image_options: description: | Use option with "--device=/dev/kfd" for AMDGPU, without it for the rest. @@ -361,7 +358,9 @@ jobs: cts_exclude_filter="" # If CTS_TESTS_TO_BUILD is null - use filter if [ -z "$CTS_TESTS_TO_BUILD" ]; then - if [ "${{ contains(inputs.target_devices, 'opencl:cpu') }}" = "true" ]; then + if [ "${{ contains(inputs.cts_testing_mode, 'build-only') }}" = "true" ]; then + cts_exclude_filter=$PWD/devops/cts_exclude_filter_compfails + elif [ "${{ contains(inputs.target_devices, 'opencl:cpu') }}" = "true" ]; then cts_exclude_filter=$PWD/devops/cts_exclude_filter_OCL_CPU elif [ "${{ contains(inputs.target_devices, 'level_zero:gpu') }}" = "true" ]; then cts_exclude_filter=$PWD/devops/cts_exclude_filter_L0_GPU diff --git a/.github/workflows/sycl-nightly.yml b/.github/workflows/sycl-nightly.yml index 0c3ff68b27efd..01831b428088c 100644 --- a/.github/workflows/sycl-nightly.yml +++ b/.github/workflows/sycl-nightly.yml @@ -242,7 +242,7 @@ jobs: body: "Daily build ${{ steps.tag.outputs.TAG }}" target_commitish: ${{ github.sha }} - ubuntu2204_docker_build_push: + docker_build_push: if: github.repository == 'intel/llvm' runs-on: [Linux, build] permissions: @@ -254,42 +254,16 @@ jobs: with: name: sycl_linux_default path: devops/ - - name: Build and Push Container (with drivers) + - name: Build and Push Container uses: ./devops/actions/build_container with: push: ${{ github.ref_name == 'sycl' }} - file: ubuntu2204_preinstalled + file: nightly username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} build-args: | base_image=ghcr.io/intel/llvm/ubuntu2404_intel_drivers - base_tag=latest + base_tag=alldeps tags: | - ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:${{ github.sha }} - ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:latest - - name: Build and Push Container (no drivers) - uses: ./devops/actions/build_container - with: - push: ${{ github.ref_name == 'sycl' }} - file: ubuntu2204_preinstalled - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - build-args: | - base_image=ghcr.io/intel/llvm/ubuntu2204_base - base_tag=latest - tags: | - ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:no-drivers-${{ github.sha }} - ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:no-drivers - - name: Build and Push Container (Build image) - uses: ./devops/actions/build_container - with: - push: ${{ github.ref_name == 'sycl' }} - file: ubuntu2204_preinstalled - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - build-args: | - base_image=ghcr.io/intel/llvm/ubuntu2204_build - base_tag=latest - tags: | - ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:build-${{ github.sha }} - ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:build + ghcr.io/${{ github.repository }}/sycl_ubuntu2404_nightly:${{ github.sha }} + ghcr.io/${{ github.repository }}/sycl_ubuntu2404_nightly:latest diff --git a/devops/containers/ubuntu2204_preinstalled.Dockerfile b/devops/containers/nightly.Dockerfile similarity index 79% rename from devops/containers/ubuntu2204_preinstalled.Dockerfile rename to devops/containers/nightly.Dockerfile index 18ec7de01acb3..4c62f2668b2b8 100644 --- a/devops/containers/ubuntu2204_preinstalled.Dockerfile +++ b/devops/containers/nightly.Dockerfile @@ -1,5 +1,5 @@ -ARG base_tag=latest -ARG base_image=ghcr.io/intel/llvm/ubuntu2204_intel_drivers +ARG base_tag=alldeps +ARG base_image=ghcr.io/intel/llvm/ubuntu2404_intel_drivers FROM $base_image:$base_tag diff --git a/devops/cts_exclude_filter_compfails b/devops/cts_exclude_filter_compfails new file mode 100644 index 0000000000000..b8b4bb2843ded --- /dev/null +++ b/devops/cts_exclude_filter_compfails @@ -0,0 +1,4 @@ +# Please use "#" to add comments here. +# Do not delete the file even if it's empty. +# See https://github.com/intel/llvm/pull/16615 +handler diff --git a/devops/dependencies.json b/devops/dependencies.json index 79892387df4c1..3d42a52a2f497 100644 --- a/devops/dependencies.json +++ b/devops/dependencies.json @@ -8,7 +8,7 @@ }, "igc": { "github_tag": "v2.5.6", - "version": "2.5.6", + "version": "v2.5.6", "url": "https://github.com/intel/intel-graphics-compiler/releases/tag/v2.5.6", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index c3e8bb61657a7..eb5c3ff2ebdf4 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -31,16 +31,7 @@ using __nativecpu_state = native_cpu::state; #define OCL_LOCAL __attribute__((opencl_local)) #define OCL_GLOBAL __attribute__((opencl_global)) - -DEVICE_EXTERNAL OCL_LOCAL void * -__spirv_GenericCastToPtrExplicit_ToLocal(void *p, int) { - return (OCL_LOCAL void *)p; -} - -DEVICE_EXTERNAL OCL_GLOBAL void * -__spirv_GenericCastToPtrExplicit_ToGlobal(void *p, int) { - return (OCL_GLOBAL void *)p; -} +#define OCL_PRIVATE __attribute__((opencl_private)) DEVICE_EXTERN_C void __mux_work_group_barrier(uint32_t id, uint32_t scope, uint32_t semantics); @@ -61,6 +52,23 @@ __spirv_MemoryBarrier(uint32_t Memory, uint32_t Semantics) { // Turning clang format off here because it reorders macro invocations // making the following code very difficult to read. // clang-format off + +#define DefGenericCastToPtrExplImpl(sfx, asp, cv)\ +DEVICE_EXTERNAL cv asp void *\ +__spirv_GenericCastToPtrExplicit_##sfx(cv void *p ,int) {\ + return (cv asp void *)p;\ +} + +#define DefGenericCastToPtrExpl(sfx, asp)\ + DefGenericCastToPtrExplImpl(sfx, asp, )\ + DefGenericCastToPtrExplImpl(sfx, asp, const)\ + DefGenericCastToPtrExplImpl(sfx, asp, volatile)\ + DefGenericCastToPtrExplImpl(sfx, asp, const volatile) + +DefGenericCastToPtrExpl(ToPrivate, OCL_PRIVATE) +DefGenericCastToPtrExpl(ToLocal, OCL_LOCAL) +DefGenericCastToPtrExpl(ToGlobal, OCL_GLOBAL) + #define DefSubgroupBlockINTEL1(Type, PType) \ template <> \ __SYCL_CONVERGENT__ DEVICE_EXTERNAL Type \ diff --git a/llvm/docs/requirements-hashed.txt b/llvm/docs/requirements-hashed.txt index 08fe7f3573605..636aa7d5ed25c 100644 --- a/llvm/docs/requirements-hashed.txt +++ b/llvm/docs/requirements-hashed.txt @@ -151,7 +151,7 @@ imagesize==1.4.1 \ --hash=sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b \ --hash=sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a # via sphinx -jinja2==3.1.4 \ +jinja2==3.1.5 \ --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d # via diff --git a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDOptimizeVecArgCallConv.cpp b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDOptimizeVecArgCallConv.cpp index 806c0c20cb01b..4c20a879a5b64 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDOptimizeVecArgCallConv.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDOptimizeVecArgCallConv.cpp @@ -354,9 +354,8 @@ optimizeFunction(Function *OldF, // preserve data flow equality to the original. unsigned OldArgNo = PI.getFormalParam().getArgNo(); unsigned NewArgNo = oldArgNo2NewArgNo(OldArgNo, SretInd); - Instruction *At = nullptr; Value *Val = NewF->getArg(NewArgNo); - StoreInst *St = new StoreInst(Val, Alloca, false, Al, At); + StoreInst *St = new StoreInst(Val, Alloca, false, Al); NewInsts.push_back(St); } } diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp index 09101f0df7207..0ec014c702628 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp @@ -1239,7 +1239,7 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI, if (OITy != NITy) { auto CastOpcode = CastInst::getCastOpcode(NewI, false, OITy, false); NewI = CastInst::Create(CastOpcode, NewI, OITy, - NewI->getName() + ".cast.ty", OldI); + NewI->getName() + ".cast.ty", OldI->getIterator()); NewI->setDebugLoc(OldI->getDebugLoc()); } return NewI; @@ -1565,7 +1565,7 @@ static void translateESIMDIntrinsicCall(CallInst &CI) { CallInst *NewCI = IntrinsicInst::Create( NewFDecl, GenXArgs, NewFDecl->getReturnType()->isVoidTy() ? "" : CI.getName() + ".esimd", - &CI); + CI.getIterator()); NewCI->setDebugLoc(CI.getDebugLoc()); if (DoesFunctionReturnStructure) { IRBuilder<> Builder(&CI); diff --git a/llvm/lib/SYCLLowerIR/GlobalOffset.cpp b/llvm/lib/SYCLLowerIR/GlobalOffset.cpp index 3873b2f8837e5..67c75b49a4881 100644 --- a/llvm/lib/SYCLLowerIR/GlobalOffset.cpp +++ b/llvm/lib/SYCLLowerIR/GlobalOffset.cpp @@ -250,7 +250,7 @@ void GlobalOffsetPass::addImplicitParameterToCallers( /* Func= */ CalleeWithImplicitParam, /* Args= */ ImplicitOffsets, /* NameStr= */ Twine(), - /* InsertBefore= */ CallToOld); + /* InsertBefore= */ CallToOld->getIterator()); NewCallInst->setTailCallKind(CallToOld->getTailCallKind()); NewCallInst->copyMetadata(*CallToOld); CallToOld->replaceAllUsesWith(NewCallInst); diff --git a/llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp b/llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp index 220e04de41157..0d30d2725b43e 100644 --- a/llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp +++ b/llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp @@ -177,7 +177,7 @@ Function *LocalAccessorToSharedMemoryPass::processKernel(Module &M, ConstantInt::get(Type::getInt32Ty(M.getContext()), 0, false), NFA, }, - /* NameStr= */ Twine{NFA->getName()}, InsertBefore); + /* NameStr= */ Twine{NFA->getName()}, InsertBefore->getIterator()); // Then create a bitcast to make sure the new pointer is the same type // as the old one. This will only ever be a `i8 addrspace(3)*` to `i32 // addrspace(3)*` type of cast. diff --git a/llvm/lib/SYCLLowerIR/LowerInvokeSimd.cpp b/llvm/lib/SYCLLowerIR/LowerInvokeSimd.cpp index 8d72f8466335f..e118ecb0152b6 100644 --- a/llvm/lib/SYCLLowerIR/LowerInvokeSimd.cpp +++ b/llvm/lib/SYCLLowerIR/LowerInvokeSimd.cpp @@ -461,8 +461,8 @@ bool processInvokeSimdCall(CallInst *InvokeSimd, NewInvokeSimdArgs.push_back(NewHelper); auto ThirdArg = std::next(InvokeSimd->arg_begin(), 2); NewInvokeSimdArgs.append(ThirdArg, InvokeSimd->arg_end()); - CallInst *NewInvokeSimd = - CallInst::Create(NewInvokeSimdF, NewInvokeSimdArgs, "", InvokeSimd); + CallInst *NewInvokeSimd = CallInst::Create( + NewInvokeSimdF, NewInvokeSimdArgs, "", InvokeSimd->getIterator()); // - transfer flags, attributes (with shrinking), calling convention: NewInvokeSimd->copyIRFlags(InvokeSimd); NewInvokeSimd->setCallingConv(InvokeSimd->getCallingConv()); diff --git a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp index b087221e5f37a..815f5cfd248a6 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -981,7 +981,7 @@ Value *spirv::genPseudoLocalID(Instruction &Before, const Triple &TT) { Align Alignment = M.getDataLayout().getPreferredAlign(G); G->setAlignment(MaybeAlign(Alignment)); } - Value *Res = new LoadInst(G->getValueType(), G, "", &Before); + Value *Res = new LoadInst(G->getValueType(), G, "", Before.getIterator()); return Res; } } diff --git a/llvm/lib/SYCLLowerIR/SYCLJointMatrixTransform.cpp b/llvm/lib/SYCLLowerIR/SYCLJointMatrixTransform.cpp index c5c03b2ae1c16..1a39c994b1ede 100644 --- a/llvm/lib/SYCLLowerIR/SYCLJointMatrixTransform.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLJointMatrixTransform.cpp @@ -21,6 +21,7 @@ namespace { static constexpr char ACCESS_CHAIN[] = "_Z19__spirv_AccessChain"; static constexpr char MATRIX_TYPE[] = "spirv.CooperativeMatrixKHR"; +static constexpr char MATRIX_LAYOUT[] = "joint_matrix_layout_to_spv"; Type *getInnermostType(Type *Ty) { while (auto *ArrayTy = dyn_cast(Ty)) @@ -184,17 +185,99 @@ bool transformAccessChain(Function *F) { } return ModuleChanged; } + +StoreInst *findLastStoreBeforeLoad(Value *Ptr, Instruction *Load) { + BasicBlock::iterator It(Load); + while (It != Load->getParent()->begin()) { + --It; + if (auto *Store = dyn_cast(&*It)) + if (Store->getPointerOperand() == Ptr) + return Store; + } + return nullptr; +} + +// Per SPIR-V specification Layout of a matrix must be a constant instruction +// aka a constexpr or specialization constant. Meanwhile in SYCL headers +// layout is passed as a parameter to joint_matrix_load function, so even if +// that layout is a constant expression in the user's code - it's not possible +// to prove that to the compiler, so constant propagation will happen only +// after inlining, not in AST. That means, that with O0 layout would remain +// to be a runtime variable in LLVM IR. +// SYCL matrix layout is being mapped on SPIR-V matrix layout by +// joint_matrix_layout_to_spv function. The following routine finds calls to +// this function and replaces them with the found constant. +// This function also cleans up code, that becomes dead. Pattern of the dead +// code is stable, as user's code doesn't affect it. +bool propagateConstexprLayout(Function *F) { + llvm::SmallVector ToErase; + for (auto I = F->user_begin(), E = F->user_end(); I != E;) { + User *U = *I++; + auto *CI = dyn_cast(U); + if (!CI) + continue; + auto *Op = dyn_cast(CI->getArgOperand(0)); + if (!Op || !isa(Op)) + continue; + auto *Ptr = dyn_cast(cast(Op)->getPointerOperand()); + if (!Ptr) + continue; + + ConstantInt *ConstLayout = nullptr; + StoreInst *SI = findLastStoreBeforeLoad(Ptr, Op); + if (!SI) + continue; + ConstLayout = dyn_cast(SI->getValueOperand()); + if (ConstLayout) { + CI->replaceAllUsesWith(ConstLayout); + ToErase.push_back(CI); + ToErase.push_back(SI); + ToErase.push_back(Op); + ToErase.push_back(Ptr); + if (auto *Cast = dyn_cast(Ptr)) { + auto *OrigPtr = Cast->getPointerOperand(); + if (auto *AI = dyn_cast(OrigPtr)) + ToErase.push_back(AI); + } + } + } + + // There are possible cases, when a single instruction result is used multiple + // times. For this case we have to use a vector to store such instructions + // and keep track if we have removed them before to avoid double free(). + SmallPtrSet Erased; + for (Instruction *II : ToErase) { + if (!II->use_empty()) + continue; + if (Erased.contains(II)) + continue; + II->dropAllReferences(); + II->eraseFromParent(); + Erased.insert(II); + } + return !ToErase.empty(); +} } // namespace PreservedAnalyses SYCLJointMatrixTransformPass::run(Module &M, ModuleAnalysisManager &MAM) { bool ModuleChanged = false; + llvm::SmallVector ToErase; for (Function &F : M) { - if (!F.isDeclaration()) - continue; + if (!F.isDeclaration()) { + if (F.getName() == MATRIX_LAYOUT) { + ModuleChanged |= propagateConstexprLayout(&F); + ToErase.push_back(&F); + } else + continue; + } if (F.getName().starts_with(ACCESS_CHAIN)) ModuleChanged |= transformAccessChain(&F); } + for (auto *F : ToErase) + if (F->users().empty()) + F->eraseFromParent(); + return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all(); } diff --git a/llvm/lib/SYCLLowerIR/SpecConstants.cpp b/llvm/lib/SYCLLowerIR/SpecConstants.cpp index c00fe479ad365..08be340512f9b 100644 --- a/llvm/lib/SYCLLowerIR/SpecConstants.cpp +++ b/llvm/lib/SYCLLowerIR/SpecConstants.cpp @@ -514,15 +514,15 @@ Instruction *emitCall(Type *RetTy, StringRef BaseFunctionName, auto *NewFT = FunctionType::get(NewRetTy, ArgTys, false /*isVarArg*/); auto NewFC = M->getOrInsertFunction(FunctionName, NewFT); - auto *Call = - CallInst::Create(NewFT, NewFC.getCallee(), Args, "", InsertBefore); + auto *Call = CallInst::Create(NewFT, NewFC.getCallee(), Args, "", + InsertBefore->getIterator()); if (IsSPIROrSPIRV) { cast(NewFC.getCallee()) ->setCallingConv(CallingConv::SPIR_FUNC); Call->setCallingConv(CallingConv::SPIR_FUNC); } return CastInst::CreateTruncOrBitCast(Call, RetTy, "tobool", - InsertBefore); + InsertBefore->getIterator()); } } @@ -711,7 +711,7 @@ Value *createLoadFromBuffer(CallInst *InsertBefore, Value *Buffer, Type *Int32Ty = Type::getInt32Ty(C); GetElementPtrInst *GEP = GetElementPtrInst::Create( Int8Ty, Buffer, {ConstantInt::get(Int32Ty, Offset, false)}, "gep", - InsertBefore); + InsertBefore->getIterator()); Instruction *BitCast = nullptr; if (SCType->isIntegerTy(1)) // No bitcast to i1 before load @@ -719,14 +719,14 @@ Value *createLoadFromBuffer(CallInst *InsertBefore, Value *Buffer, else BitCast = new BitCastInst(GEP, PointerType::get(SCType, GEP->getAddressSpace()), - "bc", InsertBefore); + "bc", InsertBefore->getIterator()); // When we encounter i1 spec constant, we still load the whole byte Value *Load = new LoadInst(SCType->isIntegerTy(1) ? Int8Ty : SCType, BitCast, - "load", InsertBefore); + "load", InsertBefore->getIterator()); if (SCType->isIntegerTy(1)) // trunc back to i1 if necessary Load = CastInst::CreateIntegerCast(Load, SCType, /* IsSigned */ false, - "tobool", InsertBefore); + "tobool", InsertBefore->getIterator()); return Load; } @@ -993,8 +993,8 @@ PreservedAnalyses SpecConstantsPass::run(Module &M, if (SCTy->isIntegerTy(1)) { assert(DefaultValue->getType()->isIntegerTy(8) && "For bool spec constant default value is expected to be i8"); - Replacement = - new TruncInst(DefaultValue, Type::getInt1Ty(Ctx), "bool", CI); + Replacement = new TruncInst(DefaultValue, Type::getInt1Ty(Ctx), + "bool", CI->getIterator()); } else Replacement = DefaultValue; } @@ -1021,9 +1021,9 @@ PreservedAnalyses SpecConstantsPass::run(Module &M, Value *ArraySize = Mode == HandlingMode::emulation ? DefaultValue : Replacement; assert(ArraySize->getType()->isIntegerTy() && "Expecting integer type"); - Replacement = - new AllocaInst(Intr->getAllocatedType(), Intr->getAddressSpace(), - ArraySize, Intr->getAlign(), "alloca", CI); + Replacement = new AllocaInst( + Intr->getAllocatedType(), Intr->getAddressSpace(), ArraySize, + Intr->getAlign(), "alloca", CI->getIterator()); } if (HasSretParameter) diff --git a/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp index 340df9d4e7264..2316aae2da6e4 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp @@ -426,7 +426,7 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M, if (nullptr == ReplaceFunc) ReplaceFunc = getReplaceFunc(M, Entry.second, Use, Args); auto *NewI = CallInst::Create(ReplaceFunc->getFunctionType(), ReplaceFunc, - Args, "", I); + Args, "", I->getIterator()); // If the parent function has debug info, we need to make sure that the // CallInstructions in it have debug info, otherwise we end up with // invalid IR after inlining. diff --git a/llvm/test/SYCLLowerIR/JointMatrixTransform/constexpr-layout.ll b/llvm/test/SYCLLowerIR/JointMatrixTransform/constexpr-layout.ll new file mode 100644 index 0000000000000..b2d352a809be9 --- /dev/null +++ b/llvm/test/SYCLLowerIR/JointMatrixTransform/constexpr-layout.ll @@ -0,0 +1,90 @@ +; The test checks, that users of the call to joint_matrix_layout_to_spv matrix +; are replaced with the layout constant. + +; RUN: opt -passes=sycl-joint-matrix-transform < %s -S | FileCheck %s + +; ModuleID = 'test.bc' +source_filename = "test.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +$joint_matrix_layout_to_spv = comdat any + +; CHECK: define weak_odr dso_local spir_kernel void @test +; CHECK-NEXT: entry: +; CHECK-NEXT: %{{.*}} = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHR{{.*}}(ptr addrspace(1){{.*}}, i32 noundef 0, i64 noundef{{.*}} +; CHECK-NEXT: %{{.*}} = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHR{{.*}}(ptr addrspace(1){{.*}}, i32 noundef 1, i64 noundef{{.*}} +; CHECK-NEXT: %{{.*}} = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHR{{.*}}(ptr addrspace(1){{.*}}, i32 noundef 1, i64 noundef{{.*}} +; CHECK-NEXT: %{{.*}} = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHR{{.*}}(ptr addrspace(1){{.*}}, i32 noundef 2, i64 noundef{{.*}} +; CHECK-NEXT: ret void + +; CHECK-NOT: joint_matrix_layout_to_spv + +define weak_odr dso_local spir_kernel void @test(ptr addrspace(1) %matrix.1, ptr addrspace(1) %matrix.2, i64 noundef %stride) { +entry: + %layout.1 = alloca i32, align 4 + %layout.2 = alloca i32, align 4 + %layout.ascast.1 = addrspacecast ptr %layout.1 to ptr addrspace(4) + %layout.ascast.2 = addrspacecast ptr %layout.2 to ptr addrspace(4) + store i32 0, ptr addrspace(4) %layout.ascast.1, align 4 + store i32 1, ptr addrspace(4) %layout.ascast.2, align 4 + + %layout.val.1 = load i32, ptr addrspace(4) %layout.ascast.1, align 4 + %layout.spv.1 = call spir_func noundef i32 @joint_matrix_layout_to_spv(i32 noundef %layout.val.1) + %mload.1 = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1ffLm16ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_28__spirv_CooperativeMatrixKHRIT0_XT5_EXT1_EXT2_EXT3_EEEPT_S3_mi(ptr addrspace(1) noundef %matrix.1, i32 noundef %layout.spv.1, i64 noundef %stride, i32 noundef 0) + + %layout.val.2 = load i32, ptr addrspace(4) %layout.ascast.2, align 4 + %layout.spv.2 = call spir_func noundef i32 @joint_matrix_layout_to_spv(i32 noundef %layout.val.2) + %mload.2 = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1ffLm16ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_28__spirv_CooperativeMatrixKHRIT0_XT5_EXT1_EXT2_EXT3_EEEPT_S3_mi(ptr addrspace(1) noundef %matrix.2, i32 noundef %layout.spv.2, i64 noundef %stride, i32 noundef 0) + + %layout.spv.3 = call spir_func noundef i32 @joint_matrix_layout_to_spv(i32 noundef %layout.val.2) + %mload.3 = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1ffLm16ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_28__spirv_CooperativeMatrixKHRIT0_XT5_EXT1_EXT2_EXT3_EEEPT_S3_mi(ptr addrspace(1) noundef %matrix.2, i32 noundef %layout.spv.3, i64 noundef %stride, i32 noundef 0) + + store i32 2, ptr addrspace(4) %layout.ascast.2, align 4 + %layout.val.4 = load i32, ptr addrspace(4) %layout.ascast.2, align 4 + %layout.spv.4 = call spir_func noundef i32 @joint_matrix_layout_to_spv(i32 noundef %layout.val.4) + %mload.4 = call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1ffLm16ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_28__spirv_CooperativeMatrixKHRIT0_XT5_EXT1_EXT2_EXT3_EEEPT_S3_mi(ptr addrspace(1) noundef %matrix.2, i32 noundef %layout.spv.4, i64 noundef %stride, i32 noundef 0) + ret void +} + +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 16, 16, 2) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1ffLm16ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_28__spirv_CooperativeMatrixKHRIT0_XT5_EXT1_EXT2_EXT3_EEEPT_S3_mi(ptr addrspace(1) noundef, i32 noundef, i64 noundef, i32 noundef) + +define linkonce_odr dso_local spir_func noundef i32 @joint_matrix_layout_to_spv(i32 noundef %Layout) comdat { +entry: + %retval = alloca i32, align 4 + %Layout.addr = alloca i32, align 4 + %retval.ascast = addrspacecast ptr %retval to ptr addrspace(4) + %Layout.addr.ascast = addrspacecast ptr %Layout.addr to ptr addrspace(4) + store i32 %Layout, ptr addrspace(4) %Layout.addr.ascast, align 4 + %0 = load i32, ptr addrspace(4) %Layout.addr.ascast, align 4 + switch i32 %0, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb: ; preds = %entry + store i32 0, ptr addrspace(4) %retval.ascast, align 4 + br label %return + +sw.bb1: ; preds = %entry + store i32 1, ptr addrspace(4) %retval.ascast, align 4 + br label %return + +sw.bb2: ; preds = %entry + store i32 2, ptr addrspace(4) %retval.ascast, align 4 + br label %return + +sw.bb3: ; preds = %entry + store i32 3, ptr addrspace(4) %retval.ascast, align 4 + br label %return + +sw.epilog: ; preds = %entry + call void @llvm.trap() + unreachable + +return: ; preds = %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb + %1 = load i32, ptr addrspace(4) %retval.ascast, align 4 + ret i32 %1 +} diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 891fe4ea0c308..2bfa1cbef4657 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,8 @@ -# commit eaea885d5477c8936209175a5b00062ca44f5765 -# Merge: af4ab49c 2a03334c -# Author: Kenneth Benzie (Benie) -# Date: Thu Jan 16 14:30:47 2025 +0000 -# Merge pull request #2569 from zhaomaosu/asan-only-warn-host-ptr -# [DevASAN] Only report warning if passing host ptr to kernel -set(UNIFIED_RUNTIME_TAG eaea885d5477c8936209175a5b00062ca44f5765) +# commit b074893e854d28141cd67bc5935ed87e47eb3bb6 +# Merge: 71a5eab0 128ea023 +# Author: Ross Brunton +# Date: Tue Jan 21 11:21:50 2025 +0000 +# Merge pull request #2539 from RossBrunton/ross/specconst +# +# Added `DEVICE_INFO_PROGRAM_SET_SPECIALIZATION_CONSTANTS` +set(UNIFIED_RUNTIME_TAG b074893e854d28141cd67bc5935ed87e47eb3bb6) diff --git a/sycl/doc/developer/DockerBKMs.md b/sycl/doc/developer/DockerBKMs.md index 4b761c4075a2b..6f1db70e37ffa 100644 --- a/sycl/doc/developer/DockerBKMs.md +++ b/sycl/doc/developer/DockerBKMs.md @@ -53,10 +53,6 @@ development containers: NVidia/AMD and can be used for building DPC++ compiler from source with all backends enabled or for end-to-end testing with HIP/CUDA on machines with corresponding GPUs available. - - `devops/containers/sycl_ubuntu2204_nightly`: contains the latest successfully - built nightly build of DPC++ compiler. The Dockerfile comes in three flavors: - with pre-installed Intel drivers (`latest`), without them (`no-drivers`) and - with development kits installed (`build`). ### Ubuntu 24.04-based Dockerfiles @@ -78,7 +74,8 @@ development containers: NVidia/AMD and can be used for building DPC++ compiler from source with all backends enabled or for end-to-end testing with HIP/CUDA on machines with corresponding GPUs available. - + - `devops/containers/nightly`: contains the latest successfully + built nightly build of DPC++ compiler. ## Running Docker container interactively @@ -199,7 +196,7 @@ Docker containers can be built with the following command: docker build -f path/to/devops/containers/file.Dockerfile path/to/devops/ ``` -The `ubuntu2204_preinstalled.Dockerfile` script expects `llvm_sycl.tar.xz` file +The `nightly.Dockerfile` script expects `llvm_sycl.tar.xz` file to be present in `devops/` directory. Containers other than base provide several configurable arguments, the most diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index 130e54b386cda..d99f988d49cbb 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -428,6 +428,15 @@ The second way to allocate image memory is to use USM allocations. SYCL already provides a number of USM allocation functions. This proposal would add another, pitched memory allocation, through `pitched_alloc_device`. +Bindless images can be backed by device, host, or shared USM memory allocations. + +[NOTE] +==== +Image memory backed by USM device and host allocations is generally supported, +whereas shared USM allocations depend on the SYCL backend as well as the device +capabilities. +==== + ```cpp namespace sycl::ext::oneapi::experimental { @@ -2328,4 +2337,5 @@ These features still need to be handled: |6.4|2024-10-15| - Fix bindless spec examples and include examples in bindless spec using asciidoc include. |6.5|2024-10-22| - Allow 3-channel image formats on some backends. +|6.6|2025-01-20| - Clarify support for the specific types of USM allocations. |====================== diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc index ee52d75b8fd21..16f5a27357d6d 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_launch_queries.asciidoc @@ -10,6 +10,7 @@ :encoding: utf-8 :lang: en :dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note // Set the default source code type in this document to C++, // for syntax highlighting purposes. This is needed because @@ -106,14 +107,12 @@ If the `sycl::nd_range` parameter used to launch a kernel is incompatible with the results of a kernel's launch queries, an implementation must throw a synchronous exception with the `errc::nd_range` error code. -[NOTE] -==== -The values returned by `ext_oneapi_get_info` account for all properties -attached to a kernel (via the mechanisms defined in the +[_Note_: The values returned by `ext_oneapi_get_info` account for all +properties attached to a kernel (via the mechanisms defined in the sycl_ext_oneapi_kernel_properties extension), as well as the usage of features -like group algorithms and work-group local memory. Developers should assume -that the values will differ across kernels. -==== +like group algorithms and work-group local memory. +Developers should assume that the values will differ across +kernels._{endnote}_] [source,c++] ---- @@ -121,46 +120,61 @@ namespace sycl { class kernel { public: - template - /*return-type*/ ext_oneapi_get_info(T... args) const; -}; -} ----- + // Only available if Param is max_work_item_sizes<1> + template + id<1> ext_oneapi_get_info(sycl::queue q) const; -[source,c++] ----- -template -/*return-type*/ ext_oneapi_get_info(T... args) const; ----- -_Constraints_: Available only when the types `+T...+` described by the parameter -pack match the types defined in the table below. + // Only available if Param is max_work_item_sizes<2> + template + id<2> ext_oneapi_get_info(sycl::queue q) const; -_Preconditions_: `Param` must be one of the `info::kernel` descriptors defined -in this extension. + // Only available if Param is max_work_item_sizes<3> + template + id<3> ext_oneapi_get_info(sycl::queue q) const; -_Returns_: Information about the kernel that applies when the kernel is -submitted with the configuration described by the parameter pack `+T...+`. -The return type is defined in the table below. + // Only available if Param is max_work_group_size + template + size_t ext_oneapi_get_info(sycl::queue q) const; -This extension adds several new queries to this interface, many of which -already have equivalents in the `kernel_device_specific` or `device` -namespaces. + // Only available if Param is max_num_work_groups + template + uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r, size_t bytes = 0) const; -NOTE: These queries are queue- and not device-specific because it is -anticipated that implementations will introduce finer-grained queue -controls that impact the scheduling of kernels. + // Only available if Param is max_num_work_groups + template + uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r, size_t bytes = 0) const; -NOTE: Allowing devices to return a value of 1 for these queries maximizes the -chances that code written to use certain extension remains portable. However, -the performance of kernels using only one work-group, sub-group or work-item -may be limited on some (highly parallel) devices. If certain properties (e.g. -forward progress guarantees, cross-work-group synchronization) are being used -as part of a performance optimization, developers should check that the values -returned by these queries is not 1. + // Only available if Param is max_num_work_groups + template + uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r, size_t bytes = 0) const; + + // Only available if Param is max_sub_group_size + template + uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const; + + // Only available if Param is max_sub_group_size + template + uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const; + + // Only available if Param is max_sub_group_size + template + uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const; + + // Only available if Param is num_sub_groups + template + uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const; + + // Only available if Param is num_sub_groups + template + uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const; + + // Only available if Param is num_sub_groups + template + uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const; + +}; -[source, c++] ----- namespace ext::oneapi::experimental::info::kernel { template @@ -169,91 +183,156 @@ struct max_work_item_sizes; struct max_work_group_size; struct max_num_work_groups; -} +struct max_sub_group_size; +struct num_sub_groups; + +} // namespace ext::oneapi::experimental::info::kernel + +} // namespace sycl ---- -[%header,cols="1,5,5,5"] -|=== -|Kernel Descriptor -|Argument Types -|Return Type -|Description +==== Querying valid launch configurations + +This extension adds several new queries for reasoning about the set of valid +launch configurations for a given kernel, many of which already have +equivalents in the `kernel_device_specific` or `device` namespaces. + +[_Note_: These queries are queue- and not device-specific because it is +anticipated that implementations will introduce finer-grained queue +controls that impact the scheduling of kernels._{endnote}_] + +[_Note_: Allowing devices to return a value of 1 for these queries maximizes +the chances that code written to use certain extension remains portable. +However, the performance of kernels using only one work-group, sub-group or +work-item may be limited on some (highly parallel) devices. +If certain properties (e.g. forward progress guarantees, cross-work-group +synchronization) are being used as part of a performance optimization, +developers should check that the values returned by these queries is not +1._{endnote}_] + +''' + +[source,c++] +---- +template +id<1> ext_oneapi_get_info(sycl::queue q) const; // (1) + +template +id<2> ext_oneapi_get_info(sycl::queue q) const; // (2) + +template +id<3> ext_oneapi_get_info(sycl::queue q) const; // (3) +---- +_Constraints (1)_: `Param` is `max_work_item_sizes<1>`. + +_Constraints (2)_: `Param` is `max_work_item_sizes<2>`. + +_Constraints (3)_: `Param` is `max_work_item_sizes<3>`. + +_Returns_: The maximum number of work-items that are permitted in each +dimension of a work-group, when the kernel is submitted to the specified queue, +accounting for any kernel properties or features. +If the kernel can be submitted to the specified queue without an error, the +minimum value returned by this query is 1, otherwise it is 0. -|`template - max_work_item_sizes` -|`sycl::queue` -|`id` -|Returns the maximum number of work-items that are permitted in each dimension - of a work-group, when the kernel is submitted to the specified queue, - accounting for any kernel properties or features. If the kernel can be - submitted to the specified queue without an error, the minimum value returned - by this query is 1, otherwise it is 0. - -|`max_work_group_size` -|`sycl::queue` -|`size_t` -|Returns the maximum number of work-items that are permitted in a work-group, +''' + +[source,c++] +---- +template +size_t ext_oneapi_get_info(sycl::queue q) const; +---- +_Constraints_: `Param` is `max_work_group_size`. + +_Returns_: The maximum number of work-items that are permitted in a work-group, when the kernel is submitted to the specified queue, accounting for any -kernel properties or features. If the kernel can be submitted to the specified -queue without an error, the minimum value returned by this query is 1, -otherwise it is 0. - -|`max_num_work_groups` -|`sycl::queue`, `sycl::range`, `size_t` -|`size_t` -|Returns the maximum number of work-groups, when the kernel is submitted to the -specified queue with the specified work-group size and the specified amount of -dynamic work-group local memory (in bytes), accounting for any kernel -properties or features. If the specified work-group size is 0, which is -invalid, then the implementation will throw a synchronous exception with the -`errc::invalid` error code. If the kernel can be submitted to the specified -queue without an error, the minimum value returned by this query is 1, -otherwise it is 0. +kernel properties or features. +If the kernel can be submitted to the specified queue without an error, the +minimum value returned by this query is 1, otherwise it is 0. -|=== +''' -A separate set of launch queries can be used to reason about how an -implementation will launch a kernel on the specified queue. The values of these -queries should also be checked if a kernel is expected to be launched in a -specific way (e.g., if the kernel requires two sub-groups for correctness). +[source,c++] +---- +template +size_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r, size_t bytes = 0) const; -[source, c++] +template +size_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r, size_t bytes = 0) const; + +template +size_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r, size_t bytes = 0) const; ---- -namespace ext::oneapi::experimental::info::kernel { +_Constraints_: `Param` is `max_num_work_groups`. -struct max_sub_group_size; -struct num_sub_groups; +_Returns_: The maximum number of work-groups, when the kernel is submitted to +the specified queue with the specified work-group size and the specified amount +of dynamic work-group local memory (in bytes), accounting for any kernel +properties or features. +If the kernel can be submitted to the specified queue without an +error, the minimum value returned by this query is 1, otherwise it is 0. + +_Throws_: A synchronous `exception` with the error code `errc::invalid` if the +work-group size `r` is 0. + + +==== Querying launch behavior + +A separate set of launch queries can be used to reason about how an +implementation will launch a kernel on the specified queue. +The values of these queries should also be checked if a kernel is expected to +be launched in a specific way (e.g., if the kernel requires two sub-groups for +correctness). + +''' -} +[source,c++] ---- +template +uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const; -[%header,cols="1,5,5,5"] -|=== -|Kernel Descriptor -|Argument Types -|Return Type -|Description +template +uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const; -|`max_sub_group_size` -|`sycl::queue`, `sycl::range` -|`uint32_t` -|Returns the maximum sub-group size, when the kernel is submitted to the +template +uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const; +---- +_Constraints_: `Param` is `max_sub_group_size`. + +_Returns_: The maximum sub-group size, when the kernel is submitted to the specified queue with the specified work-group size, accounting for any kernel -properties or features. The return value of this query must match the value -returned by `sub_group::get_max_local_range()` inside the kernel. If the kernel -can be submitted to the specified queue without an error, the minimum value -returned by this query is 1, otherwise it is 0. - -|`num_sub_groups` -|`sycl::queue`, `sycl::range` -|`uint32_t` -|Returns the number of sub-groups per work-group, when the kernel is submitted -to the specified queue with the specified work-group size, accounting for any -kernel properties or features. If the kernel can be submitted to the specified -queue without an error, the minimum value returned by this query is 1, -otherwise it is 0. +properties or features. +The return value of this query must match the value returned by +`sub_group::get_max_local_range()` inside the kernel. +If the kernel can be submitted to the specified queue without an error, the +minimum value returned by this query is 1, otherwise it is 0. -|=== +_Throws_: A synchronous `exception` with the error code `errc::invalid` if the +work-group size `r` is 0. + +''' + +[source,c++] +---- +template +uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const; + +template +uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const; + +template +uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const; +---- +_Constraints_: `Param` is `num_sub_groups`. + +_Returns_: The number of sub-groups per work-group, when the kernel is +submitted to the specified queue with the specified work-group size, accounting +for any kernel properties or features. +If the kernel can be submitted to the specified queue without an error, the +minimum value returned by this query is 1, otherwise it is 0. + +_Throws_: A synchronous `exception` with the error code `errc::invalid` if the +work-group size `r` is 0. == Issues diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md index a518ea04e3e74..699e904092b32 100644 --- a/sycl/doc/syclcompat/README.md +++ b/sycl/doc/syclcompat/README.md @@ -1389,6 +1389,10 @@ with bytes selected according to a third unsigned integer argument. `match_all_over_sub_group` and `match_any_over_sub_group` allows comparison of values across work-items within a sub-group. +The function `ternary_logic_op`performs bitwise logical operations on three input values of +`a`, `b` and `c` based on the specified 8-bit truth table `lut` and return the +result. + The functions `select_from_sub_group`, `shift_sub_group_left`, `shift_sub_group_right` and `permute_sub_group_by_xor` provide equivalent functionality to `sycl::select_from_group`, `sycl::shift_group_left`, @@ -1419,6 +1423,8 @@ inline double cast_ints_to_double(int high32, int low32); inline unsigned int byte_level_permute(unsigned int a, unsigned int b, unsigned int s); +inline uint32_t lop3(uint32_t a, uint32_t b, uint32_t c, uint8_t lut) + template inline int ffs(ValueT a); template diff --git a/sycl/include/sycl/detail/is_device_copyable.hpp b/sycl/include/sycl/detail/is_device_copyable.hpp index 388029e6a16a3..bac24f4df3a11 100644 --- a/sycl/include/sycl/detail/is_device_copyable.hpp +++ b/sycl/include/sycl/detail/is_device_copyable.hpp @@ -31,6 +31,8 @@ inline namespace _V1 { template struct is_device_copyable; namespace detail { +template struct tuple; + template struct is_device_copyable_impl : std::is_trivially_copyable {}; @@ -70,6 +72,10 @@ template struct is_device_copyable> : std::bool_constant<(... && is_device_copyable::value)> {}; +template +struct is_device_copyable> + : std::bool_constant<(... && is_device_copyable::value)> {}; + // std::variant is implicitly device copyable type if each type T of // Ts... is device copyable. template @@ -83,49 +89,26 @@ struct is_device_copyable : is_device_copyable {}; template inline constexpr bool is_device_copyable_v = is_device_copyable::value; namespace detail { -template -struct IsDeprecatedDeviceCopyable : std::false_type {}; - -// TODO: using C++ attribute [[deprecated]] or the macro __SYCL2020_DEPRECATED -// does not produce expected warning message for the type 'T'. -template -struct __SYCL2020_DEPRECATED("This type isn't device copyable in SYCL 2020") - IsDeprecatedDeviceCopyable< - T, std::enable_if_t && - std::is_trivially_destructible_v && - !is_device_copyable_v>> : std::true_type {}; - -template -struct __SYCL2020_DEPRECATED("This type isn't device copyable in SYCL 2020") - IsDeprecatedDeviceCopyable : IsDeprecatedDeviceCopyable {}; - #ifdef __SYCL_DEVICE_ONLY__ -// Checks that the fields of the type T with indices 0 to (NumFieldsToCheck - -// 1) are device copyable. -template -struct CheckFieldsAreDeviceCopyable - : CheckFieldsAreDeviceCopyable { - using FieldT = decltype(__builtin_field_type(T, NumFieldsToCheck - 1)); - static_assert(is_device_copyable_v || - detail::IsDeprecatedDeviceCopyable::value, - "The specified type is not device copyable"); +template struct CheckFieldsAreDeviceCopyable; +template struct CheckBasesAreDeviceCopyable; + +template +struct CheckFieldsAreDeviceCopyable> { + static_assert( + ((is_device_copyable_v && + ...)), + "The specified type is not device copyable"); }; -template struct CheckFieldsAreDeviceCopyable {}; - -// Checks that the base classes of the type T with indices 0 to -// (NumFieldsToCheck - 1) are device copyable. -template -struct CheckBasesAreDeviceCopyable - : CheckBasesAreDeviceCopyable { - using BaseT = decltype(__builtin_base_type(T, NumBasesToCheck - 1)); - static_assert(is_device_copyable_v || - detail::IsDeprecatedDeviceCopyable::value, - "The specified type is not device copyable"); +template +struct CheckBasesAreDeviceCopyable> { + static_assert( + ((is_device_copyable_v && + ...)), + "The specified type is not device copyable"); }; -template struct CheckBasesAreDeviceCopyable {}; - // All the captures of a lambda or functor of type FuncT passed to a kernel // must be is_device_copyable, which extends to bases and fields of FuncT. // Fields are captures of lambda/functors and bases are possible base classes @@ -139,8 +122,10 @@ template struct CheckBasesAreDeviceCopyable {}; // is currently/temporarily supported only to not break older SYCL programs. template struct CheckDeviceCopyable - : CheckFieldsAreDeviceCopyable, - CheckBasesAreDeviceCopyable {}; + : CheckFieldsAreDeviceCopyable< + FuncT, std::make_index_sequence<__builtin_num_fields(FuncT)>>, + CheckBasesAreDeviceCopyable< + FuncT, std::make_index_sequence<__builtin_num_bases(FuncT)>> {}; template class RoundedRangeKernel; diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix-unified-utils.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix-unified-utils.hpp index ec14cf6da1931..349acae157ae7 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix-unified-utils.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix-unified-utils.hpp @@ -11,6 +11,7 @@ #include // std::optional #include // std::string_view #include // __spv namespace +#include // bfloat16 #include // std::pair namespace sycl { @@ -68,7 +69,9 @@ convertMatrixUseStringToEnum(const char *UseString) { return std::nullopt; } -inline __SYCL_ALWAYS_INLINE __spv::MatrixLayout joint_matrix_layout_to_spv( +// propagateConstexprLayout uses the exact name of the function, so we use +// extern "C" here. +extern "C" constexpr __spv::MatrixLayout joint_matrix_layout_to_spv( sycl::ext::oneapi::experimental::matrix::layout Layout) { switch (Layout) { case sycl::ext::oneapi::experimental::matrix::layout::row_major: diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 576dac72a3ce2..7f006648b6cad 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -389,7 +389,7 @@ class ReductionIdentityContainer< static constexpr bool has_identity = true; ReductionIdentityContainer(const T &) {} - ReductionIdentityContainer() {} + ReductionIdentityContainer() = default; /// Returns the statically known identity value. static constexpr T getIdentity() { @@ -407,6 +407,10 @@ class ReductionIdentityContainer< ReductionIdentityContainer(const T &Identity) : MIdentity(Identity) {} + // Make it trivially copyable (need at least on of the special member + // functions): + ReductionIdentityContainer(const ReductionIdentityContainer &) = default; + /// Returns the identity value given by user. T getIdentity() const { return MIdentity; } diff --git a/sycl/include/syclcompat/util.hpp b/sycl/include/syclcompat/util.hpp index 2fb085509cf6a..df03599ea6ad0 100644 --- a/sycl/include/syclcompat/util.hpp +++ b/sycl/include/syclcompat/util.hpp @@ -199,6 +199,110 @@ inline unsigned int byte_level_permute(unsigned int a, unsigned int b, return ret; } +/// \brief The function performs bitwise logical operations on three input +/// values of \p a, \p b and \p c based on the specified 8-bit truth table \p +/// lut and return the result +/// +/// \param [in] a Input value +/// \param [in] b Input value +/// \param [in] c Input value +/// \param [in] lut truth table for looking up +/// \returns The result +inline uint32_t ternary_logic_op(uint32_t a, uint32_t b, uint32_t c, + uint8_t lut) { + uint32_t result = 0; +#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) + asm volatile("lop3.b32 %0, %1, %2, %3, %4;" + : "=r"(result) + : "r"(a), "r"(b), "r"(c), "n"(lut)); +#else + switch (lut) { + case 0x0: + result = 0; + break; + case 0x1: + result = ~a & ~b & ~c; + break; + case 0x2: + result = ~a & ~b & c; + case 0x4: + result = ~a & b & ~c; + break; + case 0x8: + result = ~a & b & c; + break; + case 0x10: + result = a & ~b & ~c; + break; + case 0x20: + result = a & ~b & c; + break; + case 0x40: + result = a & b & ~c; + break; + case 0x80: + result = a & b & c; + break; + case 0x1a: + result = (a & b | c) ^ a; + break; + case 0x1e: + result = a ^ (b | c); + break; + case 0x2d: + result = ~a ^ (~b & c); + break; + case 0x78: + result = a ^ (b & c); + break; + case 0x96: + result = a ^ b ^ c; + break; + case 0xb4: + result = a ^ (b & ~c); + break; + case 0xb8: + result = a ^ (b & (c ^ a)); + break; + case 0xd2: + result = a ^ (~b & c); + break; + case 0xe8: + result = a & (b | c) | (b & c); + break; + case 0xea: + result = a & b | c; + break; + case 0xfe: + result = a | b | c; + break; + case 0xff: + result = -1; + break; + default: { + if (lut & 0x01) + result |= ~a & ~b & ~c; + if (lut & 0x02) + result |= ~a & ~b & c; + if (lut & 0x04) + result |= ~a & b & ~c; + if (lut & 0x08) + result |= ~a & b & c; + if (lut & 0x10) + result |= a & ~b & ~c; + if (lut & 0x20) + result |= a & ~b & c; + if (lut & 0x40) + result |= a & b & ~c; + if (lut & 0x80) + result |= a & b & c; + break; + } + } +#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) + return result; +} + /// Find position of first least significant set bit in an integer. /// ffs(0) returns 0. /// @@ -306,6 +410,9 @@ T shift_sub_group_right(sycl::sub_group g, T x, unsigned int delta, template T permute_sub_group_by_xor(sycl::sub_group g, T x, unsigned int mask, int logical_sub_group_size = 32) { + if (logical_sub_group_size == 32) { + return permute_group_by_xor(g, x, mask); + } unsigned int id = g.get_local_linear_id(); unsigned int start_index = id / logical_sub_group_size * logical_sub_group_size; diff --git a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp index 382dbf525098c..cf3a87c0088c6 100644 --- a/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp +++ b/sycl/test-e2e/Basic/fpga_tests/fpga_pipes.cpp @@ -329,26 +329,30 @@ int main() { } // Non-blocking pipes - int Result = test_simple_nb_pipe(Queue); - Result &= test_simple_nb_pipe(Queue); + int Error = test_simple_nb_pipe(Queue); + Error |= test_simple_nb_pipe(Queue); class forward_nb_pipe; - Result &= test_simple_nb_pipe(Queue); - Result &= test_simple_nb_pipe, /*test number*/ 4>(Queue); - Result &= test_multiple_nb_pipe(Queue); + Error |= test_simple_nb_pipe(Queue); + Error |= test_simple_nb_pipe, /*test number*/ 4>(Queue); + Error |= test_multiple_nb_pipe(Queue); // Blocking pipes - Result &= test_simple_bl_pipe(Queue); - Result &= test_simple_bl_pipe(Queue); + Error |= test_simple_bl_pipe(Queue); + Error |= test_simple_bl_pipe(Queue); class forward_bl_pipe; - Result &= test_simple_bl_pipe(Queue); - Result &= test_simple_bl_pipe, /*test number*/ 9>(Queue); - Result &= test_multiple_bl_pipe(Queue); + Error |= test_simple_bl_pipe(Queue); + Error |= test_simple_bl_pipe, /*test number*/ 9>(Queue); + Error |= test_multiple_bl_pipe(Queue); // Test for an array data passing through a pipe - Result &= test_array_th_nb_pipe(Queue); - Result &= test_array_th_bl_pipe(Queue); + // These two tests are failing in post-commit testing ( + // https://github.com/intel/llvm/issues/16693 ) disabling them, rather than + // the entire test. + + // Error |= test_array_th_nb_pipe(Queue); + // Error |= test_array_th_bl_pipe(Queue); // TODO Remove when #14308 is closed - std::cerr << "DEBUG: Finished with result " << Result << std::endl; - return Result; + std::cerr << "DEBUG: Finished with result " << Error << std::endl; + return Error; } diff --git a/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp b/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp index 30b1240099111..d85279aeb2bc1 100644 --- a/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp +++ b/sycl/test-e2e/Basic/launch_queries/max_num_work_groups.cpp @@ -206,8 +206,8 @@ int main() { using namespace kernels; - int ret{0}; - ret &= test_max_num_work_groups(q, dev); - ret &= test_max_num_work_groups(q, dev); - return ret; + int Error{0}; + Error |= test_max_num_work_groups(q, dev); + Error |= test_max_num_work_groups(q, dev); + return Error; } diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index cc3a8c6b30221..450116ed05ba1 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -1,16 +1,13 @@ -// REQUIRES: opencl && cpu // RUN: %{build} -o %t.out // -// FIXME: Using ONEAPI_DEVICE_SELECTOR=\*:cpu results in seg. faults that I -// cannot reproduce under gdb. -// RUN: env PRINT_DEVICE_INFO=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} %t.out > %t1.conf -// RUN: env TEST_DEVICE_AVAILABLE=1 env SYCL_CONFIG_FILE_NAME=%t1.conf ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} %t.out +// RUN: env PRINT_DEVICE_INFO=1 %{run-unfiltered-devices} %t.out > %t1.conf +// RUN: env TEST_DEVICE_AVAILABLE=1 env SYCL_CONFIG_FILE_NAME=%t1.conf %{run-unfiltered-devices} %t.out // -// RUN: env PRINT_PLATFORM_INFO=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} %t.out > %t2.conf -// RUN: env TEST_DEVICE_AVAILABLE=1 env SYCL_CONFIG_FILE_NAME=%t2.conf ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} %t.out +// RUN: env PRINT_PLATFORM_INFO=1 %{run-unfiltered-devices} %t.out > %t2.conf +// RUN: env TEST_PLATFORM_AVAILABLE=1 env SYCL_CONFIG_FILE_NAME=%t2.conf %{run-unfiltered-devices} %t.out // -// RUN: env TEST_DEVICE_IS_NOT_AVAILABLE=1 env SYCL_DEVICE_ALLOWLIST="PlatformName:{{SUCH NAME DOESN'T EXIST}}" ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} %t.out -// RUN: env TEST_INCORRECT_VALUE=1 env SYCL_DEVICE_ALLOWLIST="IncorrectKey:{{.*}}" ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} %t.out +// RUN: env TEST_DEVICE_IS_NOT_AVAILABLE=1 env SYCL_DEVICE_ALLOWLIST="PlatformName:{{SUCH NAME DOESN'T EXIST}}" %{run-unfiltered-devices} %t.out +// RUN: env TEST_INCORRECT_VALUE=1 env SYCL_DEVICE_ALLOWLIST="IncorrectKey:{{.*}}" %{run-unfiltered-devices} %t.out #include "../helpers.hpp" #include @@ -20,6 +17,16 @@ #include #include +static bool isIdenticalDevices(const std::vector &Devices) { + return std::all_of( + Devices.cbegin(), Devices.cend(), [&](const sycl::device &Dev) { + return (Dev.get_info() == + Devices.at(0).get_info()) && + (Dev.get_info() == + Devices.at(0).get_info()); + }); +} + static void replaceSpecialCharacters(std::string &Str) { // Replace common special symbols with '.' which matches to any character std::replace_if( @@ -48,7 +55,7 @@ int main() { return 0; } - throw std::runtime_error("No device is found"); + throw std::runtime_error("No platform is found"); } // Expected that the allowlist filter is not set @@ -74,12 +81,15 @@ int main() { // Expected the allowlist to be set with the "PRINT_DEVICE_INFO" run result if (env::isDefined("TEST_DEVICE_AVAILABLE")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) { - if (Platform.get_devices().size() != 1) + auto Devices = Platform.get_devices(); + if (Devices.empty()) + throw std::runtime_error("No device is found"); + + if (!(Devices.size() == 1 || isIdenticalDevices(Devices))) throw std::runtime_error("Expected only one device."); return 0; } - throw std::runtime_error("No device is found"); } // Expected the allowlist to be set but empty @@ -89,26 +99,28 @@ int main() { return 0; } + // Expected the allowlist to be set with the "PRINT_PLATFORM_INFO" run result + if (env::isDefined("TEST_PLATFORM_AVAILABLE")) { + auto Platforms = sycl::platform::get_platforms(); + if (Platforms.empty()) + throw std::runtime_error("No platform is found"); + else if (Platforms.size() != 1) + throw std::runtime_error("Expected only one platform."); + + return 0; + } + if (env::isDefined("TEST_INCORRECT_VALUE")) { try { sycl::platform::get_platforms(); } catch (sycl::exception &E) { - // Workaround to make CI pass. - // TODO: after the submission of PR intel/llvm:3826, create PR to - // intel/llvm-test-suite with removal of 1st parameter of the vector, - // and transformation of std::vector to std::string - const std::vector ExpectedMsgs{ - "Unrecognized key in device allowlist", + const std::string ExpectedMsg{ "Unrecognized key in SYCL_DEVICE_ALLOWLIST"}; const std::string GotMessage(E.what()); - bool CorrectMsg = false; - for (const auto &ExpectedMsg : ExpectedMsgs) { - if (GotMessage.find(ExpectedMsg) != std::string::npos) { - CorrectMsg = true; - break; - } + if (GotMessage.find(ExpectedMsg) != std::string::npos) { + return 0; } - return CorrectMsg ? 0 : 1; + return 1; } } diff --git a/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp b/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp index e46c6286e2e7a..c15320b55d352 100644 --- a/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp +++ b/sycl/test-e2e/DeviceGlobal/device_global_copy.cpp @@ -1,4 +1,4 @@ -// DEFINE: %{cpp23} = %if cl_options %{/std:c++23%} %else %{-std=c++23%} +// DEFINE: %{cpp23} = %if cl_options %{/clang:-std=c++23%} %else %{-std=c++23%} // RUN: %{build} %{cpp23} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp index 992cd6147b535..438cb1ba459a6 100644 --- a/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp +++ b/sycl/test-e2e/DeviceLib/exp/exp-std-complex-edge-cases.hpp @@ -335,7 +335,7 @@ template bool test() { CHECK(!std::signbit(r.real()), passed, i); #ifdef _WIN32 // This check fails on win, temporary skipping: - // CMPLRLLVM-61834 + // CMPLRLLVM-64900 // TODO: Delete this macro block when fixed if (std::is_same_v) continue; diff --git a/sycl/test-e2e/Graph/Explicit/work_group_static_memory.cpp b/sycl/test-e2e/Graph/Explicit/work_group_static_memory.cpp new file mode 100644 index 0000000000000..c3d9d4f950dd8 --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/work_group_static_memory.cpp @@ -0,0 +1,14 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: sycl_ext_oneapi_work_group_static is not supported on +// AMD + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/work_group_static_memory.cpp" diff --git a/sycl/test-e2e/Graph/Inputs/work_group_static_memory.cpp b/sycl/test-e2e/Graph/Inputs/work_group_static_memory.cpp new file mode 100644 index 0000000000000..9ccd9f5a21deb --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/work_group_static_memory.cpp @@ -0,0 +1,50 @@ +// Tests using sycl_ext_oneapi_work_group_static in a graph node + +#include "../graph_common.hpp" +#include + +constexpr size_t WgSize = 32; + +// Local mem used in kernel +sycl::ext::oneapi::experimental::work_group_static LocalIDBuff; + +int main() { + queue Queue; + exp_ext::command_graph Graph{Queue}; + + std::vector HostData(Size, 0); + + int *Ptr = malloc_device(Size, Queue); + Queue.copy(HostData.data(), Ptr, Size).wait(); + + auto node = add_node(Graph, Queue, [&](handler &CGH) { + CGH.parallel_for(nd_range({Size}, {WgSize}), [=](nd_item<1> Item) { + LocalIDBuff[Item.get_local_linear_id()] = Item.get_local_linear_id(); + + Item.barrier(); + + // Check that the memory is accessible from other work-items + size_t LocalIdx = Item.get_local_linear_id() ^ 1; + size_t GlobalIdx = Item.get_global_linear_id() ^ 1; + Ptr[GlobalIdx] = LocalIDBuff[LocalIdx]; + }); + }); + + auto GraphExec = Graph.finalize(); + + for (unsigned N = 0; N < Iterations; N++) { + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + } + Queue.wait_and_throw(); + + Queue.copy(Ptr, HostData.data(), Size); + Queue.wait_and_throw(); + + for (size_t i = 0; i < Size; i++) { + int Ref = i % WgSize; + assert(check_value(i, Ref, HostData[i], "Ptr")); + } + + free(Ptr, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/RecordReplay/work_group_static_memory.cpp b/sycl/test-e2e/Graph/RecordReplay/work_group_static_memory.cpp new file mode 100644 index 0000000000000..98d4f045df50b --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/work_group_static_memory.cpp @@ -0,0 +1,14 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: sycl_ext_oneapi_work_group_static is not supported on +// AMD + +#define GRAPH_E2E_RECORD_REPLAY + +#include "../Inputs/work_group_static_memory.cpp" diff --git a/sycl/test-e2e/Graph/Update/work_group_static_memory_with_dyn_cgf_and_dyn_params.cpp b/sycl/test-e2e/Graph/Update/work_group_static_memory_with_dyn_cgf_and_dyn_params.cpp new file mode 100644 index 0000000000000..0d4f935d4c479 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/work_group_static_memory_with_dyn_cgf_and_dyn_params.cpp @@ -0,0 +1,98 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: sycl_ext_oneapi_work_group_static is not supported on +// AMD + +// Tests using sycl_ext_oneapi_work_group_static in a graph node with dynamic +// cgf and dynamic parameter + +#include "../graph_common.hpp" +#include + +constexpr size_t WgSize = 32; + +// Local mem used in kernel +sycl::ext::oneapi::experimental::work_group_static LocalIDBuff; + +int main() { + queue Queue; + exp_ext::command_graph Graph{Queue}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + + exp_ext::dynamic_parameter DynParam(Graph, PtrA); + + auto CGFA = [&](handler &CGH) { + CGH.set_arg(0, DynParam); + CGH.parallel_for(nd_range({Size}, {WgSize}), [=](nd_item<1> Item) { + LocalIDBuff[Item.get_local_linear_id()] = Item.get_local_linear_id(); + + Item.barrier(); + + // Check that the memory is accessible from other work-items + size_t LocalIdx = Item.get_local_linear_id() ^ 1; + size_t GlobalIdx = Item.get_global_linear_id() ^ 1; + PtrA[GlobalIdx] = LocalIDBuff[LocalIdx]; + }); + }; + + auto CGFB = [&](handler &CGH) { + CGH.set_arg(0, DynParam); + CGH.parallel_for(nd_range({Size}, {WgSize}), [=](nd_item<1> Item) { + LocalIDBuff[Item.get_local_linear_id()] = Item.get_local_linear_id(); + + Item.barrier(); + + // Check that the memory is accessible from other work-items + size_t LocalIdx = Item.get_local_linear_id() ^ 1; + size_t GlobalIdx = Item.get_global_linear_id() ^ 1; + PtrA[GlobalIdx] = LocalIDBuff[LocalIdx] - 1; + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, bool nextCGF) { + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.wait(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.wait(); + + for (size_t i = 0; i < Size; i++) { + int Ref = nextCGF ? (i % WgSize) - 1 : i % WgSize; + assert(HostDataA[i] == (A ? Ref : 0)); + assert(HostDataB[i] == (B ? Ref : 0)); + } + }; + + ExecuteGraphAndVerifyResults(true, false, false); + + DynParam.update(PtrB); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, false); + + DynamicCG.set_active_index(1); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, true); + + free(PtrA, Queue); + free(PtrB, Queue); + return 0; +} diff --git a/sycl/test-e2e/ProgramManager/multi_device_bundle/device_libs_and_caching.cpp b/sycl/test-e2e/ProgramManager/multi_device_bundle/device_libs_and_caching.cpp index 2a54744ebdef5..78872c04d97ec 100644 --- a/sycl/test-e2e/ProgramManager/multi_device_bundle/device_libs_and_caching.cpp +++ b/sycl/test-e2e/ProgramManager/multi_device_bundle/device_libs_and_caching.cpp @@ -57,7 +57,7 @@ int main() { auto res = sycl::malloc_host(3, ctx); auto KernelLambda = [=]() { - res[0] = sycl::ext::intel::math::float2int_rd(4.0) + (int)sqrtf(4.0f) + + res[0] = sycl::ext::intel::math::float2int_rd(4.0f) + (int)sqrtf(4.0f) + std::exp(std::complex(0.f, 0.f)).real(); }; // Test case 1 diff --git a/sycl/test-e2e/Scheduler/MultipleDevices.cpp b/sycl/test-e2e/Scheduler/MultipleDevices.cpp index 3976512e2d6e7..f848ce79ab9a4 100644 --- a/sycl/test-e2e/Scheduler/MultipleDevices.cpp +++ b/sycl/test-e2e/Scheduler/MultipleDevices.cpp @@ -92,11 +92,11 @@ int multidevice_test(queue MyQueue1, queue MyQueue2) { int main() { - int Result = -1; + int Error = 0; try { queue MyQueue1(cpu_selector_v); queue MyQueue2(cpu_selector_v); - Result &= multidevice_test(MyQueue1, MyQueue2); + Error |= multidevice_test(MyQueue1, MyQueue2); } catch (sycl::exception &) { std::cout << "Skipping CPU and CPU" << std::endl; } @@ -104,7 +104,7 @@ int main() { try { queue MyQueue1(cpu_selector_v); queue MyQueue2(gpu_selector_v); - Result &= multidevice_test(MyQueue1, MyQueue2); + Error |= multidevice_test(MyQueue1, MyQueue2); } catch (sycl::exception &) { std::cout << "Skipping CPU and GPU" << std::endl; } @@ -112,10 +112,10 @@ int main() { try { queue MyQueue1(gpu_selector_v); queue MyQueue2(gpu_selector_v); - Result &= multidevice_test(MyQueue1, MyQueue2); + Error |= multidevice_test(MyQueue1, MyQueue2); } catch (sycl::exception &) { std::cout << "Skipping GPU and GPU" << std::endl; } - return Result; + return Error; } diff --git a/sycl/test-e2e/XPTI/buffer/in_cycle.cpp b/sycl/test-e2e/XPTI/buffer/in_cycle.cpp index 7dbd84d336289..18075e22a7ca8 100644 --- a/sycl/test-e2e/XPTI/buffer/in_cycle.cpp +++ b/sycl/test-e2e/XPTI/buffer/in_cycle.cpp @@ -45,7 +45,7 @@ bool func(sycl::queue &Queue, int depth = 0) { } if (depth > 0) - MismatchFound &= func(Queue, depth - 1); + MismatchFound |= func(Queue, depth - 1); return MismatchFound; } int main() { @@ -66,7 +66,7 @@ int main() { // CHECK:{{[0-9]+}}|Release buffer|[[USERID3]]|[[BEID3]] // CHECK:{{[0-9]+}}|Destruct buffer|[[USERID3]] for (int i = 0; i < 3; i++) - MismatchFound &= func(Queue); + MismatchFound |= func(Queue); return MismatchFound; } diff --git a/sycl/test-e2e/XPTI/buffer/recursion.cpp b/sycl/test-e2e/XPTI/buffer/recursion.cpp index 00d90390311a1..9a2044c0e8d97 100644 --- a/sycl/test-e2e/XPTI/buffer/recursion.cpp +++ b/sycl/test-e2e/XPTI/buffer/recursion.cpp @@ -65,7 +65,7 @@ int main() { // CHECK:{{[0-9]+}}|Destruct buffer|[[USERID2]] // CHECK:{{[0-9]+}}|Release buffer|[[USERID1]]|[[BEID1]] // CHECK:{{[0-9]+}}|Destruct buffer|[[USERID1]] - MismatchFound &= func(Queue, 2); + MismatchFound |= func(Queue, 2); return MismatchFound; } diff --git a/sycl/test-e2e/bindless_images/3_channel_format.cpp b/sycl/test-e2e/bindless_images/3_channel_format.cpp index a3668f4f31973..3eb46c7458155 100644 --- a/sycl/test-e2e/bindless_images/3_channel_format.cpp +++ b/sycl/test-e2e/bindless_images/3_channel_format.cpp @@ -1,7 +1,10 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// Test requires at least this version of the Intel GPU driver on Arc. +// REQUIRES-INTEL-DRIVER: lin: 32370 // RUN: %{build} -o %t.out -// RUN: %{run} %t.out +// RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out #include #include @@ -21,19 +24,22 @@ int main() { auto ctxt = q.get_context(); constexpr size_t width = 512; - std::vector out(width); - std::vector expected(width); - std::vector dataIn(width); - float exp = 512; - for (int i = 0; i < width; i++) { + std::vector out(width); + std::vector expected(width); + std::vector dataIn(width * 3); + unsigned short exp = 512; + for (unsigned int i = 0; i < width; i++) { expected[i] = exp; - dataIn[i] = sycl::float3(exp, width, i); + dataIn[(i * 3)] = exp; + dataIn[(i * 3) + 1] = static_cast(width); + dataIn[(i * 3) + 2] = static_cast(i); } try { // Main point of this test is to check creating an image // with a 3-channel format - syclexp::image_descriptor desc({width}, 3, sycl::image_channel_type::fp32); + syclexp::image_descriptor desc({width}, 3, + sycl::image_channel_type::unsigned_int16); syclexp::image_mem imgMem(desc, dev, ctxt); @@ -46,7 +52,7 @@ int main() { syclexp::unsampled_image_handle imgHandle = sycl::ext::oneapi::experimental::create_image(imgMem, desc, dev, ctxt); - sycl::buffer buf(out.data(), width); + sycl::buffer buf(out.data(), width); q.submit([&](sycl::handler &cgh) { sycl::accessor outAcc{buf, cgh}; @@ -55,9 +61,9 @@ int main() { #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) // This shouldn't be hit anyway since CUDA doesn't support // 3-channel formats, but we need to ensure the kernel can compile - using pixel_t = sycl::float4; + using pixel_t = sycl::ushort4; #else - using pixel_t = sycl::float3; + using pixel_t = sycl::ushort3; #endif auto pixel = syclexp::fetch_image(imgHandle, int(id[0])); outAcc[id] = pixel[0]; @@ -83,7 +89,7 @@ int main() { } bool validated = true; - for (int i = 0; i < width; i++) { + for (unsigned int i = 0; i < width; i++) { bool mismatch = false; if (out[i] != expected[i]) { mismatch = true; diff --git a/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp b/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp index 7b877d826f18b..6b0b478b1e367 100644 --- a/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp +++ b/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp @@ -86,11 +86,9 @@ void test_permute_sub_group_by_xor() { syclcompat::device_ext &dev_ct1 = syclcompat::get_current_device(); sycl::queue *q_ct1 = dev_ct1.default_queue(); bool Result = true; - int *dev_data = nullptr; unsigned int *dev_data_u = nullptr; sycl::range<3> GridSize(1, 1, 1); sycl::range<3> BlockSize(1, 1, 1); - dev_data = sycl::malloc_device(DATA_NUM, *q_ct1); dev_data_u = sycl::malloc_device(DATA_NUM, *q_ct1); GridSize = sycl::range<3>(1, 1, 2); @@ -120,6 +118,19 @@ void test_permute_sub_group_by_xor() { q_ct1->memcpy(host_dev_data_u, dev_data_u, DATA_NUM * sizeof(unsigned int)) .wait(); verify_data(host_dev_data_u, expect1, DATA_NUM); + sycl::free(dev_data_u, *q_ct1); +} + +void test_permute_sub_group_by_xor_extra_arg() { + std::cout << __PRETTY_FUNCTION__ << std::endl; + + syclcompat::device_ext &dev_ct1 = syclcompat::get_current_device(); + sycl::queue *q_ct1 = dev_ct1.default_queue(); + bool Result = true; + unsigned int *dev_data_u = nullptr; + sycl::range<3> GridSize(1, 1, 1); + sycl::range<3> BlockSize(1, 1, 1); + dev_data_u = sycl::malloc_device(DATA_NUM, *q_ct1); GridSize = sycl::range<3>(1, 1, 2); BlockSize = sycl::range<3>(1, 2, 32); @@ -133,6 +144,7 @@ void test_permute_sub_group_by_xor() { 91, 90, 93, 92, 95, 94, 97, 96, 99, 98, 101, 100, 103, 102, 105, 104, 107, 106, 109, 108, 111, 110, 113, 112, 115, 114, 117, 116, 119, 118, 121, 120, 123, 122, 125, 124, 127, 126}; + unsigned int host_dev_data_u[DATA_NUM]; init_data(host_dev_data_u, DATA_NUM); q_ct1->memcpy(dev_data_u, host_dev_data_u, DATA_NUM * sizeof(unsigned int)) @@ -147,13 +159,12 @@ void test_permute_sub_group_by_xor() { q_ct1->memcpy(host_dev_data_u, dev_data_u, DATA_NUM * sizeof(unsigned int)) .wait(); verify_data(host_dev_data_u, expect2, DATA_NUM); - - sycl::free(dev_data, *q_ct1); sycl::free(dev_data_u, *q_ct1); } int main() { test_permute_sub_group_by_xor(); + test_permute_sub_group_by_xor_extra_arg(); return 0; } diff --git a/sycl/test-e2e/syclcompat/util/util_ternary_logic_op_test.cpp b/sycl/test-e2e/syclcompat/util/util_ternary_logic_op_test.cpp new file mode 100644 index 0000000000000..4e252c5df08a5 --- /dev/null +++ b/sycl/test-e2e/syclcompat/util/util_ternary_logic_op_test.cpp @@ -0,0 +1,585 @@ +// ====------ util_ternary_logic_op_test.cpp ------------ *- C/C++ -* ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// ===--------------------------------------------------------------------===// + +// This file is modified from the code migrated by SYCLomatic. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include +#include +#include + +// clang-format off +void reference_of_ternary_logic_op(uint32_t &R, uint32_t A, uint32_t B, uint32_t C, uint32_t D) { + switch (D) { + case 0: R = 0; break; + case 1: R = (~A & ~B & ~C); break; + case 2: R = (~A & ~B & C); break; + case 3: R = (~A & ~B & ~C) | (~A & ~B & C); break; + case 4: R = (~A & B & ~C); break; + case 5: R = (~A & ~B & ~C) | (~A & B & ~C); break; + case 6: R = (~A & ~B & C) | (~A & B & ~C); break; + case 7: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C); break; + case 8: R = (~A & B & C); break; + case 9: R = (~A & ~B & ~C) | (~A & B & C); break; + case 10: R = (~A & ~B & C) | (~A & B & C); break; + case 11: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C); break; + case 12: R = (~A & B & ~C) | (~A & B & C); break; + case 13: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C); break; + case 14: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C); break; + case 15: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C); break; + case 16: R = (A & ~B & ~C); break; + case 17: R = (~A & ~B & ~C) | (A & ~B & ~C); break; + case 18: R = (~A & ~B & C) | (A & ~B & ~C); break; + case 19: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & ~C); break; + case 20: R = (~A & B & ~C) | (A & ~B & ~C); break; + case 21: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & ~C); break; + case 22: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C); break; + case 23: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C); break; + case 24: R = (~A & B & C) | (A & ~B & ~C); break; + case 25: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & ~C); break; + case 26: R = (A & B | C) ^ A; break; + case 27: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C); break; + case 28: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C); break; + case 29: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C); break; + case 30: R = A ^ (B | C); break; + case 31: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C); break; + case 32: R = (A & ~B & C); break; + case 33: R = (~A & ~B & ~C) | (A & ~B & C); break; + case 34: R = (~A & ~B & C) | (A & ~B & C); break; + case 35: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & C); break; + case 36: R = (~A & B & ~C) | (A & ~B & C); break; + case 37: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & C); break; + case 38: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & C); break; + case 39: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & C); break; + case 40: R = (~A & B & C) | (A & ~B & C); break; + case 41: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & C); break; + case 42: R = (~A & ~B & C) | (~A & B & C) | (A & ~B & C); break; + case 43: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & C); break; + case 44: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & C); break; + case 45: R = ~A ^ (~B & C); break; + case 46: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C); break; + case 47: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C); break; + case 48: R = (A & ~B & ~C) | (A & ~B & C); break; + case 49: R = (~A & ~B & ~C) | (A & ~B & ~C) | (A & ~B & C); break; + case 50: R = (~A & ~B & C) | (A & ~B & ~C) | (A & ~B & C); break; + case 51: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & ~C) | (A & ~B & C); break; + case 52: R = (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C); break; + case 53: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C); break; + case 54: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C); break; + case 55: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C); break; + case 56: R = (~A & B & C) | (A & ~B & ~C) | (A & ~B & C); break; + case 57: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C); break; + case 58: R = (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C); break; + case 59: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C); break; + case 60: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C); break; + case 61: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C); break; + case 62: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C); break; + case 63: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C); break; + case 64: R = A & B & ~C; break; + case 65: R = (~A & ~B & ~C) | (A & B & ~C); break; + case 66: R = (~A & ~B & C) | (A & B & ~C); break; + case 67: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & B & ~C); break; + case 68: R = (~A & B & ~C) | (A & B & ~C); break; + case 69: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & B & ~C); break; + case 70: R = (~A & ~B & C) | (~A & B & ~C) | (A & B & ~C); break; + case 71: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & B & ~C); break; + case 72: R = (~A & B & C) | (A & B & ~C); break; + case 73: R = (~A & ~B & ~C) | (~A & B & C) | (A & B & ~C); break; + case 74: R = (~A & ~B & C) | (~A & B & C) | (A & B & ~C); break; + case 75: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & B & ~C); break; + case 76: R = (~A & B & ~C) | (~A & B & C) | (A & B & ~C); break; + case 77: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & B & ~C); break; + case 78: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & B & ~C); break; + case 79: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & B & ~C); break; + case 80: R = (A & ~B & ~C) | (A & B & ~C); break; + case 81: R = (~A & ~B & ~C) | (A & ~B & ~C) | (A & B & ~C); break; + case 82: R = (~A & ~B & C) | (A & ~B & ~C) | (A & B & ~C); break; + case 83: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & ~C) | (A & B & ~C); break; + case 84: R = (~A & B & ~C) | (A & ~B & ~C) | (A & B & ~C); break; + case 85: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & ~C) | (A & B & ~C); break; + case 86: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & B & ~C); break; + case 87: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & B & ~C); break; + case 88: R = (~A & B & C) | (A & ~B & ~C) | (A & B & ~C); break; + case 89: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C); break; + case 90: R = (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C); break; + case 91: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C); break; + case 92: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C); break; + case 93: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C); break; + case 94: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C); break; + case 95: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C); break; + case 96: R = (A & ~B & C) | (A & B & ~C); break; + case 97: R = (~A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 98: R = (~A & ~B & C) | (A & ~B & C) | (A & B & ~C); break; + case 99: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & C) | (A & B & ~C); break; + case 100: R = (~A & B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 101: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 102: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 103: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 104: R = (~A & B & C) | (A & ~B & C) | (A & B & ~C); break; + case 105: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C); break; + case 106: R = (~A & ~B & C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C); break; + case 107: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C); break; + case 108: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C); break; + case 109: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C); break; + case 110: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C); break; + case 111: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C); break; + case 112: R = (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 113: R = (~A & ~B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 114: R = (~A & ~B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 115: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 116: R = (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 117: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 118: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 119: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 120: R = A ^ (B & C); break; + case 121: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 122: R = (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 123: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 124: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 125: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 126: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 127: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C); break; + case 128: R = A & B & C; break; + case 129: R = (~A & ~B & ~C) | (A & B & C); break; + case 130: R = (~A & ~B & C) | (A & B & C); break; + case 131: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & B & C); break; + case 132: R = (~A & B & ~C) | (A & B & C); break; + case 133: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & B & C); break; + case 134: R = (~A & ~B & C) | (~A & B & ~C) | (A & B & C); break; + case 135: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & B & C); break; + case 136: R = (~A & B & C) | (A & B & C); break; + case 137: R = (~A & ~B & ~C) | (~A & B & C) | (A & B & C); break; + case 138: R = (~A & ~B & C) | (~A & B & C) | (A & B & C); break; + case 139: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & B & C); break; + case 140: R = (~A & B & ~C) | (~A & B & C) | (A & B & C); break; + case 141: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & B & C); break; + case 142: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & B & C); break; + case 143: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & B & C); break; + case 144: R = (A & ~B & ~C) | (A & B & C); break; + case 145: R = (~A & ~B & ~C) | (A & ~B & ~C) | (A & B & C); break; + case 146: R = (~A & ~B & C) | (A & ~B & ~C) | (A & B & C); break; + case 147: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & ~C) | (A & B & C); break; + case 148: R = (~A & B & ~C) | (A & ~B & ~C) | (A & B & C); break; + case 149: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & ~C) | (A & B & C); break; + case 150: R = A ^ B ^ C; break; + case 151: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & B & C); break; + case 152: R = (~A & B & C) | (A & ~B & ~C) | (A & B & C); break; + case 153: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & C); break; + case 154: R = (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & B & C); break; + case 155: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & B & C); break; + case 156: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & C); break; + case 157: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & C); break; + case 158: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & C); break; + case 159: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & C); break; + case 160: R = (A & ~B & C) | (A & B & C); break; + case 161: R = (~A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 162: R = (~A & ~B & C) | (A & ~B & C) | (A & B & C); break; + case 163: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & C) | (A & B & C); break; + case 164: R = (~A & B & ~C) | (A & ~B & C) | (A & B & C); break; + case 165: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & C) | (A & B & C); break; + case 166: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & C) | (A & B & C); break; + case 167: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & C) | (A & B & C); break; + case 168: R = (~A & B & C) | (A & ~B & C) | (A & B & C); break; + case 169: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & C); break; + case 170: R = (~A & ~B & C) | (~A & B & C) | (A & ~B & C) | (A & B & C); break; + case 171: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & C) | (A & B & C); break; + case 172: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & C); break; + case 173: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & C); break; + case 174: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & C); break; + case 175: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & C); break; + case 176: R = (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 177: R = (~A & ~B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 178: R = (~A & ~B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 179: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 180: R = A ^ (B & ~C); break; + case 181: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 182: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 183: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 184: R = (A ^ (B & (C ^ A))); break; + case 185: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 186: R = (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 187: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 188: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 189: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 190: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 191: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & C); break; + case 192: R = (A & B & ~C) | (A & B & C); break; + case 193: R = (~A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 194: R = (~A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 195: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 196: R = (~A & B & ~C) | (A & B & ~C) | (A & B & C); break; + case 197: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & B & ~C) | (A & B & C); break; + case 198: R = (~A & ~B & C) | (~A & B & ~C) | (A & B & ~C) | (A & B & C); break; + case 199: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & B & ~C) | (A & B & C); break; + case 200: R = (~A & B & C) | (A & B & ~C) | (A & B & C); break; + case 201: R = (~A & ~B & ~C) | (~A & B & C) | (A & B & ~C) | (A & B & C); break; + case 202: R = (~A & ~B & C) | (~A & B & C) | (A & B & ~C) | (A & B & C); break; + case 203: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & B & ~C) | (A & B & C); break; + case 204: R = (~A & B & ~C) | (~A & B & C) | (A & B & ~C) | (A & B & C); break; + case 205: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & B & ~C) | (A & B & C); break; + case 206: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & B & ~C) | (A & B & C); break; + case 207: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & B & ~C) | (A & B & C); break; + case 208: R = (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 209: R = (~A & ~B & ~C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 210: R = A ^ (~B & C); break; + case 211: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 212: R = (~A & B & ~C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 213: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 214: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 215: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 216: R = (~A & B & C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 217: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 218: R = (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 219: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 220: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 221: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 222: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 223: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & B & ~C) | (A & B & C); break; + case 224: R = (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 225: R = (~A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 226: R = (~A & ~B & C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 227: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 228: R = (~A & B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 229: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 230: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 231: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 232: R = ((A & (B | C)) | (B & C)); break; + case 233: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 234: R = (A & B) | C; break; + case 235: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 236: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 237: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 238: R = (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 239: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 240: R = (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 241: R = (~A & ~B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 242: R = (~A & ~B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 243: R = (~A & ~B & ~C) | (~A & ~B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 244: R = (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 245: R = (~A & ~B & ~C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 246: R = (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 247: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & ~C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 248: R = (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 249: R = (~A & ~B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 250: R = (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 251: R = (~A & ~B & ~C) | (~A & ~B & C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 252: R = (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 253: R = (~A & ~B & ~C) | (~A & B & ~C) | (~A & B & C) | (A & ~B & ~C) | (A & ~B & C) | (A & B & ~C) | (A & B & C); break; + case 254: R = A | B | C; break; + case 255: R = uint32_t(-1); break; + default: break; + } +} + +void asm_ternary_logic_op(uint32_t &R, uint32_t A, uint32_t B, uint32_t C, uint32_t D) { + switch (D) { + case 0: R = syclcompat::ternary_logic_op(A, B, C, 0x0); break; + case 1: R = syclcompat::ternary_logic_op(A, B, C, 0x1); break; + case 2: R = syclcompat::ternary_logic_op(A, B, C, 0x2); break; + case 3: R = syclcompat::ternary_logic_op(A, B, C, 0x3); break; + case 4: R = syclcompat::ternary_logic_op(A, B, C, 0x4); break; + case 5: R = syclcompat::ternary_logic_op(A, B, C, 0x5); break; + case 6: R = syclcompat::ternary_logic_op(A, B, C, 0x6); break; + case 7: R = syclcompat::ternary_logic_op(A, B, C, 0x7); break; + case 8: R = syclcompat::ternary_logic_op(A, B, C, 0x8); break; + case 9: R = syclcompat::ternary_logic_op(A, B, C, 0x9); break; + case 10: R = syclcompat::ternary_logic_op(A, B, C, 0xA); break; + case 11: R = syclcompat::ternary_logic_op(A, B, C, 0xB); break; + case 12: R = syclcompat::ternary_logic_op(A, B, C, 0xC); break; + case 13: R = syclcompat::ternary_logic_op(A, B, C, 0xD); break; + case 14: R = syclcompat::ternary_logic_op(A, B, C, 0xE); break; + case 15: R = syclcompat::ternary_logic_op(A, B, C, 0xF); break; + case 16: R = syclcompat::ternary_logic_op(A, B, C, 0x10); break; + case 17: R = syclcompat::ternary_logic_op(A, B, C, 0x11); break; + case 18: R = syclcompat::ternary_logic_op(A, B, C, 0x12); break; + case 19: R = syclcompat::ternary_logic_op(A, B, C, 0x13); break; + case 20: R = syclcompat::ternary_logic_op(A, B, C, 0x14); break; + case 21: R = syclcompat::ternary_logic_op(A, B, C, 0x15); break; + case 22: R = syclcompat::ternary_logic_op(A, B, C, 0x16); break; + case 23: R = syclcompat::ternary_logic_op(A, B, C, 0x17); break; + case 24: R = syclcompat::ternary_logic_op(A, B, C, 0x18); break; + case 25: R = syclcompat::ternary_logic_op(A, B, C, 0x19); break; + case 26: R = syclcompat::ternary_logic_op(A, B, C, 0x1A); break; + case 27: R = syclcompat::ternary_logic_op(A, B, C, 0x1B); break; + case 28: R = syclcompat::ternary_logic_op(A, B, C, 0x1C); break; + case 29: R = syclcompat::ternary_logic_op(A, B, C, 0x1D); break; + case 30: R = syclcompat::ternary_logic_op(A, B, C, 0x1E); break; + case 31: R = syclcompat::ternary_logic_op(A, B, C, 0x1F); break; + case 32: R = syclcompat::ternary_logic_op(A, B, C, 0x20); break; + case 33: R = syclcompat::ternary_logic_op(A, B, C, 0x21); break; + case 34: R = syclcompat::ternary_logic_op(A, B, C, 0x22); break; + case 35: R = syclcompat::ternary_logic_op(A, B, C, 0x23); break; + case 36: R = syclcompat::ternary_logic_op(A, B, C, 0x24); break; + case 37: R = syclcompat::ternary_logic_op(A, B, C, 0x25); break; + case 38: R = syclcompat::ternary_logic_op(A, B, C, 0x26); break; + case 39: R = syclcompat::ternary_logic_op(A, B, C, 0x27); break; + case 40: R = syclcompat::ternary_logic_op(A, B, C, 0x28); break; + case 41: R = syclcompat::ternary_logic_op(A, B, C, 0x29); break; + case 42: R = syclcompat::ternary_logic_op(A, B, C, 0x2A); break; + case 43: R = syclcompat::ternary_logic_op(A, B, C, 0x2B); break; + case 44: R = syclcompat::ternary_logic_op(A, B, C, 0x2C); break; + case 45: R = syclcompat::ternary_logic_op(A, B, C, 0x2D); break; + case 46: R = syclcompat::ternary_logic_op(A, B, C, 0x2E); break; + case 47: R = syclcompat::ternary_logic_op(A, B, C, 0x2F); break; + case 48: R = syclcompat::ternary_logic_op(A, B, C, 0x30); break; + case 49: R = syclcompat::ternary_logic_op(A, B, C, 0x31); break; + case 50: R = syclcompat::ternary_logic_op(A, B, C, 0x32); break; + case 51: R = syclcompat::ternary_logic_op(A, B, C, 0x33); break; + case 52: R = syclcompat::ternary_logic_op(A, B, C, 0x34); break; + case 53: R = syclcompat::ternary_logic_op(A, B, C, 0x35); break; + case 54: R = syclcompat::ternary_logic_op(A, B, C, 0x36); break; + case 55: R = syclcompat::ternary_logic_op(A, B, C, 0x37); break; + case 56: R = syclcompat::ternary_logic_op(A, B, C, 0x38); break; + case 57: R = syclcompat::ternary_logic_op(A, B, C, 0x39); break; + case 58: R = syclcompat::ternary_logic_op(A, B, C, 0x3A); break; + case 59: R = syclcompat::ternary_logic_op(A, B, C, 0x3B); break; + case 60: R = syclcompat::ternary_logic_op(A, B, C, 0x3C); break; + case 61: R = syclcompat::ternary_logic_op(A, B, C, 0x3D); break; + case 62: R = syclcompat::ternary_logic_op(A, B, C, 0x3E); break; + case 63: R = syclcompat::ternary_logic_op(A, B, C, 0x3F); break; + case 64: R = syclcompat::ternary_logic_op(A, B, C, 0x40); break; + case 65: R = syclcompat::ternary_logic_op(A, B, C, 0x41); break; + case 66: R = syclcompat::ternary_logic_op(A, B, C, 0x42); break; + case 67: R = syclcompat::ternary_logic_op(A, B, C, 0x43); break; + case 68: R = syclcompat::ternary_logic_op(A, B, C, 0x44); break; + case 69: R = syclcompat::ternary_logic_op(A, B, C, 0x45); break; + case 70: R = syclcompat::ternary_logic_op(A, B, C, 0x46); break; + case 71: R = syclcompat::ternary_logic_op(A, B, C, 0x47); break; + case 72: R = syclcompat::ternary_logic_op(A, B, C, 0x48); break; + case 73: R = syclcompat::ternary_logic_op(A, B, C, 0x49); break; + case 74: R = syclcompat::ternary_logic_op(A, B, C, 0x4A); break; + case 75: R = syclcompat::ternary_logic_op(A, B, C, 0x4B); break; + case 76: R = syclcompat::ternary_logic_op(A, B, C, 0x4C); break; + case 77: R = syclcompat::ternary_logic_op(A, B, C, 0x4D); break; + case 78: R = syclcompat::ternary_logic_op(A, B, C, 0x4E); break; + case 79: R = syclcompat::ternary_logic_op(A, B, C, 0x4F); break; + case 80: R = syclcompat::ternary_logic_op(A, B, C, 0x50); break; + case 81: R = syclcompat::ternary_logic_op(A, B, C, 0x51); break; + case 82: R = syclcompat::ternary_logic_op(A, B, C, 0x52); break; + case 83: R = syclcompat::ternary_logic_op(A, B, C, 0x53); break; + case 84: R = syclcompat::ternary_logic_op(A, B, C, 0x54); break; + case 85: R = syclcompat::ternary_logic_op(A, B, C, 0x55); break; + case 86: R = syclcompat::ternary_logic_op(A, B, C, 0x56); break; + case 87: R = syclcompat::ternary_logic_op(A, B, C, 0x57); break; + case 88: R = syclcompat::ternary_logic_op(A, B, C, 0x58); break; + case 89: R = syclcompat::ternary_logic_op(A, B, C, 0x59); break; + case 90: R = syclcompat::ternary_logic_op(A, B, C, 0x5A); break; + case 91: R = syclcompat::ternary_logic_op(A, B, C, 0x5B); break; + case 92: R = syclcompat::ternary_logic_op(A, B, C, 0x5C); break; + case 93: R = syclcompat::ternary_logic_op(A, B, C, 0x5D); break; + case 94: R = syclcompat::ternary_logic_op(A, B, C, 0x5E); break; + case 95: R = syclcompat::ternary_logic_op(A, B, C, 0x5F); break; + case 96: R = syclcompat::ternary_logic_op(A, B, C, 0x60); break; + case 97: R = syclcompat::ternary_logic_op(A, B, C, 0x61); break; + case 98: R = syclcompat::ternary_logic_op(A, B, C, 0x62); break; + case 99: R = syclcompat::ternary_logic_op(A, B, C, 0x63); break; + case 100: R = syclcompat::ternary_logic_op(A, B, C, 0x64); break; + case 101: R = syclcompat::ternary_logic_op(A, B, C, 0x65); break; + case 102: R = syclcompat::ternary_logic_op(A, B, C, 0x66); break; + case 103: R = syclcompat::ternary_logic_op(A, B, C, 0x67); break; + case 104: R = syclcompat::ternary_logic_op(A, B, C, 0x68); break; + case 105: R = syclcompat::ternary_logic_op(A, B, C, 0x69); break; + case 106: R = syclcompat::ternary_logic_op(A, B, C, 0x6A); break; + case 107: R = syclcompat::ternary_logic_op(A, B, C, 0x6B); break; + case 108: R = syclcompat::ternary_logic_op(A, B, C, 0x6C); break; + case 109: R = syclcompat::ternary_logic_op(A, B, C, 0x6D); break; + case 110: R = syclcompat::ternary_logic_op(A, B, C, 0x6E); break; + case 111: R = syclcompat::ternary_logic_op(A, B, C, 0x6F); break; + case 112: R = syclcompat::ternary_logic_op(A, B, C, 0x70); break; + case 113: R = syclcompat::ternary_logic_op(A, B, C, 0x71); break; + case 114: R = syclcompat::ternary_logic_op(A, B, C, 0x72); break; + case 115: R = syclcompat::ternary_logic_op(A, B, C, 0x73); break; + case 116: R = syclcompat::ternary_logic_op(A, B, C, 0x74); break; + case 117: R = syclcompat::ternary_logic_op(A, B, C, 0x75); break; + case 118: R = syclcompat::ternary_logic_op(A, B, C, 0x76); break; + case 119: R = syclcompat::ternary_logic_op(A, B, C, 0x77); break; + case 120: R = syclcompat::ternary_logic_op(A, B, C, 0x78); break; + case 121: R = syclcompat::ternary_logic_op(A, B, C, 0x79); break; + case 122: R = syclcompat::ternary_logic_op(A, B, C, 0x7A); break; + case 123: R = syclcompat::ternary_logic_op(A, B, C, 0x7B); break; + case 124: R = syclcompat::ternary_logic_op(A, B, C, 0x7C); break; + case 125: R = syclcompat::ternary_logic_op(A, B, C, 0x7D); break; + case 126: R = syclcompat::ternary_logic_op(A, B, C, 0x7E); break; + case 127: R = syclcompat::ternary_logic_op(A, B, C, 0x7F); break; + case 128: R = syclcompat::ternary_logic_op(A, B, C, 0x80); break; + case 129: R = syclcompat::ternary_logic_op(A, B, C, 0x81); break; + case 130: R = syclcompat::ternary_logic_op(A, B, C, 0x82); break; + case 131: R = syclcompat::ternary_logic_op(A, B, C, 0x83); break; + case 132: R = syclcompat::ternary_logic_op(A, B, C, 0x84); break; + case 133: R = syclcompat::ternary_logic_op(A, B, C, 0x85); break; + case 134: R = syclcompat::ternary_logic_op(A, B, C, 0x86); break; + case 135: R = syclcompat::ternary_logic_op(A, B, C, 0x87); break; + case 136: R = syclcompat::ternary_logic_op(A, B, C, 0x88); break; + case 137: R = syclcompat::ternary_logic_op(A, B, C, 0x89); break; + case 138: R = syclcompat::ternary_logic_op(A, B, C, 0x8A); break; + case 139: R = syclcompat::ternary_logic_op(A, B, C, 0x8B); break; + case 140: R = syclcompat::ternary_logic_op(A, B, C, 0x8C); break; + case 141: R = syclcompat::ternary_logic_op(A, B, C, 0x8D); break; + case 142: R = syclcompat::ternary_logic_op(A, B, C, 0x8E); break; + case 143: R = syclcompat::ternary_logic_op(A, B, C, 0x8F); break; + case 144: R = syclcompat::ternary_logic_op(A, B, C, 0x90); break; + case 145: R = syclcompat::ternary_logic_op(A, B, C, 0x91); break; + case 146: R = syclcompat::ternary_logic_op(A, B, C, 0x92); break; + case 147: R = syclcompat::ternary_logic_op(A, B, C, 0x93); break; + case 148: R = syclcompat::ternary_logic_op(A, B, C, 0x94); break; + case 149: R = syclcompat::ternary_logic_op(A, B, C, 0x95); break; + case 150: R = syclcompat::ternary_logic_op(A, B, C, 0x96); break; + case 151: R = syclcompat::ternary_logic_op(A, B, C, 0x97); break; + case 152: R = syclcompat::ternary_logic_op(A, B, C, 0x98); break; + case 153: R = syclcompat::ternary_logic_op(A, B, C, 0x99); break; + case 154: R = syclcompat::ternary_logic_op(A, B, C, 0x9A); break; + case 155: R = syclcompat::ternary_logic_op(A, B, C, 0x9B); break; + case 156: R = syclcompat::ternary_logic_op(A, B, C, 0x9C); break; + case 157: R = syclcompat::ternary_logic_op(A, B, C, 0x9D); break; + case 158: R = syclcompat::ternary_logic_op(A, B, C, 0x9E); break; + case 159: R = syclcompat::ternary_logic_op(A, B, C, 0x9F); break; + case 160: R = syclcompat::ternary_logic_op(A, B, C, 0xA0); break; + case 161: R = syclcompat::ternary_logic_op(A, B, C, 0xA1); break; + case 162: R = syclcompat::ternary_logic_op(A, B, C, 0xA2); break; + case 163: R = syclcompat::ternary_logic_op(A, B, C, 0xA3); break; + case 164: R = syclcompat::ternary_logic_op(A, B, C, 0xA4); break; + case 165: R = syclcompat::ternary_logic_op(A, B, C, 0xA5); break; + case 166: R = syclcompat::ternary_logic_op(A, B, C, 0xA6); break; + case 167: R = syclcompat::ternary_logic_op(A, B, C, 0xA7); break; + case 168: R = syclcompat::ternary_logic_op(A, B, C, 0xA8); break; + case 169: R = syclcompat::ternary_logic_op(A, B, C, 0xA9); break; + case 170: R = syclcompat::ternary_logic_op(A, B, C, 0xAA); break; + case 171: R = syclcompat::ternary_logic_op(A, B, C, 0xAB); break; + case 172: R = syclcompat::ternary_logic_op(A, B, C, 0xAC); break; + case 173: R = syclcompat::ternary_logic_op(A, B, C, 0xAD); break; + case 174: R = syclcompat::ternary_logic_op(A, B, C, 0xAE); break; + case 175: R = syclcompat::ternary_logic_op(A, B, C, 0xAF); break; + case 176: R = syclcompat::ternary_logic_op(A, B, C, 0xB0); break; + case 177: R = syclcompat::ternary_logic_op(A, B, C, 0xB1); break; + case 178: R = syclcompat::ternary_logic_op(A, B, C, 0xB2); break; + case 179: R = syclcompat::ternary_logic_op(A, B, C, 0xB3); break; + case 180: R = syclcompat::ternary_logic_op(A, B, C, 0xB4); break; + case 181: R = syclcompat::ternary_logic_op(A, B, C, 0xB5); break; + case 182: R = syclcompat::ternary_logic_op(A, B, C, 0xB6); break; + case 183: R = syclcompat::ternary_logic_op(A, B, C, 0xB7); break; + case 184: R = syclcompat::ternary_logic_op(A, B, C, 0xB8); break; + case 185: R = syclcompat::ternary_logic_op(A, B, C, 0xB9); break; + case 186: R = syclcompat::ternary_logic_op(A, B, C, 0xBA); break; + case 187: R = syclcompat::ternary_logic_op(A, B, C, 0xBB); break; + case 188: R = syclcompat::ternary_logic_op(A, B, C, 0xBC); break; + case 189: R = syclcompat::ternary_logic_op(A, B, C, 0xBD); break; + case 190: R = syclcompat::ternary_logic_op(A, B, C, 0xBE); break; + case 191: R = syclcompat::ternary_logic_op(A, B, C, 0xBF); break; + case 192: R = syclcompat::ternary_logic_op(A, B, C, 0xC0); break; + case 193: R = syclcompat::ternary_logic_op(A, B, C, 0xC1); break; + case 194: R = syclcompat::ternary_logic_op(A, B, C, 0xC2); break; + case 195: R = syclcompat::ternary_logic_op(A, B, C, 0xC3); break; + case 196: R = syclcompat::ternary_logic_op(A, B, C, 0xC4); break; + case 197: R = syclcompat::ternary_logic_op(A, B, C, 0xC5); break; + case 198: R = syclcompat::ternary_logic_op(A, B, C, 0xC6); break; + case 199: R = syclcompat::ternary_logic_op(A, B, C, 0xC7); break; + case 200: R = syclcompat::ternary_logic_op(A, B, C, 0xC8); break; + case 201: R = syclcompat::ternary_logic_op(A, B, C, 0xC9); break; + case 202: R = syclcompat::ternary_logic_op(A, B, C, 0xCA); break; + case 203: R = syclcompat::ternary_logic_op(A, B, C, 0xCB); break; + case 204: R = syclcompat::ternary_logic_op(A, B, C, 0xCC); break; + case 205: R = syclcompat::ternary_logic_op(A, B, C, 0xCD); break; + case 206: R = syclcompat::ternary_logic_op(A, B, C, 0xCE); break; + case 207: R = syclcompat::ternary_logic_op(A, B, C, 0xCF); break; + case 208: R = syclcompat::ternary_logic_op(A, B, C, 0xD0); break; + case 209: R = syclcompat::ternary_logic_op(A, B, C, 0xD1); break; + case 210: R = syclcompat::ternary_logic_op(A, B, C, 0xD2); break; + case 211: R = syclcompat::ternary_logic_op(A, B, C, 0xD3); break; + case 212: R = syclcompat::ternary_logic_op(A, B, C, 0xD4); break; + case 213: R = syclcompat::ternary_logic_op(A, B, C, 0xD5); break; + case 214: R = syclcompat::ternary_logic_op(A, B, C, 0xD6); break; + case 215: R = syclcompat::ternary_logic_op(A, B, C, 0xD7); break; + case 216: R = syclcompat::ternary_logic_op(A, B, C, 0xD8); break; + case 217: R = syclcompat::ternary_logic_op(A, B, C, 0xD9); break; + case 218: R = syclcompat::ternary_logic_op(A, B, C, 0xDA); break; + case 219: R = syclcompat::ternary_logic_op(A, B, C, 0xDB); break; + case 220: R = syclcompat::ternary_logic_op(A, B, C, 0xDC); break; + case 221: R = syclcompat::ternary_logic_op(A, B, C, 0xDD); break; + case 222: R = syclcompat::ternary_logic_op(A, B, C, 0xDE); break; + case 223: R = syclcompat::ternary_logic_op(A, B, C, 0xDF); break; + case 224: R = syclcompat::ternary_logic_op(A, B, C, 0xE0); break; + case 225: R = syclcompat::ternary_logic_op(A, B, C, 0xE1); break; + case 226: R = syclcompat::ternary_logic_op(A, B, C, 0xE2); break; + case 227: R = syclcompat::ternary_logic_op(A, B, C, 0xE3); break; + case 228: R = syclcompat::ternary_logic_op(A, B, C, 0xE4); break; + case 229: R = syclcompat::ternary_logic_op(A, B, C, 0xE5); break; + case 230: R = syclcompat::ternary_logic_op(A, B, C, 0xE6); break; + case 231: R = syclcompat::ternary_logic_op(A, B, C, 0xE7); break; + case 232: R = syclcompat::ternary_logic_op(A, B, C, 0xE8); break; + case 233: R = syclcompat::ternary_logic_op(A, B, C, 0xE9); break; + case 234: R = syclcompat::ternary_logic_op(A, B, C, 0xEA); break; + case 235: R = syclcompat::ternary_logic_op(A, B, C, 0xEB); break; + case 236: R = syclcompat::ternary_logic_op(A, B, C, 0xEC); break; + case 237: R = syclcompat::ternary_logic_op(A, B, C, 0xED); break; + case 238: R = syclcompat::ternary_logic_op(A, B, C, 0xEE); break; + case 239: R = syclcompat::ternary_logic_op(A, B, C, 0xEF); break; + case 240: R = syclcompat::ternary_logic_op(A, B, C, 0xF0); break; + case 241: R = syclcompat::ternary_logic_op(A, B, C, 0xF1); break; + case 242: R = syclcompat::ternary_logic_op(A, B, C, 0xF2); break; + case 243: R = syclcompat::ternary_logic_op(A, B, C, 0xF3); break; + case 244: R = syclcompat::ternary_logic_op(A, B, C, 0xF4); break; + case 245: R = syclcompat::ternary_logic_op(A, B, C, 0xF5); break; + case 246: R = syclcompat::ternary_logic_op(A, B, C, 0xF6); break; + case 247: R = syclcompat::ternary_logic_op(A, B, C, 0xF7); break; + case 248: R = syclcompat::ternary_logic_op(A, B, C, 0xF8); break; + case 249: R = syclcompat::ternary_logic_op(A, B, C, 0xF9); break; + case 250: R = syclcompat::ternary_logic_op(A, B, C, 0xFA); break; + case 251: R = syclcompat::ternary_logic_op(A, B, C, 0xFB); break; + case 252: R = syclcompat::ternary_logic_op(A, B, C, 0xFC); break; + case 253: R = syclcompat::ternary_logic_op(A, B, C, 0xFD); break; + case 254: R = syclcompat::ternary_logic_op(A, B, C, 0xFE); break; + case 255: R = syclcompat::ternary_logic_op(A, B, C, 0xFF); break; + } +} + +// clang-format on + +void ternary_logic_op(int *ec) { + uint32_t X, Y, A = 1, B = 2, C = 3, D; + for (D = 0; D < 256; ++D) { + reference_of_ternary_logic_op(X, A, B, C, D); + asm_ternary_logic_op(Y, A, B, C, D); + if (X != Y) { + *ec = D; + return; + } + } + *ec = 0; +} + +int main() { + syclcompat::device_ext &dev_ct1 = syclcompat::get_current_device(); + sycl::queue &q_ct1 = *dev_ct1.default_queue(); + int ret = 0; + int *d_ec = nullptr; + d_ec = sycl::malloc_device(1, q_ct1); + + auto wait_and_check = [&](const char *case_name) { + syclcompat::get_current_device().queues_wait_and_throw(); + int ec = 0; + syclcompat::get_default_queue().memcpy(&ec, d_ec, sizeof(int)).wait(); + if (ec != 0) + printf("Test %s failed: return code = %d\n", case_name, ec); + ret = ret || ec; + }; + + q_ct1.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { ternary_logic_op(d_ec); }); + wait_and_check("ternary_logic_op"); + + syclcompat::wait_and_free(d_ec, q_ct1); + + return ret; +} diff --git a/sycl/test/basic_tests/is_device_copyable.cpp b/sycl/test/basic_tests/is_device_copyable.cpp index 1c4199e954530..3e48bd5d77857 100644 --- a/sycl/test/basic_tests/is_device_copyable.cpp +++ b/sycl/test/basic_tests/is_device_copyable.cpp @@ -25,14 +25,6 @@ struct BCopyable { BCopyable(const BCopyable &x) : i(x.i) {} }; -// Not trivially copyable, but trivially copy constructible/destructible. -// Such types are passed to kernels to stay compatible with deprecated -// sycl 1.2.1 rules. -struct C : A { - const A C2; - C() : A{0}, C2{2} {} -}; - // Not copyable type, but it will be declared as device copyable. struct DCopyable { int i; @@ -67,7 +59,6 @@ void test() { A IamGood; IamGood.i = 0; BCopyable IamBadButCopyable(1); - C IamAlsoGood; DCopyable IamAlsoBadButCopyable{0}; marray MarrayForCopyableIsCopyable(0); range<2> Range{1,2}; @@ -78,7 +69,6 @@ void test() { int A = IamGood.i; int B = IamBadButCopyable.i; int C = IamAlsoBadButCopyable.i; - int D = IamAlsoGood.i; int E = MarrayForCopyableIsCopyable[0]; int F = Range[1]; int G = Id[2]; diff --git a/sycl/test/basic_tests/is_device_copyable_neg.cpp b/sycl/test/basic_tests/is_device_copyable_neg.cpp index 2e1ca89b3d25a..c9007685ae693 100644 --- a/sycl/test/basic_tests/is_device_copyable_neg.cpp +++ b/sycl/test/basic_tests/is_device_copyable_neg.cpp @@ -1,5 +1,4 @@ -// RUN: not %clangxx -fsycl -fsycl-device-only -fsyntax-only \ -// RUN: %s -I %sycl_include 2>&1 | FileCheck %s +// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=warning,note %s // This test checks if compiler reports compilation error on an attempt to pass // a struct with type that is not device copyable as SYCL kernel parameter. @@ -57,6 +56,7 @@ void test() { B IamAlsoBad{0}; marray MarrayForNotCopyable; queue Q; + // expected-error@*:* {{static assertion failed due to requirement 'is_device_copyable_v': The specified type is not device copyable}} Q.single_task([=] { int A = IamBad.i; int B = IamAlsoBad.i; @@ -64,23 +64,10 @@ void test() { }); FunctorA FA; + // expected-error@*:* {{static assertion failed due to requirement 'is_device_copyable_v': The specified type is not device copyable}} Q.single_task(FA); FunctorB FB; + // expected-error@*:* {{static assertion failed due to requirement 'is_device_copyable_v': The specified type is not device copyable}} Q.single_task(FB); } - -// CHECK: static assertion failed due to requirement 'is_device_copyable_v -// CHECK: is_device_copyable_neg.cpp:60:5: note: in instantiation of function - -// CHECK: static assertion failed due to requirement 'is_device_copyable_v -// CHECK: is_device_copyable_neg.cpp:60:5: note: in instantiation of function - -// CHECK: static assertion failed due to requirement 'is_device_copyable_v> -// CHECK: is_device_copyable_neg.cpp:60:5: note: in instantiation of function - -// CHECK: static assertion failed due to requirement 'is_device_copyable_v -// CHECK: is_device_copyable_neg.cpp:67:5: note: in instantiation of function - -// CHECK: static assertion failed due to requirement 'is_device_copyable_v -// CHECK: is_device_copyable_neg.cpp:70:5: note: in instantiation of function diff --git a/sycl/test/check_device_code/native_cpu/device_builtins.cpp b/sycl/test/check_device_code/native_cpu/device_builtins.cpp new file mode 100644 index 0000000000000..cd2a392ea4b5e --- /dev/null +++ b/sycl/test/check_device_code/native_cpu/device_builtins.cpp @@ -0,0 +1,36 @@ +// REQUIRES: native_cpu_ock +// RUN: %clangxx -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -O0 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck %s + +// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -fno-inline -Xclang -sycl-std=2020 -mllvm -sycl-opt -S -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-DEV + +// check that builtins are defined + +// CHECK-NOT: {{.*}}__spirv_GenericCastToPtrExplicit +// CHECK-DEV: {{.*}}__spirv_GenericCastToPtrExplicit + +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; + +#define DefTestCast(FName, Space, PType) \ + SYCL_EXTERNAL auto FName(PType p) { return dynamic_address_cast(p); } + +// Turning clang format off here because it would change the indentations of +// the macro invocations making the following code difficult to read. +// clang-format off + +#define DefTestCastForSpace(PType)\ + DefTestCast(to_local, access::address_space::local_space, PType)\ + DefTestCast(to_global, access::address_space::global_space, PType)\ + DefTestCast(to_private, access::address_space::private_space, PType)\ + DefTestCast(to_generic, access::address_space::generic_space, PType) + +DefTestCastForSpace(int*) +DefTestCastForSpace(const int*) +DefTestCastForSpace(volatile int*) +DefTestCastForSpace(const volatile int*) + +int main(){} +// check that the generated module has the is-native-cpu module flag set +// CHECK: !{{[0-9]*}} = !{i32 1, !"is-native-cpu", i32 1} diff --git a/sycl/test/fpga_tests/fpga_io_pipes.cpp b/sycl/test/fpga_tests/fpga_io_pipes.cpp index 0513386f97dd1..89876fc963fb9 100644 --- a/sycl/test/fpga_tests/fpga_io_pipes.cpp +++ b/sycl/test/fpga_tests/fpga_io_pipes.cpp @@ -123,10 +123,10 @@ int main() { } // Non-blocking pipes - int Result = test_io_nb_pipe(Queue); + int Error = test_io_nb_pipe(Queue); // 0 if successful // Blocking pipes - Result &= test_io_bl_pipe(Queue); + Error |= test_io_bl_pipe(Queue); - return Result; + return Error; } diff --git a/sycl/test/self-contained-headers/lit.local.cfg b/sycl/test/self-contained-headers/lit.local.cfg index 0f0e9e15b0e53..d61551742dd85 100644 --- a/sycl/test/self-contained-headers/lit.local.cfg +++ b/sycl/test/self-contained-headers/lit.local.cfg @@ -13,25 +13,4 @@ config.sycl_headers_xfail = [ os.path.join( "sycl", "ext", "intel", "esimd", "detail", "types_elementary.hpp" ), - os.path.join( - "sycl", "ext", "oneapi", "matrix", "matrix-hip.hpp" - ), - os.path.join( - "sycl", "ext", "oneapi", "matrix", "matrix-intel.hpp" - ), - os.path.join( - "sycl", "ext", "oneapi", "matrix", "matrix-tensorcores.hpp" - ), - os.path.join( - "sycl", "ext", "oneapi", "matrix", "matrix-unified-utils.hpp" - ), - os.path.join( - "sycl", "ext", "oneapi", "matrix", "matrix-unified.hpp" - ), - os.path.join( - "sycl", "ext", "oneapi", "matrix", "matrix.hpp" - ), - os.path.join( - "sycl", "ext", "oneapi", "matrix", "static-query-use.hpp" - ), ]