Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/sycl' into sycl
Browse files Browse the repository at this point in the history
  • Loading branch information
sarnex committed Jan 21, 2025
2 parents 54c2b50 + 4c7d48d commit 759ca91
Show file tree
Hide file tree
Showing 48 changed files with 1,496 additions and 362 deletions.
1 change: 0 additions & 1 deletion .github/workflows/sycl-linux-precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,6 @@ jobs:

env: '{"LIT_FILTER":"PerformanceTests/"}'
extra_lit_opts: -a -j 1 --param enable-perf-tests=True
extra_cmake_args: ${{ matrix.extra_cmake_args }}

ref: ${{ github.sha }}
merge_ref: ''
Expand Down
9 changes: 4 additions & 5 deletions .github/workflows/sycl-linux-run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,9 @@ on:
- '["cts-cpu"]'
- '["Linux", "build"]'
image:
description: |
Use option ending with ":build" for AMDGPU, ":latest" for the rest.
type: choice
options:
- 'ghcr.io/intel/llvm/sycl_ubuntu2204_nightly:latest'
- 'ghcr.io/intel/llvm/sycl_ubuntu2204_nightly:build'
- 'ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest'
image_options:
description: |
Use option with "--device=/dev/kfd" for AMDGPU, without it for the rest.
Expand Down Expand Up @@ -361,7 +358,9 @@ jobs:
cts_exclude_filter=""
# If CTS_TESTS_TO_BUILD is null - use filter
if [ -z "$CTS_TESTS_TO_BUILD" ]; then
if [ "${{ contains(inputs.target_devices, 'opencl:cpu') }}" = "true" ]; then
if [ "${{ contains(inputs.cts_testing_mode, 'build-only') }}" = "true" ]; then
cts_exclude_filter=$PWD/devops/cts_exclude_filter_compfails
elif [ "${{ contains(inputs.target_devices, 'opencl:cpu') }}" = "true" ]; then
cts_exclude_filter=$PWD/devops/cts_exclude_filter_OCL_CPU
elif [ "${{ contains(inputs.target_devices, 'level_zero:gpu') }}" = "true" ]; then
cts_exclude_filter=$PWD/devops/cts_exclude_filter_L0_GPU
Expand Down
38 changes: 6 additions & 32 deletions .github/workflows/sycl-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ jobs:
body: "Daily build ${{ steps.tag.outputs.TAG }}"
target_commitish: ${{ github.sha }}

ubuntu2204_docker_build_push:
docker_build_push:
if: github.repository == 'intel/llvm'
runs-on: [Linux, build]
permissions:
Expand All @@ -254,42 +254,16 @@ jobs:
with:
name: sycl_linux_default
path: devops/
- name: Build and Push Container (with drivers)
- name: Build and Push Container
uses: ./devops/actions/build_container
with:
push: ${{ github.ref_name == 'sycl' }}
file: ubuntu2204_preinstalled
file: nightly
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
build-args: |
base_image=ghcr.io/intel/llvm/ubuntu2404_intel_drivers
base_tag=latest
base_tag=alldeps
tags: |
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:${{ github.sha }}
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:latest
- name: Build and Push Container (no drivers)
uses: ./devops/actions/build_container
with:
push: ${{ github.ref_name == 'sycl' }}
file: ubuntu2204_preinstalled
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
build-args: |
base_image=ghcr.io/intel/llvm/ubuntu2204_base
base_tag=latest
tags: |
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:no-drivers-${{ github.sha }}
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:no-drivers
- name: Build and Push Container (Build image)
uses: ./devops/actions/build_container
with:
push: ${{ github.ref_name == 'sycl' }}
file: ubuntu2204_preinstalled
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
build-args: |
base_image=ghcr.io/intel/llvm/ubuntu2204_build
base_tag=latest
tags: |
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:build-${{ github.sha }}
ghcr.io/${{ github.repository }}/sycl_ubuntu2204_nightly:build
ghcr.io/${{ github.repository }}/sycl_ubuntu2404_nightly:${{ github.sha }}
ghcr.io/${{ github.repository }}/sycl_ubuntu2404_nightly:latest
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ARG base_tag=latest
ARG base_image=ghcr.io/intel/llvm/ubuntu2204_intel_drivers
ARG base_tag=alldeps
ARG base_image=ghcr.io/intel/llvm/ubuntu2404_intel_drivers

FROM $base_image:$base_tag

Expand Down
4 changes: 4 additions & 0 deletions devops/cts_exclude_filter_compfails
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Please use "#" to add comments here.
# Do not delete the file even if it's empty.
# See https://github.com/intel/llvm/pull/16615
handler
2 changes: 1 addition & 1 deletion devops/dependencies.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"igc": {
"github_tag": "v2.5.6",
"version": "2.5.6",
"version": "v2.5.6",
"url": "https://github.com/intel/intel-graphics-compiler/releases/tag/v2.5.6",
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
},
Expand Down
28 changes: 18 additions & 10 deletions libdevice/nativecpu_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,7 @@ using __nativecpu_state = native_cpu::state;

#define OCL_LOCAL __attribute__((opencl_local))
#define OCL_GLOBAL __attribute__((opencl_global))

DEVICE_EXTERNAL OCL_LOCAL void *
__spirv_GenericCastToPtrExplicit_ToLocal(void *p, int) {
return (OCL_LOCAL void *)p;
}

DEVICE_EXTERNAL OCL_GLOBAL void *
__spirv_GenericCastToPtrExplicit_ToGlobal(void *p, int) {
return (OCL_GLOBAL void *)p;
}
#define OCL_PRIVATE __attribute__((opencl_private))

DEVICE_EXTERN_C void __mux_work_group_barrier(uint32_t id, uint32_t scope,
uint32_t semantics);
Expand All @@ -61,6 +52,23 @@ __spirv_MemoryBarrier(uint32_t Memory, uint32_t Semantics) {
// Turning clang format off here because it reorders macro invocations
// making the following code very difficult to read.
// clang-format off

// Defines one overload of __spirv_GenericCastToPtrExplicit_<sfx>.
//   sfx - suffix pasted onto the function name (e.g. ToLocal).
//   asp - address-space attribute applied to the returned pointer type.
//   cv  - cv-qualifier sequence (possibly empty) applied to both the
//         parameter's pointee and the returned pointee.
// The generated function ignores its unnamed int parameter and returns `p`
// converted to `cv asp void *` with a C-style cast.
#define DefGenericCastToPtrExplImpl(sfx, asp, cv)\
DEVICE_EXTERNAL cv asp void *\
__spirv_GenericCastToPtrExplicit_##sfx(cv void *p ,int) {\
return (cv asp void *)p;\
}

// Emits the four cv-qualified overloads (none, const, volatile,
// const volatile) of __spirv_GenericCastToPtrExplicit_<sfx> for the address
// space `asp`.
#define DefGenericCastToPtrExpl(sfx, asp)\
DefGenericCastToPtrExplImpl(sfx, asp, )\
DefGenericCastToPtrExplImpl(sfx, asp, const)\
DefGenericCastToPtrExplImpl(sfx, asp, volatile)\
DefGenericCastToPtrExplImpl(sfx, asp, const volatile)

// Instantiate the casts for the private, local and global address spaces.
DefGenericCastToPtrExpl(ToPrivate, OCL_PRIVATE)
DefGenericCastToPtrExpl(ToLocal, OCL_LOCAL)
DefGenericCastToPtrExpl(ToGlobal, OCL_GLOBAL)

#define DefSubgroupBlockINTEL1(Type, PType) \
template <> \
__SYCL_CONVERGENT__ DEVICE_EXTERNAL Type \
Expand Down
2 changes: 1 addition & 1 deletion llvm/docs/requirements-hashed.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ imagesize==1.4.1 \
--hash=sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b \
--hash=sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a
# via sphinx
jinja2==3.1.4 \
jinja2==3.1.5 \
--hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \
--hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d
# via
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/SYCLLowerIR/ESIMD/ESIMDOptimizeVecArgCallConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,9 +354,8 @@ optimizeFunction(Function *OldF,
// preserve data flow equality to the original.
unsigned OldArgNo = PI.getFormalParam().getArgNo();
unsigned NewArgNo = oldArgNo2NewArgNo(OldArgNo, SretInd);
Instruction *At = nullptr;
Value *Val = NewF->getArg(NewArgNo);
StoreInst *St = new StoreInst(Val, Alloca, false, Al, At);
StoreInst *St = new StoreInst(Val, Alloca, false, Al);
NewInsts.push_back(St);
}
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1239,7 +1239,7 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI,
if (OITy != NITy) {
auto CastOpcode = CastInst::getCastOpcode(NewI, false, OITy, false);
NewI = CastInst::Create(CastOpcode, NewI, OITy,
NewI->getName() + ".cast.ty", OldI);
NewI->getName() + ".cast.ty", OldI->getIterator());
NewI->setDebugLoc(OldI->getDebugLoc());
}
return NewI;
Expand Down Expand Up @@ -1565,7 +1565,7 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
CallInst *NewCI = IntrinsicInst::Create(
NewFDecl, GenXArgs,
NewFDecl->getReturnType()->isVoidTy() ? "" : CI.getName() + ".esimd",
&CI);
CI.getIterator());
NewCI->setDebugLoc(CI.getDebugLoc());
if (DoesFunctionReturnStructure) {
IRBuilder<> Builder(&CI);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/SYCLLowerIR/GlobalOffset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ void GlobalOffsetPass::addImplicitParameterToCallers(
/* Func= */ CalleeWithImplicitParam,
/* Args= */ ImplicitOffsets,
/* NameStr= */ Twine(),
/* InsertBefore= */ CallToOld);
/* InsertBefore= */ CallToOld->getIterator());
NewCallInst->setTailCallKind(CallToOld->getTailCallKind());
NewCallInst->copyMetadata(*CallToOld);
CallToOld->replaceAllUsesWith(NewCallInst);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ Function *LocalAccessorToSharedMemoryPass::processKernel(Module &M,
ConstantInt::get(Type::getInt32Ty(M.getContext()), 0, false),
NFA,
},
/* NameStr= */ Twine{NFA->getName()}, InsertBefore);
/* NameStr= */ Twine{NFA->getName()}, InsertBefore->getIterator());
// Then create a bitcast to make sure the new pointer is the same type
// as the old one. This will only ever be a `i8 addrspace(3)*` to `i32
// addrspace(3)*` type of cast.
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/SYCLLowerIR/LowerInvokeSimd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,8 @@ bool processInvokeSimdCall(CallInst *InvokeSimd,
NewInvokeSimdArgs.push_back(NewHelper);
auto ThirdArg = std::next(InvokeSimd->arg_begin(), 2);
NewInvokeSimdArgs.append(ThirdArg, InvokeSimd->arg_end());
CallInst *NewInvokeSimd =
CallInst::Create(NewInvokeSimdF, NewInvokeSimdArgs, "", InvokeSimd);
CallInst *NewInvokeSimd = CallInst::Create(
NewInvokeSimdF, NewInvokeSimdArgs, "", InvokeSimd->getIterator());
// - transfer flags, attributes (with shrinking), calling convention:
NewInvokeSimd->copyIRFlags(InvokeSimd);
NewInvokeSimd->setCallingConv(InvokeSimd->getCallingConv());
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/SYCLLowerIR/LowerWGScope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -981,7 +981,7 @@ Value *spirv::genPseudoLocalID(Instruction &Before, const Triple &TT) {
Align Alignment = M.getDataLayout().getPreferredAlign(G);
G->setAlignment(MaybeAlign(Alignment));
}
Value *Res = new LoadInst(G->getValueType(), G, "", &Before);
Value *Res = new LoadInst(G->getValueType(), G, "", Before.getIterator());
return Res;
}
}
Expand Down
87 changes: 85 additions & 2 deletions llvm/lib/SYCLLowerIR/SYCLJointMatrixTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace {

static constexpr char ACCESS_CHAIN[] = "_Z19__spirv_AccessChain";
static constexpr char MATRIX_TYPE[] = "spirv.CooperativeMatrixKHR";
static constexpr char MATRIX_LAYOUT[] = "joint_matrix_layout_to_spv";

Type *getInnermostType(Type *Ty) {
while (auto *ArrayTy = dyn_cast<ArrayType>(Ty))
Expand Down Expand Up @@ -184,17 +185,99 @@ bool transformAccessChain(Function *F) {
}
return ModuleChanged;
}

// Scans backwards from \p Load towards the start of its basic block and
// returns the nearest preceding store whose pointer operand is exactly
// \p Ptr. Returns nullptr when the block contains no such store before
// \p Load. Only the basic block containing \p Load is inspected.
StoreInst *findLastStoreBeforeLoad(Value *Ptr, Instruction *Load) {
  const BasicBlock::iterator BlockBegin = Load->getParent()->begin();
  for (BasicBlock::iterator Cursor(Load); Cursor != BlockBegin;) {
    // Step to the previous instruction before inspecting it, so that Load
    // itself is never examined.
    Instruction &Prev = *--Cursor;
    auto *Candidate = dyn_cast<StoreInst>(&Prev);
    if (Candidate && Candidate->getPointerOperand() == Ptr)
      return Candidate;
  }
  return nullptr;
}

// Per the SPIR-V specification, the Layout of a matrix must be a constant
// instruction, i.e. a constexpr or a specialization constant. Meanwhile, in
// the SYCL headers the layout is passed as a parameter to the
// joint_matrix_load function, so even if that layout is a constant expression
// in the user's code it is not possible to prove that to the compiler:
// constant propagation happens only after inlining, not in the AST. That
// means that at O0 the layout would remain a runtime variable in LLVM IR.
// The SYCL matrix layout is mapped onto the SPIR-V matrix layout by the
// joint_matrix_layout_to_spv function. This routine finds direct calls to that
// function (\p F) whose argument is a load of a pointer that was last stored
// to, in the same basic block, with a ConstantInt, and replaces every use of
// such a call with that constant.
// It also cleans up the code that becomes dead as a result. The pattern of the
// dead code is stable, as the user's code does not affect it.
//
// Returns true if at least one call was replaced.
bool propagateConstexprLayout(Function *F) {
  llvm::SmallVector<Instruction *, 8> ToErase;
  for (auto I = F->user_begin(), E = F->user_end(); I != E;) {
    User *U = *I++;
    auto *CI = dyn_cast<CallInst>(U);
    // Only handle direct calls to F that actually carry an argument; a call
    // that merely receives F as an operand is not a layout conversion.
    if (!CI || CI->getCalledOperand() != F || CI->arg_empty())
      continue;
    auto *Op = dyn_cast<LoadInst>(CI->getArgOperand(0));
    if (!Op)
      continue;
    auto *Ptr = dyn_cast<Instruction>(Op->getPointerOperand());
    if (!Ptr)
      continue;

    StoreInst *SI = findLastStoreBeforeLoad(Ptr, Op);
    if (!SI)
      continue;
    auto *ConstLayout = dyn_cast<ConstantInt>(SI->getValueOperand());
    if (!ConstLayout)
      continue;

    CI->replaceAllUsesWith(ConstLayout);
    // Queue users before their operands so that, by the time an operand is
    // visited in the erase loop, its now-dead users are already gone.
    ToErase.push_back(CI);
    ToErase.push_back(SI);
    ToErase.push_back(Op);
    ToErase.push_back(Ptr);
    if (auto *Cast = dyn_cast<AddrSpaceCastInst>(Ptr))
      if (auto *AI = dyn_cast<AllocaInst>(Cast->getPointerOperand()))
        ToErase.push_back(AI);
  }

  // A single instruction may be shared by several calls and therefore be
  // queued more than once. Track what has already been erased and consult
  // that set BEFORE touching the instruction: querying use_empty() on an
  // instruction that was erased earlier would read freed memory.
  SmallPtrSet<Instruction *, 8> Erased;
  for (Instruction *II : ToErase) {
    if (Erased.contains(II))
      continue;
    // Still referenced by something outside the dead pattern (or by a queued
    // user that has not been erased yet); a later duplicate entry may retry.
    if (!II->use_empty())
      continue;
    II->dropAllReferences();
    II->eraseFromParent();
    Erased.insert(II);
  }
  return !ToErase.empty();
}
} // namespace

// Module pass entry point.
//  - For every function *declaration* whose name starts with ACCESS_CHAIN,
//    runs transformAccessChain on it.
//  - For a function *definition* named MATRIX_LAYOUT, runs
//    propagateConstexprLayout on it and then removes the function from the
//    module if nothing uses it any more.
// All other function definitions are skipped.
// Returns PreservedAnalyses::none() if the module was modified, and
// PreservedAnalyses::all() otherwise.
PreservedAnalyses
SYCLJointMatrixTransformPass::run(Module &M, ModuleAnalysisManager &MAM) {
  bool ModuleChanged = false;
  llvm::SmallVector<Function *, 1> ToErase;
  for (Function &F : M) {
    if (!F.isDeclaration()) {
      // The only definition of interest is the layout-mapping helper.
      if (F.getName() != MATRIX_LAYOUT)
        continue;
      ModuleChanged |= propagateConstexprLayout(&F);
      // Defer removal: erasing while iterating over the module's function
      // list would invalidate the iteration.
      ToErase.push_back(&F);
      continue;
    }
    if (F.getName().starts_with(ACCESS_CHAIN))
      ModuleChanged |= transformAccessChain(&F);
  }

  for (Function *F : ToErase) {
    if (!F->use_empty())
      continue;
    F->eraseFromParent();
    // Removing a function is itself a modification of the module, even if no
    // call was rewritten above.
    ModuleChanged = true;
  }

  return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
Loading

0 comments on commit 759ca91

Please sign in to comment.