Skip to content

Commit

Permalink
[LinkerWrapper] Clean up options after proper forwarding (#126297)
Browse files Browse the repository at this point in the history
Summary:
Recent changes made a lot of this stuff redundant or unused, clean it up
a bit.

Also snuck in a change to pass the CUDA path since we still use it for
`fatbinary` internally.
  • Loading branch information
jhuber6 authored Feb 7, 2025
1 parent 6dbe542 commit addbb44
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 51 deletions.
11 changes: 11 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9220,13 +9220,24 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
for (StringRef Arg : LinkerArgs)
CmdArgs.push_back(Args.MakeArgString(
"--device-linker=" + TC->getTripleString() + "=" + Arg));

// Forward the LTO mode relying on the Driver's parsing.
if (C.getDriver().getOffloadLTOMode() == LTOK_Full)
CmdArgs.push_back(Args.MakeArgString(
"--device-compiler=" + TC->getTripleString() + "=-flto=full"));
else if (C.getDriver().getOffloadLTOMode() == LTOK_Thin)
CmdArgs.push_back(Args.MakeArgString(
"--device-compiler=" + TC->getTripleString() + "=-flto=thin"));
}
}

CmdArgs.push_back(
Args.MakeArgString("--host-triple=" + getToolChain().getTripleString()));
if (Args.hasArg(options::OPT_v))
CmdArgs.push_back("--wrapper-verbose");
if (Arg *A = Args.getLastArg(options::OPT_cuda_path_EQ))
CmdArgs.push_back(
Args.MakeArgString(Twine("--cuda-path=") + A->getValue()));

// Construct the link job so we can wrap around it.
Linker->ConstructJob(C, JA, Output, Inputs, Args, LinkingOutput);
Expand Down
24 changes: 12 additions & 12 deletions clang/test/Driver/linker-wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK

// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o
// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o

// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-compiler=-g \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG

// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g
// NVPTX-LINK-DEBUG: clang{{.*}} --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}-g

// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
Expand All @@ -39,16 +39,16 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK

// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o {{.*}}.o

// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-compiler=--save-temps \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS

// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -flto -Wl,--no-undefined {{.*}}.o -save-temps
// AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}-save-temps

// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
Expand All @@ -59,7 +59,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: --linker-path=/usr/bin/ld.lld --whole-archive %t.a --no-whole-archive \
// RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK

// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive

// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -mllvm -openmp-opt-disable \
Expand Down Expand Up @@ -148,7 +148,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND

// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o
// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o

// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
Expand All @@ -171,8 +171,8 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID

// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -Wl,--no-undefined {{.*}}.o {{.*}}.o
// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -Wl,--no-undefined {{.*}}.o {{.*}}.o

// RUN: clang-offload-packager -o %t-lib.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic
Expand All @@ -187,8 +187,8 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL

// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -Wl,--no-undefined {{.*}}.o {{.*}}.o
// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o {{.*}}.o

// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
Expand Down
10 changes: 10 additions & 0 deletions clang/test/Driver/openmp-offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,13 @@
// RUN: -fsyntax-only %s 2>&1 | FileCheck -check-prefix=CHK-SYNTAX-ONLY-ARGS %s
// CHK-SYNTAX-ONLY-ARGS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"{{.*}}"-fsyntax-only"
// CHK-SYNTAX-ONLY-ARGS: "-cc1" "-triple" "powerpc64le-unknown-linux"{{.*}}"-fsyntax-only"

//
// Ensure `-foffload-lto` is forwarded properly.
//
// RUN: %clang -### --target=powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu \
// RUN: -foffload-lto %s 2>&1 | FileCheck -check-prefix=CHK-DEVICE-LTO-FULL %s
// CHK-DEVICE-LTO-FULL: clang-linker-wrapper{{.*}} "--device-compiler=powerpc64le-ibm-linux-gnu=-flto=full"
// RUN: %clang -### --target=powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu \
// RUN: -foffload-lto=thin %s 2>&1 | FileCheck -check-prefix=CHK-DEVICE-LTO-THIN %s
// CHK-DEVICE-LTO-THIN: clang-linker-wrapper{{.*}} "--device-compiler=powerpc64le-ibm-linux-gnu=-flto=thin"
23 changes: 1 addition & 22 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,20 +485,16 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
if (!TempFileOrErr)
return TempFileOrErr.takeError();

StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2");
SmallVector<StringRef, 16> CmdArgs{
*ClangPath,
"--no-default-config",
"-o",
*TempFileOrErr,
Args.MakeArgString("--target=" + Triple.getTriple()),
Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch)
: Args.MakeArgString("-march=" + Arch),
Args.MakeArgString("-" + OptLevel),
};
: Args.MakeArgString("-march=" + Arch)};

// Forward all of the `--offload-opt` and similar options to the device.
CmdArgs.push_back("-flto");
for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm))
CmdArgs.append(
{"-Xlinker",
Expand Down Expand Up @@ -547,29 +543,12 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
CmdArgs.append({"-Xlinker", Args.MakeArgString(
"-mllvm=" + StringRef(Arg->getValue()))});

if (Args.hasArg(OPT_debug))
CmdArgs.push_back("-g");

if (SaveTemps)
CmdArgs.push_back("-save-temps");

if (SaveTemps && linkerSupportsLTO(Args))
CmdArgs.push_back("-Wl,--save-temps");

if (Args.hasArg(OPT_embed_bitcode))
CmdArgs.push_back("-Wl,--lto-emit-llvm");

if (Verbose)
CmdArgs.push_back("-v");

if (!CudaBinaryPath.empty())
CmdArgs.push_back(Args.MakeArgString("--cuda-path=" + CudaBinaryPath));

for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg))
llvm::copy(
SmallVector<StringRef>({"-Xcuda-ptxas", Args.MakeArgString(Arg)}),
std::back_inserter(CmdArgs));

for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
CmdArgs.append({"-Xlinker", Args.MakeArgString(Arg)});
for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ))
Expand Down
22 changes: 5 additions & 17 deletions clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,9 @@ def cuda_path_EQ : Joined<["--"], "cuda-path=">,
Flags<[WrapperOnlyOption]>, MetaVarName<"<dir>">,
HelpText<"Set the system CUDA path">;
def host_triple_EQ : Joined<["--"], "host-triple=">,
Flags<[WrapperOnlyOption]>, MetaVarName<"<triple>">,
HelpText<"Triple to use for the host compilation">;
def opt_level : Joined<["--"], "opt-level=">,
Flags<[WrapperOnlyOption]>, MetaVarName<"<O0, O1, O2, or O3>">,
HelpText<"Optimization level for LTO">;
Flags<[WrapperOnlyOption]>,
MetaVarName<"<triple>">,
HelpText<"Triple to use for the host compilation">;
def device_linker_args_EQ : Joined<["--"], "device-linker=">,
Flags<[WrapperOnlyOption]>, MetaVarName<"<value> or <triple>=<value>">,
HelpText<"Arguments to pass to the device linker invocation">;
Expand All @@ -34,18 +32,8 @@ def dry_run : Flag<["--"], "dry-run">,
def verbose : Flag<["--"], "wrapper-verbose">,
Flags<[WrapperOnlyOption]>, HelpText<"Verbose output from tools">;
def embed_bitcode : Flag<["--"], "embed-bitcode">,
Flags<[WrapperOnlyOption]>, HelpText<"Embed linked bitcode in the module">;
def debug : Flag<["--"], "device-debug">, Flags<[WrapperOnlyOption]>,
HelpText<"Use debugging">;
def ptxas_arg : Joined<["--"], "ptxas-arg=">,
Flags<[WrapperOnlyOption]>,
HelpText<"Argument to pass to the 'ptxas' invocation">;
def pass_remarks_EQ : Joined<["--"], "pass-remarks=">,
Flags<[WrapperOnlyOption]>, HelpText<"Pass remarks for LTO">;
def pass_remarks_missed_EQ : Joined<["--"], "pass-remarks-missed=">,
Flags<[WrapperOnlyOption]>, HelpText<"Pass remarks for LTO">;
def pass_remarks_analysis_EQ : Joined<["--"], "pass-remarks-analysis=">,
Flags<[WrapperOnlyOption]>, HelpText<"Pass remarks for LTO">;
Flags<[WrapperOnlyOption]>,
HelpText<"Embed linked bitcode in the module">;
def print_wrapped_module : Flag<["--"], "print-wrapped-module">,
Flags<[WrapperOnlyOption]>,
HelpText<"Print the wrapped module's IR for testing">;
Expand Down

0 comments on commit addbb44

Please sign in to comment.