diff --git a/src/bootstrap/bolt.rs b/src/bootstrap/bolt.rs new file mode 100644 index 0000000000000..ea37cd47049bf --- /dev/null +++ b/src/bootstrap/bolt.rs @@ -0,0 +1,71 @@ +use std::path::Path; +use std::process::Command; + +/// Uses the `llvm-bolt` binary to instrument the binary/library at the given `path` with BOLT. +/// When the instrumented artifact is executed, it will generate BOLT profiles into +/// `/tmp/prof.fdata.<pid>.fdata`. +pub fn instrument_with_bolt_inplace(path: &Path) { + let dir = std::env::temp_dir(); + let instrumented_path = dir.join("instrumented.so"); + + let status = Command::new("llvm-bolt") + .arg("-instrument") + .arg(&path) + // Make sure that each process will write its profiles into a separate file + .arg("--instrumentation-file-append-pid") + .arg("-o") + .arg(&instrumented_path) + .status() + .expect("Could not instrument artifact using BOLT"); + + if !status.success() { + panic!("Could not instrument {} with BOLT, exit code {:?}", path.display(), status.code()); + } + + std::fs::copy(&instrumented_path, path).expect("Cannot copy instrumented artifact"); + std::fs::remove_file(instrumented_path).expect("Cannot delete instrumented artifact"); +} + +/// Uses the `llvm-bolt` binary to optimize the binary/library at the given `path` with BOLT, +/// using merged profiles from `profile_path`. +/// +/// The recorded profiles have to be merged using the `merge-fdata` tool from LLVM and the merged +/// profile path should then be passed to this function.
+pub fn optimize_library_with_bolt_inplace(path: &Path, profile_path: &Path) { + let dir = std::env::temp_dir(); + let optimized_path = dir.join("optimized.so"); + + let status = Command::new("llvm-bolt") + .arg(&path) + .arg("-data") + .arg(&profile_path) + .arg("-o") + .arg(&optimized_path) + // Reorder basic blocks within functions + .arg("-reorder-blocks=ext-tsp") + // Reorder functions within the binary + .arg("-reorder-functions=hfsort+") + // Split function code into hot and cold regions + .arg("-split-functions=2") + // Split as many basic blocks as possible + .arg("-split-all-cold") + // Move jump tables to a separate section + .arg("-jump-tables=move") + // Use GNU_STACK program header for new segment (workaround for issues with strip/objcopy) + .arg("-use-gnu-stack") + // Fold functions with identical code + .arg("-icf=1") + // Update DWARF debug info in the final binary + .arg("-update-debug-sections") + // Print optimization statistics + .arg("-dyno-stats") + .status() + .expect("Could not optimize artifact using BOLT"); + + if !status.success() { + panic!("Could not optimize {} with BOLT, exit code {:?}", path.display(), status.code()); + } + + std::fs::copy(&optimized_path, path).expect("Cannot copy optimized artifact"); + std::fs::remove_file(optimized_path).expect("Cannot delete optimized artifact"); +} diff --git a/src/bootstrap/config.rs b/src/bootstrap/config.rs index 74530dec97b70..e40350ac31d71 100644 --- a/src/bootstrap/config.rs +++ b/src/bootstrap/config.rs @@ -159,6 +159,8 @@ pub struct Config { pub llvm_profile_use: Option, pub llvm_profile_generate: bool, pub llvm_libunwind_default: Option, + pub llvm_bolt_profile_generate: bool, + pub llvm_bolt_profile_use: Option, pub build: TargetSelection, pub hosts: Vec, @@ -803,6 +805,15 @@ impl Config { } config.llvm_profile_use = flags.llvm_profile_use; config.llvm_profile_generate = flags.llvm_profile_generate; + config.llvm_bolt_profile_generate = flags.llvm_bolt_profile_generate; +
config.llvm_bolt_profile_use = flags.llvm_bolt_profile_use; + + if config.llvm_bolt_profile_generate && config.llvm_bolt_profile_use.is_some() { + eprintln!( + "Cannot use both `llvm_bolt_profile_generate` and `llvm_bolt_profile_use` at the same time" + ); + crate::detail_exit(1); + } // Infer the rest of the configuration. diff --git a/src/bootstrap/dist.rs b/src/bootstrap/dist.rs index f387496883bbc..e85582d76c9e3 100644 --- a/src/bootstrap/dist.rs +++ b/src/bootstrap/dist.rs @@ -2157,6 +2157,10 @@ impl Step for ReproducibleArtifacts { tarball.add_file(path, ".", 0o644); added_anything = true; } + if let Some(path) = builder.config.llvm_bolt_profile_use.as_ref() { + tarball.add_file(path, ".", 0o644); + added_anything = true; + } if added_anything { Some(tarball.generate()) } else { None } } } diff --git a/src/bootstrap/flags.rs b/src/bootstrap/flags.rs index 802b49d748ac6..ee341a353ac47 100644 --- a/src/bootstrap/flags.rs +++ b/src/bootstrap/flags.rs @@ -78,6 +78,8 @@ pub struct Flags { // // llvm_out/build/profiles/ is the location this writes to. 
pub llvm_profile_generate: bool, + pub llvm_bolt_profile_generate: bool, + pub llvm_bolt_profile_use: Option, } #[derive(Debug)] @@ -255,6 +257,8 @@ To learn more about a subcommand, run `./x.py -h`", opts.optmulti("D", "", "deny certain clippy lints", "OPT"); opts.optmulti("W", "", "warn about certain clippy lints", "OPT"); opts.optmulti("F", "", "forbid certain clippy lints", "OPT"); + opts.optflag("", "llvm-bolt-profile-generate", "generate BOLT profile for LLVM build"); + opts.optopt("", "llvm-bolt-profile-use", "use BOLT profile for LLVM build", "PROFILE"); // We can't use getopt to parse the options until we have completed specifying which // options are valid, but under the current implementation, some options are conditional on @@ -691,6 +695,8 @@ Arguments: rust_profile_generate: matches.opt_str("rust-profile-generate"), llvm_profile_use: matches.opt_str("llvm-profile-use"), llvm_profile_generate: matches.opt_present("llvm-profile-generate"), + llvm_bolt_profile_generate: matches.opt_present("llvm-bolt-profile-generate"), + llvm_bolt_profile_use: matches.opt_str("llvm-bolt-profile-use"), } } } diff --git a/src/bootstrap/lib.rs b/src/bootstrap/lib.rs index 572adeb6420eb..7f749beffdebd 100644 --- a/src/bootstrap/lib.rs +++ b/src/bootstrap/lib.rs @@ -122,6 +122,7 @@ use crate::util::{ check_run, exe, libdir, mtime, output, run, run_suppressed, try_run, try_run_suppressed, CiEnv, }; +mod bolt; mod builder; mod cache; mod cc_detect; diff --git a/src/bootstrap/native.rs b/src/bootstrap/native.rs index d6ee6d489cf04..c7cb609d3edeb 100644 --- a/src/bootstrap/native.rs +++ b/src/bootstrap/native.rs @@ -16,6 +16,7 @@ use std::io; use std::path::{Path, PathBuf}; use std::process::Command; +use crate::bolt::{instrument_with_bolt_inplace, optimize_library_with_bolt_inplace}; use crate::builder::{Builder, RunConfig, ShouldRun, Step}; use crate::config::TargetSelection; use crate::util::get_clang_cl_resource_dir; @@ -395,6 +396,12 @@ impl Step for Llvm { if let 
Some(path) = builder.config.llvm_profile_use.as_ref() { cfg.define("LLVM_PROFDATA_FILE", &path); } + if builder.config.llvm_bolt_profile_generate + || builder.config.llvm_bolt_profile_use.is_some() + { + // Relocations are required for BOLT to work. + ldflags.push_all("-Wl,-q"); + } // Disable zstd to avoid a dependency on libzstd.so. cfg.define("LLVM_ENABLE_ZSTD", "OFF"); @@ -563,12 +570,34 @@ impl Step for Llvm { } } + // After LLVM is built, we modify (instrument or optimize) the libLLVM.so library file + // in place. This is fine, because currently we do not support incrementally rebuilding + // LLVM after a configuration change, so to rebuild it the build files have to be removed, + // which will also remove these modified files. + if builder.config.llvm_bolt_profile_generate { + instrument_with_bolt_inplace(&get_built_llvm_lib_path(&build_llvm_config)); + } + if let Some(path) = &builder.config.llvm_bolt_profile_use { + optimize_library_with_bolt_inplace( + &get_built_llvm_lib_path(&build_llvm_config), + &Path::new(path), + ); + } + t!(stamp.write()); build_llvm_config } } +/// Returns the path to the built LLVM library (libLLVM.so), as printed by `llvm-config --libfiles`. +/// Assumes that we have built LLVM into a single library file.
+fn get_built_llvm_lib_path(llvm_config_path: &Path) -> PathBuf { + let mut cmd = Command::new(llvm_config_path); + cmd.arg("--libfiles"); + PathBuf::from(output(&mut cmd).trim()) +} + fn check_llvm_version(builder: &Builder<'_>, llvm_config: &Path) { if !builder.config.llvm_version_check { return; diff --git a/src/ci/docker/host-x86_64/dist-x86_64-linux/build-clang.sh b/src/ci/docker/host-x86_64/dist-x86_64-linux/build-clang.sh index fa780e1e45ea6..9abfd4e973115 100755 --- a/src/ci/docker/host-x86_64/dist-x86_64-linux/build-clang.sh +++ b/src/ci/docker/host-x86_64/dist-x86_64-linux/build-clang.sh @@ -22,7 +22,7 @@ INC="/rustroot/include:/usr/include" # We need compiler-rt for the profile runtime (used later to PGO the LLVM build) # but sanitizers aren't currently building. Since we don't need those, just -# disable them. +# disable them. BOLT is used for optimizing LLVM. hide_output \ cmake ../llvm \ -DCMAKE_C_COMPILER=/rustroot/bin/gcc \ @@ -36,7 +36,7 @@ hide_output \ -DLLVM_INCLUDE_BENCHMARKS=OFF \ -DLLVM_INCLUDE_TESTS=OFF \ -DLLVM_INCLUDE_EXAMPLES=OFF \ - -DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt" \ + -DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt;bolt" \ -DC_INCLUDE_DIRS="$INC" hide_output make -j$(nproc) diff --git a/src/ci/pgo.sh b/src/ci/pgo.sh index b60b7868d068a..cbe32920a7458 100755 --- a/src/ci/pgo.sh +++ b/src/ci/pgo.sh @@ -190,11 +190,40 @@ rm -r $RUSTC_PROFILE_DIRECTORY_ROOT # directories ourselves. rm -r $BUILD_ARTIFACTS/llvm $BUILD_ARTIFACTS/lld -# This produces the actual final set of artifacts, using both the LLVM and rustc -# collected profiling data.
-$@ \ - --rust-profile-use=${RUSTC_PROFILE_MERGED_FILE} \ - --llvm-profile-use=${LLVM_PROFILE_MERGED_FILE} +if isLinux; then + # Gather BOLT profile (BOLT is currently only available on Linux) + python3 ../x.py build --target=$PGO_HOST --host=$PGO_HOST \ + --stage 2 library/std \ + --llvm-profile-use=${LLVM_PROFILE_MERGED_FILE} \ + --llvm-bolt-profile-generate + + BOLT_PROFILE_MERGED_FILE=/tmp/bolt.profdata + + # Here we're gathering BOLT profiles by running the BOLT-instrumented LLVM. + gather_profiles "Check,Debug,Opt" "Full" \ + "syn-1.0.89,serde-1.0.136,ripgrep-13.0.0,regex-1.5.5,clap-3.1.6,hyper-0.14.18" + + merge-fdata /tmp/prof.fdata* > ${BOLT_PROFILE_MERGED_FILE} + + echo "BOLT statistics" + du -sh /tmp/prof.fdata* + du -sh ${BOLT_PROFILE_MERGED_FILE} + echo "Profile file count" + find /tmp/prof.fdata* -type f | wc -l + + rm -r $BUILD_ARTIFACTS/llvm $BUILD_ARTIFACTS/lld + + # This produces the actual final set of artifacts, using the LLVM and rustc PGO + # profiles together with the merged BOLT profile. + $@ \ + --rust-profile-use=${RUSTC_PROFILE_MERGED_FILE} \ + --llvm-profile-use=${LLVM_PROFILE_MERGED_FILE} \ + --llvm-bolt-profile-use=${BOLT_PROFILE_MERGED_FILE} +else + $@ \ + --rust-profile-use=${RUSTC_PROFILE_MERGED_FILE} \ + --llvm-profile-use=${LLVM_PROFILE_MERGED_FILE} +fi echo "Rustc binary size" ls -la ./build/$PGO_HOST/stage2/bin