From ca01838fbd6a90c2ddfe1c1b7d1bc1739aa0dcfc Mon Sep 17 00:00:00 2001 From: lzydmxy <13126752315@163.com> Date: Wed, 11 Sep 2024 17:30:21 +0800 Subject: [PATCH 1/2] Support Dwarf 5 --- base/daemon/BaseDaemon.cpp | 2 +- cmake/warnings.cmake | 1 + contrib/boost | 2 +- contrib/boost-cmake/CMakeLists.txt | 30 - contrib/libcxx | 2 +- contrib/libcxx-cmake/CMakeLists.txt | 122 +-- contrib/libcxxabi | 2 +- contrib/libcxxabi-cmake/CMakeLists.txt | 55 +- src/Common/Dwarf.cpp | 1079 +++++++++++++++++++----- src/Common/Dwarf.h | 224 ++--- src/Common/Elf.cpp | 66 +- src/Common/Elf.h | 26 +- 12 files changed, 1173 insertions(+), 438 deletions(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 05bd9f0207e..30826c7b729 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -736,7 +736,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() std::string executable_path = getExecutablePath(); if (!executable_path.empty()) - stored_binary_hash = RK::Elf(executable_path).getBinaryHash(); + stored_binary_hash = RK::Elf(executable_path).getStoredBinaryHash(); #endif } diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 1fdfbba0bb9..5289922804f 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -90,6 +90,7 @@ if (WEVERYTHING) no_warning(vla) no_warning(weak-template-vtables) no_warning(weak-vtables) + no_warning(thread-safety-negative) # experimental flag, too many false positives # XXX: libstdc++ has some of these for 3way compare if (NOT USE_LIBCXX) diff --git a/contrib/boost b/contrib/boost index ee24fa55bc4..03d9ec9cd15 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit ee24fa55bc46e4d2ce7d0d052cc5a0d9b1be8c36 +Subproject commit 03d9ec9cd159d14bd0b17c05138098451a1ea606 diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 6998136926a..3445e0a3f86 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -119,32 +119,6 @@ if (NOT EXTERNAL_BOOST_FOUND) add_library (boost::program_options ALIAS _boost_program_options) target_include_directories (_boost_program_options SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) - # regex - - set (SRCS_REGEX - ${LIBRARY_DIR}/libs/regex/src/c_regex_traits.cpp - ${LIBRARY_DIR}/libs/regex/src/cpp_regex_traits.cpp - ${LIBRARY_DIR}/libs/regex/src/cregex.cpp - ${LIBRARY_DIR}/libs/regex/src/fileiter.cpp - ${LIBRARY_DIR}/libs/regex/src/icu.cpp - ${LIBRARY_DIR}/libs/regex/src/instances.cpp - ${LIBRARY_DIR}/libs/regex/src/internals.hpp - ${LIBRARY_DIR}/libs/regex/src/posix_api.cpp - ${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp - ${LIBRARY_DIR}/libs/regex/src/regex_raw_buffer.cpp - ${LIBRARY_DIR}/libs/regex/src/regex_traits_defaults.cpp - ${LIBRARY_DIR}/libs/regex/src/regex.cpp - ${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp - ${LIBRARY_DIR}/libs/regex/src/usinstances.cpp - ${LIBRARY_DIR}/libs/regex/src/w32_regex_traits.cpp - ${LIBRARY_DIR}/libs/regex/src/wc_regex_traits.cpp - ${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp - ${LIBRARY_DIR}/libs/regex/src/winstances.cpp - ) - - add_library (_boost_regex ${SRCS_REGEX}) - add_library (boost::regex ALIAS _boost_regex) - target_include_directories (_boost_regex PRIVATE ${LIBRARY_DIR}) # system @@ -173,7 +147,6 @@ if (NOT EXTERNAL_BOOST_FOUND) ${LIBRARY_DIR}/libs/context/src/fiber.cpp ${LIBRARY_DIR}/libs/context/src/continuation.cpp ${LIBRARY_DIR}/libs/context/src/dummy.cpp - ${LIBRARY_DIR}/libs/context/src/execution_context.cpp ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp ) elseif (ARCH_ARM) @@ -182,7 +155,6 @@ if (NOT EXTERNAL_BOOST_FOUND) ${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S ${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S ${LIBRARY_DIR}/libs/context/src/dummy.cpp - ${LIBRARY_DIR}/libs/context/src/execution_context.cpp ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp ) elseif(OS_DARWIN) @@ -191,7 +163,6 @@ if (NOT EXTERNAL_BOOST_FOUND) ${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_macho_gas.S ${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_macho_gas.S ${LIBRARY_DIR}/libs/context/src/dummy.cpp - ${LIBRARY_DIR}/libs/context/src/execution_context.cpp ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp ) else() @@ -200,7 +171,6 @@ if (NOT EXTERNAL_BOOST_FOUND) ${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_elf_gas.S ${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_elf_gas.S ${LIBRARY_DIR}/libs/context/src/dummy.cpp - ${LIBRARY_DIR}/libs/context/src/execution_context.cpp ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp ) endif() diff --git a/contrib/libcxx b/contrib/libcxx index 8b80a151d12..9a457ab3c64 160000 --- a/contrib/libcxx +++ b/contrib/libcxx @@ -1 +1 @@ -Subproject commit 8b80a151d12b98ffe2d0c22f7cec12c3b9ff88d7 +Subproject commit 9a457ab3c64a533a06922b386b284215c17ce627 diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index e4f5bb5dcb6..3741e5f0192 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -3,52 +3,61 @@ include(CheckCXXCompilerFlag) set(LIBCXX_SOURCE_DIR ${RaftKeeper_SOURCE_DIR}/contrib/libcxx) set(SRCS -${LIBCXX_SOURCE_DIR}/src/algorithm.cpp -${LIBCXX_SOURCE_DIR}/src/any.cpp -${LIBCXX_SOURCE_DIR}/src/atomic.cpp -${LIBCXX_SOURCE_DIR}/src/barrier.cpp -${LIBCXX_SOURCE_DIR}/src/bind.cpp -${LIBCXX_SOURCE_DIR}/src/charconv.cpp -${LIBCXX_SOURCE_DIR}/src/chrono.cpp -${LIBCXX_SOURCE_DIR}/src/condition_variable.cpp -${LIBCXX_SOURCE_DIR}/src/condition_variable_destructor.cpp -${LIBCXX_SOURCE_DIR}/src/debug.cpp -${LIBCXX_SOURCE_DIR}/src/exception.cpp -${LIBCXX_SOURCE_DIR}/src/experimental/memory_resource.cpp -${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp -${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp -${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp -${LIBCXX_SOURCE_DIR}/src/functional.cpp -${LIBCXX_SOURCE_DIR}/src/future.cpp -${LIBCXX_SOURCE_DIR}/src/hash.cpp -${LIBCXX_SOURCE_DIR}/src/ios.cpp -${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp -${LIBCXX_SOURCE_DIR}/src/iostream.cpp -${LIBCXX_SOURCE_DIR}/src/locale.cpp -${LIBCXX_SOURCE_DIR}/src/memory.cpp -${LIBCXX_SOURCE_DIR}/src/mutex.cpp -${LIBCXX_SOURCE_DIR}/src/mutex_destructor.cpp -${LIBCXX_SOURCE_DIR}/src/new.cpp -${LIBCXX_SOURCE_DIR}/src/optional.cpp -${LIBCXX_SOURCE_DIR}/src/random.cpp -${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp -${LIBCXX_SOURCE_DIR}/src/regex.cpp -${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp -${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp -${LIBCXX_SOURCE_DIR}/src/string.cpp -${LIBCXX_SOURCE_DIR}/src/strstream.cpp -${LIBCXX_SOURCE_DIR}/src/system_error.cpp -${LIBCXX_SOURCE_DIR}/src/thread.cpp -${LIBCXX_SOURCE_DIR}/src/typeinfo.cpp -${LIBCXX_SOURCE_DIR}/src/utility.cpp -${LIBCXX_SOURCE_DIR}/src/valarray.cpp -${LIBCXX_SOURCE_DIR}/src/variant.cpp -${LIBCXX_SOURCE_DIR}/src/vector.cpp +"${LIBCXX_SOURCE_DIR}/src/algorithm.cpp" +"${LIBCXX_SOURCE_DIR}/src/any.cpp" +"${LIBCXX_SOURCE_DIR}/src/atomic.cpp" +"${LIBCXX_SOURCE_DIR}/src/barrier.cpp" +"${LIBCXX_SOURCE_DIR}/src/bind.cpp" +"${LIBCXX_SOURCE_DIR}/src/charconv.cpp" +"${LIBCXX_SOURCE_DIR}/src/chrono.cpp" +"${LIBCXX_SOURCE_DIR}/src/condition_variable.cpp" +"${LIBCXX_SOURCE_DIR}/src/condition_variable_destructor.cpp" +"${LIBCXX_SOURCE_DIR}/src/debug.cpp" +"${LIBCXX_SOURCE_DIR}/src/exception.cpp" +"${LIBCXX_SOURCE_DIR}/src/experimental/memory_resource.cpp" +"${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp" +"${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp" +"${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp" +"${LIBCXX_SOURCE_DIR}/src/format.cpp" +"${LIBCXX_SOURCE_DIR}/src/functional.cpp" +"${LIBCXX_SOURCE_DIR}/src/future.cpp" +"${LIBCXX_SOURCE_DIR}/src/hash.cpp" +"${LIBCXX_SOURCE_DIR}/src/ios.cpp" +"${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp" +"${LIBCXX_SOURCE_DIR}/src/iostream.cpp" +"${LIBCXX_SOURCE_DIR}/src/legacy_debug_handler.cpp" +"${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp" +"${LIBCXX_SOURCE_DIR}/src/locale.cpp" +"${LIBCXX_SOURCE_DIR}/src/memory.cpp" +"${LIBCXX_SOURCE_DIR}/src/mutex.cpp" +"${LIBCXX_SOURCE_DIR}/src/mutex_destructor.cpp" +"${LIBCXX_SOURCE_DIR}/src/new.cpp" +"${LIBCXX_SOURCE_DIR}/src/optional.cpp" +"${LIBCXX_SOURCE_DIR}/src/random.cpp" +"${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp" +"${LIBCXX_SOURCE_DIR}/src/regex.cpp" +"${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp" +"${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp" +"${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp" +"${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp" +"${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp" +"${LIBCXX_SOURCE_DIR}/src/string.cpp" +"${LIBCXX_SOURCE_DIR}/src/strstream.cpp" +"${LIBCXX_SOURCE_DIR}/src/system_error.cpp" +"${LIBCXX_SOURCE_DIR}/src/thread.cpp" +"${LIBCXX_SOURCE_DIR}/src/typeinfo.cpp" +"${LIBCXX_SOURCE_DIR}/src/utility.cpp" +"${LIBCXX_SOURCE_DIR}/src/valarray.cpp" +"${LIBCXX_SOURCE_DIR}/src/variant.cpp" +"${LIBCXX_SOURCE_DIR}/src/vector.cpp" +"${LIBCXX_SOURCE_DIR}/src/verbose_abort.cpp" ) add_library(cxx ${SRCS}) +set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") -target_include_directories(cxx SYSTEM BEFORE PUBLIC $) +target_include_directories(cxx SYSTEM BEFORE PRIVATE $) +target_include_directories(cxx SYSTEM BEFORE PUBLIC $) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # Enable capturing stack traces for all exceptions. @@ -56,17 +65,34 @@ if (USE_UNWIND) target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) endif () +if (USE_MUSL) + target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1) +endif () + +# Override the deduced attribute support that causes error. +if (OS_DARWIN AND COMPILER_GCC) + add_compile_definitions(_LIBCPP_INIT_PRIORITY_MAX) +endif () + target_compile_options(cxx PUBLIC $<$:-nostdinc++>) # Third party library may have substandard code. target_compile_options(cxx PRIVATE -w) +# Enable support for Clang-Thread-Safety-Analysis in libcxx +target_compile_definitions(cxx PUBLIC -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS) + target_link_libraries(cxx PUBLIC cxxabi) +# For __udivmodti4, __divmodti4. +if (OS_DARWIN AND COMPILER_GCC) + target_link_libraries(cxx PRIVATE gcc) +endif () + install( - TARGETS cxx - EXPORT global - ARCHIVE DESTINATION lib - RUNTIME DESTINATION lib - LIBRARY DESTINATION lib -) + TARGETS cxx + EXPORT global + ARCHIVE DESTINATION lib + RUNTIME DESTINATION lib + LIBRARY DESTINATION lib +) \ No newline at end of file diff --git a/contrib/libcxxabi b/contrib/libcxxabi index df8f1e727db..65ba93a0ace 160000 --- a/contrib/libcxxabi +++ b/contrib/libcxxabi @@ -1 +1 @@ -Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076 +Subproject commit 65ba93a0ace1abd7deced33b8c907f27714ce350 diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 0375cf165e6..21baf2cbb34 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -1,34 +1,37 @@ set(LIBCXXABI_SOURCE_DIR ${RaftKeeper_SOURCE_DIR}/contrib/libcxxabi) set(SRCS -${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp -${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp -${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp -${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp -${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp -${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp -${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp -${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp + "${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp" + # "${LIBCXXABI_SOURCE_DIR}/src/cxa_noexception.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp" + "${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp" ) add_library(cxxabi ${SRCS}) +set_target_properties(cxxabi PROPERTIES FOLDER "contrib/libcxxabi-cmake") # Third party library may have substandard code. target_compile_options(cxxabi PRIVATE -w) target_include_directories(cxxabi SYSTEM BEFORE - PUBLIC $ - PRIVATE $ + PUBLIC $ + PRIVATE $ + PRIVATE $ ) target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY) target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast. @@ -40,9 +43,9 @@ if (USE_UNWIND) endif () install( - TARGETS cxxabi - EXPORT global - ARCHIVE DESTINATION lib - RUNTIME DESTINATION lib - LIBRARY DESTINATION lib -) + TARGETS cxxabi + EXPORT global + ARCHIVE DESTINATION lib + RUNTIME DESTINATION lib + LIBRARY DESTINATION lib +) \ No newline at end of file diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp index 86e68ae0593..a6525846e17 100644 --- a/src/Common/Dwarf.cpp +++ b/src/Common/Dwarf.cpp @@ -1,4 +1,4 @@ -#if defined(__ELF__) && !defined(__FreeBSD__) +#if defined(__ELF__) && !defined(OS_FREEBSD) /* * Copyright 2012-present Facebook, Inc. @@ -19,14 +19,14 @@ /** This file was edited for ClickHouse. */ -#include +#include #include #include #include - #define DW_CHILDREN_no 0 + #define DW_FORM_addr 1 #define DW_FORM_block1 0x0a #define DW_FORM_block2 3 @@ -52,6 +52,25 @@ #define DW_FORM_string 0x08 #define DW_FORM_strp 0x0e #define DW_FORM_indirect 0x16 +#define DW_FORM_strx 0x1a +#define DW_FORM_addrx 0x1b +#define DW_FORM_ref_sup4 0x1c +#define DW_FORM_strp_sup 0x1d +#define DW_FORM_data16 0x1e +#define DW_FORM_line_strp 0x1f +#define DW_FORM_implicit_const 0x21 +#define DW_FORM_rnglistx 0x23 +#define DW_FORM_loclistx 0x22 +#define DW_FORM_ref_sup8 0x24 +#define DW_FORM_strx1 0x25 +#define DW_FORM_strx2 0x26 +#define DW_FORM_strx3 0x27 +#define DW_FORM_strx4 0x28 +#define DW_FORM_addrx1 0x29 +#define DW_FORM_addrx2 0x2a +#define DW_FORM_addrx3 0x2b +#define DW_FORM_addrx4 0x2c + #define DW_TAG_compile_unit 0x11 #define DW_TAG_subprogram 0x2e #define DW_TAG_try_block 0x32 @@ -59,6 +78,7 @@ #define DW_TAG_entry_point 0x03 #define DW_TAG_common_block 0x1a #define DW_TAG_lexical_block 0x0b + #define DW_AT_stmt_list 0x10 #define DW_AT_comp_dir 0x1b #define DW_AT_name 0x03 @@ -71,6 +91,13 @@ #define DW_AT_call_file 0x58 #define DW_AT_linkage_name 0x6e #define DW_AT_specification 0x47 +#define DW_AT_str_offsets_base 0x72 +#define DW_AT_addr_base 0x73 +#define DW_AT_rnglists_base 0x74 +#define DW_AT_loclists_base 0x8c +#define DW_AT_GNU_ranges_base 0x2132 +#define DW_AT_GNU_addr_base 0x2133 + #define DW_LNE_define_file 0x03 #define DW_LNS_copy 0x01 #define DW_LNS_advance_pc 0x02 @@ -88,6 +115,21 @@ #define DW_LNE_set_address 0x02 #define DW_LNE_set_discriminator 0x04 +#define DW_LNCT_path 0x1 +#define DW_LNCT_directory_index 0x2 +#define DW_LNCT_timestamp 0x3 +#define DW_LNCT_size 0x4 +#define DW_LNCT_MD5 0x5 + +#define DW_RLE_end_of_list 0x0 +#define DW_RLE_base_addressx 0x1 +#define DW_RLE_startx_endx 0x2 +#define DW_RLE_startx_length 0x3 +#define DW_RLE_offset_pair 0x4 +#define DW_RLE_base_address 0x5 +#define DW_RLE_start_end 0x6 +#define DW_RLE_start_length 0x7 + namespace RK { @@ -98,17 +140,39 @@ namespace ErrorCodes } -Dwarf::Dwarf(const std::shared_ptr & elf) : elf_(elf) +Dwarf::Dwarf(const std::shared_ptr & elf) + : elf_(elf) + , abbrev_(getSection(".debug_abbrev")) + , addr_(getSection(".debug_addr")) + , aranges_(getSection(".debug_aranges")) + , info_(getSection(".debug_info")) + , line_(getSection(".debug_line")) + , line_str_(getSection(".debug_line_str")) + , loclists_(getSection(".debug_loclists")) + , ranges_(getSection(".debug_ranges")) + , rnglists_(getSection(".debug_rnglists")) + , str_(getSection(".debug_str")) + , str_offsets_(getSection(".debug_str_offsets")) { - init(); + // Optional sections: + // - debugAranges_: for fast address range lookup. + // If missing .debug_info can be used - but it's much slower (linear + // scan). + // - debugRanges_ (DWARF 4) / debugRnglists_ (DWARF 5): non-contiguous + // address ranges of debugging information entries. + // Used for inline function address lookup. + if (info_.empty() || abbrev_.empty() || line_.empty() || str_.empty()) + { + elf_ = nullptr; + } } -Dwarf::Section::Section(std::string_view d) : is64Bit_(false), data_(d) +Dwarf::Section::Section(std::string_view d) : is64_bit(false), data(d) { } -#define SAFE_CHECK(cond, message) do { if (!(cond)) throw Exception(message, ErrorCodes::CANNOT_PARSE_DWARF); } while (false) +#define SAFE_CHECK(cond, ...) do { if (!(cond)) throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, __VA_ARGS__); } while (false) namespace @@ -122,15 +186,27 @@ const uint32_t kMaxAbbreviationEntries = 1000; // Read (bitwise) one object of type T template -std::enable_if_t && std::is_standard_layout_v, T> read(std::string_view & sp) +requires std::is_trivial_v && std::is_standard_layout_v +T read(std::string_view & sp) { - SAFE_CHECK(sp.size() >= sizeof(T), "underflow"); + SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), sp.size()); T x; memcpy(&x, sp.data(), sizeof(T)); sp.remove_prefix(sizeof(T)); return x; } +// Read (bitwise) an unsigned number of N bytes (N in 1, 2, 3, 4). +template +uint64_t readU64(std::string_view & sp) +{ + SAFE_CHECK(sp.size() >= N, "underflow"); + uint64_t x = 0; + memcpy(&x, sp.data(), N); + sp.remove_prefix(N); + return x; +} + // Read ULEB (unsigned) varint value; algorithm from the DWARF spec uint64_t readULEB(std::string_view & sp, uint8_t & shift, uint8_t & val) { @@ -168,15 +244,15 @@ int64_t readSLEB(std::string_view & sp) } // Read a value of "section offset" type, which may be 4 or 8 bytes -uint64_t readOffset(std::string_view & sp, bool is64Bit) +uint64_t readOffset(std::string_view & sp, bool is64_bit) { - return is64Bit ? read(sp) : read(sp); + return is64_bit ? read(sp) : read(sp); } // Read "len" bytes std::string_view readBytes(std::string_view & sp, uint64_t len) { - SAFE_CHECK(len <= sp.size(), "invalid string length: " + std::to_string(len) + " vs. " + std::to_string(sp.size())); + SAFE_CHECK(len <= sp.size(), "invalid string length: {} vs. {}", len, sp.size()); std::string_view ret(sp.data(), len); sp.remove_prefix(len); return ret; @@ -192,6 +268,15 @@ std::string_view readNullTerminated(std::string_view & sp) return ret; } +// Get a string from the section +std::string_view getStringFromStringSection(std::string_view section, uint64_t offset) +{ + SAFE_CHECK(offset < section.size(), "invalid section offset"); + std::string_view sp(section); + sp.remove_prefix(offset); + return readNullTerminated(sp); +} + // Skip over padding until sp.data() - start is a multiple of alignment void skipPadding(std::string_view & sp, const char * start, size_t alignment) { @@ -343,7 +428,7 @@ void Dwarf::Path::toString(std::string & dest) const // Next chunk in section bool Dwarf::Section::next(std::string_view & chunk) { - chunk = data_; + chunk = data; if (chunk.empty()) return false; @@ -351,46 +436,26 @@ bool Dwarf::Section::next(std::string_view & chunk) // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit // section. auto initial_length = read(chunk); - is64Bit_ = (initial_length == uint32_t(-1)); - auto length = is64Bit_ ? read(chunk) : initial_length; + is64_bit = (initial_length == uint32_t(-1)); + auto length = is64_bit ? read(chunk) : initial_length; SAFE_CHECK(length <= chunk.size(), "invalid DWARF section"); chunk = std::string_view(chunk.data(), length); - data_ = std::string_view(chunk.end(), data_.end() - chunk.end()); + data = std::string_view(chunk.end(), data.end() - chunk.end()); return true; } -bool Dwarf::getSection(const char * name, std::string_view * section) const +std::string_view Dwarf::getSection(const char * name) const { std::optional elf_section = elf_->findSectionByName(name); if (!elf_section) - return false; + return {}; #ifdef SHF_COMPRESSED if (elf_section->header.sh_flags & SHF_COMPRESSED) - return false; + return {}; #endif - *section = { elf_section->begin(), elf_section->size()}; - return true; -} - -void Dwarf::init() -{ - // Make sure that all .debug_* sections exist - if (!getSection(".debug_info", &info_) - || !getSection(".debug_abbrev", &abbrev_) - || !getSection(".debug_line", &line_) - || !getSection(".debug_str", &strings_)) - { - elf_.reset(); - return; - } - - // Optional: fast address range lookup. If missing .debug_info can - // be used - but it's much slower (linear scan). - getSection(".debug_aranges", &aranges_); - - getSection(".debug_ranges", &ranges_); + return { elf_section->begin(), elf_section->size()}; } // static @@ -473,7 +538,7 @@ size_t Dwarf::forEachAttribute(const CompilationUnit & cu, const Die & die, std: auto values = std::string_view{info_.data() + die.offset + die.attr_offset, cu.offset + cu.size - die.offset - die.attr_offset}; while (auto spec = readAttributeSpec(attrs)) { - auto attr = readAttribute(die, spec, values); + auto attr = readAttribute(cu, die, spec, values); if (!f(attr)) { return static_cast(-1); @@ -482,8 +547,49 @@ size_t Dwarf::forEachAttribute(const CompilationUnit & cu, const Die & die, std: return values.data() - info_.data(); } -Dwarf::Attribute Dwarf::readAttribute(const Die & die, AttributeSpec spec, std::string_view & info) const +Dwarf::Attribute Dwarf::readAttribute(const CompilationUnit & cu, + const Die & die, + AttributeSpec spec, + std::string_view & info) const { + // DWARF 5 introduces new FORMs whose values are relative to some base attrs: + // DW_AT_str_offsets_base, DW_AT_rnglists_base, DW_AT_addr_base. + // Debug Fission DWARF 4 uses GNU DW_AT_GNU_ranges_base & DW_AT_GNU_addr_base. + // + // The order in which attributes appear in a CU is not defined. + // The DW_AT_*_base attrs may appear after attributes that need them. + // The DW_AT_*_base attrs are CU specific; so we read them just after + // reading the CU header. During this first pass return empty values + // when encountering a FORM that depends on DW_AT_*_base. + auto get_string_using_offset_table = [&](uint64_t index) + { + if (!cu.str_offsets_base.has_value()) + { + return std::string_view(); + } + // DWARF 5: 7.26 String Offsets Table + // The DW_AT_str_offsets_base attribute points to the first entry following + // the header. The entries are indexed sequentially from this base entry, + // starting from 0. + auto sp = str_offsets_.substr(*cu.str_offsets_base + index * (cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t))); + uint64_t str_offset = readOffset(sp, cu.is64Bit); + return getStringFromStringSection(str_, str_offset); + }; + + auto read_debug_addr = [&](uint64_t index) + { + if (!cu.addr_base.has_value()) + { + return uint64_t(0); + } + // DWARF 5: 7.27 Address Table + // The DW_AT_addr_base attribute points to the first entry following the + // header. The entries are indexed sequentially from this base entry, + // starting from 0. + auto sp = addr_.substr(*cu.addr_base + index * sizeof(uint64_t)); + return read(sp); + }; + switch (spec.form) { case DW_FORM_addr: @@ -517,7 +623,7 @@ Dwarf::Attribute Dwarf::readAttribute(const Die & die, AttributeSpec spec, std:: case DW_FORM_ref_sig8: return {spec, die, read(info)}; case DW_FORM_sdata: - return {spec, die, uint64_t(readSLEB(info))}; + return {spec, die, static_cast(readSLEB(info))}; case DW_FORM_udata: [[fallthrough]]; case DW_FORM_ref_udata: @@ -525,7 +631,7 @@ Dwarf::Attribute Dwarf::readAttribute(const Die & die, AttributeSpec spec, std:: case DW_FORM_flag: return {spec, die, read(info)}; case DW_FORM_flag_present: - return {spec, die, 1u}; + return {spec, die, 1ULL}; case DW_FORM_sec_offset: [[fallthrough]]; case DW_FORM_ref_addr: @@ -533,49 +639,215 @@ Dwarf::Attribute Dwarf::readAttribute(const Die & die, AttributeSpec spec, std:: case DW_FORM_string: return {spec, die, readNullTerminated(info)}; case DW_FORM_strp: - return {spec, die, getStringFromStringSection(readOffset(info, die.is64Bit))}; + return {spec, die, getStringFromStringSection(str_, readOffset(info, die.is64Bit))}; case DW_FORM_indirect: // form is explicitly specified // Update spec with the actual FORM. spec.form = readULEB(info); - return readAttribute(die, spec, info); + return readAttribute(cu, die, spec, info); + + // DWARF 5: + case DW_FORM_implicit_const: // form is explicitly specified + // For attributes with this form, the attribute specification contains a + // third part, which is a signed LEB128 number. The value of this number + // is used as the value of the attribute, and no value is stored in the + // .debug_info section. + return {spec, die, static_cast(spec.implicitConst)}; + + case DW_FORM_addrx: + return {spec, die, read_debug_addr(readULEB(info))}; + case DW_FORM_addrx1: + return {spec, die, read_debug_addr(readU64<1>(info))}; + case DW_FORM_addrx2: + return {spec, die, read_debug_addr(readU64<2>(info))}; + case DW_FORM_addrx3: + return {spec, die, read_debug_addr(readU64<3>(info))}; + case DW_FORM_addrx4: + return {spec, die, read_debug_addr(readU64<4>(info))}; + + case DW_FORM_line_strp: + return {spec, die, getStringFromStringSection(line_str_, readOffset(info, die.is64Bit))}; + + case DW_FORM_strx: + return {spec, die, get_string_using_offset_table(readULEB(info))}; + case DW_FORM_strx1: + return {spec, die, get_string_using_offset_table(readU64<1>(info))}; + case DW_FORM_strx2: + return {spec, die, get_string_using_offset_table(readU64<2>(info))}; + case DW_FORM_strx3: + return {spec, die, get_string_using_offset_table(readU64<3>(info))}; + case DW_FORM_strx4: + return {spec, die, get_string_using_offset_table(readU64<4>(info))}; + + case DW_FORM_rnglistx: { + auto index = readULEB(info); + if (!cu.rnglists_base.has_value()) + { + return {spec, die, 0ULL}; + } + const uint64_t offset_size = cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t); + auto sp = rnglists_.substr(*cu.rnglists_base + index * offset_size); + auto offset = readOffset(sp, cu.is64Bit); + return {spec, die, *cu.rnglists_base + offset}; + } + + case DW_FORM_loclistx: { + auto index = readULEB(info); + if (!cu.loclists_base.has_value()) + { + return {spec, die, 0ULL}; + } + const uint64_t offset_size = cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t); + auto sp = loclists_.substr(*cu.loclists_base + index * offset_size); + auto offset = readOffset(sp, cu.is64Bit); + return {spec, die, *cu.loclists_base + offset}; + } + + case DW_FORM_data16: + return {spec, die, readBytes(info, 16)}; + + case DW_FORM_ref_sup4: + case DW_FORM_ref_sup8: + case DW_FORM_strp_sup: + SAFE_CHECK(false, "Unexpected DWARF5 supplimentary object files"); + default: SAFE_CHECK(false, "invalid attribute form"); } - - return {spec, die, 0u}; + return {spec, die, 0ULL}; } // static Dwarf::AttributeSpec Dwarf::readAttributeSpec(std::string_view & sp) { - return {readULEB(sp), readULEB(sp)}; + Dwarf::AttributeSpec spec; + spec.name = readULEB(sp); + spec.form = readULEB(sp); + if (spec.form == DW_FORM_implicit_const) + { + spec.implicitConst = readSLEB(sp); + } + return spec; } -// static -Dwarf::CompilationUnit Dwarf::getCompilationUnit(std::string_view info, uint64_t offset) +Dwarf::CompilationUnit Dwarf::getCompilationUnit(uint64_t offset) const { - SAFE_CHECK(offset < info.size(), "unexpected offset"); + // SAFE_CHECK(offset < info_.size(), "unexpected offset"); CompilationUnit cu; - std::string_view chunk(info); + std::string_view chunk(info_); cu.offset = offset; chunk.remove_prefix(offset); + // 1) unit_length auto initial_length = read(chunk); cu.is64Bit = (initial_length == uint32_t(-1)); cu.size = cu.is64Bit ? read(chunk) : initial_length; SAFE_CHECK(cu.size <= chunk.size(), "invalid chunk size"); cu.size += cu.is64Bit ? 12 : 4; + // 2) version cu.version = read(chunk); - SAFE_CHECK(cu.version >= 2 && cu.version <= 4, "invalid info version"); - cu.abbrev_offset = readOffset(chunk, cu.is64Bit); - cu.addr_size = read(chunk); - SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size"); + SAFE_CHECK(cu.version >= 2 && cu.version <= 5, "invalid info version"); + + if (cu.version == 5) + { + // DWARF5: 7.5.1.1 Full and Partial Compilation Unit Headers + // 3) unit_type (new DWARF 5) + cu.unit_type = read(chunk); + if (cu.unit_type != DW_UT_compile && cu.unit_type != DW_UT_skeleton) + { + return cu; + } + // 4) address_size + cu.addr_size = read(chunk); + SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size"); + + // 5) debug_abbrev_offset + cu.abbrev_offset = readOffset(chunk, cu.is64Bit); + + if (cu.unit_type == DW_UT_skeleton) + { + // 6) dwo_id + read(chunk); + } + } + else + { + // DWARF4 has a single type of unit in .debug_info + cu.unit_type = DW_UT_compile; + // 3) debug_abbrev_offset + cu.abbrev_offset = readOffset(chunk, cu.is64Bit); + // 4) address_size + cu.addr_size = read(chunk); + SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size"); + } + cu.first_die = chunk.data() - info_.data(); + if (cu.version < 5) + { + return cu; + } - cu.first_die = chunk.data() - info.data(); + Die die = getDieAtOffset(cu, cu.first_die); + if (die.abbr.tag != DW_TAG_compile_unit) + { + return cu; + } + + // Read the DW_AT_*_base attributes. + // Attributes which use FORMs relative to these base attrs + // will not have valid values during this first pass! + forEachAttribute( + cu, + die, + [&](const Attribute & attr) + { + switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case) + { + case DW_AT_addr_base: + case DW_AT_GNU_addr_base: + cu.addr_base = std::get(attr.attr_value); + break; + case DW_AT_loclists_base: + cu.loclists_base = std::get(attr.attr_value); + break; + case DW_AT_rnglists_base: + case DW_AT_GNU_ranges_base: + cu.rnglists_base = std::get(attr.attr_value); + break; + case DW_AT_str_offsets_base: + cu.str_offsets_base = std::get(attr.attr_value); + break; + } + return true; // continue forEachAttribute + }); return cu; } +// Finds the Compilation Unit starting at offset. +Dwarf::CompilationUnit Dwarf::findCompilationUnit(uint64_t targetOffset) const +{ + // SAFE_CHECK(targetOffset < info_.size(), "unexpected target address"); + uint64_t offset = 0; + while (offset < info_.size()) + { + std::string_view chunk(info_); + chunk.remove_prefix(offset); + + auto initial_length = read(chunk); + auto is64_bit = (initial_length == static_cast(-1)); + auto size = is64_bit ? read(chunk) : initial_length; + SAFE_CHECK(size <= chunk.size(), "invalid chunk size"); + size += is64_bit ? 12 : 4; + + if (offset + size > targetOffset) + { + break; + } + offset += size; + } + return getCompilationUnit(offset); +} + + Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) const { // Linear search in the .debug_abbrev section, starting at offset @@ -590,7 +862,7 @@ Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) co SAFE_CHECK(false, "could not find abbreviation code"); } -Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const +Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view & sp, uint64_t form, bool is64_bit) const { switch (form) { @@ -628,26 +900,18 @@ Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view & sp, uint64_t return uint64_t(1); case DW_FORM_sec_offset: [[fallthrough]]; case DW_FORM_ref_addr: - return readOffset(sp, is64Bit); + return readOffset(sp, is64_bit); case DW_FORM_string: return readNullTerminated(sp); case DW_FORM_strp: - return getStringFromStringSection(readOffset(sp, is64Bit)); + return getStringFromStringSection(str_, readOffset(sp, is64_bit)); case DW_FORM_indirect: // form is explicitly specified - return readAttributeValue(sp, readULEB(sp), is64Bit); + return readAttributeValue(sp, readULEB(sp), is64_bit); default: SAFE_CHECK(false, "invalid attribute form"); } } -std::string_view Dwarf::getStringFromStringSection(uint64_t offset) const -{ - SAFE_CHECK(offset < strings_.size(), "invalid strp offset"); - std::string_view sp(strings_); - sp.remove_prefix(offset); - return readNullTerminated(sp); -} - /** * Find @address in .debug_aranges and return the offset in * .debug_info for compilation unit to which this address belongs. @@ -689,7 +953,7 @@ bool Dwarf::findDebugInfoOffset(uintptr_t address, std::string_view aranges, uin Dwarf::Die Dwarf::getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const { - SAFE_CHECK(offset < info_.size(), "unexpected offset"); + SAFE_CHECK(offset < info_.size(), "unexpected offset {}, info size {}", offset, info_.size()); Die die; std::string_view sp{info_.data() + offset, cu.offset + cu.size - offset}; die.offset = offset; @@ -707,17 +971,48 @@ Dwarf::Die Dwarf::getDieAtOffset(const CompilationUnit & cu, uint64_t offset) co return die; } -Dwarf::Die Dwarf::findDefinitionDie(const CompilationUnit & cu, const Die & die) const +std::optional, uint64_t>> Dwarf::getReferenceAttribute( + const CompilationUnit & cu, const Die & die, uint64_t attr_name) const { - // Find the real definition instead of declaration. - // DW_AT_specification: Incomplete, non-defining, or separate declaration - // corresponding to a declaration - auto offset = getAttribute(cu, die, DW_AT_specification); - if (!offset) + bool found = false; + uint64_t value; + uint64_t form; + forEachAttribute(cu, die, [&](const Attribute & attr) { - return die; + if (attr.spec.name == attr_name) + { + found = true; + value = std::get(attr.attr_value); + form = attr.spec.form; + return false; + } + return true; + }); + if (!found) + return std::nullopt; + switch (form) + { + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: + return std::make_pair(std::nullopt, cu.offset + value); + + case DW_FORM_ref_addr: + return std::make_pair(findCompilationUnit(value), value); + + case DW_FORM_ref_sig8: + /// Currently we don't use this parser for types, so no need to support this. + throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, "Type signatures are not supported (DIE at 0x{:x}, attr 0x{:x}).", die.offset, attr_name); + + case DW_FORM_ref_sup4: + case DW_FORM_ref_sup8: + throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, "Supplementary object files are not supported (DIE at 0x{:x}, attr 0x{:x}).", die.offset, attr_name); + + default: + throw Exception(ErrorCodes::CANNOT_PARSE_DWARF, "Unexpected form of attribute 0x{:x}: 0x{:x} (DIE at 0x{:x}).", attr_name, form, die.offset); } - return getDieAtOffset(cu, cu.offset + offset.value()); } /** @@ -737,7 +1032,7 @@ bool Dwarf::findLocation( // Partial compilation unit (DW_TAG_partial_unit) is not supported. SAFE_CHECK(die.abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry"); - // Read attributes, extracting the few we care about + // Offset in .debug_line for the line number VM program for this CU std::optional line_offset = 0; std::string_view compilation_directory; std::optional main_file_name; @@ -745,7 +1040,7 @@ bool Dwarf::findLocation( forEachAttribute(cu, die, [&](const Attribute & attr) { - switch (attr.spec.name) + switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case) { case DW_AT_stmt_list: // Offset in .debug_line for the line number VM program for this @@ -785,7 +1080,7 @@ bool Dwarf::findLocation( std::string_view line_section(line_); line_section.remove_prefix(*line_offset); - LineNumberVM line_vm(line_section, compilation_directory); + LineNumberVM line_vm(line_section, compilation_directory, str_, line_str_); // Execute line number VM program to find file and line info.has_file_and_line = line_vm.findAddress(address, info.file, info.line); @@ -804,7 +1099,7 @@ bool Dwarf::findLocation( findSubProgramDieForAddress(cu, die, address, base_addr_cu, subprogram); // Subprogram is the DIE of caller function. - if (check_inline && subprogram.abbr.has_children) + if (/*check_inline &&*/ subprogram.abbr.has_children) { // Use an extra location and get its call file and call line, so that // they can be used for the second last location when we don't have @@ -838,7 +1133,7 @@ bool Dwarf::findLocation( // The next inlined subroutine's call file and call line is the current // caller's location. - for (size_t i = 0; i < num_found - 1; i++) + for (size_t i = 0; i < num_found - 1; ++i) { call_locations[i].file = call_locations[i + 1].file; call_locations[i].line = call_locations[i + 1].line; @@ -860,7 +1155,10 @@ bool Dwarf::findLocation( SymbolizedFrame inline_frame; inline_frame.found = true; inline_frame.addr = address; - inline_frame.name = call_location.name.data(); + if (!call_location.name.empty()) + inline_frame.name = call_location.name.data(); + else + inline_frame.name = nullptr; inline_frame.location.has_file_and_line = true; inline_frame.location.file = call_location.file; inline_frame.location.line = call_location.line; @@ -873,8 +1171,11 @@ bool Dwarf::findLocation( return info.has_file_and_line; } -void Dwarf::findSubProgramDieForAddress( - const CompilationUnit & cu, const Die & die, uint64_t address, std::optional base_addr_cu, Die & subprogram) const +void Dwarf::findSubProgramDieForAddress(const CompilationUnit & cu, + const Die & die, + uint64_t address, + std::optional base_addr_cu, + Die & subprogram) const { forEachChild(cu, die, [&](const Die & child_die) { @@ -886,7 +1187,7 @@ void Dwarf::findSubProgramDieForAddress( std::optional range_offset; forEachAttribute(cu, child_die, [&](const Attribute & attr) { - switch (attr.spec.name) + switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case) { case DW_AT_ranges: range_offset = std::get(attr.attr_value); @@ -895,9 +1196,14 @@ void Dwarf::findSubProgramDieForAddress( low_pc = std::get(attr.attr_value); break; case DW_AT_high_pc: - // Value of DW_AT_high_pc attribute can be an address - // (DW_FORM_addr) or an offset (DW_FORM_data). - is_high_pc_addr = (attr.spec.form == DW_FORM_addr); + // The value of the DW_AT_high_pc attribute can be + // an address (DW_FORM_addr*) or an offset (DW_FORM_data*). + is_high_pc_addr = attr.spec.form == DW_FORM_addr || // + attr.spec.form == DW_FORM_addrx || // + attr.spec.form == DW_FORM_addrx1 || // + attr.spec.form == DW_FORM_addrx2 || // + attr.spec.form == DW_FORM_addrx3 || // + attr.spec.form == DW_FORM_addrx4; high_pc = std::get(attr.attr_value); break; } @@ -906,7 +1212,7 @@ void Dwarf::findSubProgramDieForAddress( }); bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc && (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc)); - bool range_match = range_offset && isAddrInRangeList(address, base_addr_cu, range_offset.value(), cu.addr_size); + bool range_match = range_offset && isAddrInRangeList(cu, address, base_addr_cu, range_offset.value(), cu.addr_size); if (pc_match || range_match) { subprogram = child_die; @@ -972,7 +1278,7 @@ void Dwarf::findInlinedSubroutineDieForAddress( std::optional range_offset; forEachAttribute(cu, child_die, [&](const Attribute & attr) { - switch (attr.spec.name) + switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case) { case DW_AT_ranges: range_offset = std::get(attr.attr_value); @@ -981,9 +1287,14 @@ void Dwarf::findInlinedSubroutineDieForAddress( low_pc = std::get(attr.attr_value); break; case DW_AT_high_pc: - // Value of DW_AT_high_pc attribute can be an address - // (DW_FORM_addr) or an offset (DW_FORM_data). - is_high_pc_addr = (attr.spec.form == DW_FORM_addr); + // The value of the DW_AT_high_pc attribute can be + // an address (DW_FORM_addr*) or an offset (DW_FORM_data*). + is_high_pc_addr = attr.spec.form == DW_FORM_addr || // + attr.spec.form == DW_FORM_addrx || // + attr.spec.form == DW_FORM_addrx1 || // + attr.spec.form == DW_FORM_addrx2 || // + attr.spec.form == DW_FORM_addrx3 || // + attr.spec.form == DW_FORM_addrx4; high_pc = std::get(attr.attr_value); break; case DW_AT_abstract_origin: @@ -1015,7 +1326,7 @@ void Dwarf::findInlinedSubroutineDieForAddress( // TODO: Support relocated address which requires lookup in relocation map. bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc && (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc)); - bool range_match = range_offset && isAddrInRangeList(address, base_addr_cu, range_offset.value(), cu.addr_size); + bool range_match = range_offset && isAddrInRangeList(cu, address, base_addr_cu, range_offset.value(), cu.addr_size); if (!pc_match && !range_match) { // Address doesn't match. Keep searching other children. @@ -1035,17 +1346,27 @@ void Dwarf::findInlinedSubroutineDieForAddress( auto get_function_name = [&](const CompilationUnit & srcu, uint64_t die_offset) { - auto decl_die = getDieAtOffset(srcu, die_offset); + Die die_to_look_for_name = getDieAtOffset(srcu, die_offset); + // Jump to the actual function definition instead of declaration for name // and line info. - auto def_die = findDefinitionDie(srcu, decl_die); + // DW_AT_specification: Incomplete, non-defining, or separate declaration + // corresponding to a declaration + auto def = getReferenceAttribute(srcu, die_to_look_for_name, DW_AT_specification); + if (def.has_value()) + { + auto [def_cu, def_offset] = std::move(def.value()); + const CompilationUnit & def_cu_ref = def_cu.has_value() ? def_cu.value() : srcu; + die_to_look_for_name = getDieAtOffset(def_cu_ref, def_offset); + } std::string_view name; + // The file and line will be set in the next inline subroutine based on // its DW_AT_call_file and DW_AT_call_line. - forEachAttribute(srcu, def_die, [&](const Attribute & attr) + forEachAttribute(srcu, die_to_look_for_name, [&](const Attribute & attr) { - switch (attr.spec.name) + switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case) { case DW_AT_linkage_name: name = std::get(attr.attr_value); @@ -1080,7 +1401,7 @@ void Dwarf::findInlinedSubroutineDieForAddress( // Not applicable for DW_AT_abstract_origin. location.name = (*abstract_origin_ref_type != DW_FORM_ref_addr) ? get_function_name(cu, cu.offset + *abstract_origin) - : get_function_name(findCompilationUnit(info_, *abstract_origin), *abstract_origin); + : get_function_name(findCompilationUnit(*abstract_origin), *abstract_origin); locations.push_back(location); @@ -1113,7 +1434,11 @@ bool Dwarf::findAddress( if (findDebugInfoOffset(address, aranges_, offset)) { // Read compilation unit header from .debug_info - auto unit = getCompilationUnit(info_, offset); + auto unit = getCompilationUnit(offset); + if (unit.unit_type != DW_UT_compile && unit.unit_type != DW_UT_skeleton) + { + return false; + } findLocation(address, mode, unit, locationInfo, inline_frames); return locationInfo.has_file_and_line; } @@ -1137,84 +1462,160 @@ bool Dwarf::findAddress( uint64_t offset = 0; while (offset < info_.size() && !locationInfo.has_file_and_line) { - auto unit = getCompilationUnit(info_, offset); + auto unit = getCompilationUnit(offset); offset += unit.size; + if (unit.unit_type != DW_UT_compile && unit.unit_type != DW_UT_skeleton) + { + continue; + } findLocation(address, mode, unit, locationInfo, inline_frames); } return locationInfo.has_file_and_line; } -bool Dwarf::isAddrInRangeList(uint64_t address, std::optional base_addr, size_t offset, uint8_t addr_size) const +bool Dwarf::isAddrInRangeList(const CompilationUnit & cu, + uint64_t address, + std::optional base_addr, + size_t offset, + uint8_t addr_size) const { SAFE_CHECK(addr_size == 4 || addr_size == 8, "wrong address size"); - if (ranges_.empty()) + if (cu.version <= 4 && !ranges_.empty()) { - return false; + const bool is64_bit_addr = addr_size == 8; + std::string_view sp = ranges_; + sp.remove_prefix(offset); + const uint64_t max_addr = is64_bit_addr ? std::numeric_limits::max() : std::numeric_limits::max(); + while (!sp.empty()) + { + uint64_t begin = readOffset(sp, is64_bit_addr); + uint64_t end = readOffset(sp, is64_bit_addr); + // The range list entry is a base address selection entry. + if (begin == max_addr) + { + base_addr = end; + continue; + } + // The range list entry is an end of list entry. + if (begin == 0 && end == 0) + { + break; + } + + // Check if the given address falls in the range list entry. + // 2.17.3 Non-Contiguous Address Ranges + // The applicable base address of a range list entry is determined by the + // closest preceding base address selection entry (see below) in the same + // range list. If there is no such selection entry, then the applicable + // base address defaults to the base address of the compilation unit. + if (base_addr && address >= begin + *base_addr && address < end + *base_addr) + { + return true; + } + } } - const bool is_64bit_addr = addr_size == 8; - std::string_view sp = ranges_; - sp.remove_prefix(offset); - const uint64_t max_addr = is_64bit_addr ? std::numeric_limits::max() : std::numeric_limits::max(); - while (!sp.empty()) + if (cu.version == 5 && !rnglists_.empty() && cu.addr_base.has_value()) { - uint64_t begin = readOffset(sp, is_64bit_addr); - uint64_t end = readOffset(sp, is_64bit_addr); - // The range list entry is a base address selection entry. - if (begin == max_addr) - { - base_addr = end; - continue; - } - // The range list entry is an end of list entry. - if (begin == 0 && end == 0) - { - break; - } - // Check if the given address falls in the range list entry. - // 2.17.3 Non-Contiguous Address Ranges - // The applicable base address of a range list entry is determined by the - // closest preceding base address selection entry (see below) in the same - // range list. If there is no such selection entry, then the applicable base - // address defaults to the base address of the compilation unit. - if (base_addr && address >= begin + *base_addr && address < end + *base_addr) + auto rnglists = rnglists_; + rnglists.remove_prefix(offset); + + while (!rnglists.empty()) { - return true; - } - } + auto kind = read(rnglists); + switch (kind) + { + case DW_RLE_end_of_list: + return false; + case DW_RLE_base_addressx: { + auto index = readULEB(rnglists); + auto sp = addr_.substr(*cu.addr_base + index * sizeof(uint64_t)); + base_addr = read(sp); + } + break; - return false; -} + case DW_RLE_startx_endx: { + auto index_start = readULEB(rnglists); + auto index_end = readULEB(rnglists); + auto sp_start = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t)); + auto start = read(sp_start); + + auto sp_end = addr_.substr(*cu.addr_base + index_end * sizeof(uint64_t)); + auto end = read(sp_end); + if (address >= start && address < end) + { + return true; + } + } + break; -// static -Dwarf::CompilationUnit Dwarf::findCompilationUnit(std::string_view info, uint64_t targetOffset) -{ - SAFE_CHECK(targetOffset < info.size(), "unexpected target address"); - uint64_t offset = 0; - while (offset < info.size()) - { - std::string_view chunk(info); - chunk.remove_prefix(offset); + case DW_RLE_startx_length: { + auto index_start = readULEB(rnglists); + auto length = readULEB(rnglists); + auto sp_start = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t)); + auto start = read(sp_start); + + auto sp_end = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t) + length); + auto end = read(sp_end); + if (start != end && address >= start && address < end) + { + return true; + } + } + break; - auto initial_length = read(chunk); - auto is_64bit = (initial_length == uint32_t(-1)); - auto size = is_64bit ? read(chunk) : initial_length; - SAFE_CHECK(size <= chunk.size(), "invalid chunk size"); - size += is_64bit ? 12 : 4; + case DW_RLE_offset_pair: { + auto offset_start = readULEB(rnglists); + auto offset_end = readULEB(rnglists); + if (base_addr && address >= (*base_addr + offset_start) && address < (*base_addr + offset_end)) + { + return true; + } + } + break; - if (offset + size > targetOffset) - { - break; + case DW_RLE_base_address: + base_addr = read(rnglists); + break; + + case DW_RLE_start_end: { + uint64_t start = read(rnglists); + uint64_t end = read(rnglists); + if (address >= start && address < end) + { + return true; + } + } + break; + + case DW_RLE_start_length: { + uint64_t start = read(rnglists); + uint64_t end = start + readULEB(rnglists); + if (address >= start && address < end) + { + return true; + } + } + break; + + default: + SAFE_CHECK(false, "Unexpected debug_rnglists entry kind"); + } } - offset += size; } - return getCompilationUnit(info, offset); + return false; } -Dwarf::LineNumberVM::LineNumberVM(std::string_view data, std::string_view compilationDirectory) +Dwarf::LineNumberVM::LineNumberVM( + std::string_view data, + std::string_view compilationDirectory, + std::string_view debugStr, + std::string_view debugLineStr) : compilationDirectory_(compilationDirectory) + , debugStr_(debugStr) + , debugLineStr_(debugLineStr) { Section section(data); SAFE_CHECK(section.next(data_), "invalid line number VM"); @@ -1238,17 +1639,154 @@ void Dwarf::LineNumberVM::reset() discriminator_ = 0; } +struct LineNumberAttribute +{ + uint64_t content_type_code; + uint64_t form_code; + std::variant attr_value; +}; + +LineNumberAttribute readLineNumberAttribute( + bool is64_bit, std::string_view & format, std::string_view & entries, std::string_view debugStr, std::string_view debugLineStr) +{ + uint64_t content_type_code = readULEB(format); + uint64_t form_code = readULEB(format); + std::variant attr_value; + + switch (content_type_code) + { + case DW_LNCT_path: { + switch (form_code) + { + case DW_FORM_string: + attr_value = readNullTerminated(entries); + break; + case DW_FORM_line_strp: { + auto off = readOffset(entries, is64_bit); + attr_value = getStringFromStringSection(debugLineStr, off); + } + break; + case DW_FORM_strp: + attr_value = getStringFromStringSection(debugStr, readOffset(entries, is64_bit)); + break; + case DW_FORM_strp_sup: + SAFE_CHECK(false, "Unexpected DW_FORM_strp_sup"); + break; + default: + SAFE_CHECK(false, "Unexpected form for DW_LNCT_path"); + break; + } + } + break; + + case DW_LNCT_directory_index: { + switch (form_code) + { + case DW_FORM_data1: + attr_value = read(entries); + break; + case DW_FORM_data2: + attr_value = read(entries); + break; + case DW_FORM_udata: + attr_value = readULEB(entries); + break; + default: + SAFE_CHECK(false, "Unexpected form for DW_LNCT_directory_index"); + break; + } + } + break; + + case DW_LNCT_timestamp: { + switch (form_code) + { + case DW_FORM_udata: + attr_value = readULEB(entries); + break; + case DW_FORM_data4: + attr_value = read(entries); + break; + case DW_FORM_data8: + attr_value = read(entries); + break; + case DW_FORM_block: + attr_value = readBytes(entries, readULEB(entries)); + break; + default: + SAFE_CHECK(false, "Unexpected form for DW_LNCT_timestamp"); + } + } + break; + + case DW_LNCT_size: { + switch (form_code) + { + case DW_FORM_udata: + attr_value = readULEB(entries); + break; + case DW_FORM_data1: + attr_value = read(entries); + break; + case DW_FORM_data2: + attr_value = read(entries); + break; + case DW_FORM_data4: + attr_value = read(entries); + break; + case DW_FORM_data8: + attr_value = read(entries); + break; + default: + SAFE_CHECK(false, "Unexpected form for DW_LNCT_size"); + break; + } + } + break; + + case DW_LNCT_MD5: { + switch (form_code) + { + case DW_FORM_data16: + attr_value = readBytes(entries, 16); + break; + default: + SAFE_CHECK(false, "Unexpected form for DW_LNCT_MD5"); + break; + } + } + break; + + default: + // TODO: skip over vendor data as specified by the form instead. + SAFE_CHECK(false, "Unexpected vendor content type code"); + break; + } + return { + .content_type_code = content_type_code, + .form_code = form_code, + .attr_value = attr_value, + }; +} + void Dwarf::LineNumberVM::init() { version_ = read(data_); - SAFE_CHECK(version_ >= 2 && version_ <= 4, "invalid version in line number VM"); + SAFE_CHECK(version_ >= 2 && version_ <= 5, "invalid version in line number VM: {}", version_); + if (version_ == 5) + { + auto address_size = read(data_); + SAFE_CHECK(address_size == sizeof(uintptr_t), "Unexpected Line Number Table address_size"); + auto segment_selector_size = read(data_); + SAFE_CHECK(segment_selector_size == 0, "Segments not supported"); + } uint64_t header_length = readOffset(data_, is64Bit_); SAFE_CHECK(header_length <= data_.size(), "invalid line number VM header length"); std::string_view header(data_.data(), header_length); data_ = std::string_view(header.end(), data_.end() - header.end()); minLength_ = read(header); - if (version_ == 4) + if (version_ >= 4) { // Version 2 and 3 records don't have this uint8_t max_ops_per_instruction = read(header); SAFE_CHECK(max_ops_per_instruction == 1, "VLIW not supported"); @@ -1258,29 +1796,78 @@ void Dwarf::LineNumberVM::init() lineRange_ = read(header); opcodeBase_ = read(header); SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base"); - standardOpcodeLengths_ = reinterpret_cast(header.data()); //-V506 + standardOpcodeLengths_ = reinterpret_cast(header.data()); header.remove_prefix(opcodeBase_ - 1); - // We don't want to use heap, so we don't keep an unbounded amount of state. - // We'll just skip over include directories and file names here, and - // we'll loop again when we actually need to retrieve one. - std::string_view sp; - const char * tmp = header.data(); - includeDirectoryCount_ = 0; - while (!(sp = readNullTerminated(header)).empty()) + if (version_ <= 4) { - ++includeDirectoryCount_; - } - includeDirectories_ = std::string_view(tmp, header.data() - tmp); + // We don't want to use heap, so we don't keep an unbounded amount of state. + // We'll just skip over include directories and file names here, and + // we'll loop again when we actually need to retrieve one. + std::string_view sp; + const char * tmp = header.data(); + v4_.includeDirectoryCount = 0; + while (!(sp = readNullTerminated(header)).empty()) + { + ++v4_.includeDirectoryCount; + } + v4_.includeDirectories = {tmp, header.data()}; - tmp = header.data(); - FileName fn; - fileNameCount_ = 0; - while (readFileName(header, fn)) + tmp = header.data(); + FileName fn; + v4_.fileNameCount = 0; + while (readFileName(header, fn)) + { + ++v4_.fileNameCount; + } + v4_.fileNames = {tmp, header.data()}; + } + else if (version_ == 5) { - ++fileNameCount_; + v5_.directoryEntryFormatCount = read(header); + const char * tmp = header.data(); + for (uint8_t i = 0; i < v5_.directoryEntryFormatCount; i++) + { + // A sequence of directory entry format descriptions. Each description + // consists of a pair of ULEB128 values: + readULEB(header); // A content type code + readULEB(header); // A form code using the attribute form codes + } + v5_.directoryEntryFormat = {tmp, header.data()}; + v5_.directoriesCount = readULEB(header); + tmp = header.data(); + for (uint64_t i = 0; i < v5_.directoriesCount; i++) + { + std::string_view format = v5_.directoryEntryFormat; + for (uint8_t f = 0; f < v5_.directoryEntryFormatCount; f++) + { + readLineNumberAttribute(is64Bit_, format, header, debugStr_, debugLineStr_); + } + } + v5_.directories = {tmp, header.data()}; + + v5_.fileNameEntryFormatCount = read(header); + tmp = header.data(); + for (uint8_t i = 0; i < v5_.fileNameEntryFormatCount; i++) + { + // A sequence of file entry format descriptions. Each description + // consists of a pair of ULEB128 values: + readULEB(header); // A content type code + readULEB(header); // A form code using the attribute form codes + } + v5_.fileNameEntryFormat = {tmp, header.data()}; + v5_.fileNamesCount = readULEB(header); + tmp = header.data(); + for (uint64_t i = 0; i < v5_.fileNamesCount; i++) + { + std::string_view format = v5_.fileNameEntryFormat; + for (uint8_t f = 0; f < v5_.fileNameEntryFormatCount; f++) + { + readLineNumberAttribute(is64Bit_, format, header, debugStr_, debugLineStr_); + } + } + v5_.fileNames = {tmp, header.data()}; } - fileNames_ = std::string_view(tmp, header.data() - tmp); } bool Dwarf::LineNumberVM::next(std::string_view & program) @@ -1296,54 +1883,110 @@ bool Dwarf::LineNumberVM::next(std::string_view & program) Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index) const { - SAFE_CHECK(index != 0, "invalid file index 0"); - - FileName fn; - if (index <= fileNameCount_) + if (version_ <= 4) { - std::string_view file_names = fileNames_; - for (; index; --index) + SAFE_CHECK(index != 0, "invalid file index 0"); + FileName fn; + if (index <= v4_.fileNameCount) { - if (!readFileName(file_names, fn)) + std::string_view file_names = v4_.fileNames; + for (; index; --index) { - abort(); + if (!readFileName(file_names, fn)) + { + abort(); + } } + return fn; } - return fn; - } - index -= fileNameCount_; + index -= v4_.fileNameCount; - std::string_view program = data_; - for (; index; --index) + std::string_view program = data_; + for (; index; --index) + { + SAFE_CHECK(nextDefineFile(program, fn), "invalid file index"); + } + + return fn; + } + else { - SAFE_CHECK(nextDefineFile(program, fn), "invalid file index"); + FileName fn; + SAFE_CHECK(index < v5_.fileNamesCount, "invalid file index"); + std::string_view file_names = v5_.fileNames; + for (uint64_t i = 0; i < v5_.fileNamesCount; i++) + { + std::string_view format = v5_.fileNameEntryFormat; + for (uint8_t f = 0; f < v5_.fileNameEntryFormatCount; f++) + { + auto attr = readLineNumberAttribute(is64Bit_, format, file_names, debugStr_, debugLineStr_); + if (i == index) + { + switch (attr.content_type_code) // NOLINT(bugprone-switch-missing-default-case) + { + case DW_LNCT_path: + fn.relativeName = std::get(attr.attr_value); + break; + case DW_LNCT_directory_index: + fn.directoryIndex = std::get(attr.attr_value); + break; + } + } + } + } + return fn; } - - return fn; } std::string_view Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index) const { - if (index == 0) + if (version_ <= 4) { - return std::string_view(); - } + if (index == 0) + { + // In DWARF <= 4 the current directory is not represented in the + // directories field and a directory index of 0 implicitly referred to + // that directory as found in the DW_AT_comp_dir attribute of the + // compilation unit debugging information entry. + return {}; + } + + SAFE_CHECK(index <= v4_.includeDirectoryCount, "invalid include directory"); - SAFE_CHECK(index <= includeDirectoryCount_, "invalid include directory"); + std::string_view include_directories = v4_.includeDirectories; + std::string_view dir; + for (; index; --index) + { + dir = readNullTerminated(include_directories); + if (dir.empty()) + { + abort(); // BUG + } + } - std::string_view include_directories = includeDirectories_; - std::string_view dir; - for (; index; --index) + return dir; + } + else { - dir = readNullTerminated(include_directories); - if (dir.empty()) + SAFE_CHECK(index < v5_.directoriesCount, "invalid file index"); + std::string_view directories = v5_.directories; + for (uint64_t i = 0; i < v5_.directoriesCount; i++) { - abort(); // BUG + std::string_view format = v5_.directoryEntryFormat; + for (uint8_t f = 0; f < v5_.directoryEntryFormatCount; f++) + { + auto attr = readLineNumberAttribute(is64Bit_, format, directories, debugStr_, debugLineStr_); + if (i == index && attr.content_type_code == DW_LNCT_path) + { + return std::get(attr.attr_value); + } + } } + // This could only happen if DWARF5's directory_entry_format doesn't contain + // a DW_LNCT_path. Highly unlikely, but we shouldn't crash. + return std::string_view(""); } - - return dir; } bool Dwarf::LineNumberVM::readFileName(std::string_view & program, FileName & fn) @@ -1391,6 +2034,7 @@ bool Dwarf::LineNumberVM::nextDefineFile(std::string_view & program, FileName & if (opcode == DW_LNE_define_file) { + SAFE_CHECK(version_ < 5, "DW_LNE_define_file deprecated in DWARF5"); SAFE_CHECK(readFileName(program, fn), "invalid empty file in DW_LNE_define_file"); return true; } @@ -1424,7 +2068,7 @@ Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & pro { // standard opcode // Only interpret opcodes that are recognized by the version we're parsing; // the others are vendor extensions and we should ignore them. - switch (opcode) + switch (opcode) // NOLINT(bugprone-switch-missing-default-case) { case DW_LNS_copy: basicBlock_ = false; @@ -1496,7 +2140,7 @@ Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & pro auto extended_opcode = read(program); --length; - switch (extended_opcode) + switch (extended_opcode) // NOLINT(bugprone-switch-missing-default-case) { case DW_LNE_end_sequence: return END; @@ -1504,6 +2148,7 @@ Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & pro address_ = read(program); return CONTINUE; case DW_LNE_define_file: + SAFE_CHECK(version_ < 5, "DW_LNE_define_file deprecated in DWARF5"); // We can't process DW_LNE_define_file here, as it would require us to // use unbounded amounts of state (ie. use the heap). We'll do a second // pass (using nextDefineFile()) if necessary. @@ -1518,6 +2163,16 @@ Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & pro return CONTINUE; } +Dwarf::Path Dwarf::LineNumberVM::getFullFileName(uint64_t index) const +{ + auto fn = getFileName(index); + // DWARF <= 4: the current dir is not represented in the CU's Line Number + // Program Header and relies on the CU's DW_AT_comp_dir. + // DWARF 5: the current directory is explicitly present. + const std::string_view base_dir = version_ == 5 ? "" : compilationDirectory_; + return Path(base_dir, getIncludeDirectory(fn.directoryIndex), fn.relativeName); +} + bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path & file, uint64_t & line) { std::string_view program = data_; @@ -1557,12 +2212,18 @@ bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path & file, uint64_t & // Found it! Note that ">" is indeed correct (not ">="), as each // sequence is guaranteed to have one entry past-the-end (emitted by // DW_LNE_end_sequence) - if (prev_file == 0) + // + // NOTE: In DWARF <= 4 the file register is non-zero. + // See DWARF 4: 6.2.4 The Line Number Program Header + // "The line number program assigns numbers to each of the file + // entries in order, beginning with 1, and uses those numbers instead + // of file names in the file register." + // DWARF 5 has a different include directory/file header and 0 is valid. + if (version_ <= 4 && prev_file == 0) { return false; } - auto fn = getFileName(prev_file); - file = Path(compilationDirectory_, getIncludeDirectory(fn.directoryIndex), fn.relativeName); + file = getFullFileName(prev_file); line = prev_line; return true; } diff --git a/src/Common/Dwarf.h b/src/Common/Dwarf.h index 3fea66c9a97..d262178b8a2 100644 --- a/src/Common/Dwarf.h +++ b/src/Common/Dwarf.h @@ -45,8 +45,8 @@ class Elf; * can parse Debug Information Entries (DIEs), abbreviations, attributes (of * all forms), and we can interpret bytecode for the line number VM. * - * We can interpret DWARF records of version 2, 3, or 4, although we don't - * actually support many of the version 4 features (such as VLIW, multiple + * We can interpret DWARF records of version 2, 3, 4, or 5, although we don't + * actually support many of the features of versions 4 and 5 (such as VLIW, multiple * operations per instruction) * * Note that the DWARF record parser does not allocate heap memory at all. @@ -112,16 +112,16 @@ class Dwarf final // TODO(tudorb): Implement operator==, operator!=; not as easy as it // seems as the same path can be represented in multiple ways private: - std::string_view baseDir_; - std::string_view subDir_; - std::string_view file_; + std::string_view baseDir_; /// NOLINT + std::string_view subDir_; /// NOLINT + std::string_view file_; /// NOLINT }; // Indicates inline function `name` is called at `line@file`. struct CallLocation { Path file = {}; - uint64_t line; + uint64_t line = 0; std::string_view name; }; @@ -171,9 +171,7 @@ class Dwarf final private: static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset); - void init(); - - std::shared_ptr elf_; + std::shared_ptr elf_; /// NOLINT // DWARF section made up of chunks, each prefixed with a length header. // The length indicates whether the chunk is DWARF-32 or DWARF-64, which @@ -182,7 +180,7 @@ class Dwarf final class Section { public: - Section() : is64Bit_(false) {} + Section() : is64_bit(false) {} explicit Section(std::string_view d); @@ -191,19 +189,19 @@ class Dwarf final bool next(std::string_view & chunk); // Is the current chunk 64 bit? - bool is64Bit() const { return is64Bit_; } + bool is64Bit() const { return is64_bit; } private: // Yes, 32- and 64- bit sections may coexist. Yikes! - bool is64Bit_; - std::string_view data_; + bool is64_bit; + std::string_view data; }; // Abbreviation for a Debugging Information Entry. struct DIEAbbreviation { - uint64_t code; - uint64_t tag; + uint64_t code = 0; + uint64_t tag = 0; bool has_children = false; std::string_view attributes; @@ -215,11 +213,11 @@ class Dwarf final // provide a description of a corresponding entity in the source program. struct Die { - bool is64Bit; + bool is64Bit; /// NOLINT // Offset from start to first attribute uint8_t attr_offset; // Offset within debug info. - uint32_t offset; + uint64_t offset; uint64_t code; DIEAbbreviation abbr; }; @@ -228,6 +226,7 @@ class Dwarf final { uint64_t name = 0; uint64_t form = 0; + int64_t implicitConst = 0; // only set when form=DW_FORM_implicit_const explicit operator bool() const { return name != 0 || form != 0; } }; @@ -239,32 +238,45 @@ class Dwarf final std::variant attr_value; }; + enum + { + DW_UT_compile = 0x01, + DW_UT_skeleton = 0x04, + }; + struct CompilationUnit { - bool is64Bit; - uint8_t version; - uint8_t addr_size; + bool is64Bit = false; /// NOLINT + uint8_t version = 0; + uint8_t unit_type = DW_UT_compile; // DW_UT_compile or DW_UT_skeleton + uint8_t addr_size = 0; // Offset in .debug_info of this compilation unit. - uint32_t offset; - uint32_t size; + uint64_t offset = 0; + uint64_t size = 0; // Offset in .debug_info for the first DIE in this compilation unit. - uint32_t first_die; - uint64_t abbrev_offset; + uint64_t first_die = 0; + uint64_t abbrev_offset = 0; + + // The beginning of the CU's contribution to .debug_addr + std::optional addr_base; // DW_AT_addr_base (DWARF 5) + // The beginning of the offsets table (immediately following the + // header) of the CU's contribution to .debug_loclists + std::optional loclists_base; // DW_AT_loclists_base (DWARF 5) + // The beginning of the offsets table (immediately following the + // header) of the CU's contribution to .debug_rnglists + std::optional rnglists_base; // DW_AT_rnglists_base (DWARF 5) + // Points to the first string offset of the compilation unit’s + // contribution to the .debug_str_offsets (or .debug_str_offsets.dwo) section. + std::optional str_offsets_base; // DW_AT_str_offsets_base (DWARF 5) + // Only the CompilationUnit that contains the caller functions needs this cache. // Indexed by (abbr.code - 1) if (abbr.code - 1) < abbrCache.size(); std::vector abbr_cache; }; - static CompilationUnit getCompilationUnit(std::string_view info, uint64_t offset); - - /** cu must exist during the life cycle of created detail::Die. */ + /** cu must exist during the life cycle of created Die. */ Die getDieAtOffset(const CompilationUnit & cu, uint64_t offset) const; - /** - * Find the actual definition DIE instead of declaration for the given die. - */ - Die findDefinitionDie(const CompilationUnit & cu, const Die & die) const; - bool findLocation( uintptr_t address, LocationInfoMode mode, @@ -283,16 +295,16 @@ class Dwarf final class LineNumberVM { public: - LineNumberVM(std::string_view data, std::string_view compilationDirectory); + LineNumberVM( + std::string_view data, + std::string_view compilationDirectory, + std::string_view debugStr, + std::string_view debugLineStr); bool findAddress(uintptr_t target, Path & file, uint64_t & line); /** Gets full file name at given index including directory. */ - Path getFullFileName(uint64_t index) const - { - auto fn = getFileName(index); - return Path({}, getIncludeDirectory(fn.directoryIndex), fn.relativeName); - } + Path getFullFileName(uint64_t index) const; private: void init(); @@ -311,10 +323,10 @@ class Dwarf final struct FileName { - std::string_view relativeName; + std::string_view relativeName; /// NOLINT // 0 = current compilation directory // otherwise, 1-based index in the list of include directories - uint64_t directoryIndex; + uint64_t directoryIndex; /// NOLINT }; // Read one FileName object, remove_prefix program static bool readFileName(std::string_view & program, FileName & fn); @@ -332,37 +344,55 @@ class Dwarf final bool nextDefineFile(std::string_view & program, FileName & fn) const; // Initialization - bool is64Bit_; - std::string_view data_; - std::string_view compilationDirectory_; + bool is64Bit_; /// NOLINT + std::string_view data_; /// NOLINT + std::string_view compilationDirectory_; /// NOLINT + std::string_view debugStr_; // needed for DWARF 5 /// NOLINT + std::string_view debugLineStr_; // DWARF 5 /// NOLINT // Header - uint16_t version_; - uint8_t minLength_; - bool defaultIsStmt_; - int8_t lineBase_; - uint8_t lineRange_; - uint8_t opcodeBase_; - const uint8_t * standardOpcodeLengths_; + uint16_t version_; /// NOLINT + uint8_t minLength_; /// NOLINT + bool defaultIsStmt_; /// NOLINT + int8_t lineBase_; /// NOLINT + uint8_t lineRange_; /// NOLINT + uint8_t opcodeBase_; /// NOLINT + const uint8_t * standardOpcodeLengths_; /// NOLINT + + // 6.2.4 The Line Number Program Header. + struct + { + size_t includeDirectoryCount; + std::string_view includeDirectories; + size_t fileNameCount; + std::string_view fileNames; + } v4_; - std::string_view includeDirectories_; - size_t includeDirectoryCount_; + struct + { + uint8_t directoryEntryFormatCount; + std::string_view directoryEntryFormat; + uint64_t directoriesCount; + std::string_view directories; - std::string_view fileNames_; - size_t fileNameCount_; + uint8_t fileNameEntryFormatCount; + std::string_view fileNameEntryFormat; + uint64_t fileNamesCount; + std::string_view fileNames; + } v5_; // State machine registers - uint64_t address_; - uint64_t file_; - uint64_t line_; - uint64_t column_; - bool isStmt_; - bool basicBlock_; - bool endSequence_; - bool prologueEnd_; - bool epilogueBegin_; - uint64_t isa_; - uint64_t discriminator_; + uint64_t address_; /// NOLINT + uint64_t file_; /// NOLINT + uint64_t line_; /// NOLINT + uint64_t column_; /// NOLINT + bool isStmt_; /// NOLINT + bool basicBlock_; /// NOLINT + bool endSequence_; /// NOLINT + bool prologueEnd_; /// NOLINT + bool epilogueBegin_; /// NOLINT + uint64_t isa_; /// NOLINT + uint64_t discriminator_; /// NOLINT }; /** @@ -402,49 +432,51 @@ class Dwarf final */ size_t forEachAttribute(const CompilationUnit & cu, const Die & die, std::function f) const; - Attribute readAttribute(const Die & die, AttributeSpec spec, std::string_view & info) const; + Attribute readAttribute( + const CompilationUnit & cu, + const Die & die, + AttributeSpec spec, + std::string_view & info) const; // Read one attribute pair, remove_prefix sp; returns <0, 0> at end. static AttributeSpec readAttributeSpec(std::string_view & sp); // Read one attribute value, remove_prefix sp using AttributeValue = std::variant; - AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const; + AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64_bit) const; // Get an ELF section by name, return true if found - bool getSection(const char * name, std::string_view * section) const; + std::string_view getSection(const char * name) const; - // Get a string from the .debug_str section - std::string_view getStringFromStringSection(uint64_t offset) const; + CompilationUnit getCompilationUnit(uint64_t offset) const; + // Finds the Compilation Unit starting at offset. + CompilationUnit findCompilationUnit(uint64_t targetOffset) const; - template - std::optional getAttribute(const CompilationUnit & cu, const Die & die, uint64_t attr_name) const - { - std::optional result; - forEachAttribute(cu, die, [&](const Attribute & attr) - { - if (attr.spec.name == attr_name) - { - result = std::get(attr.attr_value); - return false; - } - return true; - }); - return result; - } + // Parses an attribute of "reference" form class, i.e. a reference to another DIE. + // Returns the unit containing the target DIE (nullopt if it's in the same unit as the source DIE) + // and the offset of the target DIE (relative to .debug_info, not to unit). + std::optional, uint64_t>> getReferenceAttribute( + const CompilationUnit & cu, const Die & die, uint64_t attr_name) const; // Check if the given address is in the range list at the given offset in .debug_ranges. - bool isAddrInRangeList(uint64_t address, std::optional base_addr, size_t offset, uint8_t addr_size) const; - - // Finds the Compilation Unit starting at offset. - static CompilationUnit findCompilationUnit(std::string_view info, uint64_t targetOffset); - - std::string_view info_; // .debug_info - std::string_view abbrev_; // .debug_abbrev - std::string_view aranges_; // .debug_aranges - std::string_view line_; // .debug_line - std::string_view strings_; // .debug_str - std::string_view ranges_; // .debug_ranges + bool isAddrInRangeList( + const CompilationUnit & cu, + uint64_t address, + std::optional base_addr, + size_t offset, + uint8_t addr_size) const; + + std::string_view abbrev_; // .debug_abbrev /// NOLINT + std::string_view addr_; // .debug_addr (DWARF 5) /// NOLINT + std::string_view aranges_; // .debug_aranges /// NOLINT + std::string_view info_; // .debug_info /// NOLINT + std::string_view line_; // .debug_line /// NOLINT + std::string_view line_str_; // .debug_line_str (DWARF 5) /// NOLINT + std::string_view loclists_; // .debug_loclists (DWARF 5) /// NOLINT + std::string_view ranges_; // .debug_ranges /// NOLINT + std::string_view rnglists_; // .debug_rnglists (DWARF 5) /// NOLINT + std::string_view str_; // .debug_str /// NOLINT + std::string_view str_offsets_; // .debug_str_offsets (DWARF 5) /// NOLINT }; } diff --git a/src/Common/Elf.cpp b/src/Common/Elf.cpp index ee8e01c8c32..2015d61e4dd 100644 --- a/src/Common/Elf.cpp +++ b/src/Common/Elf.cpp @@ -16,19 +16,31 @@ namespace ErrorCodes } -Elf::Elf(const std::string & path) - : in(path, 0) +Elf::Elf(const std::string & path_) { + in.emplace(path_, 0); + init(in->buffer().begin(), in->buffer().size(), path_); +} + +Elf::Elf(const char * data, size_t size, const std::string & path_) +{ + init(data, size, path_); +} + +void Elf::init(const char * data, size_t size, const std::string & path_) +{ + path = path_; + mapped = data; + elf_size = size; + /// Check if it's an elf. - elf_size = in.buffer().size(); if (elf_size < sizeof(ElfEhdr)) - throw Exception("The size of supposedly ELF file is too small", ErrorCodes::CANNOT_PARSE_ELF); + throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The size of supposedly ELF file '{}' is too small", path); - mapped = in.buffer().begin(); header = reinterpret_cast(mapped); if (memcmp(header->e_ident, "\x7F""ELF", 4) != 0) - throw Exception("The file is not ELF according to magic", ErrorCodes::CANNOT_PARSE_ELF); + throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The file '{}' is not ELF according to magic", path); /// Get section header. ElfOff section_header_offset = header->e_shoff; @@ -37,7 +49,7 @@ Elf::Elf(const std::string & path) if (!section_header_offset || !section_header_num_entries || section_header_offset + section_header_num_entries * sizeof(ElfShdr) > elf_size) - throw Exception("The ELF is truncated (section header points after end of file)", ErrorCodes::CANNOT_PARSE_ELF); + throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The ELF '{}' is truncated (section header points after end of file)", path); section_headers = reinterpret_cast(mapped + section_header_offset); @@ -48,11 +60,11 @@ Elf::Elf(const std::string & path) }); if (!section_names_strtab) - throw Exception("The ELF doesn't have string table with section names", ErrorCodes::CANNOT_PARSE_ELF); + throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The ELF '{}' doesn't have string table with section names", path); ElfOff section_names_offset = section_names_strtab->header.sh_offset; if (section_names_offset >= elf_size) - throw Exception("The ELF is truncated (section names string table points after end of file)", ErrorCodes::CANNOT_PARSE_ELF); + throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The ELF '{}' is truncated (section names string table points after end of file)", path); section_names = reinterpret_cast(mapped + section_names_offset); @@ -64,7 +76,7 @@ Elf::Elf(const std::string & path) if (!program_header_offset || !program_header_num_entries || program_header_offset + program_header_num_entries * sizeof(ElfPhdr) > elf_size) - throw Exception("The ELF is truncated (program header points after end of file)", ErrorCodes::CANNOT_PARSE_ELF); + throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The ELF '{}' is truncated (program header points after end of file)", path); program_headers = reinterpret_cast(mapped + program_header_offset); } @@ -119,6 +131,24 @@ std::optional Elf::findSectionByName(const char * name) const String Elf::getBuildID() const { + /// Section headers are the first choice for a debuginfo file + if (String build_id; iterateSections([&build_id](const Section & section, size_t) + { + if (section.header.sh_type == SHT_NOTE) + { + build_id = Elf::getBuildID(section.begin(), section.size()); + if (!build_id.empty()) + { + return true; + } + } + return false; + })) + { + return build_id; + } + + /// fallback to PHDR for (size_t idx = 0; idx < header->e_phnum; ++idx) { const ElfPhdr & phdr = program_headers[idx]; @@ -126,10 +156,16 @@ String Elf::getBuildID() const if (phdr.p_type == PT_NOTE) return getBuildID(mapped + phdr.p_offset, phdr.p_filesz); } + return {}; } - +#if defined(OS_SUNOS) +String Elf::getBuildID(const char * nhdr_pos, size_t size) +{ + return {}; +} +#else String Elf::getBuildID(const char * nhdr_pos, size_t size) { const char * nhdr_end = nhdr_pos + size; @@ -149,11 +185,12 @@ String Elf::getBuildID(const char * nhdr_pos, size_t size) return {}; } +#endif // OS_SUNOS -String Elf::getBinaryHash() const +String Elf::getStoredBinaryHash() const { - if (auto section = findSectionByName(".note.ClickHouse.hash")) + if (auto section = findSectionByName(".clickhouse.hash")) return {section->begin(), section->end()}; else return {}; @@ -163,7 +200,7 @@ String Elf::getBinaryHash() const const char * Elf::Section::name() const { if (!elf.section_names) - throw Exception("Section names are not initialized", ErrorCodes::CANNOT_PARSE_ELF); + throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "Section names are not initialized"); /// TODO buffer overflow is possible, we may need to check strlen. return elf.section_names + header.sh_name; @@ -185,6 +222,7 @@ size_t Elf::Section::size() const return header.sh_size; } + } #endif diff --git a/src/Common/Elf.h b/src/Common/Elf.h index 8297cfdee29..07b75d9bd38 100644 --- a/src/Common/Elf.h +++ b/src/Common/Elf.h @@ -12,13 +12,12 @@ #include -using ElfAddr = ElfW(Addr); -using ElfEhdr = ElfW(Ehdr); -using ElfOff = ElfW(Off); -using ElfPhdr = ElfW(Phdr); -using ElfShdr = ElfW(Shdr); -using ElfNhdr = ElfW(Nhdr); -using ElfSym = ElfW(Sym); +using ElfEhdr = Elf64_Ehdr; +using ElfOff = Elf64_Off; +using ElfPhdr = Elf64_Phdr; +using ElfShdr = Elf64_Shdr; +using ElfNhdr = Elf64_Nhdr; +using ElfSym = Elf64_Sym; namespace RK @@ -44,7 +43,8 @@ class Elf final const Elf & elf; }; - explicit Elf(const std::string & path); + explicit Elf(const std::string & path_); + Elf(const char * data, size_t size, const std::string & path_); bool iterateSections(std::function && pred) const; std::optional
findSection(std::function && pred) const; @@ -54,22 +54,26 @@ class Elf final const char * end() const { return mapped + elf_size; } size_t size() const { return elf_size; } - /// Obtain build id from PT_NOTES section of program headers. Return empty string if does not exist. + /// Obtain build id from SHT_NOTE of section headers (fallback to PT_NOTES section of program headers). + /// Return empty string if does not exist. /// The string is returned in binary. Note that "readelf -n ./clickhouse-server" prints it in hex. String getBuildID() const; static String getBuildID(const char * nhdr_pos, size_t size); /// Hash of the binary for integrity checks. - String getBinaryHash() const; + String getStoredBinaryHash() const; private: - MMapReadBufferFromFile in; + std::string path; // just for error messages + std::optional in; size_t elf_size; const char * mapped; const ElfEhdr * header; const ElfShdr * section_headers; const ElfPhdr * program_headers; const char * section_names = nullptr; + + void init(const char * data, size_t size, const std::string & path_); }; } From 5683bb55fe96dc8eeec578b7ec70e5ce77d27050 Mon Sep 17 00:00:00 2001 From: lzydmxy <13126752315@163.com> Date: Wed, 11 Sep 2024 18:09:48 +0800 Subject: [PATCH 2/2] Fix typos --- src/Common/Dwarf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp index a6525846e17..78808c594a9 100644 --- a/src/Common/Dwarf.cpp +++ b/src/Common/Dwarf.cpp @@ -708,7 +708,7 @@ Dwarf::Attribute Dwarf::readAttribute(const CompilationUnit & cu, case DW_FORM_ref_sup4: case DW_FORM_ref_sup8: case DW_FORM_strp_sup: - SAFE_CHECK(false, "Unexpected DWARF5 supplimentary object files"); + SAFE_CHECK(false, "Unexpected DWARF5 supplementary object files"); default: SAFE_CHECK(false, "invalid attribute form");