From f41bf8147b797b885d2d843bb351c6660036831f Mon Sep 17 00:00:00 2001 From: Ville Juven Date: Tue, 12 Dec 2023 15:54:37 +0200 Subject: [PATCH 1/2] riscv_pmp.c: Revert LOG2_CEIL back to run-time log2ceil function The macro LOG2_CEIL is intended to be used in the pre-processor phase. If used at run-time it will generate a massive amount of extra code (~3.5K), which is a problem, as the PMP configuration is quite often executed from a first stage bootloader with a limited amount of code memory. Code size before (first table) and after (second table) this change: Memory region Used Size Region Size %age Used envm: 112064 B 112384 B 99.72% Memory region Used Size Region Size %age Used envm: 108952 B 112384 B 96.95% --- arch/risc-v/src/common/riscv_pmp.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/arch/risc-v/src/common/riscv_pmp.c b/arch/risc-v/src/common/riscv_pmp.c index 2f63c2596db6d..3a9daa2ec24ee 100644 --- a/arch/risc-v/src/common/riscv_pmp.c +++ b/arch/risc-v/src/common/riscv_pmp.c @@ -32,7 +32,6 @@ #include #include #include -#include #include "riscv_internal.h" @@ -89,6 +88,32 @@ typedef struct pmp_entry_s pmp_entry_t; * Private Functions ****************************************************************************/ +/**************************************************************************** + * Name: log2ceil + * + * Description: + * Calculate the exponent of the up-rounded power-of-two for input. + * + * Input Parameters: + * x - Argument to calculate the power-of-two exponent from. + * + * Returned Value: + * Exponent of the power-of-two for argument, rounded up. 
+ * + ****************************************************************************/ + +static uintptr_t log2ceil(uintptr_t x) +{ + uintptr_t pot = 0; + + for (x = x - 1; x; x >>= 1) + { + pot++; + } + + return pot; +} + /**************************************************************************** * Name: pmp_check_region_attrs * @@ -143,7 +168,7 @@ static bool pmp_check_region_attrs(uintptr_t base, uintptr_t size, /* Get the power-of-two for size, rounded up */ - if ((base & ((UINT64_C(1) << LOG2_CEIL(size)) - 1)) != 0) + if ((base & ((UINT64_C(1) << log2ceil(size)) - 1)) != 0) { /* The start address is not properly aligned with size */ From 29c2080ea67029533b6bd78d7fc66b57b30970fb Mon Sep 17 00:00:00 2001 From: Ville Juven Date: Wed, 13 Dec 2023 11:39:42 +0200 Subject: [PATCH 2/2] [REVERTME] riscv/arch_elf.c: Handle PCREL_HI20/LO12_I/S relocations correctly [REVERTME] This patch is a stripped-down version of the upstream change; there are too many changes to the changed modules (would need to pull dozens of commits), so only the RISC-V elf linker is updated. There is a problem with the current elf loader for risc-v: when a pair of PCREL_HI20 / LO12 relocations is encountered, it is assumed that these will follow each other immediately, as follows: label: auipc a0, %pcrel_hi(symbol) // R_RISCV_PCREL_HI20 load/store a0, %pcrel_lo(label)(a0) // R_RISCV_PCREL_LO12_I/S With this assumption, the hi/lo relocations are both done when a hi20 relocation entry is encountered, first to the current instruction (addr) and then to the next instruction (addr + 4). However, this assumption is wrong. There is nothing in the elf relocation specification[1] that mandates this. Thus, the hi/lo relocation always needs to first fix up the hi-part, and when the lo-part is encountered, it needs to find the corresponding hi relocation entry, via the given "label". This necessitates (re-)visiting the relocation entries for the current section as well as looking for "label" in the symbol table. 
The NuttX elf loader does not allow such operations to be done in the machine specific part, so this patch fixes the relocation issue by introducing an architecture specific cache for the hi20 relocation and symbol table entries. When a lo12 relocation is encountered, the cache can be consulted to find the hi20 part. [1] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc --- arch/risc-v/include/elf.h | 21 ++++ binfmt/libelf/libelf_bind.c | 21 +++- include/nuttx/elf.h | 6 +- libs/libc/machine/risc-v/arch_elf.c | 170 ++++++++++++++++++++++++---- 4 files changed, 189 insertions(+), 29 deletions(-) diff --git a/arch/risc-v/include/elf.h b/arch/risc-v/include/elf.h index 6fd222b6feebc..2eb6270fb1258 100644 --- a/arch/risc-v/include/elf.h +++ b/arch/risc-v/include/elf.h @@ -83,4 +83,25 @@ #define R_RISCV_SET32 56 #define R_RISCV_32_PCREL 57 +#define ARCH_ELFDATA 1 +#define ARCH_ELF_RELCNT 8 + +/**************************************************************************** + * Public Types + ****************************************************************************/ + +#ifndef __ASSEMBLY__ + +struct arch_elfdata_s +{ + struct hi20_rels_s + { + uintptr_t hi20_rel; + uintptr_t hi20_offset; + } + hi20_rels[ARCH_ELF_RELCNT]; +}; +typedef struct arch_elfdata_s arch_elfdata_t; + +#endif /* __ASSEMBLY__ */ #endif /* __ARCH_RISCV_INCLUDE_ELF_H */ diff --git a/binfmt/libelf/libelf_bind.c b/binfmt/libelf/libelf_bind.c index 564e53f0e1f1c..d6a26305e0047 100644 --- a/binfmt/libelf/libelf_bind.c +++ b/binfmt/libelf/libelf_bind.c @@ -55,6 +55,15 @@ # define elf_dumpbuffer(m,b,n) #endif +#ifdef ARCH_ELFDATA +# define ARCH_ELFDATA_DEF arch_elfdata_t arch_data; \ + memset(&arch_data, 0, sizeof(arch_elfdata_t)) +# define ARCH_ELFDATA_PARM &arch_data +#else +# define ARCH_ELFDATA_DEF +# define ARCH_ELFDATA_PARM NULL +#endif + /**************************************************************************** * Private Types 
****************************************************************************/ @@ -185,6 +194,10 @@ static int elf_relocate(FAR struct elf_loadinfo_s *loadinfo, int relidx, int i; int j; + /* Define potential architecture specific elf data container */ + + ARCH_ELFDATA_DEF; + rels = kmm_malloc(CONFIG_ELF_RELOCATION_BUFFERCOUNT * sizeof(Elf_Rel)); if (rels == NULL) { @@ -334,7 +347,7 @@ static int elf_relocate(FAR struct elf_loadinfo_s *loadinfo, int relidx, /* Now perform the architecture-specific relocation */ - ret = up_relocate(rel, sym, addr); + ret = up_relocate(rel, sym, addr, ARCH_ELFDATA_PARM); if (ret < 0) { berr("ERROR: Section %d reloc %d: Relocation failed: %d\n", @@ -370,6 +383,10 @@ static int elf_relocateadd(FAR struct elf_loadinfo_s *loadinfo, int relidx, int i; int j; + /* Define potential architecture specific elf data container */ + + ARCH_ELFDATA_DEF; + relas = kmm_malloc(CONFIG_ELF_RELOCATION_BUFFERCOUNT * sizeof(Elf_Rela)); if (relas == NULL) { @@ -519,7 +536,7 @@ static int elf_relocateadd(FAR struct elf_loadinfo_s *loadinfo, int relidx, /* Now perform the architecture-specific relocation */ - ret = up_relocateadd(rela, sym, addr); + ret = up_relocateadd(rela, sym, addr, ARCH_ELFDATA_PARM); if (ret < 0) { berr("ERROR: Section %d reloc %d: Relocation failed: %d\n", diff --git a/include/nuttx/elf.h b/include/nuttx/elf.h index c0f8d9fffcba4..c7ba1337a3618 100644 --- a/include/nuttx/elf.h +++ b/include/nuttx/elf.h @@ -142,9 +142,9 @@ bool up_checkarch(FAR const Elf_Ehdr *hdr); #ifdef CONFIG_LIBC_ARCH_ELF int up_relocate(FAR const Elf_Rel *rel, FAR const Elf_Sym *sym, - uintptr_t addr); -int up_relocateadd(FAR const Elf_Rela *rel, - FAR const Elf_Sym *sym, uintptr_t addr); + uintptr_t addr, FAR void *arch_data); +int up_relocateadd(FAR const Elf_Rela *rel, FAR const Elf_Sym *sym, + uintptr_t addr, FAR void *arch_data); #endif /**************************************************************************** diff --git 
a/libs/libc/machine/risc-v/arch_elf.c b/libs/libc/machine/risc-v/arch_elf.c index 96b5e1a4a1586..b559230d23aeb 100644 --- a/libs/libc/machine/risc-v/arch_elf.c +++ b/libs/libc/machine/risc-v/arch_elf.c @@ -178,6 +178,84 @@ static void _calc_imm(long offset, long *imm_hi, long *imm_lo) *imm_hi = hi; } +/**************************************************************************** + * Name: _add_hi20 + * + * Description: + * Add PCREL_HI20 relocation offset to the LUT. When a PCREL_LO12_I/_S is + * encountered, the corresponding PCREL_HI20 value can be found from it. + * + * Input Parameters: + * arch_data - Where the PCREL_HI20 relocations are listed. + * hi20_rel - The PCREL_HI20 relocation entry. + * hi20_offset - The corresponding offset value. + * + * Returned Value: + * None. + * + ****************************************************************************/ + +static void _add_hi20(void *arch_data, uintptr_t hi20_rel, + uintptr_t hi20_offset) +{ + arch_elfdata_t *data = (arch_elfdata_t *)arch_data; + int i; + + /* Try to find a free slot from the list */ + + for (i = 0; i < ARCH_ELF_RELCNT; i++) + { + struct hi20_rels_s *hi20 = &data->hi20_rels[i]; + + if (hi20->hi20_rel == 0) + { + hi20->hi20_rel = hi20_rel; + hi20->hi20_offset = hi20_offset; + break; + } + } +} + +/**************************************************************************** + * Name: _find_hi20 + * + * Description: + * Find PCREL_HI20 relocation offset from the LUT. When a PCREL_LO12_I/_S + * is encountered, the corresponding PCREL_HI20 value is needed to do the + * relocation. + * + * Input Parameters: + * arch_data - Where the PCREL_HI20 relocations are listed. + * hi20_rel - The PCREL_HI20 relocation entry. + * + * Returned Value: + * The corresponding hi20_offset value. 
+ * + ****************************************************************************/ + +static uintptr_t _find_hi20(void *arch_data, uintptr_t hi20_rel) +{ + arch_elfdata_t *data = (arch_elfdata_t *)arch_data; + int i; + + /* Try to find the hi20 value from the list */ + + for (i = 0; i < ARCH_ELF_RELCNT; i++) + { + struct hi20_rels_s *hi20 = &data->hi20_rels[i]; + + if (hi20->hi20_rel == hi20_rel) + { + /* Found it, we can clear the entry now */ + + hi20->hi20_rel = 0; + return hi20->hi20_offset; + } + } + + return 0; +} + /**************************************************************************** * Public Functions ****************************************************************************/ @@ -266,14 +344,15 @@ bool up_checkarch(const Elf_Ehdr *ehdr) * ****************************************************************************/ -int up_relocate(const Elf_Rel *rel, const Elf_Sym *sym, uintptr_t addr) +int up_relocate(const Elf_Rel *rel, const Elf_Sym *sym, uintptr_t addr, + void *arch_data) { berr("Not implemented\n"); return -ENOSYS; } int up_relocateadd(const Elf_Rela *rel, const Elf_Sym *sym, - uintptr_t addr) + uintptr_t addr, void *arch_data) { long offset; unsigned int relotype; @@ -317,58 +396,101 @@ int up_relocateadd(const Elf_Rela *rel, const Elf_Sym *sym, break; case R_RISCV_PCREL_LO12_I: - case R_RISCV_PCREL_LO12_S: { + long imm_hi; + long imm_lo; + binfo("%s at %08" PRIxPTR " [%08" PRIx32 "] " "to sym=%p st_value=%08lx\n", _get_rname(relotype), addr, _get_val((uint16_t *)addr), sym, sym->st_value); - /* NOTE: imm value for mv has been adjusted in previous HI20 */ + offset = _find_hi20(arch_data, sym->st_value); + + /* Adjust imm for MV(ADDI) / JR (JALR) : I-type */ + + _calc_imm(offset, &imm_hi, &imm_lo); + + _add_val((uint16_t *)addr, (int32_t)imm_lo << 20); } break; - case R_RISCV_PCREL_HI20: - case R_RISCV_CALL: - case R_RISCV_CALL_PLT: + case R_RISCV_PCREL_LO12_S: { + uint32_t val; + long imm_hi; + long imm_lo; + binfo("%s at %08" PRIxPTR " 
[%08" PRIx32 "] " "to sym=%p st_value=%08lx\n", _get_rname(relotype), addr, _get_val((uint16_t *)addr), sym, sym->st_value); - offset = (long)sym->st_value + (long)rel->r_addend - (long)addr; + offset = _find_hi20(arch_data, sym->st_value); + + /* Adjust imm for SW : S-type */ + + _calc_imm(offset, &imm_hi, &imm_lo); + + val = (((int32_t)imm_lo >> 5) << 25) + + (((int32_t)imm_lo & 0x1f) << 7); + binfo("imm_lo=%ld (%lx), val=%" PRIx32 "\n", imm_lo, imm_lo, val); + + _add_val((uint16_t *)addr, val); + } + break; + + case R_RISCV_PCREL_HI20: + { long imm_hi; long imm_lo; + binfo("%s at %08" PRIxPTR " [%08" PRIx32 "] " + "to sym=%p st_value=%08lx\n", + _get_rname(relotype), + addr, _get_val((uint16_t *)addr), + sym, sym->st_value); + + offset = (long)sym->st_value + (long)rel->r_addend - (long)addr; + _calc_imm(offset, &imm_hi, &imm_lo); /* Adjust auipc (add upper immediate to pc) : 20bit */ - _add_val((uint16_t *)addr, (imm_hi << 12)); + _add_val((uint16_t *)addr, imm_hi << 12); - if ((_get_val((uint16_t *)(addr + 4)) & 0x7f) == OPCODE_SW) - { - /* Adjust imm for SW : S-type */ + /* Add the hi20 value to the cache */ - uint32_t val = - (((int32_t)imm_lo >> 5) << 25) + - (((int32_t)imm_lo & 0x1f) << 7); + _add_hi20(arch_data, addr, offset); + } + break; + + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: + { + long imm_hi; + long imm_lo; + + binfo("%s at %08" PRIxPTR " [%08" PRIx32 "] " + "to sym=%p st_value=%08lx\n", + _get_rname(relotype), + addr, _get_val((uint16_t *)addr), + sym, sym->st_value); + + offset = (long)sym->st_value + (long)rel->r_addend - (long)addr; + + _calc_imm(offset, &imm_hi, &imm_lo); + + /* Adjust auipc (add upper immediate to pc) : 20bit */ - binfo("imm_lo=%ld (%lx), val=%" PRIx32 "\n", - imm_lo, imm_lo, val); + _add_val((uint16_t *)addr, imm_hi << 12); - _add_val((uint16_t *)(addr + 4), val); - } - else - { - /* Adjust imm for MV(ADDI)/JALR : I-type */ + /* Adjust imm for CALL (JALR) : I-type */ - _add_val((uint16_t *)(addr + 4), ((int32_t)imm_lo 
<< 20)); - } + _add_val((uint16_t *)(addr + 4), (int32_t)imm_lo << 20); } break;