From 25d215dc7b8d6274b5b50f4dff3fa10aa13de7db Mon Sep 17 00:00:00 2001 From: thekovic <72971433+thekovic@users.noreply.github.com> Date: Sat, 16 Nov 2024 15:23:30 +0100 Subject: [PATCH] ipl3: Introduce separate stage3 module More clearly separates stage2, which runs from RDRAM, from stage3, which runs directly from ROM. Less code is copied to RDRAM during stage1. --- boot/Makefile | 2 +- boot/cleanup.c | 92 ++++++++++++++++++++++++++++++++++++++++++++ boot/ipl3_compat.ld | 3 +- boot/ipl3_dev.ld | 1 + boot/ipl3_prod.ld | 3 +- boot/loader.c | 65 +------------------------------ boot/loader_compat.c | 30 +-------------- 7 files changed, 98 insertions(+), 98 deletions(-) create mode 100644 boot/cleanup.c diff --git a/boot/Makefile b/boot/Makefile index 5f0a697133..c3a9e7f632 100644 --- a/boot/Makefile +++ b/boot/Makefile @@ -36,7 +36,7 @@ N64_ASFLAGS += -mabi=32 -mgp32 -mfp32 -msingle-float -G0 N64_RSPASFLAGS = -march=mips1 -mabi=32 -Wa,--fatal-warnings N64_LDFLAGS = -Wl,-T$(IPL3_LDSCRIPT) -Wl,-Map=build/ipl3.map -Wl,--gc-sections -OBJS = build/ipl3.o build/minidragon.o build/rdram.o build/ique_trampoline.o build/entropy.o +OBJS = build/ipl3.o build/minidragon.o build/rdram.o build/ique_trampoline.o build/entropy.o build/cleanup.o ifeq ($(PROD),1) OBJS += build/loader.o diff --git a/boot/cleanup.c b/boot/cleanup.c new file mode 100644 index 0000000000..8bdf96e59e --- /dev/null +++ b/boot/cleanup.c @@ -0,0 +1,92 @@ +/** + * @file cleanup.c + * @brief IPL3: Stage 3 (Cleanup) + * + * This module implements the third and final stage of the loader, + * which is responsible for cleaning up after the previous stages. + * It runs directly from ROM so that we are free to clean up our breadcrumbs + * in both DMEM and RDRAM. + * + * Unlike stage 2, which runs from "high RDRAM" (the end of RDRAM), this stage 
+ * is not copied to RDRAM: it is linked right after stage 2 and executed in + * place from ROM, so it keeps running while the reserved portion of RDRAM + * and DMEM are cleared. + * + * The tasks performed by this stage are: + * + * * Notify the PIF that the boot process is finished (in COMPAT mode, + * this is skipped because the game is expected to do it instead). + * * Clear DMEM except the boot flags area (in COMPAT mode, all of DMEM is cleared). + * * Jump to the entrypoint. + */ + +#include "minidragon.h" +#include "loader.h" + +// Inform PIF that the boot process is finished. If this is not written, +// the PIF will halt the CPU after 5 seconds. This is not done by official +// IPL3 but rather left to the game to do, but for our open source IPL3, +// it seems better to leave it to the IPL3. +static inline void pif_terminate_boot(void) +{ + si_write(0x7FC, 0x8); +} + +// This is the last stage of IPL3. It runs directly from ROM so that we are +// free to clean up our breadcrumbs in both DMEM and RDRAM. +__attribute__((far, noreturn)) +void stage3(uint32_t entrypoint) +{ +#ifndef COMPAT + // Notify the PIF that the boot process is finished. This will take a while + // so start it in background. + pif_terminate_boot(); + + // Read memory size from boot flags + int memsize = *(volatile uint32_t*) 0xA4000000; +#else + int memsize = *(volatile uint32_t*) 0x80000318; +#endif + + // Reset the CPU cache, so that the application starts from a pristine state + cop0_clear_cache(); + + // Clear the reserved portion of RDRAM. To create a SP_WR_LEN value that works, + // we assume the reserved size is a multiple of 1024. It can be made to work + // also with other sizes, but this code will need to be adjusted. 
+ while (*SP_DMA_FULL) {} + *SP_RSP_ADDR = 0xA4001000; + *SP_DRAM_ADDR = memsize - TOTAL_RESERVED_SIZE; + _Static_assert((TOTAL_RESERVED_SIZE % 1024) == 0, "TOTAL_RESERVED_SIZE must be multiple of 1024"); + *SP_WR_LEN = (((TOTAL_RESERVED_SIZE >> 10) - 1) << 12) | (1024-1); + + // Clear DMEM (in non-COMPAT mode, leave the boot flags area intact). Notice + // that we can't call debugf anymore after this, because a small piece of + // debugging code (io_write) is in DMEM, so it can't be used anymore. + while (*SP_DMA_FULL) {} + *SP_DRAM_ADDR = 0x00802000; // Area > 8 MiB which is guaranteed to be empty +#ifndef COMPAT + *SP_RSP_ADDR = 0xA4000010; + *SP_RD_LEN = 4096-16-1; + + // Wait until the PIF is done. This will also clear the interrupt, so that + // we don't leave the interrupt pending when we go to the entrypoint. + si_wait(); +#else + *SP_RSP_ADDR = 0xA4000000; + *SP_RD_LEN = 4096-1; +#endif + + // RSP DMA is guaranteed to be finished by now because stage3 is running from + // ROM and it's very slow. Anyway, let's just wait to avoid bugs in the future, + // because we don't want to begin using the stack (at the end of RDRAM) before it's finished. + while (*SP_DMA_BUSY) {} + +#ifndef COMPAT + // Configure SP at the end of RDRAM. This is a good default in general, + // then of course userspace code is free to reconfigure it. + asm ("move $sp, %0" : : "r" (0x80000000 + memsize - 0x10)); +#endif + + goto *(void*)entrypoint; +} diff --git a/boot/ipl3_compat.ld b/boot/ipl3_compat.ld index 5cae0be81f..2db31dcab1 100644 --- a/boot/ipl3_compat.ld +++ b/boot/ipl3_compat.ld @@ -26,8 +26,6 @@ SECTIONS { } > dmem .text.stage23 0xB0000000 + SIZEOF(.text.dmem) : AT ( 0xA4000000 + SIZEOF(.text.dmem) ) { - KEEP(build/loader_compat.o(.text.stage3)) - . = ALIGN(8); __stage2_start = .; LONG(__stage2_end - __stage2_start); LONG(0); @@ -36,6 +34,7 @@ SECTIONS { KEEP(build/loader_compat.o(.data .data.*)) . 
= ALIGN(8); __stage2_end = .; + KEEP(build/cleanup.o(.text .text.* .data .rodata .rodata.*)) *(.bss) } > rom diff --git a/boot/ipl3_dev.ld b/boot/ipl3_dev.ld index 6909f5308b..355fc94a5f 100644 --- a/boot/ipl3_dev.ld +++ b/boot/ipl3_dev.ld @@ -41,6 +41,7 @@ SECTIONS { KEEP(build/loader.o(.data .data.*)) . = ALIGN(8); __stage2_end = .; + KEEP(build/cleanup.o(.text .text.* .data .rodata .rodata.*)) build/debug.o (.text .text.* .data) *(.rodata) *(.rodata.*) diff --git a/boot/ipl3_prod.ld b/boot/ipl3_prod.ld index 4fd8259db4..5275770fef 100644 --- a/boot/ipl3_prod.ld +++ b/boot/ipl3_prod.ld @@ -36,8 +36,6 @@ SECTIONS { KEEP(*(.text.ique_trampoline)) } .text.stage23 0xB0001040 : AT ( 0xA4001040 ) { - KEEP(build/loader.o(.text.stage3)) - . = ALIGN(8); __stage2_start = .; LONG(__stage2_end - __stage2_start); LONG(0); @@ -46,5 +44,6 @@ SECTIONS { KEEP(build/loader.o(.data .data.*)) . = ALIGN(8); __stage2_end = .; + KEEP(build/cleanup.o(.text .text.* .data .rodata .rodata.*)) } > rom } diff --git a/boot/loader.c b/boot/loader.c index abb80f32b9..40abd296c9 100644 --- a/boot/loader.c +++ b/boot/loader.c @@ -4,8 +4,7 @@ * @brief IPL3: Stage 2 (ELF loader) * * This module implements the second stage of the loader, which is responsible - * of searching and loading the ELF file embedded in the ROM, and jumping - * to the entrypoint. + * of searching and loading the ELF file embedded in the ROM. * * This stage runs from "high RDRAM", that is, it is placed at the end of RDRAM. * The code is compiled to be relocatable via a trick in the Makefile, so that @@ -20,9 +19,6 @@ * stored in the ELF file itself). * * Reset the RCP hardware (SP, DP, MI, PI, SI, AI). * * Finalize the entropy accumulator and store it in the boot flags. - * * Notify the PIF that the boot process is finished. - * * Clear DMEM (except the boot flags area). - * * Jump to the entrypoint. 
*/ #include "loader.h" #include "minidragon.h" @@ -50,8 +46,6 @@ // Stage 1 functions we want to reuse __attribute__((far)) extern void rsp_bzero_async(uint32_t rdram, int size); -__attribute__((far)) -extern void cop0_clear_cache(void); __attribute__((far, noreturn)) void stage3(uint32_t entrypoint); @@ -102,15 +96,6 @@ static void fast_bzero_range(void *mem, void *mem_end) rsp_bzero_async((uint32_t)mem, size); } -static void pif_terminate_boot(void) -{ - // Inform PIF that the boot process is finished. If this is not written, - // the PIF will halt the CPU after 5 seconds. This is not done by official - // IPL3 but rather left to the game to do, but for our open source IPL3, - // it seems better to leave it to the IPL3. - si_write(0x7FC, 0x8); -} - static const unsigned char font[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xa1, 0x99, 0x85, 0x7e, 0x84, 0x82, 0xff, 0x80, 0x80, 0xc1, 0xa1, @@ -376,51 +361,3 @@ void stage2(void) // Jump to the ROM finish function stage3(entrypoint); } - -// This is the last stage of IPL3. It runs directly from ROM so that we are -// free of cleaning up our breadcrumbs in both DMEM and RDRAM. -__attribute__((far, noreturn)) -void stage3(uint32_t entrypoint) -{ - // Notify the PIF that the boot process is finished. This will take a while - // so start it in background. - pif_terminate_boot(); - - // Reset the CPU cache, so that the application starts from a pristine state - cop0_clear_cache(); - - // Read memory size from boot flags - int memsize = *(volatile uint32_t*)0xA4000000; - - // Clear the reserved portion of RDRAM. To create a SP_WR_LEN value that works, - // we assume the reserved size is a multiple of 1024. It can be made to work - // also with other sizes, but this code will need to be adjusted. 
- while (*SP_DMA_FULL) {} - *SP_RSP_ADDR = 0xA4001000; - *SP_DRAM_ADDR = memsize - TOTAL_RESERVED_SIZE; - _Static_assert((TOTAL_RESERVED_SIZE % 1024) == 0, "TOTAL_RESERVED_SIZE must be multiple of 1024"); - *SP_WR_LEN = (((TOTAL_RESERVED_SIZE >> 10) - 1) << 12) | (1024-1); - - // Clear DMEM (leave only the boot flags area intact). Notice that we can't - // call debugf anymore after this, because a small piece of debugging code - // (io_write) is in DMEM, so it can't be used anymore. - while (*SP_DMA_FULL) {} - *SP_RSP_ADDR = 0xA4000010; - *SP_DRAM_ADDR = 0x00802000; // Area > 8 MiB which is guaranteed to be empty - *SP_RD_LEN = 4096-16-1; - - // Wait until the PIF is done. This will also clear the interrupt, so that - // we don't leave the interrupt pending when we go to the entrypoint. - si_wait(); - - // RSP DMA is guaranteed to be finished by now because stage3 is running from - // ROM and it's very slow. Anyway, let's just wait to avoid bugs in the future, - // because we don't want to begin using the stack (at the end of RDRAM) before it's finished. - while (*SP_DMA_BUSY) {} - - // Configure SP at the end of RDRAM. This is a good default in general, - // then of course userspace code is free to reconfigure it. - asm ("move $sp, %0" : : "r" (0x80000000 + memsize - 0x10)); - - goto *(void*)entrypoint; -} diff --git a/boot/loader_compat.c b/boot/loader_compat.c index 52298f1722..a84d58b5ac 100644 --- a/boot/loader_compat.c +++ b/boot/loader_compat.c @@ -5,7 +5,7 @@ * * This module implements the "compatibility" version of the second stage of * the loader, which is responsible for loading a flat binary from a fixed - * ROM address and jumping to it. + * ROM address. */ #include "minidragon.h" #include "loader.h" @@ -50,31 +50,3 @@ void stage2(void) // Jump to the ROM finish function stage3(entrypoint); } - -// This is the last stage of IPL3. It runs directly from ROM so that we are -// free of cleaning up our breadcrumbs in both DMEM and RDRAM. 
-__attribute__((far, noreturn)) -void stage3(uint32_t entrypoint) -{ - // Read memory size from boot flags - int memsize = *(volatile uint32_t*)0x80000318; - - // Reset the CPU cache, so that the application starts from a pristine state - cop0_clear_cache(); - - // Clear DMEM and RDRAM stage 2 area - while (*SP_DMA_FULL) {} - *SP_RSP_ADDR = 0xA4001000; - *SP_DRAM_ADDR = memsize - TOTAL_RESERVED_SIZE; - _Static_assert((TOTAL_RESERVED_SIZE % 1024) == 0, "TOTAL_RESERVED_SIZE must be multiple of 1024"); - *SP_WR_LEN = (((TOTAL_RESERVED_SIZE >> 10) - 1) << 12) | (1024-1); - while (*SP_DMA_FULL) {} - *SP_RSP_ADDR = 0xA4000000; - *SP_DRAM_ADDR = 0x00802000; // Area > 8 MiB which is guaranteed to be empty - *SP_RD_LEN = 4096-1; - - // Wait until the DMA is done - while (*SP_DMA_BUSY) {} - - goto *(void*)entrypoint; -}