Skip to content

Commit

Permalink
ipl3: Introduce separate stage3 module
Browse files Browse the repository at this point in the history
More clearly separates stage2, which runs from RDRAM, from stage3, which runs directly from ROM. Less code is copied to RDRAM during stage1.
  • Loading branch information
thekovic committed Nov 16, 2024
1 parent a2c2f9f commit 25d215d
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 98 deletions.
2 changes: 1 addition & 1 deletion boot/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ N64_ASFLAGS += -mabi=32 -mgp32 -mfp32 -msingle-float -G0
N64_RSPASFLAGS = -march=mips1 -mabi=32 -Wa,--fatal-warnings
N64_LDFLAGS = -Wl,-T$(IPL3_LDSCRIPT) -Wl,-Map=build/ipl3.map -Wl,--gc-sections

OBJS = build/ipl3.o build/minidragon.o build/rdram.o build/ique_trampoline.o build/entropy.o
OBJS = build/ipl3.o build/minidragon.o build/rdram.o build/ique_trampoline.o build/entropy.o build/cleanup.o

ifeq ($(PROD),1)
OBJS += build/loader.o
Expand Down
92 changes: 92 additions & 0 deletions boot/cleanup.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/**
* @file cleanup.c
* @brief IPL3: Stage 3 (Cleanup)
*
* This module implements the third and final stage of the loader,
* which is responsible for cleaning up after the previous stages.
* It runs directly from ROM so that we are free to clean up our breadcrumbs
* in both DMEM and RDRAM.
*
* Unlike stage 2, which runs from "high RDRAM" (that is, it is placed at the
* end of RDRAM and compiled to be relocatable so that it can sit at dynamic
* addresses), this stage is not copied to RDRAM: it executes in place from
* ROM, which is what allows it to wipe the RDRAM area reserved for stage 2.
*
* The tasks performed by this stage are:
*
* * Notify the PIF that the boot process is finished (in COMPAT mode,
* this is skipped because the game is expected to do it instead).
* * Clear DMEM except the boot flags area (in COMPAT mode, all of DMEM is cleared).
* * Jump to the entrypoint.
*/

#include "minidragon.h"
#include "loader.h"

// Tell the PIF that booting has completed. Without this write the PIF halts
// the CPU after 5 seconds. The official IPL3 leaves this step to the game,
// but for our open source IPL3 it seems better to do it here.
static inline void pif_terminate_boot(void)
{
    // Offset and value handed to si_write() to signal "boot finished".
    enum {
        BOOT_DONE_OFFSET = 0x7FC,
        BOOT_DONE_VALUE  = 0x8,
    };

    si_write(BOOT_DONE_OFFSET, BOOT_DONE_VALUE);
}

// This is the last stage of IPL3. It runs directly from ROM so that we are
// free to clean up our breadcrumbs in both DMEM and RDRAM.
//
// entrypoint: address that execution is transferred to once cleanup is done.
// This function never returns: it ends by jumping to entrypoint.
//
// Two build variants are selected with the COMPAT macro:
//  * COMPAT not defined: notify the PIF, clear DMEM except its first 16 bytes
//    (the boot flags area), and set up $sp before jumping.
//  * COMPAT defined: no PIF notification, all 4096 bytes of DMEM are cleared,
//    and $sp is left untouched.
__attribute__((far, noreturn))
void stage3(uint32_t entrypoint)
{
#ifndef COMPAT
// Notify the PIF that the boot process is finished. This will take a while
// so start it in background.
pif_terminate_boot();

// Read memory size from boot flags
int memsize = *(volatile uint32_t*) 0xA4000000;
#else
// COMPAT: the memory size is read from address 0x80000318 instead, and the
// PIF is not notified (the game is expected to do that itself).
int memsize = *(volatile uint32_t*) 0x80000318;
#endif

// Reset the CPU cache, so that the application starts from a pristine state
cop0_clear_cache();

// Clear the reserved portion of RDRAM. To create a SP_WR_LEN value that works,
// we assume the reserved size is a multiple of 1024. It can be made to work
// also with other sizes, but this code will need to be adjusted.
// NOTE(review): the DMA source is RSP address 0xA4001000; this presumably
// relies on that memory containing zeros at this point -- confirm.
while (*SP_DMA_FULL) {} // spin until SP_DMA_FULL reads zero before programming the DMA
*SP_RSP_ADDR = 0xA4001000;
*SP_DRAM_ADDR = memsize - TOTAL_RESERVED_SIZE; // start of the last TOTAL_RESERVED_SIZE bytes of RDRAM
_Static_assert((TOTAL_RESERVED_SIZE % 1024) == 0, "TOTAL_RESERVED_SIZE must be multiple of 1024");
// Low bits: 1024-1; bits 12 and up: (number of 1024-byte chunks)-1.
*SP_WR_LEN = (((TOTAL_RESERVED_SIZE >> 10) - 1) << 12) | (1024-1);

// Clear DMEM. In non-COMPAT builds only the boot flags area (first 16 bytes)
// is left intact; in COMPAT builds all of DMEM is cleared. Notice that we can't
// call debugf anymore after this, because a small piece of debugging code
// (io_write) is in DMEM, so it can't be used anymore.
while (*SP_DMA_FULL) {}
*SP_DRAM_ADDR = 0x00802000; // Area > 8 MiB which is guaranteed to be empty
#ifndef COMPAT
// Start 16 bytes into DMEM and transfer the remaining 4096-16 bytes.
*SP_RSP_ADDR = 0xA4000010;
*SP_RD_LEN = 4096-16-1;

// Wait until the PIF is done. This will also clear the interrupt, so that
// we don't leave the interrupt pending when we go to the entrypoint.
si_wait();
#else
// COMPAT: start at the very beginning of DMEM and transfer all 4096 bytes.
*SP_RSP_ADDR = 0xA4000000;
*SP_RD_LEN = 4096-1;
#endif

// RSP DMA is guaranteed to be finished by now because stage3 is running from
// ROM and it's very slow. Anyway, let's just wait to avoid bugs in the future,
// because we don't want to begin using the stack (at the end of RDRAM) before it's finished.
while (*SP_DMA_BUSY) {}

#ifndef COMPAT
// Configure SP at the end of RDRAM. This is a good default in general,
// then of course userspace code is free to reconfigure it.
// (Here "SP" is the CPU stack pointer register $sp, set to
// 0x80000000 + memsize - 0x10.)
asm ("move $sp, %0" : : "r" (0x80000000 + memsize - 0x10));
#endif

// Transfer control to the entrypoint with a computed goto (GNU C extension).
goto *(void*)entrypoint;
}
3 changes: 1 addition & 2 deletions boot/ipl3_compat.ld
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ SECTIONS {
} > dmem

.text.stage23 0xB0000000 + SIZEOF(.text.dmem) : AT ( 0xA4000000 + SIZEOF(.text.dmem) ) {
KEEP(build/loader_compat.o(.text.stage3))
. = ALIGN(8);
__stage2_start = .;
LONG(__stage2_end - __stage2_start);
LONG(0);
Expand All @@ -36,6 +34,7 @@ SECTIONS {
KEEP(build/loader_compat.o(.data .data.*))
. = ALIGN(8);
__stage2_end = .;
KEEP(build/cleanup.o(.text .text.* .data .rodata .rodata.*))
*(.bss)
} > rom

Expand Down
1 change: 1 addition & 0 deletions boot/ipl3_dev.ld
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ SECTIONS {
KEEP(build/loader.o(.data .data.*))
. = ALIGN(8);
__stage2_end = .;
KEEP(build/cleanup.o(.text .text.* .data .rodata .rodata.*))
build/debug.o (.text .text.* .data)
*(.rodata)
*(.rodata.*)
Expand Down
3 changes: 1 addition & 2 deletions boot/ipl3_prod.ld
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ SECTIONS {
KEEP(*(.text.ique_trampoline))
}
.text.stage23 0xB0001040 : AT ( 0xA4001040 ) {
KEEP(build/loader.o(.text.stage3))
. = ALIGN(8);
__stage2_start = .;
LONG(__stage2_end - __stage2_start);
LONG(0);
Expand All @@ -46,5 +44,6 @@ SECTIONS {
KEEP(build/loader.o(.data .data.*))
. = ALIGN(8);
__stage2_end = .;
KEEP(build/cleanup.o(.text .text.* .data .rodata .rodata.*))
} > rom
}
65 changes: 1 addition & 64 deletions boot/loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
* @brief IPL3: Stage 2 (ELF loader)
*
* This module implements the second stage of the loader, which is responsible
* for searching and loading the ELF file embedded in the ROM, and jumping
* to the entrypoint.
* for searching and loading the ELF file embedded in the ROM.
*
* This stage runs from "high RDRAM", that is, it is placed at the end of RDRAM.
* The code is compiled to be relocatable via a trick in the Makefile, so that
Expand All @@ -20,9 +19,6 @@
* stored in the ELF file itself).
* * Reset the RCP hardware (SP, DP, MI, PI, SI, AI).
* * Finalize the entropy accumulator and store it in the boot flags.
* * Notify the PIF that the boot process is finished.
* * Clear DMEM (except the boot flags area).
* * Jump to the entrypoint.
*/
#include "loader.h"
#include "minidragon.h"
Expand Down Expand Up @@ -50,8 +46,6 @@
// Stage 1 functions we want to reuse
__attribute__((far))
extern void rsp_bzero_async(uint32_t rdram, int size);
__attribute__((far))
extern void cop0_clear_cache(void);

__attribute__((far, noreturn))
void stage3(uint32_t entrypoint);
Expand Down Expand Up @@ -102,15 +96,6 @@ static void fast_bzero_range(void *mem, void *mem_end)
rsp_bzero_async((uint32_t)mem, size);
}

// Tell the PIF that booting has completed. Without this write the PIF halts
// the CPU after 5 seconds. The official IPL3 leaves this step to the game,
// but for our open source IPL3 it seems better to do it here.
static void pif_terminate_boot(void)
{
    // Offset and value handed to si_write() to signal "boot finished".
    enum {
        BOOT_DONE_OFFSET = 0x7FC,
        BOOT_DONE_VALUE  = 0x8,
    };

    si_write(BOOT_DONE_OFFSET, BOOT_DONE_VALUE);
}

static const unsigned char font[] = {
0x00, 0x00, 0x00, 0x00, 0x00,
0x7e, 0xa1, 0x99, 0x85, 0x7e, 0x84, 0x82, 0xff, 0x80, 0x80, 0xc1, 0xa1,
Expand Down Expand Up @@ -376,51 +361,3 @@ void stage2(void)
// Jump to the ROM finish function
stage3(entrypoint);
}

// This is the last stage of IPL3. It runs directly from ROM so that we are
// free to clean up our breadcrumbs in both DMEM and RDRAM.
//
// entrypoint: address that execution is transferred to once cleanup is done.
// This function never returns: it ends by jumping to entrypoint.
__attribute__((far, noreturn))
void stage3(uint32_t entrypoint)
{
// Notify the PIF that the boot process is finished. This will take a while
// so start it in background.
pif_terminate_boot();

// Reset the CPU cache, so that the application starts from a pristine state
cop0_clear_cache();

// Read memory size from boot flags
int memsize = *(volatile uint32_t*)0xA4000000;

// Clear the reserved portion of RDRAM. To create a SP_WR_LEN value that works,
// we assume the reserved size is a multiple of 1024. It can be made to work
// also with other sizes, but this code will need to be adjusted.
// NOTE(review): the DMA source is RSP address 0xA4001000; this presumably
// relies on that memory containing zeros at this point -- confirm.
while (*SP_DMA_FULL) {} // spin until SP_DMA_FULL reads zero before programming the DMA
*SP_RSP_ADDR = 0xA4001000;
*SP_DRAM_ADDR = memsize - TOTAL_RESERVED_SIZE; // start of the last TOTAL_RESERVED_SIZE bytes of RDRAM
_Static_assert((TOTAL_RESERVED_SIZE % 1024) == 0, "TOTAL_RESERVED_SIZE must be multiple of 1024");
// Low bits: 1024-1; bits 12 and up: (number of 1024-byte chunks)-1.
*SP_WR_LEN = (((TOTAL_RESERVED_SIZE >> 10) - 1) << 12) | (1024-1);

// Clear DMEM (leave only the boot flags area intact). Notice that we can't
// call debugf anymore after this, because a small piece of debugging code
// (io_write) is in DMEM, so it can't be used anymore.
while (*SP_DMA_FULL) {}
*SP_RSP_ADDR = 0xA4000010; // start 16 bytes into DMEM, skipping the boot flags area
*SP_DRAM_ADDR = 0x00802000; // Area > 8 MiB which is guaranteed to be empty
*SP_RD_LEN = 4096-16-1; // the remaining 4096-16 bytes

// Wait until the PIF is done. This will also clear the interrupt, so that
// we don't leave the interrupt pending when we go to the entrypoint.
si_wait();

// RSP DMA is guaranteed to be finished by now because stage3 is running from
// ROM and it's very slow. Anyway, let's just wait to avoid bugs in the future,
// because we don't want to begin using the stack (at the end of RDRAM) before it's finished.
while (*SP_DMA_BUSY) {}

// Configure SP at the end of RDRAM. This is a good default in general,
// then of course userspace code is free to reconfigure it.
// (Here "SP" is the CPU stack pointer register $sp, set to
// 0x80000000 + memsize - 0x10.)
asm ("move $sp, %0" : : "r" (0x80000000 + memsize - 0x10));

// Transfer control to the entrypoint with a computed goto (GNU C extension).
goto *(void*)entrypoint;
}
30 changes: 1 addition & 29 deletions boot/loader_compat.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* This module implements the "compatibility" version of the second stage of
* the loader, which is responsible for loading a flat binary from a fixed
* ROM address and jumping to it.
* ROM address.
*/
#include "minidragon.h"
#include "loader.h"
Expand Down Expand Up @@ -50,31 +50,3 @@ void stage2(void)
// Jump to the ROM finish function
stage3(entrypoint);
}

// This is the last stage of IPL3. It runs directly from ROM so that we are
// free to clean up our breadcrumbs in both DMEM and RDRAM.
//
// entrypoint: address that execution is transferred to once cleanup is done.
// This function never returns: it ends by jumping to entrypoint.
__attribute__((far, noreturn))
void stage3(uint32_t entrypoint)
{
// Read memory size from boot flags
int memsize = *(volatile uint32_t*)0x80000318;

// Reset the CPU cache, so that the application starts from a pristine state
cop0_clear_cache();

// Clear DMEM and RDRAM stage 2 area
// First DMA: covers the last TOTAL_RESERVED_SIZE bytes of RDRAM.
// NOTE(review): the DMA source is RSP address 0xA4001000; this presumably
// relies on that memory containing zeros at this point -- confirm.
while (*SP_DMA_FULL) {} // spin until SP_DMA_FULL reads zero before programming the DMA
*SP_RSP_ADDR = 0xA4001000;
*SP_DRAM_ADDR = memsize - TOTAL_RESERVED_SIZE;
_Static_assert((TOTAL_RESERVED_SIZE % 1024) == 0, "TOTAL_RESERVED_SIZE must be multiple of 1024");
// Low bits: 1024-1; bits 12 and up: (number of 1024-byte chunks)-1.
*SP_WR_LEN = (((TOTAL_RESERVED_SIZE >> 10) - 1) << 12) | (1024-1);
// Second DMA: overwrites all 4096 bytes of DMEM, starting at its first byte.
while (*SP_DMA_FULL) {}
*SP_RSP_ADDR = 0xA4000000;
*SP_DRAM_ADDR = 0x00802000; // Area > 8 MiB which is guaranteed to be empty
*SP_RD_LEN = 4096-1;

// Wait until the DMA is done
while (*SP_DMA_BUSY) {}

// Transfer control to the entrypoint with a computed goto (GNU C extension).
goto *(void*)entrypoint;
}

0 comments on commit 25d215d

Please sign in to comment.