Skip to content

Commit

Permalink
#17307: Move ncrisc copy into ncrisc firmware
Browse files Browse the repository at this point in the history
By moving the NCRISC kernel copy into the NCRISC firmware (on Wormhole), we can perform the copy in parallel with setting up the circular buffers (CBs).
  • Loading branch information
jbaumanTT committed Feb 18, 2025
1 parent 5dc0873 commit 1cd5a87
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 17 deletions.
4 changes: 3 additions & 1 deletion tt_metal/api/tt-metalium/dev_msgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ constexpr uint32_t RUN_MSG_DONE = 0;
// 0x80808000 is a micro-optimization, calculated with 1 riscv insn
// Per-processor sync values. Firmware writes values such as GO and LOAD into the individual
// mailboxes->slave_sync fields (e.g. slave_sync.dm1, slave_sync.trisc0).
constexpr uint32_t RUN_SYNC_MSG_INIT = 0x40;
constexpr uint32_t RUN_SYNC_MSG_GO = 0x80;
// Trigger loading CBs (and IRAM) before actually running the kernel.
// BRISC writes this to slave_sync.dm1; the NCRISC wait loop exits on either GO or LOAD.
constexpr uint32_t RUN_SYNC_MSG_LOAD = 0x1;
// Passed by NCRISC (Wormhole path) to notify_brisc_and_halt_to_iram; BRISC polls slave_sync.dm1
// for this value before programming the NCRISC reset PC.
constexpr uint32_t RUN_SYNC_MSG_WAITING_FOR_RESET = 0x2;
constexpr uint32_t RUN_SYNC_MSG_DONE = 0;
// GO (0x80) in the upper three bytes of the 32-bit sync word; the low 12 bits are zero.
constexpr uint32_t RUN_SYNC_MSG_ALL_TRISCS_GO = 0x80808000;
// GO (0x80) in all four bytes of the 32-bit sync word.
constexpr uint32_t RUN_SYNC_MSG_ALL_GO = 0x80808080;
constexpr uint32_t RUN_SYNC_MSG_ALL_SLAVES_DONE = 0;

Expand Down
26 changes: 18 additions & 8 deletions tt_metal/hw/firmware/src/brisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ constexpr uint32_t RISCV_IC_TRISC_ALL_MASK = RISCV_IC_TRISC0_MASK | RISCV_IC_TRI

#define NCRISC_FIRMWARE_IN_IRAM (defined(ARCH_GRAYSKULL))

#ifdef NCRISC_HAS_IRAM
#if NCRISC_FIRMWARE_IN_IRAM
constexpr uint32_t num_cbs_to_early_init = 4; // safe small number to overlap w/ ncrisc copy
#else
constexpr uint32_t num_cbs_to_early_init = 0;
Expand Down Expand Up @@ -183,7 +183,7 @@ void set_deassert_addresses() {
}

void l1_to_ncrisc_iram_copy(uint32_t src_addr, uint16_t size, uint32_t address_offset = 0) {
#ifdef NCRISC_HAS_IRAM
#if NCRISC_FIRMWARE_IN_IRAM
// Always copy ncrisc even if its size is 0 (save branch)...
// Copy NCRISC firmware from L1 to local IRAM using tensix DMA
tdma_xmov(
Expand All @@ -196,7 +196,7 @@ void l1_to_ncrisc_iram_copy(uint32_t src_addr, uint16_t size, uint32_t address_o
}

void l1_to_ncrisc_iram_copy_wait() {
#ifdef NCRISC_HAS_IRAM
#if NCRISC_FIRMWARE_IN_IRAM
// Wait for DMA to finish
wait_tdma_movers_done(RISCV_TDMA_STATUS_FLAG_MOVER0_BUSY_MASK);
#endif
Expand Down Expand Up @@ -327,11 +327,15 @@ inline void set_ncrisc_kernel_resume_deassert_address() {

// Signals the three TRISCs to start when the launch message enables compute.
// `enables` is the processor-enable mask taken from the launch message's kernel_config.
// NOTE(review): this listing comes from a commit diff rendered without +/- markers, so the
// whole-word write to slave_sync.all and the three per-field writes below are presumably the
// removed and added forms of the same step -- confirm against the checked-out file.
inline void run_triscs(dispatch_core_processor_masks enables) {
if (enables & DISPATCH_CLASS_MASK_TENSIX_ENABLE_COMPUTE) {
// Whole-word form: one store that sets every byte of the sync word (low byte becomes 0).
mailboxes->slave_sync.all = RUN_SYNC_MSG_ALL_TRISCS_GO;
// Per-field form: touches only the TRISC fields. Presumably this leaves slave_sync.dm1 intact,
// which main() may already have set to RUN_SYNC_MSG_LOAD -- TODO confirm the slave_sync layout.
mailboxes->slave_sync.trisc0 = RUN_SYNC_MSG_GO;
mailboxes->slave_sync.trisc1 = RUN_SYNC_MSG_GO;
mailboxes->slave_sync.trisc2 = RUN_SYNC_MSG_GO;
}
}

inline void finish_ncrisc_copy_and_run(dispatch_core_processor_masks enables) {
// On Wormhole, start_ncrisc_kernel_run will reset NCRISC to start the kernel running.
#if !defined(NCRISC_FIRMWARE_KERNEL_SPLIT)
if (enables & DISPATCH_CLASS_MASK_TENSIX_ENABLE_DM1) {
l1_to_ncrisc_iram_copy_wait();
mailboxes->slave_sync.dm1 = RUN_SYNC_MSG_GO;
Expand All @@ -341,14 +345,15 @@ inline void finish_ncrisc_copy_and_run(dispatch_core_processor_masks enables) {
deassert_all_reset();
#endif
}
#endif
}

inline void start_ncrisc_kernel_run(dispatch_core_processor_masks enables) {
#ifdef NCRISC_FIRMWARE_KERNEL_SPLIT
if (enables & DISPATCH_CLASS_MASK_TENSIX_ENABLE_DM1) {
// The NCRISC behaves badly if it jumps from L1 to IRAM, so instead halt it and then reset it to the IRAM
// address it provides.
while (mailboxes->slave_sync.dm1 != RUN_SYNC_MSG_DONE);
while (mailboxes->slave_sync.dm1 != RUN_SYNC_MSG_WAITING_FOR_RESET);
mailboxes->slave_sync.dm1 = RUN_SYNC_MSG_GO;
volatile tt_reg_ptr uint32_t* cfg_regs = core.cfg_regs_base(0);
cfg_regs[NCRISC_RESET_PC_PC_ADDR32] = mailboxes->ncrisc_halt.resume_addr;
Expand Down Expand Up @@ -453,6 +458,14 @@ int main() {
launch_msg_t* launch_msg_address = &(mailboxes->launch[launch_msg_rd_ptr]);
DeviceValidateProfiler(launch_msg_address->kernel_config.enables);
DeviceZoneSetCounter(launch_msg_address->kernel_config.host_assigned_id);
enum dispatch_core_processor_masks enables =
(enum dispatch_core_processor_masks)launch_msg_address->kernel_config.enables;
#if !NCRISC_FIRMWARE_IN_IRAM
// On Wormhole and Blackhole, trigger the NCRISC to start loading CBs and IRAM as soon as possible.
if (enables & DISPATCH_CLASS_MASK_TENSIX_ENABLE_DM1) {
mailboxes->slave_sync.dm1 = RUN_SYNC_MSG_LOAD;
}
#endif
// Copies from L1 to IRAM on chips where NCRISC has IRAM
uint32_t kernel_config_base = firmware_config_init(mailboxes, ProgrammableCoreType::TENSIX, DISPATCH_CLASS_TENSIX_DM0);
int ncrisc_index = static_cast<std::underlying_type<TensixProcessorTypes>::type>(TensixProcessorTypes::DM1);
Expand All @@ -465,9 +478,6 @@ int main() {
volatile tt_reg_ptr uint32_t* cfg_regs = core.cfg_regs_base(0);
cfg_regs[RISCV_IC_INVALIDATE_InvalidateAll_ADDR32] = RISCV_IC_BRISC_MASK | RISCV_IC_TRISC_ALL_MASK | RISCV_IC_NCRISC_MASK;

enum dispatch_core_processor_masks enables =
(enum dispatch_core_processor_masks)launch_msg_address->kernel_config.enables;

run_triscs(enables);

noc_index = launch_msg_address->kernel_config.brisc_noc_id;
Expand Down
33 changes: 30 additions & 3 deletions tt_metal/hw/firmware/src/ncrisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "risc_attribs.h"
#include "circular_buffer.h"
#include "circular_buffer_init.h"
#include "tdma_xmov.h"

#include "debug/waypoint.h"
#include "debug/dprint.h"
Expand Down Expand Up @@ -72,7 +73,7 @@ inline __attribute__((always_inline)) void notify_brisc_and_wait() {
#if NCRISC_FIRMWARE_IN_IRAM
notify_brisc_and_halt(RUN_SYNC_MSG_DONE);
#else
while (*ncrisc_run != RUN_SYNC_MSG_GO) {
while (*ncrisc_run != RUN_SYNC_MSG_GO && *ncrisc_run != RUN_SYNC_MSG_LOAD) {
invalidate_l1_cache();
}
#endif
Expand All @@ -84,6 +85,20 @@ inline __attribute__((always_inline)) void signal_ncrisc_completion() {
#endif
}

#if defined(ARCH_WORMHOLE)
#define MEM_MOVER_VIEW_IRAM_BASE_ADDR (0x4 << 12)
// Starts a Tensix DMA transfer (mover 0, L1 -> L0 direction) that copies the NCRISC kernel
// from L1 into NCRISC IRAM.
//
//   src_addr        source address in L1, in the units tdma_xmov expects (main() in this file
//                   passes the byte address shifted right by 4)
//   size            transfer length (main() passes kernel_config.ncrisc_kernel_size16)
//   address_offset  added to the mover's view of the IRAM base to form the destination
//
// This only issues the transfer; call l1_to_ncrisc_iram_copy_wait() before relying on the
// IRAM contents.
void l1_to_ncrisc_iram_copy(uint32_t src_addr, uint16_t size, uint32_t address_offset = 0) {
    // Deliberately no size == 0 check: issuing the transfer unconditionally saves a branch.
    const uint32_t iram_dst_addr = MEM_MOVER_VIEW_IRAM_BASE_ADDR + address_offset;
    tdma_xmov(TDMA_MOVER0, src_addr, iram_dst_addr, size, XMOV_L1_TO_L0);
}

// Waits for the transfer issued by l1_to_ncrisc_iram_copy() to finish, by waiting on the
// busy flag of TDMA mover 0.
void l1_to_ncrisc_iram_copy_wait() {
    const auto mover0_busy_mask = RISCV_TDMA_STATUS_FLAG_MOVER0_BUSY_MASK;
    wait_tdma_movers_done(mover0_busy_mask);
}
#endif

int main(int argc, char *argv[]) {
configure_l1_data_cache();
DIRTY_STACK_MEMORY();
Expand All @@ -109,25 +124,37 @@ int main(int argc, char *argv[]) {
launch_msg_t* launch_msg = &(mailboxes->launch[launch_msg_rd_ptr]);

uint32_t kernel_config_base = firmware_config_init(mailboxes, ProgrammableCoreType::TENSIX, DISPATCH_CLASS_TENSIX_DM1);
int index = static_cast<std::underlying_type<TensixProcessorTypes>::type>(TensixProcessorTypes::DM1);

#if defined(ARCH_WORMHOLE)
uint32_t ncrisc_kernel_src_address = kernel_config_base + launch_msg->kernel_config.kernel_text_offset[index];
l1_to_ncrisc_iram_copy(ncrisc_kernel_src_address >> 4, launch_msg->kernel_config.ncrisc_kernel_size16, 0);
#endif
uint32_t tt_l1_ptr* cb_l1_base =
(uint32_t tt_l1_ptr*)(kernel_config_base + launch_msg->kernel_config.local_cb_offset);
uint32_t end_cb_index = launch_msg->kernel_config.max_local_cb_end_index;
setup_local_cb_read_write_interfaces(cb_l1_base, 0, end_cb_index, true, true, false);

#if defined(ARCH_WORMHOLE)
l1_to_ncrisc_iram_copy_wait();
#endif

cb_l1_base = (uint32_t tt_l1_ptr*)(kernel_config_base + launch_msg->kernel_config.remote_cb_offset);
end_cb_index = launch_msg->kernel_config.min_remote_cb_start_index;
experimental::setup_remote_cb_interfaces(cb_l1_base, end_cb_index);
WAYPOINT("R");

int index = static_cast<std::underlying_type<TensixProcessorTypes>::type>(TensixProcessorTypes::DM1);
void (*kernel_address)(uint32_t) = (void (*)(uint32_t))
(kernel_config_base + launch_msg->kernel_config.kernel_text_offset[index]);
#ifdef ARCH_BLACKHOLE
while (*ncrisc_run != RUN_SYNC_MSG_GO) {
invalidate_l1_cache();
}
(*kernel_address)((uint32_t)kernel_address);
#elif defined(ARCH_WORMHOLE)
// Jumping to IRAM causes bizarre behavior, so signal the brisc to reset the ncrisc to the IRAM address.
mailboxes->ncrisc_halt.resume_addr = (uint32_t)kernel_init;
notify_brisc_and_halt_to_iram(RUN_SYNC_MSG_DONE, (uint32_t)kernel_address);
notify_brisc_and_halt_to_iram(RUN_SYNC_MSG_WAITING_FOR_RESET, (uint32_t)kernel_address);
#else
kernel_init((uint32_t)kernel_address);
#endif
Expand Down
10 changes: 8 additions & 2 deletions tt_metal/impl/debug/watcher_device_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -634,16 +634,22 @@ void WatcherDeviceReader::DumpRunState(CoreDescriptor& core, const launch_msg_t*
code = 'D';
} else if (state == RUN_MSG_RESET_READ_PTR) {
code = 'R';
} else if (state == RUN_SYNC_MSG_LOAD) {
code = 'L';
} else if (state == RUN_SYNC_MSG_WAITING_FOR_RESET) {
code = 'W';
}
if (code == 'U') {
LogRunningKernels(core, launch_msg);
TT_THROW(
"Watcher data corruption, unexpected run state on core{}: {} (expected {} or {} or {})",
"Watcher data corruption, unexpected run state on core{}: {} (expected {}, {}, {}, {}, or {})",
core.coord.str(),
state,
RUN_MSG_INIT,
RUN_MSG_GO,
RUN_MSG_DONE);
RUN_MSG_DONE,
RUN_SYNC_MSG_LOAD,
RUN_SYNC_MSG_WAITING_FOR_RESET);
} else {
fprintf(f, "%c", code);
}
Expand Down
6 changes: 3 additions & 3 deletions tt_metal/jit_build/build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,18 +219,18 @@ void JitBuildState::finish_init() {
std::string build_dir =
llrt::RunTimeOptions::get_instance().get_root_dir() + "runtime/hw/lib/" + get_alias(env_.arch_) + "/";
if (this->is_fw_) {
if (this->target_name_ == "brisc") {
if (this->target_name_ == "brisc" and this->env_.arch_ == tt::ARCH::GRAYSKULL) {
this->link_objs_ += build_dir + "tdma_xmov.o ";
}
if (this->target_name_ != "erisc") {
this->link_objs_ += build_dir + "tmu-crt0.o ";
}
if (this->target_name_ == "ncrisc" and
((this->env_.arch_ == tt::ARCH::GRAYSKULL or this->env_.arch_ == tt::ARCH::WORMHOLE_B0))) {
if (this->target_name_ == "ncrisc" and this->env_.arch_ == tt::ARCH::GRAYSKULL) {
this->link_objs_ += build_dir + "ncrisc-halt.o ";
}
if (this->target_name_ == "ncrisc" and this->env_.arch_ == tt::ARCH::WORMHOLE_B0) {
this->link_objs_ += build_dir + "ncrisc-halt-wormhole.o ";
this->link_objs_ += build_dir + "tdma_xmov.o ";
}
} else {
if (this->target_name_ == "ncrisc" and this->env_.arch_ == tt::ARCH::WORMHOLE_B0) {
Expand Down

0 comments on commit 1cd5a87

Please sign in to comment.